<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v14i1e84095</article-id><article-id pub-id-type="doi">10.2196/84095</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Detection of Interpretable and Fine-Grained Brain Tumor Magnetic Resonance Imaging Based on Progressive Pruning: Machine Learning Model Development and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Liu</surname><given-names>Yupeng</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Song</surname><given-names>Shuwei</given-names></name><degrees>ME</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lian</surname><given-names>Shibo</given-names></name><degrees>ME</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Zhang</surname><given-names>Xiaochen</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>School of Computer Science and Technology, Harbin University of Science and Technology</institution><addr-line>Harbin</addr-line><country>China</country></aff><aff id="aff2"><institution>Heilongjiang Institute of Technology</institution><addr-line>No. 999 Hongqi Street, Daowai District, Harbin City, Heilong</addr-line><addr-line>Harbin</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Thanh</surname><given-names>Hoang Van</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Wang</surname><given-names>Maofa</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Xiaochen Zhang, PhD, Heilongjiang Institute of Technology, No. 
999 Hongqi Street, Daowai District, Harbin City, Heilong, Harbin, 150001, China, 86 13608701118; <email>zxc161616@126.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>29</day><month>4</month><year>2026</year></pub-date><volume>14</volume><elocation-id>e84095</elocation-id><history><date date-type="received"><day>14</day><month>09</month><year>2025</year></date><date date-type="rev-recd"><day>28</day><month>02</month><year>2026</year></date><date date-type="accepted"><day>28</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Yupeng Liu, Shuwei Song, Shibo Lian, Xiaochen Zhang. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 29.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2026/1/e84095"/><abstract><sec><title>Background</title><p>Brain tumors are among the most malignant diseases of the central nervous system, and early, accurate detection is of great significance for improving patient survival rates. However, the heterogeneity of brain tumors in terms of morphology, size, and location on magnetic resonance imaging (MRI) images, as well as their similarity to surrounding normal brain tissue, poses significant challenges for tumor detection.</p></sec><sec><title>Objective</title><p>This study aims to develop a high-performance brain tumor detection framework that integrates feature enhancement, channel attention, and progressive pruning, achieving an optimal balance between detection accuracy, model efficiency, and interpretability for slice-level MRI tumor localization tasks.</p></sec><sec sec-type="methods"><title>Methods</title><p>This paper proposes a convolution Prewitt-and-pooling&#x2013;based preprocessing (CSPP) approach, based on the &#x201C;you only look once&#x201D; version 11 (YOLOv11) framework, which highlights important structural details more effectively than traditional statistics. A dynamic convolution&#x2013;based C3k2 (DCC) module was integrated to more efficiently capture both local and global features. A channel prior convolutional attention (CPCA) module was introduced before the detection head, enabling the network to specifically focus on information-rich channels and key spatial regions. Through a progressive hybrid pruning strategy (PHPS), the model was optimized for efficient inference. 
Furthermore, Eigen-class activation mapping (Eigen-CAM) was used to interpret the prediction results, making them more transparent.</p></sec><sec sec-type="results"><title>Results</title><p>Extensive experiments on 3 brain tumor MRI datasets demonstrated the superior performance of CDCP-YOLO (CSPP-DCC-CPCA-PHPS&#x2013;YOLO). On Br35H, the mean average precision (mAP) at an intersection-over-union (IoU) threshold of 0.5 (mAP<sub>0.5</sub>) increased by 2.6%, average mAP over several IoU thresholds (0.50-0.95; mAP<sub>0.5:0.95</sub>) increased by 5.9%, and number of floating-point operations (&#x00D7;10&#x2079;; GFLOPs) decreased by 47.7%. On Roboflow, mAP<sub>0.5</sub> increased by 19.5%, mAP<sub>0.5:0.95</sub> increased by 7.7%, and GFLOPs decreased by 47.7%. On Capstone, mAP<sub>0.5</sub> increased by 6.9%, mAP<sub>0.5:0.95</sub> increased by 5.8%, and GFLOPs decreased by 47.7%.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The proposed CDCP-YOLO framework achieves an optimal balance between accuracy, efficiency, and interpretability, providing a lightweight and reliable solution for slice-level brain tumor detection in MRI images.</p></sec></abstract><kwd-group><kwd>magnetic resonance imaging</kwd><kwd>MRI</kwd><kwd>brain tumor</kwd><kwd>convolution Prewitt-and-pooling&#x2013;based preprocessing</kwd><kwd>progressive hybrid pruning strategy</kwd><kwd>dynamic convolution-based C3k2</kwd><kwd>feature fusion</kwd><kwd>medical imaging</kwd><kwd>brain tumor detection</kwd><kwd>deep learning</kwd><kwd>class activation mapping</kwd><kwd>Eigen-CAM</kwd><kwd>lightweight model</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Brain tumors are highly complex and rapidly progressing diseases of the central nervous system. Their malignancy is often characterized by extreme invasiveness and frequent recurrence, posing a serious threat to patients&#x2019; lives and health. Medical research indicates that accurate identification and timely diagnosis of tumor types in the early stages can significantly extend patient survival and improve quality of life. Magnetic resonance imaging (MRI) has become the gold standard for preoperative imaging diagnosis of most brain tumors due to its noninvasive, high-resolution, and multimodal imaging advantages, playing a key role in the auxiliary diagnosis and efficacy evaluation of brain tumors [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. Although MRI provides rich information at the image level, brain tumor detection still faces several challenges:</p><list list-type="order"><list-item><p>The tumor shape is complex and diverse, with blurred boundaries, and its grayscale distribution often overlaps with normal brain tissue, making it difficult to distinguish and locate accurately.</p></list-item><list-item><p>The anatomical structure of the brain is complex and varies significantly between individuals, making it difficult for traditional methods to establish a universally applicable and robust representation model.</p></list-item><list-item><p>Manual image segmentation is not only time-consuming and labor-intensive but also susceptible to subjective factors, necessitating efficient and intelligent auxiliary diagnostic tools.</p></list-item></list><p>In recent years, an increasing number of researchers have begun to focus on intelligent methods for brain tumor detection. 
Most of this research relies on digital image processing and machine learning to assist in identifying brain tumor regions [<xref ref-type="bibr" rid="ref4">4</xref>]. Such methods usually first acquire brain MRI data; then use image processing to extract key visual features, such as gray scale, shape contours, and texture patterns; and finally feed these features into support vector machines, random forests, or k-nearest neighbor classifiers to determine the tumor site and type. Methods that rely on handcrafted features are effective for detecting target lesions, but when faced with complex tissue structures and highly heterogeneous tumor morphologies, they often exhibit poor robustness, sensitivity to image noise, and limited generalization, which restricts their widespread application in clinical practice. With the rapid development of the convolutional neural network (CNN), target detection has entered a new stage with end-to-end learning as its core. Girshick et al [<xref ref-type="bibr" rid="ref5">5</xref>] proposed region-based CNN (R-CNN), which first extracts candidate target regions and then uses support vector machines for classification, significantly improving detection performance. Subsequently, Fast R-CNN and Faster R-CNN [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] further accelerated detection speed and improved accuracy by sharing features and a region proposal network. To achieve real-time detection, Redmon et al [<xref ref-type="bibr" rid="ref8">8</xref>] proposed the &#x201C;you only look once&#x201D; (YOLO) series of algorithms, which transformed target detection into a regression problem, simultaneously predicting multiple bounding boxes and categories in a single forward pass, greatly increasing the detection speed. Subsequent versions of YOLO have continuously improved the detection accuracy and network structure and are widely used in scenarios such as autonomous driving and medical image analysis. In addition, single shot multibox detector (SSD) [<xref ref-type="bibr" rid="ref9">9</xref>] also achieves a balance between speed and accuracy by detecting at multiple feature scales. These deep learning&#x2013;based algorithms not only overcome the limitations of traditional manual feature engineering but also greatly improve the modeling accuracy and practical application value, becoming the mainstream direction of current image target detection, including brain tumor detection.</p></sec><sec id="s1-2"><title>Related Works</title><sec id="s1-2-1"><title>YOLO-Based Detection on MRI</title><p>In recent years, target detection models based on YOLO have been widely applied and continuously optimized in medical imaging. In the area of noninvasive disease detection, researchers have primarily focused on introducing attention mechanisms, multitask learning, and lightweight model optimization. Chen et al [<xref ref-type="bibr" rid="ref10">10</xref>] proposed MSA-YOLOv5, which combines multiple attention mechanisms to focus on the automatic detection of lesions in acute ischemic stroke. On multimodal MRI, the model enhances the detection of small lesions and embolic signals, improving detection accuracy while reducing the number of parameters. Tang et al [<xref ref-type="bibr" rid="ref11">11</xref>] developed a YOLOv5 model based on the squeeze-and-excitation attention mechanism, which improved the detection of Parkinson disease. 
The model enhances detection accuracy by adaptively focusing on key features, especially in distinguishing substantia nigra and red nucleus lesions in T2-weighted MRI. Wang et al [<xref ref-type="bibr" rid="ref12">12</xref>] introduced an improved YOLOv5 architecture for the diagnosis and grading of lumbar disc herniation. This method adds an attention module to the Cross Stage Partial part and enhances the Spatial Pyramid Pooling-Fast part, achieving multitask learning for both classification and grading of Pfirrmann grades, highlighting prominent features in the intervertebral space and high-intensity zones.</p><p>In the context of brain tumor MRI detection, the research on improving YOLO has also advanced rapidly. Kang et al [<xref ref-type="bibr" rid="ref13">13</xref>] proposed a new YOLO architecture named RCS-YOLO, which is optimized for brain tumor detection in medical imaging. RCS-YOLO uses reparameterized convolution combined with channel shuffle (RCS) to enhance the model&#x2019;s computational efficiency and detection accuracy. Kang et al [<xref ref-type="bibr" rid="ref14">14</xref>] developed BGF-YOLO, an improved YOLOv8 that enhances brain tumor MRI detection performance by introducing multilevel feature fusion, a dynamic attention mechanism, and an additional detection head. Kang et al [<xref ref-type="bibr" rid="ref15">15</xref>] proposed a new YOLO architecture, pretrained knowledge-guided YOLO (PK-YOLO), optimized for brain tumor detection in multiplanar MRI slices. PK-YOLO uses a pretrained RepViT backbone network combined with sparse mask modeling technology and Focaler-intersection-over-union (IoU) regression loss to improve the detection performance of small targets, making it the first YOLO object detector to introduce pretrained knowledge guidance. Dixit et al [<xref ref-type="bibr" rid="ref16">16</xref>] proposed a brain tumor detection method based on YOLOv4-tiny, which uses transfer learning and a fine-tuning technique optimized for MRI images. This method leverages features from the pretrained Common Objects in Context (COCO) dataset with the 29-layer YOLOv4-tiny architecture, improving the model&#x2019;s computational efficiency and detection accuracy through precise hyperparameter tuning. Abdusalomov et al [<xref ref-type="bibr" rid="ref17">17</xref>] proposed an improved YOLOv7 architecture optimized for brain tumor detection in MRI images. This method integrates a Convolutional Block Attention Module, a Spatial Pyramid Pooling Fast+ layer, and a Bidirectional Feature Pyramid Network to improve the detection accuracy for glioma, meningioma, and pituitary tumors.</p></sec><sec id="s1-2-2"><title>Pruning Method</title><p>As the complexity of the deep neural network continues to increase, how to reduce computational and storage requirements while maintaining high performance has become a key challenge in medical imaging processing. Pruning techniques offer an effective solution to this problem and have demonstrated considerable potential in various medical image analysis applications. Fernandes et al [<xref ref-type="bibr" rid="ref18">18</xref>] proposed a generative adversarial pruning method based on an evolutionary strategy, specifically optimizing medical image diagnosis. This method selects options through minimal Wasserstein distance. Wu et al [<xref ref-type="bibr" rid="ref19">19</xref>] developed the FairPrune method, a new technique that achieves fairness by pruning, specifically applied to the diagnosis of skin diseases. 
This method prunes based on differences in parameter importance, significantly improving the fairness of the model across different demographic groups. Adnan et al [<xref ref-type="bibr" rid="ref20">20</xref>] proposed a structured pruning method specifically for optimizing the U-Net architecture. This method addresses the pruning complexity between the encoder and decoder in U-Net, compressing the model by assessing the importance of individual channels and tasks. Fernandes et al [<xref ref-type="bibr" rid="ref21">21</xref>] further developed a structured pruning framework that integrates multitask learning and pruning. This method uses iterative pruning and block-based network deepening, optimizing the model with a policy-based and multiobjective decision-making process. Cocosco et al [<xref ref-type="bibr" rid="ref22">22</xref>] proposed a fully automated and nonparametric brain tissue classification method. This method uses a nonparametric implementation, training sample selection through the minimum spanning tree method and stereotaxic space priors, showing significant performance improvement in subjects with large morphological variations. Xuan et al [<xref ref-type="bibr" rid="ref23">23</xref>] introduced a pruning method for k-space subsampling and reconstruction based on a generative model. This method is inspired by network pruning, starting with a fully sampled k-space model and iteratively removing less important k-space phase encoding, demonstrating good performance in single-coil and multicoil MRI reconstruction. Graziani et al [<xref ref-type="bibr" rid="ref24">24</xref>] developed an interpretable pruning strategy on CNN specifically for scale-variant features in medical images. This method uses deep learning interpretability techniques to analyze the hierarchical scale coding of the InceptionV3 and ResNet50 architectures, finding that scale information peaks in the middle layers and decreases near the softmax layer. This discovery leads to a pruning strategy that significantly improves the performance of nucleus regression and mitosis classification in histopathological images. Holste et al [<xref ref-type="bibr" rid="ref25">25</xref>] systematically analyzed the impact of the pruning on medical image classification across various long-tailed multilabel disease datasets for the first time. This study in the chest X-ray image diagnosis experimentally demonstrated that the pruning has differential effects on different diseases, with rare diseases being more susceptible to being &#x201C;forgotten&#x201D; than common diseases. The study also introduces the concept of pruning-identified exemplars, revealing through human reader studies that pruning-identified exemplars often have more label noise, lower image quality, and higher diagnostic uncertainty. Saleh et al [<xref ref-type="bibr" rid="ref26">26</xref>] investigated the effectiveness of different network architectures (GoogLeNet, ResNet, and EfficientNet) combined with transfer learning and the network pruning algorithm for medical image classification. They validated the effectiveness of these techniques in both brain tumor classification and chest X-ray inflammation detection. Jaiswal et al [<xref ref-type="bibr" rid="ref27">27</xref>] proposed a pruning-assisted, self-supervised image localization method. 
This method uses a &#x201C;learning by forgetting&#x201D; training scheme, which significantly improves skin disease localization performance under unsupervised, weakly supervised, and sparsely supervised settings.</p></sec><sec id="s1-2-3"><title>Visualization Based on Class Activation Mapping</title><p>In recent years, visualization methods based on class activation mapping (CAM) have been widely applied in interpretability research on deep neural networks. The classic gradient-weighted CAM (Grad-CAM) [<xref ref-type="bibr" rid="ref28">28</xref>] weights the channels of a convolutional feature map by the gradients of the target class score, thereby generating a heat map that is closely related to the target category. However, when dealing with multitarget and complex background scenes, Grad-CAM sometimes struggles to capture fine-grained details and local features. To address this limitation, Grad-CAM++ [<xref ref-type="bibr" rid="ref29">29</xref>] further exploits higher-order gradient information, significantly enhancing sensitivity to small targets and overlapping regions. In contrast, Eigen-CAM [<xref ref-type="bibr" rid="ref30">30</xref>] removes the reliance on gradient computation entirely, applying principal component analysis to the convolutional features to generate class-independent visual explanations in an unsupervised manner. To further enhance the detail and perceptual awareness of heat maps, LayerCAM [<xref ref-type="bibr" rid="ref31">31</xref>] fuses the activation maps of deep and shallow convolution layers, yielding finer pixel-level correspondence and finer-grained heat maps that better express boundary and structural information. Many recent studies have explored and validated CAM methods in medical diagnosis. Windisch et al [<xref ref-type="bibr" rid="ref32">32</xref>] proposed a ResNet50-based brain tumor detection model and used Grad-CAM for interpretability analysis in the identification of meningiomas and gliomas in MRI slices. Shawon et al [<xref ref-type="bibr" rid="ref33">33</xref>] proposed a cost-sensitive deep neural network that integrated multiple interpretable techniques (including Grad-CAM, LIME, and Score-CAM) for model interpretation in brain tumor detection under imbalanced data conditions. Dasanayaka et al [<xref ref-type="bibr" rid="ref34">34</xref>] proposed a deep learning model based on U-Net and DenseNet, which used Grad-CAM to generate heat maps for brain tumor segmentation and classification. This combined strategy not only achieves effective segmentation and classification but also provides informative visualizations. Zeineldin et al [<xref ref-type="bibr" rid="ref35">35</xref>] proposed an enhanced EfficientNetv2 that integrates a global attention mechanism and efficient channel attention and uses Grad-CAM visualization for model interpretation, applied to MRI-based brain tumor classification. Guluwadi et al [<xref ref-type="bibr" rid="ref36">36</xref>] proposed a brain MRI detection method that combines ResNet50 with Grad-CAM.</p></sec></sec><sec id="s1-3"><title>Objectives</title><p>In recent years, the rapid advancement of deep learning has significantly propelled the development of brain tumor detection, achieving remarkable results. 
However, current brain tumor detection models still face several challenges:</p><list list-type="order"><list-item><p>The boundaries of brain tumors are often blurry and the structures are irregular. The shallow features extracted by traditional convolutions are insufficient to effectively capture the key regions, leading to high rates of missed detection and false positives.</p></list-item><list-item><p>The parameter size of the target detection model is massive, and the computational overhead is high, which is not conducive to deployment on clinical edge devices, thus limiting their practicality. Traditional pruning methods, typically based on a single-weight threshold for coarse-grained pruning, can easily damage key model structures. At the same time, the sparse fine-tuning process, due to its inefficiency, struggles to meet the demands of high efficiency and deployability in medical scenarios.</p></list-item><list-item><p>Most target detection methods lack interpretability, making it difficult to clearly present the decision-making basis of the model to doctors, which limits its credibility in auxiliary clinical decision-making.</p></list-item></list><p>This paper proposes an interpretable, fine-grained brain tumor MRI detection method based on progressive pruning to address these challenges in an integrated manner. Importantly, CDCP-YOLO (CDC-YOLO with PHPS) is not a simple stacking of existing techniques but a task-driven co-design framework specifically tailored to the structural complexity, computational constraints, and interpretability requirements of brain tumor MRI detection. Its methodological distinctions are summarized as follows. First, unlike RCS-YOLO, which primarily accelerates inference through convolution reparameterization and channel shuffling, CDCP-YOLO introduces structure-aware input-level modeling (convolution Prewitt-and-pooling&#x2013;based preprocessing [CSPP]) that explicitly embeds classical edge priors into the network. This design targets blurred and ambiguous tumor boundaries at the earliest stage of feature extraction&#x2014;an issue that reparameterization alone cannot effectively address. Second, whereas BGF-YOLO improves detection performance mainly through multilevel feature fusion and additional detection heads, CDCP-YOLO adopts dynamic convolution&#x2013;based C3k2 (DCC) feature adaptation, enabling input-dependent kernel generation. This strategy allows the network to flexibly adapt to heterogeneous tumor morphologies and scales without introducing detection head redundancy. Third, while PK-YOLO relies on external pretrained knowledge and sparse mask modeling to enhance small-target detection, CDCP-YOLO does not depend on any external pretraining paradigm. Instead, it improves representation capacity through internal, data-adaptive mechanisms, ensuring robustness and generalizability under limited or domain-specific medical datasets. Fourth, existing pruning-based detectors typically use single-criterion or one-shot pruning strategies that may disrupt critical feature pathways. In contrast, the proposed progressive hybrid pruning strategy (PHPS) jointly considers global channel sparsity and local structural dependency in a staged manner, enabling aggressive model compression while preserving detection-critical structures. 
Finally, unlike prior works that treat Grad-CAM&#x2013;style visualization as a post hoc analysis tool, CDCP-YOLO tightly integrates Eigen-CAM into the detection head, treating interpretability as a core design objective rather than an auxiliary component. This integration ensures semantic consistency between detection results and visual explanations, which is essential for clinical decision support.</p><p>The main contributions of this work are summarized as follows:</p><list list-type="order"><list-item><p>Multiscale feature enhancement mechanism: a CSPP module was introduced at the model&#x2019;s input stage, integrating Prewitt edge detection with pooling operations to strengthen the construction of tumor edges and structures. A DCC module was introduced into the backbone and neck networks to achieve adaptive expression for different tumor forms and sizes. A channel prior convolutional attention (CPCA) module was introduced before the detection head to guide the network to focus on the key region of the most discriminative brain tumor.</p></list-item><list-item><p>Lightweight pruning strategy: a progressive hybrid pruning strategy was proposed, combining L1-norm and GroupNorm feature statistics. The pruning process is carried out in stages, prioritizing the pruning of redundant channels while maximally preserving key information flow. This method significantly reduces parameter size and computational overhead while effectively mitigating the performance degradation caused by large-scale pruning.</p></list-item><list-item><p>Interpretable confidence enhancement: a gradient-free principal component analysis method was adopted, which can generate clear saliency heat maps, visually demonstrating the most important basis for the brain tumor MRI model&#x2019;s decision-making. This significantly improves the transparency, credibility, and practicality of the model in clinical applications.</p></list-item></list></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overall Framework</title><p>This paper proposes an interpretable and fine-grained brain tumor detection model based on progressive pruning. As shown in <xref ref-type="fig" rid="figure1">Figure 1A</xref>, the overall framework was as follows: MRI images were trained on the improved CDC-YOLO network, and an efficient CDCP-YOLO model was obtained through a progressive hybrid pruning strategy and fine-tuning. As shown in <xref ref-type="fig" rid="figure1">Figure 1B</xref>, CDC-YOLO is composed of three main structures:</p><list list-type="order"><list-item><p>Backbone stage: the CSPP module is used to replace the first 2 convolutional layers. CSPP combines Prewitt edge detection with pooling operations to enhance the model&#x2019;s initial feature extraction capability for edges and textures. Subsequently, the main network uses multiple DCC modules, using dynamic convolution to enhance the perceptual ability for multiscale tumor regions.</p></list-item><list-item><p>Neck stage: multiscale feature maps are fused through upsampling and concatenation. 
The C3k2 module introduces dynamic convolution and key location fusion, and the CPCA module combines channel and spatial attention mechanisms to guide the model to focus on key diagnostic areas and improve detection accuracy.</p></list-item><list-item><p>Head stage: multiple detection heads at different scales are used to detect targets of different sizes.</p></list-item></list><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Overall framework and network architecture of the proposed CDCP-YOLO model for brain tumor magnetic resonance imaging detection. (A) The complete training and pruning pipeline, including feature enhancement, progressive hybrid pruning, and fine-tuning. (B) Detailed architecture of the CDC-YOLO backbone, neck, and detection head, where CSPP enhances edge features at the input stage, DCC adapts to multiscale tumor morphology, and channel prior convolutional attention (CPCA) guides the detection head to focus on discriminative channels and spatial regions. C2PSA, C2 block integrated with partial self-attention; CAM, class activation mapping; CDC-YOLO, cross-scale dynamic convolution&#x2013;based YOLO; CDCP-YOLO, CDC-YOLO with PHPS; CSPP, convolution Prewitt-and-pooling&#x2013;based preprocessing; Concat, concatenation; Conv, convolution; DCC, dynamic convolution&#x2013;based C3k2 module; SPPF, spatial pyramid pooling-fast; Val, validation subset; YOLO, &#x201C;you only look once&#x201D; framework.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig01.png"/></fig></sec><sec id="s2-2"><title>Convolution Prewitt-and-Pooling&#x2013;Based Preprocessing</title><p>The stem of the YOLOv11 backbone includes 2 initial convolution layers, primarily used to extract initial features from the input image. By extracting deeper features layer by layer, the network can construct sufficient information for effective target detection. To enhance the model&#x2019;s perceptual ability for edge structures, the first 2 standard convolutional layers in YOLOv11 are replaced with a self-designed CSPP (<xref ref-type="fig" rid="figure2">Figure 2</xref>), which more effectively extracts contour information and local texture features from the image. As the input feature processing unit of the entire CDCP-YOLO, CSPP integrates standard convolution, Prewitt edge detection, and multiscale pooling operations, aiming to enhance the model&#x2019;s perceptual ability for brain tumor edge features and its adaptability to different shapes.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Structural illustration of the convolution Prewitt-and-pooling&#x2013;based preprocessing (CSPP) module. The module integrates standard convolution, Prewitt edge detection (horizontal and vertical), and max pooling to enhance tumor boundary and texture representation at the input stage. 
Maxpool, maximum pool.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig02.png"/></fig><p>First, the input image <inline-formula><mml:math id="ieqn1"><mml:mi>X</mml:mi></mml:math></inline-formula> undergoes a 3&#x00D7;3 convolution operation to obtain the initial feature map <inline-formula><mml:math id="ieqn2"><mml:msub><mml:mrow><mml:mi mathvariant="normal">X</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula>; this operation completes spatial compression and channel expansion.</p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi mathvariant="normal">X</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="normal">X</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mtext>&#x00A0;</mml:mtext></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Next, the Prewitt edge detection operator is applied to <inline-formula><mml:math id="ieqn3"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula>, filtering in the horizontal direction <inline-formula><mml:math id="ieqn4"><mml:msub><mml:mrow><mml:mi>G</mml:mi></mml:mrow><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and the vertical direction <inline-formula><mml:math id="ieqn5"><mml:msub><mml:mrow><mml:mi>G</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> to extract the edge response in the image.</p><disp-formula id="E2"><label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The output of the Prewitt operator is obtained by calculating the Euclidean distance to combine the responses from both directions, resulting in the complete edge information <inline-formula><mml:math id="ieqn6"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>.</p><disp-formula id="E3"><label>(3)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" 
columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msqrt><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:msqrt></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Simultaneously, another branch inputs <inline-formula><mml:math id="ieqn7"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> into a max-pooling layer, and the spatial dimensions are kept unchanged through zero-padding, generating the pooled feature <inline-formula><mml:math id="ieqn8"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. The 2 output features, <inline-formula><mml:math id="ieqn9"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn10"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, are concatenated along the channel dimension to generate the feature <inline-formula><mml:math id="ieqn11"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula>. 
This fusion retains both structural edges and spatial context information.</p><disp-formula id="E4"><label>(4)</label><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The fused feature map <inline-formula><mml:math id="ieqn12"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> is then passed through a 3&#x00D7;3 convolution to further extract high-level features.</p><disp-formula id="E5"><label>(5)</label><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mn>3</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Finally, the number of channels is compressed, and the dimensions are adjusted using a 1&#x00D7;1 convolution to generate the final feature <inline-formula><mml:math id="ieqn13"><mml:mi>Y</mml:mi></mml:math></inline-formula>, which serves as the input to the backbone structure.</p><disp-formula id="E6"><label>(6)</label><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>Y</mml:mi><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>CSPP uses the Prewitt operator in both horizontal and vertical directions to automatically extract prominent edge contours in the image, significantly enhancing the detection capability for blurry contours and unclear tumor regions. At the same time, the output of Prewitt is fused with the original feature map, which not only preserves the local details but also strengthens the expression of structural information. 
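A minimal PyTorch-style sketch of this pipeline is given below; the module name, channel widths, stride, and kernel settings are illustrative assumptions rather than the authors&#x2019; released implementation.
<preformat>
# Illustrative CSPP sketch (assumed names and hyperparameters, not the official code).
import torch
import torch.nn as nn
import torch.nn.functional as F


class CSPP(nn.Module):
    def __init__(self, c_in=3, c_out=64):
        super().__init__()
        # 3x3 convolution: spatial compression and channel expansion (equation 1).
        self.conv1 = nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1)
        # Fixed, non-learnable Prewitt kernels applied per channel (equations 2 and 3).
        gx = torch.tensor([[-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0]])
        self.register_buffer("prewitt_x", gx.repeat(c_out, 1, 1, 1))
        self.register_buffer("prewitt_y", gx.t().repeat(c_out, 1, 1, 1))
        # Max pooling with stride 1 and padding keeps the spatial size unchanged.
        self.pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        # Fusion convolutions and normalization (equations 5 and 6).
        self.conv3 = nn.Conv2d(2 * c_out, 2 * c_out, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(2 * c_out, c_out, kernel_size=1)
        self.bn = nn.BatchNorm2d(c_out)

    def forward(self, x):
        x1 = self.conv1(x)
        ex = F.conv2d(x1, self.prewitt_x, padding=1, groups=x1.shape[1])
        ey = F.conv2d(x1, self.prewitt_y, padding=1, groups=x1.shape[1])
        edge = torch.sqrt(ex ** 2 + ey ** 2 + 1e-6)    # edge magnitude (equation 3)
        pooled = self.pool(x1)                         # spatial context branch
        fused = torch.cat([edge, pooled], dim=1)       # channel concatenation (equation 4)
        return self.bn(self.conv4(self.conv3(fused)))  # equations 5 and 6
</preformat>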
In addition, the max-pooling branch aggregates local spatial responses while keeping the feature map size unchanged, making the network more robust to variations in tumor size and location.</p><p>The CSPP module is designed to introduce deterministic structural priors into the early stages of feature extraction. While standard convolutional layers learn kernels stochastically, the inclusion of a fixed Prewitt operator provides explicit edge-sensitive cues that are critical for delineating tumor boundaries. We acknowledge that in medical imaging physics, difference operators are typically sensitive to high-frequency noise. Therefore, rather than using the Prewitt output directly, the CSPP module integrates it through a multistage fusion strategy. Specifically, the gradient maps are processed via max pooling to perform local maximum selection, which emphasizes strong structural edges while suppressing isolated noise spikes. These features are then fused with learnable convolutional features using a 1&#x00D7;1 convolution layer and batch normalization. This design allows the network to adaptively weight the explicit structural priors against learnable representations, ensuring that the model captures fine-grained morphological details without amplifying imaging artifacts.</p></sec><sec id="s2-3"><title>Dynamic Convolution-Based C3k2</title><p>The C3k2 module is a core feature extraction unit in YOLOv11, and its design goal is to achieve the best balance between feature representation capability and computational efficiency. This module uses a multibranch structure and residual connections to enhance the multiscale feature extraction capability while maintaining training stability. C3k2 supports two configuration modes:</p><list list-type="order"><list-item><p>In the C3k=True mode, a lightweight C3k branch is used. The input features are divided into multiple groups for separate processing and then fused through concatenation. This group-wise convolution strategy significantly reduces computational complexity, making it particularly suitable for applications with high real-time requirements.</p></list-item><list-item><p>In the C3k=False mode, a more complex bottleneck structure is introduced, combined with additional convolution and activation layers to extract deeper and more discriminative features, thus showing superiority in more accuracy-sensitive tasks.</p></list-item></list><p>To enhance the network&#x2019;s modeling capability for complex tumor regions, in this study, we built a DCC module (<xref ref-type="fig" rid="figure3">Figure 3</xref>) based on the original C3k2 module by introducing a dynamic convolution [<xref ref-type="bibr" rid="ref37">37</xref>]. The DCC module enhances the model&#x2019;s capabilities in multiscale structure modeling, detail preservation, and contextual awareness by replacing the fixed convolution with the input-adaptive dynamic convolution.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Architecture of the dynamic convolution&#x2013;based C3k2 (DCC) module. The convolution kernel is dynamically generated according to the global context of the input feature map, enabling adaptive modeling of tumors with diverse sizes and shapes. 
Concat, concatenation; Conv, convolution.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig03.png"/></fig><p>Given an input feature <inline-formula><mml:math id="ieqn14"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>X</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, the DCC module uses a dynamic kernel <italic>W(X</italic>) to perform the convolution operation:</p><disp-formula id="E7"><label>(7)</label><mml:math id="eqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>Y</mml:mi><mml:mo>=</mml:mo><mml:mi>W</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>X</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x00D7;</mml:mo><mml:mi>X</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The generation process of the convolution kernel <inline-formula><mml:math id="ieqn15"><mml:mi>W</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mi>X</mml:mi></mml:mrow></mml:mfenced></mml:math></inline-formula> depends on the global context information of the input image. To achieve this weight regeneration, a global average pooling layer is first used to compute the channel-wise global vector:</p><disp-formula id="E8"><label>(8)</label><mml:math id="eqn8"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>g</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mi>H</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>H</mml:mi></mml:mrow></mml:munderover><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>W</mml:mi></mml:mrow></mml:munderover><mml:mi>X</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo>:</mml:mo><mml:mo>,</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Then, 2 fully connected layers and a nonlinear activation function are used to generate the dynamic attention weights:</p><disp-formula id="E9"><label>(9)</label><mml:math id="eqn9"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" 
columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>&#x03B8;</mml:mi><mml:mo>=</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>g</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Here <italic>&#x03C3;</italic>() is the ReLU activation function, and <inline-formula><mml:math id="ieqn16"><mml:msub><mml:mrow><mml:mi>F</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn17"><mml:msub><mml:mrow><mml:mi>F</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> are the first and second fully connected layers, respectively. The final parameter <inline-formula><mml:math id="ieqn18"><mml:mi>&#x03B8;</mml:mi></mml:math></inline-formula> determines the weighting of the dynamic convolution kernel <inline-formula><mml:math id="ieqn19"><mml:mi>W</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mi>X</mml:mi></mml:mrow></mml:mfenced></mml:math></inline-formula>, thereby adapting to the feature distribution of the current input image for optimal perception.</p><p>The C3k2 module mainly extracts higher-level features through multiple bottleneck and convolution layers. In the DCC module, the stacked bottleneck units replace standard static convolution operations with dynamic convolution, enabling the network to adaptively select more suitable feature representations at different spatial scales and thereby improving the modeling capability for diverse tumor features. Unlike fixed convolution kernels, the dynamic convolution mechanism automatically adjusts the kernel weights based on the input features, making it adaptable to different image contexts. Especially when dealing with tumor regions with blurry boundaries and irregular shapes, dynamic convolution can more finely capture local structures and texture changes. In addition, dynamic convolution also enhances the model&#x2019;s ability to resist interference from complex backgrounds and low signal-to-noise ratios, reducing the occurrence of missed and false detections. At the same time, the DCC module introduces an input-dependent kernel selection strategy, which endows the C3k2 module with stronger contextual modeling and morphological adaptation capabilities, providing richer and more discriminative feature representations for subsequent detection heads.</p></sec><sec id="s2-4"><title>Channel Prior Convolutional Attention</title><p>The detection head is the key part for the final bounding box regression. However, the detection head relies on the feature maps from the previous layer, and these feature maps may contain a large amount of information, where many channels may be redundant or irrelevant to the target detection. An attention mechanism is therefore introduced to focus the network on the most informative features. 
The CPCA module (<xref ref-type="fig" rid="figure4">Figure 4</xref>) [<xref ref-type="bibr" rid="ref38">38</xref>] uses multiscale depth-wise separable convolutions to maintain the channel prior while extracting spatial relationships, enabling the network to focus on information-rich channels and key spatial regions. The CPCA module includes the sequential placement of channel attention and spatial attention. The spatial information of the feature maps is aggregated by channel attention through operations such as average pooling and max pooling. The aggregated features are then processed through a shared multilayer perceptron, and the outputs are added to generate a channel attention map. The channel prior is obtained by element-wise multiplication of the input feature and the channel attention map. Subsequently, the channel prior is input into a depth-wise convolution block to generate a spatial attention map. The convolution block receives the spatial attention map to perform channel mixing. Finally, the channel mixing result is element-wise multiplied with the channel prior to obtain the optimized feature as output. The channel mixing process helps to enhance the feature representation.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Structure of the channel prior convolutional attention (CPCA) module, which sequentially applies channel attention and spatial attention to emphasize tumor-relevant channels and spatial regions before the detection head. Conv, convolution; MaxPool, maximum pool; AvgPool, average pool.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig04.png"/></fig><p>Given an intermediate feature map <inline-formula><mml:math id="ieqn20"><mml:mi>F</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>, a 1D channel attention map <inline-formula><mml:math id="ieqn21"><mml:msub><mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> is first inferred through the channel attention module. Then, <inline-formula><mml:math id="ieqn22"><mml:msub><mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is element-wise multiplied with the input feature <inline-formula><mml:math id="ieqn23"><mml:mi>F</mml:mi></mml:math></inline-formula> to obtain the channel attention&#x2013;optimized feature map <inline-formula><mml:math id="ieqn24"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>. 
Subsequently, the spatial attention (SA) processes <inline-formula><mml:math id="ieqn25"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> to generate a 3D spatial attention map <inline-formula><mml:math id="ieqn26"><mml:msub><mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>. The final output feature map <inline-formula><mml:math id="ieqn27"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mover><mml:mi>F</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is obtained by element-wise multiplication of <inline-formula><mml:math id="ieqn28"><mml:msub><mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>and <inline-formula><mml:math id="ieqn29"><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>:</p><disp-formula id="E10"><label>(10)</label><mml:math id="eqn10"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>A</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>F</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2297;</mml:mo><mml:mi>F</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E11"><label>(11)</label><mml:math id="eqn11"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mover><mml:mi>F</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo>=</mml:mo><mml:mi>S</mml:mi><mml:mi>A</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2297;</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>, where <inline-formula><mml:math id="ieqn30"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mo>&#x2297;</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> represents element-wise multiplication. 
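As an illustration, this two-stage composition of equations 10 and 11 can be sketched in PyTorch-style code as follows, where the channel attention and spatial attention submodules are simplified stand-ins for equations 12 and 13 given below; the reduction ratio, kernel sizes, and class names are assumptions, not the reference implementation.
<preformat>
# Simplified CPCA sketch (equations 10-13); layer settings are illustrative assumptions.
import torch
import torch.nn as nn


class ChannelAttention(nn.Module):
    # Equation 12: sigmoid(MLP(AvgPool(F)) + MLP(MaxPool(F))) with a shared MLP.
    def __init__(self, channels, reduction=8):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Conv2d(channels, channels // reduction, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // reduction, channels, kernel_size=1),
        )

    def forward(self, f):
        avg = self.mlp(torch.mean(f, dim=(2, 3), keepdim=True))
        mx = self.mlp(torch.amax(f, dim=(2, 3), keepdim=True))
        return torch.sigmoid(avg + mx)   # shape (B, C, 1, 1)


class SpatialAttention(nn.Module):
    # Simplified form of equation 13: identity branch plus depth-wise branches, then 1x1 mixing.
    def __init__(self, channels):
        super().__init__()
        self.dw5 = nn.Conv2d(channels, channels, kernel_size=5, padding=2, groups=channels)
        self.dw7 = nn.Conv2d(channels, channels, kernel_size=7, padding=3, groups=channels)
        self.mix = nn.Conv2d(channels, channels, kernel_size=1)

    def forward(self, fc):
        return self.mix(fc + self.dw5(fc) + self.dw7(fc))


class CPCA(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.ca = ChannelAttention(channels)
        self.sa = SpatialAttention(channels)

    def forward(self, f):
        fc = self.ca(f) * f        # channel prior, equation 10
        return self.sa(fc) * fc    # refined output, equation 11
</preformat>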
Channel attention (CA) is obtained as follows:</p><disp-formula id="E12"><label>(12)</label><mml:math id="eqn12"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>C</mml:mi><mml:mi>A</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>F</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>M</mml:mi><mml:mi>L</mml:mi><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>P</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>F</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>+</mml:mo><mml:mi>M</mml:mi><mml:mi>L</mml:mi><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>P</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>F</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>, where <inline-formula><mml:math id="ieqn31"><mml:mi mathvariant="normal">&#x03C3;</mml:mi></mml:math></inline-formula> represents the sigmoid function. Spatial attention (SA) is obtained as follows:</p><disp-formula id="E13"><label>(13)</label><mml:math id="eqn13"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>S</mml:mi><mml:mi>A</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>F</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x00D7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:munderover><mml:mi>B</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>D</mml:mi><mml:mi>w</mml:mi><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>F</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>, where <inline-formula><mml:math id="ieqn32"><mml:mi mathvariant="normal">D</mml:mi><mml:mi mathvariant="normal">w</mml:mi><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">v</mml:mi></mml:math></inline-formula> represents depth-wise convolution, <inline-formula><mml:math 
id="ieqn33"><mml:mi>B</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represents the <italic>i</italic>th branch, and <inline-formula><mml:math id="ieqn34"><mml:mi>B</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> is an identity connection.</p><p>Applying CPCA to the predetection layer optimizes the semantic aggregation capability of the predetection feature layer. The detection head, as the key component for classification and bounding box regression, heavily relies on the quality of the preceding feature layers. However, the original feature maps often contain a large amount of redundant channels or low-response regions, which interfere with the model&#x2019;s discrimination process, especially in medical images with blurry tumor boundaries and complex backgrounds. CPCA performs adaptive channel modeling, enabling it to automatically identify and enhance significant channels related to tumors, thereby effectively improving the discriminability of features. Furthermore, this mechanism, combined with a spatial attention strategy, guides the network to focus on key areas of the image, enhancing the perceptual ability for fine-grained features such as lesion edges and structural changes. By introducing CPCA before the detection head, the model can perform inference based on more refined, robust, and contextually sensitive features.</p></sec><sec id="s2-5"><title>Progressive Hybrid Pruning Strategy</title><p>To address the dual requirements of high-precision modeling for complex brain tumor structures and efficient inference, we propose the PHPS, as summarized in <xref ref-type="other" rid="box1">Textbox 1</xref>. The PHPS first uses an L1-norm&#x2013;based channel importance evaluation to perform coarse-grained pruning, enabling the rapid removal of globally redundant feature channels. 
Subsequently, GroupNorm-based grouping information is incorporated to conduct fine-grained structural pruning, which corrects channel importance by considering local structural dependencies and avoids distortions caused by sole reliance on weight magnitude.</p><boxed-text id="box1"><title> Pruning processing</title><list list-type="bullet"><list-item><p>Input: pretrained model parameters W, overall pruning ratio s, pruning threshold SC, step pruning ratio M.</p></list-item><list-item><p>Output: pruned and fine-tuned model W1</p></list-item></list><list list-type="order"><list-item><p><named-content content-type="indent">&#x2003;</named-content>Initialize CPC-YOLO model.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content>Set target sparsity s; define thresholds SC.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content>Initialize current global sparsity <inline-formula><mml:math id="ieqn35"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>S</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mn>0</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content>While <inline-formula><mml:math id="ieqn36"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>S</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mi>s</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, do:</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>For each prunable layer <inline-formula><mml:math id="ieqn37"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>l</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> in the model, do:</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content>If the current layer&#x2019;s sparsity SL &#x003C; SC, then:</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Compute importance score: <inline-formula><mml:math id="ieqn38"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>L</mml:mi><mml:mi>I</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Sort channels in ascending order.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Prune the lowest-ranked channels with ratio M.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> Else:</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content 
content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Compute importance score:<inline-formula><mml:math id="ieqn39"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>G</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msqrt><mml:munder><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:munder><mml:mrow><mml:msubsup><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:msqrt></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Sort groups by <inline-formula><mml:math id="ieqn40"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>G</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> in ascending order.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>Prune the lowest-ranked groups with ratio M.</p></list-item><list-item><p>End if</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>End for</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content>Update the global sparsity <inline-formula><mml:math id="ieqn41"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content>End while</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content>Fine-tune the pruned model on the training dataset.</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content>Return the final pruned model <inline-formula><mml:math id="ieqn42"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p></list-item></list></boxed-text><p>Through this dual evaluation mechanism of global coarse-grained pruning combined with local fine-grained structural correction, the PHPS can more accurately preserve discriminative feature channels under challenging 
conditions such as blurred tumor boundaries and high intertumor heterogeneity. Importantly, the PHPS adopts a progressive pruning process, in which sparsity is gradually increased toward a predefined global target. Model fine-tuning is performed only once after the entire pruning process is completed, allowing effective recovery of detection performance while avoiding excessive training overhead. This design achieves a balanced optimization between model compression and precision retention.</p></sec><sec id="s2-6"><title>Interpretability</title><p>In the detection head output stage, the feature map is directly sent to the Eigen-CAM branch while being used for bounding box regression. The most representative convolution activation regions are extracted through principal component analysis. This design not only ensures the semantic consistency between interpretation and detection but also allows for reverse influence on model optimization during training. The extraction of saliency regions enables the network to focus more on real lesion boundaries and key structures, enhancing the model&#x2019;s adaptability to blurry edges and complex backgrounds. Unlike traditional Grad-CAM, which relies on backpropagation, Eigen-CAM is based on forward features, is class agnostic, lightweight, and efficient, making it more suitable for brain tumor MRI detection.</p><p>In this work, Eigen-CAM was applied to the high-level convolutional feature maps before the detection head, rather than to class-specific logits. Although Eigen-CAM is a class-agnostic visualization method that does not rely on gradients with respect to a specific category, the visualized activations are still strongly task driven. Since the detector is trained to localize tumor regions using bounding box regression and objective supervision, the learned high-level features inherently emphasize spatial regions that are most relevant to tumor detection. 
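Computationally, the Eigen-CAM map used here reduces to projecting each high-level feature map onto its first principal component, as formalized in equations 14-16 below; a minimal sketch for a single feature tensor is given next, in which the final ReLU and min-max normalization are display conventions assumed for visualization rather than part of the equations. <preformat>
# Minimal Eigen-CAM sketch for one feature map F of shape [C, H, W] (Eqs. 14-16).
import torch

def eigen_cam(feat: torch.Tensor) -> torch.Tensor:
    C, H, W = feat.shape
    F2d = feat.reshape(C, H * W)                  # Eq. 14: flatten the spatial dimensions
    cov = F2d @ F2d.T                             # Eq. 15: C x C covariance matrix
    _, eigvecs = torch.linalg.eigh(cov)           # eigenvalues returned in ascending order
    u1 = eigvecs[:, -1]                           # eigenvector of the largest eigenvalue
    cam = (u1[:, None, None] * feat).sum(dim=0)   # Eq. 16: channel-weighted sum
    cam = torch.relu(cam)                         # keep positively contributing regions
    return (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)  # scale to [0, 1] for display
</preformat>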
As a result, the generated heat maps naturally concentrate on tumor areas instead of irrelevant anatomical structures such as the skull or eyes.</p><p>For a certain convolution feature layer <inline-formula><mml:math id="ieqn43"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>F</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, it is first reshaped into a 2D matrix:</p><disp-formula id="E16"><label>(14)</label><mml:math id="eqn14"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:msup><mml:mi>F</mml:mi><mml:mrow><mml:mo>&#x2032;</mml:mo></mml:mrow></mml:msup><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mi mathvariant="double-struck">R</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>H</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>W</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Then the covariance matrix is calculated:</p><disp-formula id="E17"><label>(15)</label><mml:math id="eqn15"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi mathvariant="normal">&#x03A3;</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mi>F</mml:mi><mml:mrow><mml:mi mathvariant="normal">&#x2032;</mml:mi></mml:mrow></mml:msup><mml:msup><mml:mi>F</mml:mi><mml:mrow><mml:msup><mml:mi mathvariant="normal">&#x2032;</mml:mi><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Next, eigenvalue decomposition is performed on it, and taking the eigenvector <inline-formula><mml:math id="ieqn44"><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> corresponding to the largest eigenvalue, a heat map is finally generated by weighting the principal components:</p><disp-formula id="E18"><label>(16)</label><mml:math id="eqn16"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>M</mml:mi><mml:mo>=</mml:mo><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>u</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mi>c</mml:mi><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study is based entirely on publicly available brain tumor MRI datasets, including Br35H [<xref ref-type="bibr" rid="ref39">39</xref>], Roboflow dataset [<xref ref-type="bibr" rid="ref40">40</xref>], and Capstone Brain Tumor dataset [<xref ref-type="bibr" rid="ref41">41</xref>]. 
All datasets were obtained from open-access platforms (Kaggle and Roboflow) under their respective terms of use. Each dataset contains anonymized medical images that do not include any personally identifiable information, patient metadata, or clinical identifiers. Therefore, no additional ethical approval or informed consent was required for the use of these datasets.</p><p>The datasets were used solely for academic and noncommercial research purposes, strictly following ethical research guidelines and data-sharing policies. All experimental procedures comply with the principles of the Declaration of Helsinki and institutional data protection standards.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Experimental Setup</title><p>Br35H [<xref ref-type="bibr" rid="ref39">39</xref>] was used as the main dataset. This dataset is one of the most representative and widely used public benchmarks in brain tumor MRI detection, covering multiple tumor types and MRI modalities. However, it should be noted that the specific clinical metadata regarding the exact MRI sequences (eg, T1-weighted, T2-weighted, or fluid-attenuated inversion recovery) and the specific tumor subtypes are not provided in the original dataset repository. Despite the absence of these specific labels, its high-quality annotations, imaging diversity, and clinical coverage make it a standard for performance evaluation and comparison in automatic brain tumor detection research. To verify the robustness of CDCP-YOLO, the experiments were also conducted on 2 other datasets, Roboflow [<xref ref-type="bibr" rid="ref40">40</xref>] and Capstone [<xref ref-type="bibr" rid="ref41">41</xref>]. We strictly adhered to the original dataset split, as detailed in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Dataset splits used for experiments on the Br35H, Roboflow, and Capstone brain tumor magnetic resonance imaging (MRI) datasets.<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Dataset</td><td align="left" valign="bottom">Total number of images, n</td><td align="left" valign="bottom">Training set, n</td><td align="left" valign="bottom">Validation set, n</td><td align="left" valign="bottom">Test set, n</td></tr></thead><tbody><tr><td align="left" valign="top">Br35H</td><td align="left" valign="top">801</td><td align="left" valign="top">&#x2003;500</td><td align="left" valign="top">201</td><td align="left" valign="top">&#x2003;100</td></tr><tr><td align="left" valign="top">Roboflow</td><td align="left" valign="top">300</td><td align="left" valign="top">&#x2003;210</td><td align="left" valign="top">60</td><td align="left" valign="top">&#x2003;30</td></tr><tr><td align="left" valign="top">Capstone</td><td align="left" valign="top">911</td><td align="left" valign="top">&#x2003;638</td><td align="left" valign="top">182</td><td align="left" valign="top">&#x2003;91</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>The table reports the total number of images and the corresponding training, validation, and test splits for each dataset. 
These official splits were strictly followed in all experiments to ensure fair evaluation and reproducibility of the reported results.</p></fn></table-wrap-foot></table-wrap><p>To enhance the robustness and generalizability of the model, data augmentation was performed during the training phase, including geometric transformations such as rotation, flipping, scaling, and shearing of MRI images, as well as random adjustments to the brightness and contrast. This strategy effectively expands the diversity of the training samples, thereby reducing the risk of model overfitting. These augmentation operations were applied in real time during each training epoch, ensuring that the model obtained &#x201C;new&#x201D; samples for training in different epochs and improving its adaptability to complex scenes and diverse tumor morphology. The specific data augmentation hyperparameters used during training included mosaic image composition (mosaic=1.0), geometric transformations (random rotation [degrees=10], scaling [scale=0.5], and horizontal flipping [fliplr=0.5]), and color-space perturbations in the hue-saturation-value (HSV) domain (hsv_h=0.015, hsv_s=0.7, and hsv_v=0.4). These augmentation strategies are widely adopted in YOLO-based detection frameworks to enhance data diversity and improve model robustness against variations in object scale, orientation, and imaging conditions.</p><p>The experiment was conducted using an Intel Xeon Gold 5320 CPU @ 2.20GHz and an NVIDIA A40 48GB GPU. The training was conducted for 300 epochs with a batch size of 16, using a stochastic gradient descent (SGD) optimizer and a patience parameter of 50 for early stopping. The learning rate was set to 0.01. The software environment consisted of Ubuntu 20.04 (Canonical), CUDA 11.8 (NVIDIA), and PyTorch 2.1.0 (Meta AI Research).</p><p>After the completion of the initial full training stage, the PHPS was applied to the converged model. Specifically, the base model was first trained for 300 epochs using the standard training configuration described above. Once convergence was achieved, structured pruning was performed according to the PHPS criteria to remove redundant channels and blocks.</p><p>Following pruning, the resulting compact model was subjected to a dedicated fine-tuning stage, which was treated as an independent optimization phase and clearly distinguished from the initial training process. During this postpruning fine-tuning, the model was trained for 300 epochs under the same hardware and software environment as the initial training stage, using the SGD optimizer. 
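For reference, the two stages can be expressed as standard Ultralytics-style training calls, as sketched below; the dataset configuration file, the checkpoint name of the pruned network, and the use of the stock YOLOv11n configuration in place of the modified CDCP-YOLO architecture are illustrative assumptions rather than the released pipeline. <preformat>
# Illustrative two-stage training sketch with the Ultralytics API.
# File names (brain_tumor.yaml, cdcp_yolo_pruned.pt) are hypothetical placeholders.
from ultralytics import YOLO

# Stage 1: full training (300 epochs, batch 16, SGD, lr 0.01, patience 50)
# with the augmentation hyperparameters listed above.
model = YOLO("yolo11n.yaml")  # stand-in for the modified CDCP-YOLO architecture
model.train(
    data="brain_tumor.yaml", epochs=300, batch=16, optimizer="SGD",
    lr0=0.01, patience=50,
    mosaic=1.0, degrees=10, scale=0.5, fliplr=0.5,
    hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,
)

# Stage 2: after applying the PHPS to the converged model, fine-tune the
# pruned network once under the same environment (batch 8, lr 0.01, patience 50).
pruned = YOLO("cdcp_yolo_pruned.pt")
pruned.train(
    data="brain_tumor.yaml", epochs=300, batch=8, optimizer="SGD",
    lr0=0.01, patience=50,
)
</preformat>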
The batch size was set to 8, the learning rate was fixed at 0.01, and a patience parameter of 50 was applied for early stopping.</p></sec><sec id="s3-2"><title>Evaluation Metrics</title><p>Precision is the ratio of the number of correctly predicted positive samples to the total number of the detected samples, as shown below:</p><disp-formula id="E19"><label>(17)</label><mml:math id="eqn17"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mi>P</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mi>P</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>P</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn><mml:mi mathvariant="normal">%</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Recall is the ratio of the number of correctly predicted positive samples to the number of the actual positive samples, as shown below:</p><disp-formula id="E20"><label>(18)</label><mml:math id="eqn18"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mi>P</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mi>P</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn><mml:mi mathvariant="normal">%</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Mean average precision (mAP) is the result obtained by averaging the average precision of all categories, used to measure the detection performance of the model across all categories.</p><disp-formula id="E21"><label>(19)</label><mml:math id="eqn19"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mi>m</mml:mi><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo>&#x2211;</mml:mo><mml:mi>A</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:msub></mml:mfrac><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn><mml:mi mathvariant="normal">%</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>, where mAP<sub>0.5</sub> represents the mAP at an IoU threshold of 0.5 and mAP<sub>0.5:0.95</sub> represents the average mAP over several IoU thresholds (0.50-0.95). Params represents parameter size in the model, the number of floating-point operations (&#x00D7;10&#x2079;; GFLOPs) signify computational complexity, and frames per second (FPS) were used to measure inference complexity.</p><p>SD was used to measure the variability of a given evaluation metric for the same method across multiple repeated experiments. 
It was computed as follows:</p><disp-formula id="E22"><label>(20)</label><mml:math id="eqn20"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mtext>S</mml:mtext><mml:mtext>D</mml:mtext><mml:mo>=</mml:mo><mml:msqrt><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfrac><mml:munderover><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>M</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover><mml:mi>M</mml:mi><mml:mo accent="false">&#x00AF;</mml:mo></mml:mover></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mrow><mml:mtext>&#x00A0;</mml:mtext></mml:mrow></mml:msqrt></mml:mtd></mml:mtr></mml:mtable></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>, where <inline-formula><mml:math id="ieqn45"><mml:msub><mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> denotes the value of the evaluation metric obtained in the <inline-formula><mml:math id="ieqn46"><mml:mi>i</mml:mi></mml:math></inline-formula>th experiment, and <inline-formula><mml:math id="ieqn47"><mml:mover accent="false"><mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mo>&#x00AF;</mml:mo></mml:mover></mml:math></inline-formula> represents the mean value of the corresponding metric over <inline-formula><mml:math id="ieqn48"><mml:mi>N</mml:mi></mml:math></inline-formula> experiments.</p></sec><sec id="s3-3"><title>Comparative Experiments</title><p>This paper compares between a variety of mainstream detection models on Br35H, including the classic YOLO series models (such as YOLOv3, YOLOv5n, YOLOv10n, and YOLOv11n); 2-stage detectors (such as Faster R-CNN and Cascade R-CNN); and a 1-stage detection method, TOOD (<xref ref-type="table" rid="table2">Table 2</xref>). 
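All models in <xref ref-type="table" rid="table2">Table 2</xref> were scored with the metrics defined in equations 17-20; as a minimal reference sketch, the precision, recall, and SD computations can be written as follows, where the counts and repeated-run values are hypothetical placeholders and the matching of predictions to ground truth at the chosen IoU threshold is assumed to be done upstream. <preformat>
# Precision and recall (Eqs. 17-18) and run-to-run SD (Eq. 20) from detection counts.
# All numeric values below are hypothetical placeholders.
from statistics import stdev

def precision(tp: int, fp: int) -> float:
    return 100.0 * tp / (tp + fp)    # Eq. 17

def recall(tp: int, fn: int) -> float:
    return 100.0 * tp / (tp + fn)    # Eq. 18

print(precision(90, 8))              # e.g., about 91.8
print(recall(90, 10))                # e.g., 90.0
print(stdev([0.941, 0.944, 0.946]))  # sample SD with N-1 in the denominator (Eq. 20)
</preformat>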
To ensure a fair and unbiased comparison, all baseline models reported in <xref ref-type="table" rid="table2">Table 2</xref> were trained from scratch under a unified experimental protocol.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance comparison of different object detection models on the Br35H brain tumor magnetic resonance imaging dataset.<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Precision<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></sub></td><td align="left" valign="bottom">mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></sub></td><td align="left" valign="bottom">Params (M)<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="bottom">GFLOPs<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td><td align="left" valign="bottom">FPS<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">YOLOv3-tiny</td><td align="left" valign="top">&#x2003;0.932</td><td align="left" valign="top">&#x2003;0.836</td><td align="left" valign="top">&#x2003;0.898</td><td align="left" valign="top">&#x2003;0.569</td><td align="left" valign="top">&#x2003;12.12</td><td align="left" valign="top">&#x2003;18.9</td><td align="left" valign="top">&#x2003;240.8</td></tr><tr><td align="left" valign="top">YOLOv3</td><td align="left" valign="top">&#x2003;0.926</td><td align="left" valign="top">&#x2003;0.836</td><td align="left" valign="top">&#x2003;0.861</td><td align="left" valign="top">&#x2003;0.602</td><td align="left" valign="top">&#x2003;103.67</td><td align="left" valign="top">&#x2003;282.2</td><td align="left" valign="top">&#x2003;70.2</td></tr><tr><td align="left" valign="top">YOLOv5n</td><td align="left" valign="top">&#x2003;0.891</td><td align="left" valign="top">&#x2003;0.869</td><td align="left" valign="top">&#x2003;0.938</td><td align="left" valign="top">&#x2003;0.601</td><td align="left" valign="top">&#x2003;2.50</td><td align="left" valign="top">&#x2003;7.1</td><td align="left" valign="top">&#x2003;134</td></tr><tr><td align="left" valign="top">YOLOv6n</td><td align="left" valign="top">&#x2003;0.91</td><td align="left" valign="top">&#x2003;0.844</td><td align="left" valign="top">&#x2003;0.931</td><td align="left" valign="top">&#x2003;0.578</td><td align="left" valign="top">&#x2003;4.23</td><td align="left" valign="top">&#x2003;11.8</td><td align="left" valign="top">&#x2003;145</td></tr><tr><td align="left" valign="top">YOLOv8n</td><td align="left" valign="top">&#x2003;0.928</td><td align="left" valign="top">&#x2003;0.843</td><td align="left" valign="top">&#x2003;0.914</td><td align="left" valign="top">&#x2003;0.604</td><td align="left" valign="top">&#x2003;3</td><td align="left" valign="top">&#x2003;8.1</td><td align="left" valign="top">&#x2003;144</td></tr><tr><td align="left" valign="top">TOOD</td><td align="left" valign="top">&#x2003;0.888</td><td align="left" valign="top">&#x2003;0.895</td><td align="left" valign="top">&#x2003;0.925</td><td align="left" valign="top">&#x2003;0.630</td><td align="left" valign="top">&#x2003;32.02</td><td align="left" valign="top">&#x2003;144</td><td align="left" 
valign="top">&#x2003;13.1</td></tr><tr><td align="left" valign="top">YOLOv10n</td><td align="left" valign="top">&#x2003;0.919</td><td align="left" valign="top">&#x2003;0.84</td><td align="left" valign="top">&#x2003;0.914</td><td align="left" valign="top">&#x2003;0.608</td><td align="left" valign="top">&#x2003;2.27</td><td align="left" valign="top">&#x2003;6.5</td><td align="left" valign="top">&#x2003;131</td></tr><tr><td align="left" valign="top">YOLOv11n</td><td align="left" valign="top">&#x2003;0.904</td><td align="left" valign="top">&#x2003;0.853</td><td align="left" valign="top">&#x2003;0.918</td><td align="left" valign="top">&#x2003;0.585</td><td align="left" valign="top">&#x2003;2.58</td><td align="left" valign="top">&#x2003;6.3</td><td align="left" valign="top">&#x2003;142.2</td></tr><tr><td align="left" valign="top">Faster R-CNN</td><td align="left" valign="top">&#x2003;0.854</td><td align="left" valign="top">&#x2003;0.836</td><td align="left" valign="top">&#x2003;0.896</td><td align="left" valign="top">&#x2003;0.578</td><td align="left" valign="top">&#x2003;41.35</td><td align="left" valign="top">&#x2003;155</td><td align="left" valign="top">&#x2003;15.2</td></tr><tr><td align="left" valign="top">Cascade R-CNN</td><td align="left" valign="top">&#x2003;0.86</td><td align="left" valign="top">&#x2003;0.895</td><td align="left" valign="top">&#x2003;0.874</td><td align="left" valign="top">&#x2003;0.584</td><td align="left" valign="top">&#x2003;69.15</td><td align="left" valign="top">&#x2003;183</td><td align="left" valign="top">&#x2003;14</td></tr><tr><td align="left" valign="top">DINO</td><td align="left" valign="top">&#x2003;0.939</td><td align="left" valign="top">&#x2003;0.75</td><td align="left" valign="top">&#x2003;0.84</td><td align="left" valign="top">&#x2003;0.563</td><td align="left" valign="top">&#x2003;47.54</td><td align="left" valign="top">&#x2003;205</td><td align="left" valign="top">&#x2003;10.5</td></tr><tr><td align="left" valign="top">RCS-YOLO</td><td align="left" valign="top">&#x2003;0.953</td><td align="left" valign="top">&#x2003;0.828</td><td align="left" valign="top">&#x2003;0.824</td><td align="left" valign="top">&#x2003;0.548</td><td align="left" valign="top">&#x2003;45.70</td><td align="left" valign="top">&#x2003;94.5</td><td align="left" valign="top">&#x2003;215.2</td></tr><tr><td align="left" valign="top">BGF-YOLO</td><td align="left" valign="top">&#x2003;0.964</td><td align="left" valign="top">&#x2003;0.885</td><td align="left" valign="top">&#x2003;0.959</td><td align="left" valign="top">&#x2003;0.648</td><td align="left" valign="top">&#x2003;84.30</td><td align="left" valign="top">&#x2003;568.9</td><td align="left" valign="top">&#x2003;32.5</td></tr><tr><td align="left" valign="top">CDC-YOLO</td><td align="left" valign="top">&#x2003;0.881</td><td align="left" valign="top">&#x2003;0.906</td><td align="left" valign="top">&#x2003;0.946</td><td align="left" valign="top">&#x2003;0.660</td><td align="left" valign="top">&#x2003;3.64</td><td align="left" valign="top">&#x2003;7</td><td align="left" valign="top">&#x2003;130</td></tr><tr><td align="left" valign="top">CDCP-YOLO</td><td align="left" valign="top">&#x2003;0.918</td><td align="left" valign="top">&#x2003;0.904</td><td align="left" valign="top">&#x2003;0.944</td><td align="left" valign="top">&#x2003;0.644</td><td align="left" valign="top">&#x2003;2.07</td><td align="left" valign="top">&#x2003;3.3</td><td align="left" valign="top">&#x2003;152</td></tr></tbody></table><table-wrap-foot><fn 
id="table2fn1"><p><sup>a</sup>All models were trained and evaluated under the same experimental protocol to ensure a fair comparison. Higher values indicate better performance for accuracy and speed metrics, whereas lower values indicate better efficiency for model complexity metrics.</p></fn><fn id="table2fn2"><p><sup>b</sup>Precision, recall, mAP<sub>0.5</sub>, and mAP<sub>0.5:0.95</sub> were used to evaluate detection accuracy.</p></fn><fn id="table2fn3"><p><sup>c</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table2fn4"><p><sup>d</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn id="table2fn5"><p><sup>e</sup>Params (M) was used to measure model size.</p></fn><fn id="table2fn6"><p><sup>f</sup>GFLOPs: number of floating-point operations (&#x00D7;10&#x2079;); used to measure computational complexity.</p></fn><fn id="table2fn7"><p><sup>g</sup>FPS: frames per second; used to measure inference efficiency.</p></fn></table-wrap-foot></table-wrap><p>CDCP-YOLO reached 0.946 in mAP<sub>0.5</sub>, approaching CDC-YOLO&#x2019;s 0.944, significantly outperforming traditional models such as YOLOv3-tiny (0.898) and YOLOv3 (0.861). At the same time, it achieved a high score of 0.644 on the more challenging mAP<sub>0.5:0.95</sub> metric, second only to CDC-YOLO (0.660). In addition, CDCP-YOLO&#x2019;s recall (0.904) and precision (0.918) were both at a leading level, indicating its excellent detection capabilities. In terms of model complexity, the parameter count of CDCP-YOLO was only 2.07M, which is about 1/33 of that of Cascade R-CNN (69.15M) and 1/20 of that of Faster R-CNN (41.35M), and was also smaller than that of YOLOv3 (103.67M), achieving the goal of lightweight design. Its computational complexity was 3.3 GFLOPs, which was also significantly lower than that of YOLOv5n (7.1 GFLOPs) and YOLOv6n (11.8 GFLOPs), demonstrating good computational efficiency. In terms of inference speed, CDCP-YOLO reached 152 FPS, which is significantly faster than Cascade R-CNN (14 FPS), Faster R-CNN (15.2 FPS), and even some lightweight YOLO models such as YOLOv5n (134 FPS) and YOLOv10n (131 FPS), showing significant advantages in practical deployment.</p><p>As seen in <xref ref-type="fig" rid="figure5">Figure 5</xref>, CDCP-YOLO was in the optimal or near-optimal state in terms of accuracy, model size, computational complexity, and inference speed, demonstrating the advantage of balancing accuracy and efficiency. It is particularly noteworthy that CDCP-YOLO significantly surpassed YOLOv5n (0.644 vs 0.601) and was superior to TOOD (0.644 vs 0.630) on mAP<sub>0.5:0.95</sub>, while it had a much lower parameter count and computational load than these models. This indicates that the model still has stable performance at high IoU thresholds, possessing good robustness and generalizability. CDCP-YOLO achieved high detection accuracy and inference speed under the premise of maintaining a small model size and low computational overhead.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Multimetric normalization comparison of different object detection models on the Br35H brain tumor magnetic resonance imaging dataset. All evaluation metrics are normalized to the range [0, 1] to enable fair comparison across models with different scales. 
The compared models included YOLOv3-tiny, YOLOv3, YOLOv5n, YOLOv6n, YOLOv8n, YOLOv10n, YOLOv11n, Faster R-CNN, Cascade R-CNN, DINO, RCS-YOLO, BGF-YOLO, CDC-YOLO, and the proposed CDCP-YOLO model. The metrics include precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, number of parameters (Params), computational complexity (GFLOPs), and inference speed (FPS). Arrows indicate optimization direction: &#x2191; denotes that higher values are better (precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, and FPS), while &#x2193; denotes that lower values are better (Params and GFLOPs). FPS, frames per second; GFLOPs, number of floating-point operations (&#x00D7;10&#x2079;); mAP, mean average precision; mAP<sub>0.5</sub>, mAP at an intersection-over-union threshold of 0.5; mAP<sub>0.5:0.95</sub>, average mAP over several intersection-over-union thresholds (0.50-0.95); R-CNN, region-based convolutional neural network; YOLO, &#x201C;you only look once&#x201D; framework.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig05.png"/></fig></sec><sec id="s3-4"><title>Ablation Experiment</title><p>Using YOLOv11n as a baseline, the effectiveness of each module is compared (<xref ref-type="table" rid="table3">Table 3</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Ablation study results of individual modules on the Br35H brain tumor magnetic resonance imaging dataset, showing the incremental impact of the CSPP<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>, DCC<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup>, CPCA<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup>, and PHPS<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup> modules on detection performance and model efficiency.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">CSPP</td><td align="left" valign="bottom">DCC</td><td align="left" valign="bottom">CPCA</td><td align="left" valign="bottom">PHPS</td><td align="left" valign="bottom">Precision<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></sub></td><td align="left" valign="bottom">mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></sub></td><td align="left" valign="bottom">Params (M)<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup></td><td align="left" valign="bottom">GFLOPs<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup></td><td align="left" valign="bottom">FPS<sup><xref ref-type="table-fn" rid="table3fn10">j</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">YOLOv11n</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn11">k</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.853</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.585</td><td align="left" valign="top">2.58</td><td align="left" valign="top">6.3</td><td align="left" valign="top">142.2</td></tr><tr><td align="left" valign="top">C-YOLO</td><td align="left" valign="top">&#x2713;<sup><xref ref-type="table-fn" 
rid="table3fn12">l</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.852</td><td align="left" valign="top">0.921</td><td align="left" valign="top">0.639</td><td align="left" valign="top">2.58</td><td align="left" valign="top">6.5</td><td align="left" valign="top">140</td></tr><tr><td align="left" valign="top">CD-YOLO</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.900</td><td align="left" valign="top">0.893</td><td align="left" valign="top">0.941</td><td align="left" valign="top">0.660</td><td align="left" valign="top">3.46</td><td align="left" valign="top">6.3</td><td align="left" valign="top">133</td></tr><tr><td align="left" valign="top">CDC-YOLO</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.881</td><td align="left" valign="top">0.906</td><td align="left" valign="top">0.946</td><td align="left" valign="top">0.660</td><td align="left" valign="top">3.64</td><td align="left" valign="top">7.0</td><td align="left" valign="top">130</td></tr><tr><td align="left" valign="top">CDCP-YOLO</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">&#x2713;</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.644</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">152</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>CSPP: convolution Prewitt-and-pooling&#x2013;based preprocessing.</p></fn><fn id="table3fn2"><p><sup>b</sup>DCC: dynamic convolution&#x2013;based C3k2.</p></fn><fn id="table3fn3"><p><sup>c</sup>CPCA: channel prior convolutional attention.</p></fn><fn id="table3fn4"><p><sup>d</sup>PHPS: progressive hybrid pruning strategy.</p></fn><fn id="table3fn5"><p><sup>e</sup>Precision, recall, mAP<sub>0.5</sub>, and mAP<sub>0.5:0.95</sub> were used to evaluate detection accuracy.</p></fn><fn id="table3fn6"><p><sup>f</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table3fn7"><p><sup>g</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn id="table3fn8"><p><sup>h</sup>Params (M) was used to measure model size.</p></fn><fn id="table3fn9"><p><sup>i</sup>GFLOPs: number of floating-point operations (&#x00D7;10&#x2079;); used to measure computational complexity.</p></fn><fn id="table3fn10"><p><sup>j</sup>FPS: frames per second; used to measure inference efficiency.</p></fn><fn id="table3fn11"><p><sup>k</sup>Indicates that the corresponding module is absent in the model.</p></fn><fn id="table3fn12"><p><sup>l</sup>A check mark (&#x2713;) indicates that the corresponding module is included in the model.</p></fn></table-wrap-foot></table-wrap><p>The mAP<sub>0.5</sub> of the original YOLOv11n was 0.918, mAP<sub>0.5:0.95</sub> was 0.585, parameter size was 2.58M, computational complexity was 6.3 GFLOPs, 
and inference speed was 142.2 FPS. After adding the CSPP module, the mAP<sub>0.5</sub> of C-YOLO slightly increased to 0.921 and its mAP<sub>0.5:0.95</sub> significantly increased to 0.639, while the number of parameters and computational complexity remained unchanged, indicating that the CSPP structure has a good effect on improving the model&#x2019;s ability to capture fine-grained features. After introducing the DCC module, the detection accuracy was further improved: the mAP<sub>0.5</sub> of CD-YOLO increased to 0.941, and the mAP<sub>0.5:0.95</sub> increased to 0.660. Although the parameter size slightly increased to 3.46M, computational complexity remained at 6.3 GFLOPs, and inference speed dropped to 133 FPS, the model&#x2019;s improvement in accuracy effectively enhanced its contextual modeling capability. Subsequently, with the addition of the CPCA module to obtain CDC-YOLO, the mAP<sub>0.5</sub> reached 0.946 (the highest value), and the mAP<sub>0.5:0.95</sub> remained unchanged at 0.660. Although parameter size slightly increased to 3.64M and inference speed dropped to 130 FPS, CDC-YOLO still performed outstandingly in terms of detection accuracy, further verifying the performance improvement effect of CPCA. On the basis of CDC-YOLO, PHPS was introduced to obtain CDCP-YOLO, which achieved a rebalance of model lightweighting and performance. This model reached 0.944 in mAP<sub>0.5</sub> and 0.644 in mAP<sub>0.5:0.95</sub>, with only 2.07M parameters and computation complexity of 3.3 GFLOPs. The inference speed significantly increased to 152 FPS. This result shows that the PHPS basically retains high performance while compressing the model size.</p><p>As <xref ref-type="fig" rid="figure6">Figure 6</xref> shows, CDCP-YOLO achieved a good balance in terms of accuracy, model size, computational load, and speed, showing a high degree of practicality and deployment advantages. Compared with other improved models, CDCP-YOLO had an absolute advantage in the 2 dimensions &#x201C;fewest parameters&#x201D; and &#x201C;fastest inference,&#x201D; while also maintaining excellent performance in detection accuracy.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Ablation study results of the proposed CDCP-YOLO on the Br35H brain tumor magnetic resonance imaging dataset. The figure on the left shows the multimetric normalization bar chart, where all evaluation metrics are normalized to the range [0, 1] for fair comparison across models. The metrics include precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, number of parameters (Params), computational complexity (GFLOPs), and inference speed (FPS). The figure on the right presents a radar-based performance analysis plot summarizing the overall accuracy-efficiency trade-off of different ablation variants (YOLOv11n, C-YOLO, CD-YOLO, CDC-YOLO, and CDCP-YOLO). Arrows indicate metric preference: &#x2191; denotes that higher values are better (precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, and FPS), while &#x2193; denotes that lower values are better (Params and GFLOPs). This figure demonstrates how each module (convolution Prewitt-and-pooling&#x2013;based preprocessing [CSPP], dynamic convolution&#x2013;based C3k2 [DCC], channel prior convolutional attention [CPCA], and progressive hybrid pruning strategy [PHPS]) progressively improves detection accuracy while reducing model complexity. 
FPS, frames per second; GFLOPs, number of floating-point operations (&#x00D7;10&#x2079;); mAP, mean average precision; mAP<sub>0.5</sub>, mAP at an intersection-over-union threshold of 0.5; mAP<sub>0.5:0.95</sub>, average mAP over several intersection-over-union thresholds (0.50-0.95); YOLO, &#x201C;you only look once&#x201D; framework.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig06.png"/></fig></sec><sec id="s3-5"><title>Performance on Different Datasets</title><p>To comprehensively evaluate the performance of CDCP-YOLO, the performance of YOLOv11, CDC-YOLO, and CDCP-YOLO was compared on different datasets (<xref ref-type="table" rid="table4">Table 4</xref>).</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Performance comparison of YOLOv11, CDC-YOLO, and CDCP-YOLO on 3 brain tumor magnetic resonance imaging datasets: Br35H, Roboflow, and Capstone.<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Dataset and model</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></sub></td><td align="left" valign="bottom">mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></sub></td><td align="left" valign="bottom">Params (M)<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="bottom">GFLOPs<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="bottom">FPS<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Br35H</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>YOLOv11</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.853</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.585</td><td align="left" valign="top">2.58</td><td align="left" valign="top">6.3</td><td align="left" valign="top">142.2</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CDC-YOLO</td><td align="left" valign="top">0.881</td><td align="left" valign="top">0.906</td><td align="left" valign="top">0.946</td><td align="left" valign="top">0.660</td><td align="left" valign="top">3.64</td><td align="left" valign="top">7.0</td><td align="left" valign="top">130</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CDCP-YOLO</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.644</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">152</td></tr><tr><td align="left" valign="top">Roboflow</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" 
valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>YOLOv11</td><td align="left" valign="top">0.615</td><td align="left" valign="top">0.467</td><td align="left" valign="top">0.561</td><td align="left" valign="top">0.276</td><td align="left" valign="top">2.58</td><td align="left" valign="top">6.3</td><td align="left" valign="top">117.6</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CDC-YOLO</td><td align="left" valign="top">0.706</td><td align="left" valign="top">0.716</td><td align="left" valign="top">0.77</td><td align="left" valign="top">0.361</td><td align="left" valign="top">3.64</td><td align="left" valign="top">7.0</td><td align="left" valign="top">115</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CDCP-YOLO</td><td align="left" valign="top">0.699</td><td align="left" valign="top">0.729</td><td align="left" valign="top">0.756</td><td align="left" valign="top">0.353</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">130.5</td></tr><tr><td align="left" valign="top">Capstone</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>YOLOv11</td><td align="left" valign="top">0.768</td><td align="left" valign="top">0.806</td><td align="left" valign="top">0.840</td><td align="left" valign="top">0.567</td><td align="left" valign="top">2.58</td><td align="left" valign="top">6.3</td><td align="left" valign="top">132.2</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CDC-YOLO</td><td align="left" valign="top">0.877</td><td align="left" valign="top">0.828</td><td align="left" valign="top">0.876</td><td align="left" valign="top">0.625</td><td align="left" valign="top">3.64</td><td align="left" valign="top">7.0</td><td align="left" valign="top">130.3</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CDCP-YOLO</td><td align="left" valign="top">0.897</td><td align="left" valign="top">0.845</td><td align="left" valign="top">0.909</td><td align="left" valign="top">0.625</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">150</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>The table reports detection accuracy metrics (precision, recall, mAP<sub>0.5</sub>, and mAP<sub>0.5:0.95</sub>) and efficiency metrics (Params, GFLOPs, and FPS) to evaluate both effectiveness and deployability across different datasets.</p></fn><fn id="table4fn2"><p><sup>b</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table4fn3"><p><sup>c</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn id="table4fn4"><p><sup>d</sup>Params (M) was used to measure model size.</p></fn><fn id="table4fn5"><p><sup>e</sup>GFLOPs: number of floating-point 
operations (&#x00D7;10&#x2079;).</p></fn><fn id="table4fn6"><p><sup>f</sup>FPS: frames per second.</p></fn></table-wrap-foot></table-wrap><p>On Br35H, CDCP-YOLO was superior to other models in terms of precision (0.918) and inference speed (152 FPS). Although its mAP<sub>0.5</sub> was slightly lower than that of CDC-YOLO (0.944 vs 0.946), the outstanding performance of CDCP-YOLO in terms of parameter size (2.07M) and computational complexity (3.3 GFLOPs) makes it more suitable for practical application scenarios with limited resources. In addition, the mAP<sub>0.5:0.95</sub> metric showed that it achieved a good trade-off between performance and computational efficiency. The Roboflow dataset represented a more challenging general-purpose detection scenario, resulting in lower overall detection accuracy than that achieved on the Br35H dataset. CDC-YOLO achieved the highest mAP<sub>0.5</sub> (0.770) and mAP<sub>0.5:0.95</sub> (0.361), while CDCP-YOLO performed best in recall (0.729), and it had a higher inference speed (130.5 FPS) than CDC-YOLO (115 FPS). The overall performance of YOLOv11 was the weakest, especially with obvious shortcomings in recall (0.467) and mAP<sub>0.5:0.95</sub> (0.276), indicating that the original model lacked sufficient generalizability on this dataset. On Capstone, CDCP-YOLO once again demonstrated the best overall performance. It also showed the highest precision (0.897), mAP<sub>0.5</sub> (0.909), and FPS (150), indicating that the model has stronger stability and efficiency in actual inference tasks. Although CDC-YOLO and CDCP-YOLO were comparable in mAP<sub>0.5:0.95</sub> (both 0.625), CDCP-YOLO demonstrated higher comprehensive advantages by virtue of its smaller model size and faster speed.</p><p>As seen in <xref ref-type="fig" rid="figure7">Figure 7</xref>, CDCP-YOLO performed outstandingly in terms of &#x201C;fewest parameters,&#x201D; &#x201C;lowest computational load,&#x201D; and &#x201C;fastest FPS,&#x201D; while also maintaining a leading position in detection accuracy metrics, demonstrating an extremely high balance and deployment advantage. YOLOv11, although fast in some scenarios, has lower precision and recall. CDC-YOLO had a slight advantage in precision, but its computational complexity and parameter size were higher.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Multimetric normalization analysis of CDCP-YOLO and baseline models on 3 brain tumor magnetic resonance imaging datasets: Br35H, Roboflow and Capstone. Each graph corresponds to 1 dataset, showing the normalized performance of YOLOv11, CDC-YOLO, and CDCP-YOLO across multiple evaluation metrics. All metrics are normalized to the range [0, 1] to enable fair comparison across datasets with different scales. The evaluated metrics include precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, number of parameters (Params), computational complexity (GFLOPs), and inference speed (FPS). Arrows indicate the optimization direction: &#x2191; denotes that higher values are better (precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, and FPS), while &#x2193; denotes that lower values are better (Params and GFLOPs). 
FPS, frames per second; GFLOPs, number of floating-point operations (&#x00D7;10&#x2079;); mAP, mean average precision; mAP<sub>0.5</sub>, mAP at an intersection-over-union threshold of 0.5; mAP<sub>0.5:0.95</sub>, average mAP over several intersection-over-union thresholds (0.50-0.95); YOLO, &#x201C;you only look once&#x201D; framework.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig07.png"/></fig></sec><sec id="s3-6"><title>Impact of Different Attention Mechanisms</title><p>This paper introduces these modules including MixStructureBlock [<xref ref-type="bibr" rid="ref42">42</xref>], MSCAttention [<xref ref-type="bibr" rid="ref43">43</xref>], MSPABlock [<xref ref-type="bibr" rid="ref44">44</xref>], and CPCA for comparing different attention mechanisms (<xref ref-type="table" rid="table5">Table 5</xref>).</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Comparative analysis of different attention mechanisms on the Br35H brain tumor magnetic resonance imaging dataset.<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Attention mechanism</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></sub></td><td align="left" valign="bottom">mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></sub></td><td align="left" valign="bottom">Params (M)<sup><xref ref-type="table-fn" rid="table5fn5">e</xref></sup></td><td align="left" valign="bottom">GFLOPs<sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></td><td align="left" valign="bottom">FPS<sup><xref ref-type="table-fn" rid="table5fn6">f</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">+MixStructureBlock</td><td align="left" valign="top">0.827</td><td align="left" valign="top">0.665</td><td align="left" valign="top">0.754</td><td align="left" valign="top">0.354</td><td align="left" valign="top">67.90</td><td align="left" valign="top">11</td><td align="left" valign="top">120</td></tr><tr><td align="left" valign="top">+MSCAttention</td><td align="left" valign="top">0.891</td><td align="left" valign="top">0.877</td><td align="left" valign="top">0.933</td><td align="left" valign="top">0.624</td><td align="left" valign="top">2.00</td><td align="left" valign="top">3.3</td><td align="left" valign="top">117.6</td></tr><tr><td align="left" valign="top">+MSPABlock</td><td align="left" valign="top">0.883</td><td align="left" valign="top">0.863</td><td align="left" valign="top">0.924</td><td align="left" valign="top">0.637</td><td align="left" valign="top">2.50</td><td align="left" valign="top">3.7</td><td align="left" valign="top">110</td></tr><tr><td align="left" valign="top">+CPCA<sup><xref ref-type="table-fn" rid="table5fn7">g</xref></sup></td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.644</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">152</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>The table reports detection accuracy metrics (precision, recall, mAP<sub>0.5</sub>, and mAP<sub>0.5:0.95</sub>) and efficiency metrics (Params, GFLOPs, and 
FPS) to evaluate the effectiveness and computational cost of different attention modules.</p></fn><fn id="table5fn2"><p><sup>b</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table5fn3"><p><sup>c</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn id="table5fn4"><p><sup>d</sup>GFLOPs: number of floating-point operations (&#x00D7;10&#x2079;).</p></fn><fn id="table5fn5"><p><sup>e</sup>Params (M) was used to measure model size.</p></fn><fn id="table5fn6"><p><sup>f</sup>FPS: frames per second.</p></fn><fn id="table5fn7"><p><sup>g</sup>CPCA: channel prior convolutional attention.</p></fn></table-wrap-foot></table-wrap><p>As <xref ref-type="fig" rid="figure8">Figure 8</xref> shows, CPCA achieved the best accuracy and the fastest inference while remaining nearly as compact as the lightest alternative, making it the most cost-effective attention choice. In contrast, although MixStructureBlock has the most complex structure, it produced the lowest scores on every accuracy metric while requiring by far the most parameters and computation, indicating that the redundancy in its design was not converted into performance gains.</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Comparative analysis of different attention mechanisms on the Br35H brain tumor magnetic resonance imaging dataset. The subfigure on the left shows the multimetric normalization bar chart, where all evaluation metrics are normalized to the range [0,1] for fair comparison among different attention modules. The compared attention mechanisms include MixStructureBlock, MSCAttention, MSPABlock, and the proposed channel prior convolutional attention (CPCA) module. The evaluated metrics include precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, number of parameters (Params), computational complexity (GFLOPs), and inference speed (FPS). The subfigure on the right presents a radar-based performance analysis plot, summarizing the overall accuracy-efficiency trade-offs of different attention mechanisms. Arrows indicate metric preference: &#x2191; denotes that higher values are better (precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, and FPS), while &#x2193; denotes that lower values are better (Params and GFLOPs). 
FPS, frames per second; GFLOPs, number of floating-point operations (&#x00D7;10&#x2079;); mAP<sub>0.5</sub>, mean average precision at an intersection-over-union threshold of 0.5; mAP<sub>0.5:0.95</sub>, average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig08.png"/></fig></sec><sec id="s3-7"><title>Impact of Different Loss Functions</title><p>Comparative experiments were conducted by introducing loss functions such as GIoU [<xref ref-type="bibr" rid="ref45">45</xref>], DIoU [<xref ref-type="bibr" rid="ref46">46</xref>], EIoU [<xref ref-type="bibr" rid="ref47">47</xref>], SIoU, ShapeIoU, PIoU, WIoU, and CIoU (<xref ref-type="table" rid="table6">Table 6</xref>).</p><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>Performance comparison of different bounding box regression loss functions on the Br35H brain tumor magnetic resonance imaging dataset.<sup><xref ref-type="table-fn" rid="table6fn1">a</xref></sup></p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Loss function</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table6fn2">b</xref></sup></sub></td><td align="left" valign="bottom">mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table6fn3">c</xref></sup></sub></td><td align="left" valign="bottom">Params (M)<sup><xref ref-type="table-fn" rid="table6fn4">d</xref></sup></td><td align="left" valign="bottom">GFLOPs<sup><xref ref-type="table-fn" rid="table6fn5">e</xref></sup></td><td align="left" valign="bottom">FPS<sup><xref ref-type="table-fn" rid="table6fn6">f</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">GIoU</td><td align="left" valign="top">0.893</td><td align="left" valign="top">0.820</td><td align="left" valign="top">0.926</td><td align="left" valign="top">0.635</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">150</td></tr><tr><td align="left" valign="top">DIoU</td><td align="left" valign="top">0.880</td><td align="left" valign="top">0.877</td><td align="left" valign="top">0.933</td><td align="left" valign="top">0.629</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">146.2</td></tr><tr><td align="left" valign="top">EIoU</td><td align="left" valign="top">0.867</td><td align="left" valign="top">0.787</td><td align="left" valign="top">0.875</td><td align="left" valign="top">0.546</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">126.1</td></tr><tr><td align="left" valign="top">SIoU</td><td align="left" valign="top">0.948</td><td align="left" valign="top">0.753</td><td align="left" valign="top">0.872</td><td align="left" valign="top">0.564</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">145</td></tr><tr><td align="left" valign="top">ShapeIoU</td><td align="left" valign="top">0.945</td><td align="left" valign="top">0.842</td><td align="left" valign="top">0.930</td><td align="left" valign="top">0.642</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">136.1</td></tr><tr><td align="left" 
valign="top">PIoU</td><td align="left" valign="top">0.890</td><td align="left" valign="top">0.885</td><td align="left" valign="top">0.923</td><td align="left" valign="top">0.646</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">124.8</td></tr><tr><td align="left" valign="top">WIoU</td><td align="left" valign="top">0.900</td><td align="left" valign="top">0.889</td><td align="left" valign="top">0.939</td><td align="left" valign="top">0.643</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">143</td></tr><tr><td align="left" valign="top">CIoU</td><td align="left" valign="top">0.899</td><td align="left" valign="top">0.873</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.644</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">152</td></tr></tbody></table><table-wrap-foot><fn id="table6fn1"><p><sup>a</sup>The table reports detection accuracy metrics (precision, recall, mAP<sub>0.5</sub>, and mAP<sub>0.5:0.95</sub>) and efficiency metrics (Params, GFLOPs, and FPS) to evaluate the impact of different loss functions on detection performance and computational efficiency.</p></fn><fn id="table6fn2"><p><sup>b</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table6fn3"><p><sup>c</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn id="table6fn4"><p><sup>d</sup>Params (M) was used to measure model size.</p></fn><fn id="table6fn5"><p><sup>e</sup>GFLOPs: number of floating-point operations (&#x00D7;10&#x2079;).</p></fn><fn id="table6fn6"><p><sup>f</sup>FPS: frames per second.</p></fn></table-wrap-foot></table-wrap><p>As <xref ref-type="fig" rid="figure9">Figure 9</xref> shows, CIoU, as a new generation of bounding box regression loss function, significantly improved the accuracy and stability of target detection under the premise of ensuring model lightweighting, making it the most practical and valuable choice at present. At the same time, ShapeIoU and PIoU also had advantages in scenarios with high precision requirements, while WIoU provided good recall and generalization performance.</p><fig position="float" id="figure9"><label>Figure 9.</label><caption><p>Multimetric normalization comparison of different bounding box regression loss functions on the Br35H brain tumor magnetic resonance imaging dataset. All evaluation metrics are normalized to the range [0,1] to enable fair comparison among different loss functions. The compared loss functions include GIoU, DIoU, EIoU, SIoU, ShapeIoU, PIoU, WIoU, and CIoU. The evaluated metrics include precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, number of parameters (Params), computational complexity (GFLOPs), and inference speed (FPS). Arrows indicate optimization direction: &#x2191; denotes that higher values are better (precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, and FPS), while &#x2193; denotes that lower values are better (Params and GFLOPs). 
FPS, frames per second; GFLOPs, number of floating-point operations (&#x00D7;10&#x2079;); mAP, mean average precision; mAP<sub>0.5</sub>, mAP at an intersection-over-union threshold of 0.5; mAP<sub>0.5:0.95</sub>, average mAP over several intersection-over-union thresholds (0.50-0.95).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig09.png"/></fig></sec><sec id="s3-8"><title>Pruning Experiment</title><p>Five common pruning methods were systematically evaluated, including LAMP, L1, Random, GroupNorm, and PHPS (<xref ref-type="table" rid="table7">Table 7</xref>). All methods were kept consistent in terms of the number of parameters (2.07M) and computational complexity (3.3 GFLOPs) to facilitate a fair comparison of their performance differences.</p><table-wrap id="t7" position="float"><label>Table 7.</label><caption><p>Performance comparison of different pruning strategies on the Br35H brain tumor magnetic resonance imaging dataset under the same parameter budget.<sup><xref ref-type="table-fn" rid="table7fn1">a</xref></sup></p></caption><table id="table7" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Pruning method</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table7fn2">b</xref></sup></sub></td><td align="left" valign="bottom">mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table7fn3">c</xref></sup></sub></td><td align="left" valign="bottom">Params<sup><xref ref-type="table-fn" rid="table7fn4">d</xref></sup></td><td align="left" valign="bottom">GFLOPs<sup><xref ref-type="table-fn" rid="table7fn5">e</xref></sup></td><td align="left" valign="bottom">FPS<sup><xref ref-type="table-fn" rid="table7fn6">f</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Lamp</td><td align="left" valign="top">0.858</td><td align="left" valign="top">0.891</td><td align="left" valign="top">0.941</td><td align="left" valign="top">0.640</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">137.1</td></tr><tr><td align="left" valign="top">L1</td><td align="left" valign="top">0.893</td><td align="left" valign="top">0.902</td><td align="left" valign="top">0.934</td><td align="left" valign="top">0.640</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">147</td></tr><tr><td align="left" valign="top">Random</td><td align="left" valign="top">0.866</td><td align="left" valign="top">0.902</td><td align="left" valign="top">0.940</td><td align="left" valign="top">0.637</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">150</td></tr><tr><td align="left" valign="top">GroupNorm</td><td align="left" valign="top">0.909</td><td align="left" valign="top">0.861</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.632</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">146.5</td></tr><tr><td align="left" valign="top">PHPS<sup><xref ref-type="table-fn" rid="table7fn7">g</xref></sup></td><td align="left" valign="top">0.899</td><td align="left" valign="top">0.873</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.644</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td 
align="left" valign="top">152</td></tr></tbody></table><table-wrap-foot><fn id="table7fn1"><p><sup>a</sup>The table reports detection accuracy metrics (precision, recall, mAP<sub>0.5</sub>, and mAP<sub>0.5:0.95</sub>) and efficiency metrics (Params, GFLOPs, and FPS) to evaluate the impact of different pruning methods on detection performance and inference efficiency.</p></fn><fn id="table7fn2"><p><sup>b</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table7fn3"><p><sup>c</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn id="table7fn4"><p><sup>d</sup>Params (M) was used to measure model size.</p></fn><fn id="table7fn5"><p><sup>e</sup>GFLOPs: number of floating-point operations (&#x00D7;10&#x2079;).</p></fn><fn id="table7fn6"><p><sup>f</sup>FPS: frames per second.</p></fn><fn id="table7fn7"><p><sup>g</sup>PHPS: progressive hybrid pruning strategy.</p></fn></table-wrap-foot></table-wrap><p>The performance of PHPS was balanced and optimal across all performance indicators. The mAP<sub>0.5</sub> was the same as the GroupNorm method at 0.944, but PHPS reached the highest value of 0.644 on mAP<sub>0.5:0.95</sub> and also had the fastest inference speed (152 FPS), showing the best balance of precision and speed. GroupNorm had an advantage in precision (0.909), but recall rate was low (0.861), and mAP<sub>0.5:0.95</sub> was not as good as that of PHPS, indicating that its strength in boundary fitting may affect the completeness of target detection. Among the other methods, L1 had a slight advantage in recall (0.902), and the overall precision was stable. Random pruning is simple and has not undergone structural optimization, but it performed well in terms of inference speed (150 FPS). LAMP was inferior in all indicators, especially precision (0.858) and mAP<sub>0.5:0.95</sub> (0.640), indicating that its pruning method failed to effectively retain key features, weakening the model&#x2019;s detection ability.</p><p><xref ref-type="fig" rid="figure10">Figure 10</xref> shows that PHPS was in the outermost layer in dimensions such as &#x201C;precision,&#x201D; &#x201C;recall,&#x201D; &#x201C;mAP&#x201D; and &#x201C;inference speed,&#x201D; representing that it scored the highest on each metric and had extremely strong practicality and deployment advantages. In contrast, the normalized evaluation of LAMP and GroupNorm showed obvious shortcomings, with deficiencies in detection accuracy and speed, respectively, making it difficult to meet the requirements of high-performance real-time tasks.</p><fig position="float" id="figure10"><label>Figure 10.</label><caption><p>Comparative analysis of different pruning strategies on the Br35H brain tumor magnetic resonance imaging dataset under the same parameter budget. The left subfigure shows the multimetric normalization bar chart, where all evaluation metrics are normalized to the range [0,1] for fair comparison. The compared pruning methods include LAMP, L1-norm pruning, random pruning, GroupNorm-based pruning, and the proposed progressive hybrid pruning strategy (PHPS). The evaluated metrics include precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, number of parameters (Params), computational complexity (GFLOPs), and inference speed (FPS). The right subfigure presents a radar-based performance analysis plot summarizing the overall accuracy-efficiency trade-offs of different pruning strategies. 
Arrows indicate optimization direction: &#x2191; denotes that higher values are better (precision, recall, mAP<sub>0.5</sub>, mAP<sub>0.5:0.95</sub>, and FPS), while &#x2193; denotes that lower values are better (Params and GFLOPs). FPS, frames per second; GFLOPs, number of floating-point operations (&#x00D7;10&#x2079;); mAP, mean average precision; mAP<sub>0.5</sub>, mAP at an intersection-over-union threshold of 0.5; mAP<sub>0.5:0.95</sub>, average mAP over several intersection-over-union thresholds (0.50-0.95).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig10.png"/></fig><p>To assess robustness, we fixed the same fully trained baseline checkpoint and repeated the pruning procedure 5 times with different random seeds. For each run, we pruned the model to the same parameter budget as that reported in <xref ref-type="table" rid="table7">Table 7</xref> and performed a single final fine-tuning stage. These results demonstrated that, although the mean mAP gap between PHPS and random pruning was limited, PHPS provided significantly improved robustness and stability, which is particularly important for reliable deployment in medical imaging applications (<xref ref-type="table" rid="table8">Table 8</xref>).</p><table-wrap id="t8" position="float"><label>Table 8.</label><caption><p>Stability comparison of different pruning strategies under the same parameter budget on the Br35H brain tumor magnetic resonance imaging dataset.<sup><xref ref-type="table-fn" rid="table8fn1">a</xref></sup></p></caption><table id="table8" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Pruning method</td><td align="left" valign="bottom">SD (mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table8fn2">b</xref></sup></sub>) &#x00D7;10<sup>-3</sup></td><td align="left" valign="bottom">SD (mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table8fn3">c</xref></sup></sub>) &#x00D7;10<sup>-3</sup></td></tr></thead><tbody><tr><td align="left" valign="top">Lamp</td><td align="left" valign="top">3.35</td><td align="left" valign="top">5.66</td></tr><tr><td align="left" valign="top">L1</td><td align="left" valign="top">1.41</td><td align="left" valign="top">3.54</td></tr><tr><td align="left" valign="top">Random</td><td align="left" valign="top">7.07</td><td align="left" valign="top">2.12</td></tr><tr><td align="left" valign="top">GroupNorm</td><td align="left" valign="top">2.83</td><td align="left" valign="top">1.42</td></tr><tr><td align="left" valign="top">PHPS<sup><xref ref-type="table-fn" rid="table8fn4">d</xref></sup></td><td align="left" valign="top">0.71</td><td align="left" valign="top">0.71</td></tr></tbody></table><table-wrap-foot><fn id="table8fn1"><p><sup>a</sup>The table reports the SD of mAP<sub>0.5</sub> and mAP<sub>0.5:0.95</sub> (scaled by 10&#x207B;&#x00B3;) across multiple training runs, which reflects the robustness and training stability of each pruning method.</p></fn><fn id="table8fn2"><p><sup>b</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table8fn3"><p><sup>c</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn id="table8fn4"><p><sup>d</sup>PHPS: progressive hybrid pruning strategy.</p></fn></table-wrap-foot></table-wrap><p>Although different pruning strategies vary in their effectiveness, they also differ in practical implementation costs. 
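</p><p>For readers who wish to reproduce the structured pruning baselines compared in Table 7, the snippet below is a minimal sketch of generic L1-norm structured channel pruning in PyTorch; it is not the authors&#x2019; PHPS, and the stand-in model and pruning ratio are illustrative assumptions. It also makes concrete the cost distinction discussed next: the built-in pruning utilities only mask weights, so realizing the parameter and GFLOPs reductions reported above additionally requires physically removing the masked channels.</p><preformat>
# Minimal sketch (not the authors' PHPS): L1-norm structured channel pruning in PyTorch.
# The stand-in model and the 30% ratio are illustrative assumptions.
import torch.nn as nn
import torch.nn.utils.prune as prune

model = nn.Sequential(                      # stand-in for a detector backbone
    nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(),
    nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(),
)

for module in model.modules():
    if isinstance(module, nn.Conv2d):
        # Zero out 30% of output channels (dim=0) with the smallest L1 norm.
        prune.ln_structured(module, name="weight", amount=0.3, n=1, dim=0)
        prune.remove(module, "weight")      # bake the mask into the weights

# Note: tensor shapes are unchanged here; the parameter and GFLOPs savings in
# Table 7 require physically removing the zeroed filters (structured pruning)
# and then fine-tuning the compact model.
</preformat><p>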
Unstructured pruning often introduces limited pruning time overhead but requires sparse inference support, whereas structured pruning incurs additional pruning and fine-tuning costs during training while enabling direct acceleration at inference. The proposed PHPS performs pruning offline during training and does not introduce extra computational overhead at inference, making it suitable for deployment without modifying existing inference pipelines.</p></sec><sec id="s3-9"><title>Five-Fold Cross-Validation</title><p>To further evaluate the statistical reliability of the performance gain between CDCP-YOLO and YOLOv11, we conducted a 5-fold cross-validation. The dataset was partitioned into 5 mutually exclusive folds. In each fold, 4 folds were used for training and the remaining one for testing, ensuring that each sample was evaluated exactly once.</p><p>All models were trained from scratch under identical training settings for each fold. The final performance was reported as the mean (SD) across the 5 folds.</p><p>The mean (SD) values of precision, recall, mAP<sub>0.5</sub>, and mAP<sub>0.5:0.95</sub> across the 5 folds are reported in <xref ref-type="table" rid="table9">Table 9</xref>. The results showed that CDCP-YOLO consistently outperformed YOLOv11 across different folds, with lower variance and more stable performance, confirming that the observed improvement was statistically reliable and reproducible, rather than an artifact of random initialization.</p><table-wrap id="t9" position="float"><label>Table 9.</label><caption><p>Five-fold cross-validation results of YOLOv11 and CDCP-YOLO on the Br35H brain tumor magnetic resonance imaging dataset.<sup><xref ref-type="table-fn" rid="table9fn1">a</xref></sup></p></caption><table id="table9" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Precision, mean (SD &#x00D7;10<sup>-3</sup>)</td><td align="left" valign="bottom">Recall, mean (SD &#x00D7;10<sup>-3</sup>)</td><td align="left" valign="bottom">mAP<sub>0.5</sub>, mean (SD &#x00D7;10<sup>-3</sup>)</td><td align="left" valign="bottom">mAP<sub>0.5:0.95</sub>, mean (SD &#x00D7;10<sup>-3</sup>)</td></tr></thead><tbody><tr><td align="left" valign="top">YOLOv11</td><td align="left" valign="top">0.907 (17.880)</td><td align="left" valign="top">0.854 (2.492)</td><td align="left" valign="top">0.920 (3.030)</td><td align="left" valign="top">0.588 (11.500)</td></tr><tr><td align="left" valign="top">CDCP-YOLO</td><td align="left" valign="top">0.918 (1.410)</td><td align="left" valign="top">0.906 (2.610)</td><td align="left" valign="top">0.943 (2.590)</td><td align="left" valign="top">0.644 (1.410)</td></tr></tbody></table><table-wrap-foot><fn id="table9fn1"><p><sup>a</sup>The table reports the mean values of precision, recall, mAP<sub>0.5</sub>, and mAP<sub>0.5:0.95</sub> across 5 folds, together with the corresponding SD (&#x00D7;10&#x207B;&#x00B3;) values, which reflect the training stability and robustness of each model.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-10"><title>Interpretability</title><p>In <xref ref-type="fig" rid="figure11">Figure 11</xref>, for each dataset, the first column shows the original MRI image, and the remaining columns show the detection boxes generated by the YOLOv11 and CDCP-YOLO frameworks and the corresponding Eigen-CAM heat maps, respectively. 
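</p><p>As background for these visualizations, Eigen-CAM produces a class-agnostic saliency map by projecting the activations of a chosen convolutional layer onto their first principal component. The following is a minimal sketch of that computation; the hook-based feature extraction, layer choice, and normalization are illustrative assumptions rather than the authors&#x2019; exact implementation.</p><preformat>
# Minimal Eigen-CAM sketch: project a layer's activations onto their first
# principal component. The chosen layer and the normalization are illustrative.
import torch

def eigen_cam(activations):
    # activations: (C, H, W) feature map captured with a forward hook
    c, h, w = activations.shape
    flat = activations.reshape(c, h * w).T             # (H*W, C)
    _, _, vh = torch.linalg.svd(flat, full_matrices=False)
    cam = flat @ vh[0]                                  # project onto 1st principal axis
    cam = torch.relu(cam).reshape(h, w)                 # keep positive contributions
    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)  # scale to [0, 1]
    return cam  # upsample to the input size and overlay as a heat map
</preformat><p>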
The overall comparison results showed that CDCP-YOLO performed with higher detection confidence, more compact detection boxes, and more focused responses on different datasets, significantly outperforming the YOLOv11 baseline.</p><fig position="float" id="figure11"><label>Figure 11.</label><caption><p>Qualitative visual comparison of detection and interpretability results between YOLOv11 and the proposed CDCP-YOLO model on 3 brain tumor magnetic resonance imaging (MRI) datasets. (A) Br35H dataset, (B) Roboflow dataset, and (C) Capstone dataset. For each dataset, the columns (from left to right) show the original MRI slice, detection results of YOLOv11, detection results of CDCP-YOLO, Eigen-CAM heat map of YOLOv11, and Eigen-CAM heat map of CDCP-YOLO. The heat map color gradient ranges from blue to red, indicating low to high activation intensity, respectively, where warmer colors correspond to regions that contribute most to the detection decision. Compared with YOLOv11, CDCP-YOLO produces more compact and accurate bounding boxes and generates more focused and lesion-aligned activation maps, especially for small, blurry, or irregular tumor regions. These results demonstrate the improved detection reliability and interpretability of the proposed framework. Eigen-CAM, Eigen-class activation mapping; YOLO, &#x201C;you only look once&#x201D; framework.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig11.png"/></fig><p>From both quantitative and qualitative perspectives: on Br35H, the detection confidence of CDCP-YOLO in the first image was much higher than YOLOv11; in the second image, YOLOv11 failed to detect the tumor on the left, while CDCP-YOLO successfully identified it; in the third image, YOLOv11 produced 2 bounding boxes with inaccurate positioning, while the detection box of CDCP-YOLO was closer to the lesion outline; the Eigen-CAM heat map also mainly covered the high-signal core and extended to the indistinct boundary. On Roboflow, the confidence level of CDCP-YOLO was also significantly higher than that of YOLOv11, and it was more accurate in terms of boundary fit and shape depiction. On Capstone, the 3 sets of images consistently showed that CDCP-YOLO not only has higher detection confidence but its bounding boxes were also more consistent with the actual lesion, and the thermal response was tighter and more focused. In MRI with complex backgrounds, YOLOv11 is often interfered with by shadows and anatomical structures (such as blood vessels and choroid plexus), leading to boundary offsets or missed detections. The heat map of CDCP-YOLO can stably focus on the tumor lesion area, effectively suppressing the interference of shadows and structural noise. This advantage stems from the deep integration of multiscale feature enhancement modules (CSPP and DCC) and CPCA, which significantly enhances the sensitivity and discrimination of tumor regions during feature extraction and judgment.</p></sec><sec id="s3-11"><title>Clinical Sensitivity and False Negative Analysis</title><p>In medical diagnosis, false negatives&#x2014;particularly missed detections of small or visually ambiguous lesions&#x2014;pose a critical risk. 
To evaluate the potential impact of pruning on clinical sensitivity, we further analyzed the behavior of the proposed PHPS from both quantitative and qualitative perspectives.</p><p>Quantitatively, the recall remained stable across different pruning configurations, indicating that the progressive pruning process did not significantly impair the model&#x2019;s ability to detect tumor regions. This suggests that the proposed strategy effectively preserved detection-critical channels during compression. Qualitatively, visualization results on representative cases involving small-scale or blurred tumor boundaries showed that the pruned model maintained consistent activation patterns compared with the unpruned baseline. These findings indicate that PHPS mitigates the risk of increased false negatives while achieving substantial model compression, thereby enhancing its suitability for clinical deployment. Despite significant model compression, the pruned CDCP-YOLO preserves consistent tumor localization and activation patterns compared with the unpruned baseline, especially for small-scale or low-contrast lesions. These results indicate that the proposed PHPS did not increase false negative rates and maintained clinical sensitivity under challenging conditions (<xref ref-type="fig" rid="figure12">Figure 12</xref>).</p><fig position="float" id="figure12"><label>Figure 12.</label><caption><p>Qualitative comparison of detection sensitivity on small or ambiguous brain tumor regions before and after pruning on the Br35H brain tumor magnetic resonance imaging (MRI) dataset. From left to right, each column shows the original MRI slice, detection result of the unpruned CDC-YOLO, detection result of the pruned CDCP-YOLO, Eigen-CAM heat map of CDC-YOLO, and Eigen-CAM heat map of CDCP-YOLO. The heat map color gradient ranges from blue to red, indicating low to high activation intensity, respectively, where warmer colors correspond to regions that contribute most to the detection decision. Eigen-CAM, Eigen-class activation mapping; YOLO, &#x201C;you only look once&#x201D; framework.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig12.png"/></fig><p>To statistically substantiate the &#x201C;fine-grained&#x201D; detection capability and robustness to small-scale tumors, we further evaluated the models using the AP<sub>small</sub> metric, following the standard COCO evaluation protocol. As summarized in <xref ref-type="table" rid="table10">Table 10</xref>, CDCP-YOLO exhibited a superior sensitivity to small lesions compared to all baseline models. Specifically, CDCP-YOLO achieved an AP<sub>small</sub> of 0.487, outperforming the YOLOv11n baseline (0.412) by a substantial margin of 18.2%. This quantitative improvement indicates that the integration of the CSPP and DCC modules for edge and feature enhancement, alongside the CPCA mechanism for precise spatial attention, effectively preserves the subtle morphological features of early-stage or small-scale tumors&#x2014;details that are frequently overlooked by standard architectures. 
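</p><p>The AP<sub>small</sub> values discussed here follow the standard COCO protocol, in which the average precision over the 0.50-0.95 IoU thresholds is restricted to objects whose bounding-box area falls in the COCO &#x201C;small&#x201D; range. A minimal sketch of how this value can be reproduced with pycocotools is shown below; the annotation and prediction file names are placeholders.</p><preformat>
# Sketch: obtain COCO-style AP_small with pycocotools.
# "annotations.json" and "predictions.json" are hypothetical file names.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO("annotations.json")             # ground-truth boxes in COCO format
coco_dt = coco_gt.loadRes("predictions.json")  # detector outputs in COCO format

evaluator = COCOeval(coco_gt, coco_dt, iouType="bbox")
evaluator.evaluate()
evaluator.accumulate()
evaluator.summarize()

ap_small = evaluator.stats[3]                  # AP, IoU 0.50:0.95, small objects
print(f"AP_small = {ap_small:.3f}")
</preformat><p>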
These results provide rigorous evidence that the proposed framework is not only efficient but also highly reliable for detecting clinically significant small lesions.</p><table-wrap id="t10" position="float"><label>Table 10.</label><caption><p>Quantitative evaluation of small-lesion detection performance, AP<sub>small</sub> (the baseline YOLOv11n and the proposed CDCP-YOLO model), on the Br35H brain tumor magnetic resonance imaging dataset using the standard Common Objects in Context (COCO)&#x2013;style detection metrics.</p></caption><table id="table10" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table10fn1">a</xref></sup></sub></td><td align="left" valign="bottom">AP<sub>small<sup><xref ref-type="table-fn" rid="table10fn2">b</xref></sup></sub></td><td align="left" valign="bottom">Relative gain</td></tr></thead><tbody><tr><td align="left" valign="top">YOLOv11n</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.412</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table10fn3">c</xref></sup></td></tr><tr><td align="left" valign="top">CDCP-YOLO</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.487</td><td align="left" valign="top">+18.2%</td></tr></tbody></table><table-wrap-foot><fn id="table10fn1"><p><sup>a</sup>mAP<sub>0.5</sub> denotes the mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table10fn2"><p><sup>b</sup>AP<sub>small</sub> denotes the average precision computed only for small objects, following the COCO definition (ie, objects whose bounding-box area falls within the &#x201C;small&#x201D; size range specified by the COCO evaluation protocol).</p></fn><fn id="table10fn3"><p><sup>c</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><p>To further assess the reliability and specificity of CDCP-YOLO, we conducted an inference-only validation on a negative control cohort of 50 healthy brain MRI slices obtained from the brain tumor dataset [<xref ref-type="bibr" rid="ref48">48</xref>]. This test aimed to verify that the model does not produce &#x201C;hallucinated&#x201D; detections on normal anatomical structures. As shown in <xref ref-type="fig" rid="figure13">Figure 13</xref>, the model successfully classified all healthy slices as negative, producing no false positive detection boxes. Moreover, the Eigen-CAM visualizations showed only very weak background-level activations (cool colors) within healthy brain regions, without the high-intensity highlighted responses typically observed in tumor areas. This negative control experiment provided empirical evidence for the model&#x2019;s robust safety profile, ensuring that its high tumor sensitivity is not achieved at the expense of misdiagnosing healthy anatomy.</p><fig position="float" id="figure13"><label>Figure 13.</label><caption><p>Representative examples of negative-control validation on healthy brain magnetic resonance imaging (MRI) slices using inference-only CDCP-YOLO and Eigen-CAM visualization. This figure presents an inference-only negative control experiment conducted on healthy brain MRI slices without tumor lesions, used to evaluate the specificity and safety of the proposed CDCP-YOLO model. The healthy slices were obtained from a publicly available brain MRI dataset and were not included in the training process. 
For each example, the first column shows the original MRI slice, the second column shows the detection result produced by CDCP-YOLO, and the third column presents the corresponding Eigen-CAM heat map overlaid on the original image. The color scale of the heat map represents the relative contribution of image regions to the model&#x2019;s prediction, where warm colors (red/yellow) indicate higher activation intensity, and cool colors (blue) indicate low or background-level responses. Eigen-CAM, Eigen-class activation mapping; YOLO, &#x201C;you only look once&#x201D; framework.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e84095_fig13.png"/></fig></sec><sec id="s3-12"><title>Hyperparameter Sensitivity Analysis of PHPS</title><p>To further assess the robustness of the proposed method, we conducted a sensitivity analysis on key training and pruning hyperparameters involved in PHPS. Specifically, we analyzed the sparsity coefficient (SC) and the learning rate (LR) schedule, as these parameters directly influence pruning behavior and convergence stability.</p><p>In this analysis, only one hyperparameter was varied at a time while all other settings were kept fixed, allowing us to isolate the effect of each parameter. The results are summarized in <xref ref-type="table" rid="table11">Table 11</xref>. As shown, the proposed method exhibited stable detection performance across a reasonable range of SC values, indicating that PHPS is not overly sensitive to the pruning threshold. In addition, different learning rate schedules led to comparable performance, suggesting that the training process remained robust under different optimization dynamics.</p><table-wrap id="t11" position="float"><label>Table 11.</label><caption><p>Sensitivity analysis of key hyperparameters in the progressive hybrid pruning strategy (PHPS) on the Br35H brain tumor magnetic resonance imaging dataset.<sup><xref ref-type="table-fn" rid="table11fn1">a</xref></sup></p></caption><table id="table11" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Hyperparameter</td><td align="left" valign="bottom">Setting</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table11fn2">b</xref></sup></sub></td><td align="left" valign="bottom">mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table11fn3">c</xref></sup></sub></td></tr></thead><tbody><tr><td align="left" valign="top" rowspan="3">SC<sup><xref ref-type="table-fn" rid="table11fn4">d</xref></sup></td><td align="left" valign="top">0.2</td><td align="left" valign="top">0.912</td><td align="left" valign="top">0.898</td><td align="left" valign="top">0.939</td><td align="left" valign="top">0.637</td></tr><tr><td align="left" valign="top">0.3</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.644</td></tr><tr><td align="left" valign="top">0.4</td><td align="left" valign="top">0.910</td><td align="left" valign="top">0.895</td><td align="left" valign="top">0.936</td><td align="left" valign="top">0.631</td></tr><tr><td align="left" valign="top" rowspan="3">LR<sup><xref ref-type="table-fn" rid="table11fn5">e</xref></sup> schedule</td><td align="left" valign="top">StepLR</td><td align="left" valign="top">0.914</td><td align="left" valign="top">0.899</td><td align="left" 
valign="top">0.941</td><td align="left" valign="top">0.639</td></tr><tr><td align="left" valign="top">CosineAnnealingLR</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.644</td></tr><tr><td align="left" valign="top">CosineAnnealingWarmRestarts</td><td align="left" valign="top">0.916</td><td align="left" valign="top">0.902</td><td align="left" valign="top">0.940</td><td align="left" valign="top">0.635</td></tr></tbody></table><table-wrap-foot><fn id="table11fn1"><p><sup>a</sup>The table reports the detection performance under different settings of the sparsity coefficient (SC) and learning rate (LR) scheduling strategies, including StepLR, CosineAnnealingLR, and CosineAnnealingWarmRestarts. Detection accuracy was evaluated using precision, recall, mAP<sub>0.5</sub> and mAP<sub>0.5:0.95</sub>.</p></fn><fn id="table11fn2"><p><sup>b</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5. </p></fn><fn id="table11fn3"><p><sup>c</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn id="table11fn4"><p><sup>d</sup>SC: sparsity coefficient.</p></fn><fn id="table11fn5"><p><sup>e</sup>LR: learning rate.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-13"><title>Ablation Experiments for Architectural and Pruning Analysis</title><p>To rigorously isolate the architectural contributions of the proposed CSPP, DCC, and CPCA modules and analyze the impact of pruning, a pruned baseline model (YOLOv11n-1) was included in the ablation study, as summarized in <xref ref-type="table" rid="table12">Table 12</xref>. Specifically, the standard YOLOv11n backbone was pruned to the same parameter budget (2.07M) using the same PHPS and fine-tuning protocol but without introducing any of the proposed architectural modules. This design allows us to decouple the effect of pruning from that of architectural enhancement. As shown in <xref ref-type="table" rid="table12">Table 12</xref>, applying PHPS to the standard YOLOv11n resulted in performance degradation, with mAP<sub>0.5</sub> dropping from 0.918 to 0.902. This outcome demonstrates that aggressive pruning on a baseline backbone without specialized modules inevitably disrupts critical feature pathways, challenging the notion that pruning alone can enhance model capability. In contrast, the proposed CDCP-YOLO model achieved a significantly higher mAP<sub>0.5</sub> of 0.944 under the same 2.07M parameter budget. These results clarify that the observed performance gains originated from the architectural enhancements&#x2014;CSPP, DCC, and CPCA&#x2014;which provided robust feature representation that successfully compensated for the pruning-induced losses. 
This confirms that jointly designing architectural enhancements and progressive pruning is essential for achieving a superior accuracy-efficiency trade-off in resource-constrained environments.</p><table-wrap id="t12" position="float"><label>Table 12.</label><caption><p>Ablation study isolating the contributions of architectural modules and pruning, through comparison of (1) the unpruned baseline YOLOv11n; (2) a pruned baseline (YOLOv11n-1) obtained by applying the progressive hybrid pruning strategy (PHPS) to the standard YOLOv11n (without CSPP<sup><xref ref-type="table-fn" rid="table12fn1">a</xref></sup>, DCC<sup><xref ref-type="table-fn" rid="table12fn2">b</xref></sup>, or CPCA<sup><xref ref-type="table-fn" rid="table12fn3">c</xref></sup>) to match the parameter budget of 2.07M; and (3) the proposed CDCP-YOLO model, which integrated the proposed architectural modules (CSPP, DCC, and CPCA) and was pruned and fine-tuned under the same PHPS setting.</p></caption><table id="table12" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table12fn4">d</xref></sup></sub></td><td align="left" valign="bottom">mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table12fn5">e</xref></sup></sub></td><td align="left" valign="bottom">Params (M)<sup><xref ref-type="table-fn" rid="table12fn6">f</xref></sup></td><td align="left" valign="bottom">GFLOPs<sup><xref ref-type="table-fn" rid="table12fn7">g</xref></sup></td><td align="left" valign="bottom">FPS<sup><xref ref-type="table-fn" rid="table12fn8">h</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">YOLOv11n</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.853</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.585</td><td align="left" valign="top">2.58</td><td align="left" valign="top">6.3</td><td align="left" valign="top">142.2</td></tr><tr><td align="left" valign="top">YOLOv11n-1</td><td align="left" valign="top">0.905</td><td align="left" valign="top">0.855</td><td align="left" valign="top">0.902</td><td align="left" valign="top">0.580</td><td align="left" valign="top">2.07</td><td align="left" valign="top">2.8</td><td align="left" valign="top">145.3</td></tr><tr><td align="left" valign="top">CDCP-YOLO</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.644</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td><td align="left" valign="top">152</td></tr></tbody></table><table-wrap-foot><fn id="table12fn1"><p><sup>a</sup>CSPP: convolution Prewitt-and-pooling&#x2013;based preprocessing.</p></fn><fn id="table12fn2"><p><sup>b</sup>DCC: dynamic convolution&#x2013;based C3k2.</p></fn><fn id="table12fn3"><p><sup>c</sup>CPCA: channel prior convolutional attention.</p></fn><fn id="table12fn4"><p><sup>d</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table12fn5"><p><sup>e</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn id="table12fn6"><p><sup>f</sup>Params (M) was used to measure model size.</p></fn><fn id="table12fn7"><p><sup>g</sup>GFLOPs: number of floating-point operations (&#x00D7;10&#x2079;).</p></fn><fn 
id="table12fn8"><p><sup>h</sup>FPS: frames per second.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-14"><title>Impact of Training Budget on Performance Gain</title><p>To verify that the performance gains were not merely a result of longer total training duration, we retrained the YOLOv11n baseline for 600 epochs (YOLOv11n-2), matching the combined 2-stage budget of CDCP-YOLO. Results demonstrated that despite doubling the convergence time, the mAP<sub>0.5</sub> of YOLOv11n only achieved a negligible increase of 0.001, remaining significantly below the value (0.944) achieved by CDCP-YOLO (<xref ref-type="table" rid="table13">Table 13</xref>). This finding underscored that the core contributions to detection accuracy stemmed from the proposed feature enhancement modules and the PHPS strategy rather than extended training time.</p><table-wrap id="t13" position="float"><label>Table 13.</label><caption><p>Impact of training budget alignment on model performance on the Br35H brain tumor magnetic resonance imaging dataset, assessed through comparison of the detection performance under three experimental settings: (1) the standard YOLOv11n baseline trained for 300 epochs, (2) the YOLOv11n baseline retrained for 600 epochs to align with the total 2-stage training budget of CDCP-YOLO (denoted as YOLOv11n-2), and (3) the proposed CDCP-YOLO trained using a 2-stage training strategy (300 epochs of initial training followed by 300 epochs of postpruning fine-tuning).</p></caption><table id="table13" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Total epochs, n</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom">mAP<sub>0.5<sup><xref ref-type="table-fn" rid="table13fn1">a</xref></sup></sub></td><td align="left" valign="bottom">mAP<sub>0.5:0.95<sup><xref ref-type="table-fn" rid="table13fn2">b</xref></sup></sub></td><td align="left" valign="bottom">Params (M)<sup><xref ref-type="table-fn" rid="table13fn3">c</xref></sup></td><td align="left" valign="bottom">GFLOPs<sup><xref ref-type="table-fn" rid="table13fn4">d</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">YOLOv11n</td><td align="left" valign="top">300</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.853</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.585</td><td align="left" valign="top">2.58</td><td align="left" valign="top">6.3</td></tr><tr><td align="left" valign="top">YOLOv11n-2</td><td align="left" valign="top">600</td><td align="left" valign="top">0.905</td><td align="left" valign="top">0.855</td><td align="left" valign="top">0.919</td><td align="left" valign="top">0.580</td><td align="left" valign="top">2.58</td><td align="left" valign="top">6.3</td></tr><tr><td align="left" valign="top">CDCP-YOLO</td><td align="left" valign="top">600</td><td align="left" valign="top">0.918</td><td align="left" valign="top">0.904</td><td align="left" valign="top">0.944</td><td align="left" valign="top">0.644</td><td align="left" valign="top">2.07</td><td align="left" valign="top">3.3</td></tr></tbody></table><table-wrap-foot><fn id="table13fn1"><p><sup>a</sup>mAP<sub>0.5</sub>: mean average precision at an intersection-over-union threshold of 0.5.</p></fn><fn id="table13fn2"><p><sup>b</sup>mAP<sub>0.5:0.95</sub>: average mean average precision over several intersection-over-union thresholds (0.50-0.95).</p></fn><fn 
id="table13fn3"><p><sup>c</sup>Params (M) was used to measure model size.</p></fn><fn id="table13fn4"><p><sup>d</sup>GFLOPs: number of floating-point operations (&#x00D7;10&#x2079;).</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-15"><title>Computational Overhead and Deployment Considerations</title><p>Although different pruning strategies vary in their compression effectiveness, they also differ in practical implementation cost. The computational overhead of pruning methods can be analyzed from 3 aspects: pruning time overhead, fine-tuning cost, and inference-time efficiency.</p><p>Unstructured pruning typically introduces limited overhead during pruning but requires specialized sparse inference support at deployment. In contrast, structured pruning methods incur additional pruning and fine-tuning costs during training while enabling direct reductions in parameter size and number of floating-point operations (FLOPs) without modifying standard inference pipelines.</p><p>The proposed strategy PHPS performs pruning offline during training in a stage-wise manner. Consistent with the standard characteristics of structured pruning, PHPS physically removes redundant parameters from the network, resulting in a reduced model size that naturally avoids additional computational overhead during the inference phase. This lack of inference overhead is a direct consequence of the physical parameter removal inherent to structured pruning rather than a unique architectural capability of the PHPS itself.</p><p>This design makes PHPS particularly suitable for real-world deployment scenarios, such as edge devices or clinical systems, where inference efficiency and implementation simplicity are critical and modifying inference engines is impractical.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This study proposes CDCP-YOLO, an interpretable and lightweight brain tumor MRI detection framework that integrates feature enhancement, channel-prior attention, and progressive hybrid pruning. Experimental results across 3 public datasets demonstrated that CDCP-YOLO achieved a favorable balance between detection accuracy, computational efficiency, and model interpretability for slice-level brain tumor localization tasks. Notably, the proposed method consistently outperformed the YOLOv11 baseline while reducing parameters and GFLOPs by nearly half.</p><p>Furthermore, 5-fold cross-validation confirmed that the observed performance gains were statistically stable rather than artifacts of random initialization. The integration of Eigen-CAM enables visually consistent and task-aligned explanations that highlight tumor regions across MRI slices.</p></sec><sec id="s4-2"><title>Strengths and Limitations</title><p>Compared with prior YOLO-based brain tumor detectors that primarily rely on deeper architectures, additional detection heads, or external pretraining, CDCP-YOLO adopts a task-driven co-design strategy. Unlike RCS-YOLO or BGF-YOLO, which emphasize reparameterization or feature fusion, the proposed method enhances edge-sensitive feature extraction at the input stage and introduces dynamic convolution to adapt to heterogeneous tumor morphologies. In contrast to PK-YOLO, which depends on external pretrained knowledge, CDCP-YOLO improves representation capacity through internal data-adaptive mechanisms, making it more suitable for limited or domain-specific medical datasets. 
Moreover, most existing pruning-based detectors use one-shot or single-criterion pruning, whereas the proposed PHPS method preserves detection-critical structures through progressive and structure-aware pruning.</p><p>Several limitations should be acknowledged. First, the Br35H dataset used in this study does not provide patient identifiers, and the train, validation, and test split is therefore performed at the image (slice) level following the official dataset partitioning. As a result, strict patient-level splitting cannot be enforced, which may limit the assessment of patient-level generalization. This limitation is inherent to the dataset itself and will be addressed in future work using datasets with explicit patient-level annotations. Second, all datasets used in this study (Br35H, Roboflow, and Capstone) consist exclusively of tumor-positive MRI slices and do not include healthy control images. Consequently, strict negative control visualization for CAM-based interpretability cannot be conducted without introducing external data. Future work will incorporate MRI cohorts with healthy subjects to enable negative control interpretability analysis and more rigorous validation of model behavior on normal anatomy. Third, while the proposed pruning strategy preserves recall performance, extremely small or highly ambiguous lesions may still pose challenges under aggressive compression. This limitation was partially addressed through qualitative sensitivity analysis, but further prospective validation is required. Finally, this study focused on 2D slice&#x2013;based detection and did not explicitly model interslice spatial continuity, which could further improve robustness in volumetric MRI analysis.</p></sec><sec id="s4-3"><title>Future Directions</title><p>Future work will focus on expanding the dataset through multi-institutional collaboration and evaluating the framework on volumetric MRI sequences. In addition, integrating efficient transformer&#x2013;based modules within the 2-stage framework will be explored to further enhance contextual modeling while maintaining deployability. Clinical-oriented studies, including cost-benefit and workflow integration analyses, will also be conducted to support real-world adoption.</p></sec><sec id="s4-4"><title>Conclusions</title><p>In conclusion, this study demonstrated that CDCP-YOLO achieves a well-balanced integration of detection accuracy, computational efficiency, and interpretability for slice-level brain tumor localization in MRI images. The proposed framework is built upon a task-driven co-design, incorporating structure-aware feature enhancement, dynamic convolution, and a channel-prior attention mechanism to strengthen discriminative feature representation. In particular, the attention module guides the network to focus on lesion-relevant tumor regions, improving robustness to small lesions and tumors with ambiguous boundaries.</p><p>Furthermore, the proposed strategy PHPS effectively compresses the model by jointly considering global sparsity and local structural dependency, enabling substantial reductions in parameters and computational cost while preserving detection-critical pathways. 
In combination with the integrated Eigen-CAM&#x2013;based interpretability design, CDCP-YOLO provides visually consistent and task-aligned explanations that reflect the model&#x2019;s detection behavior, enhancing the transparency and reliability of the framework.</p><p>Overall, the experimental results confirmed the feasibility of deploying lightweight, attention-enhanced, and interpretable object detection models for efficient slice-level brain tumor analysis and screening in MRI images. This work provides a practical and reliable solution for resource-constrained imaging environments and represents a meaningful step toward methodological advancement and potential clinical translation, rather than a direct clinical diagnostic system.</p></sec></sec></body><back><notes><sec><title>Funding</title><p>This work was supported by the National Natural Science Youth Fund of China (grant No. 61300115), the China Postdoctoral Science Foundation (grant number 2014M561331), and the Science and Technology Research Project of Heilongjiang Provincial Education Department of China (grant number 12521073).</p></sec><sec><title>Data Availability</title><p>All datasets used in this study are publicly available and can be accessed through their original published sources. No private, restricted, or proprietary data were used in this work. The use of publicly accessible datasets ensures the transparency and reproducibility of the experimental results. To support the reproducibility of the reported results, the source code for the proposed CDCP-YOLO architecture and the progressive hybrid pruning strategy (PHPS) has been made publicly available on GitHub [<xref ref-type="bibr" rid="ref49">49</xref>].</p></sec></notes><fn-group><fn fn-type="con"><p>LYP contributed to conceptualization, formal analysis, investigation, methodology, and writing &#x2013; original draft. SSW contributed to data curation, formal analysis, investigation, methodology, and writing &#x2013; original draft. LSB contributed to data curation and investigation. ZXC contributed to conceptualization, funding acquisition, project administration, and writing &#x2013; review and editing and served as the corresponding author. LYP is the first author of this study. 
All authors have read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">CA</term><def><p>channel attention</p></def></def-item><def-item><term id="abb2">CAM</term><def><p>class activation mapping</p></def></def-item><def-item><term id="abb3">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb4">CPCA</term><def><p>channel prior convolutional attention</p></def></def-item><def-item><term id="abb5">CSPP</term><def><p>convolution Prewitt-and-pooling&#x2013;based preprocessing</p></def></def-item><def-item><term id="abb6">DCC</term><def><p>dynamic convolution&#x2013;based C3k2</p></def></def-item><def-item><term id="abb7">DCM</term><def><p>disease classification module</p></def></def-item><def-item><term id="abb8">FLOPs</term><def><p>number of floating-point operations</p></def></def-item><def-item><term id="abb9">FPS</term><def><p>frames per second</p></def></def-item><def-item><term id="abb10">GFLOPs</term><def><p>number of floating-point operations (&#x00D7;10&#x2079;)</p></def></def-item><def-item><term id="abb11">Grad-CAM</term><def><p>gradient-weighted class activation mapping</p></def></def-item><def-item><term id="abb12">HSV</term><def><p>hue-saturation-value</p></def></def-item><def-item><term id="abb13">IoU</term><def><p>intersection over union</p></def></def-item><def-item><term id="abb14">IQM</term><def><p>image quality module</p></def></def-item><def-item><term id="abb15">mAP</term><def><p>mean average precision</p></def></def-item><def-item><term id="abb16">MRI</term><def><p>magnetic resonance imaging</p></def></def-item><def-item><term id="abb17">PHPS</term><def><p>progressive hybrid pruning strategy</p></def></def-item><def-item><term id="abb18">R-CNN</term><def><p>region-based convolutional neural network</p></def></def-item><def-item><term id="abb19">RCS</term><def><p>reparameterized convolution combined with channel shuffle</p></def></def-item><def-item><term id="abb20">SA</term><def><p>spatial attention</p></def></def-item><def-item><term id="abb21">SGD</term><def><p>stochastic gradient descent</p></def></def-item><def-item><term id="abb22">SSD</term><def><p>single shot multibox detector</p></def></def-item><def-item><term id="abb23">YOLO</term><def><p>you only look once</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sung</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ferlay</surname><given-names>J</given-names> </name><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names> </name><etal/></person-group><article-title>Global Cancer Statistics 2020: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title><source>CA Cancer J Clin</source><year>2021</year><month>05</month><volume>71</volume><issue>3</issue><fpage>209</fpage><lpage>249</lpage><pub-id pub-id-type="doi">10.3322/caac.21660</pub-id><pub-id pub-id-type="medline">33538338</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fitzgerald</surname><given-names>RC</given-names> </name></person-group><article-title>Big data is crucial to the early detection of 
cancer</article-title><source>Nat Med</source><year>2020</year><month>01</month><volume>26</volume><issue>1</issue><fpage>19</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.1038/s41591-019-0725-7</pub-id><pub-id pub-id-type="medline">31932790</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ramtekkar</surname><given-names>PK</given-names> </name><name name-style="western"><surname>Pandey</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pawar</surname><given-names>MK</given-names> </name></person-group><article-title>A comprehensive review of brain tumour detection mechanisms</article-title><source>Comput J</source><year>2024</year><month>04</month><day>14</day><volume>67</volume><issue>3</issue><fpage>1126</fpage><lpage>1152</lpage><pub-id pub-id-type="doi">10.1093/comjnl/bxad047</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>El-Dahshan</surname><given-names>ESA</given-names> </name><name name-style="western"><surname>Hosny</surname><given-names>T</given-names> </name><name name-style="western"><surname>Salem</surname><given-names>ABM</given-names> </name></person-group><article-title>Hybrid intelligent techniques for MRI brain images classification</article-title><source>Digit Signal Process</source><year>2010</year><month>03</month><volume>20</volume><issue>2</issue><fpage>433</fpage><lpage>441</lpage><pub-id pub-id-type="doi">10.1016/j.dsp.2009.07.002</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Girshick</surname><given-names>R</given-names> </name><name name-style="western"><surname>Donahue</surname><given-names>J</given-names> </name><name name-style="western"><surname>Darrell</surname><given-names>T</given-names> </name><name name-style="western"><surname>Malik</surname><given-names>J</given-names> </name></person-group><article-title>Rich feature hierarchies for accurate object detection and semantic segmentation</article-title><source>2014 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</source><fpage>580</fpage><lpage>587</lpage><pub-id pub-id-type="doi">10.1109/CVPR.2014.81</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Girshick</surname><given-names>R</given-names> </name></person-group><article-title>Fast R-CNN</article-title><source>Proceedings of the IEEE International Conference on Computer Vision</source><year>2015</year><publisher-name>IEEE</publisher-name><fpage>1440</fpage><lpage>1448</lpage><pub-id pub-id-type="doi">10.1109/ICCV.2015.169</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ren</surname><given-names>S</given-names> </name><name name-style="western"><surname>He</surname><given-names>K</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>J</given-names> </name></person-group><article-title>Faster r-cnn: Towards real-time object detection with region proposal networks</article-title><source>Adv Neural Inf Process Syst</source><year>2015</year><pub-id 
pub-id-type="doi">10.1109/TPAMI.2016.2577031</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Redmon</surname><given-names>J</given-names> </name><name name-style="western"><surname>Divvala</surname><given-names>S</given-names> </name><name name-style="western"><surname>Girshick</surname><given-names>R</given-names> </name><name name-style="western"><surname>Farhadi</surname><given-names>A</given-names> </name></person-group><article-title>You only look once: unified, real-time object detection</article-title><source>Proceedings of IEEE Conference on Computer Vision and Pattern Recognition</source><year>2016</year><publisher-name>IEEE</publisher-name><fpage>779</fpage><lpage>788</lpage><pub-id pub-id-type="doi">10.1109/CVPR.2016.91</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Anguelov</surname><given-names>D</given-names> </name><name name-style="western"><surname>Erhan</surname><given-names>D</given-names> </name><etal/></person-group><article-title>SSD: single shot multibox detector</article-title><source>European Conference on Computer Vision</source><year>2016</year><publisher-name>Springer International Publishing</publisher-name><fpage>21</fpage><lpage>37</lpage><pub-id pub-id-type="doi">10.1007/978-3-319-46448-0_2</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Duan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>N</given-names> </name><etal/></person-group><article-title>MSA-YOLOv5: multi-scale attention-based YOLOv5 for automatic detection of acute ischemic stroke from multi-modality MRI images</article-title><source>Comput Biol Med</source><year>2023</year><month>10</month><volume>165</volume><fpage>107471</fpage><pub-id pub-id-type="doi">10.1016/j.compbiomed.2023.107471</pub-id><pub-id pub-id-type="medline">37716245</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xiongfeng</surname><given-names>T</given-names> </name><name name-style="western"><surname>Yingzhi</surname><given-names>L</given-names> </name><name name-style="western"><surname>Xianyue</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Automated detection of knee cystic lesions on magnetic resonance imaging using deep learning</article-title><source>Front Med (Lausanne)</source><year>2022</year><volume>9</volume><fpage>928642</fpage><pub-id pub-id-type="doi">10.3389/fmed.2022.928642</pub-id><pub-id pub-id-type="medline">36016997</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>X</given-names> 
</name><etal/></person-group><article-title>Automated diagnosis and grading of lumbar intervertebral disc degeneration based on a modified YOLO framework</article-title><source>Front Bioeng Biotechnol</source><year>2025</year><volume>13</volume><fpage>1526478</fpage><pub-id pub-id-type="doi">10.3389/fbioe.2025.1526478</pub-id><pub-id pub-id-type="medline">39912111</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>FF</given-names> </name><name name-style="western"><surname>Phan</surname><given-names>RCW</given-names> </name></person-group><article-title>RCS-YOLO: a fast and high-accuracy object detector for brain tumor detection</article-title><source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source><year>2023</year><publisher-name>Springer Nature Switzerland</publisher-name><fpage>600</fpage><lpage>610</lpage><pub-id pub-id-type="doi">10.1007/978-3-031-43901-8_57</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>FF</given-names> </name><name name-style="western"><surname>Phan</surname><given-names>RCW</given-names> </name></person-group><article-title>BGF-YOLO: enhanced YOLOv8 with multiscale attentional feature fusion for brain tumor detection</article-title><source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source><year>2024</year><publisher-name>Springer Nature Switzerland</publisher-name><fpage>35</fpage><lpage>45</lpage><pub-id pub-id-type="doi">10.1007/978-3-031-72111-3_4</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>FF</given-names> </name><name name-style="western"><surname>Phan</surname><given-names>RCW</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>CM</given-names> </name></person-group><article-title>PK-YOLO: pretrained knowledge guided YOLO for brain tumor detection in multiplanar MRI slices</article-title><source>2025 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)</source><year>2025</year><publisher-name>IEEE</publisher-name><fpage>3732</fpage><lpage>3741</lpage><pub-id pub-id-type="doi">10.1109/WACV61041.2025.00367</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Dixit</surname><given-names>A</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>P</given-names> </name></person-group><article-title>Brain tumor detection using fine-tuned YOLO model with transfer learning</article-title><source>Artificial Intelligence on Medical Data: Proceedings of International Symposium, ISCMM 
2021</source><year>2022</year><publisher-name>Springer Nature</publisher-name><fpage>363</fpage><lpage>371</lpage><pub-id pub-id-type="doi">10.1007/978-981-19-0151-5_30</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abdusalomov</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Mukhiddinov</surname><given-names>M</given-names> </name><name name-style="western"><surname>Whangbo</surname><given-names>TK</given-names> </name></person-group><article-title>Brain tumor detection based on deep learning approaches and magnetic resonance imaging</article-title><source>Cancers (Basel)</source><year>2023</year><month>08</month><day>18</day><volume>15</volume><issue>16</issue><fpage>4172</fpage><pub-id pub-id-type="doi">10.3390/cancers15164172</pub-id><pub-id pub-id-type="medline">37627200</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fernandes</surname><given-names>FE</given-names> </name><name name-style="western"><surname>Yen</surname><given-names>GG</given-names> </name></person-group><article-title>Pruning of generative adversarial neural networks for medical imaging diagnostics with evolution strategy</article-title><source>Inf Sci</source><year>2021</year><month>05</month><volume>558</volume><fpage>91</fpage><lpage>102</lpage><pub-id pub-id-type="doi">10.1016/j.ins.2020.12.086</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zeng</surname><given-names>D</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>X</given-names> </name><etal/></person-group><article-title>FairPrune: achieving fairness through pruning for dermatological disease diagnosis</article-title><source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source><year>2022</year><publisher-name>Springer Nature</publisher-name><fpage>743</fpage><lpage>753</lpage><pub-id pub-id-type="doi">10.1007/978-3-031-16431-6_70</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Adnan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ba</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Shaikh</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Structured model pruning for efficient inference in computational pathology</article-title><source>International Workshop on Medical Optical Imaging and Virtual Microscopy Image Analysis</source><year>2024</year><publisher-name>Springer Nature</publisher-name><fpage>140</fpage><lpage>149</lpage><pub-id pub-id-type="doi">10.1007/978-3-031-77786-8_14</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fernandes</surname><given-names>FE</given-names> </name><name name-style="western"><surname>Yen</surname><given-names>GG</given-names> </name></person-group><article-title>Automatic searching and pruning of deep neural 
networks for medical imaging diagnostic</article-title><source>IEEE Trans Neural Netw Learning Syst</source><year>2020</year><volume>32</volume><issue>12</issue><fpage>5664</fpage><lpage>5674</lpage><pub-id pub-id-type="doi">10.1109/TNNLS.2020.3027308</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cocosco</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Zijdenbos</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Evans</surname><given-names>AC</given-names> </name></person-group><article-title>A fully automatic and robust brain MRI tissue classification method</article-title><source>Med Image Anal</source><year>2003</year><month>12</month><volume>7</volume><issue>4</issue><fpage>513</fpage><lpage>527</lpage><pub-id pub-id-type="doi">10.1016/s1361-8415(03)00037-9</pub-id><pub-id pub-id-type="medline">14561555</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Xuan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>S</given-names> </name><name name-style="western"><surname>Xue</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Liao</surname><given-names>S</given-names> </name></person-group><article-title>Learning MRI k-space subsampling pattern using progressive weight pruning</article-title><source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source><year>2020</year><publisher-name>Springer</publisher-name><fpage>178</fpage><lpage>187</lpage><pub-id pub-id-type="doi">10.1007/978-3-030-59713-9_18</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Graziani</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lompech</surname><given-names>T</given-names> </name><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>H</given-names> </name><name name-style="western"><surname>Depeursinge</surname><given-names>A</given-names></name><name name-style="western"><surname>Andrearczyk</surname><given-names>V</given-names> </name></person-group><article-title>Interpretable CNN pruning for preserving scale-covariant features in medical imaging</article-title><source>International Workshop on Interpretability of Machine Intelligence in Medical Image Computing</source><year>2020</year><publisher-name>Springer International Publishing</publisher-name><fpage>23</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1007/978-3-030-61166-8_3</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Holste</surname><given-names>G</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Jaiswal</surname><given-names>A</given-names> </name><etal/></person-group><article-title>How does pruning impact long-tailed multi-label medical image classifiers?</article-title><source>International Conference on Medical Image Computing and 
Computer-Assisted Intervention</source><year>2023</year><publisher-name>Springer Nature</publisher-name><fpage>663</fpage><lpage>673</lpage><pub-id pub-id-type="doi">10.1007/978-3-031-43904-9_64</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Saleh</surname><given-names>L</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>L</given-names> </name></person-group><article-title>Medical image classification using transfer learning and network pruning algorithms</article-title><source>2023 IEEE International Conference on Systems, Man, and Cybernetics (SMC)</source><year>2023</year><publisher-name>IEEE</publisher-name><fpage>1932</fpage><lpage>1938</lpage><pub-id pub-id-type="doi">10.1109/SMC53992.2023.10393868</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Jaiswal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Rousseau</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Peng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name></person-group><article-title>Attend who is weak: pruning-assisted medical image localization under sophisticated and implicit imbalances</article-title><year>2023</year><conf-name>Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision</conf-name><publisher-name>IEEE/CVF</publisher-name><fpage>4987</fpage><lpage>4996</lpage><pub-id pub-id-type="doi">10.1109/WACV56688.2023.00496</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Selvaraju</surname><given-names>RR</given-names> </name><name name-style="western"><surname>Cogswell</surname><given-names>M</given-names> </name><name name-style="western"><surname>Das</surname><given-names>A</given-names> </name><name name-style="western"><surname>Vedantam</surname><given-names>R</given-names> </name><name name-style="western"><surname>Parikh</surname><given-names>D</given-names> </name><name name-style="western"><surname>Batra</surname><given-names>D</given-names> </name></person-group><article-title>Grad-CAM: visual explanations from deep networks via gradient-based localization</article-title><source>Proceedings of IEEE International Conference on Computer Vision</source><year>2017</year><publisher-name>IEEE</publisher-name><fpage>618</fpage><lpage>626</lpage><pub-id pub-id-type="doi">10.1109/ICCV.2017.74</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Chattopadhay</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sarkar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Howlader</surname><given-names>P</given-names> </name><name name-style="western"><surname>Balasubramanian</surname><given-names>VN</given-names> </name></person-group><article-title>Grad-CAM++: generalized gradient-based visual 
explanations for deep convolutional networks</article-title><source>2018 IEEE Winter Conference on Applications of Computer Vision (WACV)</source><year>2018</year><publisher-name>IEEE</publisher-name><fpage>839</fpage><lpage>847</lpage><pub-id pub-id-type="doi">10.1109/WACV.2018.00097</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Muhammad</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Yeasin</surname><given-names>M</given-names> </name></person-group><article-title>Eigen-CAM: class activation map using principal components</article-title><source>2020 International Joint Conference on Neural Networks (IJCNN)</source><year>2020</year><publisher-name>IEEE</publisher-name><fpage>1</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1109/IJCNN48605.2020.9206626</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>PT</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>CB</given-names> </name><name name-style="western"><surname>Hou</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>Y</given-names> </name></person-group><article-title>LayerCAM: exploring hierarchical class activation maps for localization</article-title><source>IEEE Trans Image Process</source><year>2021</year><volume>30</volume><fpage>5875</fpage><lpage>5888</lpage><pub-id pub-id-type="doi">10.1109/TIP.2021.3089943</pub-id><pub-id pub-id-type="medline">34156941</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Windisch</surname><given-names>P</given-names> </name><name name-style="western"><surname>Weber</surname><given-names>P</given-names> </name><name name-style="western"><surname>F&#x00FC;rweger</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Implementation of model explainability for a basic brain tumor detection using convolutional neural networks on MRI slices</article-title><source>Neuroradiology</source><year>2020</year><month>11</month><volume>62</volume><issue>11</issue><fpage>1515</fpage><lpage>1518</lpage><pub-id pub-id-type="doi">10.1007/s00234-020-02465-1</pub-id><pub-id pub-id-type="medline">32500277</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shawon</surname><given-names>MTR</given-names> </name><name name-style="western"><surname>Shibli</surname><given-names>GMS</given-names> </name><name name-style="western"><surname>Ahmed</surname><given-names>F</given-names> </name><name name-style="western"><surname>Joy</surname><given-names>SKS</given-names> </name></person-group><article-title>Explainable cost-sensitive deep neural networks for brain tumor detection from brain MRI images considering data imbalance</article-title><source>Multimed Tools Appl</source><year>2025</year><volume>84</volume><issue>35</issue><fpage>43615</fpage><lpage>43642</lpage><pub-id pub-id-type="doi">10.1007/s11042-025-20842-x</pub-id></nlm-citation></ref><ref 
id="ref34"><label>34</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Dasanayaka</surname><given-names>S</given-names> </name><name name-style="western"><surname>Silva</surname><given-names>S</given-names> </name><name name-style="western"><surname>Shantha</surname><given-names>V</given-names> </name><name name-style="western"><surname>Meedeniya</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ambegoda</surname><given-names>T</given-names> </name></person-group><article-title>Interpretable machine learning for brain tumor analysis using MRI</article-title><source>2022 2nd International Conference on Advanced Research in Computing (ICARC)</source><year>2022</year><publisher-name>IEEE</publisher-name><fpage>212</fpage><lpage>217</lpage><pub-id pub-id-type="doi">10.1109/ICARC54489.2022.9754131</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zeineldin</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Karar</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Elshaer</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Explainability of deep neural networks for MRI analysis of brain tumors</article-title><source>Int J Comput Assist Radiol Surg</source><year>2022</year><month>09</month><volume>17</volume><issue>9</issue><fpage>1673</fpage><lpage>1683</lpage><pub-id pub-id-type="doi">10.1007/s11548-022-02619-x</pub-id><pub-id pub-id-type="medline">35460019</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mohamed</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Mahesh</surname><given-names>TR</given-names> </name><name name-style="western"><surname>Vinoth</surname><given-names>KV</given-names> </name><name name-style="western"><surname>Guluwadi</surname><given-names>S</given-names> </name></person-group><article-title>Enhancing brain tumor detection in MRI images through explainable AI using Grad-CAM with Resnet 50</article-title><source>BMC Med Imaging</source><year>2024</year><month>05</month><day>11</day><volume>24</volume><issue>1</issue><fpage>107</fpage><pub-id pub-id-type="doi">10.1186/s12880-024-01292-7</pub-id><pub-id pub-id-type="medline">38734629</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Han</surname><given-names>K</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Guo</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>E</given-names> </name></person-group><article-title>ParameterNet: parameters are all you need for large-scale visual pretraining of mobile networks</article-title><source>IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source><year>2024</year><publisher-name>IEEE/CVF</publisher-name><fpage>15751</fpage><lpage>15761</lpage><pub-id pub-id-type="doi">10.1109/CVPR52733.2024.01491</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zou</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Channel prior convolutional attention for medical image segmentation</article-title><source>Comput Biol Med</source><year>2024</year><month>08</month><volume>178</volume><fpage>108784</fpage><pub-id pub-id-type="doi">10.1016/j.compbiomed.2024.108784</pub-id><pub-id pub-id-type="medline">38941900</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Hamada</surname><given-names>A</given-names> </name></person-group><article-title>Br35H:: brain tumor detection 2020</article-title><source>Kaggle</source><year>2020</year><access-date>2026-04-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/ahmedhamada0/brain-tumor-detection">https://www.kaggle.com/datasets/ahmedhamada0/brain-tumor-detection</ext-link></comment></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="web"><article-title>YOLO computer vision model</article-title><source>Universe</source><access-date>2026-04-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://universe.roboflow.com/yolo-hz3ua/yolo-fj4s3">https://universe.roboflow.com/yolo-hz3ua/yolo-fj4s3</ext-link></comment></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="web"><article-title>Capstone computer vision dataset</article-title><source>Universe</source><access-date>2026-04-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://universe.roboflow.com/naufal-irfani/capstone-dk1nt">https://universe.roboflow.com/naufal-irfani/capstone-dk1nt</ext-link></comment></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Lu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Xiong</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>B</given-names> </name><name name-style="western"><surname>Chu</surname><given-names>D</given-names> </name></person-group><article-title>MixDehazeNet: mix structure block for image dehazing network</article-title><source>2024 International Joint Conference on Neural Networks (IJCNN)</source><year>2024</year><publisher-name>IEEE</publisher-name><fpage>1</fpage><lpage>10</lpage><pub-id pub-id-type="doi">10.1109/IJCNN60899.2024.10651326</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shao</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zeng</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Hou</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>J</given-names> </name></person-group><article-title>MCANet: medical image segmentation with multi-scale cross-axis attention</article-title><source>Mach Intell Res</source><year>2025</year><month>06</month><volume>22</volume><issue>3</issue><fpage>437</fpage><lpage>451</lpage><pub-id 
pub-id-type="doi">10.1007/s11633-025-1552-6</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Song</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>C</given-names> </name></person-group><article-title>Multi-scale spatial pyramid attention mechanism for image recognition: an effective approach</article-title><source>Eng Appl Artif Intell</source><year>2024</year><month>07</month><volume>133</volume><fpage>108261</fpage><pub-id pub-id-type="doi">10.1016/j.engappai.2024.108261</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Rezatofighi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Tsoi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Gwak</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sadeghian</surname><given-names>A</given-names> </name><name name-style="western"><surname>Reid</surname><given-names>I</given-names> </name><name name-style="western"><surname>Savarese</surname><given-names>S</given-names> </name></person-group><article-title>Generalized intersection over union: a metric and a loss for bounding box regression</article-title><source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source><year>2019</year><publisher-name>IEEE/CVF</publisher-name><fpage>658</fpage><lpage>666</lpage><pub-id pub-id-type="doi">10.1109/CVPR.2019.00075</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Zheng</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>P</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ye</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ren</surname><given-names>D</given-names> </name></person-group><article-title>Distance-iou loss: faster and better learning for bounding box regression</article-title><source>Proceedings of the AAAI Conference on Artificial Intelligence</source><year>2020</year><volume>34</volume><publisher-name>2020</publisher-name><fpage>12993</fpage><lpage>13000</lpage><pub-id pub-id-type="doi">10.1609/aaai.v34i07.6999</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>YF</given-names> </name><name name-style="western"><surname>Ren</surname><given-names>W</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Jia</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>L</given-names> 
</name><name name-style="western"><surname>Tan</surname><given-names>T</given-names> </name></person-group><article-title>Focal and efficient IOU loss for accurate bounding box regression</article-title><source>Neurocomputing</source><year>2022</year><month>09</month><volume>506</volume><fpage>146</fpage><lpage>157</lpage><pub-id pub-id-type="doi">10.1016/j.neucom.2022.07.042</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Backert</surname><given-names>T</given-names> </name></person-group><article-title>Brain tumor dataset</article-title><source>Kaggle</source><year>2024</year><access-date>2026-04-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/tombackert/brain-tumor-mri-data">https://www.kaggle.com/datasets/tombackert/brain-tumor-mri-data</ext-link></comment></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="report"><person-group person-group-type="author"><collab>GitHub</collab></person-group><article-title>Brain-tumor-detection: interpretable and fine-grained brain tumor MRI detection based on progressive pruning</article-title><access-date>2026-04-25</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/song68/Brain-Tumor-Detection">https://github.com/song68/Brain-Tumor-Detection</ext-link></comment></nlm-citation></ref></ref-list></back></article>