@Article{info:doi/10.2196/65937, author="Yang, Liuyang and Zhang, Xinzhang and Li, Zhenhui and Wang, Jian and Zhang, Yiwen and Shan, Liyu and Shi, Xin and Si, Yapeng and Wang, Shuailong and Li, Lin and Wu, Ping and Xu, Ning and Liu, Lizhu and Yang, Junfeng and Leng, Jinjun and Yang, Maolin and Zhang, Zhuorui and Wang, Junfeng and Dong, Xingxiang and Yang, Guangjun and Yan, Ruiying and Li, Wei and Liu, Zhimin and Li, Wenliang", title="Localization and Classification of Adrenal Masses in Multiphase Computed Tomography: Retrospective Study", journal="J Med Internet Res", year="2025", month="Apr", day="24", volume="27", pages="e65937", keywords="MA-YOLO model", keywords="multi-class adrenal masses", keywords="multi-phase CT images", keywords="localization", keywords="classification", abstract="Background: The incidence of adrenal incidentalomas is increasing annually, and most types of adrenal masses require surgical intervention. Accurate classification of common adrenal masses based on tumor computed tomography (CT) images by radiologists or clinicians requires extensive experience and is often challenging, which increases the workload of radiologists and leads to unnecessary adrenal surgeries. There is an urgent need for a fully automated, noninvasive, and precise approach for the identification and accurate classification of common adrenal masses. Objective: This study aims to enhance diagnostic efficiency and transform the current clinical practice of preoperative diagnosis of adrenal masses. Methods: This study is a retrospective analysis that includes patients with adrenal masses who underwent adrenalectomy from January 1, 2021, to May 31, 2023, at Center 1 (internal dataset), and from January 1, 2016, to May 31, 2023, at Center 2 (external dataset). The images include unenhanced, arterial, and venous phases, with 21,649 images used for the training set, 2406 images used for the validation set, and 12,857 images used for the external test set. We invited 3 experienced radiologists to precisely annotate the images, and these annotations served as references. We developed a deep learning--based adrenal mass detection model, Multi-Attention YOLO (MA-YOLO), which can automatically localize and classify 6 common types of adrenal masses. To scientifically evaluate the model performance, we used a variety of evaluation metrics; in addition, we compared the improvement in diagnostic efficacy of 6 doctors after incorporating model assistance. Results: A total of 516 patients were included. In the external test set, the MA-YOLO model achieved an intersection over union of 0.838, 0.885, and 0.890 for the localization of 6 types of adrenal masses in unenhanced, arterial, and venous phase CT images, respectively. The corresponding mean average precision for classification was 0.885, 0.913, and 0.915, respectively. Additionally, with the assistance of this model, the classification diagnostic performance of 6 radiologists and clinicians for adrenal masses improved. Except for adrenal cysts, at least 1 physician significantly improved diagnostic performance for the other 5 types of tumors. Notably, in the categories of adrenal adenoma (for senior clinician: P=.04, junior radiologist: P=.01, and senior radiologist: P=.01) and adrenal cortical carcinoma (junior clinician: P=.02, junior radiologist: P=.01, and intermediate radiologist: P=.001), half of the physicians showed significant improvements after using the model for assistance. 
Conclusions: The MA-YOLO model demonstrates the ability to achieve efficient, accurate, and noninvasive preoperative localization and classification of common adrenal masses in CT examinations, showing promising potential for future applications. ", doi="10.2196/65937", url="https://www.jmir.org/2025/1/e65937" } @Article{info:doi/10.2196/74333, author="Zhang, Yahan and Chun, Yi and Tu, Liping and Xu, Jiatuo", title="Authors' Reply: The Anemia Risk Warning Model Based on a Noninvasive Method: Key Insights and Clarifications", journal="JMIR Med Inform", year="2025", month="Apr", day="22", volume="13", pages="e74333", keywords="anemia", keywords="hemoglobin", keywords="spectroscopy", keywords="machine learning", keywords="risk warning model", keywords="Shapley Additive Explanation", doi="10.2196/74333", url="https://medinform.jmir.org/2025/1/e74333" } @Article{info:doi/10.2196/73297, author="Wei, Jiaqi and Zheng, Nana and Wu, Depei", title="The Anemia Risk Warning Model Based on a Noninvasive Method: Key Insights and Clarifications", journal="JMIR Med Inform", year="2025", month="Apr", day="22", volume="13", pages="e73297", keywords="anemia", keywords="hemoglobin", keywords="spectroscopy", keywords="machine learning", keywords="risk warning model", keywords="Shapley Additive Explanation", doi="10.2196/73297", url="https://medinform.jmir.org/2025/1/e73297" } @Article{info:doi/10.2196/66530, author="Wang, Longyun and Wang, Zeyu and Zhao, Bowei and Wang, Kai and Zheng, Jingying and Zhao, Lijing", title="Diagnosis Test Accuracy of Artificial Intelligence for Endometrial Cancer: Systematic Review and Meta-Analysis", journal="J Med Internet Res", year="2025", month="Apr", day="18", volume="27", pages="e66530", keywords="artificial intelligence", keywords="endometrial cancer", keywords="diagnostic test accuracy", keywords="systematic review", keywords="meta-analysis", keywords="machine learning", keywords="deep learning", abstract="Background: Endometrial cancer is one of the most common gynecological tumors, and early screening and diagnosis are crucial for its treatment. Research on the application of artificial intelligence (AI) in the diagnosis of endometrial cancer is increasing, but there is currently no comprehensive meta-analysis to evaluate the diagnostic accuracy of AI in screening for endometrial cancer. Objective: This paper presents a systematic review of AI-based endometrial cancer screening, which is needed to clarify its diagnostic accuracy and provide evidence for the application of AI technology in screening for endometrial cancer. Methods: A search was conducted across PubMed, Embase, Cochrane Library, Web of Science, and Scopus databases to include studies published in English, which evaluated the performance of AI in endometrial cancer screening. A total of 2 independent reviewers screened the titles and abstracts, and the quality of the selected studies was assessed using the Quality Assessment of Diagnostic Accuracy Studies---2 (QUADAS-2) tool. The certainty of the diagnostic test evidence was evaluated using the Grading of Recommendations Assessment, Development, and Evaluation (GRADE) system. Results: A total of 13 studies were included, and the hierarchical summary receiver operating characteristic model used for the meta-analysis showed that the overall sensitivity of AI-based endometrial cancer screening was 86\% (95\% CI 79\%-90\%) and specificity was 92\% (95\% CI 87\%-95\%). 
Subgroup analysis revealed similar results across AI type, study region, publication year, and study type, but the overall quality of evidence was low. Conclusions: AI-based endometrial cancer screening can effectively detect patients with endometrial cancer, but large-scale population studies are needed in the future to further clarify the diagnostic accuracy of AI in screening for endometrial cancer. Trial Registration: PROSPERO CRD42024519835; https://www.crd.york.ac.uk/PROSPERO/view/CRD42024519835 ", doi="10.2196/66530", url="https://www.jmir.org/2025/1/e66530" } @Article{info:doi/10.2196/63130, author="Ferr{\'e}, Fabrice and Allassonni{\`e}re, St{\'e}phanie and Chadebec, Cl{\'e}ment and Minville, Vincent", title="Generating Artificial Patients With Reliable Clinical Characteristics Using a Geometry-Based Variational Autoencoder: Proof-of-Concept Feasibility Study", journal="J Med Internet Res", year="2025", month="Apr", day="17", volume="27", pages="e63130", keywords="digital health", keywords="artificial data", keywords="variational autoencoder", keywords="data science", keywords="artificial intelligence", keywords="health monitoring", keywords="deep learning", keywords="medical imaging", keywords="imaging", keywords="magnetic resonance imaging", keywords="Alzheimer disease", keywords="anesthesia", keywords="prediction", keywords="data augmentation", abstract="Background: Artificial patient technology could transform health care by accelerating diagnosis, treatment, and mapping clinical pathways. Deep learning methods for generating artificial data in health care include data augmentation by variational autoencoders (VAE) technology. Objective: We aimed to test the feasibility of generating artificial patients with reliable clinical characteristics by using a geometry-based VAE applied, for the first time, on high-dimension, low-sample-size tabular data. Methods: Clinical tabular data were extracted from 521 real patients of the ``MAX'' digital conversational agent (BOTdesign) created for preparing patients for anesthesia. A 3-stage methodological approach was implemented to generate up to 10,000 artificial patients: training the model and generating artificial data, assessing the consistency and confidentiality of artificial data, and validating the plausibility of the newly created artificial patients. Results: We demonstrated the feasibility of applying the VAE technique to tabular data to generate large artificial patient cohorts with high consistency (fidelity scores>94\%). Moreover, artificial patients could not be matched with real patients (filter similarity scores>99\%, $\kappa$ coefficients of agreement<0.2), thus guaranteeing the essential ethical concern of confidentiality. Conclusions: This proof-of-concept study has demonstrated our ability to augment real tabular data to generate artificial patients. These promising results make it possible to envisage in silico trials carried out on large cohorts of artificial patients, thereby overcoming the pitfalls usually encountered in in vivo trials. Further studies integrating longitudinal dynamics are needed to map patient trajectories. ", doi="10.2196/63130", url="https://www.jmir.org/2025/1/e63130" } @Article{info:doi/10.2196/67144, author="Rahman, Mahmudur and Gao, Jifan and Carey, A. Kyle and Edelson, P. Dana and Afshar, Askar and Garrett, W. John and Chen, Guanhua and Afshar, Majid and Churpek, M. 
Matthew", title="Comparison of Deep Learning Approaches Using Chest Radiographs for Predicting Clinical Deterioration: Retrospective Observational Study", journal="JMIR AI", year="2025", month="Apr", day="10", volume="4", pages="e67144", keywords="chest X-ray", keywords="critical care", keywords="deep learning", keywords="chest radiographs", keywords="radiographs", keywords="clinical deterioration", keywords="prediction", keywords="predictive", keywords="deterioration", keywords="retrospective", keywords="data", keywords="dataset", keywords="artificial intelligence", keywords="AI", keywords="chest", keywords="patient", keywords="hospitalized", abstract="Background: The early detection of clinical deterioration and timely intervention for hospitalized patients can improve patient outcomes. The currently existing early warning systems rely on variables from structured data, such as vital signs and laboratory values, and do not incorporate other potentially predictive data modalities. Because respiratory failure is a common cause of deterioration, chest radiographs are often acquired in patients with clinical deterioration, which may be informative for predicting their risk of intensive care unit (ICU) transfer. Objective: This study aimed to compare and validate different computer vision models and data augmentation approaches with chest radiographs for predicting clinical deterioration. Methods: This retrospective observational study included adult patients hospitalized at the University of Wisconsin Health System between 2009 and 2020 with an elevated electronic cardiac arrest risk triage (eCART) score, a validated clinical deterioration early warning score, on the medical-surgical wards. Patients with a chest radiograph obtained within 48 hours prior to the elevated score were included in this study. Five computer vision model architectures (VGG16, DenseNet121, Vision Transformer, ResNet50, and Inception V3) and four data augmentation methods (histogram normalization, random flip, random Gaussian noise, and random rotate) were compared using the area under the receiver operating characteristic curve (AUROC) and the area under the precision-recall curve (AUPRC) for predicting clinical deterioration (ie, ICU transfer or ward death in the following 24 hours). Results: The study included 21,817 patient admissions, of which 1655 (7.6\%) experienced clinical deterioration. The DenseNet121 model pretrained on chest radiograph datasets with histogram normalization and random Gaussian noise augmentation had the highest discrimination (AUROC 0.734 and AUPRC 0.414), while the vision transformer having 24 transformer blocks with random rotate augmentation had the lowest discrimination (AUROC 0.598). Conclusions: The study shows the potential of chest radiographs in deep learning models for predicting clinical deterioration. The DenseNet121 architecture pretrained with chest radiographs performed better than other architectures in most experiments, and the addition of histogram normalization with random Gaussian noise data augmentation may enhance the performance of DenseNet121 and pretrained VGG16 architectures. 
", doi="10.2196/67144", url="https://ai.jmir.org/2025/1/e67144" } @Article{info:doi/10.2196/67706, author="Mahyoub, Mohammed and Dougherty, Kacie and Shukla, Ajit", title="Extracting Pulmonary Embolism Diagnoses From Radiology Impressions Using GPT-4o: Large Language Model Evaluation Study", journal="JMIR Med Inform", year="2025", month="Apr", day="9", volume="13", pages="e67706", keywords="pulmonary embolism", keywords="large language models", keywords="LLMs", keywords="natural language processing", keywords="GPT-4o", keywords="Clinical Longformer", keywords="text classification", keywords="radiology reports", abstract="Background: Pulmonary embolism (PE) is a critical condition requiring rapid diagnosis to reduce mortality. Extracting PE diagnoses from radiology reports manually is time-consuming, highlighting the need for automated solutions. Advances in natural language processing, especially transformer models like GPT-4o, offer promising tools to improve diagnostic accuracy and workflow efficiency in clinical settings. Objective: This study aimed to develop an automatic extraction system using GPT-4o to extract PE diagnoses from radiology report impressions, enhancing clinical decision-making and workflow efficiency. Methods: In total, 2 approaches were developed and evaluated: a fine-tuned Clinical Longformer as a baseline model and a GPT-4o-based extractor. Clinical Longformer, an encoder-only model, was chosen for its robustness in text classification tasks, particularly on smaller scales. GPT-4o, a decoder-only instruction-following LLM, was selected for its advanced language understanding capabilities. The study aimed to evaluate GPT-4o's ability to perform text classification compared to the baseline Clinical Longformer. The Clinical Longformer was trained on a dataset of 1000 radiology report impressions and validated on a separate set of 200 samples, while the GPT-4o extractor was validated using the same 200-sample set. Postdeployment performance was further assessed on an additional 200 operational records to evaluate model efficacy in a real-world setting. Results: GPT-4o outperformed the Clinical Longformer in 2 of the metrics, achieving a sensitivity of 1.0 (95\% CI 1.0-1.0; Wilcoxon test, P<.001) and an F1-score of 0.975 (95\% CI 0.9495-0.9947; Wilcoxon test, P<.001) across the validation dataset. Postdeployment evaluations also showed strong performance of the deployed GPT-4o model with a sensitivity of 1.0 (95\% CI 1.0-1.0), a specificity of 0.94 (95\% CI 0.8913-0.9804), and an F1-score of 0.97 (95\% CI 0.9479-0.9908). This high level of accuracy supports a reduction in manual review, streamlining clinical workflows and improving diagnostic precision. Conclusions: The GPT-4o model provides an effective solution for the automatic extraction of PE diagnoses from radiology reports, offering a reliable tool that aids timely and accurate clinical decision-making. This approach has the potential to significantly improve patient outcomes by expediting diagnosis and treatment pathways for critical conditions like PE. 
", doi="10.2196/67706", url="https://medinform.jmir.org/2025/1/e67706" } @Article{info:doi/10.2196/65547, author="Hu, Danqing and Zhang, Shanyuan and Liu, Qing and Zhu, Xiaofeng and Liu, Bing", title="Large Language Models in Summarizing Radiology Report Impressions for Lung Cancer in Chinese: Evaluation Study", journal="J Med Internet Res", year="2025", month="Apr", day="3", volume="27", pages="e65547", keywords="large language model", keywords="impression summarization", keywords="radiology report", keywords="radiology", keywords="evaluation study", keywords="ChatGPT", keywords="natural language processing", keywords="ultrasound", keywords="radiologist", keywords="thoracic surgeons", abstract="Background: Large language models (LLMs), such as ChatGPT, have demonstrated impressive capabilities in various natural language processing tasks, particularly in text generation. However, their effectiveness in summarizing radiology report impressions remains uncertain. Objective: This study aims to evaluate the capability of nine LLMs, that is, Tongyi Qianwen, ERNIE Bot, ChatGPT, Bard, Claude, Baichuan, ChatGLM, HuatuoGPT, and ChatGLM-Med, in summarizing Chinese radiology report impressions for lung cancer. Methods: We collected 100 Chinese computed tomography (CT), positron emission tomography (PET)--CT, and ultrasound (US) reports each from Peking University Cancer Hospital and Institute. All these reports were from patients with suspected or confirmed lung cancer. Using these reports, we created zero-shot, one-shot, and three-shot prompts with or without complete example reports as inputs to generate impressions. We used both automatic quantitative evaluation metrics and five human evaluation metrics (completeness, correctness, conciseness, verisimilitude, and replaceability) to assess the generated impressions. Two thoracic surgeons (SZ and BL) and one radiologist (QL) compared the generated impressions with reference impressions, scoring them according to the five human evaluation metrics. Results: In the automatic quantitative evaluation, ERNIE Bot, Tongyi Qianwen, and Claude demonstrated the best overall performance in generating impressions for CT, PET-CT, and US reports, respectively. In the human semantic evaluation, ERNIE Bot outperformed the other LLMs in terms of conciseness, verisimilitude, and replaceability on CT impression generation, while its completeness and correctness scores were comparable to those of other LLMs. Tongyi Qianwen excelled in PET-CT impression generation, with the highest scores for correctness, conciseness, verisimilitude, and replaceability. Claude achieved the best conciseness, verisimilitude, and replaceability scores on US impression generation, and its completeness and correctness scores are close to the best results obtained by other LLMs. The generated impressions were generally complete and correct but lacked conciseness and verisimilitude. Although one-shot and few-shot prompts improved conciseness and verisimilitude, clinicians noted a significant gap between the generated impressions and those written by radiologists. Conclusions: Current LLMs can produce radiology impressions with high completeness and correctness but fall short in conciseness and verisimilitude, indicating they cannot yet fully replace impressions written by radiologists. 
", doi="10.2196/65547", url="https://www.jmir.org/2025/1/e65547" } @Article{info:doi/10.2196/53567, author="Xu, He-Li and Gong, Ting-Ting and Song, Xin-Jian and Chen, Qian and Bao, Qi and Yao, Wei and Xie, Meng-Meng and Li, Chen and Grzegorzek, Marcin and Shi, Yu and Sun, Hong-Zan and Li, Xiao-Han and Zhao, Yu-Hong and Gao, Song and Wu, Qi-Jun", title="Artificial Intelligence Performance in Image-Based Cancer Identification: Umbrella Review of Systematic Reviews", journal="J Med Internet Res", year="2025", month="Apr", day="1", volume="27", pages="e53567", keywords="artificial intelligence", keywords="biomedical imaging", keywords="cancer diagnosis", keywords="meta-analysis", keywords="systematic review", keywords="umbrella review", abstract="Background: Artificial intelligence (AI) has the potential to transform cancer diagnosis, ultimately leading to better patient outcomes. Objective: We performed an umbrella review to summarize and critically evaluate the evidence for the AI-based imaging diagnosis of cancers. Methods: PubMed, Embase, Web of Science, Cochrane, and IEEE databases were searched for relevant systematic reviews from inception to June 19, 2024. Two independent investigators abstracted data and assessed the quality of evidence, using the Joanna Briggs Institute (JBI) Critical Appraisal Checklist for Systematic Reviews and Research Syntheses. We further assessed the quality of evidence in each meta-analysis by applying the Grading of Recommendations, Assessment, Development, and Evaluation (GRADE) criteria. Diagnostic performance data were synthesized narratively. Results: In a comprehensive analysis of 158 included studies evaluating the performance of AI algorithms in noninvasive imaging diagnosis across 8 major human system cancers, the accuracy of the classifiers for central nervous system cancers varied widely (ranging from 48\% to 100\%). Similarities were observed in the diagnostic performance for cancers of the head and neck, respiratory system, digestive system, urinary system, female-related systems, skin, and other sites. Most meta-analyses demonstrated positive summary performance. For instance, 9 reviews meta-analyzed sensitivity and specificity for esophageal cancer, showing ranges of 90\%-95\% and 80\%-93.8\%, respectively. In the case of breast cancer detection, 8 reviews calculated the pooled sensitivity and specificity within the ranges of 75.4\%-92\% and 83\%-90.6\%, respectively. Four meta-analyses reported the ranges of sensitivity and specificity in ovarian cancer, and both were 75\%-94\%. Notably, in lung cancer, the pooled specificity was relatively low, primarily distributed between 65\% and 80\%. Furthermore, 80.4\% (127/158) of the included studies were of high quality according to the JBI Critical Appraisal Checklist, with the remaining studies classified as medium quality. The GRADE assessment indicated that the overall quality of the evidence was moderate to low. Conclusions: Although AI shows great potential for achieving accelerated, accurate, and more objective diagnoses of multiple cancers, there are still hurdles to overcome before its implementation in clinical settings. The present findings highlight that a concerted effort from the research community, clinicians, and policymakers is required to overcome existing hurdles and translate this potential into improved patient outcomes and health care delivery. 
Trial Registration: PROSPERO CRD42022364278; https://www.crd.york.ac.uk/PROSPERO/view/CRD42022364278 ", doi="10.2196/53567", url="https://www.jmir.org/2025/1/e53567" } @Article{info:doi/10.2196/69672, author="Wu, Tong and Wang, Yuting and Cui, Xiaoli and Xue, Peng and Qiao, Youlin", title="AI-Based Identification Method for Cervical Transformation Zone Within Digital Colposcopy: Development and Multicenter Validation Study", journal="JMIR Cancer", year="2025", month="Mar", day="31", volume="11", pages="e69672", keywords="artificial intelligence", keywords="AI", keywords="cervical cancer screening", keywords="transformation zone", keywords="diagnosis and early treatment", keywords="lightweight neural network", abstract="Background: In low- and middle-income countries, cervical cancer remains a leading cause of death and morbidity for women. Early detection and treatment of precancerous lesions are critical in cervical cancer prevention, and colposcopy is a primary diagnostic tool for identifying cervical lesions and guiding biopsies. The transformation zone (TZ) is where a stratified squamous epithelium develops from the metaplasia of simple columnar epithelium and is the most common site of precancerous lesions. However, inexperienced colposcopists may find it challenging to accurately identify the type and location of the TZ during a colposcopy examination. Objective: This study aims to present an artificial intelligence (AI) method for identifying the TZ to enhance colposcopy examination and evaluate its potential clinical application. Methods: The study retrospectively collected data from 3616 women who underwent colposcopy at 6 tertiary hospitals in China between 2019 and 2021. A dataset from 4 hospitals was collected for model conduction. An independent dataset was collected from the other 2 geographic hospitals to validate model performance. There is no overlap between the training and validation datasets. Anonymized digital records, including each colposcopy image, baseline clinical characteristics, colposcopic findings, and pathological outcomes, were collected. The classification model was proposed as a lightweight neural network with multiscale feature enhancement capabilities and designed to classify the 3 types of TZ. The pretrained FastSAM model was first implemented to identify the location of the new squamocolumnar junction for segmenting the TZ. Overall accuracy, average precision, and recall were evaluated for the classification and segmentation models. The classification performance on the external validation was assessed by sensitivity and specificity. Results: The optimal TZ classification model performed with 83.97\% classification accuracy on the test set, which achieved average precision of 91.84\%, 89.06\%, and 95.62\% for types 1, 2, and 3, respectively. The recall and mean average precision of the TZ segmentation model were 0.78 and 0.75, respectively. The proposed model demonstrated outstanding performance in predicting 3 types of the TZ, achieving the sensitivity with 95\% CIs for TZ1, TZ2, and TZ3 of 0.78 (0.74-0.81), 0.81 (0.78-0.82), and 0.8 (0.74-0.87), respectively, with specificity with 95\% CIs of 0.94 (0.92-0.96), 0.83 (0.81-0.86), and 0.91 (0.89-0.92), based on a comprehensive external dataset of 1335 cases from 2 of the 6 hospitals. Conclusions: Our proposed AI-based identification system classified the type of cervical TZs and delineated their location on multicenter, colposcopic, high-resolution images. 
The findings of this study have shown its potential to predict TZ types and specific regions accurately. It was developed as a valuable assistant to encourage precise colposcopic examination in clinical practice. ", doi="10.2196/69672", url="https://cancer.jmir.org/2025/1/e69672" } @Article{info:doi/10.2196/60887, author="Bastiaansen, P. Wietske A. and Klein, Stefan and Hojeij, Batoul and Rubini, Eleonora and Koning, J. Anton H. and Niessen, Wiro and Steegers-Theunissen, M. R{\'e}gine P. and Rousian, Melek", title="Automatic Human Embryo Volume Measurement in First Trimester Ultrasound From the Rotterdam Periconception Cohort: Quantitative and Qualitative Evaluation of Artificial Intelligence", journal="J Med Internet Res", year="2025", month="Mar", day="31", volume="27", pages="e60887", keywords="first trimester", keywords="artificial intelligence", keywords="embryo", keywords="ultrasound", keywords="biometry", keywords="US", keywords="Rotterdam", keywords="The Netherlands", keywords="Cohort", keywords="quantitative", keywords="qualitative", keywords="evaluation", keywords="noninvasive", keywords="pregnancy", keywords="embryonic growth", keywords="algorithm", keywords="embryonic volume", keywords="monitoring", keywords="development", abstract="Background: Noninvasive volumetric measurements during the first trimester of pregnancy provide unique insight into human embryonic growth and development. However, current methods, such as semiautomatic (eg, virtual reality [VR]) or manual segmentation (eg, VOCAL), are not used in routine care due to their time-consuming nature, requirement for specialized training, and introduction of inter- and intrarater variability. Objective: To address the challenges of manual and semiautomatic measurements, this study aimed to develop an automatic artificial intelligence (AI) algorithm to segment the region of interest and measure embryonic volume (EV) and head volume (HV) during the first trimester of pregnancy. Methods: We used 3D ultrasound datasets from the Rotterdam Periconception Cohort, collected between 7 and 11 weeks of gestational age. We measured the EV in gestational weeks 7, 9, and 11, and the HV in weeks 9 and 11. To develop the AI algorithms for measuring EV and HV, we used nnU-net, a state-of-the-art segmentation algorithm that is publicly available. We tested the algorithms on 164 (EV) and 92 (HV) datasets, both acquired before 2020. The AI algorithm's generalization to data acquired in the future was evaluated by testing on 116 (EV) and 58 (HV) datasets from 2020. The performance of the model was assessed using the intraclass correlation coefficient (ICC) between the volume obtained using AI and using VR. In addition, 2 experts qualitatively rated both VR and AI segmentations for the EV and HV. Results: We found that segmentation of both the EV and HV using AI took around a minute; additionally, rating took another minute, so in total, volume measurement took 2 minutes per ultrasound dataset, while experienced raters needed 5-10 minutes using a VR tool. For both the EV and HV, we found an ICC of 0.998 on the test set acquired before 2020 and an ICC of 0.996 (EV) and 0.997 (HV) for data acquired in 2020. During qualitative rating for the EV, a comparable proportion (AI: 42\%, VR: 38\%) were rated as excellent; however, we found that major errors were more common with the AI algorithm, as it more frequently missed limbs. For the HV, the AI segmentations were rated as excellent in 79\% of cases, compared with only 17\% for VR. 
Conclusions: We developed 2 fully automatic AI algorithms to accurately measure the EV and HV in the first trimester on 3D ultrasound data. In-depth qualitative analysis revealed that the quality of the measurement for AI and VR was similar. Since automatic volumetric assessment now only takes a couple of minutes, the use of these measurements in pregnancy for monitoring growth and development during this crucial period becomes feasible, which may lead to better screening, diagnostics, and treatment of developmental disorders in pregnancy. ", doi="10.2196/60887", url="https://www.jmir.org/2025/1/e60887" } @Article{info:doi/10.2196/62774, author="Lei, Changbin and Jiang, Yan and Xu, Ke and Liu, Shanshan and Cao, Hua and Wang, Cong", title="Convolutional Neural Network Models for Visual Classification of Pressure Ulcer Stages: Cross-Sectional Study", journal="JMIR Med Inform", year="2025", month="Mar", day="25", volume="13", pages="e62774", keywords="pressure ulcer", keywords="deep learning", keywords="artificial intelligence", keywords="neural network", keywords="CNN", keywords="machine learning", keywords="image", keywords="imaging", keywords="classification", keywords="ulcer", keywords="sore", keywords="pressure", keywords="wound", keywords="skin", abstract="Background: Pressure injuries (PIs) pose a negative health impact and a substantial economic burden on patients and society. Accurate staging is crucial for treating PIs. Owing to the diversity in the clinical manifestations of PIs and the lack of objective biochemical and pathological examinations, accurate staging of PIs is a major challenge. The deep learning algorithm, which uses convolutional neural networks (CNNs), has demonstrated exceptional classification performance in the intricate domain of skin diseases and wounds and has the potential to improve the staging accuracy of PIs. Objective: We explored the potential of applying AlexNet, VGGNet16, ResNet18, and DenseNet121 to PI staging, aiming to provide an effective tool to assist in staging. Methods: PI images from patients---including those with stage I, stage II, stage III, stage IV, unstageable, and suspected deep tissue injury (SDTI)---were collected at a tertiary hospital in China. Additionally, we augmented the PI data by cropping and flipping the PI images 9 times. The collected images were then divided into training, validation, and test sets at a ratio of 8:1:1. We subsequently trained them via AlexNet, VGGNet16, ResNet18, and DenseNet121 to develop staging models. Results: We collected 853 raw PI images with the following distributions across stages: stage I (n=148), stage II (n=121), stage III (n=216), stage IV (n=110), unstageable (n=128), and SDTI (n=130). A total of 7677 images were obtained after data augmentation. Among all the CNN models, DenseNet121 demonstrated the highest overall accuracy of 93.71\%. The classification performances of AlexNet, VGGNet16, and ResNet18 exhibited overall accuracies of 87.74\%, 82.42\%, and 92.42\%, respectively. Conclusions: The CNN-based models demonstrated strong classification ability for PI images, which might promote highly efficient, intelligent PI staging methods. In the future, the models can be compared with nurses with different levels of experience to further verify the clinical application effect. ", doi="10.2196/62774", url="https://medinform.jmir.org/2025/1/e62774" } @Article{info:doi/10.2196/63686, author="Imani, Mahdi and Borda, G. 
Miguel and Vogrin, Sara and Meijering, Erik and Aarsland, Dag and Duque, Gustavo", title="Using Deep Learning to Perform Automatic Quantitative Measurement of Masseter and Tongue Muscles in Persons With Dementia: Cross-Sectional Study", journal="JMIR Aging", year="2025", month="Mar", day="19", volume="8", pages="e63686", keywords="artificial intelligence", keywords="machine learning", keywords="sarcopenia", keywords="dementia", keywords="masseter muscle", keywords="tongue muscle", keywords="deep learning", keywords="head", keywords="tongue", keywords="face", keywords="magnetic resonance imaging", keywords="MRI", keywords="image", keywords="imaging", keywords="muscle", keywords="muscles", keywords="neural network", keywords="aging", keywords="gerontology", keywords="older adults", keywords="geriatrics", keywords="older adult health", abstract="Background: Sarcopenia (loss of muscle mass and strength) increases adverse outcomes risk and contributes to cognitive decline in older adults. Accurate methods to quantify muscle mass and predict adverse outcomes, particularly in older persons with dementia, are still lacking. Objective: This study's main objective was to assess the feasibility of using deep learning techniques for segmentation and quantification of musculoskeletal tissues in magnetic resonance imaging (MRI) scans of the head in patients with neurocognitive disorders. This study aimed to pave the way for using automated techniques for opportunistic detection of sarcopenia in patients with neurocognitive disorder. Methods: In a cross-sectional analysis of 53 participants, we used 7 U-Net-like deep learning models to segment 5 different tissues in head MRI images and used the Dice similarity coefficient and average symmetric surface distance as main assessment techniques to compare results. We also analyzed the relationship between BMI and muscle and fat volumes. Results: Our framework accurately quantified masseter and subcutaneous fat on the left and right sides of the head and tongue muscle (mean Dice similarity coefficient 92.4\%). A significant correlation exists between the area and volume of tongue muscle, left masseter muscle, and BMI. Conclusions: Our study demonstrates the successful application of a deep learning model to quantify muscle volumes in head MRI in patients with neurocognitive disorders. This is a promising first step toward clinically applicable artificial intelligence and deep learning methods for estimating masseter and tongue muscle and predicting adverse outcomes in this population. ", doi="10.2196/63686", url="https://aging.jmir.org/2025/1/e63686" } @Article{info:doi/10.2196/60431, author="Kammies, Chamandra and Archer, Elize and Engel-Hills, Penelope and Volschenk, Mariette", title="Exploring Curriculum Considerations to Prepare Future Radiographers for an AI-Assisted Health Care Environment: Protocol for Scoping Review", journal="JMIR Res Protoc", year="2025", month="Mar", day="6", volume="14", pages="e60431", keywords="artificial intelligence", keywords="machine learning", keywords="radiography", keywords="education", keywords="scoping review", abstract="Background: The use of artificial intelligence (AI) technologies in radiography practice is increasing. As this advanced technology becomes more embedded in radiography systems and clinical practice, the role of radiographers will evolve. 
In the context of these anticipated changes, it may be reasonable to expect modifications to the competencies and educational requirements of current and future practitioners to ensure successful AI adoption. Objective: The aim of this scoping review is to explore and synthesize the literature on the adjustments needed in the radiography curriculum to prepare radiography students for the demands of AI-assisted health care environments. Methods: Using the Joanna Briggs Institute methodology, an initial search was run in Scopus to determine whether the search strategy that was developed with a library specialist would capture the relevant literature by screening the title and abstract of the first 50 articles. Additional search terms identified in the articles were added to the search strategy. Next, EBSCOhost, PubMed, and Web of Science databases were searched. In total, 2 reviewers will independently review the title, abstract, and full-text articles according to the predefined inclusion and exclusion criteria, with conflicts resolved by a third reviewer. Results: The search results will be reported using the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) checklist. The final scoping review will present the data analysis as findings in tabular form and through narrative descriptions. The final database searches were completed in October 2024 and yielded 2224 records. Title and abstract screening of 1930 articles is underway after removing 294 duplicates. The scoping review is expected to be finalized by the end of March 2025. Conclusions: A scoping review aims to systematically map the evidence on the adjustments needed in the radiography curriculum to prepare radiography students for the integration of AI technologies in the health care environment. It is relevant to map the evidence because increased integration of AI-based technologies in clinical practice has been noted and changes in practice must be underpinned by appropriate education and training. The findings in this study will provide a better understanding of how the radiography curriculum should adapt to meet the educational needs of current and future radiographers to ensure competent and safe practice in response to AI technologies. Trial Registration: Open Science Framework 3nx2a; https://osf.io/3nx2a International Registered Report Identifier (IRRID): PRR1-10.2196/60431 ", doi="10.2196/60431", url="https://www.researchprotocols.org/2025/1/e60431", url="http://www.ncbi.nlm.nih.gov/pubmed/40053777" } @Article{info:doi/10.2196/66622, author="Zhang, Subo and Zhu, Zhitao and Yu, Zhenfei and Sun, Haifeng and Sun, Yi and Huang, Hai and Xu, Lei and Wan, Jinxin", title="Effectiveness of AI for Enhancing Computed Tomography Image Quality and Radiation Protection in Radiology: Systematic Review and Meta-Analysis", journal="J Med Internet Res", year="2025", month="Feb", day="27", volume="27", pages="e66622", keywords="artificial intelligence", keywords="computed tomography", keywords="image quality", keywords="radiation protection", keywords="meta-analysis", abstract="Background: Artificial intelligence (AI) presents a promising approach to balancing high image quality with reduced radiation exposure in computed tomography (CT) imaging. Objective: This meta-analysis evaluates the effectiveness of AI in enhancing CT image quality and lowering radiation doses. 
Methods: A thorough literature search was performed across several databases, including PubMed, Embase, Web of Science, Science Direct, and Cochrane Library, with the final update in 2024. We included studies that compared AI-based interventions to conventional CT techniques. The quality of these studies was assessed using the Newcastle-Ottawa Scale. Random effect models were used to pool results, and heterogeneity was measured using the I{\texttwosuperior} statistic. Primary outcomes included image quality, CT dose index, and diagnostic accuracy. Results: This meta-analysis incorporated 5 clinical validation studies published between 2022 and 2024, totaling 929 participants. Results indicated that AI-based interventions significantly improved image quality (mean difference 0.70, 95\% CI 0.43-0.96; P<.001) and showed a positive trend in reducing the CT dose index, though not statistically significant (mean difference 0.47, 95\% CI --0.21 to 1.15; P=.18). AI also enhanced image analysis efficiency (odds ratio 1.57, 95\% CI 1.08-2.27; P=.02) and demonstrated high accuracy and sensitivity in detecting intracranial aneurysms, with low-dose CT using AI reconstruction showing noninferiority for liver lesion detection. Conclusions: The findings suggest that AI-based interventions can significantly enhance CT imaging practices by improving image quality and potentially reducing radiation doses, which may lead to better diagnostic accuracy and patient safety. However, these results should be interpreted with caution due to the limited number of studies and the variability in AI algorithms. Further research is needed to clarify AI's impact on radiation reduction and to establish clinical standards. ", doi="10.2196/66622", url="https://www.jmir.org/2025/1/e66622", url="http://www.ncbi.nlm.nih.gov/pubmed/40053787" } @Article{info:doi/10.2196/64204, author="Zhang, Yahan and Chun, Yi and Fu, Hongyuan and Jiao, Wen and Bao, Jizhang and Jiang, Tao and Cui, Longtao and Hu, Xiaojuan and Cui, Ji and Qiu, Xipeng and Tu, Liping and Xu, Jiatuo", title="A Risk Warning Model for Anemia Based on Facial Visible Light Reflectance Spectroscopy: Cross-Sectional Study", journal="JMIR Med Inform", year="2025", month="Feb", day="14", volume="13", pages="e64204", keywords="anemia", keywords="hemoglobin", keywords="spectroscopy", keywords="machine learning", keywords="risk warning model", keywords="Shapley additive explanation", abstract="Background: Anemia is a global public health issue causing symptoms such as fatigue, weakness, and cognitive decline. Furthermore, anemia is associated with various diseases and increases the risk of postoperative complications and mortality. Frequent invasive blood tests for diagnosis also pose additional discomfort and risks to patients. Objective: This study aims to assess the facial spectral characteristics of patients with anemia and to develop a predictive model for anemia risk using machine learning approaches. Methods: Between August 2022 and September 2023, we collected facial image data from 78 anemic patients who met the inclusion criteria from the Hematology Department of Shanghai Hospital of Traditional Chinese Medicine. Between March 2023 and September 2023, we collected data from 78 healthy adult participants from Shanghai Jiading Community Health Center and Shanghai Gaohang Community Health Center. A comprehensive statistical analysis was performed to evaluate differences in spectral characteristics between the anemic patients and healthy controls. 
Then, we used 10 different machine learning algorithms to create a predictive model for anemia. The least absolute shrinkage and selection operator was used to analyze the predictors. We integrated multiple machine learning classification models to identify the optimal model and developed Shapley additive explanations (SHAP) for personalized risk assessment. Results: The study identified significant differences in facial spectral features between anemic patients and healthy controls. The support vector machine classifier outperformed other classification models, achieving an accuracy of 0.875 (95\% CI 0.825-0.925) for distinguishing between the anemia and healthy control groups. In the SHAP interpretation of the model, forehead-570 nm, right cheek-520 nm, right zygomatic-570 nm, jaw-570 nm, and left cheek-610 nm were the features with the highest contributions. Conclusions: Facial spectral data demonstrated clinical significance in anemia diagnosis, and the early warning model for anemia risk constructed based on spectral information demonstrated a high accuracy rate. ", doi="10.2196/64204", url="https://medinform.jmir.org/2025/1/e64204" } @Article{info:doi/10.2196/48328, author="Kottlors, Jonathan and Hahnfeldt, Robert and G{\"o}rtz, Lukas and Iuga, Andra-Iza and Fervers, Philipp and Bremm, Johannes and Zopfs, David and Laukamp, R. Kai and Onur, A. Oezguer and Lennartz, Simon and Sch{\"o}nfeld, Michael and Maintz, David and Kabbasch, Christoph and Persigehl, Thorsten and Schlamann, Marc", title="Large Language Models--Supported Thrombectomy Decision-Making in Acute Ischemic Stroke Based on Radiology Reports: Feasibility Qualitative Study", journal="J Med Internet Res", year="2025", month="Feb", day="13", volume="27", pages="e48328", keywords="artificial intelligence", keywords="radiology", keywords="report", keywords="large language model", keywords="text-based augmented supporting system", keywords="mechanical thrombectomy", keywords="GPT", keywords="stroke", keywords="decision-making", keywords="thrombectomy", keywords="imaging", keywords="model", keywords="machine learning", keywords="ischemia", abstract="Background: The latest advancement of artificial intelligence (AI) is generative pretrained transformer large language models (LLMs). They have been trained on massive amounts of text, enabling humanlike and semantical responses to text-based inputs and requests. Foreshadowing numerous possible applications in various fields, the potential of such tools for medical data integration and clinical decision-making is not yet clear. Objective: In this study, we investigate the potential of LLMs in report-based medical decision-making on the example of acute ischemic stroke (AIS), where clinical and image-based information may indicate an immediate need for mechanical thrombectomy (MT). The purpose was to elucidate the feasibility of integrating radiology report data and other clinical information in the context of therapy decision-making using LLMs. Methods: A hundred patients with AIS were retrospectively included, of whom 50\% (50/100) were indicated for MT, whereas the other 50\% (50/100) were not. The LLM was provided with the computed tomography report, information on neurological symptoms and onset, and patients' age. The performance of the AI decision-making model was compared with an expert consensus regarding the binary determination of MT indication, for which sensitivity, specificity, and accuracy were calculated. 
Results: The AI model had an overall accuracy of 88\%, with a specificity of 96\% and a sensitivity of 80\%. The area under the curve for the report-based MT decision was 0.92. Conclusions: The LLM achieved promising accuracy in determining the eligibility of patients with AIS for MT based on radiology reports and clinical information. Our results underscore the potential of LLMs for radiological and medical data integration. This investigation should serve as a stimulus for further clinical applications of LLMs, in which this AI should be used as an augmented supporting system for human decision-making. ", doi="10.2196/48328", url="https://www.jmir.org/2025/1/e48328", url="http://www.ncbi.nlm.nih.gov/pubmed/39946168" } @Article{info:doi/10.2196/55825, author="Bhak, Youngmin and Lee, Ho Yu and Kim, Joonhyung and Lee, Kiwon and Lee, Daehwan and Jang, Chan Eun and Jang, Eunjeong and Lee, Seungkyu Christopher and Kang, Seok Eun and Park, Sehee and Han, Wook Hyun and Nam, Min Sang", title="Diagnosis of Chronic Kidney Disease Using Retinal Imaging and Urine Dipstick Data: Multimodal Deep Learning Approach", journal="JMIR Med Inform", year="2025", month="Feb", day="7", volume="13", pages="e55825", keywords="multimodal deep learning", keywords="chronic kidney disease", keywords="fundus image", keywords="saliency map", keywords="urine dipstick", abstract="Background: Chronic kidney disease (CKD) is a prevalent condition with significant global health implications. Early detection and management are critical to prevent disease progression and complications. Deep learning (DL) models using retinal images have emerged as potential noninvasive screening tools for CKD, though their performance may be limited, especially in identifying individuals with proteinuria and in specific subgroups. Objective: We aim to evaluate the efficacy of integrating retinal images and urine dipstick data into DL models for enhanced CKD diagnosis. Methods: The 3 models were developed and validated: eGFR-RIDL (estimated glomerular filtration rate--retinal image deep learning), eGFR-UDLR (logistic regression using urine dipstick data), and eGFR-MMDL (multimodal deep learning combining retinal images and urine dipstick data). All models were trained to predict an eGFR<60 mL/min/1.73 m{\texttwosuperior}, a key indicator of CKD, calculated using the 2009 CKD-EPI (Chronic Kidney Disease Epidemiology Collaboration) equation. This study used a multicenter dataset of participants aged 20-79 years, including a development set (65,082 people) and an external validation set (58,284 people). Wide Residual Networks were used for DL, and saliency maps were used to visualize model attention. Sensitivity analyses assessed the impact of numerical variables. Results: eGFR-MMDL outperformed eGFR-RIDL in both the test and external validation sets, with areas under the curve of 0.94 versus 0.90 and 0.88 versus 0.77 (P<.001 for both, DeLong test). eGFR-UDLR outperformed eGFR-RIDL and was comparable to eGFR-MMDL, particularly in the external validation. However, in the subgroup analysis, eGFR-MMDL showed improvement across all subgroups, while eGFR-UDLR demonstrated no such gains. This suggested that the enhanced performance of eGFR-MMDL was not due to urine data alone, but rather from the synergistic integration of both retinal images and urine data. The eGFR-MMDL model demonstrated the best performance in individuals younger than 65 years or those with proteinuria. 
Age and proteinuria were identified as critical factors influencing model performance. Saliency maps indicated that urine data and retinal images provide complementary information, with urine offering insights into retinal abnormalities and retinal images, particularly the arcade vessels, being key for predicting kidney function. Conclusions: The MMDL model integrating retinal images and urine dipstick data shows significant promise for noninvasive CKD screening, outperforming the retinal image--only model. However, routine blood tests are still recommended for individuals aged 65 years and older due to the model's limited performance in this age group. ", doi="10.2196/55825", url="https://medinform.jmir.org/2025/1/e55825" } @Article{info:doi/10.2196/62647, author="Mubonanyikuzo, Vivens and Yan, Hongjie and Komolafe, Emmanuel Temitope and Zhou, Liang and Wu, Tao and Wang, Nizhuan", title="Detection of Alzheimer Disease in Neuroimages Using Vision Transformers: Systematic Review and Meta-Analysis", journal="J Med Internet Res", year="2025", month="Feb", day="5", volume="27", pages="e62647", keywords="diagnostic accuracy", keywords="vision transformer", keywords="Alzheimer disease", keywords="detection", keywords="neuroimaging, meta-analysis", keywords="neuroimaging", keywords="deep learning", keywords="medical database", keywords="diagnostic", keywords="clinical implementation", keywords="machine learning", keywords="magnetic resonance imaging", keywords="neural networks", abstract="Background: Alzheimer disease (AD) is a progressive condition characterized by cognitive decline and memory loss. Vision transformers (ViTs) are emerging as promising deep learning models in medical imaging, with potential applications in the detection and diagnosis of AD. Objective: This review systematically examines recent studies on the application of ViTs in detecting AD, evaluating the diagnostic accuracy and impact of network architecture on model performance. Methods: We conducted a systematic search across major medical databases, including China National Knowledge Infrastructure, CENTRAL (Cochrane Central Register of Controlled Trials), ScienceDirect, PubMed, Web of Science, and Scopus, covering publications from January 1, 2020, to March 1, 2024. A manual search was also performed to include relevant gray literature. The included papers used ViT models for AD detection versus healthy controls based on neuroimaging data, and the included studies used magnetic resonance imaging and positron emission tomography. Pooled diagnostic accuracy estimates, including sensitivity, specificity, likelihood ratios, and diagnostic odds ratios, were derived using random-effects models. Subgroup analyses comparing the diagnostic performance of different ViT network architectures were performed. Results: The meta-analysis, encompassing 11 studies with 95\% CIs and P values, demonstrated pooled diagnostic accuracy: sensitivity 0.925 (95\% CI 0.892-0.959; P<.01), specificity 0.957 (95\% CI 0.932-0.981; P<.01), positive likelihood ratio 21.84 (95\% CI 12.26-38.91; P<.01), and negative likelihood ratio 0.08 (95\% CI 0.05-0.14; P<.01). The area under the curve was notably high at 0.924. The findings highlight the potential of ViTs as effective tools for early and accurate AD diagnosis, offering insights for future neuroimaging-based diagnostic approaches. 
Conclusions: This systematic review provides valuable evidence for the utility of ViT models in distinguishing patients with AD from healthy controls, thereby contributing to advancements in neuroimaging-based diagnostic methodologies. Trial Registration: PROSPERO CRD42024584347; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=584347 ", doi="10.2196/62647", url="https://www.jmir.org/2025/1/e62647" } @Article{info:doi/10.2196/57723, author="Iratni, Maya and Abdullah, Amira and Aldhaheri, Mariam and Elharrouss, Omar and Abd-alrazaq, Alaa and Rustamov, Zahiriddin and Zaki, Nazar and Damseh, Rafat", title="Transformers for Neuroimage Segmentation: Scoping Review", journal="J Med Internet Res", year="2025", month="Jan", day="29", volume="27", pages="e57723", keywords="3D segmentation", keywords="brain tumor segmentation", keywords="deep learning", keywords="neuroimaging", keywords="transformer", abstract="Background: Neuroimaging segmentation is increasingly important for diagnosing and planning treatments for neurological diseases. Manual segmentation is time-consuming, apart from being prone to human error and variability. Transformers are a promising deep learning approach for automated medical image segmentation. Objective: This scoping review will synthesize current literature and assess the use of various transformer models for neuroimaging segmentation. Methods: A systematic search in major databases, including Scopus, IEEE Xplore, PubMed, and ACM Digital Library, was carried out for studies applying transformers to neuroimaging segmentation problems from 2019 through 2023. The inclusion criteria allow only for peer-reviewed journal papers and conference papers focused on transformer-based segmentation of human brain imaging data. Excluded are the studies dealing with nonneuroimaging data or raw brain signals and electroencephalogram data. Data extraction was performed to identify key study details, including image modalities, datasets, neurological conditions, transformer models, and evaluation metrics. Results were synthesized using a narrative approach. Results: Of the 1246 publications identified, 67 (5.38\%) met the inclusion criteria. Half of all included studies were published in 2022, and more than two-thirds used transformers for segmenting brain tumors. The most common imaging modality was magnetic resonance imaging (n=59, 88.06\%), while the most frequently used dataset was brain tumor segmentation dataset (n=39, 58.21\%). 3D transformer models (n=42, 62.69\%) were more prevalent than their 2D counterparts. The most developed were those of hybrid convolutional neural network-transformer architectures (n=57, 85.07\%), where the vision transformer is the most frequently used type of transformer (n=37, 55.22\%). The most frequent evaluation metric was the Dice score (n=63, 94.03\%). Studies generally reported increased segmentation accuracy and the ability to model both local and global features in brain images. Conclusions: This review represents the recent increase in the adoption of transformers for neuroimaging segmentation, particularly for brain tumor detection. Currently, hybrid convolutional neural network-transformer architectures achieve state-of-the-art performances on benchmark datasets over standalone models. Nevertheless, their applicability remains highly limited by high computational costs and potential overfitting on small datasets. 
The heavy reliance of the field on the brain tumor segmentation dataset hints at the need for a more diverse set of datasets to validate the performance of models on a variety of neurological diseases. Further research is needed to define the optimal transformer architectures and training methods for clinical applications. Continuing development may make transformers the state-of-the-art for fast, accurate, and reliable brain magnetic resonance imaging segmentation, which could lead to improved clinical tools for diagnosing and evaluating neurological disorders. ", doi="10.2196/57723", url="https://www.jmir.org/2025/1/e57723" } @Article{info:doi/10.2196/64649, author="Liu, Weiqi and Wu, You and Zheng, Zhuozhao and Bittle, Mark and Yu, Wei and Kharrazi, Hadi", title="Enhancing Diagnostic Accuracy of Lung Nodules in Chest Computed Tomography Using Artificial Intelligence: Retrospective Analysis", journal="J Med Internet Res", year="2025", month="Jan", day="27", volume="27", pages="e64649", keywords="artificial intelligence", keywords="diagnostic accuracy", keywords="lung nodule", keywords="radiology", keywords="AI system", abstract="Background: Uncertainty in the diagnosis of lung nodules is a challenge for both patients and physicians. Artificial intelligence (AI) systems are increasingly being integrated into medical imaging to assist diagnostic procedures. However, the accuracy of AI systems in identifying and measuring lung nodules on chest computed tomography (CT) scans remains unclear, which requires further evaluation. Objective: This study aimed to evaluate the impact of an AI-assisted diagnostic system on the diagnostic efficiency of radiologists. It specifically examined the report modification rates and missed and misdiagnosed rates of junior radiologists with and without AI assistance. Methods: We obtained effective data from 12,889 patients in 2 tertiary hospitals in Beijing before and after the implementation of the AI system, covering the period from April 2018 to March 2022. Diagnostic reports written by both junior and senior radiologists were included in each case. Using reports by senior radiologists as a reference, we compared the modification rates of reports written by junior radiologists with and without AI assistance. We further evaluated alterations in lung nodule detection capability over 3 years after the integration of the AI system. Evaluation metrics of this study include lung nodule detection rate, accuracy, false negative rate, false positive rate, and positive predictive value. The statistical analyses included descriptive statistics and chi-square, Cochran-Armitage, and Mann-Kendall tests. Results: The AI system was implemented in Beijing Anzhen Hospital (Hospital A) in January 2019 and Tsinghua Changgung Hospital (Hospital C) in June 2021. The modification rate of diagnostic reports in the detection of lung nodules increased from 4.73\% to 7.23\% ($\chi^2_1$=12.15; P<.001) at Hospital A. In terms of lung nodule detection rates postimplementation, Hospital C increased from 46.19\% to 53.45\% ($\chi^2_1$=25.48; P<.001) and Hospital A increased from 39.29\% to 55.22\% ($\chi^2_1$=122.55; P<.001). At Hospital A, the false negative rate decreased from 8.4\% to 5.16\% ($\chi^2_1$=9.85; P=.002), while the false positive rate increased from 2.36\% to 9.77\% ($\chi^2_1$=53.48; P<.001). The detection accuracy demonstrated a decrease from 93.33\% to 92.23\% for Hospital A and from 95.27\% to 92.77\% for Hospital C.
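The before-and-after rate comparisons in the entry above rest on chi-square tests of 2x2 tables (hence 1 degree of freedom); the sketch below uses hypothetical report counts chosen only to match the quoted modification rates, since the abstract does not give the underlying denominators.

```python
# Chi-square test for a change in report modification rate, as in the entry above.
# The counts are hypothetical placeholders chosen to match the quoted rates.
import numpy as np
from scipy.stats import chi2_contingency

#                  modified  not modified
table = np.array([[ 95,      1913],    # before AI assistance (~4.73%)
                  [145,      1860]])   # after AI assistance  (~7.23%)

chi2, p, dof, expected = chi2_contingency(table, correction=False)
print(f"chi-square({dof}) = {chi2:.2f}, P = {p:.4f}")
```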
Regarding the changes in lung nodule detection capability over a 3-year period following the integration of the AI system, the detection rates for lung nodules exhibited a modest increase from 54.6\% to 55.84\%, while the overall accuracy demonstrated a slight improvement from 92.79\% to 93.92\%. Conclusions: The AI system enhanced lung nodule detection, offering the possibility of earlier disease identification and timely intervention. Nevertheless, the initial reduction in accuracy underscores the need for standardized diagnostic criteria and comprehensive training for radiologists to maximize the effectiveness of AI-enabled diagnostic systems. ", doi="10.2196/64649", url="https://www.jmir.org/2025/1/e64649" } @Article{info:doi/10.2196/57275, author="Yamagishi, Yosuke and Nakamura, Yuta and Hanaoka, Shouhei and Abe, Osamu", title="Large Language Model Approach for Zero-Shot Information Extraction and Clustering of Japanese Radiology Reports: Algorithm Development and Validation", journal="JMIR Cancer", year="2025", month="Jan", day="23", volume="11", pages="e57275", keywords="radiology reports", keywords="clustering", keywords="large language model", keywords="natural language processing", keywords="information extraction", keywords="lung cancer", keywords="machine learning", abstract="Background: The application of natural language processing in medicine has increased significantly, including tasks such as information extraction and classification. Natural language processing plays a crucial role in structuring free-form radiology reports, facilitating the interpretation of textual content, and enhancing data utility through clustering techniques. Clustering allows for the identification of similar lesions and disease patterns across a broad dataset, making it useful for aggregating information and discovering new insights in medical imaging. However, most publicly available medical datasets are in English, with limited resources in other languages. This scarcity poses a challenge for development of models geared toward non-English downstream tasks. Objective: This study aimed to develop and evaluate an algorithm that uses large language models (LLMs) to extract information from Japanese lung cancer radiology reports and perform clustering analysis. The effectiveness of this approach was assessed and compared with previous supervised methods. Methods: This study employed the MedTxt-RR dataset, comprising 135 Japanese radiology reports from 9 radiologists who interpreted the computed tomography images of 15 lung cancer patients obtained from Radiopaedia. Previously used in the NTCIR-16 (NII Testbeds and Community for Information Access Research) shared task for clustering performance competition, this dataset was ideal for comparing the clustering ability of our algorithm with those of previous methods. The dataset was split into 8 cases for development and 7 for testing, respectively. The study's approach involved using the LLM to extract information pertinent to lung cancer findings and transforming it into numeric features for clustering, using the K-means method. Performance was evaluated using 135 reports for information extraction accuracy and 63 test reports for clustering performance. This study focused on the accuracy of automated systems for extracting tumor size, location, and laterality from clinical reports. 
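A minimal sketch of the clustering step this entry describes, using scikit-learn's K-means and the evaluation metrics reported next; the feature matrix and case labels are placeholders for the numeric findings the LLM would extract from each report.

```python
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import (adjusted_mutual_info_score,
                             fowlkes_mallows_score,
                             normalized_mutual_info_score)

rng = np.random.default_rng(0)
X = rng.normal(size=(63, 5))                 # placeholder features per report
true_case_ids = rng.integers(0, 7, size=63)  # hypothetical ground-truth case labels

labels = KMeans(n_clusters=7, n_init=10, random_state=0).fit_predict(X)

print("NMI:", normalized_mutual_info_score(true_case_ids, labels))
print("AMI:", adjusted_mutual_info_score(true_case_ids, labels))
print("FMI:", fowlkes_mallows_score(true_case_ids, labels))
```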
The clustering performance was evaluated using normalized mutual information, adjusted mutual information, and the Fowlkes-Mallows index for both the development and test data. Results: The tumor size was accurately identified in 99 out of 135 reports (73.3\%), with errors in 36 reports (26.7\%), primarily due to missing or incorrect size information. Tumor location and laterality were identified with greater accuracy in 112 out of 135 reports (83\%); however, 23 reports (17\%) contained errors mainly due to empty values or incorrect data. Clustering performance of the test data yielded a normalized mutual information of 0.6414, adjusted mutual information of 0.5598, and Fowlkes-Mallows index of 0.5354. The proposed method demonstrated superior performance across all evaluation metrics compared to previous methods. Conclusions: The unsupervised LLM approach surpassed the existing supervised methods in clustering Japanese radiology reports. These findings suggest that LLMs hold promise for extracting information from radiology reports and integrating it into disease-specific knowledge structures. ", doi="10.2196/57275", url="https://cancer.jmir.org/2025/1/e57275" } @Article{info:doi/10.2196/53928, author="Martinez, Stanford and Ramirez-Tamayo, Carolina and Akhter Faruqui, Hasib Syed and Clark, Kal and Alaeddini, Adel and Czarnek, Nicholas and Aggarwal, Aarushi and Emamzadeh, Sahra and Mock, R. Jeffrey and Golob, J. Edward", title="Discrimination of Radiologists' Experience Level Using Eye-Tracking Technology and Machine Learning: Case Study", journal="JMIR Form Res", year="2025", month="Jan", day="22", volume="9", pages="e53928", keywords="machine learning", keywords="eye-tracking", keywords="experience level determination", keywords="radiology education", keywords="search pattern feature extraction", keywords="search pattern", keywords="radiology", keywords="classification", keywords="gaze", keywords="fixation", keywords="education", keywords="experience", keywords="spatio-temporal", keywords="image", keywords="x-ray", keywords="eye movement", abstract="Background: Perception-related errors comprise most diagnostic mistakes in radiology. To mitigate this problem, radiologists use personalized and high-dimensional visual search strategies, otherwise known as search patterns. Qualitative descriptions of these search patterns, which involve the physician verbalizing or annotating the order in which he or she analyzes the image, can be unreliable due to discrepancies in what is reported versus the actual visual patterns. This discrepancy can interfere with quality improvement interventions and negatively impact patient care. Objective: The objective of this study is to provide an alternative method for distinguishing between radiologists by means of captured eye-tracking data such that the raw gaze (or processed fixation data) can be used to discriminate users based on subconscious behavior in visual inspection. Methods: We present a novel discretized feature encoding based on spatiotemporal binning of fixation data for efficient geometric alignment and temporal ordering of eye movement when reading chest x-rays. The encoded features of the eye-fixation data are used by machine learning classifiers to discriminate between faculty and trainee radiologists. A clinical trial case study was conducted using metrics such as the area under the curve, accuracy, F1-score, sensitivity, and specificity to evaluate the discriminability between the 2 groups regarding their level of experience.
The classification performance was then compared with state-of-the-art methodologies. In addition, a repeatability experiment using a separate dataset, experimental protocol, and eye tracker was performed with 8 participants to evaluate the robustness of the proposed approach. Results: The numerical results from both experiments demonstrate that classifiers using the proposed feature encoding methods outperform the current state-of-the-art in differentiating between radiologists in terms of experience level. An average performance gain of 6.9\% is observed compared with traditional features while classifying experience levels of radiologists. This gain in accuracy is also substantial across different eye tracker--collected datasets, with improvements of 6.41\% using the Tobii eye tracker and 7.29\% using the EyeLink eye tracker. These results signify the potential impact of the proposed method for identifying radiologists' level of expertise and those who would benefit from additional training. Conclusions: The effectiveness of the proposed spatiotemporal discretization approach, validated across diverse datasets and various classification metrics, underscores its potential for objective evaluation, informing targeted interventions and training strategies in radiology. This research advances reliable assessment tools, addressing challenges in perception-related errors to enhance patient care outcomes. ", doi="10.2196/53928", url="https://formative.jmir.org/2025/1/e53928" } @Article{info:doi/10.2196/67378, author="Liu, Chaofeng and Liu, Yan and Yi, Chunyan and Xie, Tao and Tian, Jingjun and Deng, Peishen and Liu, Changyu and Shan, Yan and Dong, Hangyu and Xu, Yanhua", title="Application of a 3D Fusion Model to Evaluate the Efficacy of Clear Aligner Therapy in Malocclusion Patients: Prospective Observational Study", journal="J Med Internet Res", year="2025", month="Jan", day="15", volume="27", pages="e67378", keywords="clear aligners", keywords="CBCT", keywords="intraoral scanning", keywords="fusion model", keywords="artificial intelligence", keywords="efficacy evaluation", keywords="orthodontic treatment", abstract="Background: Investigating the safe range of orthodontic tooth movement is essential for maintaining oral and maxillofacial stability posttreatment. Although clear aligners rely on pretreatment digital models, their effect on periodontal hard tissues remains uncertain. By integrating cone beam computed tomography--derived cervical and root data with crown data from digital intraoral scans, a 3D fusion model may enhance precision and safety. Objective: This study aims to construct a 3D fusion model based on artificial intelligence software that matches cone beam computed tomography and intraoral scanning data using the Andrews' Six Element standard. The model will be used to assess the 3D effects of clear aligners on tooth movement, to provide a reference for the design of pretreatment target positions. Methods: Between May 2022 and May 2024, a total of 320 patients who completed clear aligner therapy at our institution were screened; 136 patients (aged 13-35 years, fully erupted permanent dentition and periodontal pocket depth <3 mm) met the criteria. Baseline (``simulation'') and posttreatment (``fusion'') models were compared. 
Outcomes included upper core discrepancy (UCD), upper incisors anteroposterior discrepancy (UAP), lower Spee curve deep discrepancy (LSD), upper anterior teeth width discrepancy (UAW), upper canine width discrepancy (UCW), upper molar width discrepancy (UMW), and total scores. Subanalyses examined sex, age stage (adolescent vs adult), and treatment method (extraction vs nonextraction). Results: The study was funded in May 2022, with data collection beginning the same month and continuing until May 2024. Of 320 initial participants, 136 met the inclusion criteria. Data analysis is ongoing, and final results are expected by late 2024. Among the 136 participants, 90 (66\%) were female, 46 (34\%) were male, 64 (47\%) were adolescents, 72 (53\%) were adults, 38 (28\%) underwent extraction, and 98 (72\%) did not. Total scores did not differ significantly by sex (mean difference 0.01, 95\% CI --0.13 to 0.15; P=.85), age stage (mean difference 0.03, 95\% CI --0.10 to 0.17; P=.60), or treatment method (mean difference 0.07, 95\% CI --0.22 to 0.07; P=.32). No significant differences were found in UCD (mean difference 0.001, 95\% CI --0.02 to 0.01; P=.90) or UAP (mean difference 0.01, 95\% CI --0.03 to 0.00; P=.06) by treatment method. However, adolescents exhibited smaller differences in UCD, UAW, UCW, and UMW yet larger differences in UAP and LSD (df=134; P<.001). Extraction cases showed smaller LSD, UAW, and UCW but larger UMW differences compared with nonextraction (df=134; P<.001). Conclusions: The 3D fusion model provides a reliable clinical reference for target position design and treatment outcome evaluation in clear aligner systems. The construction and application of a 3D fusion model in clear aligner orthodontics represent a significant leap forward, offering substantial clinical benefits while establishing a new standard for precision, personalization, and evidence-based treatment planning in the field. 
Trial Registration: Chinese Clinical Trial Registry ChiCTR2400094304, https://www.chictr.org.cn/hvshowproject.html?id=266090\&v=1.0 ", doi="10.2196/67378", url="https://www.jmir.org/2025/1/e67378" } @Article{info:doi/10.2196/67621, author="Chetla, Nitin and Tandon, Mihir and Chang, Joseph and Sukhija, Kunal and Patel, Romil and Sanchez, Ramon", title="Evaluating ChatGPT's Efficacy in Pediatric Pneumonia Detection From Chest X-Rays: Comparative Analysis of Specialized AI Models", journal="JMIR AI", year="2025", month="Jan", day="10", volume="4", pages="e67621", keywords="artificial intelligence", keywords="ChatGPT", keywords="pneumonia", keywords="chest x-ray", keywords="pediatric", keywords="radiology", keywords="large language models", keywords="machine learning", keywords="pneumonia detection", keywords="diagnosis", keywords="pediatric pneumonia", doi="10.2196/67621", url="https://ai.jmir.org/2025/1/e67621" } @Article{info:doi/10.2196/67256, author="Yang, Xiaomeng and Li, Zeyan and Lei, Lei and Shi, Xiaoyu and Zhang, Dingming and Zhou, Fei and Li, Wenjing and Xu, Tianyou and Liu, Xinyu and Wang, Songyun and Yuan, Quan and Yang, Jian and Wang, Xinyu and Zhong, Yanfei and Yu, Lilei", title="Noninvasive Oral Hyperspectral Imaging--Driven Digital Diagnosis of Heart Failure With Preserved Ejection Fraction: Model Development and Validation Study", journal="J Med Internet Res", year="2025", month="Jan", day="7", volume="27", pages="e67256", keywords="heart failure with preserved ejection fraction", keywords="HFpEF", keywords="hyperspectral imaging", keywords="HSI", keywords="diagnostic model", keywords="digital health", keywords="Shapley Additive Explanations", keywords="SHAP", keywords="machine learning", keywords="artificial intelligence", keywords="AI", keywords="cardiovascular disease", keywords="predictive modeling", keywords="oral health", abstract="Background: Oral microenvironmental disorders are associated with an increased risk of heart failure with preserved ejection fraction (HFpEF). Hyperspectral imaging (HSI) technology enables the detection of substances that are visually indistinguishable to the human eye, providing a noninvasive approach with extensive applications in medical diagnostics. Objective: The objective of this study is to develop and validate a digital, noninvasive oral diagnostic model for patients with HFpEF using HSI combined with various machine learning algorithms. Methods: Between April 2023 and August 2023, a total of 140 patients were recruited from Renmin Hospital of Wuhan University to serve as the training and internal testing groups for this study. Subsequently, from August 2024 to September 2024, an additional 35 patients were enrolled from Three Gorges University and Yichang Central People's Hospital to constitute the external testing group. After preprocessing to ensure image quality, spectral and textural features were extracted from the images. We extracted 25 spectral bands from each patient image and obtained 8 corresponding texture features to evaluate the performance of 28 machine learning algorithms for their ability to distinguish control participants from participants with HFpEF. The model demonstrating the optimal performance in both internal and external testing groups was selected to construct the HFpEF diagnostic model. Hyperspectral bands significant for identifying participants with HFpEF were identified for further interpretative analysis. 
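A minimal sketch of the kind of classifier training and SHAP-based interpretation this entry describes, assuming a tabular matrix of spectral and texture features; the random forest is shown because it is the model the entry's results favor, and all arrays here are placeholders rather than study data.

```python
import numpy as np
import shap  # assumes the shap package is installed
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
X = rng.normal(size=(140, 33))    # placeholder: 25 spectral + 8 texture features
y = rng.integers(0, 2, size=140)  # placeholder: control vs HFpEF labels

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)
model = RandomForestClassifier(n_estimators=300, random_state=0).fit(X_tr, y_tr)
print("AUC:", roc_auc_score(y_te, model.predict_proba(X_te)[:, 1]))

# Feature attributions for the interpretation step described next.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_te)
```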
The Shapley Additive Explanations (SHAP) model was used to provide analytical insights into feature importance. Results: Participants were divided into a training group (n=105), internal testing group (n=35), and external testing group (n=35), with consistent baseline characteristics across groups. Among the 28 algorithms tested, the random forest algorithm demonstrated superior performance with an area under the receiver operating characteristic curve (AUC) of 0.884 and an accuracy of 82.9\% in the internal testing group, as well as an AUC of 0.812 and an accuracy of 85.7\% in the external testing group. For model interpretation, we used the top 25 features identified by the random forest algorithm. The SHAP analysis revealed discernible distinctions between control participants and participants with HFpEF, thereby validating the diagnostic model's capacity to accurately identify participants with HFpEF. Conclusions: This noninvasive and efficient model facilitates the identification of individuals with HFpEF, thereby promoting early detection, diagnosis, and treatment. Our research presents a clinically advanced diagnostic framework for HFpEF, validated using independent data sets and demonstrating significant potential to enhance patient care. Trial Registration: China Clinical Trial Registry ChiCTR2300078855; https://www.chictr.org.cn/showproj.html?proj=207133 ", doi="10.2196/67256", url="https://www.jmir.org/2025/1/e67256" } @Article{info:doi/10.2196/60684, author="Stephan, Daniel and Bertsch, Annika and Burwinkel, Matthias and Vinayahalingam, Shankeeth and Al-Nawas, Bilal and K{\"a}mmerer, W. Peer and Thiem, GE Daniel", title="AI in Dental Radiology---Improving the Efficiency of Reporting With ChatGPT: Comparative Study", journal="J Med Internet Res", year="2024", month="Dec", day="23", volume="26", pages="e60684", keywords="artificial intelligence", keywords="ChatGPT", keywords="radiology report", keywords="dental radiology", keywords="dental orthopantomogram", keywords="panoramic radiograph", keywords="dental", keywords="radiology", keywords="chatbot", keywords="medical documentation", keywords="medical application", keywords="imaging", keywords="disease detection", keywords="clinical decision support", keywords="natural language processing", keywords="medical licensing", keywords="dentistry", keywords="patient care", abstract="Background: Structured and standardized documentation is critical for accurately recording diagnostic findings, treatment plans, and patient progress in health care. Manual documentation can be labor-intensive and error-prone, especially under time constraints, prompting interest in the potential of artificial intelligence (AI) to automate and optimize these processes, particularly in medical documentation. Objective: This study aimed to assess the effectiveness of ChatGPT (OpenAI) in generating radiology reports from dental panoramic radiographs, comparing the performance of AI-generated reports with those manually created by dental students. Methods: A total of 100 dental students were tasked with analyzing panoramic radiographs and generating radiology reports manually or assisted by ChatGPT using a standardized prompt derived from a diagnostic checklist. Results: Reports generated by ChatGPT showed a high degree of textual similarity to reference reports; however, they often lacked critical diagnostic information typically included in reports authored by students. 
Despite this, the AI-generated reports were consistent in being error-free and matched the readability of student-generated reports. Conclusions: The findings from this study suggest that ChatGPT has considerable potential for generating radiology reports, although it currently faces challenges in accuracy and reliability. This underscores the need for further refinement in the AI's prompt design and the development of robust validation mechanisms to enhance its use in clinical settings. ", doi="10.2196/60684", url="https://www.jmir.org/2024/1/e60684" } @Article{info:doi/10.2196/59370, author="Battineni, Gopi and Chintalapudi, Nalini and Amenta, Francesco", title="Machine Learning Driven by Magnetic Resonance Imaging for the Classification of Alzheimer Disease Progression: Systematic Review and Meta-Analysis", journal="JMIR Aging", year="2024", month="Dec", day="23", volume="7", pages="e59370", keywords="Alzheimer disease", keywords="ML-based diagnosis", keywords="machine learning", keywords="prevalence", keywords="cognitive impairment", keywords="classification", keywords="biomarkers", keywords="imaging modalities", keywords="MRI", keywords="magnetic resonance imaging", keywords="systematic review", keywords="meta-analysis", abstract="Background: To diagnose Alzheimer disease (AD), individuals are classified according to the severity of their cognitive impairment. There are currently no specific causes or conditions for this disease. Objective: The purpose of this systematic review and meta-analysis was to assess AD prevalence across different stages using machine learning (ML) approaches comprehensively. Methods: The selection of papers was conducted in 3 phases, as per PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analysis) 2020 guidelines: identification, screening, and final inclusion. The final analysis included 24 papers that met the criteria. The selection of ML approaches for AD diagnosis was rigorously based on their relevance to the investigation. The prevalence of patients with AD at 2, 3, 4, and 6 stages was illustrated through the use of forest plots. Results: The prevalence rate for both cognitively normal (CN) and AD across 6 studies was 49.28\% (95\% CI 46.12\%-52.45\%; P=.32). The prevalence estimate for the 3 stages of cognitive impairment (CN, mild cognitive impairment, and AD) is 29.75\% (95\% CI 25.11\%-34.84\%, P<.001). Among 5 studies with 14,839 participants, the analysis of 4 stages (nondemented, moderately demented, mildly demented, and AD) found an overall prevalence of 13.13\% (95\% CI 3.75\%-36.66\%; P<.001). In addition, 4 studies involving 3819 participants estimated the prevalence of 6 stages (CN, significant memory concern, early mild cognitive impairment, mild cognitive impairment, late mild cognitive impairment, and AD), yielding a prevalence of 23.75\% (95\% CI 12.22\%-41.12\%; P<.001). Conclusions: The significant heterogeneity observed across studies reveals that demographic and setting characteristics are responsible for the impact on AD prevalence estimates. This study shows how ML approaches can be used to describe AD prevalence across different stages, which provides valuable insights for future research. 
", doi="10.2196/59370", url="https://aging.jmir.org/2024/1/e59370" } @Article{info:doi/10.2196/59839, author="Parekh, Pranav and Oyeleke, Richard and Vishwanath, Tejas", title="The Depth Estimation and Visualization of Dermatological Lesions: Development and Usability Study", journal="JMIR Dermatol", year="2024", month="Dec", day="18", volume="7", pages="e59839", keywords="machine learning", keywords="ML", keywords="computer vision", keywords="neural networks", keywords="explainable AI", keywords="XAI", keywords="computer graphics", keywords="red spot analysis", keywords="mixed reality", keywords="MR", keywords="artificial intelligence", keywords="visualization", abstract="Background: Thus far, considerable research has been focused on classifying a lesion as benign or malignant. However, there is a requirement for quick depth estimation of a lesion for the accurate clinical staging of the lesion. The lesion could be malignant and quickly grow beneath the skin. While biopsy slides provide clear information on lesion depth, it is an emerging domain to find quick and noninvasive methods to estimate depth, particularly based on 2D images. Objective: This study proposes a novel methodology for the depth estimation and visualization of skin lesions. Current diagnostic methods are approximate in determining how much a lesion may have proliferated within the skin. Using color gradients and depth maps, this method will give us a definite estimate and visualization procedure for lesions and other skin issues. We aim to generate 3D holograms of the lesion depth such that dermatologists can better diagnose melanoma. Methods: We started by performing classification using a convolutional neural network (CNN), followed by using explainable artificial intelligence to localize the image features responsible for the CNN output. We used the gradient class activation map approach to perform localization of the lesion from the rest of the image. We applied computer graphics for depth estimation and developing the 3D structure of the lesion. We used the depth from defocus method for depth estimation from single images and Gabor filters for volumetric representation of the depth map. Our novel method, called red spot analysis, measures the degree of infection based on how a conical hologram is constructed. We collaborated with a dermatologist to analyze the 3D hologram output and received feedback on how this method can be introduced to clinical implementation. Results: The neural model plus the explainable artificial intelligence algorithm achieved an accuracy of 86\% in classifying the lesions correctly as benign or malignant. For the entire pipeline, we mapped the benign and malignant cases to their conical representations. We received exceedingly positive feedback while pitching this idea at the King Edward Memorial Institute in India. Dermatologists considered this a potentially useful tool in the depth estimation of lesions. We received a number of ideas for evaluating the technique before it can be introduced to the clinical scene. Conclusions: When we map the CNN outputs (benign or malignant) to the corresponding hologram, we observe that a malignant lesion has a higher concentration of red spots (infection) in the upper and deeper portions of the skin, and that the malignant cases have deeper conical sections when compared with the benign cases. This proves that the qualitative results map with the initial classification performed by the neural model. 
The positive feedback provided by the dermatologist suggests that the qualitative conclusion of the method is sufficient. ", doi="10.2196/59839", url="https://derma.jmir.org/2024/1/e59839" } @Article{info:doi/10.2196/55833, author="Gupta, Vikash and Erdal, Barbaros and Ramirez, Carolina and Floca, Ralf and Genereaux, Bradley and Bryson, Sidney and Bridge, Christopher and Kleesiek, Jens and Nensa, Felix and Braren, Rickmer and Younis, Khaled and Penzkofer, Tobias and Bucher, Michael Andreas and Qin, Melvin Ming and Bae, Gigon and Lee, Hyeonhoon and Cardoso, Jorge M. and Ourselin, Sebastien and Kerfoot, Eric and Choudhury, Rahul and White, D. Richard and Cook, Tessa and Bericat, David and Lungren, Matthew and Haukioja, Risto and Shuaib, Haris", title="Current State of Community-Driven Radiological AI Deployment in Medical Imaging", journal="JMIR AI", year="2024", month="Dec", day="9", volume="3", pages="e55833", keywords="radiology", keywords="open-source", keywords="radiology in practice", keywords="deep learning", keywords="artificial intelligence", keywords="imaging informatics", keywords="clinical deployment", keywords="imaging", keywords="medical informatics", keywords="workflow", keywords="operation", keywords="implementation", keywords="adoption", keywords="taxonomy", keywords="use case", keywords="model", keywords="integration", keywords="machine learning", keywords="mobile phone", doi="10.2196/55833", url="https://ai.jmir.org/2024/1/e55833" } @Article{info:doi/10.2196/63834, author="AboArab, A. Mohammed and Potsika, T. Vassiliki and Theodorou, Alexis and Vagena, Sylvia and Gravanis, Miltiadis and Sigala, Fragiska and Fotiadis, I. Dimitrios", title="Advancing Progressive Web Applications to Leverage Medical Imaging for Visualization of Digital Imaging and Communications in Medicine and Multiplanar Reconstruction: Software Development and Validation Study", journal="JMIR Med Inform", year="2024", month="Dec", day="9", volume="12", pages="e63834", keywords="medical image visualization", keywords="peripheral artery computed tomography imaging", keywords="multiplanar reconstruction", keywords="progressive web applications", abstract="Background: In medical imaging, 3D visualization is vital for displaying volumetric organs, enhancing diagnosis and analysis. Multiplanar reconstruction (MPR) improves visual and diagnostic capabilities by transforming 2D images from computed tomography (CT) and magnetic resonance imaging into 3D representations. Web-based Digital Imaging and Communications in Medicine (DICOM) viewers integrated into picture archiving and communication systems facilitate access to pictures and interaction with remote data. However, the adoption of progressive web applications (PWAs) for web-based DICOM and MPR visualization remains limited. This paper addresses this gap by leveraging PWAs for their offline access and enhanced performance. Objective: This study aims to evaluate the integration of DICOM and MPR visualization into the web using PWAs, addressing challenges related to cross-platform compatibility, integration capabilities, and high-resolution image reconstruction for medical image visualization. Methods: Our paper introduces a PWA that uses a modular design for enhancing DICOM and MPR visualization in web-based medical imaging. By integrating React.js and Cornerstone.js, the application offers seamless DICOM image processing, ensures cross-browser compatibility, and delivers a responsive user experience across multiple devices. 
It uses advanced interpolation techniques to make volume reconstructions more accurate. This makes MPR analysis and visualization better in a web environment, thus promising a substantial advance in medical imaging analysis. Results: In our approach, the performance of DICOM- and MPR-based PWAs for medical image visualization and reconstruction was evaluated through comprehensive experiments. The application excelled in terms of loading time and volume reconstruction, particularly in Google Chrome, whereas Firefox showed superior performance in viewing slices. This study uses a dataset comprising 22 CT scans of peripheral artery patients to demonstrate the application's robust performance, with Google Chrome outperforming other browsers in both the local area network and wide area network settings. In addition, the application's accuracy in MPR reconstructions was validated with an error margin of <0.05 mm and outperformed the state-of-the-art methods by 84\% to 98\% in loading and volume rendering time. Conclusions: This paper highlights advancements in DICOM and MPR visualization using PWAs, addressing the gaps in web-based medical imaging. By exploiting PWA features such as offline access and improved performance, we have significantly advanced medical imaging technology, focusing on cross-platform compatibility, integration efficiency, and speed. Our application outperforms existing platforms for handling complex MPR analyses and accurate analysis of medical imaging as validated through peripheral artery CT imaging. ", doi="10.2196/63834", url="https://medinform.jmir.org/2024/1/e63834" } @Article{info:doi/10.2196/59045, author="Chen, Hongbo and Alfred, Myrtede and Brown, D. Andrew and Atinga, Angela and Cohen, Eldan", title="Intersection of Performance, Interpretability, and Fairness in Neural Prototype Tree for Chest X-Ray Pathology Detection: Algorithm Development and Validation Study", journal="JMIR Form Res", year="2024", month="Dec", day="5", volume="8", pages="e59045", keywords="explainable artificial intelligence", keywords="deep learning", keywords="chest x-ray", keywords="thoracic pathology", keywords="fairness", keywords="interpretability", abstract="Background: While deep learning classifiers have shown remarkable results in detecting chest X-ray (CXR) pathologies, their adoption in clinical settings is often hampered by the lack of transparency. To bridge this gap, this study introduces the neural prototype tree (NPT), an interpretable image classifier that combines the diagnostic capability of deep learning models and the interpretability of the decision tree for CXR pathology detection. Objective: This study aimed to investigate the utility of the NPT classifier in 3 dimensions, including performance, interpretability, and fairness, and subsequently examined the complex interaction between these dimensions. We highlight both local and global explanations of the NPT classifier and discuss its potential utility in clinical settings. Methods: This study used CXRs from the publicly available Chest X-ray 14, CheXpert, and MIMIC-CXR datasets. We trained 6 separate classifiers for each CXR pathology in all datasets, 1 baseline residual neural network (ResNet)--152, and 5 NPT classifiers with varying levels of interpretability. Performance, interpretability, and fairness were measured using the area under the receiver operating characteristic curve (ROC AUC), interpretation complexity (IC), and mean true positive rate (TPR) disparity, respectively. 
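A minimal sketch of the fairness measure named above, computing per-subgroup true positive rates and a mean TPR disparity; the study's exact definition is not given in the abstract, so the mean absolute pairwise difference used below is an assumption.

```python
import numpy as np

def tpr(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    positives = y_true == 1
    return float(np.mean(y_pred[positives] == 1)) if positives.any() else float("nan")

def mean_tpr_disparity(y_true, y_pred, groups) -> float:
    """Mean absolute pairwise TPR difference across subgroups (assumed definition)."""
    tprs = [tpr(y_true[groups == g], y_pred[groups == g]) for g in np.unique(groups)]
    return float(np.mean([abs(a - b) for i, a in enumerate(tprs) for b in tprs[i + 1:]]))

# Toy labels, predictions, and an age-group attribute per chest x-ray.
y_true = np.array([1, 1, 0, 1, 0, 1, 1, 0])
y_pred = np.array([1, 0, 0, 1, 0, 1, 0, 0])
age_group = np.array(["<65", "<65", "<65", "<65", ">=65", ">=65", ">=65", ">=65"])
print(mean_tpr_disparity(y_true, y_pred, age_group))  # ~0.167
```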
Linear regression analyses were performed to investigate the relationship between IC and ROC AUC, as well as between IC and mean TPR disparity. Results: The performance of the NPT classifier improved as the IC level increased, surpassing that of ResNet-152 at IC level 15 for the Chest X-ray 14 dataset and IC level 31 for the CheXpert and MIMIC-CXR datasets. The NPT classifier at IC level 1 exhibited the highest degree of unfairness, as indicated by the mean TPR disparity. The magnitude of unfairness, as measured by the mean TPR disparity, was more pronounced in groups differentiated by age (chest X-ray 14 0.112, SD 0.015; CheXpert 0.097, SD 0.010; MIMIC 0.093, SD 0.017) compared to sex (chest X-ray 14 0.054 SD 0.012; CheXpert 0.062, SD 0.008; MIMIC 0.066, SD 0.013). A significant positive relationship between interpretability (ie, IC level) and performance (ie, ROC AUC) was observed across all CXR pathologies (P<.001). Furthermore, linear regression analysis revealed a significant negative relationship between interpretability and fairness (ie, mean TPR disparity) across age and sex subgroups (P<.001). Conclusions: By illuminating the intricate relationship between performance, interpretability, and fairness of the NPT classifier, this research offers insightful perspectives that could guide future developments in effective, interpretable, and equitable deep learning classifiers for CXR pathology detection. ", doi="10.2196/59045", url="https://formative.jmir.org/2024/1/e59045" } @Article{info:doi/10.2196/54641, author="Song, Kyungchul and Ko, Taehoon and Chae, Wook Hyun and Oh, Suk Jun and Kim, Ho-Seong and Shin, Joo Hyun and Kim, Jeong-Ho and Na, Ji-Hoon and Park, Jung Chae and Sohn, Beomseok", title="Development and Validation of a Prediction Model Using Sella Magnetic Resonance Imaging--Based Radiomics and Clinical Parameters for the Diagnosis of Growth Hormone Deficiency and Idiopathic Short Stature: Cross-Sectional, Multicenter Study", journal="J Med Internet Res", year="2024", month="Nov", day="27", volume="26", pages="e54641", keywords="dwarfism", keywords="pituitary", keywords="idiopathic short stature", keywords="child", keywords="adolescent", keywords="machine learning", keywords="magnetic resonance imaging", keywords="MRI", abstract="Background: Growth hormone deficiency (GHD) and idiopathic short stature (ISS) are the major etiologies of short stature in children. For the diagnosis of GHD and ISS, meticulous evaluations are required, including growth hormone provocation tests, which are invasive and burdensome for children. Additionally, sella magnetic resonance imaging (MRI) is necessary for assessing etiologies of GHD, which cannot evaluate hormonal secretion. Recently, radiomics has emerged as a revolutionary technique that uses mathematical algorithms to extract various features for the quantitative analysis of medical images. Objective: This study aimed to develop a machine learning--based model using sella MRI--based radiomics and clinical parameters to diagnose GHD and ISS. Methods: A total of 293 children with short stature who underwent sella MRI and growth hormone provocation tests were included in the training set, and 47 children who met the same inclusion criteria were enrolled in the test set from different hospitals for this study. A total of 186 radiomic features were extracted from the pituitary glands using a semiautomatic segmentation process for both the T2-weighted and contrast-enhanced T1-weighted image. 
The clinical parameters included auxological data, insulin-like growth factor-I, and bone age. The extreme gradient boosting algorithm was used to train the prediction models. Internal validation was conducted using 5-fold cross-validation on the training set, and external validation was conducted on the test set. Model performance was assessed by plotting the area under the receiver operating characteristic curve. The mean absolute Shapley values were computed to quantify the impact of each parameter. Results: The area under the receiver operating characteristic curves (95\% CIs) of the clinical, radiomics, and combined models were 0.684 (0.590-0.778), 0.691 (0.620-0.762), and 0.830 (0.741-0.919), respectively, in the external validation. Among the clinical parameters, the major contributing factors to prediction were BMI SD score (SDS), chronological age--bone age, weight SDS, growth velocity, and insulin-like growth factor-I SDS in the clinical model. In the combined model, radiomic features including maximum probability from a T2-weighted image and run length nonuniformity normalized from a T2-weighted image added incremental value to the prediction (combined model vs clinical model, P=.03; combined model vs radiomics model, P=.02). The code for our model is available in a public repository on GitHub. Conclusions: Our model combining both radiomics and clinical parameters can accurately predict GHD from ISS, which was also proven in the external validation. These findings highlight the potential of machine learning--based models using radiomics and clinical parameters for diagnosing GHD and ISS. ", doi="10.2196/54641", url="https://www.jmir.org/2024/1/e54641" } @Article{info:doi/10.2196/52514, author="Drogt, Jojanneke and Milota, Megan and Veldhuis, Wouter and Vos, Shoko and Jongsma, Karin", title="The Promise of AI for Image-Driven Medicine: Qualitative Interview Study of Radiologists' and Pathologists' Perspectives", journal="JMIR Hum Factors", year="2024", month="Nov", day="21", volume="11", pages="e52514", keywords="digital medicine", keywords="computer vision", keywords="medical AI", keywords="image-driven specialisms", keywords="qualitative interview study", keywords="digital health ethics", keywords="artificial intelligence", keywords="AI", keywords="imaging", keywords="imaging informatics", keywords="radiology", keywords="pathology", abstract="Background: Image-driven specialisms such as radiology and pathology are at the forefront of medical artificial intelligence (AI) innovation. Many believe that AI will lead to significant shifts in professional roles, so it is vital to investigate how professionals view the pending changes that AI innovation will initiate and incorporate their views in ongoing AI developments. Objective: Our study aimed to gain insights into the perspectives and wishes of radiologists and pathologists regarding the promise of AI. Methods: We have conducted the first qualitative interview study investigating the perspectives of both radiologists and pathologists regarding the integration of AI in their fields. The study design is in accordance with the consolidated criteria for reporting qualitative research (COREQ). Results: In total, 21 participants were interviewed for this study (7 pathologists, 10 radiologists, and 4 computer scientists). The interviews revealed a diverse range of perspectives on the impact of AI. 
Respondents discussed various task-specific benefits of AI; yet, both pathologists and radiologists agreed that AI had yet to live up to its hype. Overall, our study shows that AI could facilitate welcome changes in the workflows of image-driven professionals and eventually lead to better quality of care. At the same time, these professionals also admitted that many hopes and expectations for AI were unlikely to become a reality in the next decade. Conclusions: This study points to the importance of maintaining a ``healthy skepticism'' on the promise of AI in imaging specialisms and argues for more structural and inclusive discussions about whether AI is the right technology to solve current problems encountered in daily clinical practice. ", doi="10.2196/52514", url="https://humanfactors.jmir.org/2024/1/e52514" } @Article{info:doi/10.2196/53780, author="Almashmoum, Maryam and Cunningham, James and Ainsworth, John", title="Evaluating Factors Affecting Knowledge Sharing Among Health Care Professionals in the Medical Imaging Departments of 2 Cancer Centers: Concurrent Mixed Methods Study", journal="JMIR Hum Factors", year="2024", month="Nov", day="13", volume="11", pages="e53780", keywords="knowledge management", keywords="knowledge sharing", keywords="medical imaging departments", keywords="cancer centers", keywords="The Christie", keywords="Kuwait Cancer Control Center", keywords="concurrent mixed methods", keywords="factors", keywords="challenges", keywords="definition", keywords="mechanisms", keywords="practices", abstract="Background: Knowledge sharing is a crucial part of any knowledge management implementation. It refers to sharing skills and experience among team members in an organization. In a health care setting, sharing knowledge, whether tacit or explicit, is important and can lead to better health care services. In medical imaging departments, knowledge sharing can be of particular importance. There are several factors that affect knowledge-sharing practices in medical imaging departments: individual, departmental, and technological. Evaluating the importance of these factors and understanding their use can help with improving knowledge-sharing practices in medical imaging departments. Objective: We aimed to assess the level of motivation, identify current knowledge-sharing tools, and evaluate factors affecting knowledge sharing in the medical imaging departments of 2 cancer centers, The Christie, United Kingdom, and the Kuwait Cancer Control Center (KCCC). Methods: A concurrent mixed methods study was conducted through nonprobability sampling techniques between February 1, 2023, and July 30, 2023. Semistructured interviews were used to validate the results of the quantitative analysis. Data were collected using an electronic questionnaire that was distributed among health care professionals in both cancer centers using Qualtrics. Semistructured interviews were conducted online using Microsoft Teams. The quantitative data were analyzed using the Qualtrics MX software to report the results for each question, whereas the qualitative data were analyzed using a thematic approach with codes classified through NVivo. Results: In total, 56 respondents from the KCCC and 29 from The Christie participated, with a 100\% response rate (56/56, 100\% and 29/29, 100\%, respectively) based on the Qualtrics survey tool. 
A total of 59\% (17/29) of health care professionals from The Christie shared their knowledge using emails and face-to-face communication as their main tools on a daily basis, and 57\% (32/56) of health care professionals from the KCCC used face-to-face communication for knowledge sharing. The mean Likert-scale score of all the components that assessed the factors that affected knowledge-sharing behaviors fell between ``somewhat agree'' and ``strongly agree'' in both centers, excepting extrinsic motivation, which was rated as ``neither agree nor disagree.'' This was similar to the results related to incentives. It was shown that 52\% (15/29) of health care professionals at The Christie had no incentives to encourage knowledge-sharing practices. Therefore, establishing clear policies to manage incentives is important to increase knowledge-sharing practices. Conclusions: This study offered an evaluation of factors that affect knowledge sharing in 2 cancer centers. Most health care professionals were aware of the importance of knowledge-sharing practices in enhancing health care services. Several challenges were identified, such as time constraints, a lack of staff, and the language barrier, which limit knowledge-sharing practices. Therefore, establishing a clear policy for knowledge sharing is vital to practicing knowledge-sharing behaviors and facing any challenges that limit this practice. ", doi="10.2196/53780", url="https://humanfactors.jmir.org/2024/1/e53780" } @Article{info:doi/10.2196/59556, author="Gutman, Barak and Shmilovitch, Amit-Haim and Aran, Dvir and Shelly, Shahar", title="Twenty-Five Years of AI in Neurology: The Journey of Predictive Medicine and Biological Breakthroughs", journal="JMIR Neurotech", year="2024", month="Nov", day="8", volume="3", pages="e59556", keywords="neurology", keywords="artificial intelligence", keywords="telemedicine", keywords="clinical advancements", keywords="mobile phone", doi="10.2196/59556", url="https://neuro.jmir.org/2024/1/e59556" } @Article{info:doi/10.2196/58776, author="Kim, Heon Ho and Jeong, Chan Won and Pi, Kyungran and Lee, Soeun Angela and Kim, Soo Min and Kim, Jin Hye and Kim, Hong Jae", title="A Deep Learning Model to Predict Breast Implant Texture Types Using Ultrasonography Images: Feasibility Development Study", journal="JMIR Form Res", year="2024", month="Nov", day="5", volume="8", pages="e58776", keywords="breast implants", keywords="mammoplasty", keywords="ultrasonography", keywords="AI-assisted diagnosis", keywords="shell surface topography", keywords="artificial intelligence", keywords="deep learning", keywords="machine learning", abstract="Background: Breast implants, including textured variants, have been widely used in aesthetic and reconstructive mammoplasty. However, the textured type, which is one of the shell texture types of breast implants, has been identified as a possible etiologic factor for lymphoma, specifically breast implant--associated anaplastic large cell lymphoma (BIA-ALCL). Identifying the shell texture type of the implant is critical to diagnosing BIA-ALCL. However, distinguishing the shell texture type can be difficult due to the loss of human memory and medical history. An alternative approach is to use ultrasonography, but this method also has limitations in quantitative assessment. Objective: This study aims to determine the feasibility of using a deep learning model to classify the shell texture type of breast implants and make robust predictions from ultrasonography images from heterogeneous sources.
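The methods that follow quantify prediction uncertainty with Shannon entropy over the model's class probabilities; a minimal sketch of that calculation is below, with illustrative softmax outputs rather than values from the study.

```python
import numpy as np

# Illustrative softmax outputs (smooth vs textured shell) for 4 example images,
# ordered as in the entry's results: in-distribution cases first, then the
# ruptured-implant and no-implant groups with flatter, more uncertain outputs.
probs = np.array([
    [0.98, 0.02],   # Canon-like, confident
    [0.95, 0.05],   # GE-like, confident
    [0.70, 0.30],   # ruptured implant, less certain
    [0.55, 0.45],   # no implant, close to maximum uncertainty
])

eps = 1e-12
shannon_entropy = -(probs * np.log2(probs + eps)).sum(axis=1)  # bits; max 1.0 for 2 classes
print(np.round(shannon_entropy, 3))
```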
Methods: A total of 19,502 breast implant images were retrospectively collected from heterogeneous sources, including images captured from both Canon and GE devices, images of ruptured implants, and images without implants, as well as publicly available images. A ResNet-50 model was trained on the Canon images. The model's performance on the Canon dataset was evaluated using stratified 5-fold cross-validation. Additionally, external validation was conducted using the GE and publicly available datasets. The area under the receiver operating characteristic curve (AUROC) and the area under the precision-recall curve (PRAUC) were calculated based on the contribution of the pixels with Gradient-weighted Class Activation Mapping (Grad-CAM). To identify the significant pixels for classification, we masked the pixels that contributed less than 10\%, up to a maximum of 100\%. To assess the model's robustness to uncertainty, Shannon entropy was calculated for 4 image groups: Canon, GE, ruptured implants, and without implants. Results: The deep learning model achieved an average AUROC of 0.98 and a PRAUC of 0.88 in the Canon dataset. The model achieved an AUROC of 0.985 and a PRAUC of 0.748 for images captured with GE devices. Additionally, the model achieved an AUROC of 0.909 and a PRAUC of 0.958 for the publicly available dataset. This model maintained the PRAUC values for quantitative validation when masking up to 90\% of the least-contributing pixels and the remnant pixels in breast shell layers. Furthermore, the prediction uncertainty increased in the following order: Canon (0.066), GE (0.072), ruptured implants (0.371), and no implants (0.777). Conclusions: We have demonstrated the feasibility of using deep learning to predict the shell texture type of breast implants. This approach quantifies the shell texture types of breast implants, supporting the first step in the diagnosis of BIA-ALCL. ", doi="10.2196/58776", url="https://formative.jmir.org/2024/1/e58776" } @Article{info:doi/10.2196/52639, author="Hesso, Iman and Zacharias, Lithin and Kayyali, Reem and Charalambous, Andreas and Lavdaniti, Maria and Stalika, Evangelia and Ajami, Tarek and Acampa, Wanda and Boban, Jasmina and Nabhani-Gebara, Shereen", title="Artificial Intelligence for Optimizing Cancer Imaging: User Experience Study", journal="JMIR Cancer", year="2024", month="Oct", day="10", volume="10", pages="e52639", keywords="artificial intelligence", keywords="cancer", keywords="cancer imaging", keywords="UX design workshops", keywords="Delphi method", keywords="INCISIVE AI toolbox", keywords="user experience", abstract="Background: The need for increased clinical efficacy and efficiency has been the main force in developing artificial intelligence (AI) tools in medical imaging. The INCISIVE project is a European Union--funded initiative aiming to revolutionize cancer imaging methods using AI technology. It seeks to address limitations in imaging techniques by developing an AI-based toolbox that improves accuracy, specificity, sensitivity, interpretability, and cost-effectiveness. Objective: To ensure the successful implementation of the INCISIVE AI service, a study was conducted to understand the needs, challenges, and expectations of health care professionals (HCPs) regarding the proposed toolbox and any potential implementation barriers. Methods: A mixed methods study consisting of 2 phases was conducted. Phase 1 involved user experience (UX) design workshops with users of the INCISIVE AI toolbox.
Phase 2 involved a Delphi study conducted through a series of sequential questionnaires. To recruit, a purposive sampling strategy based on the project's consortium network was used. In total, 16 HCPs from Serbia, Italy, Greece, Cyprus, Spain, and the United Kingdom participated in the UX design workshops and 12 completed the Delphi study. Descriptive statistics were performed using SPSS (IBM Corp), enabling the calculation of mean rank scores of the Delphi study's lists. The qualitative data collected via the UX design workshops was analyzed using NVivo (version 12; Lumivero) software. Results: The workshops facilitated brainstorming and identification of the INCISIVE AI toolbox's desired features and implementation barriers. Subsequently, the Delphi study was instrumental in ranking these features, showing a strong consensus among HCPs (W=0.741, P<.001). Additionally, this study also identified implementation barriers, revealing a strong consensus among HCPs (W=0.705, P<.001). Key findings indicated that the INCISIVE AI toolbox could assist in areas such as misdiagnosis, overdiagnosis, delays in diagnosis, detection of minor lesions, decision-making in disagreement, treatment allocation, disease prognosis, prediction, treatment response prediction, and care integration throughout the patient journey. Limited resources, lack of organizational and managerial support, and data entry variability were some of the identified barriers. HCPs also had an explicit interest in AI explainability, desiring feature relevance explanations or a combination of feature relevance and visual explanations within the toolbox. Conclusions: The results provide a thorough examination of the INCISIVE AI toolbox's design elements as required by the end users and potential barriers to its implementation, thus guiding the design and implementation of the INCISIVE technology. The outcome offers information about the degree of AI explainability required of the INCISIVE AI toolbox across the three services: (1) initial diagnosis; (2) disease staging, differentiation, and characterization; and (3) treatment and follow-up indicated for the toolbox. By considering the perspective of end users, INCISIVE aims to develop a solution that effectively meets their needs and drives adoption. ", doi="10.2196/52639", url="https://cancer.jmir.org/2024/1/e52639" } @Article{info:doi/10.2196/56851, author="Tao, Jin and Liu, Dan and Hu, Fu-Bi and Zhang, Xiao and Yin, Hongkun and Zhang, Huiling and Zhang, Kai and Huang, Zixing and Yang, Kun", title="Development and Validation of a Computed Tomography--Based Model for Noninvasive Prediction of the T Stage in Gastric Cancer: Multicenter Retrospective Study", journal="J Med Internet Res", year="2024", month="Oct", day="9", volume="26", pages="e56851", keywords="gastric cancer", keywords="computed tomography", keywords="radiomics", keywords="T stage", keywords="deep learning", keywords="cancer", keywords="multicenter study", keywords="accuracy", keywords="binary classification", keywords="tumor", keywords="hybrid model", keywords="performance", keywords="pathological stage", abstract="Background: As part of the TNM (tumor-node-metastasis) staging system, T staging based on tumor depth is crucial for developing treatment plans. Previous studies have constructed a deep learning model based on computed tomographic (CT) radiomic signatures to predict the number of lymph node metastases and survival in patients with resected gastric cancer (GC).
However, few studies have reported the combination of deep learning and radiomics in predicting T staging in GC. Objective: This study aimed to develop a CT-based model for automatic prediction of the T stage of GC via radiomics and deep learning. Methods: A total of 771 GC patients from 3 centers were retrospectively enrolled and divided into training, validation, and testing cohorts. Patients with GC were classified into mild (stage T1 and T2), moderate (stage T3), and severe (stage T4) groups. Three predictive models based on the labeled CT images were constructed using the radiomics features (radiomics model), deep features (deep learning model), and a combination of both (hybrid model). Results: The overall classification accuracy of the radiomics model was 64.3\% in the internal testing data set. The deep learning model and hybrid model showed better performance than the radiomics model, with overall classification accuracies of 75.7\% (P=.04) and 81.4\% (P=.001), respectively. On the subtasks of binary classification of tumor severity, the areas under the curve of the radiomics, deep learning, and hybrid models were 0.875, 0.866, and 0.886 in the internal testing data set and 0.820, 0.818, and 0.972 in the external testing data set, respectively, for differentiating mild (stage T1{\textasciitilde}T2) from nonmild (stage T3{\textasciitilde}T4) patients, and were 0.815, 0.892, and 0.894 in the internal testing data set and 0.685, 0.808, and 0.897 in the external testing data set, respectively, for differentiating nonsevere (stage T1{\textasciitilde}T3) from severe (stage T4) patients. Conclusions: The hybrid model integrating radiomics features and deep features showed favorable performance in diagnosing the pathological stage of GC. ", doi="10.2196/56851", url="https://www.jmir.org/2024/1/e56851", url="http://www.ncbi.nlm.nih.gov/pubmed/39382960" } @Article{info:doi/10.2196/58741, author="Zhang, Daiwen and Ma, Zixuan and Gong, Ru and Lian, Liangliang and Li, Yanzhuo and He, Zhenghui and Han, Yuhan and Hui, Jiyuan and Huang, Jialin and Jiang, Jiyao and Weng, Weiji and Feng, Junfeng", title="Using Natural Language Processing (GPT-4) for Computed Tomography Image Analysis of Cerebral Hemorrhages in Radiology: Retrospective Analysis", journal="J Med Internet Res", year="2024", month="Sep", day="26", volume="26", pages="e58741", keywords="GPT-4", keywords="natural language processing", keywords="NLP", keywords="artificial intelligence", keywords="AI", keywords="cerebral hemorrhage", keywords="computed tomography", keywords="CT", abstract="Background: Cerebral hemorrhage is a critical medical condition that necessitates a rapid and precise diagnosis for timely medical intervention, including emergency operation. Computed tomography (CT) is essential for identifying cerebral hemorrhage, but its effectiveness is limited by the availability of experienced radiologists, especially in resource-constrained regions or when shorthanded during holidays or at night. Despite advancements in artificial intelligence--driven diagnostic tools, most require technical expertise. This poses a challenge for widespread adoption in radiological imaging. The introduction of advanced natural language processing (NLP) models such as GPT-4, which can annotate and analyze images without extensive algorithmic training, offers a potential solution. Objective: This study investigates GPT-4's capability to identify and annotate cerebral hemorrhages in cranial CT scans. 
It represents a novel application of NLP models in radiological imaging. Methods: In this retrospective analysis, we collected 208 CT scans with 6 types of cerebral hemorrhages at Ren Ji Hospital, Shanghai Jiao Tong University School of Medicine, between January and September 2023. All CT images were mixed together and sequentially numbered, so each CT image had its own corresponding number. A random sequence from 1 to 208 was generated, and all CT images were inputted into GPT-4 for analysis in the order of the random sequence. The outputs were subsequently examined using Photoshop and evaluated by experienced radiologists on a 4-point scale to assess identification completeness, accuracy, and success. Results: The overall identification completeness percentage for the 6 types of cerebral hemorrhages was 72.6\% (SD 18.6\%). Specifically, GPT-4 achieved higher identification completeness in epidural and intraparenchymal hemorrhages (89.0\%, SD 19.1\% and 86.9\%, SD 17.7\%, respectively), yet its identification completeness percentage in chronic subdural hemorrhages was very low (37.3\%, SD 37.5\%). The misidentification percentages for complex hemorrhages (54.0\%, SD 28.0\%), epidural hemorrhages (50.2\%, SD 22.7\%), and subarachnoid hemorrhages (50.5\%, SD 29.2\%) were relatively high, whereas they were relatively low for acute subdural hemorrhages (32.6\%, SD 26.3\%), chronic subdural hemorrhages (40.3\%, SD 27.2\%), and intraparenchymal hemorrhages (26.2\%, SD 23.8\%). The identification completeness percentages in both massive and minor bleeding showed no significant difference (P=.06). However, the misidentification percentage in recognizing massive bleeding was significantly lower than that for minor bleeding (P=.04). The identification completeness percentages and misidentification percentages for cerebral hemorrhages at different locations showed no significant differences (all P>.05). Lastly, radiologists showed relative acceptance regarding identification completeness (3.60, SD 0.54), accuracy (3.30, SD 0.65), and success (3.38, SD 0.64). Conclusions: GPT-4, a standout among NLP models, exhibits both promising capabilities and certain limitations in the realm of radiological imaging, particularly when it comes to identifying cerebral hemorrhages in CT scans. This opens up new directions and insights for the future development of NLP models in radiology. Trial Registration: ClinicalTrials.gov NCT06230419; https://clinicaltrials.gov/study/NCT06230419 ", doi="10.2196/58741", url="https://www.jmir.org/2024/1/e58741" } @Article{info:doi/10.2196/59914, author="Arunga, Simon and Morley, Elise Katharine and Kwaga, Teddy and Morley, Gerard Michael and Nakayama, Filipe Luis and Mwavu, Rogers and Kaggwa, Fred and Ssempiira, Julius and Celi, Anthony Leo and Haberer, E. 
Jessica and Obua, Celestino", title="Assessment of Clinical Metadata on the Accuracy of Retinal Fundus Image Labels in Diabetic Retinopathy in Uganda: Case-Crossover Study Using the Multimodal Database of Retinal Images in Africa", journal="JMIR Form Res", year="2024", month="Sep", day="18", volume="8", pages="e59914", keywords="image labeling", keywords="metadata", keywords="diabetic retinopathy", keywords="assessment", keywords="bias", keywords="multimodal database", keywords="retinal images", keywords="Africa", keywords="African", keywords="artificial intelligence", keywords="AI", keywords="screening algorithms", keywords="screening", keywords="algorithms", keywords="diabetic", keywords="diabetes", keywords="treatment", keywords="sensitivity", keywords="clinical images", abstract="Background: Labeling color fundus photos (CFP) is an important step in the development of artificial intelligence screening algorithms for the detection of diabetic retinopathy (DR). Most studies use the International Classification of Diabetic Retinopathy (ICDR) to assign labels to CFP, plus the presence or absence of macular edema (ME). Images can be grouped as referrable or nonreferrable according to these classifications. There is little guidance in the literature about how to collect and use metadata as a part of the CFP labeling process. Objective: This study aimed to improve the quality of the Multimodal Database of Retinal Images in Africa (MoDRIA) by determining whether the availability of metadata during the image labeling process influences the accuracy, sensitivity, and specificity of image labels. MoDRIA was developed as one of the inaugural research projects of the Mbarara University Data Science Research Hub, part of the Data Science for Health Discovery and Innovation in Africa (DS-I Africa) initiative. Methods: This is a crossover assessment with 2 groups and 2 phases. Each group had 10 randomly assigned labelers who provided an ICDR score and the presence or absence of ME for each of the 50 CFP in a test image set, with and without metadata including blood pressure, visual acuity, glucose, and medical history. Specificity and sensitivity of referable retinopathy were based on ICDR scores, and ME was calculated using a 2-sided t test. Comparison of sensitivity and specificity for ICDR scores and ME with and without metadata for each participant was calculated using the Wilcoxon signed rank test. Statistical significance was set at P<.05. Results: The sensitivity for identifying referrable DR with metadata was 92.8\% (95\% CI 87.6-98.0) compared with 93.3\% (95\% CI 87.6-98.9) without metadata, and the specificity was 84.9\% (95\% CI 75.1-94.6) with metadata compared with 88.2\% (95\% CI 79.5-96.8) without metadata. The sensitivity for identifying the presence of ME was 64.3\% (95\% CI 57.6-71.0) with metadata, compared with 63.1\% (95\% CI 53.4-73.0) without metadata, and the specificity was 86.5\% (95\% CI 81.4-91.5) with metadata compared with 87.7\% (95\% CI 83.9-91.5) without metadata. The sensitivity and specificity of the ICDR score and the presence or absence of ME were calculated for each labeler with and without metadata. No findings were statistically significant. Conclusions: The sensitivity and specificity scores for the detection of referrable DR were slightly better without metadata, but the difference was not statistically significant. We cannot make definitive conclusions about the impact of metadata on the sensitivity and specificity of image labels in our study.
Given the importance of metadata in clinical situations, we believe that metadata may benefit labeling quality. A more rigorous study to determine the sensitivity and specificity of CFP labels with and without metadata is recommended. ", doi="10.2196/59914", url="https://formative.jmir.org/2024/1/e59914" } @Article{info:doi/10.2196/57335, author="Shetty, Shishir and Mubarak, Saleh Auwalu and R David, Leena and Al Jouhari, Omar Mhd and Talaat, Wael and Al-Rawi, Natheer and AlKawas, Sausan and Shetty, Sunaina and Uzun Ozsahin, Dilber", title="The Application of Mask Region-Based Convolutional Neural Networks in the Detection of Nasal Septal Deviation Using Cone Beam Computed Tomography Images: Proof-of-Concept Study", journal="JMIR Form Res", year="2024", month="Sep", day="3", volume="8", pages="e57335", keywords="convolutional neural networks", keywords="nasal septal deviation", keywords="cone beam computed tomography", keywords="tomographic", keywords="tomography", keywords="nasal", keywords="nose", keywords="face", keywords="facial", keywords="image", keywords="images", keywords="imagery", keywords="artificial intelligence", keywords="CNN", keywords="neural network", keywords="neural networks", keywords="ResNet", abstract="Background: Artificial intelligence (AI) models are being increasingly studied for the detection of variations and pathologies in different imaging modalities. Nasal septal deviation (NSD) is an important anatomical structure with clinical implications. However, AI-based radiographic detection of NSD has not yet been studied. Objective: This research aimed to develop and evaluate a real-time model that can detect probable NSD using cone beam computed tomography (CBCT) images. Methods: Coronal section images were obtained from 204 full-volume CBCT scans. The scans were classified as normal and deviated by 2 maxillofacial radiologists. The images were then used to train and test the AI model. Mask region-based convolutional neural networks (Mask R-CNNs) comprising 3 different backbones---ResNet50, ResNet101, and MobileNet---were used to detect deviated nasal septum in 204 CBCT images. To further improve the detection, an image preprocessing technique (contrast enhancement [CEH]) was added. Results: The best-performing model---CEH-ResNet101---achieved a mean average precision of 0.911, with an area under the curve of 0.921. Conclusions: The performance of the model shows that the model is capable of detecting nasal septal deviation. Future research in this field should focus on additional preprocessing of images and detection of NSD based on multiple planes using 3D images. 
", doi="10.2196/57335", url="https://formative.jmir.org/2024/1/e57335", url="http://www.ncbi.nlm.nih.gov/pubmed/39226096" } @Article{info:doi/10.2196/53119, author="Cescon, Corrado and Landolfi, Giuseppe and Bonomi, Niko and Derboni, Marco and Giuffrida, Vincenzo and Rizzoli, Emilio Andrea and Maino, Paolo and Koetsier, Eva and Barbero, Marco", title="Automated Pain Spots Recognition Algorithm Provided by a Web Service--Based Platform: Instrument Validation Study", journal="JMIR Mhealth Uhealth", year="2024", month="Aug", day="27", volume="12", pages="e53119", keywords="pain drawing", keywords="image processing", keywords="body charts", keywords="scan", keywords="pain", keywords="draw", keywords="drawing", keywords="scanner", keywords="scanners", keywords="app", keywords="apps", keywords="applications", keywords="device", keywords="devices", keywords="image", keywords="images", keywords="smartphone", keywords="smartphones", keywords="scale", keywords="musculoskeletal", keywords="body chart", keywords="accuracy", keywords="reliability", keywords="accurate", keywords="reliable", keywords="picture", keywords="pictures", keywords="mobile phone", abstract="Background: Understanding the causes and mechanisms underlying musculoskeletal pain is crucial for developing effective treatments and improving patient outcomes. Self-report measures, such as the Pain Drawing Scale, involve individuals rating their level of pain on a scale. In this technique, individuals color the area where they experience pain, and the resulting picture is rated based on the depicted pain intensity. Analyzing pain drawings (PDs) typically involves measuring the size of the pain region. There are several studies focusing on assessing the clinical use of PDs, and now, with the introduction of digital PDs, the usability and reliability of these platforms need validation. Comparative studies between traditional and digital PDs have shown good agreement and reliability. The evolution of PD acquisition over the last 2 decades mirrors the commercialization of digital technologies. However, the pen-on-paper approach seems to be more accepted by patients, but there is currently no standardized method for scanning PDs. Objective: The objective of this study was to evaluate the accuracy of PD analysis performed by a web platform using various digital scanners. The primary goal was to demonstrate that simple and affordable mobile devices can be used to acquire PDs without losing important information. Methods: Two sets of PDs were generated: one with the addition of 216 colored circles and another composed of various red shapes distributed randomly on a frontal view body chart of an adult male. These drawings were then printed in color on A4 sheets, including QR codes at the corners in order to allow automatic alignment, and subsequently scanned using different devices and apps. The scanners used were flatbed scanners of different sizes and prices (professional, portable flatbed, and home printer or scanner), smartphones with varying price ranges, and 6 virtual scanner apps. The acquisitions were made under normal light conditions by the same operator. Results: High-saturation colors, such as red, cyan, magenta, and yellow, were accurately identified by all devices. The percentage error for small, medium, and large pain spots was consistently below 20\% for all devices, with smaller values associated with larger areas. 
In addition, a significant negative correlation was observed between the percentage of error and spot size (R=-0.237; P=.04). The proposed platform proved to be robust and reliable for acquiring paper PDs via a wide range of scanning devices. Conclusions: This study demonstrates that a web platform can accurately analyze PDs acquired through various digital scanners. The findings support the use of simple and cost-effective mobile devices for PD acquisition without compromising the quality of data. Standardizing the scanning process using the proposed platform can contribute to more efficient and consistent PD analysis in clinical and research settings. ", doi="10.2196/53119", url="https://mhealth.jmir.org/2024/1/e53119" } @Article{info:doi/10.2196/55641, author="Ridhi, Smriti and Robert, Dennis and Soren, Pitamber and Kumar, Manish and Pawar, Saniya and Reddy, Bhargava", title="Comparing the Output of an Artificial Intelligence Algorithm in Detecting Radiological Signs of Pulmonary Tuberculosis in Digital Chest X-Rays and Their Smartphone-Captured Photos of X-Ray Films: Retrospective Study", journal="JMIR Form Res", year="2024", month="Aug", day="21", volume="8", pages="e55641", keywords="artificial intelligence", keywords="AI", keywords="deep learning", keywords="early detection", keywords="tuberculosis", keywords="TB", keywords="computer-aided detection", keywords="diagnostic accuracy", keywords="chest x-ray", keywords="mobile phone", abstract="Background: Artificial intelligence (AI) based computer-aided detection devices are recommended for screening and triaging of pulmonary tuberculosis (TB) using digital chest x-ray (CXR) images (soft copies). Most AI algorithms are trained using input data from digital CXR Digital Imaging and Communications in Medicine (DICOM) files. There can be scenarios when only digital CXR films (hard copies) are available for interpretation. A smartphone-captured photo of the digital CXR film may be used for AI to process in such a scenario. There is a gap in the literature investigating if there is a significant difference in the performance of AI algorithms when digital CXR DICOM files are used as input for AI to process as opposed to photos of the digital CXR films being used as input. Objective: The primary objective was to compare the agreement of AI in detecting radiological signs of TB when using DICOM files (denoted as CXRd) as input versus when using smartphone-captured photos of digital CXR films (denoted as CXRp) with human readers. Methods: Pairs of CXRd and CXRp images were obtained retrospectively from patients screened for TB. AI results were obtained using both the CXRd and CXRp files. The majority consensus on the presence or absence of TB in CXR pairs was obtained from a panel of 3 independent radiologists. The positive and negative percent agreement of AI in detecting radiological signs of TB in CXRd and CXRp were estimated by comparing with the majority consensus. The distribution of AI probability scores was also compared. Results: A total of 1278 CXR pairs were analyzed. The positive percent agreement of AI was found to be 92.22\% (95\% CI 89.94-94.12) and 90.75\% (95\% CI 88.32-92.82), respectively, for CXRd and CXRp images (P=.09). The negative percent agreement of AI was 82.08\% (95\% CI 78.76-85.07) and 79.23\% (95\% CI 75.75-82.42), respectively, for CXRd and CXRp images (P=.06). The median of the AI probability score was 0.72 (IQR 0.11-0.97) in CXRd and 0.72 (IQR 0.14-0.96) in CXRp images (P=.75).
Conclusions: We did not observe any statistically significant differences in the output of AI in digital CXRs and photos of digital CXR films. ", doi="10.2196/55641", url="https://formative.jmir.org/2024/1/e55641" } @Article{info:doi/10.2196/51706, author="Chen, Binjun and Li, Yike and Sun, Yu and Sun, Haojie and Wang, Yanmei and Lyu, Jihan and Guo, Jiajie and Bao, Shunxing and Cheng, Yushu and Niu, Xun and Yang, Lian and Xu, Jianghong and Yang, Juanmei and Huang, Yibo and Chi, Fanglu and Liang, Bo and Ren, Dongdong", title="A 3D and Explainable Artificial Intelligence Model for Evaluation of Chronic Otitis Media Based on Temporal Bone Computed Tomography: Model Development, Validation, and Clinical Application", journal="J Med Internet Res", year="2024", month="Aug", day="8", volume="26", pages="e51706", keywords="artificial intelligence", keywords="cholesteatoma", keywords="deep learning", keywords="otitis media", keywords="tomography, x-ray computed", keywords="machine learning", keywords="mastoidectomy", keywords="convolutional neural networks", keywords="temporal bone", abstract="Background: Temporal bone computed tomography (CT) helps diagnose chronic otitis media (COM). However, its interpretation requires training and expertise. Artificial intelligence (AI) can help clinicians evaluate COM through CT scans, but existing models lack transparency and may not fully leverage multidimensional diagnostic information. Objective: We aimed to develop an explainable AI system based on 3D convolutional neural networks (CNNs) for automatic CT-based evaluation of COM. Methods: Temporal bone CT scans were retrospectively obtained from patients operated for COM between December 2015 and July 2021 at 2 independent institutes. A region of interest encompassing the middle ear was automatically segmented, and 3D CNNs were subsequently trained to identify pathological ears and cholesteatoma. An ablation study was performed to refine model architecture. Benchmark tests were conducted against a baseline 2D model and 7 clinical experts. Model performance was measured through cross-validation and external validation. Heat maps, generated using Gradient-Weighted Class Activation Mapping, were used to highlight critical decision-making regions. Finally, the AI system was assessed with a prospective cohort to aid clinicians in preoperative COM assessment. Results: Internal and external data sets contained 1661 and 108 patients (3153 and 211 eligible ears), respectively. The 3D model exhibited decent performance with mean areas under the receiver operating characteristic curves of 0.96 (SD 0.01) and 0.93 (SD 0.01), and mean accuracies of 0.878 (SD 0.017) and 0.843 (SD 0.015), respectively, for detecting pathological ears on the 2 data sets. Similar outcomes were observed for cholesteatoma identification (mean area under the receiver operating characteristic curve 0.85, SD 0.03 and 0.83, SD 0.05; mean accuracies 0.783, SD 0.04 and 0.813, SD 0.033, respectively). The proposed 3D model achieved a commendable balance between performance and network size relative to alternative models. It significantly outperformed the 2D approach in detecting COM (P≤.05) and exhibited a substantial gain in identifying cholesteatoma (P<.001). The model also demonstrated superior diagnostic capabilities over resident fellows and the attending otologist (P<.05), rivaling all senior clinicians in both tasks.
The generated heat maps properly highlighted the middle ear and mastoid regions, aligning with human knowledge in interpreting temporal bone CT. The resulting AI system achieved an accuracy of 81.8\% in generating preoperative diagnoses for 121 patients and contributed to clinical decision-making in 90.1\% of cases. Conclusions: We present a 3D CNN model trained to detect pathological changes and identify cholesteatoma via temporal bone CT scans. In both tasks, this model significantly outperforms the baseline 2D approach, achieving levels comparable with or surpassing those of human experts. The model also exhibits decent generalizability and enhanced comprehensibility. This AI system facilitates automatic COM assessment and shows promising viability in real-world clinical settings. These findings underscore AI's potential as a valuable aid for clinicians in COM evaluation. Trial Registration: Chinese Clinical Trial Registry ChiCTR2000036300; https://www.chictr.org.cn/showprojEN.html?proj=58685 ", doi="10.2196/51706", url="https://www.jmir.org/2024/1/e51706" } @Article{info:doi/10.2196/53108, author="Baghdadi, R. Leena and Mobeirek, A. Arwa and Alhudaithi, R. Dania and Albenmousa, A. Fatimah and Alhadlaq, S. Leen and Alaql, S. Maisa and Alhamlan, A. Sarah", title="Patients' Attitudes Toward the Use of Artificial Intelligence as a Diagnostic Tool in Radiology in Saudi Arabia: Cross-Sectional Study", journal="JMIR Hum Factors", year="2024", month="Aug", day="7", volume="11", pages="e53108", keywords="artificial intelligence", keywords="diagnostic radiology", keywords="patients", keywords="attitudes", keywords="questionnaire", keywords="patient", keywords="attitude", keywords="diagnostic tool", keywords="diagnostic tools", keywords="AI", keywords="radiologists", keywords="prognosis", keywords="treatment", keywords="Saudi Arabia", keywords="sociodemographic factors", keywords="sociodemographic factor", keywords="sociodemographic", keywords="cross-sectional study", keywords="participant", keywords="men", keywords="women", keywords="analysis", keywords="distrust", keywords="trust", abstract="Background: Artificial intelligence (AI) is widely used in various medical fields, including diagnostic radiology as a tool for greater efficiency, precision, and accuracy. The integration of AI as a radiological diagnostic tool has the potential to mitigate delays in diagnosis, which could, in turn, impact patients' prognosis and treatment outcomes. The literature shows conflicting results regarding patients' attitudes to AI as a diagnostic tool. To the best of our knowledge, no similar study has been conducted in Saudi Arabia. Objective: The objectives of this study are to examine patients' attitudes toward the use of AI as a tool in diagnostic radiology at King Khalid University Hospital, Saudi Arabia. Additionally, we sought to explore potential associations between patients' attitudes and various sociodemographic factors. Methods: This descriptive-analytical cross-sectional study was conducted in a tertiary care hospital. Data were collected from patients scheduled for radiological imaging through a validated self-administered questionnaire. The main outcome was to measure patients' attitudes to the use of AI in radiology by calculating mean scores of 5 factors: distrust and accountability (factor 1), procedural knowledge (factor 2), personal interaction and communication (factor 3), efficiency (factor 4), and methods of providing information to patients (factor 5).
Data were analyzed using the Student t test and one-way analysis of variance, followed by post hoc and multivariable analyses. Results: A total of 382 participants (n=273, 71.5\% women and n=109, 28.5\% men) completed the surveys and were included in the analysis. The mean age of the respondents was 39.51 (SD 13.26) years. Participants favored physicians over AI for procedural knowledge, personal interaction, and being informed. However, the participants demonstrated a neutral attitude for distrust and accountability and for efficiency. Marital status was found to be associated with distrust and accountability, procedural knowledge, and personal interaction. Associations were also found between self-reported health status and being informed and between the field of specialization and distrust and accountability. Conclusions: Patients were keen to understand the work of AI in radiology but favored personal interaction with a radiologist. Patients were impartial toward AI replacing radiologists and the efficiency of AI, which should be a consideration in future policy development and integration. Future research involving multicenter studies in different regions of Saudi Arabia is required. ", doi="10.2196/53108", url="https://humanfactors.jmir.org/2024/1/e53108" } @Article{info:doi/10.2196/56627, author="Naseem, Usman and Thapa, Surendrabikram and Masood, Anum", title="Advancing Accuracy in Multimodal Medical Tasks Through Bootstrapped Language-Image Pretraining (BioMedBLIP): Performance Evaluation Study", journal="JMIR Med Inform", year="2024", month="Aug", day="5", volume="12", pages="e56627", keywords="biomedical text mining", keywords="BioNLP", keywords="vision-language pretraining", keywords="multimodal models", keywords="medical image analysis", abstract="Background: Medical image analysis, particularly in the context of visual question answering (VQA) and image captioning, is crucial for accurate diagnosis and educational purposes. Objective: Our study aims to introduce BioMedBLIP models, fine-tuned for VQA tasks using specialized medical data sets such as Radiology Objects in Context and Medical Information Mart for Intensive Care-Chest X-ray, and evaluate their performance in comparison to the state of the art (SOTA) original Bootstrapping Language-Image Pretraining (BLIP) model. Methods: We present 9 versions of BioMedBLIP across 3 downstream tasks in various data sets. The models are trained on a varying number of epochs. The findings indicate the strong overall performance of our models. We proposed BioMedBLIP for the VQA generation model, VQA classification model, and BioMedBLIP image caption model. We conducted pretraining in BLIP using medical data sets, producing an adapted BLIP model tailored for medical applications. Results: In VQA generation tasks, BioMedBLIP models outperformed the SOTA on the Semantically-Labeled Knowledge-Enhanced (SLAKE) data set, VQA in Radiology (VQA-RAD), and Image Cross-Language Evaluation Forum data sets. In VQA classification, our models consistently surpassed the SOTA on the SLAKE data set. Our models also showed competitive performance on the VQA-RAD and PathVQA data sets. Similarly, in image captioning tasks, our model beat the SOTA, suggesting the importance of pretraining with medical data sets. Overall, in 20 different data sets and task combinations, our BioMedBLIP excelled in 15 (75\%) out of 20 tasks.
BioMedBLIP represents a new SOTA in 15 (75\%) out of 20 tasks, and our responses were rated higher in all 20 tasks (P<.005) in comparison to SOTA models. Conclusions: Our BioMedBLIP models show promising performance and suggest that incorporating medical knowledge through pretraining with domain-specific medical data sets helps models achieve higher performance. Our models thus demonstrate their potential to advance medical image analysis, impacting diagnosis, medical education, and research. However, data quality, task-specific variability, computational resources, and ethical considerations should be carefully addressed. In conclusion, our models represent a contribution toward the synergy of artificial intelligence and medicine. We have made BioMedBLIP freely available, which will help in further advancing research in multimodal medical tasks. ", doi="10.2196/56627", url="https://medinform.jmir.org/2024/1/e56627", url="http://www.ncbi.nlm.nih.gov/pubmed/39102281" } @Article{info:doi/10.2196/56361, author="Zha, Bowen and Cai, Angshu and Wang, Guiqi", title="Diagnostic Accuracy of Artificial Intelligence in Endoscopy: Umbrella Review", journal="JMIR Med Inform", year="2024", month="Jul", day="15", volume="12", pages="e56361", keywords="endoscopy", keywords="artificial intelligence", keywords="umbrella review", keywords="meta-analyses", keywords="AI", keywords="diagnostic", keywords="researchers", keywords="researcher", keywords="tools", keywords="tool", keywords="assessment", abstract="Background: Some research has already reported the diagnostic value of artificial intelligence (AI) in different endoscopy outcomes. However, the evidence is confusing and of varying quality. Objective: This review aimed to comprehensively evaluate the credibility of the evidence of AI's diagnostic accuracy in endoscopy. Methods: Before the study began, the protocol was registered on PROSPERO (CRD42023483073). First, 2 researchers searched PubMed, Web of Science, Embase, and Cochrane Library using comprehensive search terms. Then, researchers screened the articles and extracted information. We used A Measurement Tool to Assess Systematic Reviews 2 (AMSTAR2) to evaluate the quality of the articles. When there were multiple studies aiming at the same result, we chose the study with higher-quality evaluations for further analysis. To ensure the reliability of the conclusions, we recalculated each outcome. Finally, the Grading of Recommendations, Assessment, Development, and Evaluation (GRADE) was used to evaluate the credibility of the outcomes. Results: A total of 21 studies were included for analysis. Through AMSTAR2, it was found that 8 research methodologies were of moderate quality, while other studies were regarded as having low or critically low quality. The sensitivity and specificity of 17 different outcomes were analyzed. There were 4 studies on esophagus, 4 studies on stomach, and 4 studies on colorectal regions. Two studies were associated with capsule endoscopy, two were related to laryngoscopy, and one was related to ultrasonic endoscopy. In terms of sensitivity, gastroesophageal reflux disease had the highest accuracy rate, reaching 97\%, while the invasion depth of colon neoplasia, with 71\%, had the lowest accuracy rate. On the other hand, the specificity of colorectal cancer was the highest, reaching 98\%, while the gastrointestinal stromal tumor, with only 80\%, had the lowest specificity. The GRADE evaluation suggested that the reliability of most outcomes was low or very low. 
Conclusions: AI proved valuable in endoscopic diagnoses, especially in esophageal and colorectal diseases. These findings provide a theoretical basis for developing and evaluating AI-assisted systems, which are aimed at assisting endoscopists in carrying out examinations, leading to improved patient health outcomes. However, further high-quality research is needed in the future to fully validate AI's effectiveness. ", doi="10.2196/56361", url="https://medinform.jmir.org/2024/1/e56361" } @Article{info:doi/10.2196/59187, author="Ji, Hyerim and Kim, Seok and Sunwoo, Leonard and Jang, Sowon and Lee, Ho-Young and Yoo, Sooyoung", title="Integrating Clinical Data and Medical Imaging in Lung Cancer: Feasibility Study Using the Observational Medical Outcomes Partnership Common Data Model Extension", journal="JMIR Med Inform", year="2024", month="Jul", day="12", volume="12", pages="e59187", keywords="DICOM", keywords="OMOP", keywords="CDM", keywords="lung cancer", keywords="medical imaging", keywords="data integration", keywords="data quality", keywords="Common Data Model", keywords="Digital Imaging and Communications in Medicine", keywords="Observational Medical Outcomes Partnership", abstract="Background: Digital transformation, particularly the integration of medical imaging with clinical data, is vital in personalized medicine. The Observational Medical Outcomes Partnership (OMOP) Common Data Model (CDM) standardizes health data. However, integrating medical imaging remains a challenge. Objective: This study proposes a method for combining medical imaging data with the OMOP CDM to improve multimodal research. Methods: Our approach included the analysis and selection of digital imaging and communications in medicine header tags, validation of data formats, and alignment according to the OMOP CDM framework. The Fast Healthcare Interoperability Resources ImagingStudy profile guided our consistency in column naming and definitions. Imaging Common Data Model (I-CDM), constructed using the entity-attribute-value model, facilitates scalable and efficient medical imaging data management. For patients with lung cancer diagnosed between 2010 and 2017, we introduced 4 new tables---IMAGING\_STUDY, IMAGING\_SERIES, IMAGING\_ANNOTATION, and FILEPATH---to standardize various imaging-related data and link to clinical data. Results: This framework underscores the effectiveness of I-CDM in enhancing our understanding of lung cancer diagnostics and treatment strategies. The implementation of the I-CDM tables enabled the structured organization of a comprehensive data set, including 282,098 IMAGING\_STUDY, 5,674,425 IMAGING\_SERIES, and 48,536 IMAGING\_ANNOTATION records, illustrating the extensive scope and depth of the approach. A scenario-based analysis using actual data from patients with lung cancer underscored the feasibility of our approach. A data quality check applying 44 specific rules confirmed the high integrity of the constructed data set, with all checks successfully passed, underscoring the reliability of our findings. Conclusions: These findings indicate that I-CDM can improve the integration and analysis of medical imaging and clinical data. By addressing the challenges in data standardization and management, our approach contributes toward enhancing diagnostics and treatment strategies. Future research should expand the application of I-CDM to diverse disease populations and explore its wide-ranging utility for medical conditions.
", doi="10.2196/59187", url="https://medinform.jmir.org/2024/1/e59187" } @Article{info:doi/10.2196/48535, author="Kong, Hye Sung and Cho, Wonwoo and Park, Bae Sung and Choo, Jaegul and Kim, Hee Jung and Kim, Wan Sang and Shin, Soo Chan", title="A Computed Tomography--Based Fracture Prediction Model With Images of Vertebral Bones and Muscles by Employing Deep Learning: Development and Validation Study", journal="J Med Internet Res", year="2024", month="Jul", day="12", volume="26", pages="e48535", keywords="fracture", keywords="bone", keywords="bones", keywords="muscle", keywords="muscles", keywords="musculoskeletal", keywords="prediction", keywords="deep learning", keywords="prospective cohort", keywords="fracture risk assessment", keywords="predict", keywords="predictive", keywords="machine learning", keywords="develop", keywords="development", keywords="validate", keywords="validation", keywords="imaging", keywords="tomography", keywords="scanning", abstract="Background: With the progressive increase in aging populations, the use of opportunistic computed tomography (CT) scanning is increasing, which could be a valuable method for acquiring information on both muscles and bones of aging populations. Objective: The aim of this study was to develop and externally validate opportunistic CT-based fracture prediction models by using images of vertebral bones and paravertebral muscles. Methods: The models were developed based on a retrospective longitudinal cohort study of 1214 patients with abdominal CT images between 2010 and 2019. The models were externally validated in 495 patients. The primary outcome of this study was defined as the predictive accuracy for identifying vertebral fracture events within a 5-year follow-up. The image models were developed using an attention convolutional neural network--recurrent neural network model from images of the vertebral bone and paravertebral muscles. Results: The mean ages of the patients in the development and validation sets were 73 years and 68 years, and 69.1\% (839/1214) and 78.8\% (390/495) of them were females, respectively. The areas under the receiver operator curve (AUROCs) for predicting vertebral fractures were superior in images of the vertebral bone and paravertebral muscles than those in the bone-only images in the external validation cohort (0.827, 95\% CI 0.821-0.833 vs 0.815, 95\% CI 0.806-0.824, respectively; P<.001). The AUROCs of these image models were higher than those of the fracture risk assessment models (0.810 for major osteoporotic risk, 0.780 for hip fracture risk). For the clinical model using age, sex, BMI, use of steroids, smoking, possible secondary osteoporosis, type 2 diabetes mellitus, HIV, hepatitis C, and renal failure, the AUROC value in the external validation cohort was 0.749 (95\% CI 0.736-0.762), which was lower than that of the image model using vertebral bones and muscles (P<.001). Conclusions: The model using the images of the vertebral bone and paravertebral muscle showed better performance than that using the images of the bone-only or clinical variables. Opportunistic CT screening may contribute to identifying patients with a high fracture risk in the future. ", doi="10.2196/48535", url="https://www.jmir.org/2024/1/e48535", url="http://www.ncbi.nlm.nih.gov/pubmed/38995678" } @Article{info:doi/10.2196/51397, author="Duggan, M. Nicole and Jin, Mike and Duran Mendicuti, Alejandra Maria and Hallisey, Stephen and Bernier, Denie and Selame, A. Lauren and Asgari-Targhi, Ameneh and Fischetti, E. 
Chanel and Lucassen, Ruben and Samir, E. Anthony and Duhaime, Erik and Kapur, Tina and Goldsmith, J. Andrew", title="Gamified Crowdsourcing as a Novel Approach to Lung Ultrasound Data Set Labeling: Prospective Analysis", journal="J Med Internet Res", year="2024", month="Jul", day="4", volume="26", pages="e51397", keywords="crowdsource", keywords="crowdsourced", keywords="crowdsourcing", keywords="machine learning", keywords="artificial intelligence", keywords="point-of-care ultrasound", keywords="POCUS", keywords="lung ultrasound", keywords="B-lines", keywords="gamification", keywords="gamify", keywords="gamified", keywords="label", keywords="labels", keywords="labeling", keywords="classification", keywords="lung", keywords="pulmonary", keywords="respiratory", keywords="ultrasound", keywords="imaging", keywords="medical image", keywords="diagnostic", keywords="diagnose", keywords="diagnosis", keywords="data science", abstract="Background: Machine learning (ML) models can yield faster and more accurate medical diagnoses; however, developing ML models is limited by a lack of high-quality labeled training data. Crowdsourced labeling is a potential solution but can be constrained by concerns about label quality. Objective: This study aims to examine whether a gamified crowdsourcing platform with continuous performance assessment, user feedback, and performance-based incentives could produce expert-quality labels on medical imaging data. Methods: In this diagnostic comparison study, 2384 lung ultrasound clips were retrospectively collected from 203 emergency department patients. A total of 6 lung ultrasound experts classified 393 of these clips as having no B-lines, one or more discrete B-lines, or confluent B-lines to create 2 sets of reference standard data sets (195 training clips and 198 test clips). Sets were respectively used to (1) train users on a gamified crowdsourcing platform and (2) compare the concordance of the resulting crowd labels to the concordance of individual experts to reference standards. Crowd opinions were sourced from DiagnosUs (Centaur Labs) iOS app users over 8 days, filtered based on past performance, aggregated using majority rule, and analyzed for label concordance compared with a hold-out test set of expert-labeled clips. The primary outcome was comparing the labeling concordance of collated crowd opinions to trained experts in classifying B-lines on lung ultrasound clips. Results: Our clinical data set included patients with a mean age of 60.0 (SD 19.0) years; 105 (51.7\%) patients were female and 114 (56.1\%) patients were White. Over the 195 training clips, the expert-consensus label distribution was 114 (58\%) no B-lines, 56 (29\%) discrete B-lines, and 25 (13\%) confluent B-lines. Over the 198 test clips, expert-consensus label distribution was 138 (70\%) no B-lines, 36 (18\%) discrete B-lines, and 24 (12\%) confluent B-lines. In total, 99,238 opinions were collected from 426 unique users. On a test set of 198 clips, the mean labeling concordance of individual experts relative to the reference standard was 85.0\% (SE 2.0), compared with 87.9\% crowdsourced label concordance (P=.15). When individual experts' opinions were compared with reference standard labels created by majority vote excluding their own opinion, crowd concordance was higher than the mean concordance of individual experts to reference standards (87.4\% vs 80.8\%, SE 1.6 for expert concordance; P<.001). 
Clips with discrete B-lines had the most disagreement from both the crowd consensus and individual experts with the expert consensus. Using randomly sampled subsets of crowd opinions, 7 quality-filtered opinions were sufficient to achieve near the maximum crowd concordance. Conclusions: Crowdsourced labels for B-line classification on lung ultrasound clips via a gamified approach achieved expert-level accuracy. This suggests a strategic role for gamified crowdsourcing in efficiently generating labeled image data sets for training ML systems. ", doi="10.2196/51397", url="https://www.jmir.org/2024/1/e51397" } @Article{info:doi/10.2196/55342, author="Chun, Minki and Yu, Ha-Jin and Jung, Hyunggu", title="A Deep Learning--Based Rotten Food Recognition App for Older Adults: Development and Usability Study", journal="JMIR Form Res", year="2024", month="Jul", day="3", volume="8", pages="e55342", keywords="digital health", keywords="mobile health", keywords="mHealth", keywords="app", keywords="apps", keywords="application", keywords="applications", keywords="smartphone", keywords="smartphones", keywords="classification", keywords="digital sensor", keywords="deep learning", keywords="artificial intelligence", keywords="machine learning", keywords="food", keywords="foods", keywords="fruit", keywords="fruits", keywords="experience", keywords="experiences", keywords="attitude", keywords="attitudes", keywords="opinion", keywords="opinions", keywords="perception", keywords="perceptions", keywords="perspective", keywords="perspectives", keywords="acceptance", keywords="adoption", keywords="usability", keywords="gerontology", keywords="geriatric", keywords="geriatrics", keywords="older adult", keywords="older adults", keywords="elder", keywords="elderly", keywords="older person", keywords="older people", keywords="ageing", keywords="aging", keywords="aged", keywords="camera", keywords="image", keywords="imaging", keywords="photo", keywords="photos", keywords="photograph", keywords="photographs", keywords="recognition", keywords="picture", keywords="pictures", keywords="sensor", keywords="sensors", keywords="develop", keywords="development", keywords="design", abstract="Background: Older adults are at greater risk of eating rotten fruits and of getting food poisoning because cognitive function declines as they age, making it difficult to distinguish rotten fruits. To address this problem, researchers have developed and evaluated various tools to detect rotten food items in various ways. Nevertheless, little is known about how to create an app to detect rotten food items to support older adults at a risk of health problems from eating rotten food items. Objective: This study aimed to (1) create a smartphone app that enables older adults to take a picture of food items with a camera and classifies the fruit as rotten or not rotten for older adults and (2) evaluate the usability of the app and the perceptions of older adults about the app. Methods: We developed a smartphone app that supports older adults in determining whether the 3 fruits selected for this study (apple, banana, and orange) were fresh enough to eat. We used several residual deep networks to check whether the fruit photos collected were of fresh fruit. We recruited healthy older adults aged over 65 years (n=15, 57.7\%, males and n=11, 42.3\%, females) as participants. We evaluated the usability of the app and the participants' perceptions about the app through surveys and interviews. 
We analyzed the survey responses, including an after-scenario questionnaire, as evaluation indicators of the usability of the app and collected qualitative data from the interviewees for in-depth analysis of the survey responses. Results: The participants were satisfied with using an app to determine whether a fruit is fresh by taking a picture of the fruit but are reluctant to use the paid version of the app. The survey results revealed that the participants tended to use the app efficiently to take pictures of fruits and determine their freshness. The qualitative data analysis on app usability and participants' perceptions about the app revealed that they found the app simple and easy to use, they had no difficulty taking pictures, and they found the app interface visually satisfactory. Conclusions: This study suggests the possibility of developing an app that supports older adults in identifying rotten food items effectively and efficiently. Future work to make the app distinguish the freshness of various food items other than the 3 fruits selected still remains. ", doi="10.2196/55342", url="https://formative.jmir.org/2024/1/e55342" } @Article{info:doi/10.2196/48811, author="Marri, Shankar Shiva and Albadri, Warood and Hyder, Salman Mohammed and Janagond, B. Ajit and Inamadar, C. Arun", title="Efficacy of an Artificial Intelligence App (Aysa) in Dermatological Diagnosis: Cross-Sectional Analysis", journal="JMIR Dermatol", year="2024", month="Jul", day="2", volume="7", pages="e48811", keywords="artificial intelligence", keywords="AI", keywords="AI-aided diagnosis", keywords="dermatology", keywords="mobile app", keywords="application", keywords="neural network", keywords="machine learning", keywords="dermatological", keywords="skin", keywords="computer-aided diagnosis", keywords="diagnostic", keywords="imaging", keywords="lesion", abstract="Background: Dermatology is an ideal specialty for artificial intelligence (AI)--driven image recognition to improve diagnostic accuracy and patient care. Lack of dermatologists in many parts of the world and the high frequency of cutaneous disorders and malignancies highlight the increasing need for AI-aided diagnosis. Although AI-based applications for the identification of dermatological conditions are widely available, research assessing their reliability and accuracy is lacking. Objective: The aim of this study was to analyze the efficacy of the Aysa AI app as a preliminary diagnostic tool for various dermatological conditions in a semiurban town in India. Methods: This observational cross-sectional study included patients over the age of 2 years who visited the dermatology clinic. Images of lesions from individuals with various skin disorders were uploaded to the app after obtaining informed consent. The app was used to make a patient profile, identify lesion morphology, plot the location on a human model, and answer questions regarding duration and symptoms. The app presented eight differential diagnoses, which were compared with the clinical diagnosis. The model's performance was evaluated using sensitivity, specificity, accuracy, positive predictive value, negative predictive value, and F1-score. Comparison of categorical variables was performed with the $\chi$2 test and statistical significance was considered at P<.05. Results: A total of 700 patients were part of the study. A wide variety of skin conditions were grouped into 12 categories. 
The AI model had a mean top-1 sensitivity of 71\% (95\% CI 61.5\%-74.3\%), top-3 sensitivity of 86.1\% (95\% CI 83.4\%-88.6\%), and all-8 sensitivity of 95.1\% (95\% CI 93.3\%-96.6\%). The top-1 sensitivities for diagnosis of skin infestations, disorders of keratinization, other inflammatory conditions, and bacterial infections were 85.7\%, 85.7\%, 82.7\%, and 81.8\%, respectively. In the case of photodermatoses and malignant tumors, the top-1 sensitivities were 33.3\% and 10\%, respectively. Each category had a strong correlation between the clinical diagnosis and the probable diagnoses (P<.001). Conclusions: The Aysa app showed promising results in identifying most dermatoses. ", doi="10.2196/48811", url="https://derma.jmir.org/2024/1/e48811" } @Article{info:doi/10.2196/48212, author="Umeria, Rishi and Mowforth, Oliver and Veremu, Munashe and Davies, Benjamin and Kotter, Mark", title="Radiological Progression of Degenerative Cervical Myelopathy in a Clinically Stable Patient: Case Report", journal="Interact J Med Res", year="2024", month="Jun", day="27", volume="13", pages="e48212", keywords="degenerative cervical myelopathy", keywords="neurosurgery", keywords="radiology", keywords="magnetic resonance imaging", doi="10.2196/48212", url="https://www.i-jmr.org/2024/1/e48212" } @Article{info:doi/10.2196/49613, author="Lin, Z. Rebecca and Amith, Tuan Muhammad and Wang, X. Cynthia and Strickley, John and Tao, Cui", title="Dermoscopy Differential Diagnosis Explorer (D3X) Ontology to Aggregate and Link Dermoscopic Patterns to Differential Diagnoses: Development and Usability Study", journal="JMIR Med Inform", year="2024", month="Jun", day="21", volume="12", pages="e49613", keywords="medical informatics", keywords="biomedical ontology", keywords="ontology", keywords="ontologies", keywords="vocabulary", keywords="OWL", keywords="web ontology language", keywords="skin", keywords="semiotic", keywords="web app", keywords="web application", keywords="visual", keywords="visualization", keywords="dermoscopic", keywords="diagnosis", keywords="diagnoses", keywords="diagnostic", keywords="information storage", keywords="information retrieval", keywords="skin lesion", keywords="skin diseases", keywords="dermoscopy differential diagnosis explorer", keywords="dermatology", keywords="dermoscopy", keywords="differential diagnosis", keywords="information storage and retrieval", abstract="Background: Dermoscopy is a growing field that uses microscopy to allow dermatologists and primary care physicians to identify skin lesions. For a given skin lesion, a wide variety of differential diagnoses exist, which may be challenging for inexperienced users to name and understand. Objective: In this study, we describe the creation of the dermoscopy differential diagnosis explorer (D3X), an ontology linking dermoscopic patterns to differential diagnoses. Methods: Existing ontologies that were incorporated into D3X include the elements of visuals ontology and dermoscopy elements of visuals ontology, which connect visual features to dermoscopic patterns. A list of differential diagnoses for each pattern was generated from the literature and in consultation with domain experts. Open-source images were incorporated from DermNet, Dermoscopedia, and open-access research papers. Results: D3X was encoded in the OWL 2 web ontology language and includes 3041 logical axioms, 1519 classes, 103 object properties, and 20 data properties. 
We compared D3X with publicly available ontologies in the dermatology domain using a semiotic theory--driven metric to measure the innate qualities of D3X with others. The results indicate that D3X is adequately comparable with other ontologies of the dermatology domain. Conclusions: The D3X ontology is a resource that can link and integrate dermoscopic differential diagnoses and supplementary information with existing ontology-based resources. Future directions include developing a web application based on D3X for dermoscopy education and clinical practice. ", doi="10.2196/49613", url="https://medinform.jmir.org/2024/1/e49613", url="http://www.ncbi.nlm.nih.gov/pubmed/38904996" } @Article{info:doi/10.2196/56726, author="De Sanctis, Pierfilippo and Mahoney, R. Jeannette and Wagner, Johanna and Blumen, M. Helena and Mowrey, Wenzhu and Ayers, Emmeline and Schneider, Claudia and Orellana, Natasha and Molholm, Sophie and Verghese, Joe", title="Linking Dementia Pathology and Alteration in Brain Activation to Complex Daily Functional Decline During the Preclinical Dementia Stages: Protocol for a Prospective Observational Cohort Study", journal="JMIR Res Protoc", year="2024", month="Jun", day="6", volume="13", pages="e56726", keywords="EEG", keywords="electroencephalographic", keywords="mobility", keywords="preclinical dementia stages", abstract="Background: Progressive difficulty in performing everyday functional activities is a key diagnostic feature of dementia syndromes. However, not much is known about the neural signature of functional decline, particularly during the very early stages of dementia. Early intervention before overt impairment is observed offers the best hope of reducing the burdens of Alzheimer disease (AD) and other dementias. However, to justify early intervention, those at risk need to be detected earlier and more accurately. The decline in complex daily function (CdF) such as managing medications has been reported to precede impairment in basic activities of daily living (eg, eating and dressing). Objective: Our goal is to establish the neural signature of decline in CdF during the preclinical dementia period. Methods: Gait is central to many CdF and community-based activities. Hence, to elucidate the neural signature of CdF, we validated a novel electroencephalographic approach to measuring gait-related brain activation while participants perform complex gait-based functional tasks. We hypothesize that dementia-related pathology during the preclinical period activates a unique gait-related electroencephalographic (grEEG) pattern that predicts a subsequent decline in CdF. Results: We provide preliminary findings showing that older adults reporting CdF limitations can be characterized by a unique gait-related neural signature: weaker sensorimotor and stronger motor control activation. This subsample also had smaller brain volume and white matter hyperintensities in regions affected early by dementia and engaged in less physical exercise. We propose a prospective observational cohort study in cognitively unimpaired older adults with and without subclinical AD (plasma amyloid-$\beta$) and vascular (white matter hyperintensities) pathologies. 
We aim to (1) establish the unique grEEG activation as the neural signature and predictor of decline in CdF during the preclinical dementia period; (2) determine associations between dementia-related pathologies and incidence of the neural signature of CdF; and (3) establish associations between a dementia risk factor, physical inactivity, and the neural signature of CdF. Conclusions: By establishing the clinical relevance and biological basis of the neural signature of CdF decline, we aim to improve prediction during the preclinical stages of ADs and other dementias. Our approach has important research and translational implications because grEEG protocols are relatively inexpensive and portable, and predicting CdF decline may have real-world benefits. International Registered Report Identifier (IRRID): DERR1-10.2196/56726 ", doi="10.2196/56726", url="https://www.researchprotocols.org/2024/1/e56726", url="http://www.ncbi.nlm.nih.gov/pubmed/38842914" } @Article{info:doi/10.2196/52637, author="Kim, Min Hyung and Kang, Hyoeun and Lee, Chaeyoon and Park, Hyuk Jong and Chung, Kyung Mi and Kim, Miran and Kim, Young Na and Lee, Jun Hye", title="Evaluation of the Clinical Efficacy and Trust in AI-Assisted Embryo Ranking: Survey-Based Prospective Study", journal="J Med Internet Res", year="2024", month="Jun", day="3", volume="26", pages="e52637", keywords="assisted reproductive technology", keywords="in vitro fertilization", keywords="artificial intelligence", keywords="intraobserver and interobserver agreements", keywords="embryos", keywords="embryologists", abstract="Background: Current embryo assessment methods for in vitro fertilization depend on subjective morphological assessments. Recently, artificial intelligence (AI) has emerged as a promising tool for embryo assessment; however, its clinical efficacy and trustworthiness remain unproven. Simulation studies may provide additional evidence, provided that they are meticulously designed to mitigate bias and variance. Objective: The primary objective of this study was to evaluate the benefits of an AI model for predicting clinical pregnancy through well-designed simulations. The secondary objective was to identify the characteristics of and potential bias in the subgroups of embryologists with varying degrees of experience. Methods: This simulation study involved a questionnaire-based survey conducted on 61 embryologists with varying levels of experience from 12 in vitro fertilization clinics. The survey was conducted via Google Forms (Google Inc) in three phases: (1) phase 1, an initial assessment (December 23, 2022, to January 22, 2023); (2) phase 2, a validation assessment (March 6, 2023, to April 5, 2023); and (3) phase 3 an AI-guided assessment (March 6, 2023, to April 5, 2023). Inter- and intraobserver assessments and the accuracy of embryo selection from 360 day-5 embryos before and after AI guidance were analyzed for all embryologists and subgroups of senior and junior embryologists. Results: With AI guidance, the interobserver agreement increased from 0.355 to 0.527 and from 0.440 to 0.524 for junior and senior embryologists, respectively, thus reaching similar levels of agreement. In a test of accurate embryo selection with 90 questions, the numbers of correct responses by the embryologists only, embryologists with AI guidance, and AI only were 34 (38\%), 45 (50\%), and 59 (66\%), respectively. 
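The interobserver agreement figures quoted above (for example, 0.355 rising to 0.527 with AI guidance) are kappa-style agreement statistics. A minimal sketch of one common approach, average pairwise Cohen's kappa via scikit-learn, on hypothetical rater data; the study's exact agreement statistic is not specified here and may differ:

```python
from itertools import combinations

from sklearn.metrics import cohen_kappa_score

# Hypothetical embryo grades assigned by three raters to the same five embryos (not study data).
ratings = {
    "rater_a": [1, 2, 3, 1, 2],
    "rater_b": [1, 3, 3, 1, 2],
    "rater_c": [2, 2, 3, 1, 1],
}

# Average pairwise Cohen's kappa as a simple summary of interobserver agreement.
kappas = [cohen_kappa_score(ratings[a], ratings[b]) for a, b in combinations(ratings, 2)]
print(sum(kappas) / len(kappas))
```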
Without AI, the average score (accuracy) of the junior group was 33.516 (37\%), while that of the senior group was 35.967 (40\%), with P<.001 in the t test. With AI guidance, the average score (accuracy) of the junior group increased to 46.581 (52\%), reaching a level similar to that of the senior embryologists of 44.833 (50\%), with P=.34. Junior embryologists had a higher level of trust in the AI score. Conclusions: This study demonstrates the potential benefits of AI in selecting embryos with high chances of pregnancy, particularly for embryologists with 5 years or less of experience, possibly due to their trust in AI. Thus, using AI as an auxiliary tool in clinical practice has the potential to improve embryo assessment and increase the probability of a successful pregnancy. ", doi="10.2196/52637", url="https://www.jmir.org/2024/1/e52637", url="http://www.ncbi.nlm.nih.gov/pubmed/38830209" } @Article{info:doi/10.2196/54948, author="Busch, Felix and Han, Tianyu and Makowski, R. Marcus and Truhn, Daniel and Bressem, K. Keno and Adams, Lisa", title="Integrating Text and Image Analysis: Exploring GPT-4V's Capabilities in Advanced Radiological Applications Across Subspecialties", journal="J Med Internet Res", year="2024", month="May", day="1", volume="26", pages="e54948", keywords="GPT-4", keywords="ChatGPT", keywords="Generative Pre-Trained Transformer", keywords="multimodal large language models", keywords="artificial intelligence", keywords="AI applications in medicine", keywords="diagnostic radiology", keywords="clinical decision support systems", keywords="generative AI", keywords="medical image analysis", doi="10.2196/54948", url="https://www.jmir.org/2024/1/e54948", url="http://www.ncbi.nlm.nih.gov/pubmed/38691404" } @Article{info:doi/10.2196/45545, author="Lin, Senlin and Ma, Yingyan and Jiang, Yanwei and Li, Wenwen and Peng, Yajun and Yu, Tao and Xu, Yi and Zhu, Jianfeng and Lu, Lina and Zou, Haidong", title="Service Quality and Residents' Preferences for Facilitated Self-Service Fundus Disease Screening: Cross-Sectional Study", journal="J Med Internet Res", year="2024", month="Apr", day="17", volume="26", pages="e45545", keywords="digital technology", keywords="screening", keywords="self-service", keywords="eye disease", keywords="health economics evaluation", keywords="health technology assessment", keywords="disease screening", keywords="artificial intelligence", keywords="AI", keywords="eye", keywords="community", keywords="effectiveness", keywords="screening efficiency", keywords="safety", abstract="Background: Fundus photography is the most important examination in eye disease screening. A facilitated self-service eye screening pattern based on the fully automatic fundus camera was developed in 2022 in Shanghai, China; it may help solve the problem of insufficient human resources in primary health care institutions. However, the service quality and residents' preference for this new pattern are unclear. Objective: This study aimed to compare the service quality and residents' preferences between facilitated self-service eye screening and traditional manual screening and to explore the relationships between the screening service's quality and residents' preferences. Methods: We conducted a cross-sectional study in Shanghai, China. 
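The junior-versus-senior score comparisons above are two-sample t tests. A minimal SciPy sketch on hypothetical score arrays; Welch's unequal-variance form is shown as an assumption, since the variance treatment is not stated in the abstract:

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
# Hypothetical per-embryologist scores out of 90 questions (not study data).
junior_scores = rng.normal(loc=33.5, scale=4.0, size=31)
senior_scores = rng.normal(loc=36.0, scale=4.0, size=30)

# Welch's t test (unequal variances assumed).
t_stat, p_value = stats.ttest_ind(junior_scores, senior_scores, equal_var=False)
print(f"t = {t_stat:.2f}, P = {p_value:.3f}")
```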
Residents who underwent facilitated self-service fundus disease screening at one of the screening sites were assigned to the exposure group; those who were screened with a traditional fundus camera operated by an optometrist at an adjacent site comprised the control group. The primary outcome was the screening service quality, including effectiveness (image quality and screening efficiency), physiological discomfort, safety, convenience, and trustworthiness. The secondary outcome was the participants' preferences. Differences in service quality and the participants' preferences between the 2 groups were compared using chi-square tests separately. Subgroup analyses for exploring the relationships between the screening service's quality and residents' preference were conducted using generalized logit models. Results: A total of 358 residents enrolled; among them, 176 (49.16\%) were included in the exposure group and the remaining 182 (50.84\%) in the control group. Residents' basic characteristics were balanced between the 2 groups. There was no significant difference in service quality between the 2 groups (image quality pass rate: P=.79; average screening time: P=.57; no physiological discomfort rate: P=.92; safety rate: P=.78; convenience rate: P=.95; trustworthiness rate: P=.20). However, the proportion of participants who were willing to use the same technology for their next screening was significantly lower in the exposure group than in the control group (P<.001). Subgroup analyses suggest that distrust in the facilitated self-service eye screening might increase the probability of refusal to undergo screening (P=.02). Conclusions: This study confirms that the facilitated self-service fundus disease screening pattern could achieve good service quality. However, it was difficult to reverse residents' preferences for manual screening in a short period, especially when the original manual service was already excellent. Therefore, the digital transformation of health care must be cautious. We suggest that attention be paid to the residents' individual needs. More efficient man-machine collaboration and personalized health management solutions based on large language models are both needed. ", doi="10.2196/45545", url="https://www.jmir.org/2024/1/e45545", url="http://www.ncbi.nlm.nih.gov/pubmed/38630535" } @Article{info:doi/10.2196/51250, author="Huo, Jian and Yu, Yan and Lin, Wei and Hu, Anmin and Wu, Chaoran", title="Application of AI in Multilevel Pain Assessment Using Facial Images: Systematic Review and Meta-Analysis", journal="J Med Internet Res", year="2024", month="Apr", day="12", volume="26", pages="e51250", keywords="computer vision", keywords="facial image", keywords="monitoring", keywords="multilevel pain assessment", keywords="pain", keywords="postoperative", keywords="status", abstract="Background: The continuous monitoring and recording of patients' pain status is a major problem in current research on postoperative pain management. In the large number of original or review articles focusing on different approaches for pain assessment, many researchers have investigated how computer vision (CV) can help by capturing facial expressions. However, there is a lack of proper comparison of results between studies to identify current research gaps. Objective: The purpose of this systematic review and meta-analysis was to investigate the diagnostic performance of artificial intelligence models for multilevel pain assessment from facial images. 
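The group comparisons described above rest on chi-square tests of contingency tables. A minimal sketch with scipy.stats.chi2_contingency on a hypothetical 2x2 table of screening pattern versus willingness to reuse the technology (illustrative counts only, not study data):

```python
from scipy.stats import chi2_contingency

# Hypothetical 2x2 table: rows = screening pattern (self-service, manual),
# columns = willing vs unwilling to reuse the same technology (not study data).
table = [[95, 81],
         [150, 32]]

chi2, p, dof, expected = chi2_contingency(table)
print(f"chi-square = {chi2:.2f}, df = {dof}, P = {p:.4f}")
```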
Methods: The PubMed, Embase, IEEE, Web of Science, and Cochrane Library databases were searched for related publications before September 30, 2023. Studies that used facial images alone to estimate multiple pain values were included in the systematic review. A study quality assessment was conducted using the Quality Assessment of Diagnostic Accuracy Studies, 2nd edition tool. The performance of these studies was assessed by metrics including sensitivity, specificity, log diagnostic odds ratio (LDOR), and area under the curve (AUC). The intermodal variability was assessed and presented by forest plots. Results: A total of 45 reports were included in the systematic review. The reported test accuracies ranged from 0.27-0.99, and the other metrics, including the mean standard error (MSE), mean absolute error (MAE), intraclass correlation coefficient (ICC), and Pearson correlation coefficient (PCC), ranged from 0.31-4.61, 0.24-2.8, 0.19-0.83, and 0.48-0.92, respectively. In total, 6 studies were included in the meta-analysis. Their combined sensitivity was 98\% (95\% CI 96\%-99\%), specificity was 98\% (95\% CI 97\%-99\%), LDOR was 7.99 (95\% CI 6.73-9.31), and AUC was 0.99 (95\% CI 0.99-1). The subgroup analysis showed that the diagnostic performance was acceptable, although imbalanced data were still emphasized as a major problem. All studies had at least one domain with a high risk of bias, and for 20\% (9/45) of studies, there were no applicability concerns. Conclusions: This review summarizes recent evidence in automatic multilevel pain estimation from facial expressions and compared the test accuracy of results in a meta-analysis. Promising performance for pain estimation from facial images was established by current CV algorithms. Weaknesses in current studies were also identified, suggesting that larger databases and metrics evaluating multiclass classification performance could improve future studies. Trial Registration: PROSPERO CRD42023418181; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=418181 ", doi="10.2196/51250", url="https://www.jmir.org/2024/1/e51250", url="http://www.ncbi.nlm.nih.gov/pubmed/38607660" } @Article{info:doi/10.2196/52602, author="Ong, Yuhan Ariel and Hogg, Jeffry Henry David and Kale, U. Aditya and Taribagil, Priyal and Kras, Ashley and Dow, Eliot and Macdonald, Trystan and Liu, Xiaoxuan and Keane, A. Pearse and Denniston, K. Alastair", title="AI as a Medical Device for Ophthalmic Imaging in Europe, Australia, and the United States: Protocol for a Systematic Scoping Review of Regulated Devices", journal="JMIR Res Protoc", year="2024", month="Mar", day="14", volume="13", pages="e52602", keywords="AIaMD", keywords="artificial intelligence as a medical device", keywords="artificial intelligence", keywords="deep learning", keywords="machine learning", keywords="ophthalmic imaging", keywords="regulatory approval", abstract="Background: Artificial intelligence as a medical device (AIaMD) has the potential to transform many aspects of ophthalmic care, such as improving accuracy and speed of diagnosis, addressing capacity issues in high-volume areas such as screening, and detecting novel biomarkers of systemic disease in the eye (oculomics). In order to ensure that such tools are safe for the target population and achieve their intended purpose, it is important that these AIaMD have adequate clinical evaluation to support any regulatory decision. 
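The per-study quantities pooled in a diagnostic accuracy meta-analysis such as the one above (sensitivity, specificity, and the log diagnostic odds ratio) all derive from a 2x2 confusion table. A minimal sketch with hypothetical counts; the 0.5 continuity correction is one common convention and an assumption here, not necessarily the review's choice:

```python
import math

def diagnostic_metrics(tp, fp, fn, tn, correction=0.5):
    """Sensitivity, specificity, and log diagnostic odds ratio from one study's 2x2 table.
    The 0.5 continuity correction on the LDOR cells is one common convention for zero counts."""
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    a, b, c, d = (x + correction for x in (tp, fp, fn, tn))
    ldor = math.log((a * d) / (b * c))
    return sensitivity, specificity, ldor

# Hypothetical counts, not drawn from any included study.
sens, spec, ldor = diagnostic_metrics(tp=80, fp=10, fn=15, tn=120)
print(f"sensitivity = {sens:.2f}, specificity = {spec:.2f}, LDOR = {ldor:.2f}")
```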
Currently, the evidential requirements for regulatory approval are less clear for AIaMD compared to more established interventions such as drugs or medical devices. There is therefore value in understanding the level of evidence that underpins AIaMD currently on the market, as a step toward identifying what the best practices might be in this area. In this systematic scoping review, we will focus on AIaMD that contributes to clinical decision-making (relating to screening, diagnosis, prognosis, and treatment) in the context of ophthalmic imaging. Objective: This study aims to identify regulator-approved AIaMD for ophthalmic imaging in Europe, Australia, and the United States; report the characteristics of these devices and their regulatory approvals; and report the available evidence underpinning these AIaMD. Methods: The Food and Drug Administration (United States), the Australian Register of Therapeutic Goods (Australia), the Medicines and Healthcare products Regulatory Agency (United Kingdom), and the European Database on Medical Devices (European Union) regulatory databases will be searched for ophthalmic imaging AIaMD through a snowballing approach. PubMed and clinical trial registries will be systematically searched, and manufacturers will be directly contacted for studies investigating the effectiveness of eligible AIaMD. Preliminary regulatory database searches, evidence searches, screening, data extraction, and methodological quality assessment will be undertaken by 2 independent review authors and arbitrated by a third at each stage of the process. Results: Preliminary searches were conducted in February 2023. Data extraction, data synthesis, and assessment of methodological quality commenced in October 2023. The review is on track to be completed and submitted for peer review by April 2024. Conclusions: This systematic review will provide greater clarity on ophthalmic imaging AIaMD that have achieved regulatory approval as well as the evidence that underpins them. This should help adopters understand the range of tools available and whether they can be safely incorporated into their clinical workflow, and it should also support developers in navigating regulatory approval more efficiently. International Registered Report Identifier (IRRID): DERR1-10.2196/52602 ", doi="10.2196/52602", url="https://www.researchprotocols.org/2024/1/e52602", url="http://www.ncbi.nlm.nih.gov/pubmed/38483456" } @Article{info:doi/10.2196/42904, author="Reiter, Vittoria Alisa Maria and Pantel, Tori Jean and Danyel, Magdalena and Horn, Denise and Ott, Claus-Eric and Mensah, Atta Martin", title="Validation of 3 Computer-Aided Facial Phenotyping Tools (DeepGestalt, GestaltMatcher, and D-Score): Comparative Diagnostic Accuracy Study", journal="J Med Internet Res", year="2024", month="Mar", day="13", volume="26", pages="e42904", keywords="facial phenotyping", keywords="DeepGestalt", keywords="facial recognition", keywords="Face2Gene", keywords="medical genetics", keywords="diagnostic accuracy", keywords="genetic syndrome", keywords="machine learning", keywords="GestaltMatcher", keywords="D-Score", keywords="genetics", abstract="Background: While characteristic facial features provide important clues for finding the correct diagnosis in genetic syndromes, valid assessment can be challenging. The next-generation phenotyping algorithm DeepGestalt analyzes patient images and provides syndrome suggestions. GestaltMatcher matches patient images with similar facial features. 
The new D-Score provides a score for the degree of facial dysmorphism. Objective: We aimed to test state-of-the-art facial phenotyping tools by benchmarking GestaltMatcher and D-Score and comparing them to DeepGestalt. Methods: Using a retrospective sample of 4796 images of patients with 486 different genetic syndromes (London Medical Database, GestaltMatcher Database, and literature images) and 323 inconspicuous control images, we determined the clinical use of D-Score, GestaltMatcher, and DeepGestalt, evaluating sensitivity; specificity; accuracy; the number of supported diagnoses; and potential biases such as age, sex, and ethnicity. Results: DeepGestalt suggested 340 distinct syndromes and GestaltMatcher suggested 1128 syndromes. The top-30 sensitivity was higher for DeepGestalt (88\%, SD 18\%) than for GestaltMatcher (76\%, SD 26\%). DeepGestalt generally assigned lower scores but provided higher scores for patient images than for inconspicuous control images, thus allowing the 2 cohorts to be separated with an area under the receiver operating characteristic curve (AUROC) of 0.73. GestaltMatcher could not separate the 2 classes (AUROC 0.55). Trained for this purpose, D-Score achieved the highest discriminatory power (AUROC 0.86). D-Score's levels increased with the age of the depicted individuals. Male individuals yielded higher D-scores than female individuals. Ethnicity did not appear to influence D-scores. Conclusions: If used with caution, algorithms such as D-score could help clinicians with constrained resources or limited experience in syndromology to decide whether a patient needs further genetic evaluation. Algorithms such as DeepGestalt could support diagnosing rather common genetic syndromes with facial abnormalities, whereas algorithms such as GestaltMatcher could suggest rare diagnoses that are unknown to the clinician in patients with a characteristic, dysmorphic face. ", doi="10.2196/42904", url="https://www.jmir.org/2024/1/e42904", url="http://www.ncbi.nlm.nih.gov/pubmed/38477981" } @Article{info:doi/10.2196/49965, author="Weber, Isaac and Zagona-Prizio, Caterina and Sivesind, E. Torunn and Adelman, Madeline and Szeto, D. Mindy and Liu, Ying and Sillau, H. Stefan and Bainbridge, Jacquelyn and Klawitter, Jost and Sempio, Cristina and Dunnick, A. Cory and Leehey, A. Maureen and Dellavalle, P. Robert", title="Oral Cannabidiol for Seborrheic Dermatitis in Patients With Parkinson Disease: Randomized Clinical Trial", journal="JMIR Dermatol", year="2024", month="Mar", day="11", volume="7", pages="e49965", keywords="cannabidiol", keywords="cannabis", keywords="CBD treatment", keywords="CBD", keywords="image", keywords="photograph", keywords="photographs", keywords="imaging", keywords="sebum", keywords="clinical trials", keywords="seborrheic dermatitis", keywords="Parkinson disease", keywords="clinical trial", keywords="RCT", keywords="randomized", keywords="controlled trial", keywords="drug response", keywords="SEDASI", keywords="drug", keywords="Parkinson", keywords="dermatitis", keywords="skin", keywords="dermatology", keywords="treatment", keywords="outcome", keywords="chi-square", abstract="Background: Seborrheic dermatitis (SD) affects 18.6\%-59\% of persons with Parkinson disease (PD), and recent studies provide evidence that oral cannabidiol (CBD) therapy could reduce sebum production in addition to improving motor and psychiatric symptoms in PD. Therefore, oral CBD could be useful for improving symptoms of both commonly co-occurring conditions. 
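The AUROC values reported above for separating patient images from control images (0.73 for DeepGestalt, 0.86 for D-Score) can be computed directly from per-image scores and binary labels. A minimal scikit-learn sketch on hypothetical scores, not study data:

```python
import numpy as np
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(1)
# Hypothetical dysmorphism scores: label 1 = patient image, 0 = inconspicuous control.
y_true = np.concatenate([np.ones(200), np.zeros(100)])
y_score = np.concatenate([rng.normal(0.6, 0.2, 200), rng.normal(0.4, 0.2, 100)])

print(f"AUROC = {roc_auc_score(y_true, y_score):.2f}")
```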
Objective: This study investigates whether oral CBD therapy is associated with a decrease in SD severity in PD. Methods: Facial photographs were collected as a component of a randomized (1:1 CBD vs placebo), parallel, double-blind, placebo-controlled trial assessing the efficacy of a short-term 2.5 mg per kg per day oral sesame solution CBD-rich cannabis extract (formulated to 100 mg/mL CBD and 3.3 mg/mL THC) for reducing motor symptoms in PD. Participants took 1.25 mg per kg per day each morning for 4 {\textpm}1 days and then twice daily for 10 {\textpm}4 days. Reviewers analyzed the photographs independently and provided a severity ranking based on the Seborrheic Dermatitis Area and Severity Index (SEDASI) scale. Baseline demographic and disease characteristics, as well as posttreatment SEDASI averages and the presence of SD, were analyzed with 2-tailed t tests and Pearson $\chi$2 tests. SEDASI was analyzed with longitudinal regression, and SD was analyzed with generalized estimating equations. Results: A total of 27 participants received a placebo and 26 received CBD for 16 days. SD severity was low in both groups at baseline, and there was no treatment effect. The risk ratio for patients receiving CBD, post versus pre, was 0.69 (95\% CI 0.41-1.18; P=.15), compared to 1.20 (95\% CI 0.88-1.65; P=.26) for the patients receiving the placebo. The within-group pre-post change was not statistically significant for either group, but they differed from each other (P=.07) because there was an estimated improvement for the CBD group and an estimated worsening for the placebo group. Conclusions: This study does not provide solid evidence that oral CBD therapy reduces the presence of SD among patients with PD. While this study was sufficiently powered to detect the primary outcome (efficacy of CBD on PD motor symptoms), it was underpowered for the secondary outcomes of detecting changes in the presence and severity of SD. Multiple mechanisms exist through which CBD can exert beneficial effects on SD pathogenesis. Larger studies, including participants with increased disease severity and longer treatment periods, may better elucidate treatment effects and are needed to determine CBD's true efficacy for affecting SD severity. 
Trial Registration: ClinicalTrials.gov NCT03582137; https://clinicaltrials.gov/ct2/show/NCT03582137 ", doi="10.2196/49965", url="https://derma.jmir.org/2024/1/e49965", url="http://www.ncbi.nlm.nih.gov/pubmed/38466972" } @Article{info:doi/10.2196/46817, author="Tenda, Daniel Eric and Yunus, Eddy Reyhan and Zulkarnaen, Benny and Yugo, Reynalzi Muhammad and Pitoyo, Wicaksono Ceva and Asaf, Mazmur Moses and Islamiyati, Nur Tiara and Pujitresnani, Arierta and Setiadharma, Andry and Henrina, Joshua and Rumende, Martin Cleopas and Wulani, Vally and Harimurti, Kuntjoro and Lydia, Aida and Shatri, Hamzah and Soewondo, Pradana and Yusuf, Astagiri Prasandhya", title="Comparison of the Discrimination Performance of AI Scoring and the Brixia Score in Predicting COVID-19 Severity on Chest X-Ray Imaging: Diagnostic Accuracy Study", journal="JMIR Form Res", year="2024", month="Mar", day="7", volume="8", pages="e46817", keywords="artificial intelligence", keywords="Brixia", keywords="chest x-ray", keywords="COVID-19", keywords="CAD4COVID", keywords="pneumonia", keywords="radiograph", keywords="artificial intelligence scoring system", keywords="AI scoring system", keywords="prediction", keywords="disease severity", abstract="Background: The artificial intelligence (AI) analysis of chest x-rays can increase the precision of binary COVID-19 diagnosis. However, it is unknown if AI-based chest x-rays can predict who will develop severe COVID-19, especially in low- and middle-income countries. Objective: The study aims to compare the performance of human radiologist Brixia scores versus 2 AI scoring systems in predicting the severity of COVID-19 pneumonia. Methods: We performed a cross-sectional study of 300 patients suspected with and with confirmed COVID-19 infection in Jakarta, Indonesia. A total of 2 AI scores were generated using CAD4COVID x-ray software. Results: The AI probability score had slightly lower discrimination (area under the curve [AUC] 0.787, 95\% CI 0.722-0.852). The AI score for the affected lung area (AUC 0.857, 95\% CI 0.809-0.905) was almost as good as the human Brixia score (AUC 0.863, 95\% CI 0.818-0.908). Conclusions: The AI score for the affected lung area and the human radiologist Brixia score had similar and good discrimination performance in predicting COVID-19 severity. Our study demonstrated that using AI-based diagnostic tools is possible, even in low-resource settings. However, before it is widely adopted in daily practice, more studies with a larger scale and that are prospective in nature are needed to confirm our findings. ", doi="10.2196/46817", url="https://formative.jmir.org/2024/1/e46817", url="http://www.ncbi.nlm.nih.gov/pubmed/38451633" } @Article{info:doi/10.2196/50647, author="Fernandez, L. 
Laura and Griswold, Dylan and Khun, Isla and Rodriguez De Francisco, Victoria Diana", title="Innovative Solutions for Patients Who Undergo Craniectomy: Protocol for a Scoping Review", journal="JMIR Res Protoc", year="2024", month="Mar", day="7", volume="13", pages="e50647", keywords="cranioplasty", keywords="decompressive craniectomy", keywords="global neurosurgery", keywords="intellectual property", keywords="stroke", keywords="traumatic brain injury", keywords="innovative", keywords="innovative solutions", keywords="craniectomy", keywords="increased intracranial pressure", keywords="intracranial pressure", keywords="prototypes", keywords="medical devices", keywords="middle-income countries", keywords="low-income countries", keywords="noninvasive", abstract="Background: Decompressive craniectomy (DC) is a widely used procedure to alleviate high intracranial pressure. Multidisciplinary teams have designed and implemented external medical prototypes to improve patient life quality and avoid complications following DC in patients awaiting cranioplasty (CP), including 3D printing and plaster prototypes when available. Objective: This scoping review aims to understand the extent and type of evidence about innovative external prototypes for patients who undergo DC while awaiting CP. Methods: This scoping review will use the Joanna Briggs Institute methodology for scoping reviews. This scoping review will include noninvasive medical devices for adult patients who undergo DC while waiting for CP. The search strategy will be implemented in MEDLINE, Embase, Web of Science, Scielo, Scopus, and the World Health Organization (WHO) Global Health Index Medicus. Patent documents were also allocated in Espacenet, Google Patents, and the World Intellectual Property Organization (WIPO) database. Results: This scoping review is not subject to ethical approval as there will be no involvement of patients. The dissemination plan includes publishing the review findings in a peer-reviewed journal and presenting results at conferences that engage the most pertinent stakeholders in innovation and neurosurgery. Conclusions: This scoping review will serve as a baseline to provide evidence for multidisciplinary teams currently designing these noninvasive innovations to reduce the risk of associated complications after DC, hoping that more cost-effective models can be implemented, especially in low- and middle-income countries. 
International Registered Report Identifier (IRRID): DERR1-10.2196/50647 ", doi="10.2196/50647", url="https://www.researchprotocols.org/2024/1/e50647", url="http://www.ncbi.nlm.nih.gov/pubmed/38451601" } @Article{info:doi/10.2196/52155, author="Kumar, Ajay and Burr, Pierce and Young, Michael Tim", title="Using AI Text-to-Image Generation to Create Novel Illustrations for Medical Education: Current Limitations as Illustrated by Hypothyroidism and Horner Syndrome", journal="JMIR Med Educ", year="2024", month="Feb", day="22", volume="10", pages="e52155", keywords="artificial intelligence", keywords="AI", keywords="medical illustration", keywords="medical images", keywords="medical education", keywords="image", keywords="images", keywords="illustration", keywords="illustrations", keywords="photo", keywords="photos", keywords="photographs", keywords="face", keywords="facial", keywords="paralysis", keywords="photograph", keywords="photography", keywords="Horner's syndrome", keywords="Horner syndrome", keywords="Bernard syndrome", keywords="Bernard's syndrome", keywords="miosis", keywords="oculosympathetic", keywords="ptosis", keywords="ophthalmoplegia", keywords="nervous system", keywords="autonomic", keywords="eye", keywords="eyes", keywords="pupil", keywords="pupils", keywords="neurologic", keywords="neurological", doi="10.2196/52155", url="https://mededu.jmir.org/2024/1/e52155", url="http://www.ncbi.nlm.nih.gov/pubmed/38386400" } @Article{info:doi/10.2196/52505, author="Dark, Frances and Galloway, Graham and Gray, Marcus and Cella, Matteo and De Monte, Veronica and Gore-Jones, Victoria and Ritchie, Gabrielle", title="Reward Learning as a Potential Mechanism for Improvement in Schizophrenia Spectrum Disorders Following Cognitive Remediation: Protocol for a Clinical, Nonrandomized, Pre-Post Pilot Study", journal="JMIR Res Protoc", year="2024", month="Jan", day="22", volume="13", pages="e52505", keywords="cognitive remediation", keywords="fMRI", keywords="functional magnetic resonance imaging", keywords="negative symptoms", keywords="psychosocial functioning", keywords="reward learning", abstract="Background: Cognitive impairment is common with schizophrenia spectrum disorders. Cognitive remediation (CR) is effective in improving global cognition, but not all individuals benefit from this type of intervention. A better understanding of the potential mechanism of action of CR is needed. One proposed mechanism is reward learning (RL), the cognitive processes responsible for adapting behavior following positive or negative feedback. It is proposed that the structure of CR enhances RL and motivation to engage in increasingly challenging tasks, and this is a potential mechanism by which CR improves cognitive functioning in schizophrenia. Objective: Our primary objective is to examine reward processing in individuals with schizophrenia before and after completing CR and to compare this with a group of matched clinical controls. We will assess whether RL mediates the relationship between CR and improved cognitive function and reduced negative symptoms. Potential differences in social RL and nonsocial RL in individuals with schizophrenia will also be investigated and compared with a healthy matched control group. Methods: We propose a clinical, nonrandomized, pre-post pilot study comparing the impact of CR on RL and neurocognitive outcomes. The study will use a combination of objective and subjective measures to assess neurocognitive, psychiatric symptoms, and neurophysiological domains. 
A total of 40 individuals with schizophrenia spectrum disorders (aged 18-35 years) will receive 12 weeks of CR therapy (n=20) or treatment as usual (n=20). Reward processing will be evaluated using a reinforcement learning task with 2 conditions (social reward vs nonsocial reward) at baseline and the 12-week follow-up. Functional magnetic resonance imaging responses will be measured during this task. To validate the reinforcement learning task, RL will also be assessed in 20 healthy controls, matched for age, sex, and premorbid functioning. Mixed-factorial ANOVAs will be conducted to evaluate treatment group differences. For the functional magnetic resonance imaging analysis, computational modeling will allow the estimation of learning parameters at each point in time, during each task condition, for each participant. We will use a variational Bayesian framework to measure how learning occurred during the experimental task and the subprocesses that underlie this learning. Second-level group analyses will examine how learning in patients differs from that observed in control participants and how CR alters learning efficiency and the underlying neural activity. Results: As of September 2023, this study has enrolled 15 participants in the CR group, 1 participant in the treatment-as-usual group, and 11 participants in the healthy control group. Recruitment is expected to be completed by September 2024. Data analysis is expected to be completed and published in early 2025. Conclusions: The results of this study will contribute to the knowledge of CR and RL processes in severe mental illness and the understanding of the systems that impact negative symptoms and cognitive impairments within this population. International Registered Report Identifier (IRRID): DERR1-10.2196/52505 ", doi="10.2196/52505", url="https://www.researchprotocols.org/2024/1/e52505", url="http://www.ncbi.nlm.nih.gov/pubmed/38252470" } @Article{info:doi/10.2196/52880, author="Tabja Bortesi, Pablo Juan and Ranisau, Jonathan and Di, Shuang and McGillion, Michael and Rosella, Laura and Johnson, Alistair and Devereaux, PJ and Petch, Jeremy", title="Machine Learning Approaches for the Image-Based Identification of Surgical Wound Infections: Scoping Review", journal="J Med Internet Res", year="2024", month="Jan", day="18", volume="26", pages="e52880", keywords="surgical site infection", keywords="machine learning", keywords="postoperative surveillance", keywords="wound imaging", keywords="mobile phone", abstract="Background: Surgical site infections (SSIs) occur frequently and impact patients and health care systems. Remote surveillance of surgical wounds is currently limited by the need for manual assessment by clinicians. Machine learning (ML)--based methods have recently been used to address various aspects of the postoperative wound healing process and may be used to improve the scalability and cost-effectiveness of remote surgical wound assessment. Objective: The objective of this review was to provide an overview of the ML methods that have been used to identify surgical wound infections from images. Methods: We conducted a scoping review of ML approaches for visual detection of SSIs following the JBI (Joanna Briggs Institute) methodology. Reports of participants in any postoperative context focusing on identification of surgical wound infections were included. Studies that did not address SSI identification, surgical wounds, or did not use image or video data were excluded. 
We searched MEDLINE, Embase, CINAHL, CENTRAL, Web of Science Core Collection, IEEE Xplore, Compendex, and arXiv for relevant studies in November 2022. The records retrieved were double screened for eligibility. A data extraction tool was used to chart the relevant data, which was described narratively and presented using tables. Employment of TRIPOD (Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis) guidelines was evaluated and PROBAST (Prediction Model Risk of Bias Assessment Tool) was used to assess risk of bias (RoB). Results: In total, 10 of the 715 unique records screened met the eligibility criteria. In these studies, the clinical contexts and surgical procedures were diverse. All papers developed diagnostic models, though none performed external validation. Both traditional ML and deep learning methods were used to identify SSIs from mostly color images, and the volume of images used ranged from under 50 to thousands. Further, 10 TRIPOD items were reported in at least 4 studies, though 15 items were reported in fewer than 4 studies. PROBAST assessment led to 9 studies being identified as having an overall high RoB, with 1 study having overall unclear RoB. Conclusions: Research on the image-based identification of surgical wound infections using ML remains novel, and there is a need for standardized reporting. Limitations related to variability in image capture, model building, and data sources should be addressed in the future. ", doi="10.2196/52880", url="https://www.jmir.org/2024/1/e52880", url="http://www.ncbi.nlm.nih.gov/pubmed/38236623" } @Article{info:doi/10.2196/46709, author="Okamoto, Tomoko and Ishizuka, Takami and Shimizu, Reiko and Asahina, Yasuko and Nakamura, Harumasa and Shimizu, Yuko and Nishida, Yoichiro and Yokota, Takanori and Lin, Youwei and Sato, Wakiro and Yamamura, Takashi", title="Efficacy and Safety of the Natural Killer T Cell--Stimulatory Glycolipid OCH-NCNP1 for Patients With Relapsing Multiple Sclerosis: Protocol for a Randomized Placebo-Controlled Clinical Trial", journal="JMIR Res Protoc", year="2024", month="Jan", day="15", volume="13", pages="e46709", keywords="OCH-NCNP1", keywords="natural killer cell", keywords="multiple sclerosis", keywords="clinical study", keywords="randomized controlled trial", keywords="autoimmune inflammatory disease", keywords="degeneration", keywords="clinical efficacy", keywords="biomarker", keywords="relapse", keywords="disability", keywords="imaging", keywords="autoimmune", keywords="RCT", keywords="Expanded Disability Status Scale", keywords="EDSS", keywords="neuromyelitis optica", keywords="optic neuritis", keywords="acute myelitis", keywords="Fisher exact test", keywords="MRI", keywords="magnetic resonance imaging", keywords="myelin sheath", keywords="demyelinating lesion", keywords="aminotransferase", keywords="clinicopathology", keywords="pathology", abstract="Background: Multiple sclerosis (MS) is an autoimmune inflammatory disease of the central nervous system that causes myelin sheath damage and axonal degeneration. The glycolipid (2S, 3S, 4R)-1-O-($\alpha$-d-galactosyl)-2-tetracosanoylamino-1,3,4-nonaetriol (OCH-NCNP1 or OCH) exerts an immunoregulatory action that suppresses T helper (Th)1 cell--mediated immune responses through natural killer T cell activation, selective interleukin-4 production, and Th2 bias induction in human CD4-positive natural killer T cells. 
Objective: This trial aims to investigate the efficacy and safety of the immunomodulator OCH in patients with relapsing MS through 24-week repeated administration. Methods: This protocol describes a double-blind, multicenter, placebo-controlled, randomized phase II clinical trial that was initiated in September 2019. The participants were randomly assigned to either a placebo control group or an OCH-NCNP1 group and the investigational drug (3.0 mg) was orally administered once weekly for the 24-week duration. Major inclusion criteria are as follows: patients had been diagnosed with relapsing MS (relapsing-remitting and/or secondary progressive MS) based on the revised McDonald criteria or were diagnosed with MS by an attending physician as noted in their medical records; patients with at least two medically confirmed clinical exacerbations within 24 months prior to consent or one exacerbation within 12 months prior to consent; patients with at least one lesion suspected to be MS on screening magnetic resonance imaging (MRI); and patients with 7 points or less in the Expanded Disability Status Scale during screening. Major exclusion criteria are as follows: diagnosis of neuromyelitis optica and one of optic neuritis, acute myelitis, and satisfying at least two of the following three items: (1) spinal cord MRI lesion extending across at least three vertebral bodies, (2) no brain MRI lesions during onset (at least four cerebral white matter lesions or three lesions, one of which is around the lateral ventricle), and (3) neuromyelitis optica--immunoglobulin G or antiaquaporin-4 antibody-positive. Outcome measures include the primary outcome of MRI changes (the percentage of subjects with new or newly expanded lesions at 24 weeks on T2-weighted MRI) and the secondary outcomes annual relapse rate (number of recurrences per year), relapse-free period (time to recurrence), sustained reduction in disability (SRD) occurrence rate, period until SRD (time to SRD occurrence), no evidence of disease activity, and exploratory biomarkers from phase I trials (such as gene expression, cell frequency, and intestinal and oral microbiome). Results: We plan to enroll 30 patients in the full analysis set. Enrollment was closed in June 2021 and the study analysis was completed in March 2023. Conclusions: This randomized controlled trial will determine whether OCH-NCNP1 is effective and safe in patients with MS as well as provide evidence for the potential of OCH-NCNP1 as a therapeutic agent for MS. Trial Registration: ClinicalTrials.gov NCT04211740; https://clinicaltrials.gov/study/NCT04211740 International Registered Report Identifier (IRRID): DERR1-10.2196/46709 ", doi="10.2196/46709", url="https://www.researchprotocols.org/2024/1/e46709", url="http://www.ncbi.nlm.nih.gov/pubmed/38224478" } @Article{info:doi/10.2196/58911, author="Athreya, Shreeram and Radhachandran, Ashwath and Ivezi{\'c}, Vedrana and Sant, R. Vivek and Arnold, W. 
Corey and Speier, William", title="Enhancing Ultrasound Image Quality Across Disease Domains: Application of Cycle-Consistent Generative Adversarial Network and Perceptual Loss", journal="JMIR Biomed Eng", year="2024", month="Dec", day="17", volume="9", pages="e58911", keywords="generative networks", keywords="cycle generative adversarial network", keywords="image enhancement", keywords="perceptual loss", keywords="ultrasound scans", keywords="ultrasound images", keywords="imaging", keywords="machine learning", keywords="portable handheld devices", abstract="Background: Numerous studies have explored image processing techniques aimed at enhancing ultrasound images to narrow the performance gap between low-quality portable devices and high-end ultrasound equipment. These investigations often use registered image pairs created by modifying the same image through methods like down sampling or adding noise, rather than using separate images from different machines. Additionally, they rely on organ-specific features, limiting the models' generalizability across various imaging conditions and devices. The challenge remains to develop a universal framework capable of improving image quality across different devices and conditions, independent of registration or specific organ characteristics. Objective: This study aims to develop a robust framework that enhances the quality of ultrasound images, particularly those captured with compact, portable devices, which are often constrained by low quality due to hardware limitations. The framework is designed to effectively process nonregistered ultrasound image pairs, a common challenge in medical imaging, across various clinical settings and device types. By addressing these challenges, the research seeks to provide a more generalized and adaptable solution that can be widely applied across diverse medical scenarios, improving the accessibility and quality of diagnostic imaging. Methods: A retrospective analysis was conducted by using a cycle-consistent generative adversarial network (CycleGAN) framework enhanced with perceptual loss to improve the quality of ultrasound images, focusing on nonregistered image pairs from various organ systems. The perceptual loss was integrated to preserve anatomical integrity by comparing deep features extracted from pretrained neural networks. The model's performance was evaluated against corresponding high-resolution images, ensuring that the enhanced outputs closely mimic those from high-end ultrasound devices. The model was trained and validated using a publicly available, diverse dataset to ensure robustness and generalizability across different imaging scenarios. Results: The advanced CycleGAN framework, enhanced with perceptual loss, significantly outperformed the previous state-of-the-art, stable CycleGAN, in multiple evaluation metrics. Specifically, our method achieved a structural similarity index of 0.2889 versus 0.2502 (P<.001), a peak signal-to-noise ratio of 15.8935 versus 14.9430 (P<.001), and a learned perceptual image patch similarity score of 0.4490 versus 0.5005 (P<.001). These results demonstrate the model's superior ability to enhance image quality while preserving critical anatomical details, thereby improving diagnostic usefulness. Conclusions: This study presents a significant advancement in ultrasound imaging by leveraging a CycleGAN model enhanced with perceptual loss to bridge the quality gap between images from different devices. 
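The perceptual loss referred to above is usually implemented as a distance between feature maps of a frozen pretrained network. The sketch below shows one common VGG16-based variant in PyTorch; the layer cut-off, the L1 distance, the loss weights, and the image pairs it is applied to are illustrative assumptions, not the authors' configuration (inputs are assumed to be 3-channel, for example grayscale repeated across channels):

```python
import torch
import torch.nn as nn
from torchvision.models import vgg16, VGG16_Weights

class PerceptualLoss(nn.Module):
    """L1 distance between frozen VGG16 feature maps of two 3-channel image batches."""

    def __init__(self, cut: int = 16):
        super().__init__()
        self.features = vgg16(weights=VGG16_Weights.DEFAULT).features[:cut].eval()
        for p in self.features.parameters():
            p.requires_grad = False  # the feature extractor stays frozen
        self.l1 = nn.L1Loss()

    def forward(self, enhanced: torch.Tensor, reference: torch.Tensor) -> torch.Tensor:
        return self.l1(self.features(enhanced), self.features(reference))

# Sketch of how such a term could enter a CycleGAN generator objective
# (lambda_cyc and lambda_perc are hypothetical weights; rec_a is a cycle-reconstructed image):
# total_loss = adversarial_loss + lambda_cyc * cycle_loss + lambda_perc * PerceptualLoss()(rec_a, real_a)
```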
By processing nonregistered image pairs, the model not only enhances visual quality but also ensures the preservation of essential anatomical structures, crucial for accurate diagnosis. This approach holds the potential to democratize high-quality ultrasound imaging, making it accessible through low-cost portable devices, thereby improving health care outcomes, particularly in resource-limited settings. Future research will focus on further validation and optimization for clinical use. ", doi="10.2196/58911", url="https://biomedeng.jmir.org/2024/1/e58911" } @Article{info:doi/10.2196/48589, author="McNeil, J. Andrew and Parks, Kelsey and Liu, Xiaoqi and Jiang, Bohan and Coco, Joseph and McCool, Kira and Fabbri, Daniel and Duhaime, P. Erik and Dawant, M. Benoit and Tkaczyk, R. Eric", title="Crowdsourcing Skin Demarcations of Chronic Graft-Versus-Host Disease in Patient Photographs: Training Versus Performance Study", journal="JMIR Dermatol", year="2023", month="Dec", day="26", volume="6", pages="e48589", keywords="graft-versus-host disease", keywords="cGVHD", keywords="crowdsourcing", keywords="dermatology", keywords="labeling", keywords="segmentation", keywords="skin", keywords="medical image", keywords="imaging", keywords="feasibility", keywords="artificial intelligence", abstract="Background: Chronic graft-versus-host disease (cGVHD) is a significant cause of long-term morbidity and mortality in patients after allogeneic hematopoietic cell transplantation. Skin is the most commonly affected organ, and visual assessment of cGVHD can have low reliability. Crowdsourcing data from nonexpert participants has been used for numerous medical applications, including image labeling and segmentation tasks. Objective: This study aimed to assess the ability of crowds of nonexpert raters---individuals without any prior training for identifying or marking cGVHD---to demarcate photos of cGVHD-affected skin. We also studied the effect of training and feedback on crowd performance. Methods: Using a Canfield Vectra H1 3D camera, 360 photographs of the skin of 36 patients with cGVHD were taken. Ground truth demarcations were provided in 3D by a trained expert and reviewed by a board-certified dermatologist. In total, 3000 2D images (projections from various angles) were created for crowd demarcation through the DiagnosUs mobile app. Raters were split into high and low feedback groups. The performances of 4 different crowds of nonexperts were analyzed, including 17 raters per image for the low and high feedback groups, 32-35 raters per image for the low feedback group, and the top 5 performers for each image from the low feedback group. Results: Across 8 demarcation competitions, 130 raters were recruited to the high feedback group and 161 to the low feedback group. This resulted in a total of 54,887 individual demarcations from the high feedback group and 78,967 from the low feedback group. The nonexpert crowds achieved good overall performance for segmenting cGVHD-affected skin with minimal training, achieving a median surface area error of less than 12\% of skin pixels for all crowds in both the high and low feedback groups. The low feedback crowds performed slightly poorer than the high feedback crowd, even when a larger crowd was used. Tracking the 5 most reliable raters from the low feedback group for each image recovered a performance similar to that of the high feedback crowd.
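The surface area error quoted above compares a crowd's consensus demarcation against the expert ground truth; a simple way to form the consensus is a pixel-wise majority vote. A minimal NumPy sketch on hypothetical binary masks; both the consensus rule and the error denominator are illustrative assumptions, as the study's exact definitions may differ:

```python
import numpy as np

def consensus_mask(rater_masks: np.ndarray, threshold: float = 0.5) -> np.ndarray:
    """Pixel-wise majority vote over a stack of binary masks shaped (raters, H, W)."""
    return rater_masks.mean(axis=0) >= threshold

def surface_area_error(pred: np.ndarray, truth: np.ndarray) -> float:
    """Absolute difference in marked area, relative to the ground-truth marked area."""
    return abs(int(pred.sum()) - int(truth.sum())) / int(truth.sum())

rng = np.random.default_rng(2)
truth = np.zeros((64, 64), dtype=bool)
truth[16:48, 16:48] = True  # hypothetical ground-truth demarcation
raters = np.stack([truth ^ (rng.random((64, 64)) < 0.05) for _ in range(17)])  # 17 noisy raters

pred = consensus_mask(raters)
print(f"surface area error: {100 * surface_area_error(pred, truth):.1f}%")
```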
Higher variability between raters for a given image was not found to correlate with lower performance of the crowd consensus demarcation and cannot therefore be used as a measure of reliability. No significant learning was observed during the task as more photos and feedback were seen. Conclusions: Crowds of nonexpert raters can demarcate cGVHD images with good overall performance. Tracking the top 5 most reliable raters provided optimal results, obtaining the best performance with the lowest number of expert demarcations required for adequate training. However, the agreement amongst individual nonexperts does not help predict whether the crowd has provided an accurate result. Future work should explore the performance of crowdsourcing in standard clinical photos and further methods to estimate the reliability of consensus demarcations. ", doi="10.2196/48589", url="https://derma.jmir.org/2023/1/e48589", url="http://www.ncbi.nlm.nih.gov/pubmed/38147369" } @Article{info:doi/10.2196/44119, author="Chen, Chaoyue and Teng, Yuen and Tan, Shuo and Wang, Zizhou and Zhang, Lei and Xu, Jianguo", title="Performance Test of a Well-Trained Model for Meningioma Segmentation in Health Care Centers: Secondary Analysis Based on Four Retrospective Multicenter Data Sets", journal="J Med Internet Res", year="2023", month="Dec", day="15", volume="25", pages="e44119", keywords="meningioma segmentation", keywords="magnetic resonance imaging", keywords="MRI", keywords="convolutional neural network", keywords="model test and verification", keywords="CNN", keywords="radiographic image interpretation", abstract="Background: Convolutional neural networks (CNNs) have produced state-of-the-art results in meningioma segmentation on magnetic resonance imaging (MRI). However, images obtained from different institutions, protocols, or scanners may show significant domain shift, leading to performance degradation and challenging model deployment in real clinical scenarios. Objective: This research aims to investigate the realistic performance of a well-trained meningioma segmentation model when deployed across different health care centers and verify the methods to enhance its generalization. Methods: This study was performed in four centers. A total of 606 patients with 606 MRIs were enrolled between January 2015 and December 2021. Manual segmentations, determined through consensus readings by neuroradiologists, were used as the ground truth mask. The model was previously trained using a standard supervised CNN called Deeplab V3+ and was deployed and tested separately in four health care centers. To determine the appropriate approach to mitigating the observed performance degradation, two methods were used: unsupervised domain adaptation and supervised retraining. Results: The trained model showed a state-of-the-art performance in tumor segmentation in two health care institutions, with a Dice ratio of 0.887 (SD 0.108, 95\% CI 0.903-0.925) in center A and a Dice ratio of 0.874 (SD 0.800, 95\% CI 0.854-0.894) in center B. Whereas in the other health care institutions, the performance declined, with Dice ratios of 0.631 (SD 0.157, 95\% CI 0.556-0.707) in center C and 0.649 (SD 0.187, 95\% CI 0.566-0.732) in center D, as they obtained the MRI using different scanning protocols. The unsupervised domain adaptation showed a significant improvement in performance scores, with Dice ratios of 0.842 (SD 0.073, 95\% CI 0.820-0.864) in center C and 0.855 (SD 0.097, 95\% CI 0.826-0.886) in center D. 
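The Dice ratios quoted above and below are overlap scores between a predicted segmentation mask and the reference mask. A minimal NumPy sketch on hypothetical masks, not study data:

```python
import numpy as np

def dice(pred: np.ndarray, truth: np.ndarray, eps: float = 1e-8) -> float:
    """Dice similarity coefficient between two binary masks."""
    pred, truth = pred.astype(bool), truth.astype(bool)
    intersection = np.logical_and(pred, truth).sum()
    return 2.0 * intersection / (pred.sum() + truth.sum() + eps)

# Hypothetical masks, not study data.
truth = np.zeros((128, 128), dtype=bool)
truth[40:90, 40:90] = True
pred = np.zeros((128, 128), dtype=bool)
pred[45:95, 45:95] = True
print(f"Dice = {dice(pred, truth):.3f}")
```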
Nonetheless, it did not overperform the supervised retraining, which achieved Dice ratios of 0.899 (SD 0.026, 95\% CI 0.889-0.906) in center C and 0.886 (SD 0.046, 95\% CI 0.870-0.903) in center D. Conclusions: Deploying the trained CNN model in different health care institutions may show significant performance degradation due to the domain shift of MRIs. Under this circumstance, the use of unsupervised domain adaptation or supervised retraining should be considered, taking into account the balance between clinical requirements, model performance, and the size of the available data. ", doi="10.2196/44119", url="https://www.jmir.org/2023/1/e44119", url="http://www.ncbi.nlm.nih.gov/pubmed/38100181" } @Article{info:doi/10.2196/51578, author="Jones, Bree and Michou, Stavroula and Chen, Tong and Moreno-Betancur, Margarita and Kilpatrick, Nicky and Burgner, David and Vannahme, Christoph and Silva, Mihiri", title="Caries Detection in Primary Teeth Using Intraoral Scanners Featuring Fluorescence: Protocol for a Diagnostic Agreement Study", journal="JMIR Res Protoc", year="2023", month="Dec", day="14", volume="12", pages="e51578", keywords="dental caries", keywords="diagnosis", keywords="oral", keywords="technology", keywords="dental", keywords="image interpretation", keywords="computer-assisted", keywords="imaging", keywords="3D", keywords="quantitative light-induced fluorescence", keywords="diagnostic agreement", keywords="intra oral scanners", keywords="oral health", keywords="teeth", keywords="3D model", keywords="color", keywords="fluorescence", keywords="intraoral scanner", keywords="device", keywords="dentistry", abstract="Background: Digital methods that enable early caries identification can streamline data collection in research and optimize dental examinations for young children. Intraoral scanners are devices used for creating 3D models of teeth in dentistry and are being rapidly adopted into clinical workflows. Integrating fluorescence technology into scanner hardware can support early caries detection. However, the performance of caries detection methods using 3D models featuring color and fluorescence in primary teeth is unknown. Objective: This study aims to assess the diagnostic agreement between visual examination (VE), on-screen assessment of 3D models in approximate natural colors with and without fluorescence, and application of an automated caries scoring system to the 3D models with fluorescence for caries detection in primary teeth. Methods: The study sample will be drawn from eligible participants in a randomized controlled trial at the Royal Children's Hospital, Melbourne, Australia, where a dental assessment was conducted, including VE using the International Caries Detection and Assessment System (ICDAS) and intraoral scan using the TRIOS 4 (3Shape TRIOS A/S). Participant clinical records will be collected, and all records meeting eligibility criteria will be subject to an on-screen assessment of 3D models by 4 dental practitioners. First, all primary tooth surfaces will be examined for caries based on 3D geometry and color, using a merged ICDAS index. Second, the on-screen assessment of 3D models will include fluorescence, where caries will be classified using a merged ICDAS index that has been modified to incorporate fluorescence criteria. After 4 weeks, all examiners will repeat the on-screen assessment for all 3D models. Finally, an automated caries scoring system will be used to classify caries on primary occlusal surfaces. 
The agreement in the total number of caries detected per person between methods will be assessed using a Bland-Altman analysis and intraclass correlation coefficients. At a tooth surface level, agreement between methods will be estimated using multilevel models to account for the clustering of dental data. Results: Automated caries scoring of 3D models was completed as of October 2023, with the publication of results expected by July 2024. On-screen assessment has commenced, with the expected completion of scoring and data analysis by March 2024. Results will be disseminated by the end of 2024. Conclusions: The study outcomes may inform new practices that use digital models to facilitate dental assessments. Novel approaches that enable remote dental examination without compromising the accuracy of VE have wide applications in the research environment, clinical practice, and the provision of teledentistry. Trial Registration: Australian New Zealand Clinical Trials Registry ACTRN12622001237774; https://www.anzctr.org.au/Trial/Registration/TrialReview.aspx?id=384632 International Registered Report Identifier (IRRID): DERR1-10.2196/51578 ", doi="10.2196/51578", url="https://www.researchprotocols.org/2023/1/e51578", url="http://www.ncbi.nlm.nih.gov/pubmed/38096003" } @Article{info:doi/10.2196/47445, author="Ali, Hazrat and Qureshi, Rizwan and Shah, Zubair", title="Artificial Intelligence--Based Methods for Integrating Local and Global Features for Brain Cancer Imaging: Scoping Review", journal="JMIR Med Inform", year="2023", month="Nov", day="17", volume="11", pages="e47445", keywords="artificial intelligence", keywords="AI", keywords="brain cancer", keywords="brain tumor", keywords="medical imaging", keywords="segmentation", keywords="vision transformers", abstract="Background: Transformer-based models are gaining popularity in medical imaging and cancer imaging applications. Many recent studies have demonstrated the use of transformer-based models for brain cancer imaging applications such as diagnosis and tumor segmentation. Objective: This study aims to review how different vision transformers (ViTs) contributed to advancing brain cancer diagnosis and tumor segmentation using brain image data. This study examines the different architectures developed for enhancing the task of brain tumor segmentation. Furthermore, it explores how the ViT-based models augmented the performance of convolutional neural networks for brain cancer imaging. Methods: This review performed the study search and study selection following the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) guidelines. The search comprised 4 popular scientific databases: PubMed, Scopus, IEEE Xplore, and Google Scholar. The search terms were formulated to cover the interventions (ie, ViTs) and the target application (ie, brain cancer imaging). The title and abstract for study selection were performed by 2 reviewers independently and validated by a third reviewer. Data extraction was performed by 2 reviewers and validated by a third reviewer. Finally, the data were synthesized using a narrative approach. Results: Of the 736 retrieved studies, 22 (3\%) were included in this review. These studies were published in 2021 and 2022. The most commonly addressed task in these studies was tumor segmentation using ViTs. No study reported early detection of brain cancer. 
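The Bland-Altman agreement analysis planned in the caries detection protocol above summarizes between-method differences as a mean bias with 95% limits of agreement. A minimal NumPy sketch on hypothetical per-child caries counts, not study data:

```python
import numpy as np

def bland_altman(method_a: np.ndarray, method_b: np.ndarray):
    """Mean bias and 95% limits of agreement between two measurement methods."""
    diff = method_a - method_b
    bias = diff.mean()
    sd = diff.std(ddof=1)
    return bias, (bias - 1.96 * sd, bias + 1.96 * sd)

# Hypothetical per-child caries counts: visual examination vs on-screen 3D model assessment.
visual = np.array([2, 0, 5, 3, 1, 4, 2, 6])
on_screen = np.array([2, 1, 4, 3, 0, 4, 3, 5])

bias, (lower, upper) = bland_altman(visual, on_screen)
print(f"bias = {bias:.2f}, 95% limits of agreement = ({lower:.2f}, {upper:.2f})")
```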
Among the different ViT architectures, Shifted Window transformer--based architectures have recently become the most popular choice of the research community. Among the included architectures, UNet transformer and TransUNet had the highest number of parameters and thus needed a cluster of as many as 8 graphics processing units for model training. The brain tumor segmentation challenge data set was the most popular data set used in the included studies. ViT was used in different combinations with convolutional neural networks to capture both the global and local context of the input brain imaging data. Conclusions: It can be argued that the computational complexity of transformer architectures is a bottleneck in advancing the field and enabling clinical transformations. This review provides the current state of knowledge on the topic, and the findings of this review will be helpful for researchers in the field of medical artificial intelligence and its applications in brain cancer. ", doi="10.2196/47445", url="https://medinform.jmir.org/2023/1/e47445", url="http://www.ncbi.nlm.nih.gov/pubmed/37976086" } @Article{info:doi/10.2196/50448, author="Gong, Jeong Eun and Bang, Seok Chang and Lee, Jun Jae and Jeong, Min Hae and Baik, Ho Gwang and Jeong, Hoon Jae and Dick, Sigmund and Lee, Hun Gi", title="Clinical Decision Support System for All Stages of Gastric Carcinogenesis in Real-Time Endoscopy: Model Establishment and Validation Study", journal="J Med Internet Res", year="2023", month="Oct", day="30", volume="25", pages="e50448", keywords="atrophy", keywords="intestinal metaplasia", keywords="metaplasia", keywords="deep learning", keywords="endoscopy", keywords="gastric neoplasms", keywords="neoplasm", keywords="neoplasms", keywords="internal medicine", keywords="cancer", keywords="oncology", keywords="decision support", keywords="real time", keywords="gastrointestinal", keywords="gastric", keywords="intestinal", keywords="machine learning", keywords="clinical decision support system", keywords="CDSS", keywords="computer aided", keywords="diagnosis", keywords="diagnostic", keywords="carcinogenesis", abstract="Background: Our research group previously established a deep-learning--based clinical decision support system (CDSS) for real-time endoscopy-based detection and classification of gastric neoplasms. However, preneoplastic conditions, such as atrophy and intestinal metaplasia (IM) were not taken into account, and there is no established model that classifies all stages of gastric carcinogenesis. Objective: This study aims to build and validate a CDSS for real-time endoscopy for all stages of gastric carcinogenesis, including atrophy and IM. Methods: A total of 11,868 endoscopic images were used for training and internal testing. The primary outcomes were lesion classification accuracy (6 classes: advanced gastric cancer, early gastric cancer, dysplasia, atrophy, IM, and normal) and atrophy and IM lesion segmentation rates for the segmentation model. The following tests were carried out to validate the performance of lesion classification accuracy: (1) external testing using 1282 images from another institution and (2) evaluation of the classification accuracy of atrophy and IM in real-world procedures in a prospective manner. To estimate the clinical utility, 2 experienced endoscopists were invited to perform a blind test with the same data set. 
A CDSS was constructed by combining the established 6-class lesion classification model and the preneoplastic lesion segmentation model with the previously established lesion detection model. Results: The overall lesion classification accuracy (95\% CI) was 90.3\% (89\%-91.6\%) in the internal test. For the performance validation, the CDSS achieved 85.3\% (83.4\%-97.2\%) overall accuracy. The per-class external test accuracies for atrophy and IM were 95.3\% (92.6\%-98\%) and 89.3\% (85.4\%-93.2\%), respectively. CDSS-assisted endoscopy showed an accuracy of 92.1\% (88.8\%-95.4\%) for atrophy and 95.5\% (92\%-99\%) for IM in the real-world application of 522 consecutive screening endoscopies. There was no significant difference in the overall accuracy between the invited endoscopists and established CDSS in the prospective real-clinic evaluation (P=.23). The CDSS demonstrated a segmentation rate of 93.4\% (95\% CI 92.4\%-94.4\%) for atrophy or IM lesion segmentation in the internal testing. Conclusions: The CDSS achieved high performance in terms of computer-aided diagnosis of all stages of gastric carcinogenesis and demonstrated real-world application potential. ", doi="10.2196/50448", url="https://www.jmir.org/2023/1/e50448", url="http://www.ncbi.nlm.nih.gov/pubmed/37902818" } @Article{info:doi/10.2196/48381, author="Kim, Yu-Hee and Park, In and Cho, Buem Soo and Yang, Seoyon and Kim, Il and Lee, Kyong-Ha and Choi, Kwangnam and Han, Seung-Ho", title="Three-Dimensional Virtual Reconstructions of Shoulder Movements Using Computed Tomography Images: Model Development", journal="Interact J Med Res", year="2023", month="Oct", day="5", volume="12", pages="e48381", keywords="human digital twin", keywords="musculoskeletal twin", keywords="shoulder movement", keywords="visualization application", keywords="digital twin", keywords="musculoskeletal", keywords="visualization", keywords="movement", keywords="joint", keywords="shoulder", keywords="tomography", keywords="development", keywords="animation", keywords="animated", keywords="anatomy", keywords="anatomical", keywords="digital health", keywords="representation", keywords="simulation", keywords="virtual", doi="10.2196/48381", url="https://www.i-jmr.org/2023/1/e48381", url="http://www.ncbi.nlm.nih.gov/pubmed/37796554" } @Article{info:doi/10.2196/48357, author="Nervil, Gede Gustav and Ternov, Kvorning Niels and Vestergaard, Tine and S{\o}lvsten, Henrik and Chakera, Hougaard Annette and Tolsgaard, Gr{\o}nneb{\ae}k Martin and H{\"o}lmich, Rosenkrantz Lisbet", title="Improving Skin Cancer Diagnostics Through a Mobile App With a Large Interactive Image Repository: Randomized Controlled Trial", journal="JMIR Dermatol", year="2023", month="Aug", day="9", volume="6", pages="e48357", keywords="dermoscopy", keywords="nevi", keywords="skin neoplasms", keywords="benign skin tumors", keywords="melanoma", keywords="skin cancer", keywords="medical education", keywords="eLearning", keywords="digital learning", keywords="diagnostic test", keywords="mHealth", keywords="mobile app", keywords="recognition training", keywords="skin lesions", abstract="Background: Skin cancer diagnostics is challenging, and mastery requires extended periods of dedicated practice. 
Objective: The aim of the study was to determine if self-paced pattern recognition training in skin cancer diagnostics with clinical and dermoscopic images of skin lesions using a large-scale interactive image repository (LIIR) with patient cases improves primary care physicians' (PCPs') diagnostic skills and confidence. Methods: A total of 115 PCPs were randomized (allocation ratio 3:1) to receive or not receive self-paced pattern recognition training in skin cancer diagnostics using an LIIR with patient cases through a quiz-based smartphone app during an 8-day period. The participants' ability to diagnose skin cancer was evaluated using a 12-item multiple-choice questionnaire prior to and 8 days after the educational intervention period. Their thoughts on the use of dermoscopy were assessed using a study-specific questionnaire. A learning curve was calculated through the analysis of data from the mobile app. Results: On average, participants in the intervention group spent 2 hours 26 minutes quizzing digital patient cases and 41 minutes reading the educational material. They had an average preintervention multiple choice questionnaire score of 52.0\% of correct answers, which increased to 66.4\% on the postintervention test; a statistically significant improvement of 14.3 percentage points (P<.001; 95\% CI 9.8-18.9) with intention-to-treat analysis. Analysis of participants who received the intervention as per protocol (500 patient cases in 8 days) showed an average increase of 16.7 percentage points (P<.001; 95\% CI 11.3-22.0) from 53.9\% to 70.5\%. Their overall ability to correctly recognize malignant lesions in the LIIR patient cases improved over the intervention period by 6.6 percentage points from 67.1\% (95\% CI 65.2-69.3) to 73.7\% (95\% CI 72.5-75.0) and their ability to set the correct diagnosis improved by 10.5 percentage points from 42.5\% (95\% CI 40.2\%-44.8\%) to 53.0\% (95\% CI 51.3-54.9). The diagnostic confidence of participants in the intervention group increased on a scale from 1 to 4 by 32.9\% from 1.6 to 2.1 (P<.001). Participants in the control group did not increase their postintervention score or their diagnostic confidence during the same period. Conclusions: Self-paced pattern recognition training in skin cancer diagnostics through the use of a digital LIIR with patient cases delivered by a quiz-based mobile app improves the diagnostic accuracy of PCPs. Trial Registration: ClinicalTrials.gov NCT05661370; https://classic.clinicaltrials.gov/ct2/show/NCT05661370 ", doi="10.2196/48357", url="https://derma.jmir.org/2023/1/e48357", url="http://www.ncbi.nlm.nih.gov/pubmed/37624707" } @Article{info:doi/10.2196/44327, author="Almashmoum, Maryam and Cunningham, James and Alkhaldi, Ohoud and Anisworth, John", title="Factors That Affect Knowledge-Sharing Behaviors in Medical Imaging Departments in Cancer Centers: Systematic Review", journal="JMIR Hum Factors", year="2023", month="Jul", day="12", volume="10", pages="e44327", keywords="knowledge management", keywords="knowledge sharing", keywords="medical imaging department", keywords="radiology department", keywords="nuclear medicine department", keywords="facilitators", keywords="barriers", keywords="systematic review", abstract="Background: Knowledge management plays a significant role in health care institutions. It consists of 4 processes: knowledge creation, knowledge capture, knowledge sharing, and knowledge application. 
The success of health care institutions relies on effective knowledge sharing among health care professionals, so the facilitators and barriers to knowledge sharing must be identified and understood. Medical imaging departments play a key role in cancer centers. Therefore, an understanding of the factors that affect knowledge sharing in medical imaging departments should be sought to increase patient outcomes and reduce medical errors. Objective: The purpose of this systematic review was to identify the facilitators and barriers that affect knowledge-sharing behaviors in medical imaging departments and identify the differences between medical imaging departments in general hospitals and cancer centers. Methods: We performed a systematic search in PubMed Central, EBSCOhost (CINAHL), Ovid MEDLINE, Ovid Embase, Elsevier (Scopus), ProQuest, and Clarivate (Web of Science) in December 2021. Relevant articles were identified by examining the titles and abstracts. In total, 2 reviewers independently screened the full texts of relevant papers according to the inclusion and exclusion criteria. We included qualitative, quantitative, and mixed methods studies that investigated the facilitators and barriers that affect knowledge sharing. We used the Mixed Methods Appraisal Tool to assess the quality of the included articles and narrative synthesis to report the results. Results: A total of 49 articles were selected for the full in-depth analysis, and 38 (78\%) studies were included in the final review, with 1 article added from other selected databases. There were 31 facilitators and 10 barriers identified that affected knowledge-sharing practices in medical imaging departments. These facilitators were divided according to their characteristics into 3 categories: individual, departmental, and technological facilitators. The barriers that hindered knowledge sharing were divided into 4 categories: financial, administrative, technological, and geographical barriers. Conclusions: This review highlighted the factors that influenced knowledge-sharing practices in medical imaging departments in cancer centers and general hospitals. In terms of the facilitators and barriers to knowledge sharing, this study shows that these are the same in medical imaging departments, whether in general hospitals or cancer centers. Our findings can be used as guidelines for medical imaging departments to support knowledge-sharing frameworks and enhance knowledge sharing by understanding the facilitators and barriers. ", doi="10.2196/44327", url="https://humanfactors.jmir.org/2023/1/e44327", url="http://www.ncbi.nlm.nih.gov/pubmed/37436810" } @Article{info:doi/10.2196/41906, author="Caelers, Inge and Boselie, Toon and van Hemert, Wouter and Rijkers, Kim and De Bie, Rob and van Santbrink, Henk", title="The Variability of Lumbar Sequential Motion Patterns: Observational Study", journal="JMIR Biomed Eng", year="2023", month="Jun", day="20", volume="8", pages="e41906", keywords="lumbar spine", keywords="cinematographic recordings", keywords="sequence", keywords="motion pattern", keywords="flexion", keywords="extension", keywords="rotation", keywords="physiological", keywords="musculoskeletal", keywords="motion", keywords="spine", keywords="upper lumbar", keywords="observational study", keywords="physiological motion", abstract="Background: Physiological motion of the lumbar spine is a topic of interest for musculoskeletal health care professionals since abnormal motion is believed to be related to lumbar complaints. 
Many researchers have described ranges of motion for the lumbar spine, but only a few have mentioned specific motion patterns of each individual segment during flexion and extension, mostly comprising the sequence of segmental initiation in sagittal rotation. However, an adequate definition of physiological motion is still lacking. For the lower cervical spine, a consistent pattern of segmental contributions in a flexion-extension movement in young healthy individuals was described, resulting in a definition of physiological motion of the cervical spine. Objective: This study aimed to define the lumbar spine's physiological motion pattern by determining the sequence of segmental contribution in sagittal rotation of each vertebra during maximum flexion and extension in healthy male participants. Methods: Cinematographic recordings were performed twice in 11 healthy male participants, aged 18-25 years, without a history of spine problems, with a 2-week interval (time points T1 and T2). Image recognition software was used to identify specific patterns in the sequence of segmental contributions per individual by plotting segmental rotation of each individual segment against the cumulative rotation of segments L1 to S1. Intraindividual variability was determined by testing T1 against T2. Intraclass correlation coefficients were tested by reevaluation of 30 intervertebral sequences by a second researcher. Results: No consistent pattern was found when studying the graphs of the cinematographic recordings during flexion. A much more consistent pattern was found during extension, especially in the last phase. It consisted of a peak in rotation in L3L4, followed by a peak in L2L3, and finally, in L1L2. This pattern was present in 71\% (15/21) of all recordings; 64\% (7/11) of the participants had a consistent pattern at both time points. The sequence of segmental contribution was less consistent in the lumbar spine than in the cervical spine, possibly caused by differences in facet orientation, intervertebral discs, overprojection of the pelvis, and muscle recruitment. Conclusions: In 64\% (7/11) of the asymptomatic young male participants, a consistent motion pattern was found in the upper lumbar spine during the last phase of extension. Physiological motion of the lumbar spine is a broad concept, influenced by multiple factors, which cannot be captured in a firm definition yet. Trial Registration: ClinicalTrials.gov NCT03737227; https://clinicaltrials.gov/ct2/show/NCT03737227 International Registered Report Identifier (IRRID): RR2-10.2196/14741 ", doi="10.2196/41906", url="https://biomedeng.jmir.org/2023/1/e41906", url="http://www.ncbi.nlm.nih.gov/pubmed/38875682" } @Article{info:doi/10.2196/41808, author="Liman, Leon and May, Bernd and Fette, Georg and Krebs, Jonathan and Puppe, Frank", title="Using a Clinical Data Warehouse to Calculate and Present Key Metrics for the Radiology Department: Implementation and Performance Evaluation", journal="JMIR Med Inform", year="2023", month="May", day="22", volume="11", pages="e41808", keywords="data warehouse", keywords="electronic health records", keywords="radiology", keywords="statistics and numerical data", keywords="hospital data", keywords="eHealth", keywords="medical records", abstract="Background: Due to the importance of radiologic examinations, such as X-rays or computed tomography scans, for many clinical diagnoses, the optimal use of the radiology department is 1 of the primary goals of many hospitals. 
Objective: This study aims to calculate the key metrics of this use by creating a radiology data warehouse solution, where data from radiology information systems (RISs) can be imported and then queried using a query language as well as a graphical user interface (GUI). Methods: Using a simple configuration file, the developed system allowed for the processing of radiology data exported from any kind of RIS into a Microsoft Excel, comma-separated value (CSV), or JavaScript Object Notation (JSON) file. These data were then imported into a clinical data warehouse. Additional values based on the radiology data were calculated during this import process by implementing 1 of several provided interfaces. Afterward, the query language and GUI of the data warehouse were used to configure and calculate reports on these data. For the most common types of requested reports, a web interface was created to view their numbers as graphics. Results: The tool was successfully tested with the data of 4 different German hospitals from 2018 to 2021, with a total of 1,436,111 examinations. The user feedback was good, since all their queries could be answered if the available data were sufficient. The initial processing of the radiology data for using them with the clinical data warehouse took (depending on the amount of data provided by each hospital) between 7 minutes and 1 hour 11 minutes. Calculating 3 reports of different complexities on the data of each hospital was possible in 1-3 seconds for reports with up to 200 individual calculations and in up to 1.5 minutes for reports with up to 8200 individual calculations. Conclusions: A system was developed with the main advantage of being generic concerning the export of different RISs as well as concerning the configuration of queries for various reports. The queries could be configured easily using the GUI of the data warehouse, and their results could be exported into the standard formats Excel and CSV for further processing. ", doi="10.2196/41808", url="https://medinform.jmir.org/2023/1/e41808", url="http://www.ncbi.nlm.nih.gov/pubmed/37213191" } @Article{info:doi/10.2196/45299, author="Moon, Tae In and Kim, Sun-Hwa and Chin, Yeon Jung and Park, Hun Sung and Yoon, Chang-Hwan and Youn, Tae-Jin and Chae, In-Ho and Kang, Si-Hyuck", title="Accuracy of Artificial Intelligence--Based Automated Quantitative Coronary Angiography Compared to Intravascular Ultrasound: Retrospective Cohort Study", journal="JMIR Cardio", year="2023", month="Apr", day="26", volume="7", pages="e45299", keywords="artificial intelligence", keywords="AI", keywords="coronary angiography", keywords="coronary stenosis", keywords="interventional ultrasonography", keywords="coronary", keywords="machine learning", keywords="angiography", keywords="stenosis", keywords="automated analysis", keywords="computer vision", abstract="Background: An accurate quantitative analysis of coronary artery stenotic lesions is essential to make optimal clinical decisions. Recent advances in computer vision and machine learning technology have enabled the automated analysis of coronary angiography. Objective: The aim of this paper is to validate the performance of artificial intelligence--based quantitative coronary angiography (AI-QCA) in comparison with that of intravascular ultrasound (IVUS). Methods: This retrospective study included patients who underwent IVUS-guided coronary intervention at a single tertiary center in Korea. 
Proximal and distal reference areas, minimal luminal area, percent plaque burden, and lesion length were measured by AI-QCA and human experts using IVUS. First, fully automated QCA analysis was compared with IVUS analysis. Next, we adjusted the proximal and distal margins of AI-QCA to avoid geographic mismatch. Scatter plots, Pearson correlation coefficients, and Bland-Altman plots were used to analyze the data. Results: A total of 54 significant lesions were analyzed in 47 patients. The proximal and distal reference areas, as well as the minimal luminal area, showed moderate to strong correlation between the 2 modalities (correlation coefficients of 0.57, 0.80, and 0.52, respectively; P<.001). The correlation was weaker for percent area stenosis and lesion length, although statistically significant (correlation coefficients of 0.29 and 0.33, respectively). AI-QCA tended to measure reference vessel areas smaller and lesion lengths shorter than IVUS did. Systematic proportional bias was not observed in Bland-Altman plots. The largest source of bias was the geographic mismatch between AI-QCA and IVUS. Discrepancies in the proximal or distal lesion margins were observed between the 2 modalities, which were more frequent at the distal margins. After the adjustment of proximal or distal margins, there was a stronger correlation of proximal and distal reference areas between AI-QCA and IVUS (correlation coefficients of 0.70 and 0.83, respectively). Conclusions: AI-QCA showed a moderate to strong correlation compared with IVUS in analyzing coronary lesions with significant stenosis. The main discrepancy was in the perception of the distal margins by AI-QCA, and the correction of margins improved the correlation coefficients. We believe that this novel tool could provide confidence to treating physicians and help in making optimal clinical decisions. ", doi="10.2196/45299", url="https://cardio.jmir.org/2023/1/e45299", url="http://www.ncbi.nlm.nih.gov/pubmed/37099368" } @Article{info:doi/10.2196/41233, author="Brady, J. Christopher and Cockrell, Chase R. and Aldrich, R. Lindsay and Wolle, A. Meraf and West, K. Sheila", title="A Virtual Reading Center Model Using Crowdsourcing to Grade Photographs for Trachoma: Validation Study", journal="J Med Internet Res", year="2023", month="Apr", day="6", volume="25", pages="e41233", keywords="trachoma", keywords="crowdsourcing", keywords="telemedicine", keywords="ophthalmic photography", keywords="Amazon Mechanical Turk", keywords="image analysis", keywords="diagnosis", keywords="detection", keywords="cloud-based", keywords="image interpretation", keywords="disease identification", keywords="diagnostics", keywords="image grading", keywords="disease grading", keywords="trachomatous inflammation---follicular", keywords="ophthalmology", abstract="Background: As trachoma is eliminated, skilled field graders become less adept at correctly identifying active disease (trachomatous inflammation---follicular [TF]). Deciding if trachoma has been eliminated from a district or if treatment strategies need to be continued or reinstated is of critical public health importance. Telemedicine solutions require both connectivity, which can be poor in the resource-limited regions of the world in which trachoma occurs, and accurate grading of the images. Objective: Our purpose was to develop and validate a cloud-based ``virtual reading center'' (VRC) model using crowdsourcing for image interpretation. 
Methods: The Amazon Mechanical Turk (AMT) platform was used to recruit lay graders to interpret 2299 gradable images from a prior field trial of a smartphone-based camera system. Each image received 7 grades for US \$0.05 per grade in this VRC. The resultant data set was divided into training and test sets to internally validate the VRC. In the training set, crowdsourcing scores were summed, and the optimal raw score cutoff was chosen to optimize kappa agreement and the resulting prevalence of TF. The best method was then applied to the test set, and the sensitivity, specificity, kappa, and TF prevalence were calculated. Results: In this trial, over 16,000 grades were rendered in just over 60 minutes for US \$1098 including AMT fees. After choosing an AMT raw score cut point to optimize kappa near the World Health Organization (WHO)--endorsed level of 0.7 (with a simulated 40\% prevalence TF), crowdsourcing was 95\% sensitive and 87\% specific for TF in the training set with a kappa of 0.797. All 196 crowdsourced-positive images received a skilled overread to mimic a tiered reading center and specificity improved to 99\%, while sensitivity remained above 78\%. Kappa for the entire sample improved from 0.162 to 0.685 with overreads, and the skilled grader burden was reduced by over 80\%. This tiered VRC model was then applied to the test set and produced a sensitivity of 99\% and a specificity of 76\% with a kappa of 0.775 in the entire set. The prevalence estimated by the VRC was 2.70\% (95\% CI 1.84\%-3.80\%) compared to the ground truth prevalence of 2.87\% (95\% CI 1.98\%-4.01\%). Conclusions: A VRC model using crowdsourcing as a first pass with skilled grading of positive images was able to identify TF rapidly and accurately in a low prevalence setting. The findings from this study support further validation of a VRC and crowdsourcing for image grading and estimation of trachoma prevalence from field-acquired images, although further prospective field testing is required to determine if diagnostic characteristics are acceptable in real-world surveys with a low prevalence of the disease. 
", doi="10.2196/41233", url="https://www.jmir.org/2023/1/e41233", url="http://www.ncbi.nlm.nih.gov/pubmed/37023420" } @Article{info:doi/10.2196/39917, author="Banerjee, Agnik and Mutlu, Cezmi Onur and Kline, Aaron and Surabhi, Saimourya and Washington, Peter and Wall, Paul Dennis", title="Training and Profiling a Pediatric Facial Expression Classifier for Children on Mobile Devices: Machine Learning Study", journal="JMIR Form Res", year="2023", month="Mar", day="21", volume="7", pages="e39917", keywords="edge computing", keywords="affective computing", keywords="autism spectrum disorder", keywords="autism", keywords="ASD", keywords="classifier", keywords="classification", keywords="model", keywords="algorithm", keywords="mobile health", keywords="computer vision", keywords="deep learning", keywords="machine learning for health", keywords="pediatrics", keywords="emotion recognition", keywords="mHealth", keywords="diagnostic tool", keywords="digital therapy", keywords="child", keywords="developmental disorder", keywords="smartphone", keywords="image analysis", keywords="machine learning", keywords="Image classification", keywords="neural network", abstract="Background: Implementing automated facial expression recognition on mobile devices could provide an accessible diagnostic and therapeutic tool for those who struggle to recognize facial expressions, including children with developmental behavioral conditions such as autism. Despite recent advances in facial expression classifiers for children, existing models are too computationally expensive for smartphone use. Objective: We explored several state-of-the-art facial expression classifiers designed for mobile devices, used posttraining optimization techniques for both classification performance and efficiency on a Motorola Moto G6 phone, evaluated the importance of training our classifiers on children versus adults, and evaluated the models' performance against different ethnic groups. Methods: We collected images from 12 public data sets and used video frames crowdsourced from the GuessWhat app to train our classifiers. All images were annotated for 7 expressions: neutral, fear, happiness, sadness, surprise, anger, and disgust. We tested 3 copies for each of 5 different convolutional neural network architectures: MobileNetV3-Small 1.0x, MobileNetV2 1.0x, EfficientNetB0, MobileNetV3-Large 1.0x, and NASNetMobile. We trained the first copy on images of children, second copy on images of adults, and third copy on all data sets. We evaluated each model against the entire Child Affective Facial Expression (CAFE) set and by ethnicity. We performed weight pruning, weight clustering, and quantize-aware training when possible and profiled each model's performance on the Moto G6. Results: Our best model, a MobileNetV3-Large network pretrained on ImageNet, achieved 65.78\% accuracy and 65.31\% F1-score on the CAFE and a 90-millisecond inference latency on a Moto G6 phone when trained on all data. This accuracy is only 1.12\% lower than the current state of the art for CAFE, a model with 13.91x more parameters that was unable to run on the Moto G6 due to its size, even when fully optimized. When trained solely on children, this model achieved 60.57\% accuracy and 60.29\% F1-score. When trained only on adults, the model received 53.36\% accuracy and 53.10\% F1-score. 
Although the MobileNetV3-Large network trained on all data sets achieved nearly a 60\% F1-score across all ethnicities, it achieved lower accuracy (by as much as 11.56\%) and lower F1-scores (by as much as 11.25\%) for South Asian and African American children than for other groups. Conclusions: With specialized design and optimization techniques, facial expression classifiers can become lightweight enough to run on mobile devices and achieve state-of-the-art performance. There is potentially a ``data shift'' phenomenon between facial expressions of children compared with adults; our classifiers performed much better when trained on children. Certain underrepresented ethnic groups (e.g., South Asian and African American) also performed significantly worse than groups such as European Caucasian despite similar data quality. Our models can be integrated into mobile health therapies to help diagnose autism spectrum disorder and provide targeted therapeutic treatment to children. ", doi="10.2196/39917", url="https://formative.jmir.org/2023/1/e39917", url="http://www.ncbi.nlm.nih.gov/pubmed/35962462" } @Article{info:doi/10.2196/41421, author="Cooley, Sarah and Nelson, M. Brittany and Rosenow, Alexander and Westerhaus, Elizabeth and Cade, Todd W. and Reeds, N. Dominic and Vaida, Florin and Yarasheski, E. Kevin and Paul, H. Robert and Ances, M. Beau", title="Exercise Training to Improve Brain Health in Older People Living With HIV: Study Protocol for a Randomized Controlled Trial", journal="JMIR Res Protoc", year="2023", month="Mar", day="21", volume="12", pages="e41421", keywords="cardiorespiratory fitness", keywords="cognition", keywords="exercise", keywords="HIV", keywords="magnetic resonance imaging", keywords="resistance training", abstract="Background: With the advent of antiretrovirals, people living with HIV are living near-normal lifespans. However, people living with HIV are at greater risk of experiencing cognitive impairment and reduced brain integrity despite well-controlled viremia. A robust literature supports exercise interventions as a method of improving cognition and structural brain integrity in older individuals without HIV. The effects of exercise on cardiometabolic, neurocognitive, and neural structures in middle-aged to older people living with HIV are less well known, with few prospective studies examining these measures. Objective: This prospective randomized clinical trial will examine the effects of a 6-month exercise training intervention compared to a 6-month stretching intervention (control) on cardiorespiratory fitness, physical function and strength, cognition, and neuroimaging measures of brain volumes and cerebral blood flow in people living with HIV. Methods: Sedentary middle-aged to older people living with HIV (ages ≥40; n=150) with undetectable HIV viral load (<20 copies/mL) will be enrolled in the study. At the baseline and final visit, fasting plasma lipid, insulin, glucose, and brain neurotrophic factor concentrations; cardiorespiratory fitness; cognitive performance; brain volumes; and cerebral blood flow via a magnetic resonance imaging scan will be measured. Participants will be randomized in a 2:1 ratio to either the exercise or control stretching intervention. All participants will complete their assigned programs at a community fitness center 3 times a week for 6 months. A professional fitness trainer will provide personal training guidance at all sessions for individuals enrolled in both arms. 
Individuals randomized to the exercise intervention will perform endurance and strength training exercises, while those randomized to the control intervention will perform stretches to increase flexibility. A midpoint visit (at 3 months) will assess cognitive performance, and at the end point visit, subjects will undergo cardiorespiratory fitness and cognition testing, and a magnetic resonance imaging scan. Physical activity throughout the duration of the trial will be recorded using an actigraph. Results: Recruitment and data collection are complete as of December 2020. Data processing, cleaning, and organization are complete as of December 2021. Data analysis began in January 2022, with the publication of study results for primary aims 1 and 2 expected by early 2023. Conclusions: This study will investigate the effects of a 6-month aerobic and resistance exercise training intervention to improve cardiometabolic risk factors, cognitive performance, cerebral structure, and blood flow in sedentary people living with HIV. Results will inform clinicians and patients of the potential benefits of a structured aerobic exercise training program on the cognitive, functional, and cardiometabolic health status of older people living with HIV. Assessment of compliance will inform the development and implementation of future exercise programs for people living with HIV. Trial Registration: ClinicalTrials.gov NCT02663934; https://clinicaltrials.gov/ct2/show/NCT02663934 International Registered Report Identifier (IRRID): DERR1-10.2196/41421 ", doi="10.2196/41421", url="https://www.researchprotocols.org/2023/1/e41421", url="http://www.ncbi.nlm.nih.gov/pubmed/36943345" } @Article{info:doi/10.2196/44932, author="Huang, Kai and Wu, Xian and Li, Yixin and Lv, Chengzhi and Yan, Yangtian and Wu, Zhe and Zhang, Mi and Huang, Weihong and Jiang, Zixi and Hu, Kun and Li, Mingjia and Su, Juan and Zhu, Wu and Li, Fangfang and Chen, Mingliang and Chen, Jing and Li, Yongjian and Zeng, Mei and Zhu, Jianjian and Cao, Duling and Huang, Xing and Huang, Lei and Hu, Xing and Chen, Zeyu and Kang, Jian and Yuan, Lei and Huang, Chengji and Guo, Rui and Navarini, Alexander and Kuang, Yehong and Chen, Xiang and Zhao, Shuang", title="Artificial Intelligence--Based Psoriasis Severity Assessment: Real-world Study and Application", journal="J Med Internet Res", year="2023", month="Mar", day="16", volume="25", pages="e44932", keywords="artificial intelligence", keywords="psoriasis severity assessment", keywords="Psoriasis Area and Severity Index", keywords="PASI", keywords="deep learning system", keywords="mobile app", keywords="psoriasis", keywords="inflammation", keywords="dermatology", keywords="tools", keywords="management", keywords="model", keywords="design", keywords="users", keywords="chronic disease", abstract="Background: Psoriasis is one of the most frequent inflammatory skin conditions and could be treated via tele-dermatology, provided that the current lack of reliable tools for objective severity assessments is overcome. Psoriasis Area and Severity Index (PASI) has a prominent level of subjectivity and is rarely used in real practice, although it is the most widely accepted metric for measuring psoriasis severity currently. Objective: This study aimed to develop an image--artificial intelligence (AI)--based validated system for severity assessment with the explicit intention of facilitating long-term management of patients with psoriasis. 
Methods: A deep learning system was trained to estimate the PASI score by using 14,096 images from 2367 patients with psoriasis. We used 1962 patients from January 2015 to April 2021 to train the model and the other 405 patients from May 2021 to July 2021 to validate it. A multiview feature enhancement block was designed to combine vision features from different perspectives to better simulate the visual diagnostic method in clinical practice. A classification head along with a regression head was simultaneously applied to generate PASI scores, and an extra cross-teacher head after these 2 heads was designed to revise their output. The mean absolute error (MAE) was used as the metric to evaluate the accuracy of the predicted PASI score; minimizing the MAE brings the predicted scores closer to the reference PASI values. Then, the proposed model was compared with 43 experienced dermatologists. Finally, the proposed model was deployed into an app named SkinTeller on the WeChat platform. Results: The proposed image-AI--based PASI-estimating model outperformed the average performance of 43 experienced dermatologists with a 33.2\% performance gain in the overall PASI score. In the ablation experiment, the model achieved its smallest MAE of 2.05 with 3 input images. In other words, for the task of psoriasis severity assessment, the severity score predicted by our model was close to the PASI score diagnosed by experienced dermatologists. The SkinTeller app has been used 3369 times for PASI scoring in 1497 patients from 18 hospitals, and its excellent performance was confirmed by a feedback survey of 43 dermatologist users. Conclusions: An image-AI--based psoriasis severity assessment model has been proposed to automatically calculate PASI scores in an efficient, objective, and accurate manner. The SkinTeller app may be a promising alternative for dermatologists' accurate assessment in the real world and chronic disease self-management in patients with psoriasis. ", doi="10.2196/44932", url="https://www.jmir.org/2023/1/e44932", url="http://www.ncbi.nlm.nih.gov/pubmed/36927843" } @Article{info:doi/10.2196/41355, author="Burnette, Colin and Sivesind, E. 
Torunn and Dellavalle, Robert", title="From the Cochrane Library: Optical Coherence Tomography for Diagnosing Skin Cancer in Adults", journal="JMIR Dermatol", year="2023", month="Mar", day="13", volume="6", pages="e41355", keywords="systematic review", keywords="optical coherence tomography", keywords="tomography", keywords="diagnostic imaging", keywords="optical imaging", keywords="laser", keywords="skin lesions", keywords="diagnostic techniques", keywords="melanoma", keywords="basal cell carcinoma", keywords="cancer", keywords="skin cancer", keywords="clinical", keywords="cell", keywords="diagnose", doi="10.2196/41355", url="https://derma.jmir.org/2023/1/e41355", url="http://www.ncbi.nlm.nih.gov/pubmed/37632933" } @Article{info:doi/10.2196/43832, author="Xue, Peng and Si, Mingyu and Qin, Dongxu and Wei, Bingrui and Seery, Samuel and Ye, Zichen and Chen, Mingyang and Wang, Sumeng and Song, Cheng and Zhang, Bo and Ding, Ming and Zhang, Wenling and Bai, Anying and Yan, Huijiao and Dang, Le and Zhao, Yuqian and Rezhake, Remila and Zhang, Shaokai and Qiao, Youlin and Qu, Yimin and Jiang, Yu", title="Unassisted Clinicians Versus Deep Learning--Assisted Clinicians in Image-Based Cancer Diagnostics: Systematic Review With Meta-analysis", journal="J Med Internet Res", year="2023", month="Mar", day="2", volume="25", pages="e43832", keywords="deep learning", keywords="cancer diagnosis", keywords="systematic review", keywords="meta-analysis", abstract="Background: A number of publications have demonstrated that deep learning (DL) algorithms matched or outperformed clinicians in image-based cancer diagnostics, but these algorithms are frequently considered as opponents rather than partners. Despite the clinicians-in-the-loop DL approach having great potential, no study has systematically quantified the diagnostic accuracy of clinicians with and without the assistance of DL in image-based cancer identification. Objective: We systematically quantified the diagnostic accuracy of clinicians with and without the assistance of DL in image-based cancer identification. Methods: PubMed, Embase, IEEEXplore, and the Cochrane Library were searched for studies published between January 1, 2012, and December 7, 2021. Any type of study design was permitted that focused on comparing unassisted clinicians and DL-assisted clinicians in cancer identification using medical imaging. Studies using medical waveform-data graphics material and those investigating image segmentation rather than classification were excluded. Studies providing binary diagnostic accuracy data and contingency tables were included for further meta-analysis. Two subgroups were defined and analyzed, including cancer type and imaging modality. Results: In total, 9796 studies were identified, of which 48 were deemed eligible for systematic review. Twenty-five of these studies made comparisons between unassisted clinicians and DL-assisted clinicians and provided sufficient data for statistical synthesis. We found a pooled sensitivity of 83\% (95\% CI 80\%-86\%) for unassisted clinicians and 88\% (95\% CI 86\%-90\%) for DL-assisted clinicians. Pooled specificity was 86\% (95\% CI 83\%-88\%) for unassisted clinicians and 88\% (95\% CI 85\%-90\%) for DL-assisted clinicians. The pooled sensitivity and specificity values for DL-assisted clinicians were higher than for unassisted clinicians, at ratios of 1.07 (95\% CI 1.05-1.09) and 1.03 (95\% CI 1.02-1.05), respectively. 
Similar diagnostic performance by DL-assisted clinicians was also observed across the predefined subgroups. Conclusions: The diagnostic performance of DL-assisted clinicians appears better than unassisted clinicians in image-based cancer identification. However, caution should be exercised, because the evidence provided in the reviewed studies does not cover all the minutiae involved in real-world clinical practice. Combining qualitative insights from clinical practice with data-science approaches may improve DL-assisted practice, although further research is required. Trial Registration: PROSPERO CRD42021281372; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=281372 ", doi="10.2196/43832", url="https://www.jmir.org/2023/1/e43832", url="http://www.ncbi.nlm.nih.gov/pubmed/36862499" } @Article{info:doi/10.2196/42324, author="Phumkuea, Thanakorn and Wongsirichot, Thakerng and Damkliang, Kasikrit and Navasakulpong, Asma", title="Classifying COVID-19 Patients From Chest X-ray Images Using Hybrid Machine Learning Techniques: Development and Evaluation", journal="JMIR Form Res", year="2023", month="Feb", day="28", volume="7", pages="e42324", keywords="COVID-19", keywords="machine learning", keywords="medical informatics", keywords="coronavirus", keywords="diagnosis", keywords="model", keywords="detection", keywords="healthy", keywords="unhealthy", keywords="public", keywords="usage", keywords="data", keywords="database", keywords="accuracy", keywords="development", keywords="x-ray", keywords="imaging", abstract="Background: The COVID-19 pandemic has raised global concern, with moderate to severe cases displaying lung inflammation and respiratory failure. Chest x-ray (CXR) imaging is crucial for diagnosis and is usually interpreted by experienced medical specialists. Machine learning has been applied with acceptable accuracy, but computational efficiency has received less attention. Objective: We introduced a novel hybrid machine learning model to accurately classify COVID-19, non-COVID-19, and healthy patients from CXR images with reduced computational time and promising results. Our proposed model was thoroughly evaluated and compared with existing models. Methods: A retrospective study was conducted to analyze 5 public data sets containing 4200 CXR images using machine learning techniques including decision trees, support vector machines, and neural networks. The images were preprocessed to undergo image segmentation, enhancement, and feature extraction. The best performing machine learning technique was selected and combined into a multilayer hybrid classification model for COVID-19 (MLHC-COVID-19). The model consisted of 2 layers. The first layer was designed to differentiate healthy individuals from infected patients, while the second layer aimed to classify COVID-19 and non-COVID-19 patients. Results: The MLHC-COVID-19 model was trained and evaluated on unseen COVID-19 CXR images, achieving reasonably high accuracy and F measures of 0.962 and 0.962, respectively. These results show the effectiveness of the MLHC-COVID-19 in classifying COVID-19 CXR images, with improved accuracy and a reduction in interpretation time. The model was also embedded into a web-based MLHC-COVID-19 computer-aided diagnosis system, which was made publicly available. Conclusions: The study found that the MLHC-COVID-19 model effectively differentiated CXR images of COVID-19 patients from those of healthy and non-COVID-19 individuals. 
It outperformed other state-of-the-art deep learning techniques and showed promising results. These results suggest that the MLHC-COVID-19 model could have been instrumental in early detection and diagnosis of COVID-19 patients, thus playing a significant role in controlling and managing the pandemic. Although the pandemic has slowed down, this model can be adapted and utilized for future similar situations. The model was also integrated into a publicly accessible web-based computer-aided diagnosis system. ", doi="10.2196/42324", url="https://formative.jmir.org/2023/1/e42324", url="http://www.ncbi.nlm.nih.gov/pubmed/36780315" } @Article{info:doi/10.2196/41080, author="van Spaendonck, Zita and Leeuwenburgh, Pieter Koen and Dremmen, Marjolein and van Schuppen, Joost and Starreveld, Dani{\"e}lle and Dierckx, Bram and Legerstee, S. Jeroen", title="Comparing Smartphone Virtual Reality Exposure Preparation to Care as Usual in Children Aged 6 to 14 Years Undergoing Magnetic Resonance Imaging: Protocol for a Multicenter, Observer-Blinded, Randomized Controlled Trial", journal="JMIR Res Protoc", year="2023", month="Jan", day="24", volume="12", pages="e41080", keywords="virtual reality", keywords="VR", keywords="children", keywords="anxiety", keywords="magnetic resonance imaging", keywords="MRI", keywords="MRI scans", keywords="imaging", keywords="randomized controlled trial", keywords="MRI preparation", keywords="smartphone virtual reality", keywords="smartphone intervention", keywords="procedural anxiety", keywords="psychosocial intervention", abstract="Background: A magnetic resonance imaging (MRI) procedure can cause preprocedural and periprocedural anxiety in children. Psychosocial interventions are used to prepare children for the procedure to alleviate anxiety, but these interventions are time-consuming and costly, limiting their clinical use. Virtual reality (VR) is a promising way to overcome these limitations in the preparation of children before an MRI scan. Objective: The objective of this study is (1) to develop a VR smartphone intervention to prepare children at home for an MRI procedure; and (2) to examine the effect of the VR intervention in a randomized controlled trial, in which the VR intervention will be compared to care as usual (CAU). CAU involves an information letter about an MRI examination. The primary outcome is the child's procedural anxiety during the MRI procedure. Secondary outcomes include preprocedural anxiety and parental anxiety. We hypothesize that the VR preparation will result in a higher reduction of the periprocedural anxiety of both parents and children as compared to CAU. Methods: The VR intervention provides a highly realistic and child-friendly representation of an MRI environment. In this randomized controlled trial, 128 children (aged 6 to 14 years) undergoing an MRI scan will be randomly allocated to the VR intervention or CAU. Children in the VR intervention will receive a log-in code for the VR app and are sent cardboard VR glasses. Results: The VR smartphone preparation app was developed in 2020. The recruitment of participants is expected to be completed in December 2022. Data will be analyzed, and scientific papers will be submitted for publication in 2023. Conclusions: The VR smartphone app is expected to significantly reduce pre- and periprocedural anxiety in pediatric patients undergoing an MRI scan. The VR app offers a realistic and child-friendly experience that can contribute to modern care. 
A smartphone version of the VR app has the advantage that children, and potentially their parents, can get habituated to the VR environment and noises in their own home environment and can do this VR MRI preparation as often and as long as needed. Trial Registration: ISRCTN Registry ISRCTN20976625; https://www.isrctn.com/ISRCTN20976625 International Registered Report Identifier (IRRID): DERR1-10.2196/41080 ", doi="10.2196/41080", url="https://www.researchprotocols.org/2023/1/e41080", url="http://www.ncbi.nlm.nih.gov/pubmed/36692931" } @Article{info:doi/10.2196/38412, author="Kentley, Jonathan and Weber, Jochen and Liopyris, Konstantinos and Braun, P. Ralph and Marghoob, A. Ashfaq and Quigley, A. Elizabeth and Nelson, Kelly and Prentice, Kira and Duhaime, Erik and Halpern, C. Allan and Rotemberg, Veronica", title="Agreement Between Experts and an Untrained Crowd for Identifying Dermoscopic Features Using a Gamified App: Reader Feasibility Study", journal="JMIR Med Inform", year="2023", month="Jan", day="18", volume="11", pages="e38412", keywords="dermatology", keywords="dermatologist", keywords="diagnosis", keywords="diagnostic", keywords="labeling", keywords="classification", keywords="deep learning", keywords="dermoscopy", keywords="dermatoscopy", keywords="skin", keywords="pigmentation", keywords="microscopy", keywords="dermascopic", keywords="artificial intelligence", keywords="machine learning", keywords="crowdsourcing", keywords="crowdsourced", keywords="melanoma", keywords="cancer", keywords="lesion", keywords="medical image", keywords="imaging", keywords="development", keywords="feasibility", abstract="Background: Dermoscopy is commonly used for the evaluation of pigmented lesions, but agreement between experts for identification of dermoscopic structures is known to be relatively poor. Expert labeling of medical data is a bottleneck in the development of machine learning (ML) tools, and crowdsourcing has been demonstrated as a cost- and time-efficient method for the annotation of medical images. Objective: The aim of this study is to demonstrate that crowdsourcing can be used to label basic dermoscopic structures from images of pigmented lesions with similar reliability to a group of experts. Methods: First, we obtained labels of 248 images of melanocytic lesions with 31 dermoscopic ``subfeatures'' labeled by 20 dermoscopy experts. These were then collapsed into 6 dermoscopic ``superfeatures'' based on structural similarity, due to low interrater reliability (IRR): dots, globules, lines, network structures, regression structures, and vessels. These images were then used as the gold standard for the crowd study. The commercial platform DiagnosUs was used to obtain annotations from a nonexpert crowd for the presence or absence of the 6 superfeatures in each of the 248 images. We replicated this methodology with a group of 7 dermatologists to allow direct comparison with the nonexpert crowd. The Cohen $\kappa$ value was used to measure agreement across raters. Results: In total, we obtained 139,731 ratings of the 6 dermoscopic superfeatures from the crowd. There was relatively lower agreement for the identification of dots and globules (the median $\kappa$ values were 0.526 and 0.395, respectively), whereas network structures and vessels showed the highest agreement (the median $\kappa$ values were 0.581 and 0.798, respectively). 
This pattern was also seen among the expert raters, who had median $\kappa$ values of 0.483 and 0.517 for dots and globules, respectively, and 0.758 and 0.790 for network structures and vessels. The median $\kappa$ values between nonexperts and thresholded average--expert readers were 0.709 for dots, 0.719 for globules, 0.714 for lines, 0.838 for network structures, 0.818 for regression structures, and 0.728 for vessels. Conclusions: This study confirmed that IRR for different dermoscopic features varied among a group of experts; a similar pattern was observed in a nonexpert crowd. There was good or excellent agreement for each of the 6 superfeatures between the crowd and the experts, highlighting the similar reliability of the crowd for labeling dermoscopic images. This confirms the feasibility and dependability of using crowdsourcing as a scalable solution to annotate large sets of dermoscopic images, with several potential clinical and educational applications, including the development of novel, explainable ML tools. ", doi="10.2196/38412", url="https://medinform.jmir.org/2023/1/e38412", url="http://www.ncbi.nlm.nih.gov/pubmed/36652282" } @Article{info:doi/10.2196/43769, author="Croci, Eleonora and Eckers, Franziska and N{\"u}esch, Corina and Aghlmandi, Soheila and Kovacs, Krisztian Balazs and Genter, Jeremy and Baumgartner, Daniel and M{\"u}ller, Marc Andreas and M{\"u}ndermann, Annegret", title="Load-Induced Glenohumeral Translation After Rotator Cuff Tears: Protocol for an In Vivo Study", journal="JMIR Res Protoc", year="2022", month="Dec", day="23", volume="11", number="12", pages="e43769", keywords="abduction", keywords="shoulder", keywords="rotator cuff", keywords="humeral head migration", keywords="fluoroscopy", keywords="MRI", keywords="motion capture", keywords="dynamometer", abstract="Background: Rotator cuff tears are a common shoulder injury, but they sometimes remain undiagnosed, as symptoms can be limited. Altered shoulder biomechanics can lead to secondary damage and degeneration. In biomechanical analyses, the shoulder (ie, the glenohumeral joint) is normally idealized as a ball-and-socket joint, even though a translation is often observed clinically. To date, no conclusive changes in glenohumeral translation have been reported in patients with rotator cuff tears, and it is unknown how an additional handheld weight that is comparable to those used during daily activities will affect glenohumeral translations in patients with rotator cuff tears. Objective: This study aims to assess the load-induced glenohumeral translation (liTr) in patients with rotator cuff tears and its association with the load-induced changes in muscle activation (liMA). Methods: Patients and asymptomatic controls will be recruited. Participants will fill out health questionnaires and perform 30{\textdegree} arm abduction and adduction trials, during which they will hold different handheld weights of a maximum of 4 kg while motion capture and electromyographic data are collected. In addition, fluoroscopic images of the shoulders will be taken for the same movements. Isometric shoulder muscle strength for abduction and rotation will be assessed with a dynamometer. Finally, shoulder magnetic resonance images will be acquired to assess muscle status and injury presence. The dose-response relationship between additional weight, liTr, and liMA will be evaluated. Results: Recruitment and data collection began in May 2021, and they will last until the recruitment target is achieved. 
Data collection is expected to be completed by the end of 2022. As of November 2022, data processing and analysis are in progress, and the first results are expected to be submitted for publication in 2023. Conclusions: This study will aid our understanding of biological variations in liTr, the influence of disease pathology on liTr, the potential compensation of rotator cuff tears by muscle activation and size, and the association between liTr and patient outcomes. The outcomes will be relevant for diagnosis, treatment, and rehabilitation planning in patients with rotator cuff tears. Trial Registration: ClinicalTrials.gov NCT04819724; https://clinicaltrials.gov/ct2/show/NCT04819724 International Registered Report Identifier (IRRID): DERR1-10.2196/43769 ", doi="10.2196/43769", url="https://www.researchprotocols.org/2022/12/e43769", url="http://www.ncbi.nlm.nih.gov/pubmed/36563028" } @Article{info:doi/10.2196/38655, author="Ghuwalewala, Suraj and Kulkarni, Viraj and Pant, Richa and Kharat, Amit", title="Levels of Autonomous Radiology", journal="Interact J Med Res", year="2022", month="Dec", day="7", volume="11", number="2", pages="e38655", keywords="artificial intelligence", keywords="automation", keywords="machine learning", keywords="radiology", keywords="explainability", keywords="model decay", keywords="generalizability", keywords="fairness and bias", keywords="distributed learning", keywords="autonomous radiology", keywords="AI assistance", doi="10.2196/38655", url="https://www.i-jmr.org/2022/2/e38655", url="http://www.ncbi.nlm.nih.gov/pubmed/36476422" } @Article{info:doi/10.2196/40485, author="Wenderott, Katharina and Gambashidze, Nikoloz and Weigl, Matthias", title="Integration of Artificial Intelligence Into Sociotechnical Work Systems---Effects of Artificial Intelligence Solutions in Medical Imaging on Clinical Efficiency: Protocol for a Systematic Literature Review", journal="JMIR Res Protoc", year="2022", month="Dec", day="1", volume="11", number="12", pages="e40485", keywords="artificial intelligence", keywords="clinical care", keywords="clinical efficiency", keywords="sociotechnical work system", keywords="sociotechnical", keywords="review methodology", keywords="systematic review", keywords="facilitator", keywords="barrier", keywords="diagnostic", keywords="diagnosis", keywords="diagnoses", keywords="digital health", keywords="adoption", keywords="implementation", keywords="literature review", keywords="literature search", keywords="search strategy", keywords="library science", keywords="medical librarian", keywords="narrative review", keywords="narrative synthesis", abstract="Background: When introducing artificial intelligence (AI) into clinical care, one of the main objectives is to improve workflow efficiency because AI-based solutions are expected to take over or support routine tasks. Objective: This study sought to synthesize the current knowledge base on how the use of AI technologies for medical imaging affects efficiency and what facilitators or barriers moderating the impact of AI implementation have been reported. Methods: In this systematic literature review, comprehensive literature searches will be performed in relevant electronic databases, including PubMed/MEDLINE, Embase, PsycINFO, Web of Science, IEEE Xplore, and CENTRAL. Studies in English and German published from 2000 onwards will be included. 
The following inclusion criteria will be applied: empirical studies targeting the workflow integration or adoption of AI-based software in medical imaging used for diagnostic purposes in a health care setting. The efficiency outcomes of interest include workflow adaptation, time to complete tasks, and workload. Two reviewers will independently screen all retrieved records and full-text articles and extract data. The study's methodological quality will be appraised using suitable tools. The findings will be described qualitatively, and a meta-analysis will be performed, if possible. Furthermore, a narrative synthesis approach that focuses on work system factors affecting the integration of AI technologies reported in eligible studies will be adopted. Results: This review is anticipated to begin in September 2022 and will be completed in April 2023. Conclusions: This systematic review and synthesis aims to summarize the existing knowledge on efficiency improvements in medical imaging through the integration of AI into clinical workflows. Moreover, it will extract the facilitators and barriers of the AI implementation process in clinical care settings. Therefore, our findings have implications for future clinical implementation processes of AI-based solutions, with a particular focus on diagnostic procedures. This review is additionally expected to identify research gaps regarding the focus on seamless workflow integration of novel technologies in clinical settings. Trial Registration: PROSPERO CRD42022303439; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=303439 International Registered Report Identifier (IRRID): PRR1-10.2196/40485 ", doi="10.2196/40485", url="https://www.researchprotocols.org/2022/12/e40485", url="http://www.ncbi.nlm.nih.gov/pubmed/36454624" } @Article{info:doi/10.2196/27421, author="Ami, Olivier and Maran, Jean-Christophe and Musset, Dominique and Dubray, Claude and Mage, G{\'e}rard and Boyer, Louis", title="Using Magnetic Resonance Imaging During Childbirth to Demonstrate Fetal Head Moldability and Brain Compression: Prospective Cohort Study", journal="JMIR Form Res", year="2022", month="Nov", day="30", volume="6", number="11", pages="e27421", keywords="parturition", keywords="magnetic resonance imaging", keywords="obstetrics", keywords="fetus", keywords="cephalopelvic disproportion", abstract="Background: Childbirth is a physiological process with significant medical risk, given that neurological impairment due to the birthing process can occur at any time. Improvements in risk assessment and anticipatory interventions are constantly needed; however, the birthing process is difficult to assess using simple imaging technology because the maternal bony pelvis and fetal skeleton interfere with visualizing the soft tissues. Magnetic resonance imaging (MRI) is a noninvasive technique with no ionizing radiation that can monitor the biomechanics of the birthing process. However, the effective use of this modality requires teamwork and the implementation of the appropriate safeguards to achieve appropriate safety levels. Objective: This study describes a clinically effective and safe method to perform real-time MRI during the birthing process. We reported the experience of our team as part of the IMAGINAITRE study protocol (France), which aimed to better understand the biomechanics of childbirth. Methods: A total of 27 pregnant women were examined with 3D MRI sequences before going into labor using a 1-Tesla open-field MRI.
Of these 27 patients, 7 (26\%) subsequently had another set of 3D MRI sequences during the second stage of labor. Volumes of 2D images were transformed into finite element 3D reconstructions. Polygonal meshes for each part of the fetal body were used to study fetal head moldability and brain compression. Results: All 7 observed babies showed a sugarloaf skull deformity and brain compression at the middle strait. The fetus showing the greatest degree of molding and brain shape deformation weighed 4525 g and was born spontaneously but also presented with a low Apgar score. In this case, observable brain shape deformation demonstrated that brain compression had occurred, and it was not necessarily well tolerated by the fetus. Depending on fetal head moldability, these observations suggest that cephalopelvic disproportion can result in either obstructed labor or major fetal head molding with brain compression. Conclusions: This study suggests the presence of skull moldability as a confounding factor explaining why MRI, even with the best precision to measure radiological landmarks, fails to accurately predict the modality of childbirth. This introduces the fetal head compliance criterion as a way to better understand cephalopelvic disproportion mechanisms in obstetrics. MRI might be the best imaging technology by which to explore all combined aspects of cephalopelvic disproportion and achieve a better understanding of the underlying mechanisms of fetal head molding and moldability. ", doi="10.2196/27421", url="https://formative.jmir.org/2022/11/e27421", url="http://www.ncbi.nlm.nih.gov/pubmed/36322921" } @Article{info:doi/10.2196/42853, author="Wahyudi, Irfan and Utomo, Prasetyo Chandra and Djauzi, Samsuridjal and Fathurahman, Muhamad and Situmorang, Reinaldi Gerhard and Rodjani, Arry and Yonathan, Kevin and Santoso, Budi", title="Digital Pattern Recognition for the Identification of Various Hypospadias Parameters via an Artificial Neural Network: Protocol for the Development and Validation of a System and Mobile App", journal="JMIR Res Protoc", year="2022", month="Nov", day="25", volume="11", number="11", pages="e42853", keywords="artificial intelligence", keywords="digital recognition", keywords="hypospadias", keywords="machine learning", abstract="Background: Hypospadias remains the most prevalent congenital abnormality in boys worldwide. However, the limited infrastructure and number of pediatric urologists capable of diagnosing and managing the condition hinder the management of hypospadias in Indonesia. The use of artificial intelligence and image recognition is thought to be beneficial in improving the management of hypospadias cases in Indonesia. Objective: We aim to develop and validate a digital pattern recognition system and a mobile app based on an artificial neural network to determine various parameters of hypospadias. Methods: Hypospadias and normal penis images from an age-matched database will be used to train the artificial neural network. Images of 3 aspects of the penis (ventral, dorsal, and lateral aspects, which include the glans, shaft, and scrotum) will be taken from each participant. The images will be labeled with the following hypospadias parameters: hypospadias status, meatal location, meatal shape, the quality of the urethral plate, glans diameter, and glans shape. The data will be uploaded to train the image recognition model. Intrarater and interrater analyses will be performed, using the test images provided to the algorithm. 
Results: Our study is at the protocol development stage. A preliminary study regarding the system's development and feasibility will start in December 2022. The results of our study are expected to be available by the end of 2023. Conclusions: A digital pattern recognition system using an artificial neural network will be developed and designed to improve the diagnosis and management of patients with hypospadias, especially those residing in regions with limited infrastructure and health personnel. International Registered Report Identifier (IRRID): PRR1-10.2196/42853 ", doi="10.2196/42853", url="https://www.researchprotocols.org/2022/11/e42853", url="http://www.ncbi.nlm.nih.gov/pubmed/36427238" } @Article{info:doi/10.2196/39536, author="Mir{\'o} Catalina, Queralt and Fuster-Casanovas, A{\"i}na and Sol{\'e}-Casals, Jordi and Vidal-Alaball, Josep", title="Developing an Artificial Intelligence Model for Reading Chest X-rays: Protocol for a Prospective Validation Study", journal="JMIR Res Protoc", year="2022", month="Nov", day="16", volume="11", number="11", pages="e39536", keywords="artificial intelligence", keywords="machine learning", keywords="chest x-ray", keywords="radiology", keywords="validation", abstract="Background: Chest x-rays are the most commonly used type of x-rays today, accounting for up to 26\% of all radiographic tests performed. However, chest radiography is a complex imaging modality to interpret. Several studies have reported discrepancies in chest x-ray interpretations among emergency physicians and radiologists. It is of vital importance to be able to offer a fast and reliable diagnosis for this kind of x-ray, using artificial intelligence (AI) to support the clinician. Oxipit has developed an AI algorithm for reading chest x-rays, available through a web platform called ChestEye. This platform is an automatic computer-aided diagnosis system where a reading of the inserted chest x-ray is performed, and an automatic report is returned with a capacity to detect 75 pathologies, covering 90\% of diagnoses. Objective: The overall objective of the study is to perform validation with prospective data of the ChestEye algorithm as a diagnostic aid. We wish to validate the algorithm for a single pathology and multiple pathologies by evaluating the accuracy, sensitivity, and specificity of the algorithm. Methods: A prospective validation study will be carried out to compare the diagnosis of the reference radiologists for the users attending the primary care center in the Osona region (Spain), with the diagnosis of the ChestEye AI algorithm. Anonymized chest x-ray images will be acquired and fed into the AI algorithm interface, which will return an automatic report. A radiologist will evaluate the same chest x-ray, and both assessments will be compared to calculate the precision, sensitivity, specificity, and accuracy of the AI algorithm. Results will be represented globally and individually for each pathology using a confusion matrix and the One-vs-All methodology. Results: Patient recruitment was conducted from February 7, 2022, and it is expected that data can be obtained in 5 to 6 months. In June 2022, more than 450 x-rays have been collected, so it is expected that 600 samples will be gathered in July 2022. We hope to obtain sufficient evidence to demonstrate that the use of AI in the reading of chest x-rays can be a good tool for diagnostic support. 
However, there is a decreasing number of radiology professionals and, therefore, it is necessary to develop and validate tools to support professionals who have to interpret these tests. Conclusions: If the results of the validation of the model are satisfactory, it could be implemented as a support tool and allow an increase in the accuracy and speed of diagnosis, patient safety, and agility in the primary care system, while reducing the cost of unnecessary tests. International Registered Report Identifier (IRRID): PRR1-10.2196/39536 ", doi="10.2196/39536", url="https://www.researchprotocols.org/2022/11/e39536", url="http://www.ncbi.nlm.nih.gov/pubmed/36383419" } @Article{info:doi/10.2196/40878, author="Tsai, Ming-Chin and Lu, Horng-Shing Henry and Chang, Yueh-Chuan and Huang, Yung-Chieh and Fu, Lin-Shien", title="Automatic Screening of Pediatric Renal Ultrasound Abnormalities: Deep Learning and Transfer Learning Approach", journal="JMIR Med Inform", year="2022", month="Nov", day="2", volume="10", number="11", pages="e40878", keywords="transfer learning", keywords="convolutional neural networks", keywords="pediatric renal ultrasound image", keywords="screening", keywords="pediatric", keywords="medical image", keywords="clinical informatics", keywords="deep learning", keywords="ultrasound image", keywords="artificial intelligence", keywords="diagnostic system", abstract="Background: In recent years, the progress and generalization surrounding portable ultrasonic probes have made ultrasound (US) a useful tool for physicians when making a diagnosis. With the advent of machine learning and deep learning, the development of a computer-aided diagnostic system for screening renal US abnormalities can assist general practitioners in the early detection of pediatric kidney diseases. Objective: In this paper, we sought to evaluate the diagnostic performance of deep learning techniques to classify kidney images as normal and abnormal. Methods: We chose 330 normal and 1269 abnormal pediatric renal US images for establishing a model for artificial intelligence. The abnormal images involved stones, cysts, hyperechogenicity, space-occupying lesions, and hydronephrosis. We performed preprocessing of the original images for subsequent deep learning. We redefined the final connecting layers for classification of the extracted features as abnormal or normal from the ResNet-50 pretrained model. The performances of the model were tested by a validation data set using area under the receiver operating characteristic curve, accuracy, specificity, and sensitivity. Results: The deep learning model, 94 MB parameters in size, based on ResNet-50, was built for classifying normal and abnormal images. The accuracy, (\%)/area under curve, of the validated images of stone, cyst, hyperechogenicity, space-occupying lesions, and hydronephrosis were 93.2/0.973, 91.6/0.940, 89.9/0.940, 91.3/0.934, and 94.1/0.996, respectively. The accuracy of normal image classification in the validation data set was 90.1\%. Overall accuracy of (\%)/area under curve was 92.9/0.959. Conclusions: We established a useful, computer-aided model for automatic classification of pediatric renal US images in terms of normal and abnormal categories.
", doi="10.2196/40878", url="https://medinform.jmir.org/2022/11/e40878", url="http://www.ncbi.nlm.nih.gov/pubmed/36322109" } @Article{info:doi/10.2196/38640, author="Kim, Changgyun and Jeong, Hogul and Park, Wonse and Kim, Donghyun", title="Tooth-Related Disease Detection System Based on Panoramic Images and Optimization Through Automation: Development Study", journal="JMIR Med Inform", year="2022", month="Oct", day="31", volume="10", number="10", pages="e38640", keywords="object detection", keywords="tooth", keywords="diagnosis", keywords="panorama", keywords="dentistry", keywords="dental health", keywords="oral health", keywords="dental caries", keywords="image analysis", keywords="artificial intelligence", keywords="detection model", keywords="machine learning", keywords="automation", keywords="diagnosis system", abstract="Background: Early detection of tooth-related diseases in patients plays a key role in maintaining their dental health and preventing future complications. Since dentists are not overly attentive to tooth-related diseases that may be difficult to judge visually, many patients miss timely treatment. The 5 representative tooth-related diseases, that is, coronal caries or defect, proximal caries, cervical caries or abrasion, periapical radiolucency, and residual root can be detected on panoramic images. In this study, a web service was constructed for the detection of these diseases on panoramic images in real time, which helped shorten the treatment planning time and reduce the probability of misdiagnosis. Objective: This study designed a model to assess tooth-related diseases in panoramic images by using artificial intelligence in real time. This model can perform an auxiliary role in the diagnosis of tooth-related diseases by dentists and reduce the treatment planning time spent through telemedicine. Methods: For learning the 5 tooth-related diseases, 10,000 panoramic images were modeled: 4206 coronal caries or defects, 4478 proximal caries, 6920 cervical caries or abrasion, 8290 periapical radiolucencies, and 1446 residual roots. To learn the model, the fast region-based convolutional network (Fast R-CNN), residual neural network (ResNet), and inception models were used. Learning about the 5 tooth-related diseases completely did not provide accurate information on the diseases because of indistinct features present in the panoramic pictures. Therefore, 1 detection model was applied to each tooth-related disease, and the models for each of the diseases were integrated to increase accuracy. Results: The Fast R-CNN model showed the highest accuracy, with an accuracy of over 90\%, in diagnosing the 5 tooth-related diseases. Thus, Fast R-CNN was selected as the final judgment model as it facilitated the real-time diagnosis of dental diseases that are difficult to judge visually from radiographs and images, thereby assisting the dentists in their treatment plans. Conclusions: The Fast R-CNN model showed the highest accuracy in the real-time diagnosis of dental diseases and can therefore play an auxiliary role in shortening the treatment planning time after the dentists diagnose the tooth-related disease. In addition, by updating the captured panoramic images of patients on the web service developed in this study, we are looking forward to increasing the accuracy of diagnosing these 5 tooth-related diseases. The dental diagnosis system in this study takes 2 minutes for diagnosing 5 diseases in 1 panoramic image. 
Therefore, this system plays an effective role in setting a dental treatment schedule. ", doi="10.2196/38640", url="https://medinform.jmir.org/2022/10/e38640", url="http://www.ncbi.nlm.nih.gov/pubmed/36315222" } @Article{info:doi/10.2196/37730, author="Wink, Elisabeth Alexandra and Telfer, N. Amanda and Pascoe, A. Michael", title="Google Images Search Results as a Resource in the Anatomy Laboratory: Rating of Educational Value", journal="JMIR Med Educ", year="2022", month="Oct", day="21", volume="8", number="4", pages="e37730", keywords="anatomy laboratory", keywords="information literacy", keywords="internet search", keywords="anatomical images", keywords="scoring rubric", keywords="Google", keywords="images", keywords="educational value", keywords="literacy information", keywords="medical education", keywords="medical students", keywords="anatomy", abstract="Background: Preclinical medical learners are embedded in technology-rich environments, allowing them rapid access to a large volume of information. The anatomy laboratory is an environment in which faculty can assess the development of professional skills such as information literacy in preclinical medical learners. In the anatomy laboratory, many students use Google Images searches in addition to or in place of other course materials as a resource to locate and identify anatomical structures. However, the most frequent sources as well as the educational quality of these images are unknown. Objective: This study was designed to assess the sources and educational value of Google Images search results for commonly searched anatomical structures. Methods: The top 10 Google Images search results were collected for 39 anatomical structures. Image source websites were recorded and categorized based on the purpose and target audience of the site publishing the image. Educational value was determined through assessment of relevance (is the searched structure depicted in the image?), accuracy (does the image contain errors?), and usefulness (will the image assist a learner in locating the structure on an anatomical donor?). A reliable scoring rubric was developed to assess an image's usefulness. Results: A total of 390 images were analyzed. Most often, images were sourced from websites targeting health care professionals and health care professions students (38\% of images), while Wikipedia was the most frequent single source of image results (62/390 results). Of the 390 total images, 363 (93.1\%) depicted the searched structure and were therefore considered relevant. However, only 43.0\% (156/363) of relevant images met the threshold to be deemed useful in identifying the searched structure in an anatomical donor. The usefulness of images did not significantly differ across source categories. Conclusions: Anatomy faculty may use these results to develop interventions for gaps in information literacy in preclinical medical learners in the context of image searches in the anatomy laboratory. 
", doi="10.2196/37730", url="https://mededu.jmir.org/2022/4/e37730", url="http://www.ncbi.nlm.nih.gov/pubmed/36269663" } @Article{info:doi/10.2196/36660, author="Li, Zhongqiang and Li, Zheng and Yao, Luke and Chen, Qing and Zhang, Jian and Li, Xin and Feng, Ji-Ming and Li, Yanping and Xu, Jian", title="Multiple-Inputs Convolutional Neural Network for COVID-19 Classification and Critical Region Screening From Chest X-ray Radiographs: Model Development and Performance Evaluation", journal="JMIR Bioinform Biotech", year="2022", month="Oct", day="4", volume="3", number="1", pages="e36660", keywords="COVID-19", keywords="chest X-ray radiography", keywords="multiple-inputs convolutional neural network", keywords="screening critical COVID regions", abstract="Background: The COVID-19 pandemic is becoming one of the largest, unprecedented health crises, and chest X-ray radiography (CXR) plays a vital role in diagnosing COVID-19. However, extracting and finding useful image features from CXRs demand a heavy workload for radiologists. Objective: The aim of this study was to design a novel multiple-inputs (MI) convolutional neural network (CNN) for the classification of COVID-19 and extraction of critical regions from CXRs. We also investigated the effect of the number of inputs on the performance of our new MI-CNN model. Methods: A total of 6205 CXR images (including 3021 COVID-19 CXRs and 3184 normal CXRs) were used to test our MI-CNN models. CXRs could be evenly segmented into different numbers (2, 4, and 16) of individual regions. Each region could individually serve as one of the MI-CNN inputs. The CNN features of these MI-CNN inputs would then be fused for COVID-19 classification. More importantly, the contributions of each CXR region could be evaluated through assessing the number of images that were accurately classified by their corresponding regions in the testing data sets. Results: In both the whole-image and left- and right-lung region of interest (LR-ROI) data sets, MI-CNNs demonstrated good efficiency for COVID-19 classification. In particular, MI-CNNs with more inputs (2-, 4-, and 16-input MI-CNNs) had better efficiency in recognizing COVID-19 CXRs than the 1-input CNN. Compared to the whole-image data sets, the efficiency of LR-ROI data sets showed approximately 4\% lower accuracy, sensitivity, specificity, and precision (over 91\%). In considering the contributions of each region, one of the possible reasons for this reduced performance was that nonlung regions (eg, region 16) provided false-positive contributions to COVID-19 classification. The MI-CNN with the LR-ROI data set could provide a more accurate evaluation of the contribution of each region and COVID-19 classification. Additionally, the right-lung regions had higher contributions to the classification of COVID-19 CXRs, whereas the left-lung regions had higher contributions to identifying normal CXRs. Conclusions: Overall, MI-CNNs could achieve higher accuracy with an increasing number of inputs (eg, 16-input MI-CNN). This approach could assist radiologists in identifying COVID-19 CXRs and in screening the critical regions related to COVID-19 classifications. ", doi="10.2196/36660", url="https://bioinform.jmir.org/2022/1/e36660", url="http://www.ncbi.nlm.nih.gov/pubmed/36277075" } @Article{info:doi/10.2196/38178, author="Singh, Pulkit and Haimovich, Julian and Reeder, Christopher and Khurshid, Shaan and Lau, S. Emily and Cunningham, W. Jonathan and Philippakis, Anthony and Anderson, D. Christopher and Ho, E. 
Jennifer and Lubitz, A. Steven and Batra, Puneet", title="One Clinician Is All You Need--Cardiac Magnetic Resonance Imaging Measurement Extraction: Deep Learning Algorithm Development", journal="JMIR Med Inform", year="2022", month="Sep", day="16", volume="10", number="9", pages="e38178", keywords="natural language processing", keywords="transformers", keywords="machine learning", keywords="cardiac MRI", keywords="clinical outcomes", keywords="deep learning", abstract="Background: Cardiac magnetic resonance imaging (CMR) is a powerful diagnostic modality that provides detailed quantitative assessment of cardiac anatomy and function. Automated extraction of CMR measurements from clinical reports that are typically stored as unstructured text in electronic health record systems would facilitate their use in research. Existing machine learning approaches either rely on large quantities of expert annotation or require the development of engineered rules that are time-consuming and are specific to the setting in which they were developed. Objective: We hypothesize that the use of pretrained transformer-based language models may enable label-efficient numerical extraction from clinical text without the need for heuristics or large quantities of expert annotations. Here, we fine-tuned pretrained transformer-based language models on a small quantity of CMR annotations to extract 21 CMR measurements. We assessed the effect of clinical pretraining to reduce labeling needs and explored alternative representations of numerical inputs to improve performance. Methods: Our study sample comprised 99,252 patients that received longitudinal cardiology care in a multi-institutional health care system. There were 12,720 available CMR reports from 9280 patients. We adapted PRAnCER (Platform Enabling Rapid Annotation for Clinical Entity Recognition), an annotation tool for clinical text, to collect annotations from a study clinician on 370 reports. We experimented with 5 different representations of numerical quantities and several model weight initializations. We evaluated extraction performance using macroaveraged F1-scores across the measurements of interest. We applied the best-performing model to extract measurements from the remaining CMR reports in the study sample and evaluated established associations between selected extracted measures with clinical outcomes to demonstrate validity. Results: All combinations of weight initializations and numerical representations obtained excellent performance on the gold-standard test set, suggesting that transformer models fine-tuned on a small set of annotations can effectively extract numerical quantities. Our results further indicate that custom numerical representations did not appear to have a significant impact on extraction performance. The best-performing model achieved a macroaveraged F1-score of 0.957 across the evaluated CMR measurements (range 0.92 for the lowest-performing measure of left atrial anterior-posterior dimension to 1.0 for the highest-performing measures of left ventricular end systolic volume index and left ventricular end systolic diameter). Application of the best-performing model to the study cohort yielded 136,407 measurements from all available reports in the study sample. We observed expected associations between extracted left ventricular mass index, left ventricular ejection fraction, and right ventricular ejection fraction with clinical outcomes like atrial fibrillation, heart failure, and mortality. 
Conclusions: This study demonstrated that a domain-agnostic pretrained transformer model is able to effectively extract quantitative clinical measurements from diagnostic reports with a relatively small number of gold-standard annotations. The proposed workflow may serve as a roadmap for other quantitative entity extraction. ", doi="10.2196/38178", url="https://medinform.jmir.org/2022/9/e38178", url="http://www.ncbi.nlm.nih.gov/pubmed/35960155" } @Article{info:doi/10.2196/35150, author="Oloruntoba, I. Ayooluwatomiwa and Vestergaard, Tine and Nguyen, D. Toan and Yu, Zhen and Sashindranath, Maithili and Betz-Stablein, Brigid and Soyer, Peter H. and Ge, Zongyuan and Mar, Victoria", title="Assessing the Generalizability of Deep Learning Models Trained on Standardized and Nonstandardized Images and Their Performance Against Teledermatologists: Retrospective Comparative Study", journal="JMIR Dermatol", year="2022", month="Sep", day="12", volume="5", number="3", pages="e35150", keywords="artificial intelligence", keywords="AI", keywords="convolutional neural network", keywords="CNN", keywords="teledermatology", keywords="standardized Image", keywords="nonstandardized image", keywords="machine learning", keywords="skin cancer", keywords="cancer", abstract="Background: Convolutional neural networks (CNNs) are a type of artificial intelligence that shows promise as a diagnostic aid for skin cancer. However, the majority are trained using retrospective image data sets with varying image capture standardization. Objective: The aim of our study was to use CNN models with the same architecture---trained on image sets acquired with either the same image capture device and technique (standardized) or with varied devices and capture techniques (nonstandardized)---and test variability in performance when classifying skin cancer images in different populations. Methods: In all, 3 CNNs with the same architecture were trained. CNN nonstandardized (CNN-NS) was trained on 25,331 images taken from the International Skin Imaging Collaboration (ISIC) using different image capture devices. CNN standardized (CNN-S) was trained on 177,475 MoleMap images taken with the same capture device, and CNN standardized number 2 (CNN-S2) was trained on a subset of 25,331 standardized MoleMap images (matched for number and classes of training images to CNN-NS). These 3 models were then tested on 3 external test sets: 569 Danish images, the publicly available ISIC 2020 data set consisting of 33,126 images, and The University of Queensland (UQ) data set of 422 images. Primary outcome measures were sensitivity, specificity, and area under the receiver operating characteristic curve (AUROC). Teledermatology assessments available for the Danish data set were used to determine model performance compared to teledermatologists. Results: When tested on the 569 Danish images, CNN-S achieved an AUROC of 0.861 (95\% CI 0.830-0.889) and CNN-S2 achieved an AUROC of 0.831 (95\% CI 0.798-0.861; standardized models), with both outperforming CNN-NS (nonstandardized model; P=.001 and P=.009, respectively), which achieved an AUROC of 0.759 (95\% CI 0.722-0.794). When tested on 2 additional data sets (ISIC 2020 and UQ), CNN-S (P<.001 and P<.001, respectively) and CNN-S2 (P=.08 and P=.35, respectively) still outperformed CNN-NS. When the CNNs were matched to the mean sensitivity and specificity of the teledermatologists on the Danish data set, the models' resultant sensitivities and specificities were surpassed by the teledermatologists. 
However, when compared to CNN-S, the differences were not statistically significant (sensitivity: P=.10; specificity: P=.053). Performance across all CNN models as well as teledermatologists was influenced by image quality. Conclusions: CNNs trained on standardized images had improved performance and, therefore, greater generalizability in skin cancer classification when applied to unseen data sets. This finding is an important consideration for future algorithm development, regulation, and approval. ", doi="10.2196/35150", url="https://derma.jmir.org/2022/3/e35150" } @Article{info:doi/10.2196/35051, author="Dap, Matthieu and Chen, Bailiang and Banasiak, Claire and Hossu, Gabriela and Morel, Olivier and Beaumont, Marine and Bertholdt, Charline", title="Magnetic Resonance Imaging Angiography of Physiological and Pathological Pregnancy Placentas Ex Vivo: Protocol for a Prospective Pilot Study", journal="JMIR Res Protoc", year="2022", month="Aug", day="10", volume="11", number="8", pages="e35051", keywords="MRI", keywords="magnetic resonance imaging", keywords="placenta", keywords="IUGR", keywords="intrauterine growth restriction", keywords="preeclampsia", keywords="PE", keywords="uterine", keywords="intrauterine", keywords="pregnancy", keywords="vasculogenesis", keywords="pathology", keywords="physiology", abstract="Background: Preeclampsia (PE) and intrauterine growth restriction (IUGR) are 2 major pregnancy complications due to abnormal placental vasculogenesis. Data on whole fetoplacental vasculature are still missing; hence, these pathologies are not well understood. Ex vivo magnetic resonance imaging (MRI) angiography has been developed to characterize the human placental vasculature by injecting a contrast agent within the umbilical cord. Objective: The primary objective of this study is to compare the placental vascular architecture between normal and pathological pregnancies. This study's secondary objectives are to (1) compare texture features on MRI between groups (normal and pathological), (2) quantitatively compare the vascular architecture between both pathological groups (pathological IUGR, and pathological PE), (3) evaluate the quality of the histological examination in injected placentas, and (4) compare vascularization indices to histological characteristics. Methods: This is a prospective controlled study. We expect to include 100 placentas: 40 from normal pregnancies and 60 from pathological pregnancies (30 for IUGR and 30 for PE). Ex vivo MR image acquisition will be performed shortly after delivery and with preparation by injection of a contrast agent in the umbilical cord. The vascular architecture will be quantitatively described by vascularization indices measured from ex vivo MRI angiography data. Comparisons of vascularization indices and texture features in accordance with the group and within comparable gestational age will be also performed. After MR image acquisition, placental histopathological analysis will be performed. Results: The enrollment of women began in November 2019. In view of the recruitment capacity of our institution and the availability of the MRI, recruitment should be completed by March 2022. As of November 2021, we enrolled 70\% of the intended study population. Conclusions: This study protocol aims to provide information about the fetal side of placental vascular architecture in normal and pathological placenta through MRI. 
Trial Registration: ClinicalTrials.gov NCT04389099; https://clinicaltrials.gov/ct2/show/NCT04389099 International Registered Report Identifier (IRRID): DERR1-10.2196/35051 ", doi="10.2196/35051", url="https://www.researchprotocols.org/2022/8/e35051", url="http://www.ncbi.nlm.nih.gov/pubmed/35947435" } @Article{info:doi/10.2196/36427, author="Kurz, Alexander and Hauser, Katja and Mehrtens, Alexander Hendrik and Krieghoff-Henning, Eva and Hekler, Achim and Kather, Nikolas Jakob and Fr{\"o}hling, Stefan and von Kalle, Christof and Brinker, Josef Titus", title="Uncertainty Estimation in Medical Image Classification: Systematic Review", journal="JMIR Med Inform", year="2022", month="Aug", day="2", volume="10", number="8", pages="e36427", keywords="uncertainty estimation", keywords="network calibration", keywords="out-of-distribution detection", keywords="medical image classification", keywords="deep learning", keywords="medical imaging", abstract="Background: Deep neural networks are showing impressive results in different medical image classification tasks. However, for real-world applications, there is a need to estimate the network's uncertainty together with its prediction. Objective: In this review, we investigate in what form uncertainty estimation has been applied to the task of medical image classification. We also investigate which metrics are used to describe the effectiveness of the applied uncertainty estimation. Methods: Google Scholar, PubMed, IEEE Xplore, and ScienceDirect were screened for peer-reviewed studies, published between 2016 and 2021, that deal with uncertainty estimation in medical image classification. The search terms ``uncertainty,'' ``uncertainty estimation,'' ``network calibration,'' and ``out-of-distribution detection'' were used in combination with the terms ``medical images,'' ``medical image analysis,'' and ``medical image classification.'' Results: A total of 22 papers were chosen for detailed analysis through the systematic review process. This paper provides a table for a systematic comparison of the included works with respect to the applied method for estimating the uncertainty. Conclusions: The applied methods for estimating uncertainties are diverse, but the sampling-based methods Monte-Carlo Dropout and Deep Ensembles are used most frequently. We concluded that future works can investigate the benefits of uncertainty estimation in collaborative settings of artificial intelligence systems and human experts. International Registered Report Identifier (IRRID): RR2-10.2196/11936 ", doi="10.2196/36427", url="https://medinform.jmir.org/2022/8/e36427", url="http://www.ncbi.nlm.nih.gov/pubmed/35916701" } @Article{info:doi/10.2196/32892, author="Powell, C. Adam and Long, W. James and Deshmukh, U. Uday and Simmons, D.
Jeffrey", title="The Association Between the Use of Low-Slice Computed Tomography Machines and Downstream Care: Comparative Study of 16-Slice and 64-Slice Computed Tomography Angiography", journal="JMIR Form Res", year="2022", month="Jun", day="30", volume="6", number="6", pages="e32892", keywords="computed tomography", keywords="tomography", keywords="diagnostic imaging", keywords="outpatient", keywords="angiography", keywords="obsolescence", keywords="computed tomography angiography of the neck", keywords="neck", keywords="low-slice computed tomography", keywords="cervicocerebral angiography", keywords="downstream testing", keywords="computed tomography machine", keywords="invasive testing", keywords="machine", keywords="testing", keywords="invasive", abstract="Background: Although computed tomography (CT) studies on machines with more slices have reported higher positive and negative predictive values, the impact of using low-slice (16-slice) CT machines on downstream testing has not been well studied. In community outpatient settings, low-slice CT machines remain in use, although many hospitals have adopted higher-slice machines. Objective: This study examines the association between the use of low-slice CT machines and downstream invasive testing in the context of the CT angiography of the neck. Methods: Included health insurance claims pertained to adults with commercial or Medicare Advantage health plans who underwent the CT angiography of the neck. Site certification data were used to assign counts of slices to claims. Claims that were made in the 60 days after CT were examined for cervicocerebral angiography. The association between the number of slices and cervicocerebral angiography was evaluated by using a chi-square test and multivariate logistic regression. Results: Claims for 16-slice CT had a 5.1\% (33/641) downstream cervicocerebral angiography rate, while claims for 64-slice CT had a 3.1\% (35/1125) rate, and a significant difference (P=.03) was observed. An analysis that was adjusted for patient demographics also found a significant relationship (odds ratio 1.64, 95\% CI 1.00-2.69; P=.047). Conclusions: The use of low-slice CT machines in the community may impact the quality of care and result in more downstream testing. ", doi="10.2196/32892", url="https://formative.jmir.org/2022/6/e32892", url="http://www.ncbi.nlm.nih.gov/pubmed/35771601" } @Article{info:doi/10.2196/37365, author="Ali, Hazrat and Shah, Zubair", title="Combating COVID-19 Using Generative Adversarial Networks and Artificial Intelligence for Medical Images: Scoping Review", journal="JMIR Med Inform", year="2022", month="Jun", day="29", volume="10", number="6", pages="e37365", keywords="augmentation", keywords="artificial intelligence", keywords="COVID-19", keywords="diagnosis", keywords="generative adversarial networks", keywords="diagnostic", keywords="lung image", keywords="imaging", keywords="data augmentation", keywords="X-ray", keywords="CT scan", keywords="data scarcity", keywords="image data", keywords="neural network", keywords="clinical informatics", abstract="Background: Research on the diagnosis of COVID-19 using lung images is limited by the scarcity of imaging data. Generative adversarial networks (GANs) are popular for synthesis and data augmentation. GANs have been explored for data augmentation to enhance the performance of artificial intelligence (AI) methods for the diagnosis of COVID-19 within lung computed tomography (CT) and X-ray images. 
However, the role of GANs in overcoming data scarcity for COVID-19 is not well understood. Objective: This review presents a comprehensive study on the role of GANs in addressing the challenges related to COVID-19 data scarcity and diagnosis. It is the first review that summarizes different GAN methods and lung imaging data sets for COVID-19. It attempts to answer the questions related to applications of GANs, popular GAN architectures, frequently used image modalities, and the availability of source code. Methods: A search was conducted on 5 databases, namely PubMed, IEEEXplore, Association for Computing Machinery (ACM) Digital Library, Scopus, and Google Scholar. The search was conducted from October 11-13, 2021. The search was conducted using intervention keywords, such as ``generative adversarial networks'' and ``GANs,'' and application keywords, such as ``COVID-19'' and ``coronavirus.'' The review was performed following the Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews (PRISMA-ScR) guidelines for systematic and scoping reviews. Only those studies were included that reported GAN-based methods for analyzing chest X-ray images, chest CT images, and chest ultrasound images. Any studies that used deep learning methods but did not use GANs were excluded. No restrictions were imposed on the country of publication, study design, or outcomes. Only those studies that were in English and were published from 2020 to 2022 were included. No studies before 2020 were included. Results: This review included 57 full-text studies that reported the use of GANs for different applications in COVID-19 lung imaging data. Most of the studies (n=42, 74\%) used GANs for data augmentation to enhance the performance of AI techniques for COVID-19 diagnosis. Other popular applications of GANs were segmentation of lungs and superresolution of lung images. The cycleGAN and the conditional GAN were the most commonly used architectures, used in 9 studies each. In addition, 29 (51\%) studies used chest X-ray images, while 21 (37\%) studies used CT images for the training of GANs. For the majority of the studies (n=47, 82\%), the experiments were conducted and results were reported using publicly available data. A secondary evaluation of the results by radiologists/clinicians was reported by only 2 (4\%) studies. Conclusions: Studies have shown that GANs have great potential to address the data scarcity challenge for lung images in COVID-19. Data synthesized with GANs have been helpful to improve the training of the convolutional neural network (CNN) models trained for the diagnosis of COVID-19. In addition, GANs have also contributed to enhancing the CNNs' performance through the superresolution of the images and segmentation. This review also identified key limitations of the potential transformation of GAN-based methods in clinical applications. 
", doi="10.2196/37365", url="https://medinform.jmir.org/2022/6/e37365", url="http://www.ncbi.nlm.nih.gov/pubmed/35709336" } @Article{info:doi/10.2196/36931, author="Tanwani, Jaya and Alam, Fahad and Matava, Clyde and Choi, Stephen and McHardy, Paul and Singer, Oskar and Cheong, Geraldine and Wiegelmann, Julian", title="Development of a Head-Mounted Holographic Needle Guidance System for Enhanced Ultrasound-Guided Neuraxial Anesthesia: System Development and Observational Evaluation", journal="JMIR Form Res", year="2022", month="Jun", day="23", volume="6", number="6", pages="e36931", keywords="mixed reality", keywords="virtual reality", keywords="augmented reality", keywords="HoloLens", keywords="holograms", keywords="neuraxial anesthesia", abstract="Background: Neuraxial anesthesia is conventionally performed using a landmark-based technique. Preprocedural ultrasound is often used in challenging clinical scenarios to identify an ideal needle path. The procedure is then carried out by the operator recreating the ultrasound needle path from memory. We suggest that a needle guidance system using the Microsoft HoloLens mixed reality headset, which projects a hologram of the ideal needle path, can assist operators in replicating the correct needle angulation and result in fewer needle passes. Objective: The objective of the study was to develop software for the mixed reality HoloLens headset, which could be used to augment the performance of neuraxial anesthesia, and establish its face validity in lumbar spine phantom models. Methods: We developed an ultrasound transducer marker and software for the HoloLens, which registers the position and angulation of the ultrasound transducer during preprocedural scans. Once an image of a clear path from skin to the intrathecal space is acquired, a hologram of the ideal needle path is projected onto the user's visual field. The ultrasound probe is removed while the hologram remains in the correct spatial position to visualize the needle trajectory during the procedure as if conducting real-time ultrasound. User testing was performed using a lumbar spine phantom. Results: Preliminary work demonstrates that novice (2 anesthesia residents) and experienced operators (5 attending anesthesiologists) can rapidly learn to use mixed reality holograms to perform neuraxial anesthesia on lumbar spine phantoms. Conclusions: Our study shows promising results for performing neuraxial anesthesia in phantoms using the HoloLens. Although this may have wide-ranging implications for image-guided therapies, further study is required to quantify the accuracy and safety benefit of using holographic guidance. 
Trial Registration: ClinicalTrials.gov NCT04028284; https://clinicaltrials.gov/ct2/show/NCT04028284 ", doi="10.2196/36931", url="https://formative.jmir.org/2022/6/e36931", url="http://www.ncbi.nlm.nih.gov/pubmed/35737430" } @Article{info:doi/10.2196/30616, author="Le May, Sylvie and Genest, Christine and Hung, Nicole and Francoeur, Maxime and Guingo, Estelle and Paquette, Julie and Fortin, Olivier and Guay, St{\'e}phane", title="The Efficacy of Virtual Reality Game Preparation for Children Scheduled for Magnetic Resonance Imaging Procedures (IMAGINE): Protocol for a Randomized Controlled Trial", journal="JMIR Res Protoc", year="2022", month="Jun", day="13", volume="11", number="6", pages="e30616", keywords="virtual reality", keywords="children", keywords="video games", keywords="magnetic resonance imaging", keywords="anxiety", keywords="pediatrics", keywords="patient collaboration", keywords="patient preparation", keywords="biofeedback", abstract="Background: It is known that magnetic resonance imaging (MRI) procedures generate fear and anxiety. Children may become restless during scanning, which results in movement artifacts requiring the MRI procedure to be repeated with sedation. Few studies seem to have looked at the effect of immersive virtual reality (IVR) on anxiety in children scheduled for MRI scans and how to identify which children are more responsive. Objective: The aims of this study are 3-fold: develop an algorithm of predictability based on biofeedback, address feasibility and acceptability of preprocedural IVR game preparation for anxiety management during MRI procedures, and examine the efficacy of IVR game preparation compared with usual care for the management of procedural anxiety during MRI scans. Methods: This study will have 2 phases. We will first conduct a field test with 10 participants aged 7 to 17 years to develop a predictive algorithm for biofeedback solution and to address the feasibility and acceptability of the research. After the field test, a randomized controlled trial will be completed using a parallel design with 2 groups: an experimental group (preprocedural IVR game preparation) and a usual care group (standard care as per the radiology department's protocol) in an equal ratio of 49 participants per group for 98 participants. Recruitment will be carried out at a hospital in Quebec, Canada. The experimental group will receive a preprocedural IVR game preparation (IMAGINE) that offers an immersive simulation of the MRI scan. Participants will complete a questionnaire to assess the acceptability, feasibility, and incidence of side effects related to the intervention and the biofeedback device. Data collected will include sociodemographic and clinical characteristics as well as measures of procedure-related anxiety with the French-Canadian version of the State-Trait Anxiety Inventory for Children (score 1-3) and the Children's Fear Scale (score 0-4). Physiological signs will be noted and include heart rate, skin conductance, hand temperature, and muscle tension. Measures of the level of satisfaction of health care professionals, parents, and participants will also be collected. Analyses will be carried out according to the intention-to-treat principle, with a Cronbach $\alpha$ significance level of .05. Results: As of May 10, 2022, no participant was enrolled in the clinical trial. The data collection time frame is projected to be between April 1, 2022, and March 31, 2023. Findings will be disseminated through peer-reviewed publications. 
Conclusions: Our study provides an alternative method for anxiety management to better prepare patients for an awake MRI procedure. The biofeedback will help predict which children are more responsive to this type of intervention. This study will guide future medical practice by providing evidence-based knowledge on a nonpharmacological therapeutic modality for anxiety management in children scheduled for an MRI scan. Trial Registration: ClinicalTrials.gov NCT04988516; https://clinicaltrials.gov/ct2/show/NCT04988516 International Registered Report Identifier (IRRID): PRR1-10.2196/30616 ", doi="10.2196/30616", url="https://www.researchprotocols.org/2022/6/e30616", url="http://www.ncbi.nlm.nih.gov/pubmed/35700000" } @Article{info:doi/10.2196/27694, author="Chen, Pei-Chin and Lu, Yun-Ru and Kang, Yi-No and Chang, Chun-Chao", title="The Accuracy of Artificial Intelligence in the Endoscopic Diagnosis of Early Gastric Cancer: Pooled Analysis Study", journal="J Med Internet Res", year="2022", month="May", day="16", volume="24", number="5", pages="e27694", keywords="artificial intelligence", keywords="early gastric cancer", keywords="endoscopy", abstract="Background: Artificial intelligence (AI) for gastric cancer diagnosis has been discussed in recent years. The role of AI in early gastric cancer is more important than in advanced gastric cancer since early gastric cancer is not easily identified in clinical practice. However, to our knowledge, past syntheses appear to have limited focus on the populations with early gastric cancer. Objective: The purpose of this study is to evaluate the diagnostic accuracy of AI in the diagnosis of early gastric cancer from endoscopic images. Methods: We conducted a systematic review from database inception to June 2020 of all studies assessing the performance of AI in the endoscopic diagnosis of early gastric cancer. Studies not concerning early gastric cancer were excluded. The outcome of interest was the diagnostic accuracy (comprising sensitivity, specificity, and accuracy) of AI systems. Study quality was assessed on the basis of the revised Quality Assessment of Diagnostic Accuracy Studies. Meta-analysis was primarily based on a bivariate mixed-effects model. A summary receiver operating curve and a hierarchical summary receiver operating curve were constructed, and the area under the curve was computed. Results: We analyzed 12 retrospective case control studies (n=11,685) in which AI identified early gastric cancer from endoscopic images. The pooled sensitivity and specificity of AI for early gastric cancer diagnosis were 0.86 (95\% CI 0.75-0.92) and 0.90 (95\% CI 0.84-0.93), respectively. The area under the curve was 0.94. Sensitivity analysis of studies using support vector machines and narrow-band imaging demonstrated more consistent results. Conclusions: For early gastric cancer, to our knowledge, this was the first synthesis study on the use of endoscopic images in AI in diagnosis. AI may support the diagnosis of early gastric cancer. However, the collocation of imaging techniques and optimal algorithms remain unclear. Competing models of AI for the diagnosis of early gastric cancer are worthy of future investigation. 
Trial Registration: PROSPERO CRD42020193223; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=193223 ", doi="10.2196/27694", url="https://www.jmir.org/2022/5/e27694", url="http://www.ncbi.nlm.nih.gov/pubmed/35576561" } @Article{info:doi/10.2196/28880, author="Liao, JunHua and Liu, LunXin and Duan, HaiHan and Huang, YunZhi and Zhou, LiangXue and Chen, LiangYin and Wang, ChaoHua", title="Using a Convolutional Neural Network and Convolutional Long Short-term Memory to Automatically Detect Aneurysms on 2D Digital Subtraction Angiography Images: Framework Development and Validation", journal="JMIR Med Inform", year="2022", month="Mar", day="16", volume="10", number="3", pages="e28880", keywords="convolutional neural network", keywords="convolutional long short-term memory", keywords="cerebral aneurysm", keywords="deep learning", abstract="Background: It is hard to distinguish cerebral aneurysms from overlapping vessels in 2D digital subtraction angiography (DSA) images due to these images' lack of spatial information. Objective: The aims of this study were to (1) construct a deep learning diagnostic system to improve the ability to detect posterior communicating artery aneurysms on 2D DSA images and (2) validate the efficiency of the deep learning diagnostic system in 2D DSA aneurysm detection. Methods: We proposed a 2-stage detection system. First, we established the region localization stage to automatically locate specific detection regions of raw 2D DSA sequences. Second, in the intracranial aneurysm detection stage, we constructed a bi-input+RetinaNet+convolutional long short-term memory (C-LSTM) framework to compare its performance for aneurysm detection with that of 3 existing frameworks. Each of the frameworks had a 5-fold cross-validation scheme. The receiver operating characteristic curve, the area under the curve (AUC) value, mean average precision, sensitivity, specificity, and accuracy were used to assess the abilities of different frameworks. Results: A total of 255 patients with posterior communicating artery aneurysms and 20 patients without aneurysms were included in this study. The best AUC values of the RetinaNet, RetinaNet+C-LSTM, bi-input+RetinaNet, and bi-input+RetinaNet+C-LSTM frameworks were 0.95, 0.96, 0.92, and 0.97, respectively. The mean sensitivities of the RetinaNet, RetinaNet+C-LSTM, bi-input+RetinaNet, and bi-input+RetinaNet+C-LSTM frameworks and human experts were 89\% (range 67.02\%-98.43\%), 88\% (range 65.76\%-98.06\%), 87\% (range 64.53\%-97.66\%), 89\% (range 67.02\%-98.43\%), and 90\% (range 68.30\%-98.77\%), respectively. The mean specificities of the RetinaNet, RetinaNet+C-LSTM, bi-input+RetinaNet, and bi-input+RetinaNet+C-LSTM frameworks and human experts were 80\% (range 56.34\%-94.27\%), 89\% (range 67.02\%-98.43\%), 86\% (range 63.31\%-97.24\%), 93\% (range 72.30\%-99.56\%), and 90\% (range 68.30\%-98.77\%), respectively. The mean accuracies of the RetinaNet, RetinaNet+C-LSTM, bi-input+RetinaNet, and bi-input+RetinaNet+C-LSTM frameworks and human experts were 84.50\% (range 69.57\%-93.97\%), 88.50\% (range 74.44\%-96.39\%), 86.50\% (range 71.97\%-95.22\%), 91\% (range 77.63\%-97.72\%), and 90\% (range 76.34\%-97.21\%), respectively. Conclusions: According to our results, more spatial and temporal information can help improve the performance of the frameworks. Therefore, the bi-input+RetinaNet+C-LSTM framework had the best performance when compared to that of the other frameworks. 
Our study demonstrates that our system can assist physicians in detecting intracranial aneurysms on 2D DSA images. ", doi="10.2196/28880", url="https://medinform.jmir.org/2022/3/e28880", url="http://www.ncbi.nlm.nih.gov/pubmed/35294371" } @Article{info:doi/10.2196/34896, author="Rezk, Eman and Eltorki, Mohamed and El-Dakhakhni, Wael", title="Leveraging Artificial Intelligence to Improve the Diversity of Dermatological Skin Color Pathology: Protocol for an Algorithm Development and Validation Study", journal="JMIR Res Protoc", year="2022", month="Mar", day="8", volume="11", number="3", pages="e34896", keywords="artificial intelligence", keywords="skin cancer", keywords="skin tone diversity", keywords="people of color", keywords="image blending", keywords="deep learning", keywords="classification", keywords="early diagnosis", abstract="Background: The paucity of dark skin images in dermatological textbooks and atlases is a reflection of racial injustice in medicine. The underrepresentation of dark skin images makes diagnosing skin pathology in people of color challenging. For conditions such as skin cancer, in which early diagnosis makes a difference between life and death, people of color have worse prognoses and lower survival rates than people with lighter skin tones as a result of delayed or incorrect diagnoses. Recent advances in artificial intelligence, such as deep learning, offer a potential solution that can be achieved by diversifying the mostly light-skin image repositories through generating images for darker skin tones. Thus, facilitating the development of inclusive cancer early diagnosis systems that are trained and tested on diverse images that truly represent human skin tones. Objective: We aim to develop and evaluate an artificial intelligence--based skin cancer early detection system for all skin tones using clinical images. Methods: This study consists of four phases: (1) Publicly available skin image repositories will be analyzed to quantify the underrepresentation of darker skin tones, (2) Images will be generated for the underrepresented skin tones, (3) Generated images will be extensively evaluated for realism and disease presentation with quantitative image quality assessment as well as qualitative human expert and nonexpert ratings, and (4) The images will be utilized with available light-skin images to develop a robust skin cancer early detection model. Results: This study started in September 2020. The first phase of quantifying the underrepresentation of darker skin tones was completed in March 2021. The second phase of generating the images is in progress and will be completed by March 2022. The third phase is expected to be completed by May 2022, and the final phase is expected to be completed by September 2022. Conclusions: This work is the first step toward expanding skin tone diversity in existing image databases to address the current gap in the underrepresentation of darker skin tones. Once validated, the image bank will be a valuable resource that can potentially be utilized in physician education and in research applications. Furthermore, generated images are expected to improve the generalizability of skin cancer detection. When completed, the model will assist family physicians and general practitioners in evaluating skin lesion severity and in efficient triaging for referral to expert dermatologists. In addition, the model can assist dermatologists in diagnosing skin lesions. 
International Registered Report Identifier (IRRID): DERR1-10.2196/34896 ", doi="10.2196/34896", url="https://www.researchprotocols.org/2022/3/e34896", url="http://www.ncbi.nlm.nih.gov/pubmed/34983017" } @Article{info:doi/10.2196/27394, author="Peyret, Remy and alSaeed, Duaa and Khelifi, Fouad and Al-Ghreimil, Nadia and Al-Baity, Heyam and Bouridane, Ahmed", title="Convolutional Neural Network--Based Automatic Classification of Colorectal and Prostate Tumor Biopsies Using Multispectral Imagery: System Development Study", journal="JMIR Bioinform Biotech", year="2022", month="Feb", day="9", volume="3", number="1", pages="e27394", keywords="convolutional neural networks", keywords="classification", keywords="colorectal tumor", keywords="prostate tumor", keywords="machine learning", keywords="image processing", abstract="Background: Colorectal and prostate cancers are the most common types of cancer in men worldwide. To diagnose colorectal and prostate cancer, a pathologist performs a histological analysis on needle biopsy samples. This manual process is time-consuming and error-prone, resulting in high intra- and interobserver variability, which affects diagnosis reliability. Objective: This study aims to develop an automatic computerized system for diagnosing colorectal and prostate tumors by using images of biopsy samples to reduce time and diagnosis error rates associated with human analysis. Methods: In this study, we proposed a convolutional neural network (CNN) model for classifying colorectal and prostate tumors from multispectral images of biopsy samples. The key idea was to remove the last block of the convolutional layers and halve the number of filters per layer. Results: Our results showed excellent performance, with an average test accuracy of 99.8\% and 99.5\% for the prostate and colorectal data sets, respectively. The system showed excellent performance when compared with pretrained CNNs and other classification methods, as it avoids the preprocessing phase while using a single CNN model for the whole classification task. Overall, the proposed CNN architecture was globally the best-performing system for classifying colorectal and prostate tumor images. Conclusions: The proposed CNN architecture was detailed and compared with previously trained network models used as feature extractors. These CNNs were also compared with other classification techniques. As opposed to pretrained CNNs and other classification approaches, the proposed CNN yielded excellent results. The computational complexity of the CNNs was also investigated, and it was shown that the proposed CNN is better at classifying images than pretrained networks because it does not require preprocessing. Thus, the overall analysis was that the proposed CNN architecture was globally the best-performing system for classifying colorectal and prostate tumor images. 
", doi="10.2196/27394", url="https://bioinform.jmir.org/2022/1/e27394" } @Article{info:doi/10.2196/34415, author="Ko, Hoon and Huh, Jimi and Kim, Won Kyung and Chung, Heewon and Ko, Yousun and Kim, Keun Jai and Lee, Hee Jei and Lee, Jinseok", title="A Deep Residual U-Net Algorithm for Automatic Detection and Quantification of Ascites on Abdominopelvic Computed Tomography Images Acquired in the Emergency Department: Model Development and Validation", journal="J Med Internet Res", year="2022", month="Jan", day="3", volume="24", number="1", pages="e34415", keywords="ascites", keywords="computed tomography", keywords="deep residual U-Net", keywords="artificial intelligence", abstract="Background: Detection and quantification of intra-abdominal free fluid (ie, ascites) on computed tomography (CT) images are essential processes for finding emergent or urgent conditions in patients. In an emergency department, automatic detection and quantification of ascites will be beneficial. Objective: We aimed to develop an artificial intelligence (AI) algorithm for the automatic detection and quantification of ascites simultaneously using a single deep learning model (DLM). Methods: We developed 2D DLMs based on deep residual U-Net, U-Net, bidirectional U-Net, and recurrent residual U-Net (R2U-Net) algorithms to segment areas of ascites on abdominopelvic CT images. Based on segmentation results, the DLMs detected ascites by classifying CT images into ascites images and nonascites images. The AI algorithms were trained using 6337 CT images from 160 subjects (80 with ascites and 80 without ascites) and tested using 1635 CT images from 40 subjects (20 with ascites and 20 without ascites). The performance of the AI algorithms was evaluated for diagnostic accuracy of ascites detection and for segmentation accuracy of ascites areas. Of these DLMs, we proposed an AI algorithm with the best performance. Results: The segmentation accuracy was the highest for the deep residual U-Net model with a mean intersection over union (mIoU) value of 0.87, followed by U-Net, bidirectional U-Net, and R2U-Net models (mIoU values of 0.80, 0.77, and 0.67, respectively). The detection accuracy was the highest for the deep residual U-Net model (0.96), followed by U-Net, bidirectional U-Net, and R2U-Net models (0.90, 0.88, and 0.82, respectively). The deep residual U-Net model also achieved high sensitivity (0.96) and high specificity (0.96). Conclusions: We propose a deep residual U-Net--based AI algorithm for automatic detection and quantification of ascites on abdominopelvic CT scans, which provides excellent performance. 
", doi="10.2196/34415", url="https://www.jmir.org/2022/1/e34415", url="http://www.ncbi.nlm.nih.gov/pubmed/34982041" } @Article{info:doi/10.2196/25328, author="Madalinski, Mariusz and Prudham, Roger", title="Can Real-time Computer-Aided Detection Systems Diminish the Risk of Postcolonoscopy Colorectal Cancer?", journal="JMIR Med Inform", year="2021", month="Dec", day="24", volume="9", number="12", pages="e25328", keywords="artificial intelligence", keywords="colonoscopy", keywords="adenoma", keywords="real-time computer-aided detection", keywords="colonic polyp", doi="10.2196/25328", url="https://medinform.jmir.org/2021/12/e25328", url="http://www.ncbi.nlm.nih.gov/pubmed/34571490" } @Article{info:doi/10.2196/33267, author="Bang, Seok Chang and Lee, Jun Jae and Baik, Ho Gwang", title="Computer-Aided Diagnosis of Gastrointestinal Ulcer and Hemorrhage Using Wireless Capsule Endoscopy: Systematic Review and Diagnostic Test Accuracy Meta-analysis", journal="J Med Internet Res", year="2021", month="Dec", day="14", volume="23", number="12", pages="e33267", keywords="artificial intelligence", keywords="computer-aided diagnosis", keywords="capsule endoscopy", keywords="ulcer", keywords="hemorrhage", keywords="gastrointestinal", keywords="endoscopy", keywords="review", keywords="accuracy", keywords="meta-analysis", keywords="diagnostic", keywords="performance", keywords="machine learning", keywords="prediction models", abstract="Background: Interpretation of capsule endoscopy images or movies is operator-dependent and time-consuming. As a result, computer-aided diagnosis (CAD) has been applied to enhance the efficacy and accuracy of the review process. Two previous meta-analyses reported the diagnostic performance of CAD models for gastrointestinal ulcers or hemorrhage in capsule endoscopy. However, insufficient systematic reviews have been conducted, which cannot determine the real diagnostic validity of CAD models. Objective: To evaluate the diagnostic test accuracy of CAD models for gastrointestinal ulcers or hemorrhage using wireless capsule endoscopic images. Methods: We conducted core databases searching for studies based on CAD models for the diagnosis of ulcers or hemorrhage using capsule endoscopy and presenting data on diagnostic performance. Systematic review and diagnostic test accuracy meta-analysis were performed. Results: Overall, 39 studies were included. The pooled area under the curve, sensitivity, specificity, and diagnostic odds ratio of CAD models for the diagnosis of ulcers (or erosions) were .97 (95\% confidence interval, .95--.98), .93 (.89--.95), .92 (.89--.94), and 138 (79--243), respectively. The pooled area under the curve, sensitivity, specificity, and diagnostic odds ratio of CAD models for the diagnosis of hemorrhage (or angioectasia) were .99 (.98--.99), .96 (.94--0.97), .97 (.95--.99), and 888 (343--2303), respectively. Subgroup analyses showed robust results. Meta-regression showed that published year, number of training images, and target disease (ulcers vs erosions, hemorrhage vs angioectasia) was found to be the source of heterogeneity. No publication bias was detected. Conclusions: CAD models showed high performance for the optical diagnosis of gastrointestinal ulcer and hemorrhage in wireless capsule endoscopy. 
", doi="10.2196/33267", url="https://www.jmir.org/2021/12/e33267", url="http://www.ncbi.nlm.nih.gov/pubmed/34904949" } @Article{info:doi/10.2196/30066, author="Kim, Taewoo and Lee, Hyun Dong and Park, Eun-Kee and Choi, Sanghun", title="Deep Learning Techniques for Fatty Liver Using Multi-View Ultrasound Images Scanned by Different Scanners: Development and Validation Study", journal="JMIR Med Inform", year="2021", month="Nov", day="18", volume="9", number="11", pages="e30066", keywords="fatty liver", keywords="deep learning", keywords="transfer learning", keywords="classification", keywords="regression", keywords="magnetic resonance imaging--proton density fat fraction", keywords="multi-view ultrasound images", keywords="artificial intelligence", keywords="machine imaging", keywords="imaging", keywords="informatics", keywords="fatty liver disease", keywords="detection", keywords="diagnosis", abstract="Background: Fat fraction values obtained from magnetic resonance imaging (MRI) can be used to obtain an accurate diagnosis of fatty liver diseases. However, MRI is expensive and cannot be performed for everyone. Objective: In this study, we aim to develop multi-view ultrasound image--based convolutional deep learning models to detect fatty liver disease and yield fat fraction values. Methods: We extracted 90 ultrasound images of the right intercostal view and 90 ultrasound images of the right intercostal view containing the right renal cortex from 39 cases of fatty liver (MRI--proton density fat fraction [MRI--PDFF] ? 5\%) and 51 normal subjects (MRI--PDFF < 5\%), with MRI--PDFF values obtained from Good Gang-An Hospital. We obtained combined liver and kidney-liver (CLKL) images to train the deep learning models and developed classification and regression models based on the VGG19 model to classify fatty liver disease and yield fat fraction values. We employed the data augmentation techniques such as flip and rotation to prevent the deep learning model from overfitting. We determined the deep learning model with performance metrics such as accuracy, sensitivity, specificity, and coefficient of determination (R2). Results: In demographic information, all metrics such as age and sex were similar between the two groups---fatty liver disease and normal subjects. In classification, the model trained on CLKL images achieved 80.1\% accuracy, 86.2\% precision, and 80.5\% specificity to detect fatty liver disease. In regression, the predicted fat fraction values of the regression model trained on CLKL images correlated with MRI--PDFF values (R2=0.633), indicating that the predicted fat fraction values were moderately estimated. Conclusions: With deep learning techniques and multi-view ultrasound images, it is potentially possible to replace MRI--PDFF values with deep learning predictions for detecting fatty liver disease and estimating fat fraction values. 
", doi="10.2196/30066", url="https://medinform.jmir.org/2021/11/e30066", url="http://www.ncbi.nlm.nih.gov/pubmed/34792476" } @Article{info:doi/10.2196/24936, author="AlShehry, Faiez Nawal and Shanker, Raja and Zaidi, Ahmed Syed Ziauddin and AlGhmlas, Fahad and Motabi, Hussein Ibraheem and Iqbal, Shahid and Butt, Ali Ahmad and AlShehri, Hassan and Tailor, Khan Imran and Altaf, Yasir Syed and AlGhamdi, Mubarak and Marie, Mohammed and AlFayez, Mansour and Al Zahrani, Kamal and Dwaimah, Mohammed and Al-Halouli, Tahani and Al-Shakweer, Wafaa and AlShehery, Zaher Maied and Zaidi, Zia Abdul Rehman and Gill, Munawar Atta and Albtoosh, Mohammed Belal and Ahmed, Musab", title="Role of 18F-Fluorodeoxyglucose--Positron Emission Tomography/Computed Tomography Imaging in the Prediction of Prognosis in Patients With Indolent Lymphoma: Prospective Study", journal="JMIR Form Res", year="2021", month="Nov", day="12", volume="5", number="11", pages="e24936", keywords="positron emission tomography", keywords="lymphoma", keywords="prognosis", keywords="indolent lymphoma", keywords="SUVmax", keywords="Deauville criteria", abstract="Background: The role of fluorodeoxyglucose--positron emission tomography/computed tomography (FDG-PET/CT) in indolent lymphoma has been minimally studied. Objective: This study aims to assess the value of FDG-PET/CT in predicting the prognosis of indolent lymphoma. Methods: We prospectively recruited 42 patients with indolent lymphoma. A total of 2 patients were excluded, and 40 underwent baseline PET/CT and follow-up at various time points. A total of 9 patients were observed only, 7 received 4 doses of rituximab alone, and 24 received chemoimmunotherapy. Metabolic response on follow-up PET/CT was assessed using the maximum standardized uptake value (SUVmax) and Deauville criteria (DC). We aimed to obtain the best SUVmax and DC to predict optimal survival rates, risk stratification, and optimize therapeutic strategies. The mean follow-up from the initial diagnosis was 33.83 months. Results: SUVmax <4.35 at interim PET/CT provided the best discrimination, with a progression-free survival (PFS) of 100\% and a median survival time of 106.67 months compared with SUVmax ?4.35 (P=.04), which had a PFS of 43.8\% and a median survival time of 50.17 months. This cutoff was also valuable in predicting overall survival at baseline, that is, 100\% overall survival with baseline SUVmax <4.35, versus 58.4\% for SUVmax ?4.35 (P=.13). The overall survival of patients with a baseline DC score <3.0 was 100\%, with a median overall survival of 106.67 months. Conclusions: We demonstrated the utility of PET/CT in indolent lymphomas. SUVmax (<4.35 vs ?4.35) on interim PET/CT performed best in predicting PFS. ", doi="10.2196/24936", url="https://formative.jmir.org/2021/11/e24936", url="http://www.ncbi.nlm.nih.gov/pubmed/34508363" } @Article{info:doi/10.2196/30259, author="Widmann, N. Catherine and Wieberneit, Michelle and Bieler, Luzie and Bernsen, Sarah and Gr{\"a}fenk{\"a}mper, Robin and Brosseron, Frederic and Schmeel, Carsten and Tacik, Pawel and Skowasch, Dirk and Radbruch, Alexander and Heneka, T. 
Michael", title="Longitudinal Neurocognitive and Pulmonological Profile of Long COVID-19: Protocol for the COVIMMUNE-Clin Study", journal="JMIR Res Protoc", year="2021", month="Nov", day="11", volume="10", number="11", pages="e30259", keywords="SARS-CoV-2", keywords="COVID-19", keywords="postacute COVID-19 syndrome", keywords="cognition", keywords="neuropsychology", keywords="lung", keywords="magnetic resonance imaging", abstract="Background: There is a dearth of information about ``brain fog,'' characterized by concentration, word-finding, or memory problems, which has been listed in the new World Health Organization provisional classification ``U09.9 Post-COVID-19 Condition.'' Moreover, the extent to which these symptoms may be associated with neurological, pulmonary, or psychiatric difficulties is unclear. Objective: This ongoing cohort study aims to carefully assess neurocognitive function in the context of the neurological, psychiatric, and pulmonary sequelae of SARS-CoV-2 infection among patients with asymptomatic/mild and severe cases of COVID-19 after remission, including actively recruited healthy controls. Methods: A total of 150 participants will be included in this pilot study. The cohort will comprise patients who tested positive for SARS-CoV-2 infection with either an asymptomatic course or a mild course defined as no symptoms except for olfactory and taste dysfunction (n=50), patients who tested positive for SARS-CoV-2 infection with a severe disease course (n=50), and a healthy control group (n=50) with similar age and sex distribution based on frequency matching. A comprehensive neuropsychological assessment will be performed comprising nuanced aspects of complex attention, including language, executive function, verbal and visual learning, and memory. Psychiatric, personality, social and lifestyle factors, sleep, and fatigue will be evaluated. Brain magnetic resonance imaging, neurological and physical assessment, and pulmonological and lung function examinations (including body plethysmography, diffusion capacity, clinical assessments, and questionnaires) will also be performed. Three visits are planned with comprehensive testing at the baseline and 12-month visits, along with brief neurological and neuropsychological examinations at the 6-month assessment. Blood-based biomarkers of neurodegeneration will be quantified at baseline and 12-month follow-up. Results: At the time of submission, the study had begun recruitment through telephone and in-person screenings. The first patient was enrolled in the study at the beginning of April 2021. Interim data analysis of baseline information is expected to be complete by December 2021 and study completion is expected at the end of December 2022. Preliminary group comparisons indicate worse word list learning, short- and long-delayed verbal recall, and verbal recognition in both patient cohorts compared with those of the healthy control group, adjusted for age and sex. Initial volumetric comparisons show smaller grey matter, frontal, and temporal brain volumes in both patient groups compared with those of healthy controls. These results are quite robust but are neither final nor placed in the needed context intended at study completion. Conclusions: To the best of our knowledge, this is the first study to include objective and comprehensive longitudinal analyses of neurocognitive sequelae of COVID-19 in an extreme group comparison stratified by disease severity with healthy controls actively recruited during the pandemic. 
Results from this study will contribute to the nascent literature on the prolonged effects of COVID-19 on neurocognitive performance via our coassessment of neuroradiological, neurological, pulmonary, psychiatric, and lifestyle factors. Trial Registration: International Clinical Trials Registry Platform DRKS00023806; https://trialsearch.who.int/Trial2.aspx?TrialID=DRKS00023806 International Registered Report Identifier (IRRID): DERR1-10.2196/30259 ", doi="10.2196/30259", url="https://www.researchprotocols.org/2021/11/e30259", url="http://www.ncbi.nlm.nih.gov/pubmed/34559059" } @Article{info:doi/10.2196/27875, author="Engelhard, M. Matthew and D'Arcy, Joshua and Oliver, A. Jason and Kozink, Rachel and McClernon, Joseph F.", title="Prediction of Smoking Risk From Repeated Sampling of Environmental Images: Model Validation", journal="J Med Internet Res", year="2021", month="Nov", day="1", volume="23", number="11", pages="e27875", keywords="smoking", keywords="smoking cessation", keywords="machine learning", keywords="computer vision", keywords="digital health", keywords="eHealth", keywords="behavior", keywords="CNN", keywords="neural network", keywords="artificial intelligence", keywords="AI", keywords="images", keywords="environment", keywords="ecological momentary assessment", keywords="mobile health", keywords="mHealth", keywords="mobile phone", abstract="Background: Viewing their habitual smoking environments increases smokers' craving and smoking behaviors in laboratory settings. A deep learning approach can differentiate between habitual smoking versus nonsmoking environments, suggesting that it may be possible to predict environment-associated smoking risk from continuously acquired images of smokers' daily environments. Objective: In this study, we aim to predict environment-associated risk from continuously acquired images of smokers' daily environments. We also aim to understand how model performance varies by location type, as reported by participants. Methods: Smokers from Durham, North Carolina and surrounding areas completed ecological momentary assessments both immediately after smoking and at randomly selected times throughout the day for 2 weeks. At each assessment, participants took a picture of their current environment and completed a questionnaire on smoking, craving, and the environmental setting. A convolutional neural network--based model was trained to predict smoking, craving, whether smoking was permitted in the current environment and whether the participant was outside based on images of participants' daily environments, the time since their last cigarette, and baseline data on daily smoking habits. Prediction performance, quantified using the area under the receiver operating characteristic curve (AUC) and average precision (AP), was assessed for out-of-sample prediction as well as personalized models trained on images from days 1 to 10. The models were optimized for mobile devices and implemented as a smartphone app. Results: A total of 48 participants completed the study, and 8008 images were acquired. The personalized models were highly effective in predicting smoking risk (AUC=0.827; AP=0.882), craving (AUC=0.837; AP=0.798), whether smoking was permitted in the current environment (AUC=0.932; AP=0.981), and whether the participant was outside (AUC=0.977; AP=0.956). 
The out-of-sample models were also effective in predicting smoking risk (AUC=0.723; AP=0.785), whether smoking was permitted in the current environment (AUC=0.815; AP=0.937), and whether the participant was outside (AUC=0.949; AP=0.922); however, they were not effective in predicting craving (AUC=0.522; AP=0.427). Omitting image features reduced AUC by over 0.1 when predicting all outcomes except craving. Prediction of smoking was more effective for participants whose self-reported location type was more variable (Spearman $\rho$=0.48; P=.001). Conclusions: Images of daily environments can be used to effectively predict smoking risk. Model personalization, achieved by incorporating information about daily smoking habits and training on participant-specific images, further improves prediction performance. Environment-associated smoking risk can be assessed in real time on a mobile device and can be incorporated into device-based smoking cessation interventions. ", doi="10.2196/27875", url="https://www.jmir.org/2021/11/e27875", url="http://www.ncbi.nlm.nih.gov/pubmed/34723819" } @Article{info:doi/10.2196/25378, author="Tsuji, Shintaro and Wen, Andrew and Takahashi, Naoki and Zhang, Hongjian and Ogasawara, Katsuhiko and Jiang, Gouqian", title="Developing a RadLex-Based Named Entity Recognition Tool for Mining Textual Radiology Reports: Development and Performance Evaluation Study", journal="J Med Internet Res", year="2021", month="Oct", day="29", volume="23", number="10", pages="e25378", keywords="named entity recognition (NER)", keywords="natural language processing (NLP)", keywords="RadLex", keywords="ontology", keywords="stem term", abstract="Background: Named entity recognition (NER) plays an important role in extracting the features of descriptions such as the name and location of a disease for mining free-text radiology reports. However, the performance of existing NER tools is limited because the number of entities that can be extracted depends on the dictionary lookup. In particular, the recognition of compound terms is very complicated because of the variety of patterns. Objective: The aim of this study is to develop and evaluate an NER tool concerned with compound terms using RadLex for mining free-text radiology reports. Methods: We leveraged the clinical Text Analysis and Knowledge Extraction System (cTAKES) to develop customized pipelines using both RadLex and SentiWordNet (a general purpose dictionary). We manually annotated 400 radiology reports for compound terms in noun phrases and used them as the gold standard for performance evaluation (precision, recall, and F-measure). In addition, we created a compound terms--enhanced dictionary (CtED) by analyzing false negatives and false positives and applied it to another 100 radiology reports for validation. We also evaluated the stem terms of compound terms by defining two measures: occurrence ratio (OR) and matching ratio (MR). Results: The F-measure of cTAKES+RadLex+general purpose dictionary was 30.9\% (precision 73.3\% and recall 19.6\%) and that of the combined CtED was 63.1\% (precision 82.8\% and recall 51\%). The OR indicated that the stem terms of effusion, node, tube, and disease were used frequently, but it still lacks capturing compound terms. The MR showed that 71.85\% (9411/13,098) of the stem terms matched with that of the ontologies, and RadLex improved approximately 22\% of the MR from the cTAKES default dictionary. 
The OR and MR revealed that the characteristics of stem terms would have the potential to help generate synonymous phrases using the ontologies. Conclusions: We developed a RadLex-based customized pipeline for parsing radiology reports and demonstrated that CtED and stem term analysis have the potential to improve dictionary-based NER performance with regard to expanding vocabularies. ", doi="10.2196/25378", url="https://www.jmir.org/2021/10/e25378", url="http://www.ncbi.nlm.nih.gov/pubmed/34714247" } @Article{info:doi/10.2196/27706, author="Cilia, Federica and Carette, Romuald and Elbattah, Mahmoud and Dequen, Gilles and Gu{\'e}rin, Jean-Luc and Bosche, J{\'e}r{\^o}me and Vandromme, Luc and Le Driant, Barbara", title="Computer-Aided Screening of Autism Spectrum Disorder: Eye-Tracking Study Using Data Visualization and Deep Learning", journal="JMIR Hum Factors", year="2021", month="Oct", day="25", volume="8", number="4", pages="e27706", keywords="autism spectrum disorder", keywords="screening", keywords="eye tracking", keywords="data visualization", keywords="machine learning", keywords="deep learning", keywords="AI", keywords="ASS", keywords="artificial intelligence", keywords="ML", keywords="adolescent", keywords="diagnosis", abstract="Background: The early diagnosis of autism spectrum disorder (ASD) is highly desirable but remains a challenging task, which requires a set of cognitive tests and hours of clinical examinations. In addition, variations of such symptoms exist, which can make the identification of ASD even more difficult. Although diagnosis tests are largely developed by experts, they are still subject to human bias. In this respect, computer-assisted technologies can play a key role in supporting the screening process. Objective: This paper follows on the path of using eye tracking as an integrated part of screening assessment in ASD based on the characteristic elements of the eye gaze. This study adds to the mounting efforts in using eye tracking technology to support the process of ASD screening. Methods: The proposed approach basically aims to integrate eye tracking with visualization and machine learning. A group of 59 school-aged participants took part in the study. The participants were invited to watch a set of age-appropriate photographs and videos related to social cognition. Initially, eye-tracking scanpaths were transformed into a visual representation as a set of images. Subsequently, a convolutional neural network was trained to perform the image classification task. Results: The experimental results demonstrated that the visual representation could simplify the diagnostic task and also attained high accuracy. Specifically, the convolutional neural network model could achieve a promising classification accuracy. This largely suggests that visualizations could successfully encode the information of gaze motion and its underlying dynamics. Further, we explored possible correlations between the autism severity and the dynamics of eye movement based on the maximal information coefficient. The findings primarily show that the combination of eye tracking, visualization, and machine learning has strong potential in developing an objective tool to assist in the screening of ASD. Conclusions: Broadly speaking, the approach we propose could be transferable to screening for other disorders, particularly neurodevelopmental disorders. 
", doi="10.2196/27706", url="https://humanfactors.jmir.org/2021/4/e27706", url="http://www.ncbi.nlm.nih.gov/pubmed/34694238" } @Article{info:doi/10.2196/31697, author="Aggarwal, Pushkar", title="Performance of Artificial Intelligence Imaging Models in Detecting Dermatological Manifestations in Higher Fitzpatrick Skin Color Classifications", journal="JMIR Dermatol", year="2021", month="Oct", day="12", volume="4", number="2", pages="e31697", keywords="deep learning", keywords="melanoma", keywords="basal cell carcinoma", keywords="skin of color", keywords="image recognition", keywords="dermatology", keywords="disease", keywords="convolutional neural network", keywords="specificity", keywords="prediction", keywords="artificial intelligence", keywords="skin color", keywords="skin tone", abstract="Background: The performance of deep-learning image recognition models is below par when applied to images with Fitzpatrick classification skin types 4 and 5. Objective: The objective of this research was to assess whether image recognition models perform differently when differentiating between dermatological diseases in individuals with darker skin color (Fitzpatrick skin types 4 and 5) than when differentiating between the same dermatological diseases in Caucasians (Fitzpatrick skin types 1, 2, and 3) when both models are trained on the same number of images. Methods: Two image recognition models were trained, validated, and tested. The goal of each model was to differentiate between melanoma and basal cell carcinoma. Open-source images of melanoma and basal cell carcinoma were acquired from the Hellenic Dermatological Atlas, the Dermatology Atlas, the Interactive Dermatology Atlas, and DermNet NZ. Results: The image recognition models trained and validated on images with light skin color had higher sensitivity, specificity, positive predictive value, negative predictive value, and F1 score than the image recognition models trained and validated on images of skin of color for differentiation between melanoma and basal cell carcinoma. Conclusions: A higher number of images of dermatological diseases in individuals with darker skin color than images of dermatological diseases in individuals with light skin color would need to be gathered for artificial intelligence models to perform equally well. ", doi="10.2196/31697", url="https://derma.jmir.org/2021/2/e31697", url="http://www.ncbi.nlm.nih.gov/pubmed/37632853" } @Article{info:doi/10.2196/32444, author="Chen, Hung-Chang and Tzeng, Shin-Shi and Hsiao, Yen-Chang and Chen, Ruei-Feng and Hung, Erh-Chien and Lee, K. Oscar", title="Smartphone-Based Artificial Intelligence--Assisted Prediction for Eyelid Measurements: Algorithm Development and Observational Validation Study", journal="JMIR Mhealth Uhealth", year="2021", month="Oct", day="8", volume="9", number="10", pages="e32444", keywords="artificial intelligence", keywords="AI", keywords="deep learning", keywords="margin reflex distance 1", keywords="margin reflex distance 2", keywords="levator muscle function", keywords="smartphone", keywords="measurement", keywords="eye", keywords="prediction", keywords="processing", keywords="limit", keywords="image", keywords="algorithm", keywords="observational", abstract="Background: Margin reflex distance 1 (MRD1), margin reflex distance 2 (MRD2), and levator muscle function (LF) are crucial metrics for ptosis evaluation and management. However, manual measurements of MRD1, MRD2, and LF are time-consuming, subjective, and prone to human error. 
Smartphone-based artificial intelligence (AI) image processing is a potential solution to overcome these limitations. Objective: We propose the first smartphone-based AI-assisted image processing algorithm for MRD1, MRD2, and LF measurements. Methods: This observational study included 822 eyes of 411 volunteers aged over 18 years from August 1, 2020, to April 30, 2021. Six orbital photographs (bilateral primary gaze, up-gaze, and down-gaze) were taken using a smartphone (iPhone 11 Pro Max). The gold-standard measurements and normalized eye photographs were obtained from these orbital photographs and compiled using AI-assisted software to create MRD1, MRD2, and LF models. Results: The Pearson correlation coefficients between the gold-standard measurements and the predicted values obtained with the MRD1 and MRD2 models were excellent (r=0.91 and 0.88, respectively) and that obtained with the LF model was good (r=0.73). The intraclass correlation coefficient demonstrated excellent agreement between the gold-standard measurements and the values predicted by the MRD1 and MRD2 models (0.90 and 0.84, respectively), and substantial agreement with the LF model (0.69). The mean absolute errors were 0.35 mm, 0.37 mm, and 1.06 mm for the MRD1, MRD2, and LF models, respectively. The 95\% limits of agreement were --0.94 to 0.94 mm for the MRD1 model, --0.92 to 1.03 mm for the MRD2 model, and --0.63 to 2.53 mm for the LF model. Conclusions: We developed the first smartphone-based AI-assisted image processing algorithm for eyelid measurements. MRD1, MRD2, and LF measures can be taken in a quick, objective, and convenient manner. Furthermore, by using a smartphone, the examiner can check these measurements anywhere and at any time, which facilitates data collection. ", doi="10.2196/32444", url="https://mhealth.jmir.org/2021/10/e32444", url="http://www.ncbi.nlm.nih.gov/pubmed/34538776" } @Article{info:doi/10.2196/27177, author="Yun, Donghwan and Cho, Semin and Kim, Chul Yong and Kim, Ki Dong and Oh, Kook-Hwan and Joo, Wook Kwon and Kim, Su Yon and Han, Seok Seung", title="Use of Deep Learning to Predict Acute Kidney Injury After Intravenous Contrast Media Administration: Prediction Model Development Study", journal="JMIR Med Inform", year="2021", month="Oct", day="1", volume="9", number="10", pages="e27177", keywords="acute kidney injury", keywords="artificial intelligence", keywords="contrast media", keywords="deep learning", keywords="machine learning", keywords="kidney injury", keywords="computed tomography", abstract="Background: Precise prediction of contrast media--induced acute kidney injury (CIAKI) is an important issue because of its relationship with poor outcomes. Objective: Herein, we examined whether a deep learning algorithm could predict the risk of intravenous CIAKI better than other machine learning and logistic regression models in patients undergoing computed tomography (CT). Methods: A total of 14,185 patients who were administered intravenous contrast media for CT at the preventive and monitoring facility in Seoul National University Hospital were reviewed. CIAKI was defined as an increase in serum creatinine of $\geq$0.3 mg/dL within 2 days or $\geq$50\% within 7 days. 
Using both time-varying and time-invariant features, machine learning models, such as the recurrent neural network (RNN), light gradient boosting machine (LGM), extreme gradient boosting machine (XGB), random forest (RF), decision tree (DT), support vector machine (SVM), $k$-nearest neighbors, and logistic regression, were developed using a training set, and their performance was compared using the area under the receiver operating characteristic curve (AUROC) in a test set. Results: CIAKI developed in 261 cases (1.8\%). The RNN model had the highest AUROC of 0.755 (0.708-0.802) for predicting CIAKI, which was superior to that obtained from other machine learning models. Although CIAKI was defined as an increase in serum creatinine of $\geq$0.5 mg/dL or $\geq$25\% within 3 days, the highest performance was achieved in the RNN model with an AUROC of 0.716 (95\% confidence interval [CI] 0.664-0.768). In feature ranking analysis, the albumin level was the most highly contributing factor to RNN performance, followed by time-varying kidney function. Conclusions: Application of a deep learning algorithm improves the predictability of intravenous CIAKI after CT, representing a basis for future clinical alarming and preventive systems. ", doi="10.2196/27177", url="https://medinform.jmir.org/2021/10/e27177", url="http://www.ncbi.nlm.nih.gov/pubmed/34596574" } @Article{info:doi/10.2196/27122, author="Zhai, Huiwen and Yang, Xin and Xue, Jiaolong and Lavender, Christopher and Ye, Tiantian and Li, Ji-Bin and Xu, Lanyang and Lin, Li and Cao, Weiwei and Sun, Ying", title="Radiation Oncologists' Perceptions of Adopting an Artificial Intelligence--Assisted Contouring Technology: Model Development and Questionnaire Study", journal="J Med Internet Res", year="2021", month="Sep", day="30", volume="23", number="9", pages="e27122", keywords="artificial intelligence", keywords="technology acceptance model", keywords="intension", keywords="resistance", abstract="Background: An artificial intelligence (AI)--assisted contouring system benefits radiation oncologists by saving time and improving treatment accuracy. Yet, there is much hope and fear surrounding such technologies, and this fear can manifest as resistance from health care professionals, which can lead to the failure of AI projects. Objective: The objective of this study was to develop and test a model for investigating the factors that drive radiation oncologists' acceptance of AI contouring technology in a Chinese context. Methods: A model of AI-assisted contouring technology acceptance was developed based on the Unified Theory of Acceptance and Use of Technology (UTAUT) model by adding the variables of perceived risk and resistance that were proposed in this study. The model included 8 constructs with 29 questionnaire items. A total of 307 respondents completed the questionnaires. Structural equation modeling was conducted to evaluate the model's path effects, significance, and fitness. Results: The overall fitness indices for the model were evaluated and showed that the model was a good fit to the data. Behavioral intention was significantly affected by performance expectancy ($\beta$=.155; P=.01), social influence ($\beta$=.365; P<.001), and facilitating conditions ($\beta$=.459; P<.001). Effort expectancy ($\beta$=--.055; P=.45), perceived risk ($\beta$=--.048; P=.35), and resistance bias ($\beta$=--.020; P=.63) did not significantly affect behavioral intention. 
Conclusions: The physicians' overall perceptions of an AI-assisted technology for radiation contouring were high. Technology resistance among Chinese radiation oncologists was low and not related to behavioral intention. Not all of the factors in the Venkatesh UTAUT model applied to AI technology adoption among physicians in a Chinese context. ", doi="10.2196/27122", url="https://www.jmir.org/2021/9/e27122", url="http://www.ncbi.nlm.nih.gov/pubmed/34591029" } @Article{info:doi/10.2196/29678, author="Park, Jung Chae and Cho, Sang Young and Chung, Jin Myung and Kim, Yi-Kyung and Kim, Hyung-Jin and Kim, Kyunga and Ko, Jae-Wook and Chung, Won-Ho and Cho, Hwan Baek", title="A Fully Automated Analytic System for Measuring Endolymphatic Hydrops Ratios in Patients With M{\'e}ni{\`e}re Disease via Magnetic Resonance Imaging: Deep Learning Model Development Study", journal="J Med Internet Res", year="2021", month="Sep", day="21", volume="23", number="9", pages="e29678", keywords="deep learning", keywords="magnetic resonance imaging", keywords="medical image segmentation", keywords="M{\'e}ni{\`e}re disease", keywords="inner ear", keywords="endolymphatic hydrops", keywords="artificial intelligence", keywords="machine learning", keywords="multi-class segmentation", keywords="convolutional neural network", keywords="end-to-end system", keywords="clinician support", keywords="clinical decision support system", keywords="image selection", keywords="clinical usability", keywords="automation", abstract="Background: Recently, the analysis of endolymphatic hydropses (EHs) via inner ear magnetic resonance imaging (MRI) for patients with M{\'e}ni{\`e}re disease has been attempted in various studies. In addition, artificial intelligence has rapidly been incorporated into the medical field. In our previous studies, an automated algorithm for EH analysis was developed by using a convolutional neural network. However, several limitations existed, and further studies were conducted to compensate for these limitations. Objective: The aim of this study is to develop a fully automated analytic system for measuring EH ratios that enhances EH analysis accuracy and clinical usability when studying M{\'e}ni{\`e}re disease via MRI. Methods: We proposed the 3into3Inception and 3intoUNet networks. Their network architectures were based on those of the Inception-v3 and U-Net networks, respectively. The developed networks were trained for inner ear segmentation by using the magnetic resonance images of 124 people and were embedded in a new, automated EH analysis system---inner-ear hydrops estimation via artificial intelligence (INHEARIT)-version 2 (INHEARIT-v2). After fivefold cross-validation, an additional test was performed by using 60 new, unseen magnetic resonance images to evaluate the performance of our system. The INHEARIT-v2 system has a new function that automatically selects representative images from a full MRI stack. Results: The average segmentation performance of the fivefold cross-validation was measured via the intersection of union method, resulting in performance values of 0.743 (SD 0.030) for the 3into3Inception network and 0.811 (SD 0.032) for the 3intoUNet network. The representative magnetic resonance slices (ie, from a data set of unseen magnetic resonance images) that were automatically selected by the INHEARIT-v2 system only differed from a maximum of 2 expert-selected slices. 
After comparing the ratios calculated by experienced physicians and those calculated by the INHEARIT-v2 system, we found that the average intraclass correlation coefficient for all cases was 0.941; the average intraclass correlation coefficient of the vestibules was 0.968, and that of the cochleae was 0.914. The time required for the fully automated system to accurately analyze EH ratios based on a patient's MRI stack was approximately 3.5 seconds. Conclusions: In this study, a fully automated full-stack magnetic resonance analysis system for measuring EH ratios was developed (named INHEARIT-v2), and the results showed that there was a high correlation between the expert-calculated EH ratio values and those calculated by the INHEARIT-v2 system. The system is an upgraded version of the INHEARIT system; it has higher segmentation performance and automatically selects representative images from an MRI stack. The new model can help clinicians by providing objective analysis results and reducing the workload for interpreting magnetic resonance images. ", doi="10.2196/29678", url="https://www.jmir.org/2021/9/e29678", url="http://www.ncbi.nlm.nih.gov/pubmed/34546181" } @Article{info:doi/10.2196/27414, author="Saeed, Q. Ali and Sheikh Abdullah, Huda Siti Norul and Che-Hamzah, Jemaima and Abdul Ghani, Tarmizi Ahmad", title="Accuracy of Using Generative Adversarial Networks for Glaucoma Detection: Systematic Review and Bibliometric Analysis", journal="J Med Internet Res", year="2021", month="Sep", day="21", volume="23", number="9", pages="e27414", keywords="glaucoma", keywords="generative adversarial network", keywords="deep learning", keywords="systematic literature review", keywords="retinal disease", keywords="blood vessels", keywords="optic disc", abstract="Background: Glaucoma leads to irreversible blindness. Globally, it is the second most common retinal disease that leads to blindness, slightly less common than cataracts. Therefore, there is a great need to avoid the silent growth of this disease using recently developed generative adversarial networks (GANs). Objective: This paper aims to introduce a GAN technology for the diagnosis of eye disorders, particularly glaucoma. This paper illustrates deep adversarial learning as a potential diagnostic tool and the challenges involved in its implementation. This study describes and analyzes many of the pitfalls and problems that researchers will need to overcome to implement this kind of technology. Methods: To organize this review comprehensively, articles and reviews were collected using the following keywords: (``Glaucoma,'' ``optic disc,'' ``blood vessels'') and (``receptive field,'' ``loss function,'' ``GAN,'' ``Generative Adversarial Network,'' ``Deep learning,'' ``CNN,'' ``convolutional neural network'' OR encoder). The records were identified from 5 highly reputed databases: IEEE Xplore, Web of Science, Scopus, ScienceDirect, and PubMed. These libraries broadly cover the technical and medical literature. Publications within the last 5 years, specifically 2015-2020, were included because the target GAN technique was invented only in 2014 and the publishing date of the collected papers was not earlier than 2016. Duplicate records were removed, and irrelevant titles and abstracts were excluded. In addition, we excluded papers that used optical coherence tomography and visual field images, except for those with 2D images. A large-scale systematic analysis was performed, and then a summarized taxonomy was generated. 
Furthermore, the results of the collected articles were summarized and a visual representation of the results was presented on a T-shaped matrix diagram. This study was conducted between March 2020 and November 2020. Results: We found 59 articles after conducting a comprehensive survey of the literature. Among the 59 articles, 30 present actual attempts to synthesize images and provide accurate segmentation/classification using single/multiple landmarks or share certain experiences. The other 29 articles discuss the recent advances in GANs, do practical experiments, and contain analytical studies of retinal disease. Conclusions: Recent deep learning techniques, namely GANs, have shown encouraging performance in retinal disease detection. Although this methodology involves an extensive computing budget and optimization process, it saturates the greedy nature of deep learning techniques by synthesizing images and solves major medical issues. This paper contributes to this research field by offering a thorough analysis of existing works, highlighting current limitations, and suggesting alternatives to support other researchers and participants in further improving and strengthening future work. Finally, new directions for this research have been identified. ", doi="10.2196/27414", url="https://www.jmir.org/2021/9/e27414", url="http://www.ncbi.nlm.nih.gov/pubmed/34236992" } @Article{info:doi/10.2196/26025, author="Huang, Kai and Jiang, Zixi and Li, Yixin and Wu, Zhe and Wu, Xian and Zhu, Wu and Chen, Mingliang and Zhang, Yu and Zuo, Ke and Li, Yi and Yu, Nianzhou and Liu, Siliang and Huang, Xing and Su, Juan and Yin, Mingzhu and Qian, Buyue and Wang, Xianggui and Chen, Xiang and Zhao, Shuang", title="The Classification of Six Common Skin Diseases Based on Xiangya-Derm: Development of a Chinese Database for Artificial Intelligence", journal="J Med Internet Res", year="2021", month="Sep", day="21", volume="23", number="9", pages="e26025", keywords="artificial intelligence", keywords="skin disease", keywords="convolutional neural network", keywords="medical image processing", keywords="automatic auxiliary diagnoses", keywords="dermatology", keywords="skin", keywords="classification", keywords="China", abstract="Background: Skin and subcutaneous disease is the fourth-leading cause of the nonfatal disease burden worldwide and constitutes one of the most common burdens in primary care. However, there is a severe lack of dermatologists, particularly in rural Chinese areas. Furthermore, although artificial intelligence (AI) tools can assist in diagnosing skin disorders from images, the database for the Chinese population is limited. Objective: This study aims to establish a database for AI based on the Chinese population and presents an initial study on six common skin diseases. Methods: Each image was captured with either a digital camera or a smartphone, verified by at least three experienced dermatologists and corresponding pathology information, and finally added to the Xiangya-Derm database. Based on this database, we conducted AI-assisted classification research on six common skin diseases and then proposed a network called Xy-SkinNet. Xy-SkinNet applies a two-step strategy to identify skin diseases. First, given an input image, we segmented the regions of the skin lesion. Second, we introduced an information fusion block to combine the output of all segmented regions. We compared the performance with 31 dermatologists of varied experiences. 
Results: Xiangya-Derm, as a new database that consists of over 150,000 clinical images of 571 different skin diseases in the Chinese population, is the largest and most diverse dermatological data set of the Chinese population. The AI-based six-category classification achieved a top 3 accuracy of 84.77\%, which exceeded the average accuracy of dermatologists (78.15\%). Conclusions: Xiangya-Derm, the largest database for the Chinese population, was created. The classification of six common skin conditions was conducted based on Xiangya-Derm to lay a foundation for product research. ", doi="10.2196/26025", url="https://www.jmir.org/2021/9/e26025", url="http://www.ncbi.nlm.nih.gov/pubmed/34546174" } @Article{info:doi/10.2196/24081, author="Liu, Songxiang and Xie, Mao and Zhang, Zhicai and Wu, Xinghuo and Gao, Fei and Lu, Lin and Zhang, Jiayao and Xie, Yi and Yang, Fan and Ye, Zhewei", title="A 3D Hologram With Mixed Reality Techniques to Improve Understanding of Pulmonary Lesions Caused by COVID-19: Randomized Controlled Trial", journal="J Med Internet Res", year="2021", month="Sep", day="10", volume="23", number="9", pages="e24081", keywords="COVID-19", keywords="mixed reality", keywords="hologram", keywords="pulmonary", keywords="lesion", keywords="diagnostic", keywords="imaging", abstract="Background: The COVID-19 outbreak has now become a pandemic and has had a serious adverse impact on global public health. The effect of COVID-19 on the lungs can be determined through 2D computed tomography (CT) imaging, which requires a high level of spatial imagination on the part of the medical provider. Objective: The purpose of this study is to determine whether viewing a 3D hologram with mixed reality techniques can improve medical professionals' understanding of the pulmonary lesions caused by COVID-19. Methods: The study involved 60 participants, including 20 radiologists, 20 surgeons, and 20 medical students. Each of the three groups was randomly divided into two groups, either the 2D CT group (n=30; mean age 29 years [range 19-38 years]; males=20) or the 3D holographic group (n=30; mean age 30 years [range 20-38 years]; males=20). The two groups completed the same task, which involved identifying lung lesions caused by COVID-19 for 6 cases using a 2D CT or 3D hologram. Finally, an independent radiology professor rated the participants' performance (out of 100). All participants in two groups completed a Likert scale questionnaire regarding the educational utility and efficiency of 3D holograms. The National Aeronautics and Space Administration Task Load Index (NASA-TLX) was completed by all participants. Results: The mean task score of the 3D hologram group (mean 91.98, SD 2.45) was significantly higher than that of the 2D CT group (mean 74.09, SD 7.59; P<.001). With the help of 3D holograms, surgeons and medical students achieved the same score as radiologists and made obvious progress in identifying pulmonary lesions caused by COVID-19. 
The Likert scale questionnaire results showed that the 3D hologram group had superior results compared to the 2D CT group (teaching: 2D CT group median 2, IQR 1-2 versus 3D group median 5, IQR 5-5; P<.001; understanding and communicating: 2D CT group median 1, IQR 1-1 versus 3D group median 5, IQR 5-5; P<.001; increasing interest: 2D CT group median 2, IQR 2-2 versus 3D group median 5, IQR 5-5; P<.001; lowering the learning curve: 2D CT group median 2, IQR 1-2 versus 3D group median 4, IQR 4-5; P<.001; spatial awareness: 2D CT group median 2, IQR 1-2 versus 3D group median 5, IQR 5-5; P<.001; learning: 2D CT group median 3, IQR 2-3 versus 3D group median 5, IQR 5-5; P<.001). The 3D group scored significantly lower than the 2D CT group for the ``mental,'' ``temporal,'' ``performance,'' and ``frustration'' subscales on the NASA-TLX. Conclusions: A 3D hologram with mixed reality techniques can be used to help medical professionals, especially medical students and newly hired doctors, better identify pulmonary lesions caused by COVID-19. It can be used in medical education to improve spatial awareness, increase interest, improve understandability, and lower the learning curve. Trial Registration: Chinese Clinical Trial Registry ChiCTR2100045845; http://www.chictr.org.cn/showprojen.aspx?proj=125761 ", doi="10.2196/24081", url="https://www.jmir.org/2021/9/e24081", url="http://www.ncbi.nlm.nih.gov/pubmed/34061760" } @Article{info:doi/10.2196/28776, author="Kulkarni, Viraj and Gawali, Manish and Kharat, Amit", title="Key Technology Considerations in Developing and Deploying Machine Learning Models in Clinical Radiology Practice", journal="JMIR Med Inform", year="2021", month="Sep", day="9", volume="9", number="9", pages="e28776", keywords="artificial intelligence", keywords="AI", keywords="machine learning", keywords="deep learning", keywords="radiology", keywords="privacy", keywords="neural networks", keywords="deployment", doi="10.2196/28776", url="https://medinform.jmir.org/2021/9/e28776", url="http://www.ncbi.nlm.nih.gov/pubmed/34499049" } @Article{info:doi/10.2196/27235, author="Chang, Panchun and Dang, Jun and Dai, Jianrong and Sun, Wenzheng", title="Real-Time Respiratory Tumor Motion Prediction Based on a Temporal Convolutional Neural Network: Prediction Model Development Study", journal="J Med Internet Res", year="2021", month="Aug", day="27", volume="23", number="8", pages="e27235", keywords="radiation therapy", keywords="temporal convolutional neural network", keywords="respiratory signal prediction", keywords="neural network", keywords="deep learning model", keywords="dynamic tracking", abstract="Background: The dynamic tracking of tumors with radiation beams in radiation therapy requires the prediction of real-time target locations prior to beam delivery, as treatment involving radiation beams and gating tracking results in time latency. Objective: In this study, a deep learning model that was based on a temporal convolutional neural network was developed to predict internal target locations by using multiple external markers. Methods: Respiratory signals from 69 treatment fractions of 21 patients with cancer who were treated with the CyberKnife Synchrony device (Accuray Incorporated) were used to train and test the model. The reported model's performance was evaluated by comparing the model to a long short-term memory model in terms of the root mean square errors (RMSEs) of real and predicted respiratory signals. The effect of the number of external markers was also investigated. 
Results: The average RMSEs of predicted (ahead time=400 ms) respiratory motion in the superior-inferior, anterior-posterior, and left-right directions and in 3D space were 0.49 mm, 0.28 mm, 0.25 mm, and 0.67 mm, respectively. Conclusions: The experiment results demonstrated that the temporal convolutional neural network--based respiratory prediction model could predict respiratory signals with submillimeter accuracy. ", doi="10.2196/27235", url="https://www.jmir.org/2021/8/e27235", url="http://www.ncbi.nlm.nih.gov/pubmed/34236336" } @Article{info:doi/10.2196/25290, author="Noriega, Alejandro and Meizner, Daniela and Camacho, Dalia and Enciso, Jennifer and Quiroz-Mercado, Hugo and Morales-Canton, Virgilio and Almaatouq, Abdullah and Pentland, Alex", title="Screening Diabetic Retinopathy Using an Automated Retinal Image Analysis System in Independent and Assistive Use Cases in Mexico: Randomized Controlled Trial", journal="JMIR Form Res", year="2021", month="Aug", day="26", volume="5", number="8", pages="e25290", keywords="diabetic retinopathy", keywords="automated diagnosis", keywords="retina", keywords="fundus image analysis", abstract="Background: The automated screening of patients at risk of developing diabetic retinopathy represents an opportunity to improve their midterm outcome and lower the public expenditure associated with direct and indirect costs of common sight-threatening complications of diabetes. Objective: This study aimed to develop and evaluate the performance of an automated deep learning--based system to classify retinal fundus images as referable and nonreferable diabetic retinopathy cases, from international and Mexican patients. In particular, we aimed to evaluate the performance of the automated retina image analysis (ARIA) system under an independent scheme (ie, only ARIA screening) and 2 assistive schemes (ie, hybrid ARIA plus ophthalmologist screening), using a web-based platform for remote image analysis to determine and compare the sensitivity and specificity of the 3 schemes. Methods: A randomized controlled experiment was performed where 17 ophthalmologists were asked to classify a series of retinal fundus images under 3 different conditions. The conditions were to (1) screen the fundus image by themselves (solo); (2) screen the fundus image after exposure to the retina image classification of the ARIA system (ARIA answer); and (3) screen the fundus image after exposure to the classification of the ARIA system, as well as its level of confidence and an attention map highlighting the most important areas of interest in the image according to the ARIA system (ARIA explanation). The ophthalmologists' classification in each condition and the result from the ARIA system were compared against a gold standard generated by consulting and aggregating the opinion of 3 retina specialists for each fundus image. Results: The ARIA system was able to classify referable vs nonreferable cases with an area under the receiver operating characteristic curve of 98\%, a sensitivity of 95.1\%, and a specificity of 91.5\% for international patient cases. There was an area under the receiver operating characteristic curve of 98.3\%, a sensitivity of 95.2\%, and a specificity of 90\% for Mexican patient cases. The ARIA system performance was more successful than the average performance of the 17 ophthalmologists enrolled in the study. 
Additionally, the results suggest that the ARIA system can be useful as an assistive tool, as sensitivity was significantly higher in the experimental condition where ophthalmologists were exposed to the ARIA system's answer prior to their own classification (93.3\%), compared with the sensitivity of the condition where participants assessed the images independently (87.3\%; P=.05). Conclusions: These results demonstrate that both independent and assistive use cases of the ARIA system present, for Latin American countries such as Mexico, a substantial opportunity toward expanding the monitoring capacity for the early detection of diabetes-related blindness. ", doi="10.2196/25290", url="https://formative.jmir.org/2021/8/e25290", url="http://www.ncbi.nlm.nih.gov/pubmed/34435963" } @Article{info:doi/10.2196/29682, author="Bang, Seok Chang and Lee, Jun Jae and Baik, Ho Gwang", title="Computer-Aided Diagnosis of Diminutive Colorectal Polyps in Endoscopic Images: Systematic Review and Meta-analysis of Diagnostic Test Accuracy", journal="J Med Internet Res", year="2021", month="Aug", day="25", volume="23", number="8", pages="e29682", keywords="artificial intelligence", keywords="deep learning", keywords="polyps", keywords="colon", keywords="colonoscopy", keywords="diminutive", abstract="Background: Most colorectal polyps are diminutive and benign, especially those in the rectosigmoid colon, and the resection of these polyps is not cost-effective. Advancements in image-enhanced endoscopy have improved the optical prediction of colorectal polyp histology. However, subjective interpretability and inter- and intraobserver variability prohibits widespread implementation. The number of studies on computer-aided diagnosis (CAD) is increasing; however, their small sample sizes limit statistical significance. Objective: This review aims to evaluate the diagnostic test accuracy of CAD models in predicting the histology of diminutive colorectal polyps by using endoscopic images. Methods: Core databases were searched for studies that were based on endoscopic imaging, used CAD models for the histologic diagnosis of diminutive colorectal polyps, and presented data on diagnostic performance. A systematic review and diagnostic test accuracy meta-analysis were performed. Results: Overall, 13 studies were included. The pooled area under the curve, sensitivity, specificity, and diagnostic odds ratio of CAD models for the diagnosis of diminutive colorectal polyps (adenomatous or neoplastic vs nonadenomatous or nonneoplastic) were 0.96 (95\% CI 0.93-0.97), 0.93 (95\% CI 0.91-0.95), 0.87 (95\% CI 0.76-0.93), and 87 (95\% CI 38-201), respectively. The meta-regression analysis showed no heterogeneity, and no publication bias was detected. Subgroup analyses showed robust results. The negative predictive value of CAD models for the diagnosis of adenomatous polyps in the rectosigmoid colon was 0.96 (95\% CI 0.95-0.97), and this value exceeded the threshold of the diagnosis and leave strategy. Conclusions: CAD models show potential for the optical histological diagnosis of diminutive colorectal polyps via the use of endoscopic images. 
Trial Registration: PROSPERO CRD42021232189; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=232189 ", doi="10.2196/29682", url="https://www.jmir.org/2021/8/e29682", url="http://www.ncbi.nlm.nih.gov/pubmed/34432643" } @Article{info:doi/10.2196/20815, author="Hutchinson, Claire and Brereton, Michelle and Adams, Julie and De La Salle, Barbara and Sims, Jon and Hyde, Keith and Chasty, Richard and Brown, Rachel and Rees-Unwin, Karen and Burthem, John", title="The Use and Effectiveness of an Online Diagnostic Support System for Blood Film Interpretation: Comparative Observational Study", journal="J Med Internet Res", year="2021", month="Aug", day="9", volume="23", number="8", pages="e20815", keywords="blood cell morphology", keywords="decision support", keywords="external quality assessment in hematology", keywords="diagnosis", keywords="digital morphology", keywords="morphology education", abstract="Background: The recognition and interpretation of abnormal blood cell morphology is often the first step in diagnosing underlying serious systemic illness or leukemia. Supporting the staff who interpret blood film morphology is therefore essential for a safe laboratory service. This paper describes an open-access, web-based decision support tool, developed by the authors to support morphological diagnosis, arising from earlier studies identifying mechanisms of error in blood film reporting. The effectiveness of this intervention was assessed using the unique resource offered by the online digital morphology Continuing Professional Development scheme (DM scheme) offered by the UK National External Quality Assessment Service for Haematology, with more than 3000 registered users. This allowed the effectiveness of decision support to be tested within a defined user group, each of whom viewed and interpreted the morphology of identical digital blood films. Objective: The primary objective of the study was to test the effectiveness of the decision support system in supporting users to identify and interpret abnormal morphological features. The secondary objective was to determine the pattern and frequency of use of the system for different case types, and to determine how users perceived the support in terms of their confidence in decision-making. Methods: This was a comparative study of identical blood films evaluated either with or without decision support. Selected earlier cases from the DM scheme were rereleased as new cases but with decision support made available; this allowed a comparison of data sets for identical cases with or without decision support. To address the primary objectives, the study used quantitative evaluation and statistical comparisons of the identification and interpretation of morphological features between the two different case releases. To address the secondary objective, the use of decision support was assessed using web analytical tools, while a questionnaire was used to assess user perceptions of the system. Results: Cases evaluated with the aid of decision support had significantly improved accuracy of identification for relevant morphological features (mean improvement 9.8\%) and the interpretation of those features (mean improvement 11\%). The improvement was particularly significant for cases with higher complexity or for rarer diagnoses. Analysis of website usage demonstrated a high frequency of access for web pages relevant to each case (mean 9298 for each case, range 2661-24,276). 
Users reported that the decision support website increased their confidence for feature identification (4.8/5) and interpretation (4.3/5), both within the context of training (4.6/5) and also in their wider laboratory practice (4.4/5). Conclusions: The findings of this study demonstrate that directed online decision support for blood morphology evaluation improves accuracy and confidence in the context of educational evaluation of digital films, with effectiveness potentially extending to wider laboratory use. ", doi="10.2196/20815", url="https://www.jmir.org/2021/8/e20815", url="http://www.ncbi.nlm.nih.gov/pubmed/34383663" } @Article{info:doi/10.2196/26149, author="Zhang, Jia and Mihai, Carina and T{\"u}shaus, Laura and Scebba, Gaetano and Distler, Oliver and Karlen, Walter", title="Wound Image Quality From a Mobile Health Tool for Home-Based Chronic Wound Management With Real-Time Quality Feedback: Randomized Feasibility Study", journal="JMIR Mhealth Uhealth", year="2021", month="Jul", day="30", volume="9", number="7", pages="e26149", keywords="data quality", keywords="remote assessment", keywords="digital ulcers", keywords="scleroderma", keywords="mobile app", keywords="digital health", keywords="ehealth", keywords="mhealth", keywords="telemedicine", keywords="teledermatology", abstract="Background: Travel to clinics for chronic wound management is burdensome to patients. Remote assessment and management of wounds using mobile and telehealth approaches can reduce this burden and improve patient outcomes. An essential step in wound documentation is the capture of wound images, but poor image quality can have a negative influence on the reliability of the assessment. To date, no study has investigated the quality of remotely acquired wound images and whether these are suitable for wound self-management and telemedical interpretation of wound status. Objective: Our goal was to develop a mobile health (mHealth) tool for the remote self-assessment of digital ulcers (DUs) in patients with systemic sclerosis (SSc). We aimed to define and validate objective measures for assessing the image quality, evaluate whether an automated feedback feature based on real-time assessment of image quality improves the overall quality of acquired wound images, and evaluate the feasibility of deploying the mHealth tool for home-based chronic wound self-monitoring by patients with SSc. Methods: We developed an mHealth tool composed of a wound imaging and management app, a custom color reference sticker, and a smartphone holder. We introduced 2 objective image quality parameters based on the sharpness and presence of the color checker to assess the quality of the image during acquisition and enable a quality feedback mechanism in an advanced version of the app. We randomly assigned patients with SSc and DU to the 2 device groups (basic and feedback) to self-document their DU at home over 8 weeks. The color checker detection ratio (CCDR) and color checker sharpness (CCS) were compared between the 2 groups. We evaluated the feasibility of the mHealth tool by analyzing the usability feedback from questionnaires, user behavior and timings, and the overall quality of the wound images. Results: A total of 21 patients were enrolled, of which 15 patients were included in the image quality analysis. The average CCDR was 0.96 (191/199) in the feedback group and 0.86 (158/183) in the basic group. The feedback group showed significantly higher (P<.001) CCS compared to the basic group. 
The usability questionnaire results showed that the majority of patients were satisfied with the tool, although it could benefit from disease-specific adaptations. The median assessment duration was <50 seconds in all patients, indicating the mHealth tool was efficient to use and could be integrated into the daily routine of patients. Conclusions: We developed an mHealth tool that enables patients with SSc to acquire good-quality DU images and demonstrated that it is feasible to deploy such an app in this patient group. The feedback mechanism improved the overall image quality. The introduced technical solutions represent a further step toward reliable and trustworthy digital health for home-based self-management of wounds. ", doi="10.2196/26149", url="https://mhealth.jmir.org/2021/7/e26149", url="http://www.ncbi.nlm.nih.gov/pubmed/34328440" } @Article{info:doi/10.2196/23799, author="He, Xianying and Wang, Linlin and Wang, Li and Gao, Jinghong and Cui, Fangfang and Ma, Qianqian and Zhang, Wenjie and Wang, Lin and Zhai, Yunkai and Zhao, Jie", title="Effectiveness of a Cloud-Based Telepathology System in China: Large-Sample Observational Study", journal="J Med Internet Res", year="2021", month="Jul", day="29", volume="23", number="7", pages="e23799", keywords="telepathology", keywords="cloud-based system", keywords="whole-slide imaging", keywords="turnaround time", keywords="diagnostic accuracy", keywords="economic benefits", abstract="Background: Whole-slide imaging allows the entire slide to be viewed in a manner that simulates microscopy; therefore, it is widely used in telepathology. However, managing the large digital files needed for whole-slide imaging is difficult. To solve this problem, we set up the Chinese National Cloud-Based Telepathology System (CNCTPS). CNCTPS has been running for more than 4 years and has accumulated a large amount of data. Objective: The main purpose of this study was to comprehensively evaluate the effectiveness of the CNCTPS based on a large sample. The evaluation indicators included service volume, turnaround time, diagnosis accuracy, and economic benefits. Methods: Details of 23,167 cases submitted to the CNCTPS from January 2016 to December 2019 were collected to analyze the service volume, turnaround time, and economic benefits. A total of 564 patients who visited the First Affiliated Hospital of Zhengzhou University and obtained final diagnoses were followed up to analyze the diagnostic accuracy of the CNCTPS. Results: From 2016 to 2019, the service volume of the CNCTPS increased from 2335 to 9240, and the number of participating hospitals increased from 60 to 74. Consultation requests from county-level hospitals accounted for 86.57\% (20,287/23,167). A total of 17,495 of 23,167 cases (75.52\%) were confirmed, including 12,088 benign lesions, 5217 malignant lesions, and 190 borderline lesions. Of the cases, 3.85\% (893/23,167) failed to be diagnosed for reasons such as poor slice quality and incomplete sampling. The median turnaround time was 16.93 hours and was shortened yearly (between 2018 and 2019: adjusted P=.01; other groups: adjusted P<.001); 82.88\% of cases were diagnosed within 48 hours. There was a discrepancy between the diagnosis and final diagnosis for 11 cases, including 4 false-positive cases and 7 false-negative cases. The sensitivity and specificity were 97.66\% and 98.49\%, respectively. The diagnostic accuracy of the system was 98.05\%, with no statistically significant difference from the final diagnosis in the hospital (P=.55). 
By using this system, a total of US \$300,000 was saved for patients every year. Conclusions: The novel cloud-based telepathology system has the potential to relieve the shortage of pathologists in primary hospitals. It can also simultaneously reduce medical costs for patients in China. It should, therefore, be further promoted to enhance the efficiency, quantity, and quality of telepathology diagnoses. ", doi="10.2196/23799", url="https://www.jmir.org/2021/7/e23799", url="http://www.ncbi.nlm.nih.gov/pubmed/34326037" } @Article{info:doi/10.2196/26000, author="Cha, KyeongMin and Woo, Hyun-Ki and Park, Dohyun and Chang, Kyung Dong and Kang, Mira", title="Effects of Background Colors, Flashes, and Exposure Values on the Accuracy of a Smartphone-Based Pill Recognition System Using a Deep Convolutional Neural Network: Deep Learning and Experimental Approach", journal="JMIR Med Inform", year="2021", month="Jul", day="28", volume="9", number="7", pages="e26000", keywords="pill recognition", keywords="deep neural network", keywords="image processing", keywords="color space", keywords="color difference", keywords="pharmaceutical", keywords="imaging", keywords="photography", keywords="neural network", keywords="mobile phone", abstract="Background: Pill image recognition systems are difficult to develop due to differences in pill color, which are influenced by external factors such as illumination and the presence of a flash. Objective: In this study, the differences in color between reference images and real-world images were measured to determine the accuracy of a pill recognition system under 12 real-world conditions (ie, different background colors, the presence and absence of a flash, and different exposure values [EVs]). Methods: We analyzed 19 medications with different features (ie, different colors, shapes, and dosages). The average color difference was calculated based on the color distance between a reference image and a real-world image. Results: For images with black backgrounds, as the EV decreased, the top-1 and top-5 accuracies increased independently of the presence of a flash. The top-5 accuracy for images with black backgrounds increased from 26.8\% to 72.6\% when the flash was on and increased from 29.5\% to 76.8\% when the flash was off as the EV decreased. However, the top-5 accuracy increased from 62.1\% to 78.4\% for images with white backgrounds when the flash was on. The best top-1 accuracy was 51.1\% (white background; flash on; EV of +2.0). The best top-5 accuracy was 78.4\% (white background; flash on; EV of 0). Conclusions: The accuracy generally increased as the color difference decreased, except for images with black backgrounds and an EV of -2.0. This study revealed that background colors, the presence of a flash, and EVs in real-world conditions are important factors that affect the performance of a pill recognition model. 
", doi="10.2196/26000", url="https://medinform.jmir.org/2021/7/e26000", url="http://www.ncbi.nlm.nih.gov/pubmed/34319239" } @Article{info:doi/10.2196/27370, author="Nazarian, Scarlet and Glover, Ben and Ashrafian, Hutan and Darzi, Ara and Teare, Julian", title="Diagnostic Accuracy of Artificial Intelligence and Computer-Aided Diagnosis for the Detection and Characterization of Colorectal Polyps: Systematic Review and Meta-analysis", journal="J Med Internet Res", year="2021", month="Jul", day="14", volume="23", number="7", pages="e27370", keywords="artificial intelligence", keywords="colonoscopy", keywords="computer-aided diagnosis", keywords="machine learning", keywords="polyp", abstract="Background: Colonoscopy reduces the incidence of colorectal cancer (CRC) by allowing detection and resection of neoplastic polyps. Evidence shows that many small polyps are missed on a single colonoscopy. There has been a successful adoption of artificial intelligence (AI) technologies to tackle the issues around missed polyps and as tools to increase the adenoma detection rate (ADR). Objective: The aim of this review was to examine the diagnostic accuracy of AI-based technologies in assessing colorectal polyps. Methods: A comprehensive literature search was undertaken using the databases of Embase, MEDLINE, and the Cochrane Library. PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines were followed. Studies reporting the use of computer-aided diagnosis for polyp detection or characterization during colonoscopy were included. Independent proportions and their differences were calculated and pooled through DerSimonian and Laird random-effects modeling. Results: A total of 48 studies were included. The meta-analysis showed a significant increase in pooled polyp detection rate in patients with the use of AI for polyp detection during colonoscopy compared with patients who had standard colonoscopy (odds ratio [OR] 1.75, 95\% CI 1.56-1.96; P<.001). When comparing patients undergoing colonoscopy with the use of AI to those without, there was also a significant increase in ADR (OR 1.53, 95\% CI 1.32-1.77; P<.001). Conclusions: With the aid of machine learning, there is potential to improve ADR and, consequently, reduce the incidence of CRC. The current generation of AI-based systems demonstrate impressive accuracy for the detection and characterization of colorectal polyps. However, this is an evolving field and before its adoption into a clinical setting, AI systems must prove worthy to patients and clinicians. 
Trial Registration: PROSPERO International Prospective Register of Systematic Reviews CRD42020169786; https://www.crd.york.ac.uk/prospero/display\_record.php?ID=CRD42020169786 ", doi="10.2196/27370", url="https://www.jmir.org/2021/7/e27370", url="http://www.ncbi.nlm.nih.gov/pubmed/34259645" } @Article{info:doi/10.2196/27822, author="Han, Yong and Li, Weiming and Liu, Mengmeng and Wu, Zhiyuan and Zhang, Feng and Liu, Xiangtong and Tao, Lixin and Li, Xia and Guo, Xiuhua", title="Application of an Anomaly Detection Model to Screen for Ocular Diseases Using Color Retinal Fundus Images: Design and Evaluation Study", journal="J Med Internet Res", year="2021", month="Jul", day="13", volume="23", number="7", pages="e27822", keywords="anomaly detection", keywords="artificial intelligence", keywords="cataract", keywords="diabetic retinopathy", keywords="disease screening", keywords="eye", keywords="fundus image", keywords="glaucoma", keywords="macular degeneration", keywords="ocular disease", keywords="ophthalmology", abstract="Background: The supervised deep learning approach provides state-of-the-art performance in a variety of fundus image classification tasks, but it is not applicable for screening tasks with numerous or unknown disease types. The unsupervised anomaly detection (AD) approach, which needs only normal samples to develop a model, may be a workable and cost-saving method of screening for ocular diseases. Objective: This study aimed to develop and evaluate an AD model for detecting ocular diseases on the basis of color fundus images. Methods: A generative adversarial network--based AD method for detecting possible ocular diseases was developed and evaluated using 90,499 retinal fundus images derived from 4 large-scale real-world data sets. Four other independent external test sets were used for external testing and further analysis of the model's performance in detecting 6 common ocular diseases (diabetic retinopathy [DR], glaucoma, cataract, age-related macular degeneration, hypertensive retinopathy [HR], and myopia), DR of different severity levels, and 36 categories of abnormal fundus images. The area under the receiver operating characteristic curve (AUC), accuracy, sensitivity, and specificity of the model's performance were calculated and presented. Results: Our model achieved an AUC of 0.896 with 82.69\% sensitivity and 82.63\% specificity in detecting abnormal fundus images in the internal test set, and it achieved an AUC of 0.900 with 83.25\% sensitivity and 85.19\% specificity in 1 external proprietary data set. In the detection of 6 common ocular diseases, the AUCs for DR, glaucoma, cataract, AMD, HR, and myopia were 0.891, 0.916, 0.912, 0.867, 0.895, and 0.961, respectively. Moreover, the AD model had an AUC of 0.868 for detecting any DR, 0.908 for detecting referable DR, and 0.926 for detecting vision-threatening DR. Conclusions: The AD approach achieved high sensitivity and specificity in detecting ocular diseases on the basis of fundus images, which implies that this model might be an efficient and economical tool for optimizing current clinical pathways for ophthalmologists. Future studies are required to evaluate the practical applicability of the AD approach in ocular disease screening. 
", doi="10.2196/27822", url="https://www.jmir.org/2021/7/e27822", url="http://www.ncbi.nlm.nih.gov/pubmed/34255681" } @Article{info:doi/10.2196/26151, author="Nikolov, Stanislav and Blackwell, Sam and Zverovitch, Alexei and Mendes, Ruheena and Livne, Michelle and De Fauw, Jeffrey and Patel, Yojan and Meyer, Clemens and Askham, Harry and Romera-Paredes, Bernadino and Kelly, Christopher and Karthikesalingam, Alan and Chu, Carlton and Carnell, Dawn and Boon, Cheng and D'Souza, Derek and Moinuddin, Ali Syed and Garie, Bethany and McQuinlan, Yasmin and Ireland, Sarah and Hampton, Kiarna and Fuller, Krystle and Montgomery, Hugh and Rees, Geraint and Suleyman, Mustafa and Back, Trevor and Hughes, Owen C{\'i}an and Ledsam, R. Joseph and Ronneberger, Olaf", title="Clinically Applicable Segmentation of Head and Neck Anatomy for Radiotherapy: Deep Learning Algorithm Development and Validation Study", journal="J Med Internet Res", year="2021", month="Jul", day="12", volume="23", number="7", pages="e26151", keywords="radiotherapy", keywords="segmentation", keywords="contouring", keywords="machine learning", keywords="artificial intelligence", keywords="UNet", keywords="convolutional neural networks", keywords="surface DSC", abstract="Background: Over half a million individuals are diagnosed with head and neck cancer each year globally. Radiotherapy is an important curative treatment for this disease, but it requires manual time to delineate radiosensitive organs at risk. This planning process can delay treatment while also introducing interoperator variability, resulting in downstream radiation dose differences. Although auto-segmentation algorithms offer a potentially time-saving solution, the challenges in defining, quantifying, and achieving expert performance remain. Objective: Adopting a deep learning approach, we aim to demonstrate a 3D U-Net architecture that achieves expert-level performance in delineating 21 distinct head and neck organs at risk commonly segmented in clinical practice. Methods: The model was trained on a data set of 663 deidentified computed tomography scans acquired in routine clinical practice and with both segmentations taken from clinical practice and segmentations created by experienced radiographers as part of this research, all in accordance with consensus organ at risk definitions. Results: We demonstrated the model's clinical applicability by assessing its performance on a test set of 21 computed tomography scans from clinical practice, each with 21 organs at risk segmented by 2 independent experts. We also introduced surface Dice similarity coefficient, a new metric for the comparison of organ delineation, to quantify the deviation between organ at risk surface contours rather than volumes, better reflecting the clinical task of correcting errors in automated organ segmentations. The model's generalizability was then demonstrated on 2 distinct open-source data sets, reflecting different centers and countries to model training. Conclusions: Deep learning is an effective and clinically applicable technique for the segmentation of the head and neck anatomy for radiotherapy. With appropriate validation studies and regulatory approvals, this system could improve the efficiency, consistency, and safety of radiotherapy pathways. ", doi="10.2196/26151", url="https://www.jmir.org/2021/7/e26151", url="http://www.ncbi.nlm.nih.gov/pubmed/34255661" } @Article{info:doi/10.2196/23863, author="Wu, Jo-Hsuan and Liu, Alvin T. Y. 
and Hsu, Wan-Ting and Ho, Hui-Chun Jennifer and Lee, Chien-Chang", title="Performance and Limitation of Machine Learning Algorithms for Diabetic Retinopathy Screening: Meta-analysis", journal="J Med Internet Res", year="2021", month="Jul", day="5", volume="23", number="7", pages="e23863", keywords="machine learning", keywords="diabetic retinopathy", keywords="diabetes", keywords="deep learning", keywords="neural network", keywords="diagnostic accuracy", abstract="Background: Diabetic retinopathy (DR), whose standard diagnosis is performed by human experts, has high prevalence and requires a more efficient screening method. Although machine learning (ML)--based automated DR diagnosis has gained attention due to recent approval of IDx-DR, performance of this tool has not been examined systematically, and the best ML technique for use in a real-world setting has not been discussed. Objective: The aim of this study was to systematically examine the overall diagnostic accuracy of ML in diagnosing DR of different categories based on color fundus photographs and to determine the state-of-the-art ML approach. Methods: Published studies in PubMed and EMBASE were searched from inception to June 2020. Studies were screened for relevant outcomes, publication types, and data sufficiency, and a total of 60 out of 2128 (2.82\%) studies were retrieved after study selection. Extraction of data was performed by 2 authors according to PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses), and the quality assessment was performed according to the Quality Assessment of Diagnostic Accuracy Studies 2 (QUADAS-2). Meta-analysis of diagnostic accuracy was pooled using a bivariate random effects model. The main outcomes included diagnostic accuracy, sensitivity, and specificity of ML in diagnosing DR based on color fundus photographs, as well as the performances of different major types of ML algorithms. Results: The primary meta-analysis included 60 color fundus photograph studies (445,175 interpretations). Overall, ML demonstrated high accuracy in diagnosing DR of various categories, with a pooled area under the receiver operating characteristic (AUROC) ranging from 0.97 (95\% CI 0.96-0.99) to 0.99 (95\% CI 0.98-1.00). The performance of ML in detecting more-than-mild DR was robust (sensitivity 0.95; AUROC 0.97), and by subgroup analyses, we observed that robust performance of ML was not limited to benchmark data sets (sensitivity 0.92; AUROC 0.96) but could be generalized to images collected in clinical practice (sensitivity 0.97; AUROC 0.97). Neural network was the most widely used method, and the subgroup analysis revealed a pooled AUROC of 0.98 (95\% CI 0.96-0.99) for studies that used neural networks to diagnose more-than-mild DR. Conclusions: This meta-analysis demonstrated high diagnostic accuracy of ML algorithms in detecting DR on color fundus photographs, suggesting that state-of-the-art, ML-based DR screening algorithms are likely ready for clinical applications. However, a significant portion of the earlier published studies had methodology flaws, such as the lack of external validation and presence of spectrum bias. The results of these studies should be interpreted with caution. 
", doi="10.2196/23863", url="https://www.jmir.org/2021/7/e23863", url="http://www.ncbi.nlm.nih.gov/pubmed/34407500" } @Article{info:doi/10.2196/20708, author="H{\"o}hn, Julia and Hekler, Achim and Krieghoff-Henning, Eva and Kather, Nikolas Jakob and Utikal, Sven Jochen and Meier, Friedegund and Gellrich, Friedrich Frank and Hauschild, Axel and French, Lars and Schlager, Gabriel Justin and Ghoreschi, Kamran and Wilhelm, Tabea and Kutzner, Heinz and Heppt, Markus and Haferkamp, Sebastian and Sondermann, Wiebke and Schadendorf, Dirk and Schilling, Bastian and Maron, C. Roman and Schmitt, Max and Jutzi, Tanja and Fr{\"o}hling, Stefan and Lipka, B. Daniel and Brinker, Josef Titus", title="Integrating Patient Data Into Skin Cancer Classification Using Convolutional Neural Networks: Systematic Review", journal="J Med Internet Res", year="2021", month="Jul", day="2", volume="23", number="7", pages="e20708", keywords="skin cancer classification", keywords="convolutional neural networks", keywords="patient data", abstract="Background: Recent years have been witnessing a substantial improvement in the accuracy of skin cancer classification using convolutional neural networks (CNNs). CNNs perform on par with or better than dermatologists with respect to the classification tasks of single images. However, in clinical practice, dermatologists also use other patient data beyond the visual aspects present in a digitized image, further increasing their diagnostic accuracy. Several pilot studies have recently investigated the effects of integrating different subtypes of patient data into CNN-based skin cancer classifiers. Objective: This systematic review focuses on the current research investigating the impact of merging information from image features and patient data on the performance of CNN-based skin cancer image classification. This study aims to explore the potential in this field of research by evaluating the types of patient data used, the ways in which the nonimage data are encoded and merged with the image features, and the impact of the integration on the classifier performance. Methods: Google Scholar, PubMed, MEDLINE, and ScienceDirect were screened for peer-reviewed studies published in English that dealt with the integration of patient data within a CNN-based skin cancer classification. The search terms skin cancer classification, convolutional neural network(s), deep learning, lesions, melanoma, metadata, clinical information, and patient data were combined. Results: A total of 11 publications fulfilled the inclusion criteria. All of them reported an overall improvement in different skin lesion classification tasks with patient data integration. The most commonly used patient data were age, sex, and lesion location. The patient data were mostly one-hot encoded. There were differences in the complexity that the encoded patient data were processed with regarding deep learning methods before and after fusing them with the image features for a combined classifier. Conclusions: This study indicates the potential benefits of integrating patient data into CNN-based diagnostic algorithms. However, how exactly the individual patient data enhance classification performance, especially in the case of multiclass classification problems, is still unclear. Moreover, a substantial fraction of patient data used by dermatologists remains to be analyzed in the context of CNN-based skin cancer classification. 
Further exploratory analyses in this promising field may optimize patient data integration into CNN-based skin cancer diagnostics for patients' benefit. ", doi="10.2196/20708", url="https://www.jmir.org/2021/7/e20708", url="http://www.ncbi.nlm.nih.gov/pubmed/34255646" } @Article{info:doi/10.2196/24543, author="Caine, A. Joshua and Klein, Britt and Edwards, L. Stephen", title="The Impact of a Novel Mimicry Task for Increasing Emotion Recognition in Adults with Autism Spectrum Disorder and Alexithymia: Protocol for a Randomized Controlled Trial", journal="JMIR Res Protoc", year="2021", month="Jun", day="17", volume="10", number="6", pages="e24543", keywords="alexithymia hypothesis", keywords="training facial expression emotion recognition", keywords="mimicry task", keywords="autism spectrum disorder", keywords="interoception", keywords="facial expression", keywords="emotion", keywords="emotion recognition", keywords="autism", keywords="spectrum disorder", keywords="mimicry", keywords="therapy", keywords="protocol", keywords="expression", keywords="disability", abstract="Background: Impaired facial emotion expression recognition (FEER) has typically been considered a correlate of autism spectrum disorder (ASD). Now, the alexithymia hypothesis suggests that this emotion processing problem is instead related to alexithymia, which frequently co-occurs with ASD. By combining predictive coding theories of ASD and simulation theories of emotion recognition, it is suggested that facial mimicry may improve the training of FEER in ASD and alexithymia. Objective: This study aims to evaluate a novel mimicry task to improve FEER in adults with and without ASD and alexithymia. Additionally, this study will aim to determine the contributions of alexithymia and ASD to FEER ability and assess which of these 2 populations benefit from this training task. Methods: Recruitment will primarily take place through an ASD community group with an emphasis on snowball recruiting. Included will be 64 consenting adults equally divided between participants without an ASD and participants with an ASD. Participants will be screened online using the Kessler Psychological Distress Scale (K-10; cut-off score of 22), Autism Spectrum Quotient (AQ-10), and Toronto Alexithymia Scale (TAS-20), followed by a clinical interview with a provisional psychologist at the Federation University psychology clinic. The clinical interview will include assessment of ability, anxiety, and depression as well as discussion of past ASD diagnosis and confirmatory administration of the Autism Mental Status Exam (AMSE). Following the clinical interview, the participant will complete the Bermond-Vorst Alexithymia Questionnaire (BVAQ) and then undertake a baseline assessment of FEER. Consenting participants will then be assigned using a permuted blocked randomization method into either the control task condition or the mimicry task condition. A brief measure of satisfaction with the task and a debriefing session will conclude the study. Results: The study has Federation University Human Research Ethics Committee approval and is registered with the Australian New Zealand Clinical Trials Registry. Participant recruitment is predicted to begin in the third quarter of 2021. Conclusions: This study will be the first to evaluate the use of a novel facial mimicry task condition to increase FEER in adults with ASD and alexithymia. 
If efficacious, this task could prove useful as a cost-effective adjunct intervention that could be used at home and thus remove barriers to entry. This study will also explore the unique effectiveness of this task in people without an ASD, with an ASD, and with alexithymia. Trial Registration: Australian New Zealand Clinical Trial Registry ACTRN12619000705189p; https://www.anzctr.org.au/Trial/Registration/TrialReview.aspx?id=377455 International Registered Report Identifier (IRRID): PRR1-10.2196/24543 ", doi="10.2196/24543", url="https://www.researchprotocols.org/2021/6/e24543/", url="http://www.ncbi.nlm.nih.gov/pubmed/34170257" } @Article{info:doi/10.2196/25816, author="Kim, Youngkyu and Oh, Jeongmin and Choi, Seung-Ho and Jung, Ahra and Lee, June-Goo and Lee, Se Yoon and Kim, Ki Jun", title="A Portable Smartphone-Based Laryngoscope System for High-Speed Vocal Cord Imaging of Patients With Throat Disorders: Instrument Validation Study", journal="JMIR Mhealth Uhealth", year="2021", month="Jun", day="18", volume="9", number="6", pages="e25816", keywords="smartphone", keywords="mobile phone", keywords="endoscope", keywords="high-speed imaging", keywords="vocal cord", keywords="low-cost device", keywords="mHealth", keywords="otorhinolaryngology", keywords="head and neck", keywords="throat", abstract="Background: Currently, high-speed digital imaging (HSDI), especially endoscopic HSDI, is routinely used for the diagnosis of vocal cord disorders. However, endoscopic HSDI devices are usually large and costly, which limits access to patients in underdeveloped countries and in regions with inadequate medical infrastructure. Modern smartphones have sufficient functionality to process the complex calculations that are required for processing high-resolution images and videos with a high frame rate. Recently, several attempts have been made to integrate medical endoscopes with smartphones to make them more accessible to people in underdeveloped countries. Objective: This study aims to develop a smartphone adaptor for endoscopes, which enables smartphone-based vocal cord imaging, to demonstrate the feasibility of performing high-speed vocal cord imaging via the high-speed imaging functions of a high-performance smartphone camera, and to determine the acceptability of the smartphone-based high-speed vocal cord imaging system for clinical applications in developing countries. Methods: A customized smartphone adaptor optical relay was designed for clinical endoscopy using selective laser melting--based 3D printing. A standard laryngoscope was attached to the smartphone adaptor to acquire high-speed vocal cord endoscopic images. Only existing basic functions of the smartphone camera were used for HSDI of the vocal cords. Extracted still frames were observed for qualitative glottal volume and shape. For image processing, segmented glottal and vocal cord areas were calculated from whole HSDI frames to characterize the amplitude of the vibrations on each side of the glottis, including the frequency, edge length, glottal areas, base cord, and lateral phase differences over the acquisition time. The device was incorporated into a preclinical videokymography diagnosis routine to compare functionality. Results: Smartphone-based HSDI with the smartphone-endoscope adaptor could achieve 940 frames per second and a resolution of 1280 by 720 frames, which corresponds to the detection of 3 to 8 frames per vocal cycle at double the spatial resolution of existing devices. 
The device was used to image the vocal cords of 4 volunteers: 1 healthy individual and 3 patients with vocal cord paralysis, chronic laryngitis, or vocal cord polyps. The resultant image stacks were sufficient for most diagnostic purposes. The cost of the device, including the smartphone, was lower than that of existing HSDI devices. The image processing and analytics demonstrated the successful calculation of relevant diagnostic variables from the acquired images. Patients with vocal pathologies were easily differentiable in the quantitative data. Conclusions: A smartphone-based HSDI endoscope system can function as a point-of-care clinical diagnostic device. The resulting analysis is of higher quality than that accessible by videostroboscopy and promises comparable quality to, and greater accessibility than, HSDI. In particular, this system is suitable for use as an accessible diagnostic tool in underdeveloped areas with inadequate medical service infrastructure. ", doi="10.2196/25816", url="https://mhealth.jmir.org/2021/6/e25816", url="http://www.ncbi.nlm.nih.gov/pubmed/34142978" } @Article{info:doi/10.2196/26601, author="Enriquez, S. Jos{\'e} and Chu, Yan and Pudakalakatti, Shivanand and Hsieh, Lin Kang and Salmon, Duncan and Dutta, Prasanta and Millward, Zacharias Niki and Lurie, Eugene and Millward, Steven and McAllister, Florencia and Maitra, Anirban and Sen, Subrata and Killary, Ann and Zhang, Jian and Jiang, Xiaoqian and Bhattacharya, K. Pratip and Shams, Shayan", title="Hyperpolarized Magnetic Resonance and Artificial Intelligence: Frontiers of Imaging in Pancreatic Cancer", journal="JMIR Med Inform", year="2021", month="Jun", day="17", volume="9", number="6", pages="e26601", keywords="artificial intelligence", keywords="deep learning", keywords="hyperpolarization", keywords="metabolic imaging", keywords="MRI", keywords="13C", keywords="HP-MR", keywords="pancreatic ductal adenocarcinoma", keywords="pancreatic cancer", keywords="early detection", keywords="assessment of treatment response", keywords="probes", keywords="cancer", keywords="marker", keywords="imaging", keywords="treatment", keywords="review", keywords="detection", keywords="efficacy", abstract="Background: There is an unmet need for noninvasive imaging markers that can help identify the aggressive subtype(s) of pancreatic ductal adenocarcinoma (PDAC) at diagnosis and at an earlier time point, and evaluate the efficacy of therapy prior to tumor reduction. In the past few years, there have been two major developments with potential for a significant impact in establishing imaging biomarkers for PDAC and pancreatic cancer premalignancy: (1) hyperpolarized metabolic (HP)-magnetic resonance (MR), which increases the sensitivity of conventional MR by over 10,000-fold, enabling real-time metabolic measurements; and (2) applications of artificial intelligence (AI). Objective: The objective of this review was to discuss these two exciting but independent developments (HP-MR and AI) in the realm of PDAC imaging and detection from the available literature to date. Methods: A systematic review following the PRISMA extension for Scoping Reviews (PRISMA-ScR) guidelines was performed. Studies addressing the utilization of HP-MR and/or AI for early detection, assessment of aggressiveness, and interrogating the early efficacy of therapy in patients with PDAC cited in recent clinical guidelines were extracted from the PubMed and Google Scholar databases. 
The studies were reviewed following predefined exclusion and inclusion criteria, and grouped based on the utilization of HP-MR and/or AI in PDAC diagnosis. Results: Part of the goal of this review was to highlight the knowledge gap of early detection in pancreatic cancer by any imaging modality, and to emphasize how AI and HP-MR can address this critical gap. We reviewed every paper published on HP-MR applications in PDAC, including six preclinical studies and one clinical trial. We also reviewed several HP-MR--related articles describing new probes with many functional applications in PDAC. On the AI side, we reviewed all existing papers that met our inclusion criteria on AI applications for evaluating computed tomography (CT) and MR images in PDAC. With the emergence of AI and its unique capability to learn across multimodal data, along with sensitive metabolic imaging using HP-MR, this knowledge gap in PDAC can be adequately addressed. CT is an accessible and widespread imaging modality worldwide as it is affordable; because of this reason alone, most of the data discussed are based on CT imaging datasets. Although there were relatively few MR-related papers included in this review, we believe that with rapid adoption of MR imaging and HP-MR, more clinical data on pancreatic cancer imaging will be available in the near future. Conclusions: Integration of AI, HP-MR, and multimodal imaging information in pancreatic cancer may lead to the development of real-time biomarkers of early detection, assessing aggressiveness, and interrogating early efficacy of therapy in PDAC. ", doi="10.2196/26601", url="https://medinform.jmir.org/2021/6/e26601", url="http://www.ncbi.nlm.nih.gov/pubmed/34137725" } @Article{info:doi/10.2196/23808, author="Ramachandran, Arivan and Kathavarayan Ramu, Shivabalan", title="Neural Network Pattern Recognition of Ultrasound Image Gray Scale Intensity Histograms of Breast Lesions to Differentiate Between Benign and Malignant Lesions: Analytical Study", journal="JMIR Biomed Eng", year="2021", month="Jun", day="2", volume="6", number="2", pages="e23808", keywords="radiology", keywords="imaging", keywords="neural network", keywords="images", abstract="Background: Ultrasound-based radiomic features to differentiate between benign and malignant breast lesions with the help of machine learning is currently being researched. The mean echogenicity ratio has been used for the diagnosis of malignant breast lesions. However, gray scale intensity histogram values as a single radiomic feature for the detection of malignant breast lesions using machine learning algorithms have not been explored yet. Objective: This study aims to assess the utility of a simple convolutional neural network in classifying benign and malignant breast lesions using gray scale intensity values of the lesion. Methods: An open-access online data set of 200 ultrasonogram breast lesions were collected, and regions of interest were drawn over the lesions. The gray scale intensity values of the lesions were extracted. An input file containing the values and an output file consisting of the breast lesions' diagnoses were created. The convolutional neural network was trained using the files and tested on the whole data set. Results: The trained convolutional neural network had an accuracy of 94.5\% and a precision of 94\%. The sensitivity and specificity were 94.9\% and 94.1\%, respectively. 
Conclusions: Simple neural networks, which are cheap and easy to use, can be applied to diagnose malignant breast lesions with gray scale intensity values obtained from ultrasonogram images in low-resource settings with minimal personnel. ", doi="10.2196/23808", url="https://biomedeng.jmir.org/2021/2/e23808" } @Article{info:doi/10.2196/28868, author="Kang, Yu-Chuan Eugene and Yeung, Ling and Lee, Yi-Lun and Wu, Cheng-Hsiu and Peng, Shu-Yen and Chen, Yueh-Peng and Gao, Quan-Ze and Lin, Chihung and Kuo, Chang-Fu and Lai, Chi-Chun", title="A Multimodal Imaging--Based Deep Learning Model for Detecting Treatment-Requiring Retinal Vascular Diseases: Model Development and Validation Study", journal="JMIR Med Inform", year="2021", month="May", day="31", volume="9", number="5", pages="e28868", keywords="deep learning", keywords="retinal vascular diseases", keywords="multimodal imaging", keywords="treatment requirement", keywords="machine learning", keywords="eye", keywords="retinal", keywords="imaging", keywords="treatment", keywords="model", keywords="detection", keywords="vascular", abstract="Background: Retinal vascular diseases, including diabetic macular edema (DME), neovascular age-related macular degeneration (nAMD), myopic choroidal neovascularization (mCNV), and branch and central retinal vein occlusion (BRVO/CRVO), are considered vision-threatening eye diseases. However, accurate diagnosis depends on multimodal imaging and the expertise of retinal ophthalmologists. Objective: The aim of this study was to develop a deep learning model to detect treatment-requiring retinal vascular diseases using multimodal imaging. Methods: This retrospective study enrolled participants with multimodal ophthalmic imaging data from 3 hospitals in Taiwan from 2013 to 2019. Eye-related images were used, including those obtained through retinal fundus photography, optical coherence tomography (OCT), and fluorescein angiography with or without indocyanine green angiography (FA/ICGA). A deep learning model was constructed for detecting DME, nAMD, mCNV, BRVO, and CRVO and identifying treatment-requiring diseases. Model performance was evaluated and is presented as the area under the curve (AUC) for each receiver operating characteristic curve. Results: A total of 2992 eyes of 2185 patients were studied, with 239, 1209, 1008, 211, 189, and 136 eyes in the control, DME, nAMD, mCNV, BRVO, and CRVO groups, respectively. Among them, 1898 eyes required treatment. The eyes were divided into training, validation, and testing groups in a 5:1:1 ratio. In total, 5117 retinal fundus photos, 9316 OCT images, and 20,922 FA/ICGA images were used. The AUCs for detecting mCNV, DME, nAMD, BRVO, and CRVO were 0.996, 0.995, 0.990, 0.959, and 0.988, respectively. The AUC for detecting treatment-requiring diseases was 0.969. From the heat maps, we observed that the model could identify retinal vascular diseases. Conclusions: Our study developed a deep learning model to detect retinal diseases using multimodal ophthalmic imaging. Furthermore, the model demonstrated good performance in detecting treatment-requiring retinal diseases. 
", doi="10.2196/28868", url="https://medinform.jmir.org/2021/5/e28868", url="http://www.ncbi.nlm.nih.gov/pubmed/34057419" } @Article{info:doi/10.2196/24653, author="Horsham, Caitlin and Ford, Helen and Herbert, Jeremy and Wall, Alexander and Walpole, Sebastian and Hacker, Elke", title="Assessing Sunscreen Protection Using UV Photography: Descriptive Study", journal="JMIR Dermatol", year="2021", month="May", day="26", volume="4", number="1", pages="e24653", keywords="skin neoplasms", keywords="melanoma", keywords="health promotion", keywords="public health", keywords="preventive medicine", keywords="sunburn", keywords="sunscreening agents", keywords="UV photography", keywords="mobile phone", abstract="Background: Photography using a UV transmitting filter allows UV light to pass and can be used to illuminate UV blocking lotions such as sunscreens. Objective: The aim of this study is to compare currently available UV photography cameras and assess whether these devices can be used as visualization tools for adequate coverage of sun protection lotions. Methods: This study was conducted in 3 parts: in phase 1, 3 different UV cameras were tested; in phase 2, we explored whether UV photography could work on a range of sun protection products; and in phase 3, a UV webcam was developed and was field-tested in a beach setting. In phase 1, volunteers were recruited, and researchers applied 3 sun protection products (ranging from sun protection factor [SPF] 15 to 50+) to the participants' faces and arms. UV photography was performed using 3 UV cameras, and the subsequent images were compared. In phase 2, volunteers were recruited and asked to apply their own SPF products to their faces in their usual manner. UV photographs were collected in the morning and afternoon to assess whether the coverage remained over time. Qualitative interviews were conducted to assess the participants' level of satisfaction with the UV image. In phase 3, a small portable UV webcam was designed using a plug-and-play approach to enable the viewing of UV images on a larger screen. The developed webcam was deployed at a public beach setting for use by the public for 7 days. Results: The 3 UV camera systems tested during phase 1 identified the application of a range of sun protection lotions of SPF 15 to 50+. The sensitivity of the UV camera devices was shown to be adequate, with SPF-containing products applied at concentrations of 2 and 1 mg/cm2 clearly visible and SPF-containing products applied at a concentration of 0.4 mg/cm2 having lower levels of coverage. Participants in phase 2 reported high satisfaction with the UV photography images, with 83\% (29/35) of participants likely to use UV photography in the future. During phase 2, it was noted that many participants used tinted SPF-containing cosmetics, and several tinted products were further tested. However, it was observed that UV photography could not identify the areas missed for all tinted products. During phase 3, the electrical components of the UV webcam remained operational, and the camera was used 233 times by the public during field-testing. Conclusions: In this study, we found that UV photography could identify the areas missed by sun protection lotions with chemical filters, and participants were engaged with personalized feedback. 
Trial Registration: Australian New Zealand Clinical Trials Registry (ANZCTR) ACTRN12619000975190; http://www.anzctr.org.au/Trial/Registration/TrialReview.aspx?id=377089 ; Australian New Zealand Clinical Trials Registry (ANZCTR) ACTRN12619000145101; https://www.anzctr.org.au/Trial/Registration/TrialReview.aspx?id=376672. ", doi="10.2196/24653", url="https://derma.jmir.org/2021/1/e24653", url="http://www.ncbi.nlm.nih.gov/pubmed/37632801" } @Article{info:doi/10.2196/14755, author="Pan, Qiong and Zhang, Kai and He, Lin and Dong, Zhou and Zhang, Lei and Wu, Xiaohang and Wu, Yi and Gao, Yanjun", title="Automatically Diagnosing Disk Bulge and Disk Herniation With Lumbar Magnetic Resonance Images by Using Deep Convolutional Neural Networks: Method Development Study", journal="JMIR Med Inform", year="2021", month="May", day="21", volume="9", number="5", pages="e14755", keywords="deep learning", keywords="object localization", keywords="disk herniation", keywords="disk bulge", keywords="image classification", abstract="Background: Disk herniation and disk bulge are two common disorders of lumbar intervertebral disks (IVDs) that often result in numbness, pain in the lower limbs, and lower back pain. Magnetic resonance (MR) imaging is one of the most efficient techniques for detecting lumbar diseases and is widely used for making clinical diagnoses at hospitals. However, there is a lack of efficient tools for effectively interpreting massive amounts of MR images to meet the requirements of many radiologists. Objective: The aim of this study was to present an automatic system for diagnosing disk bulge and herniation that saves time and can effectively and significantly reduce the workload of radiologists. Methods: The diagnosis of lumbar vertebral disorders is highly dependent on medical images. Therefore, we chose the two most common diseases---disk bulge and herniation---as research subjects. This study is mainly about identifying the position of IVDs (lumbar vertebra [L] 1 to L2, L2-L3, L3-L4, L4-L5, and L5 to sacral vertebra [S] 1) by analyzing the geometrical relationship between sagittal and axial images and classifying axial lumbar disk MR images via deep convolutional neural networks. Results: This system involved 4 steps. In the first step, it automatically located vertebral bodies (including the L1, L2, L3, L4, L5, and S1) in sagittal images by using the faster region-based convolutional neural network, and our fourfold cross-validation showed 100\% accuracy. In the second step, it spontaneously identified the corresponding disk in each axial lumbar disk MR image with 100\% accuracy. In the third step, the accuracy for automatically locating the intervertebral disk region of interest in axial MR images was 100\%. In the fourth step, the 3-class classification (normal disk, disk bulge, and disk herniation) accuracies for the L1-L2, L2-L3, L3-L4, L4-L5, and L5-S1 IVDs were 92.7\%, 84.4\%, 92.1\%, 90.4\%, and 84.2\%, respectively. Conclusions: The automatic diagnosis system was successfully built, and it could classify images of normal disks, disk bulge, and disk herniation. This system provided a web-based test for interpreting lumbar disk MR images that could significantly improve diagnostic efficiency and standardized diagnosis reports. This system can also be used to detect other lumbar abnormalities and cervical spondylosis. 
", doi="10.2196/14755", url="https://medinform.jmir.org/2021/5/e14755", url="http://www.ncbi.nlm.nih.gov/pubmed/34018488" } @Article{info:doi/10.2196/22664, author="Li, Lei and Zhu, Haogang and Zhang, Zhenyu and Zhao, Liang and Xu, Liang and Jonas, A. Rahul and Garway-Heath, F. David and Jonas, B. Jost and Wang, Xing Ya", title="Neural Network--Based Retinal Nerve Fiber Layer Profile Compensation for Glaucoma Diagnosis in Myopia: Model Development and Validation", journal="JMIR Med Inform", year="2021", month="May", day="18", volume="9", number="5", pages="e22664", keywords="retinal nerve fiber layer thickness", keywords="radial basis neural network", keywords="neural network", keywords="glaucoma", keywords="optic nerve head", keywords="optical coherence tomography", keywords="myopia", keywords="optic nerve", abstract="Background: Due to the axial elongation--associated changes in the optic nerve and retina in high myopia, traditional methods like optic disc evaluation and visual field are not able to correctly differentiate glaucomatous lesions. It has been clinically challenging to detect glaucoma in highly myopic eyes. Objective: This study aimed to develop a neural network to adjust for the dependence of the peripapillary retinal nerve fiber layer (RNFL) thickness (RNFLT) profile on age, gender, and ocular biometric parameters and to evaluate the network's performance for glaucoma diagnosis, especially in high myopia. Methods: RNFLT with 768 points on the circumferential 3.4-mm scan was measured using spectral-domain optical coherence tomography. A fully connected network and a radial basis function network were trained for vertical (scaling) and horizontal (shift) transformation of the RNFLT profile with adjustment for age, axial length (AL), disc-fovea angle, and distance in a test group of 2223 nonglaucomatous eyes. The performance of RNFLT compensation was evaluated in an independent group of 254 glaucoma patients and 254 nonglaucomatous participants. Results: By applying the RNFL compensation algorithm, the area under the receiver operating characteristic curve for detecting glaucoma increased from 0.70 to 0.84, from 0.75 to 0.89, from 0.77 to 0.89, and from 0.78 to 0.87 for eyes in the highest 10\% percentile subgroup of the AL distribution (mean 26.0, SD 0.9 mm), highest 20\% percentile subgroup of the AL distribution (mean 25.3, SD 1.0 mm), highest 30\% percentile subgroup of the AL distribution (mean 24.9, SD 1.0 mm), and any AL (mean 23.5, SD 1.2 mm), respectively, in comparison with unadjusted RNFLT. The difference between uncompensated and compensated RNFLT values increased with longer axial length, with enlargement of 19.8\%, 18.9\%, 16.2\%, and 11.3\% in the highest 10\% percentile subgroup, highest 20\% percentile subgroup, highest 30\% percentile subgroup, and all eyes, respectively. Conclusions: In a population-based study sample, an algorithm-based adjustment for age, gender, and ocular biometric parameters improved the diagnostic precision of the RNFLT profile for glaucoma detection particularly in myopic and highly myopic eyes. 
", doi="10.2196/22664", url="https://medinform.jmir.org/2021/5/e22664", url="http://www.ncbi.nlm.nih.gov/pubmed/34003137" } @Article{info:doi/10.2196/25869, author="Lee, Haeyun and Chai, Jun Young and Joo, Hyunjin and Lee, Kyungsu and Hwang, Youn Jae and Kim, Seok-Mo and Kim, Kwangsoon and Nam, Inn-Chul and Choi, Young June and Yu, Won Hyeong and Lee, Myung-Chul and Masuoka, Hiroo and Miyauchi, Akira and Lee, Eun Kyu and Kim, Sungwan and Kong, Hyoun-Joong", title="Federated Learning for Thyroid Ultrasound Image Analysis to Protect Personal Information: Validation Study in a Real Health Care Environment", journal="JMIR Med Inform", year="2021", month="May", day="18", volume="9", number="5", pages="e25869", keywords="deep learning", keywords="federated learning", keywords="thyroid nodules", keywords="ultrasound image", abstract="Background: Federated learning is a decentralized approach to machine learning; it is a training strategy that overcomes medical data privacy regulations and generalizes deep learning algorithms. Federated learning mitigates many systemic privacy risks by sharing only the model and parameters for training, without the need to export existing medical data sets. In this study, we performed ultrasound image analysis using federated learning to predict whether thyroid nodules were benign or malignant. Objective: The goal of this study was to evaluate whether the performance of federated learning was comparable with that of conventional deep learning. Methods: A total of 8457 (5375 malignant, 3082 benign) ultrasound images were collected from 6 institutions and used for federated learning and conventional deep learning. Five deep learning networks (VGG19, ResNet50, ResNext50, SE-ResNet50, and SE-ResNext50) were used. Using stratified random sampling, we selected 20\% (1075 malignant, 616 benign) of the total images for internal validation. For external validation, we used 100 ultrasound images (50 malignant, 50 benign) from another institution. Results: For internal validation, the area under the receiver operating characteristic (AUROC) curve for federated learning was between 78.88\% and 87.56\%, and the AUROC for conventional deep learning was between 82.61\% and 91.57\%. For external validation, the AUROC for federated learning was between 75.20\% and 86.72\%, and the AUROC curve for conventional deep learning was between 73.04\% and 91.04\%. Conclusions: We demonstrated that the performance of federated learning using decentralized data was comparable to that of conventional deep learning using pooled data. Federated learning might be potentially useful for analyzing medical images while protecting patients' personal information. 
", doi="10.2196/25869", url="https://medinform.jmir.org/2021/5/e25869", url="http://www.ncbi.nlm.nih.gov/pubmed/33858817" } @Article{info:doi/10.2196/24803, author="Park, Hyung and Song, Min and Lee, Byul Eun and Seo, Kyung Bo and Choi, Min Chang", title="An Attention Model With Transfer Embeddings to Classify Pneumonia-Related Bilingual Imaging Reports: Algorithm Development and Validation", journal="JMIR Med Inform", year="2021", month="May", day="17", volume="9", number="5", pages="e24803", keywords="deep learning", keywords="natural language process", keywords="attention", keywords="clinical data", keywords="pneumonia", keywords="classification", keywords="medical imaging", keywords="electronic health record", keywords="machine learning", keywords="model", abstract="Background: In the analysis of electronic health records, proper labeling of outcomes is mandatory. To obtain proper information from radiologic reports, several studies were conducted to classify radiologic reports using deep learning. However, the classification of pneumonia in bilingual radiologic reports has not been conducted previously. Objective: The aim of this research was to classify radiologic reports into pneumonia or no pneumonia using a deep learning method. Methods: A data set of radiology reports for chest computed tomography and chest x-rays of surgical patients from January 2008 to January 2018 in the Asan Medical Center in Korea was retrospectively analyzed. The classification performance of our long short-term memory (LSTM)--Attention model was compared with various deep learning and machine learning methods. The area under the receiver operating characteristic curve (AUROC), area under the precision-recall curve, sensitivity, specificity, accuracy, and F1 score for the models were compared. Results: A total of 5450 radiologic reports were included that contained at least one pneumonia-related word. In the test set (n=1090), our proposed model showed 91.01\% (992/1090) accuracy (AUROCs for negative, positive, and obscure were 0.98, 0.97, and 0.90, respectively). The top 3 performances of the models were based on FastText or LSTM. The convolutional neural network--based model showed a lower accuracy 73.03\% (796/1090) than the other 2 algorithms. The classification of negative results had an F1 score of 0.96, whereas the classification of positive and uncertain results showed a lower performance (positive F1 score 0.83; uncertain F1 score 0.62). In the extra-validation set, our model showed 80.0\% (642/803) accuracy (AUROCs for negative, positive, and obscure were 0.92, 0.96, and 0.84, respectively). Conclusions: Our method showed excellent performance in classifying pneumonia in bilingual radiologic reports. The method could enrich the research on pneumonia by obtaining exact outcomes from electronic health data. ", doi="10.2196/24803", url="https://medinform.jmir.org/2021/5/e24803", url="http://www.ncbi.nlm.nih.gov/pubmed/33820755" } @Article{info:doi/10.2196/24381, author="Yu, X. Amy Y. and Liu, A. Zhongyu and Pou-Prom, Chloe and Lopes, Kaitlyn and Kapral, K. Moira and Aviv, I. 
Richard and Mamdani, Muhammad", title="Automating Stroke Data Extraction From Free-Text Radiology Reports Using Natural Language Processing: Instrument Validation Study", journal="JMIR Med Inform", year="2021", month="May", day="4", volume="9", number="5", pages="e24381", keywords="stroke", keywords="diagnostic imaging", keywords="data extraction", keywords="natural language processing", keywords="neurovascular", keywords="imaging", keywords="stroke surveillance", keywords="surveillance", abstract="Background: Diagnostic neurovascular imaging data are important in stroke research, but obtaining these data typically requires laborious manual chart reviews. Objective: We aimed to determine the accuracy of a natural language processing (NLP) approach to extract information on the presence and location of vascular occlusions as well as other stroke-related attributes based on free-text reports. Methods: From the full reports of 1320 consecutive computed tomography (CT), CT angiography, and CT perfusion scans of the head and neck performed at a tertiary stroke center between October 2017 and January 2019, we manually extracted data on the presence of proximal large vessel occlusion (primary outcome), as well as distal vessel occlusion, ischemia, hemorrhage, Alberta stroke program early CT score (ASPECTS), and collateral status (secondary outcomes). Reports were randomly split into training (n=921) and validation (n=399) sets, and attributes were extracted using rule-based NLP. We reported the sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV), and the overall accuracy of the NLP approach relative to the manually extracted data. Results: The overall prevalence of large vessel occlusion was 12.2\%. In the training sample, the NLP approach identified this attribute with an overall accuracy of 97.3\% (95.5\% sensitivity, 98.1\% specificity, 84.1\% PPV, and 99.4\% NPV). In the validation set, the overall accuracy was 95.2\% (90.0\% sensitivity, 97.4\% specificity, 76.3\% PPV, and 98.5\% NPV). The accuracy of identifying distal or basilar occlusion as well as hemorrhage was also high, but there were limitations in identifying cerebral ischemia, ASPECTS, and collateral status. Conclusions: NLP may improve the efficiency of large-scale imaging data collection for stroke surveillance and research. ", doi="10.2196/24381", url="https://medinform.jmir.org/2021/5/e24381", url="http://www.ncbi.nlm.nih.gov/pubmed/33944791" } @Article{info:doi/10.2196/21394, author="Poly, Nasrin Tahmina and Islam, Mohaimenul Md and Li, Jack Yu-Chuan and Alsinglawi, Belal and Hsu, Min-Huei and Jian, Shan Wen and Yang, Hsuan-Chia", title="Application of Artificial Intelligence for Screening COVID-19 Patients Using Digital Images: Meta-analysis", journal="JMIR Med Inform", year="2021", month="Apr", day="29", volume="9", number="4", pages="e21394", keywords="COVID-19", keywords="SARS-CoV-2", keywords="pneumonia", keywords="artificial intelligence", keywords="deep learning", abstract="Background: The COVID-19 outbreak has spread rapidly and hospitals are overwhelmed with COVID-19 patients. While analysis of nasal and throat swabs from patients is the main way to detect COVID-19, analyzing chest images could offer an alternative method to hospitals, where health care personnel and testing kits are scarce. Deep learning (DL), in particular, has shown impressive levels of performance when analyzing medical images, including those related to COVID-19 pneumonia. 
Objective: The goal of this study was to perform a systematic review with a meta-analysis of relevant studies to quantify the performance of DL algorithms in the automatic stratification of COVID-19 patients using chest images. Methods: A search strategy for use in PubMed, Scopus, Google Scholar, and Web of Science was developed, where we searched for articles published between January 1 and April 25, 2020. We used the key terms ``COVID-19,'' or ``coronavirus,'' or ``SARS-CoV-2,'' or ``novel corona,'' or ``2019-ncov,'' and ``deep learning,'' or ``artificial intelligence,'' or ``automatic detection.'' Two authors independently extracted data on study characteristics, methods, risk of bias, and outcomes. Any disagreement between them was resolved by consensus. Results: A total of 16 studies were included in the meta-analysis, which included 5896 chest images from COVID-19 patients. The pooled sensitivity and specificity of the DL models in detecting COVID-19 were 0.95 (95\% CI 0.94-0.95) and 0.96 (95\% CI 0.96-0.97), respectively, with an area under the receiver operating characteristic curve of 0.98. The positive likelihood, negative likelihood, and diagnostic odds ratio were 19.02 (95\% CI 12.83-28.19), 0.06 (95\% CI 0.04-0.10), and 368.07 (95\% CI 162.30-834.75), respectively. The pooled sensitivity and specificity for distinguishing other types of pneumonia from COVID-19 were 0.93 (95\% CI 0.92-0.94) and 0.95 (95\% CI 0.94-0.95), respectively. The performance of radiologists in detecting COVID-19 was lower than that of the DL models; however, the performance of junior radiologists was improved when they used DL-based prediction tools. Conclusions: Our study findings show that DL models have immense potential in accurately stratifying COVID-19 patients and in correctly differentiating them from patients with other types of pneumonia and normal patients. Implementation of DL-based tools can assist radiologists in correctly and quickly detecting COVID-19 and, consequently, in combating the COVID-19 pandemic. ", doi="10.2196/21394", url="https://medinform.jmir.org/2021/4/e21394", url="http://www.ncbi.nlm.nih.gov/pubmed/33764884" } @Article{info:doi/10.2196/27468, author="Ghaderzadeh, Mustafa and Asadi, Farkhondeh and Jafari, Ramezan and Bashash, Davood and Abolghasemi, Hassan and Aria, Mehrad", title="Deep Convolutional Neural Network--Based Computer-Aided Detection System for COVID-19 Using Multiple Lung Scans: Design and Implementation Study", journal="J Med Internet Res", year="2021", month="Apr", day="26", volume="23", number="4", pages="e27468", keywords="artificial intelligence", keywords="classification", keywords="computer-aided detection", keywords="computed tomography scan", keywords="convolutional neural network", keywords="coronavirus", keywords="COVID-19", keywords="deep learning", keywords="machine learning", keywords="machine vision", keywords="model", keywords="pandemic", abstract="Background: Owing to the COVID-19 pandemic and the imminent collapse of health care systems following the exhaustion of financial, hospital, and medicinal resources, the World Health Organization changed the alert level of the COVID-19 pandemic from high to very high. Meanwhile, more cost-effective and precise COVID-19 detection methods are being preferred worldwide. Objective: Machine vision--based COVID-19 detection methods, especially deep learning as a diagnostic method in the early stages of the pandemic, have been assigned great importance during the pandemic. 
This study aimed to design a highly efficient computer-aided detection (CAD) system for COVID-19 by using a neural search architecture network (NASNet)--based algorithm. Methods: NASNet, a state-of-the-art pretrained convolutional neural network for image feature extraction, was adopted to identify patients with COVID-19 in their early stages of the disease. A local data set, comprising 10,153 computed tomography scans of 190 patients with and 59 without COVID-19 was used. Results: After fitting on the training data set, hyperparameter tuning, and topological alterations of the classifier block, the proposed NASNet-based model was evaluated on the test data set and yielded remarkable results. The proposed model's performance achieved a detection sensitivity, specificity, and accuracy of 0.999, 0.986, and 0.996, respectively. Conclusions: The proposed model achieved acceptable results in the categorization of 2 data classes. Therefore, a CAD system was designed on the basis of this model for COVID-19 detection using multiple lung computed tomography scans. The system differentiated all COVID-19 cases from non--COVID-19 ones without any error in the application phase. Overall, the proposed deep learning--based CAD system can greatly help radiologists detect COVID-19 in its early stages. During the COVID-19 pandemic, the use of a CAD system as a screening tool would accelerate disease detection and prevent the loss of health care resources. ", doi="10.2196/27468", url="https://www.jmir.org/2021/4/e27468", url="http://www.ncbi.nlm.nih.gov/pubmed/33848973" } @Article{info:doi/10.2196/25181, author="Montazeri, Mahdieh and ZahediNasab, Roxana and Farahani, Ali and Mohseni, Hadis and Ghasemian, Fahimeh", title="Machine Learning Models for Image-Based Diagnosis and Prognosis of COVID-19: Systematic Review", journal="JMIR Med Inform", year="2021", month="Apr", day="23", volume="9", number="4", pages="e25181", keywords="machine learning", keywords="diagnosis", keywords="prognosis", keywords="COVID-19", abstract="Background: Accurate and timely diagnosis and effective prognosis of the disease is important to provide the best possible care for patients with COVID-19 and reduce the burden on the health care system. Machine learning methods can play a vital role in the diagnosis of COVID-19 by processing chest x-ray images. Objective: The aim of this study is to summarize information on the use of intelligent models for the diagnosis and prognosis of COVID-19 to help with early and timely diagnosis, minimize prolonged diagnosis, and improve overall health care. Methods: A systematic search of databases, including PubMed, Web of Science, IEEE, ProQuest, Scopus, bioRxiv, and medRxiv, was performed for COVID-19--related studies published up to May 24, 2020. This study was performed in accordance with the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-analyses) guidelines. All original research articles describing the application of image processing for the prediction and diagnosis of COVID-19 were considered in the analysis. Two reviewers independently assessed the published papers to determine eligibility for inclusion in the analysis. Risk of bias was evaluated using the Prediction Model Risk of Bias Assessment Tool. Results: Of the 629 articles retrieved, 44 articles were included. 
We identified 4 prognosis models for calculating prediction of disease severity and estimation of confinement time for individual patients, and 40 diagnostic models for detecting COVID-19 from normal or other pneumonias. Most included studies used deep learning methods based on convolutional neural networks, which have been widely used as a classification algorithm. The most frequently reported predictors of prognosis in patients with COVID-19 included age, computed tomography data, gender, comorbidities, symptoms, and laboratory findings. Deep convolutional neural networks obtained better results compared with non--neural network--based methods. Moreover, all of the models were found to be at high risk of bias due to the lack of information about the study population, intended groups, and inappropriate reporting. Conclusions: Machine learning models used for the diagnosis and prognosis of COVID-19 showed excellent discriminative performance. However, these models were at high risk of bias, because of various reasons such as inadequate information about study participants, randomization process, and the lack of external validation, which may have resulted in the optimistic reporting of these models. Hence, our findings do not recommend any of the current models to be used in practice for the diagnosis and prognosis of COVID-19. ", doi="10.2196/25181", url="https://medinform.jmir.org/2021/4/e25181", url="http://www.ncbi.nlm.nih.gov/pubmed/33735095" } @Article{info:doi/10.2196/25167, author="Bang, Seok Chang and Lim, Hyun and Jeong, Min Hae and Hwang, Hyeon Sung", title="Use of Endoscopic Images in the Prediction of Submucosal Invasion of Gastric Neoplasms: Automated Deep Learning Model Development and Usability Study", journal="J Med Internet Res", year="2021", month="Apr", day="15", volume="23", number="4", pages="e25167", keywords="convolutional neural network", keywords="deep learning", keywords="automated deep learning", keywords="endoscopy", keywords="gastric neoplasms", keywords="neural network", keywords="deep learning model", keywords="artificial intelligence", abstract="Background: In a previous study, we examined the use of deep learning models to classify the invasion depth (mucosa-confined versus submucosa-invaded) of gastric neoplasms using endoscopic images. The external test accuracy reached 77.3\%. However, model establishment is labor intense, requiring high performance. Automated deep learning (AutoDL) models, which enable fast searching of optimal neural architectures and hyperparameters without complex coding, have been developed. Objective: The objective of this study was to establish AutoDL models to classify the invasion depth of gastric neoplasms. Additionally, endoscopist--artificial intelligence interactions were explored. Methods: The same 2899 endoscopic images that were employed to establish the previous model were used. A prospective multicenter validation using 206 and 1597 novel images was conducted. The primary outcome was external test accuracy. Neuro-T, Create ML Image Classifier, and AutoML Vision were used in establishing the models. Three doctors with different levels of endoscopy expertise were asked to classify the invasion depth of gastric neoplasms for each image without AutoDL support, with faulty AutoDL support, and with best performance AutoDL support in sequence. Results: The Neuro-T--based model reached 89.3\% (95\% CI 85.1\%-93.5\%) external test accuracy. 
For the model establishment time, Create ML Image Classifier showed the fastest time of 13 minutes while reaching 82.0\% (95\% CI 76.8\%-87.2\%) external test accuracy. While the expert endoscopist's decisions were not influenced by AutoDL, the faulty AutoDL misled the endoscopy trainee and the general physician. However, this was corrected by the support of the best performance AutoDL model. The trainee gained the most benefit from the AutoDL support. Conclusions: AutoDL is deemed useful for the on-site establishment of customized deep learning models. An inexperienced endoscopist with at least a certain level of expertise can benefit from AutoDL support. ", doi="10.2196/25167", url="https://www.jmir.org/2021/4/e25167", url="http://www.ncbi.nlm.nih.gov/pubmed/33856356" } @Article{info:doi/10.2196/22394, author="Castaldo, Rossana and Cavaliere, Carlo and Soricelli, Andrea and Salvatore, Marco and Pecchia, Leandro and Franzese, Monica", title="Radiomic and Genomic Machine Learning Method Performance for Prostate Cancer Diagnosis: Systematic Literature Review", journal="J Med Internet Res", year="2021", month="Apr", day="1", volume="23", number="4", pages="e22394", keywords="prostate cancer", keywords="machine learning", keywords="systematic review", keywords="meta-analysis", keywords="diagnosis", keywords="imaging", keywords="radiomics", keywords="genomics", keywords="clinical", keywords="biomarkers", abstract="Background: Machine learning algorithms have been drawing attention at the joining of pathology and radiology in prostate cancer research. However, due to their algorithmic learning complexity and the variability of their architecture, there is an ongoing need to analyze their performance. Objective: This study assesses the source of heterogeneity and the performance of machine learning applied to radiomic, genomic, and clinical biomarkers for the diagnosis of prostate cancer. One research focus of this study was on clearly identifying problems and issues related to the implementation of machine learning in clinical studies. Methods: Following the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) protocol, 816 titles were identified from the PubMed, Scopus, and OvidSP databases. Studies that used machine learning to detect prostate cancer and provided performance measures were included in our analysis. The quality of the eligible studies was assessed using the QUADAS-2 (quality assessment of diagnostic accuracy studies--version 2) tool. The hierarchical multivariate model was applied to the pooled data in a meta-analysis. To investigate the heterogeneity among studies, I2 statistics were performed along with visual evaluation of coupled forest plots. Due to the internal heterogeneity among machine learning algorithms, subgroup analysis was carried out to investigate the diagnostic capability of machine learning systems in clinical practice. Results: In the final analysis, 37 studies were included, of which 29 entered the meta-analysis pooling. The analysis of machine learning methods to detect prostate cancer reveals the limited usage of the methods and the lack of standards that hinder the implementation of machine learning in clinical applications. 
Conclusions: The performance of machine learning for diagnosis of prostate cancer was considered satisfactory for several studies investigating the multiparametric magnetic resonance imaging and urine biomarkers; however, given the limitations indicated in our study, further studies are warranted to extend the potential use of machine learning to clinical settings. Recommendations on the use of machine learning techniques were also provided to help researchers to design robust studies to facilitate evidence generation from the use of radiomic and genomic biomarkers. ", doi="10.2196/22394", url="https://www.jmir.org/2021/4/e22394", url="http://www.ncbi.nlm.nih.gov/pubmed/33792552" } @Article{info:doi/10.2196/21695, author="Maron, C. Roman and Hekler, Achim and Krieghoff-Henning, Eva and Schmitt, Max and Schlager, G. Justin and Utikal, S. Jochen and Brinker, J. Titus", title="Reducing the Impact of Confounding Factors on Skin Cancer Classification via Image Segmentation: Technical Model Study", journal="J Med Internet Res", year="2021", month="Mar", day="25", volume="23", number="3", pages="e21695", keywords="dermatology", keywords="diagnosis", keywords="artificial intelligence", keywords="neural networks", keywords="image segmentation", keywords="confounding factors", keywords="artifacts", keywords="melanoma", keywords="nevus", keywords="deep learning", abstract="Background: Studies have shown that artificial intelligence achieves similar or better performance than dermatologists in specific dermoscopic image classification tasks. However, artificial intelligence is susceptible to the influence of confounding factors within images (eg, skin markings), which can lead to false diagnoses of cancerous skin lesions. Image segmentation can remove lesion-adjacent confounding factors but greatly change the image representation. Objective: The aim of this study was to compare the performance of 2 image classification workflows where images were either segmented or left unprocessed before the subsequent training and evaluation of a binary skin lesion classifier. Methods: Separate binary skin lesion classifiers (nevus vs melanoma) were trained and evaluated on segmented and unsegmented dermoscopic images. For a more informative result, separate classifiers were trained on 2 distinct training data sets (human against machine [HAM] and International Skin Imaging Collaboration [ISIC]). Each training run was repeated 5 times. The mean performance of the 5 runs was evaluated on a multi-source test set (n=688) consisting of a holdout and an external component. Results: Our findings showed that when trained on HAM, the segmented classifiers showed a higher overall balanced accuracy (75.6\% [SD 1.1\%]) than the unsegmented classifiers (66.7\% [SD 3.2\%]), which was significant in 4 out of 5 runs (P<.001). The overall balanced accuracy was numerically higher for the unsegmented ISIC classifiers (78.3\% [SD 1.8\%]) than for the segmented ISIC classifiers (77.4\% [SD 1.5\%]), which was significantly different in 1 out of 5 runs (P=.004). Conclusions: Image segmentation does not result in overall performance decrease but it causes the beneficial removal of lesion-adjacent confounding factors. Thus, it is a viable option to address the negative impact that confounding factors have on deep learning models in dermatology. However, the segmentation step might introduce new pitfalls, which require further investigations. 
", doi="10.2196/21695", url="https://www.jmir.org/2021/3/e21695", url="http://www.ncbi.nlm.nih.gov/pubmed/33764307" } @Article{info:doi/10.2196/23328, author="Park, Young Ho and Bae, Hyun-Jin and Hong, Gil-Sun and Kim, Minjee and Yun, JiHye and Park, Sungwon and Chung, Jung Won and Kim, NamKug", title="Realistic High-Resolution Body Computed Tomography Image Synthesis by Using Progressive Growing Generative Adversarial Network: Visual Turing Test", journal="JMIR Med Inform", year="2021", month="Mar", day="17", volume="9", number="3", pages="e23328", keywords="generative adversarial network", keywords="unsupervised deep learning", keywords="computed tomography", keywords="synthetic body images", keywords="visual Turing test", abstract="Background: Generative adversarial network (GAN)--based synthetic images can be viable solutions to current supervised deep learning challenges. However, generating highly realistic images is a prerequisite for these approaches. Objective: The aim of this study was to investigate and validate the unsupervised synthesis of highly realistic body computed tomography (CT) images by using a progressive growing GAN (PGGAN) trained to learn the probability distribution of normal data. Methods: We trained the PGGAN by using 11,755 body CT scans. Ten radiologists (4 radiologists with <5 years of experience [Group I], 4 radiologists with 5-10 years of experience [Group II], and 2 radiologists with >10 years of experience [Group III]) evaluated the results in a binary approach by using an independent validation set of 300 images (150 real and 150 synthetic) to judge the authenticity of each image. Results: The mean accuracy of the 10 readers in the entire image set was higher than random guessing (1781/3000, 59.4\% vs 1500/3000, 50.0\%, respectively; P<.001). However, in terms of identifying synthetic images as fake, there was no significant difference in the specificity between the visual Turing test and random guessing (779/1500, 51.9\% vs 750/1500, 50.0\%, respectively; P=.29). The accuracy between the 3 reader groups with different experience levels was not significantly different (Group I, 696/1200, 58.0\%; Group II, 726/1200, 60.5\%; and Group III, 359/600, 59.8\%; P=.36). Interreader agreements were poor ($\kappa$=0.11) for the entire image set. In subgroup analysis, the discrepancies between real and synthetic CT images occurred mainly in the thoracoabdominal junction and in the anatomical details. Conclusions: The GAN can synthesize highly realistic high-resolution body CT images that are indistinguishable from real images; however, it has limitations in generating body images of the thoracoabdominal junction and lacks accuracy in the anatomical details. 
", doi="10.2196/23328", url="https://medinform.jmir.org/2021/3/e23328", url="http://www.ncbi.nlm.nih.gov/pubmed/33609339" } @Article{info:doi/10.2196/23415, author="Zhao, Zhixiang and Wu, Che-Ming and Zhang, Shuping and He, Fanping and Liu, Fangfen and Wang, Ben and Huang, Yingxue and Shi, Wei and Jian, Dan and Xie, Hongfu and Yeh, Chao-Yuan and Li, Ji", title="A Novel Convolutional Neural Network for the Diagnosis and Classification of Rosacea: Usability Study", journal="JMIR Med Inform", year="2021", month="Mar", day="15", volume="9", number="3", pages="e23415", keywords="rosacea", keywords="artificial intelligence", keywords="convolutional neural networks", abstract="Background: Rosacea is a chronic inflammatory disease with variable clinical presentations, including transient flushing, fixed erythema, papules, pustules, and phymatous changes on the central face. Owing to the diversity in the clinical manifestations of rosacea, the lack of objective biochemical examinations, and nonspecificity in histopathological findings, accurate identification of rosacea is a big challenge. Artificial intelligence has emerged as a potential tool in the identification and evaluation of some skin diseases such as melanoma, basal cell carcinoma, and psoriasis. Objective: The objective of our study was to utilize a convolutional neural network (CNN) to differentiate the clinical photos of patients with rosacea (taken from 3 different angles) from those of patients with other skin diseases such as acne, seborrheic dermatitis, and eczema that could be easily confused with rosacea. Methods: In this study, 24,736 photos comprising of 18,647 photos of patients with rosacea and 6089 photos of patients with other skin diseases such as acne, facial seborrheic dermatitis, and eczema were included and analyzed by our CNN model based on ResNet-50. Results: The CNN in our study achieved an overall accuracy and precision of 0.914 and 0.898, with an area under the receiver operating characteristic curve of 0.972 for the detection of rosacea. The accuracy of classifying 3 subtypes of rosacea, that is, erythematotelangiectatic rosacea, papulopustular rosacea, and phymatous rosacea was 83.9\%, 74.3\%, and 80.0\%, respectively. Moreover, the accuracy and precision of our CNN to distinguish rosacea from acne reached 0.931 and 0.893, respectively. For the differentiation between rosacea, seborrheic dermatitis, and eczema, the overall accuracy of our CNN was 0.757 and the precision was 0.667. Finally, by comparing the CNN diagnosis with the diagnoses by dermatologists of different expertise levels, we found that our CNN system is capable of identifying rosacea with a performance superior to that of resident doctors or attending physicians and comparable to that of experienced dermatologists. Conclusions: The findings of our study showed that by assessing clinical images, the CNN system in our study could identify rosacea with accuracy and precision comparable to that of an experienced dermatologist. 
", doi="10.2196/23415", url="https://medinform.jmir.org/2021/3/e23415", url="http://www.ncbi.nlm.nih.gov/pubmed/33720027" } @Article{info:doi/10.2196/26443, author="Do Khac, Ariane and Jourdan, Claire and Fazilleau, Sylvain and Palayer, Claire and Laffont, Isabelle and Dupeyron, Arnaud and Verdun, St{\'e}phane and Gelis, Anthony", title="mHealth App for Pressure Ulcer Wound Assessment in Patients With Spinal Cord Injury: Clinical Validation Study", journal="JMIR Mhealth Uhealth", year="2021", month="Feb", day="23", volume="9", number="2", pages="e26443", keywords="mobile app", keywords="wound", keywords="pressure ulcer", keywords="assessment", keywords="validity", keywords="reliability", keywords="app", keywords="correlation", keywords="access", keywords="availability", keywords="reproducibility", abstract="Background: Clinical evaluation of a pressure ulcer is based on quantitative and qualitative evaluation. In clinical practice, acetate tracing is the standard technique used to measure wound surface area; however, it is difficult to use in daily practice (because of material availability, data storage issues, and time needed to calculate the surface area). Planimetry techniques developed with mobile health (mHealth) apps can be used to overcome these difficulties. Objective: The goal of this study was to evaluate the metrological properties of a free-access mHealth app, called imitoMeasure, to assess pressure ulcers. Methods: This was a noninterventional, validation study. We included patients with spinal cord injury presenting with a pressure ulcer, regardless of its stage or location. We performed wound measurements with a ruler, and we performed acetate tracing using a transparent dressing with a wound measurement grid. Wound evaluation via the mHealth app was conducted twice by the main investigator and also by a coinvestigator to determine validity, intrarater reproducibility, and interrater reproducibility. Bland-Altman plots and intraclass correlation coefficients were used to compute the minimal detectable change percentage. Results: Overall, 61 different pressure ulcers were included. The validity, intrarater reproducibility, and interrater reproducibility of the mHealth app vs acetate tracing (considered the method of reference) were good, with intraclass correlation coefficients of 0.97 (95\% CI 0.93-0.99), 0.99 (95\% CI 0.98-0.99), and 0.98 (95\% CI 0.96-0.99), respectively, and minimal detectable change percentages between 17\% and 35\%. Conclusions: The imitoMeasure app had good validity and reproducibility. It could be an alternative to standard wound assessment methods. Further studies on larger and more diverse wounds are needed. 
Trial Registration: ClinicalTrials.gov NCT04402398; http://clinicaltrials.gov/ct2/show/NCT04402398 ", doi="10.2196/26443", url="https://mhealth.jmir.org/2021/2/e26443", url="http://www.ncbi.nlm.nih.gov/pubmed/33620327" } @Article{info:doi/10.2196/24266, author="Giaretto, Simone and Renne, Lorenzo Salvatore and Rahal, Daoud and Bossi, Paola and Colombo, Piergiuseppe and Spaggiari, Paola and Manara, Sofia and Sollai, Mauro and Fiamengo, Barbara and Brambilla, Tatiana and Fernandes, Bethania and Rao, Stefania and Elamin, Abubaker and Valeri, Marina and De Carlo, Camilla and Belsito, Vincenzo and Lancellotti, Cesare and Cieri, Miriam and Cagini, Angelo and Terracciano, Luigi and Roncalli, Massimo and Di Tommaso, Luca", title="Digital Pathology During the COVID-19 Outbreak in Italy: Survey Study", journal="J Med Internet Res", year="2021", month="Feb", day="22", volume="23", number="2", pages="e24266", keywords="COVID19", keywords="digital pathology", keywords="Bayesian data analysis", keywords="probabilistic modeling", abstract="Background: Transition to digital pathology usually takes months or years to be completed. We were familiarizing ourselves with digital pathology solutions at the time when the COVID-19 outbreak forced us to embark on an abrupt transition to digital pathology. Objective: The aim of this study was to quantitatively describe how the abrupt transition to digital pathology might affect the quality of diagnoses, model possible causes by probabilistic modeling, and qualitatively gauge the perception of this abrupt transition. Methods: A total of 17 pathologists and residents participated in this study; these participants reviewed 25 additional test cases from the archives and completed a final psychologic survey. For each case, participants performed several different diagnostic tasks, and their results were recorded and compared with the original diagnoses performed using the gold standard method (ie, conventional microscopy). We performed Bayesian data analysis with probabilistic modeling. Results: The overall analysis, comprising 1345 different items, resulted in a 9\% (117/1345) error rate in using digital slides. The task of differentiating a neoplastic process from a nonneoplastic one accounted for an error rate of 10.7\% (42/392), whereas the distinction of a malignant process from a benign one accounted for an error rate of 4.2\% (11/258). Apart from residents, senior pathologists generated most discrepancies (7.9\%, 13/164). Our model showed that these differences among career levels persisted even after adjusting for other factors. Conclusions: Our findings are in line with previous findings, emphasizing that the duration of transition (ie, lengthy or abrupt) might not influence the diagnostic performance. Moreover, our findings highlight that senior pathologists may be limited by a digital gap, which may negatively affect their performance with digital pathology. These results can guide the process of digital transition in the field of pathology. 
", doi="10.2196/24266", url="https://www.jmir.org/2021/2/e24266", url="http://www.ncbi.nlm.nih.gov/pubmed/33503002" } @Article{info:doi/10.2196/21037, author="Abrami, Avner and Gunzler, Steven and Kilbane, Camilla and Ostrand, Rachel and Ho, Bryan and Cecchi, Guillermo", title="Automated Computer Vision Assessment of Hypomimia in Parkinson Disease: Proof-of-Principle Pilot Study", journal="J Med Internet Res", year="2021", month="Feb", day="22", volume="23", number="2", pages="e21037", keywords="Parkinson disease", keywords="hypomimia", keywords="computer vision", keywords="telemedicine", abstract="Background: Facial expressions require the complex coordination of 43 different facial muscles. Parkinson disease (PD) affects facial musculature leading to ``hypomimia'' or ``masked facies.'' Objective: We aimed to determine whether modern computer vision techniques can be applied to detect masked facies and quantify drug states in PD. Methods: We trained a convolutional neural network on images extracted from videos of 107 self-identified people with PD, along with 1595 videos of controls, in order to detect PD hypomimia cues. This trained model was applied to clinical interviews of 35 PD patients in their on and off drug motor states, and seven journalist interviews of the actor Alan Alda obtained before and after he was diagnosed with PD. Results: The algorithm achieved a test set area under the receiver operating characteristic curve of 0.71 on 54 subjects to detect PD hypomimia, compared to a value of 0.75 for trained neurologists using the United Parkinson Disease Rating Scale-III Facial Expression score. Additionally, the model accuracy to classify the on and off drug states in the clinical samples was 63\% (22/35), in contrast to an accuracy of 46\% (16/35) when using clinical rater scores. Finally, each of Alan Alda's seven interviews were successfully classified as occurring before (versus after) his diagnosis, with 100\% accuracy (7/7). Conclusions: This proof-of-principle pilot study demonstrated that computer vision holds promise as a valuable tool for PD hypomimia and for monitoring a patient's motor state in an objective and noninvasive way, particularly given the increasing importance of telemedicine. ", doi="10.2196/21037", url="https://www.jmir.org/2021/2/e21037", url="http://www.ncbi.nlm.nih.gov/pubmed/33616535" } @Article{info:doi/10.2196/24572, author="Quiroz, Carlos Juan and Feng, You-Zhen and Cheng, Zhong-Yuan and Rezazadegan, Dana and Chen, Ping-Kang and Lin, Qi-Ting and Qian, Long and Liu, Xiao-Fang and Berkovsky, Shlomo and Coiera, Enrico and Song, Lei and Qiu, Xiaoming and Liu, Sidong and Cai, Xiang-Ran", title="Development and Validation of a Machine Learning Approach for Automated Severity Assessment of COVID-19 Based on Clinical and Imaging Data: Retrospective Study", journal="JMIR Med Inform", year="2021", month="Feb", day="11", volume="9", number="2", pages="e24572", keywords="algorithm", keywords="clinical data", keywords="clinical features", keywords="COVID-19", keywords="CT scans", keywords="development", keywords="imaging", keywords="imbalanced data", keywords="machine learning", keywords="oversampling", keywords="severity assessment", keywords="validation", abstract="Background: COVID-19 has overwhelmed health systems worldwide. It is important to identify severe cases as early as possible, such that resources can be mobilized and treatment can be escalated. 
Objective: This study aims to develop a machine learning approach for automated severity assessment of COVID-19 based on clinical and imaging data. Methods: Clinical data---including demographics, signs, symptoms, comorbidities, and blood test results---and chest computed tomography scans of 346 patients from 2 hospitals in the Hubei Province, China, were used to develop machine learning models for automated severity assessment in diagnosed COVID-19 cases. We compared the predictive power of the clinical and imaging data from multiple machine learning models and further explored the use of four oversampling methods to address the imbalanced classification issue. Features with the highest predictive power were identified using the Shapley Additive Explanations framework. Results: Imaging features had the strongest impact on the model output, while a combination of clinical and imaging features yielded the best performance overall. The identified predictive features were consistent with those reported previously. Although oversampling yielded mixed results, it achieved the best model performance in our study. Logistic regression models differentiating between mild and severe cases achieved the best performance for clinical features (area under the curve [AUC] 0.848; sensitivity 0.455; specificity 0.906), imaging features (AUC 0.926; sensitivity 0.818; specificity 0.901), and a combination of clinical and imaging features (AUC 0.950; sensitivity 0.764; specificity 0.919). The synthetic minority oversampling method further improved the performance of the model using combined features (AUC 0.960; sensitivity 0.845; specificity 0.929). Conclusions: Clinical and imaging features can be used for automated severity assessment of COVID-19 and can potentially help triage patients with COVID-19 and prioritize care delivery to those at a higher risk of severe disease. ", doi="10.2196/24572", url="http://medinform.jmir.org/2021/2/e24572/", url="http://www.ncbi.nlm.nih.gov/pubmed/33534723" } @Article{info:doi/10.2196/23693, author="Albahli, Saleh and Yar, Hassan Ghulam Nabi Ahmad", title="Fast and Accurate Detection of COVID-19 Along With 14 Other Chest Pathologies Using a Multi-Level Classification: Algorithm Development and Validation Study", journal="J Med Internet Res", year="2021", month="Feb", day="10", volume="23", number="2", pages="e23693", keywords="COVID-19", keywords="chest x-ray", keywords="convolutional neural network", keywords="data augmentation", keywords="biomedical imaging", keywords="automatic detection", abstract="Background: COVID-19 has spread very rapidly, and it is important to build a system that can detect it in order to help an overwhelmed health care system. Many research studies on chest diseases rely on the strengths of deep learning techniques. Although some of these studies used state-of-the-art techniques and were able to deliver promising results, these techniques are not very useful if they can detect only one type of disease without detecting the others. Objective: The main objective of this study was to achieve a fast and more accurate diagnosis of COVID-19. This study proposes a diagnostic technique that classifies COVID-19 x-ray images from normal x-ray images and those specific to 14 other chest diseases. Methods: In this paper, we propose a novel, multilevel pipeline, based on deep learning models, to detect COVID-19 along with other chest diseases based on x-ray images. This pipeline reduces the burden of a single network to classify a large number of classes. 
The deep learning models used in this study were pretrained on the ImageNet dataset, and transfer learning was used for fast training. The lungs and heart were segmented from the whole x-ray images and passed onto the first classifier that checks whether the x-ray is normal, COVID-19 affected, or characteristic of another chest disease. If it is neither a COVID-19 x-ray image nor a normal one, then the second classifier comes into action and classifies the image as one of the other 14 diseases. Results: We show how our model uses state-of-the-art deep neural networks to achieve classification accuracy for COVID-19 along with 14 other chest diseases and normal cases based on x-ray images, which is competitive with currently used state-of-the-art models. Due to the lack of data in some classes such as COVID-19, we applied 10-fold cross-validation through the ResNet50 model. Our classification technique thus achieved an average training accuracy of 96.04\% and test accuracy of 92.52\% for the first level of classification (ie, 3 classes). For the second level of classification (ie, 14 classes), our technique achieved a maximum training accuracy of 88.52\% and test accuracy of 66.634\% by using ResNet50. We also found that when all the 16 classes were classified at once, the overall accuracy for COVID-19 detection decreased, which in the case of ResNet50 was 88.92\% for training data and 71.905\% for test data. Conclusions: Our proposed pipeline can detect COVID-19 with a higher accuracy along with detecting 14 other chest diseases based on x-ray images. This is achieved by dividing the classification task into multiple steps rather than classifying them collectively. ", doi="10.2196/23693", url="http://www.jmir.org/2021/2/e23693/", url="http://www.ncbi.nlm.nih.gov/pubmed/33529154" } @Article{info:doi/10.2196/22164, author="Bhalodiya, Maganbhai Jayendra and Palit, Arnab and Giblin, Gerard and Tiwari, Kumar Manoj and Prasad, K. Sanjay and Bhudia, K. Sunil and Arvanitis, N. Theodoros and Williams, A. Mark", title="Identifying Myocardial Infarction Using Hierarchical Template Matching--Based Myocardial Strain: Algorithm Development and Usability Study", journal="JMIR Med Inform", year="2021", month="Feb", day="10", volume="9", number="2", pages="e22164", keywords="left ventricle", keywords="myocardial infarction", keywords="myocardium", keywords="strain", abstract="Background: Myocardial infarction (MI; location and extent of infarction) can be determined by late enhancement cardiac magnetic resonance (CMR) imaging, which requires the injection of a potentially harmful gadolinium-based contrast agent (GBCA). Alternatively, emerging research in the area of myocardial strain has shown potential to identify MI using strain values. Objective: This study aims to identify the location of MI by developing an applied algorithmic method of circumferential strain (CS) values, which are derived through a novel hierarchical template matching (HTM) method. Methods: HTM-based CS H-spread from end-diastole to end-systole was used to develop an applied method. Grid-tagging magnetic resonance imaging was used to calculate strain values in the left ventricular (LV) myocardium, followed by the 16-segment American Heart Association model. The data set was used with k-fold cross-validation to estimate the percentage reduction of H-spread among infarcted and noninfarcted LV segments. A total of 43 participants (38 MI and 5 healthy) who underwent CMR imaging were retrospectively selected. 
Infarcted segments detected by using this method were validated by comparison with late enhancement CMR, and the diagnostic performance of the applied algorithmic method was evaluated with a receiver operating characteristic curve test. Results: The H-spread of the CS was reduced in infarcted segments compared with noninfarcted segments of the LV. The reductions were 30\% in basal segments, 30\% in midventricular segments, and 20\% in apical LV segments. The diagnostic accuracy of detection, using the reported method, was represented by area under the curve values, which were 0.85, 0.82, and 0.87 for basal, midventricular, and apical slices, respectively, demonstrating good agreement with the late-gadolinium enhancement--based detections. Conclusions: The proposed applied algorithmic method has the potential to accurately identify the location of infarcted LV segments without the administration of late-gadolinium enhancement. Such an approach adds the potential to safely identify MI, potentially reduce patient scanning time, and extend the utility of CMR in patients who are contraindicated for the use of GBCA. ", doi="10.2196/22164", url="https://medinform.jmir.org/2021/2/e22164", url="http://www.ncbi.nlm.nih.gov/pubmed/33565992" } @Article{info:doi/10.2196/23436, author="Schmitt, Max and Maron, Christoph Roman and Hekler, Achim and Stenzinger, Albrecht and Hauschild, Axel and Weichenthal, Michael and Tiemann, Markus and Krahl, Dieter and Kutzner, Heinz and Utikal, Sven Jochen and Haferkamp, Sebastian and Kather, Nikolas Jakob and Klauschen, Frederick and Krieghoff-Henning, Eva and Fr{\"o}hling, Stefan and von Kalle, Christof and Brinker, Josef Titus", title="Hidden Variables in Deep Learning Digital Pathology and Their Potential to Cause Batch Effects: Prediction Model Study", journal="J Med Internet Res", year="2021", month="Feb", day="2", volume="23", number="2", pages="e23436", keywords="artificial intelligence", keywords="machine learning", keywords="deep learning", keywords="neural networks", keywords="convolutional neural networks", keywords="pathology", keywords="clinical pathology", keywords="digital pathology", keywords="pitfalls", keywords="artifacts", abstract="Background: An increasing number of studies within digital pathology show the potential of artificial intelligence (AI) to diagnose cancer using histological whole slide images, which requires large and diverse data sets. While diversification may result in more generalizable AI-based systems, it can also introduce hidden variables. If neural networks are able to distinguish/learn hidden variables, these variables can introduce batch effects that compromise the accuracy of classification systems. Objective: The objective of the study was to analyze the learnability of an exemplary selection of hidden variables (patient age, slide preparation date, slide origin, and scanner type) that are commonly found in whole slide image data sets in digital pathology and could create batch effects. Methods: We trained four separate convolutional neural networks (CNNs) to learn four variables using a data set of digitized whole slide melanoma images from five different institutes. For robustness, each CNN training and evaluation run was repeated multiple times, and a variable was only considered learnable if the lower bound of the 95\% confidence interval of its mean balanced accuracy was above 50.0\%. 
Results: A mean balanced accuracy above 50.0\% was achieved for all four tasks, even when considering the lower bound of the 95\% confidence interval. Performance between tasks showed wide variation, ranging from 56.1\% (slide preparation date) to 100\% (slide origin). Conclusions: Because all of the analyzed hidden variables are learnable, they have the potential to create batch effects in dermatopathology data sets, which negatively affect AI-based classification systems. Practitioners should be aware of these and similar pitfalls when developing and evaluating such systems and address these and potentially other batch effect variables in their data sets through sufficient data set stratification. ", doi="10.2196/23436", url="https://www.jmir.org/2021/2/e23436", url="http://www.ncbi.nlm.nih.gov/pubmed/33528370" } @Article{info:doi/10.2196/24973, author="Ho, Thi Thao and Park, Jongmin and Kim, Taewoo and Park, Byunggeon and Lee, Jaehee and Kim, Young Jin and Kim, Beom Ki and Choi, Sooyoung and Kim, Hwan Young and Lim, Jae-Kwang and Choi, Sanghun", title="Deep Learning Models for Predicting Severe Progression in COVID-19-Infected Patients: Retrospective Study", journal="JMIR Med Inform", year="2021", month="Jan", day="28", volume="9", number="1", pages="e24973", keywords="COVID-19", keywords="deep learning", keywords="artificial neural network", keywords="convolutional neural network", keywords="lung CT", abstract="Background: Many COVID-19 patients rapidly progress to respiratory failure with a broad range of severities. Identification of high-risk cases is critical for early intervention. Objective: The aim of this study is to develop deep learning models that can rapidly identify high-risk COVID-19 patients based on computed tomography (CT) images and clinical data. Methods: We analyzed 297 COVID-19 patients from five hospitals in Daegu, South Korea. A mixed artificial convolutional neural network (ACNN) model, combining an artificial neural network for clinical data and a convolutional neural network for 3D CT imaging data, was developed to classify these cases as either high risk of severe progression (ie, event) or low risk (ie, event-free). Results: Using the mixed ACNN model, we were able to obtain high classification performance using novel coronavirus pneumonia lesion images (ie, 93.9\% accuracy, 80.8\% sensitivity, 96.9\% specificity, and 0.916 area under the curve [AUC] score) and lung segmentation images (ie, 94.3\% accuracy, 74.7\% sensitivity, 95.9\% specificity, and 0.928 AUC score) for event versus event-free groups. Conclusions: Our study successfully differentiated high-risk cases among COVID-19 patients using imaging and clinical features. The developed model can be used as a predictive tool for interventions in aggressive therapies. 
", doi="10.2196/24973", url="http://medinform.jmir.org/2021/1/e24973/", url="http://www.ncbi.nlm.nih.gov/pubmed/33455900" } @Article{info:doi/10.2196/21926, author="Bahador, Nooshin and Ferreira, Denzil and Tamminen, Satu and Kortelainen, Jukka", title="Deep Learning--Based Multimodal Data Fusion: Case Study in Food Intake Episodes Detection Using Wearable Sensors", journal="JMIR Mhealth Uhealth", year="2021", month="Jan", day="28", volume="9", number="1", pages="e21926", keywords="deep learning", keywords="image processing", keywords="data fusion", keywords="covariance distribution", keywords="food intake episode", keywords="wearable sensors", abstract="Background: Multimodal wearable technologies have brought forward wide possibilities in human activity recognition, and more specifically personalized monitoring of eating habits. The emerging challenge now is the selection of most discriminative information from high-dimensional data collected from multiple sources. The available fusion algorithms with their complex structure are poorly adopted to the computationally constrained environment which requires integrating information directly at the source. As a result, more simple low-level fusion methods are needed. Objective: In the absence of a data combining process, the cost of directly applying high-dimensional raw data to a deep classifier would be computationally expensive with regard to the response time, energy consumption, and memory requirement. Taking this into account, we aimed to develop a data fusion technique in a computationally efficient way to achieve a more comprehensive insight of human activity dynamics in a lower dimension. The major objective was considering statistical dependency of multisensory data and exploring intermodality correlation patterns for different activities. Methods: In this technique, the information in time (regardless of the number of sources) is transformed into a 2D space that facilitates classification of eating episodes from others. This is based on a hypothesis that data captured by various sensors are statistically associated with each other and the covariance matrix of all these signals has a unique distribution correlated with each activity which can be encoded on a contour representation. These representations are then used as input of a deep model to learn specific patterns associated with specific activity. Results: In order to show the generalizability of the proposed fusion algorithm, 2 different scenarios were taken into account. These scenarios were different in terms of temporal segment size, type of activity, wearable device, subjects, and deep learning architecture. The first scenario used a data set in which a single participant performed a limited number of activities while wearing the Empatica E4 wristband. In the second scenario, a data set related to the activities of daily living was used where 10 different participants wore inertial measurement units while performing a more complex set of activities. The precision metric obtained from leave-one-subject-out cross-validation for the second scenario reached 0.803. The impact of missing data on performance degradation was also evaluated. 
Conclusions: To conclude, the proposed fusion technique provides the possibility of embedding joint variability information over different modalities in just a single 2D representation which results in obtaining a more global view of different aspects of daily human activities at hand, and yet preserving the desired performance level in activity recognition. ", doi="10.2196/21926", url="http://mhealth.jmir.org/2021/1/e21926/", url="http://www.ncbi.nlm.nih.gov/pubmed/33507156" } @Article{info:doi/10.2196/19346, author="Taeger, Johannes and Bischoff, Stefanie and Hagen, Rudolf and Rak, Kristen", title="Utilization of Smartphone Depth Mapping Cameras for App-Based Grading of Facial Movement Disorders: Development and Feasibility Study", journal="JMIR Mhealth Uhealth", year="2021", month="Jan", day="26", volume="9", number="1", pages="e19346", keywords="facial nerve", keywords="facial palsy", keywords="app development", keywords="medical informatics", keywords="eHealth", keywords="mHealth", keywords="Stennert's index", keywords="depth mapping camera", keywords="smartphone sensors", abstract="Background: For the classification of facial paresis, various systems of description and evaluation in the form of clinician-graded or software-based scoring systems are available. They serve the purpose of scientific and clinical assessment of the spontaneous course of the disease or monitoring therapeutic interventions. Nevertheless, none have been able to achieve universal acceptance in everyday clinical practice. Hence, a quick and precise tool for assessing the functional status of the facial nerve would be desirable. In this context, the possibilities that the TrueDepth camera of recent iPhone models offer have sparked our interest. Objective: This paper describes the utilization of the iPhone's TrueDepth camera via a specially developed app prototype for quick, objective, and reproducible quantification of facial asymmetries. Methods: After conceptual and user interface design, a native app prototype for iOS was programmed that accesses and processes the data of the TrueDepth camera. Using a special algorithm, a new index for the grading of unilateral facial paresis ranging from 0\% to 100\% was developed. The algorithm was adapted to the well-established Stennert index by weighting the individual facial regions based on functional and cosmetic aspects. Test measurements with healthy subjects using the app were performed in order to prove the reliability of the system. Results: After the development process, the app prototype had no runtime or buildtime errors and also worked under suboptimal conditions such as different measurement angles, so it met our criteria for a safe and reliable app. The newly defined index expresses the result of the measurements as a generally understandable percentage value for each half of the face. The measurements that correctly rated the facial expressions of healthy individuals as symmetrical in all cases were reproducible and showed no statistically significant intertest variability. Conclusions: Based on the experience with the app prototype assessing healthy subjects, the use of the TrueDepth camera should have considerable potential for app-based grading of facial movement disorders. The app and its algorithm, which is based on theoretical considerations, should be evaluated in a prospective clinical study and correlated with common facial scores. 
", doi="10.2196/19346", url="http://mhealth.jmir.org/2021/1/e19346/", url="http://www.ncbi.nlm.nih.gov/pubmed/33496670" } @Article{info:doi/10.2196/22739, author="Jeong, Uk Yeon and Yoo, Soyoung and Kim, Young-Hak and Shim, Hyun Woo", title="De-Identification of Facial Features in Magnetic Resonance Images: Software Development Using Deep Learning Technology", journal="J Med Internet Res", year="2020", month="Dec", day="10", volume="22", number="12", pages="e22739", keywords="de-identification", keywords="privacy protection", keywords="personal information protection", keywords="medical image", keywords="deep learning", keywords="facial feature detection", keywords="HIPAA", keywords="GDPR", abstract="Background: High-resolution medical images that include facial regions can be used to recognize the subject's face when reconstructing 3-dimensional (3D)-rendered images from 2-dimensional (2D) sequential images, which might constitute a risk of infringement of personal information when sharing data. According to the Health Insurance Portability and Accountability Act (HIPAA) privacy rules, full-face photographic images and any comparable image are direct identifiers and considered as protected health information. Moreover, the General Data Protection Regulation (GDPR) categorizes facial images as biometric data and stipulates that special restrictions should be placed on the processing of biometric data. Objective: This study aimed to develop software that can remove the header information from Digital Imaging and Communications in Medicine (DICOM) format files and facial features (eyes, nose, and ears) at the 2D sliced-image level to anonymize personal information in medical images. Methods: A total of 240 cranial magnetic resonance (MR) images were used to train the deep learning model (144, 48, and 48 for the training, validation, and test sets, respectively, from the Alzheimer's Disease Neuroimaging Initiative [ADNI] database). To overcome the small sample size problem, we used a data augmentation technique to create 576 images per epoch. We used attention-gated U-net for the basic structure of our deep learning model. To validate the performance of the software, we adapted an external test set comprising 100 cranial MR images from the Open Access Series of Imaging Studies (OASIS) database. Results: The facial features (eyes, nose, and ears) were successfully detected and anonymized in both test sets (48 from ADNI and 100 from OASIS). Each result was manually validated in both the 2D image plane and the 3D-rendered images. Furthermore, the ADNI test set was verified using Microsoft Azure's face recognition artificial intelligence service. By adding a user interface, we developed and distributed (via GitHub) software named ``Deface program'' for medical images as an open-source project. Conclusions: We developed deep learning--based software for the anonymization of MR images that distorts the eyes, nose, and ears to prevent facial identification of the subject in reconstructed 3D images. It could be used to share medical big data for secondary research while making both data providers and recipients compliant with the relevant privacy regulations. ", doi="10.2196/22739", url="http://www.jmir.org/2020/12/e22739/", url="http://www.ncbi.nlm.nih.gov/pubmed/33208302" } @Article{info:doi/10.2196/20633, author="Kim, Byeol and Loke, Yue-Hin and Mass, Paige and Irwin, R. 
Matthew and Capeland, Conrad and Olivieri, Laura and Krieger, Axel", title="A Novel Virtual Reality Medical Image Display System for Group Discussions of Congenital Heart Disease: Development and Usability Testing", journal="JMIR Cardio", year="2020", month="Dec", day="8", volume="4", number="1", pages="e20633", keywords="virtual reality", keywords="cardiac diagnostics", keywords="usability study", keywords="congenital heart disease", keywords="group collaboration", abstract="Background: The complex 3-dimensional (3D) nature of anatomical abnormalities in congenital heart disease (CHD) necessitates multidisciplinary group discussions centered around the review of medical images such as magnetic resonance imaging. Currently, group viewings of medical images are constrained to 2-dimensional (2D) cross-sectional displays of 3D scans. However, 2D display methods could introduce additional challenges since they require physicians to accurately reconstruct the images mentally into 3D anatomies for diagnosis, staging, and planning of surgery or other therapies. Virtual reality (VR) software may enhance diagnosis and care of CHD via 3D visualization of medical images. Yet, present-day VR developments for medicine lack the emphasis on multiuser collaborative environments, and the effect of displays and level of immersion for diagnosing CHDs have not been studied. Objective: The objective of the study was to evaluate and compare the diagnostic accuracies and preferences of various display systems, including the conventional 2D display and a novel group VR software, in group discussions of CHD. Methods: A total of 22 medical trainees consisting of 1 first-year, 10 second-year, 4 third-year, and 1 fourth-year residents and 6 medical students, who volunteered for the study, were formed into groups of 4 to 5 participants. Each group discussed three diagnostic cases of CHD with varying structural complexity using conventional 2D display and group VR software. A group VR software, Cardiac Review 3D, was developed by our team using the Unity engine. By using different display hardware, VR was classified into nonimmersive and full-immersive settings. The discussion time, diagnostic accuracy score, and peer assessment were collected to capture the group and individual diagnostic performances. The diagnostic accuracies for each participant were scored by two experienced cardiologists following a predetermined answer rubric. At the end of the study, all participants were provided a survey to rank their preferences of the display systems for performing group medical discussions. Results: Diagnostic accuracies were highest when groups used the full-immersive VR compared with the conventional and nonimmersive VR ($\chi^2_2$=9.0, P=.01) displays. Differences between the display systems were more prominent with increasing case complexity ($\chi^2_2$=14.1, P<.001) where full-immersive VR had accuracy scores that were 54.49\% and 146.82\% higher than conventional and nonimmersive VR, respectively. The diagnostic accuracies provided by the two cardiologists for each participant did not statistically differ from each other (t=--1.01, P=.31). The full-immersive VR was ranked as the most preferred display for performing group CHD discussions by 68\% of the participants. Conclusions: The most preferred display system among medical trainees for visualizing medical images during group diagnostic discussions is full-immersive VR, with a trend toward improved diagnostic accuracy in complex anatomical abnormalities. 
Immersion is a crucial feature of displays of medical images for diagnostic accuracy in collaborative discussions. ", doi="10.2196/20633", url="http://cardio.jmir.org/2020/1/e20633/", url="http://www.ncbi.nlm.nih.gov/pubmed/33289675" } @Article{info:doi/10.2196/21790, author="Owais, Muhammad and Arsalan, Muhammad and Mahmood, Tahir and Kim, Hwan Yu and Park, Ryoung Kang", title="Comprehensive Computer-Aided Decision Support Framework to Diagnose Tuberculosis From Chest X-Ray Images: Data Mining Study", journal="JMIR Med Inform", year="2020", month="Dec", day="7", volume="8", number="12", pages="e21790", keywords="tuberculosis", keywords="computer-aided diagnosis", keywords="chest radiograph", keywords="lung disease", keywords="neural network", keywords="classification-based retrieval", abstract="Background: Tuberculosis (TB) is one of the most infectious diseases that can be fatal. Its early diagnosis and treatment can significantly reduce the mortality rate. In the literature, several computer-aided diagnosis (CAD) tools have been proposed for the efficient diagnosis of TB from chest radiograph (CXR) images. However, the majority of previous studies adopted conventional handcrafted feature-based algorithms. In addition, some recent CAD tools utilized the strength of deep learning methods to further enhance diagnostic performance. Nevertheless, all these existing methods can only classify a given CXR image into binary class (either TB positive or TB negative) without providing further descriptive information. Objective: The main objective of this study is to propose a comprehensive CAD framework for the effective diagnosis of TB by providing visual as well as descriptive information from the previous patients' database. Methods: To accomplish our objective, first we propose a fusion-based deep classification network for the CAD decision that exhibits promising performance over the various state-of-the-art methods. Furthermore, a multilevel similarity measure algorithm is devised based on multiscale information fusion to retrieve the best-matched cases from the previous database. Results: The performance of the framework was evaluated based on 2 well-known CXR data sets made available by the US National Library of Medicine and the National Institutes of Health. Our classification model exhibited the best diagnostic performance (0.929, 0.937, 0.921, 0.928, and 0.965 for F1 score, average precision, average recall, accuracy, and area under the curve, respectively) and outperforms the performance of various state-of-the-art methods. Conclusions: This paper presents a comprehensive CAD framework to diagnose TB from CXR images by retrieving the relevant cases and their clinical observations from the previous patients' database. These retrieval results assist the radiologist in making an effective diagnostic decision related to the current medical condition of a patient. Moreover, the retrieval results can facilitate the radiologists in subjectively validating the CAD decision. 
", doi="10.2196/21790", url="http://medinform.jmir.org/2020/12/e21790/", url="http://www.ncbi.nlm.nih.gov/pubmed/33284119" } @Article{info:doi/10.2196/17150, author="O'Toole, Saoirse and Bartlett, David and Keeling, Andrew and McBride, John and Bernabe, Eduardo and Crins, Luuk and Loomans, Bas", title="Influence of Scanner Precision and Analysis Software in Quantifying Three-Dimensional Intraoral Changes: Two-Factor Factorial Experimental Design", journal="J Med Internet Res", year="2020", month="Nov", day="27", volume="22", number="11", pages="e17150", keywords="diagnostic systems", keywords="digital imaging/radiology", keywords="engineering", keywords="imaging", keywords="outcomes research", keywords="tooth wear", abstract="Background: Three-dimensional scans are increasingly used to quantify biological topographical changes and clinical health outcomes. Traditionally, the use of 3D scans has been limited to specialized centers owing to the high cost of the scanning equipment and the necessity for complex analysis software. Technological advances have made cheaper, more accessible methods of data capture and analysis available in the field of dentistry, potentially facilitating a primary care system to quantify disease progression. However, this system has yet to be compared with previous high-precision methods in university hospital settings. Objective: The aim of this study was to compare a dental primary care method of data capture (intraoral scanner) with a precision hospital-based method (laser profilometer) in addition to comparing open source and commercial software available for data analysis. Methods: Longitudinal dental wear data from 30 patients were analyzed using a two-factor factorial experimental design. Bimaxillary intraoral digital scans (TrueDefinition, 3M, UK) and conventional silicone impressions, poured in type-4 dental stone, were made at both baseline and follow-up appointments (mean 36 months, SD 10.9). Stone models were scanned using precision laser profilometry (Taicaan, Southampton, UK). Three-dimensional changes in both forms of digital scans of the first molars (n=76) were quantitatively analyzed using the engineering software Geomagic Control (3D Systems, Germany) and freeware WearCompare (Leeds Digital Dentistry, UK). Volume change (mm3) was the primary measurement outcome. The maximum point loss ($\mu$m) and the average profile loss ($\mu$m) were also recorded. Data were paired and skewed, and were therefore compared using Wilcoxon signed-rank tests with Bonferroni correction. Results: The median (IQR) volume change for Geomagic using profilometry and using the intraoral scan was --0.37 mm3 (--3.75-2.30) and +0.51 mm3 (--2.17-4.26), respectively (P<.001). Using WearCompare, the median (IQR) volume change for profilometry and intraoral scanning was --1.21 mm3 (--3.48-0.56) and --0.39 mm3 (--3.96-2.76), respectively (P=.04). WearCompare detected significantly greater volume loss than Geomagic regardless of scanner type. No differences were observed between groups with respect to the maximum point loss or average profile loss. Conclusions: As expected, the method of data capture, software used, and measurement metric all significantly influenced the measurement outcome. However, when appropriate analysis was used, the primary care system was able to quantify the degree of change and can be recommended depending on the accuracy needed to diagnose a condition. Lower-resolution scanners may underestimate complex changes when measuring at the micron level. 
", doi="10.2196/17150", url="https://www.jmir.org/2020/11/e17150", url="http://www.ncbi.nlm.nih.gov/pubmed/33245280" } @Article{info:doi/10.2196/19416, author="Cheng, Chi-Tung and Chen, Chih-Chi and Cheng, Fu-Jen and Chen, Huan-Wu and Su, Yi-Siang and Yeh, Chun-Nan and Chung, I-Fang and Liao, Chien-Hung", title="A Human-Algorithm Integration System for Hip Fracture Detection on Plain Radiography: System Development and Validation Study", journal="JMIR Med Inform", year="2020", month="Nov", day="27", volume="8", number="11", pages="e19416", keywords="hip fracture", keywords="neural network", keywords="computer", keywords="artificial intelligence", keywords="algorithms", keywords="human augmentation", keywords="deep learning", keywords="diagnosis", abstract="Background: Hip fracture is the most common type of fracture in elderly individuals. Numerous deep learning (DL) algorithms for plain pelvic radiographs (PXRs) have been applied to improve the accuracy of hip fracture diagnosis. However, their efficacy is still undetermined. Objective: The objective of this study is to develop and validate a human-algorithm integration (HAI) system to improve the accuracy of hip fracture diagnosis in a real clinical environment. Methods: The HAI system with hip fracture detection ability was developed using a deep learning algorithm trained on trauma registry data and 3605 PXRs from August 2008 to December 2016. To compare their diagnostic performance before and after HAI system assistance using an independent testing dataset, 34 physicians were recruited. We analyzed the physicians' accuracy, sensitivity, specificity, and agreement with the algorithm; we also performed subgroup analyses according to physician specialty and experience. Furthermore, we applied the HAI system in the emergency departments of different hospitals to validate its value in the real world. Results: With the support of the algorithm, which achieved 91\% accuracy, the diagnostic performance of physicians was significantly improved in the independent testing dataset, as was revealed by the sensitivity (physician alone, median 95\%; HAI, median 99\%; P<.001), specificity (physician alone, median 90\%; HAI, median 95\%; P<.001), accuracy (physician alone, median 90\%; HAI, median 96\%; P<.001), and human-algorithm agreement [physician alone $\kappa$, median 0.69 (IQR 0.63-0.74); HAI $\kappa$, median 0.80 (IQR 0.76-0.82); P<.001. With the help of the HAI system, the primary physicians showed significant improvement in their diagnostic performance to levels comparable to those of consulting physicians, and both the experienced and less-experienced physicians benefited from the HAI system. After the HAI system had been applied in 3 departments for 5 months, 587 images were examined. The sensitivity, specificity, and accuracy of the HAI system for detecting hip fractures were 97\%, 95.7\%, and 96.08\%, respectively. Conclusions: HAI currently impacts health care, and integrating this technology into emergency departments is feasible. The developed HAI system can enhance physicians' hip fracture diagnostic performance. 
", doi="10.2196/19416", url="http://medinform.jmir.org/2020/11/e19416/", url="http://www.ncbi.nlm.nih.gov/pubmed/33245279" } @Article{info:doi/10.2196/23472, author="Kang, Yu-Chuan Eugene and Hsieh, Yi-Ting and Li, Chien-Hung and Huang, Yi-Jin and Kuo, Chang-Fu and Kang, Je-Ho and Chen, Kuan-Jen and Lai, Chi-Chun and Wu, Wei-Chi and Hwang, Yih-Shiou", title="Deep Learning--Based Detection of Early Renal Function Impairment Using Retinal Fundus Images: Model Development and Validation", journal="JMIR Med Inform", year="2020", month="Nov", day="26", volume="8", number="11", pages="e23472", keywords="deep learning", keywords="renal function", keywords="retinal fundus image", keywords="diabetes", keywords="renal", keywords="kidney", keywords="retinal", keywords="eye", keywords="imaging", keywords="impairment", keywords="detection", keywords="development", keywords="validation", keywords="model", abstract="Background: Retinal imaging has been applied for detecting eye diseases and cardiovascular risks using deep learning--based methods. Furthermore, retinal microvascular and structural changes were found in renal function impairments. However, a deep learning--based method using retinal images for detecting early renal function impairment has not yet been well studied. Objective: This study aimed to develop and evaluate a deep learning model for detecting early renal function impairment using retinal fundus images. Methods: This retrospective study enrolled patients who underwent renal function tests with color fundus images captured at any time between January 1, 2001, and August 31, 2019. A deep learning model was constructed to detect impaired renal function from the images. Early renal function impairment was defined as estimated glomerular filtration rate <90 mL/min/1.73 m2. Model performance was evaluated with respect to the receiver operating characteristic curve and area under the curve (AUC). Results: In total, 25,706 retinal fundus images were obtained from 6212 patients for the study period. The images were divided at an 8:1:1 ratio. The training, validation, and testing data sets respectively contained 20,787, 2189, and 2730 images from 4970, 621, and 621 patients. There were 10,686 and 15,020 images determined to indicate normal and impaired renal function, respectively. The AUC of the model was 0.81 in the overall population. In subgroups stratified by serum hemoglobin A1c (HbA1c) level, the AUCs were 0.81, 0.84, 0.85, and 0.87 for the HbA1c levels of ?6.5\%, >6.5\%, >7.5\%, and >10\%, respectively. Conclusions: The deep learning model in this study enables the detection of early renal function impairment using retinal fundus images. The model was more accurate for patients with elevated serum HbA1c levels. 
", doi="10.2196/23472", url="http://medinform.jmir.org/2020/11/e23472/", url="http://www.ncbi.nlm.nih.gov/pubmed/33139242" } @Article{info:doi/10.2196/18563, author="Owais, Muhammad and Arsalan, Muhammad and Mahmood, Tahir and Kang, Kyu Jin and Park, Ryoung Kang", title="Automated Diagnosis of Various Gastrointestinal Lesions Using a Deep Learning--Based Classification and Retrieval Framework With a Large Endoscopic Database: Model Development and Validation", journal="J Med Internet Res", year="2020", month="Nov", day="26", volume="22", number="11", pages="e18563", keywords="artificial intelligence", keywords="endoscopic video retrieval", keywords="content-based medical image retrieval", keywords="polyp detection", keywords="deep learning", keywords="computer-aided diagnosis", abstract="Background: The early diagnosis of various gastrointestinal diseases can lead to effective treatment and reduce the risk of many life-threatening conditions. Unfortunately, various small gastrointestinal lesions are undetectable during early-stage examination by medical experts. In previous studies, various deep learning--based computer-aided diagnosis tools have been used to make a significant contribution to the effective diagnosis and treatment of gastrointestinal diseases. However, most of these methods were designed to detect a limited number of gastrointestinal diseases, such as polyps, tumors, or cancers, in a specific part of the human gastrointestinal tract. Objective: This study aimed to develop a comprehensive computer-aided diagnosis tool to assist medical experts in diagnosing various types of gastrointestinal diseases. Methods: Our proposed framework comprises a deep learning--based classification network followed by a retrieval method. In the first step, the classification network predicts the disease type for the current medical condition. Then, the retrieval part of the framework shows the relevant cases (endoscopic images) from the previous database. These past cases help the medical expert validate the current computer prediction subjectively, which ultimately results in better diagnosis and treatment. Results: All the experiments were performed using 2 endoscopic data sets with a total of 52,471 frames and 37 different classes. The optimal performances obtained by our proposed method in accuracy, F1 score, mean average precision, and mean average recall were 96.19\%, 96.99\%, 98.18\%, and 95.86\%, respectively. The overall performance of our proposed diagnostic framework substantially outperformed state-of-the-art methods. Conclusions: This study provides a comprehensive computer-aided diagnosis framework for identifying various types of gastrointestinal diseases. The results show the superiority of our proposed method over various other recent methods and illustrate its potential for clinical diagnosis and treatment. Our proposed network can be applicable to other classification domains in medical imaging, such as computed tomography scans, magnetic resonance imaging, and ultrasound sequences. 
", doi="10.2196/18563", url="http://www.jmir.org/2020/11/e18563/", url="http://www.ncbi.nlm.nih.gov/pubmed/33242010" } @Article{info:doi/10.2196/20031, author="Tsai, FS Vincent and Zhuang, Bin and Pong, Yuan-Hung and Hsieh, Ju-Ton and Chang, Hong-Chiang", title="Web- and Artificial Intelligence--Based Image Recognition For Sperm Motility Analysis: Verification Study", journal="JMIR Med Inform", year="2020", month="Nov", day="19", volume="8", number="11", pages="e20031", keywords="Male infertility", keywords="semen analysis", keywords="home sperm test", keywords="smartphone", keywords="artificial intelligence", keywords="cloud computing", keywords="telemedicine", abstract="Background: Human sperm quality fluctuates over time. Therefore, it is crucial for couples preparing for natural pregnancy to monitor sperm motility. Objective: This study verified the performance of an artificial intelligence--based image recognition and cloud computing sperm motility testing system (Bemaner, Createcare) composed of microscope and microfluidic modules and designed to adapt to different types of smartphones. Methods: Sperm videos were captured and uploaded to the cloud with an app. Analysis of sperm motility was performed by an artificial intelligence--based image recognition algorithm then results were displayed. According to the number of motile sperm in the vision field, 47 (deidentified) videos of sperm were scored using 6 grades (0-5) by a male-fertility expert with 10 years of experience. Pearson product-moment correlation was calculated between the grades and the results (concentration of total sperm, concentration of motile sperm, and motility percentage) computed by the system. Results: Good correlation was demonstrated between the grades and results computed by the system for concentration of total sperm (r=0.65, P<.001), concentration of motile sperm (r=0.84, P<.001), and motility percentage (r=0.90, P<.001). Conclusions: This smartphone-based sperm motility test (Bemaner) accurately measures motility-related parameters and could potentially be applied toward the following fields: male infertility detection, sperm quality test during preparation for pregnancy, and infertility treatment monitoring. With frequent at-home testing, more data can be collected to help make clinical decisions and to conduct epidemiological research. ", doi="10.2196/20031", url="http://medinform.jmir.org/2020/11/e20031/", url="http://www.ncbi.nlm.nih.gov/pubmed/33211025" } @Article{info:doi/10.2196/21604, author="Li, Daowei and Zhang, Qiang and Tan, Yue and Feng, Xinghuo and Yue, Yuanyi and Bai, Yuhan and Li, Jimeng and Li, Jiahang and Xu, Youjun and Chen, Shiyu and Xiao, Si-Yu and Sun, Muyan and Li, Xiaona and Zhu, Fang", title="Prediction of COVID-19 Severity Using Chest Computed Tomography and Laboratory Measurements: Evaluation Using a Machine Learning Approach", journal="JMIR Med Inform", year="2020", month="Nov", day="17", volume="8", number="11", pages="e21604", keywords="COVID-19", keywords="severe case prediction", keywords="computerized tomography", keywords="machine learning", keywords="CT", keywords="scan", keywords="detection", keywords="prediction", keywords="model", abstract="Background: Most of the mortality resulting from COVID-19 has been associated with severe disease. Effective treatment of severe cases remains a challenge due to the lack of early detection of the infection. 
Objective: This study aimed to develop an effective prediction model for COVID-19 severity by combining radiological outcome with clinical biochemical indexes. Methods: A total of 46 patients with COVID-19 (10 severe, 36 nonsevere) were examined. To build the prediction model, a set of 27 severe and 151 nonsevere clinical laboratory records and computerized tomography (CT) records were collected from these patients. We managed to extract specific features from the patients' CT images by using a recently published convolutional neural network. We also trained a machine learning model combining these features with clinical laboratory results. Results: We present a prediction model combining patients' radiological outcomes with their clinical biochemical indexes to identify severe COVID-19 cases. The prediction model yielded a cross-validated area under the receiver operating characteristic (AUROC) score of 0.93 and an F1 score of 0.89, which showed a 6\% and 15\% improvement, respectively, compared to the models based on laboratory test features only. In addition, we developed a statistical model for forecasting COVID-19 severity based on the results of patients' laboratory tests performed before they were classified as severe cases; this model yielded an AUROC score of 0.81. Conclusions: To our knowledge, this is the first report predicting the clinical progression of COVID-19, as well as forecasting severity, based on a combined analysis using laboratory tests and CT images. ", doi="10.2196/21604", url="http://medinform.jmir.org/2020/11/e21604/", url="http://www.ncbi.nlm.nih.gov/pubmed/33038076" } @Article{info:doi/10.2196/19805, author="Gao, Yang and Xiao, Xiong and Han, Bangcheng and Li, Guilin and Ning, Xiaolin and Wang, Defeng and Cai, Weidong and Kikinis, Ron and Berkovsky, Shlomo and Di Ieva, Antonio and Zhang, Liwei and Ji, Nan and Liu, Sidong", title="Deep Learning Methodology for Differentiating Glioma Recurrence From Radiation Necrosis Using Multimodal Magnetic Resonance Imaging: Algorithm Development and Validation", journal="JMIR Med Inform", year="2020", month="Nov", day="17", volume="8", number="11", pages="e19805", keywords="recurrent tumor", keywords="radiation necrosis", keywords="progression", keywords="pseudoprogression", keywords="multimodal MRI", keywords="deep learning", abstract="Background: The radiological differential diagnosis between tumor recurrence and radiation-induced necrosis (ie, pseudoprogression) is of paramount importance in the management of glioma patients. Objective: This research aims to develop a deep learning methodology for automated differentiation of tumor recurrence from radiation necrosis based on routine magnetic resonance imaging (MRI) scans. Methods: In this retrospective study, 146 patients who underwent radiation therapy after glioma resection and presented with suspected recurrent lesions at the follow-up MRI examination were selected for analysis. Routine MRI scans were acquired from each patient, including T1, T2, and gadolinium-contrast-enhanced T1 sequences. Of those cases, 96 (65.8\%) were confirmed as glioma recurrence on postsurgical pathological examination, while 50 (34.2\%) were diagnosed as necrosis. A light-weighted deep neural network (DNN) (ie, efficient radionecrosis neural network [ERN-Net]) was proposed to learn radiological features of gliomas and necrosis from MRI scans. 
Sensitivity, specificity, accuracy, and area under the curve (AUC) were used to evaluate performance of the model in both image-wise and subject-wise classifications. Preoperative diagnostic performance of the model was also compared to that of the state-of-the-art DNN models and five experienced neurosurgeons. Results: DNN models based on multimodal MRI outperformed single-modal models. ERN-Net achieved the highest AUC in both image-wise (0.915) and subject-wise (0.958) classification tasks. The evaluated DNN models achieved an average sensitivity of 0.947 (SD 0.033), specificity of 0.817 (SD 0.075), and accuracy of 0.903 (SD 0.026), which were significantly better than the tested neurosurgeons (P=.02 in sensitivity and P<.001 in specificity and accuracy). Conclusions: Deep learning offers a useful computational tool for the differential diagnosis between recurrent gliomas and necrosis. The proposed ERN-Net model, a simple and effective DNN model, achieved excellent performance on routine MRI scans and showed a high clinical applicability. ", doi="10.2196/19805", url="http://medinform.jmir.org/2020/11/e19805/", url="http://www.ncbi.nlm.nih.gov/pubmed/33200991" } @Article{info:doi/10.2196/22074, author="Alfonsi, E. Jeffrey and Choi, Y. Elizabeth E. and Arshad, Taha and Sammott, S. Stacie-Ann and Pais, Vanita and Nguyen, Cynthia and Maguire, R. Bryan and Stinson, N. Jennifer and Palmert, R. Mark", title="Carbohydrate Counting App Using Image Recognition for Youth With Type 1 Diabetes: Pilot Randomized Control Trial", journal="JMIR Mhealth Uhealth", year="2020", month="Oct", day="28", volume="8", number="10", pages="e22074", keywords="carbohydrate counting", keywords="type 1 diabetes", keywords="image recognition", keywords="youth", keywords="digital health applications (apps)", keywords="mHealth", abstract="Background: Carbohydrate counting is an important component of diabetes management, but it is challenging, often performed inaccurately, and can be a barrier to optimal diabetes management. iSpy is a novel mobile app that leverages machine learning to allow food identification through images and that was designed to assist youth with type 1 diabetes in counting carbohydrates. Objective: Our objective was to test the app's usability and potential impact on carbohydrate counting accuracy. Methods: Iterative usability testing (3 cycles) was conducted involving a total of 16 individuals aged 8.5-17.0 years with type 1 diabetes. Participants were provided a mobile device and asked to complete tasks using iSpy app features while thinking aloud. Errors were noted, acceptability was assessed, and refinement and retesting were performed across cycles. Subsequently, iSpy was evaluated in a pilot randomized controlled trial with 22 iSpy users and 22 usual care controls aged 10-17 years. Primary outcome was change in carbohydrate counting ability over 3 months. Secondary outcomes included levels of engagement and acceptability. Change in HbA1c level was also assessed. Results: Use of iSpy was associated with improved carbohydrate counting accuracy (total grams per meal, P=.008), reduced frequency of individual counting errors greater than 10 g (P=.047), and lower HbA1c levels (P=.03). Qualitative interviews and acceptability scale scores were positive. No major technical challenges were identified. Moreover, 43\% (9/21) of iSpy participants were still engaged, with usage at least once every 2 weeks, at the end of the study. 
Conclusions: Our results provide evidence of efficacy and high acceptability of a novel carbohydrate counting app, supporting the advancement of digital health apps for diabetes care among youth with type 1 diabetes. Further testing is needed, but iSpy may be a useful adjunct to traditional diabetes management. Trial Registration: ClinicalTrials.gov NCT04354142; https://clinicaltrials.gov/ct2/show/NCT04354142 ", doi="10.2196/22074", url="http://mhealth.jmir.org/2020/10/e22074/", url="http://www.ncbi.nlm.nih.gov/pubmed/33112249" } @Article{info:doi/10.2196/22228, author="Mai, Hang-Nga and Lee, Du-Hyeong", title="Accuracy of Mobile Device--Compatible 3D Scanners for Facial Digitization: Systematic Review and Meta-Analysis", journal="J Med Internet Res", year="2020", month="Oct", day="23", volume="22", number="10", pages="e22228", keywords="accuracy", keywords="facial digitization", keywords="facial scanners", keywords="systematic review", keywords="meta-analysis", abstract="Background: The accurate assessment and acquisition of facial anatomical information significantly contributes to enhancing the reliability of treatments in dental and medical fields, and has applications in fields such as craniomaxillofacial surgery, orthodontics, prosthodontics, orthopedics, and forensic medicine. Mobile device--compatible 3D facial scanners have been reported to be an effective tool for clinical use, but the accuracy of digital facial impressions obtained with the scanners has not been explored. Objective: We aimed to review comparisons of the accuracy of mobile device--compatible face scanners for facial digitization with that of systems for professional 3D facial scanning. Methods: Individual search strategies were employed in PubMed (MEDLINE), Scopus, Science Direct, and Cochrane Library databases to search for articles published up to May 27, 2020. Peer-reviewed journal articles evaluating the accuracy of 3D facial models generated by mobile device--compatible face scanners were included. Cohen d effect size estimates and confidence intervals of standardized mean difference (SMD) data sets were used for meta-analysis. Results: By automatic database searching, 3942 articles were identified, of which 11 articles were considered eligible for narrative review, with 6 studies included in the meta-analysis. Overall, the accuracy of face models obtained using mobile device--compatible face scanners was significantly lower than that of face models obtained using professional 3D facial scanners (SMD 3.96 mm, 95\% CI 2.81-5.10 mm; z=6.78; P<.001). The difference between face scanning when performed on inanimate facial models was significantly higher (SMD 10.53 mm, 95\% CI 6.29-14.77 mm) than that when performed on living participants (SMD 2.58 mm, 95\% CI 1.70-3.47 mm, P<.001, df=12.94). Conclusions: Overall, mobile device--compatible face scanners did not perform as well as professional scanning systems in 3D facial acquisition, but the deviations were within the clinically acceptable range of <1.5 mm. Significant differences between results when 3D facial scans were performed on inanimate facial objects and when performed on the faces of living participants were found; thus, caution should be exercised when interpreting results from studies conducted on inanimate objects. 
", doi="10.2196/22228", url="http://www.jmir.org/2020/10/e22228/", url="http://www.ncbi.nlm.nih.gov/pubmed/33095178" } @Article{info:doi/10.2196/19263, author="Pantel, Tori Jean and Hajjir, Nurulhuda and Danyel, Magdalena and Elsner, Jonas and Abad-Perez, Teresa Angela and Hansen, Peter and Mundlos, Stefan and Spielmann, Malte and Horn, Denise and Ott, Claus-Eric and Mensah, Atta Martin", title="Efficiency of Computer-Aided Facial Phenotyping (DeepGestalt) in Individuals With and Without a Genetic Syndrome: Diagnostic Accuracy Study", journal="J Med Internet Res", year="2020", month="Oct", day="22", volume="22", number="10", pages="e19263", keywords="facial phenotyping", keywords="DeepGestalt", keywords="facial recognition", keywords="Face2Gene", keywords="medical genetics", keywords="diagnostic accuracy", keywords="genetic syndrome", keywords="machine learning", abstract="Background: Collectively, an estimated 5\% of the population have a genetic disease. Many of them feature characteristics that can be detected by facial phenotyping. Face2Gene CLINIC is an online app for facial phenotyping of patients with genetic syndromes. DeepGestalt, the neural network driving Face2Gene, automatically prioritizes syndrome suggestions based on ordinary patient photographs, potentially improving the diagnostic process. Hitherto, studies on DeepGestalt's quality highlighted its sensitivity in syndromic patients. However, determining the accuracy of a diagnostic methodology also requires testing of negative controls. Objective: The aim of this study was to evaluate DeepGestalt's accuracy with photos of individuals with and without a genetic syndrome. Moreover, we aimed to propose a machine learning--based framework for the automated differentiation of DeepGestalt's output on such images. Methods: Frontal facial images of individuals with a diagnosis of a genetic syndrome (established clinically or molecularly) from a convenience sample were reanalyzed. Each photo was matched by age, sex, and ethnicity to a picture featuring an individual without a genetic syndrome. Absence of a facial gestalt suggestive of a genetic syndrome was determined by physicians working in medical genetics. Photos were selected from online reports or were taken by us for the purpose of this study. Facial phenotype was analyzed by DeepGestalt version 19.1.7, accessed via Face2Gene CLINIC. Furthermore, we designed linear support vector machines (SVMs) using Python 3.7 to automatically differentiate between the 2 classes of photographs based on DeepGestalt's result lists. Results: We included photos of 323 patients diagnosed with 17 different genetic syndromes and matched those with an equal number of facial images without a genetic syndrome, analyzing a total of 646 pictures. We confirm DeepGestalt's high sensitivity (top 10 sensitivity: 295/323, 91\%). DeepGestalt's syndrome suggestions in individuals without a craniofacially dysmorphic syndrome followed a nonrandom distribution. A total of 17 syndromes appeared in the top 30 suggestions of more than 50\% of nondysmorphic images. DeepGestalt's top scores differed between the syndromic and control images (area under the receiver operating characteristic [AUROC] curve 0.72, 95\% CI 0.68-0.76; P<.001). A linear SVM running on DeepGestalt's result vectors showed stronger differences (AUROC 0.89, 95\% CI 0.87-0.92; P<.001). Conclusions: DeepGestalt fairly separates images of individuals with and without a genetic syndrome. 
This separation can be significantly improved by SVMs running on top of DeepGestalt, thus supporting the diagnostic process of patients with a genetic syndrome. Our findings facilitate the critical interpretation of DeepGestalt's results and may help enhance it and similar computer-aided facial phenotyping tools. ", doi="10.2196/19263", url="http://www.jmir.org/2020/10/e19263/", url="http://www.ncbi.nlm.nih.gov/pubmed/33090109" } @Article{info:doi/10.2196/23049, author="Kim, Wook Dong and Kim, Won Kyung and Ko, Yousun and Park, Taeyong and Khang, Seungwoo and Jeong, Heeryeol and Koo, Kyoyeong and Lee, Jeongjin and Kim, Hong-Kyu and Ha, Jiyeon and Sung, Sub Yu and Shin, Youngbin", title="Assessment of Myosteatosis on Computed Tomography by Automatic Generation of a Muscle Quality Map Using a Web-Based Toolkit: Feasibility Study", journal="JMIR Med Inform", year="2020", month="Oct", day="19", volume="8", number="10", pages="e23049", keywords="body composition", keywords="muscle", keywords="skeletal", keywords="sarcopenia", keywords="computed tomography", keywords="x-ray", keywords="scan", keywords="web-based tool", keywords="feasibility", keywords="automated", keywords="CT", abstract="Background: Muscle quality is associated with fatty degeneration or infiltration of the muscle, which may be associated with decreased muscle function and increased disability. Objective: The aim of this study is to evaluate the feasibility of automated quantitative measurements of the skeletal muscle on computed tomography (CT) images to assess normal-attenuation muscle and myosteatosis. Methods: We developed a web-based toolkit to generate a muscle quality map by categorizing muscle components. First, automatic segmentation of the total abdominal muscle area (TAMA), visceral fat area, and subcutaneous fat area was performed using a predeveloped deep learning model on a single axial CT image at the L3 vertebral level. Second, the Hounsfield unit of each pixel in the TAMA was measured and categorized into 3 components: normal-attenuation muscle area (NAMA), low-attenuation muscle area (LAMA), and inter/intramuscular adipose tissue (IMAT) area. The myosteatosis area was derived by adding the LAMA and IMAT area. We tested the feasibility of the toolkit using randomly selected healthy participants, comprising 6 different age groups (20 to 79 years). With stratification by sex, these indices were compared between age groups using 1-way analysis of variance (ANOVA). Correlations between the myosteatosis area or muscle densities and fat areas were analyzed using Pearson correlation coefficient r. Results: A total of 240 healthy participants (135 men and 105 women) with 40 participants per age group were included in the study. In the 1-way ANOVA, the NAMA, LAMA, and IMAT were significantly different between the age groups in both male and female participants (P$\leq$.004), whereas the TAMA showed a significant difference only in male participants (male, P<.001; female, P=.88). The myosteatosis area had a strong negative correlation with muscle densities (r=--0.833 to --0.894), a moderate positive correlation with visceral fat areas (r=0.607 to 0.669), and a weak positive correlation with the subcutaneous fat areas (r=0.305 to 0.441). Conclusions: The automated web-based toolkit is feasible and enables quantitative CT assessment of myosteatosis, which can be a potential quantitative biomarker for evaluating structural and functional changes brought on by aging in the skeletal muscle. 
", doi="10.2196/23049", url="http://medinform.jmir.org/2020/10/e23049/", url="http://www.ncbi.nlm.nih.gov/pubmed/33074159" } @Article{info:doi/10.2196/17524, author="Sasada, Shinsuke and Masumoto, Norio and Song, Hang and Emi, Akiko and Kadoya, Takayuki and Arihiro, Koji and Kikkawa, Takamaro and Okada, Morihito", title="Microwave Breast Imaging Using Rotational Bistatic Impulse Radar for the Detection of Breast Cancer: Protocol for a Prospective Diagnostic Study", journal="JMIR Res Protoc", year="2020", month="Oct", day="19", volume="9", number="10", pages="e17524", keywords="breast cancer", keywords="microwave imaging", keywords="diagnostic accuracy", keywords="screening", keywords="ultra-wideband radar", abstract="Background: Mammography is the standard examination for breast cancer screening; however, it is associated with pain and exposure to ionizing radiation. Microwave breast imaging is a less invasive method for breast cancer surveillance. A bistatic impulse radar--based breast cancer detector has recently been developed. Objective: This study aims to present a protocol for evaluating the diagnostic accuracy of the novel microwave breast imaging device. Methods: This is a prospective diagnostic study. A total of 120 participants were recruited before treatment administration and divided into 2 cohorts: 100 patients diagnosed with breast cancer and 20 participants with benign breast tumors. The detector will be directly placed on each breast, while the participant is in supine position, without a coupling medium. Confocal images will be created based on the analyzed data, and the presence of breast tumors will be assessed. The primary endpoint will be the diagnostic accuracy, sensitivity, and specificity of the detector for breast cancer and benign tumors. The secondary endpoint will be the safety and detectability of each molecular subtype of breast cancer. For an exploratory endpoint, the influence of breast density and tumor size on tumor detection will be investigated. Results: Recruitment began in November 2018 and was completed by March 2020. We anticipate the preliminary results to be available by summer 2021. Conclusions: This study will provide insights on the diagnostic accuracy of microwave breast imaging using a rotational bistatic impulse radar. The collected data will improve the diagnostic algorithm of microwave imaging and lead to enhanced device performance. 
Trial Registration: Japan Registry of Clinical Trials jRCTs062180005; https://jrct.niph.go.jp/en-latest-detail/jRCTs062180005 International Registered Report Identifier (IRRID): DERR1-10.2196/17524 ", doi="10.2196/17524", url="http://www.researchprotocols.org/2020/10/e17524/", url="http://www.ncbi.nlm.nih.gov/pubmed/33074156" } @Article{info:doi/10.2196/23578, author="Yao, Xiaopeng and Huang, Xinqiao and Yang, Chunmei and Hu, Anbin and Zhou, Guangjin and Ju, Mei and Lei, Jianbo and Shu, Jian", title="A Novel Approach to Assessing Differentiation Degree and Lymph Node Metastasis of Extrahepatic Cholangiocarcinoma: Prediction Using a Radiomics-Based Particle Swarm Optimization and Support Vector Machine Model", journal="JMIR Med Inform", year="2020", month="Oct", day="5", volume="8", number="10", pages="e23578", keywords="PSO-SVM algorithm", keywords="magnetic resonance imaging", keywords="lymph node metastases", keywords="differentiation degree", keywords="extrahepatic cholangiocarcinoma", keywords="radiomics feature", keywords="algorithm", keywords="MRI", keywords="radiomics", keywords="lymph", keywords="cancer", keywords="oncology", abstract="Background: Radiomics can improve the accuracy of traditional image diagnosis to evaluate extrahepatic cholangiocarcinoma (ECC); however, this is limited by variations across radiologists, subjective evaluation, and restricted data. A radiomics-based particle swarm optimization and support vector machine (PSO-SVM) model may provide a more accurate auxiliary diagnosis for assessing differentiation degree (DD) and lymph node metastasis (LNM) of ECC. Objective: The objective of our study is to develop a PSO-SVM radiomics model for predicting DD and LNM of ECC. Methods: For this retrospective study, the magnetic resonance imaging (MRI) data of 110 patients with ECC who were diagnosed from January 2011 to October 2019 were used to construct a radiomics prediction model. Radiomics features were extracted from T1-precontrast weighted imaging (T1WI), T2-weighted imaging (T2WI), and diffusion-weighted imaging (DWI) using MaZda software (version 4.6; Institute of Electronics, Technical University of Lodz). We performed dimension reduction to obtain 30 optimal features of each sequence, respectively. A PSO-SVM radiomics model was developed to predict DD and LNM of ECC by incorporating radiomics features and apparent diffusion coefficient (ADC) values. We randomly divided the 110 cases into a training group (88/110, 80\%) and a testing group (22/110, 20\%). The performance of the model was evaluated by analyzing the area under the receiver operating characteristic curve (AUC). Results: A radiomics model based on PSO-SVM was developed by using 110 patients with ECC. This model produced average AUCs of 0.8905 and 0.8461, respectively, for DD in the training and testing groups of patients with ECC. The average AUCs of the LNM in the training and testing groups of patients with ECC were 0.9036 and 0.8889, respectively. For the 110 patients, this model has high predictive performance. The average accuracy values of the training group and testing group for DD of ECC were 82.6\% and 80.9\%, respectively; the average accuracy values of the training group and testing group for LNM of ECC were 83.6\% and 81.2\%, respectively. Conclusions: The MRI-based PSO-SVM radiomics model might be useful for auxiliary clinical diagnosis and decision-making, which has a good potential for clinical application for DD and LNM of ECC. 
", doi="10.2196/23578", url="https://medinform.jmir.org/2020/10/e23578", url="http://www.ncbi.nlm.nih.gov/pubmed/33016889" } @Article{info:doi/10.2196/16224, author="Min, Qiusha and Wang, Xin and Huang, Bo and Xu, Liangzhou", title="Web-Based Technology for Remote Viewing of Radiological Images: App Validation", journal="J Med Internet Res", year="2020", month="Sep", day="25", volume="22", number="9", pages="e16224", keywords="internet access", keywords="medical informatics applications", keywords="computer-assisted image analyses", keywords="computer-assisted three-dimensional imaging", keywords="medical imaging", keywords="radiology", keywords="application", abstract="Background: Internet technologies can create advanced and rich web-based apps that allow radiologists to easily access teleradiology systems and remotely view medical images. However, each technology has its own drawbacks. It is difficult to balance the advantages and disadvantages of these internet technologies and identify an optimal solution for the development of medical imaging apps. Objective: This study aimed to compare different internet platform technologies for remotely viewing radiological images and analyze their advantages and disadvantages. Methods: Oracle Java, Adobe Flash, and HTML5 were each used to develop a comprehensive web-based medical imaging app that connected to a medical image server and provided several required functions for radiological interpretation (eg, navigation, magnification, windowing, and fly-through). Java-, Flash-, and HTML5-based medical imaging apps were tested on different operating systems over a local area network and a wide area network. Three computed tomography colonography data sets and 2 ordinary personal computers were used in the experiment. Results: The experimental results demonstrated that Java-, Flash-, and HTML5-based apps had the ability to provide real-time 2D functions. However, for 3D, performances differed between the 3 apps. The Java-based app had the highest frame rate of volume rendering. However, it required the longest time for surface rendering and failed to run surface rendering in macOS. The HTML5-based app had the fastest surface rendering and the highest speed for fly-through without platform dependence. Volume rendering, surface rendering, and fly-through performances of the Flash-based app were significantly worse than those of the other 2 apps. Conclusions: Oracle Java, Adobe Flash, and HTML5 have individual strengths in the development of remote access medical imaging apps. However, HTML5 is a promising technology for remote viewing of radiological images and can provide excellent performance without requiring any plug-ins. 
", doi="10.2196/16224", url="http://www.jmir.org/2020/9/e16224/", url="http://www.ncbi.nlm.nih.gov/pubmed/32975520" } @Article{info:doi/10.2196/18367, author="Dallas-Orr, David and Penev, Yordan and Schultz, Robert and Courtier, Jesse", title="Comparing Computed Tomography--Derived Augmented Reality Holograms to a Standard Picture Archiving and Communication Systems Viewer for Presurgical Planning: Feasibility Study", journal="JMIR Perioper Med", year="2020", month="Sep", day="24", volume="3", number="2", pages="e18367", keywords="augmented reality", keywords="mixed reality", keywords="picture archiving and communication system", keywords="presurgical planning", keywords="new technology evaluation", keywords="medical imaging", keywords="surgery", abstract="Background: Picture archiving and communication systems (PACS) are ubiquitously used to store, share, and view radiological information for preoperative planning across surgical specialties. Although traditional PACS software has proven reliable in terms of display accuracy and ease of use, it remains limited by its inherent representation of medical imaging in 2 dimensions. Augmented reality (AR) systems present an exciting opportunity to complement traditional PACS capabilities. Objective: This study aims to evaluate the technical feasibility of using a novel AR platform, with holograms derived from computed tomography (CT) imaging, as a supplement to traditional PACS for presurgical planning in complex surgical procedures. Methods: Independent readers measured objects of predetermined, anthropomorphically correlated sizes using the circumference and angle tools of standard-of-care PACS software and a newly developed augmented reality presurgical planning system (ARPPS). Results: Measurements taken with the standard PACS and the ARPPS showed no statistically significant differences. Bland-Altman analysis showed a mean difference of 0.08\% (95\% CI --4.20\% to 4.36\%) for measurements taken with PACS versus ARPPS' circumference tools and --1.84\% (95\% CI --6.17\% to 2.14\%) for measurements with the systems' angle tools. Lin's concordance correlation coefficients were 1.00 and 0.98 for the circumference and angle measurements, respectively, indicating almost perfect strength of agreement between ARPPS and PACS. Intraclass correlation showed no statistically significant difference between the readers for either measurement tool on each system. Conclusions: ARPPS can be an effective, accurate, and precise means of 3D visualization and measurement of CT-derived holograms in the presurgical care timeline. 
", doi="10.2196/18367", url="http://periop.jmir.org/2020/2/e18367/", url="http://www.ncbi.nlm.nih.gov/pubmed/33393933" } @Article{info:doi/10.2196/20203, author="Jacob, Christine and Sanchez-Vazquez, Antonio and Ivory, Chris", title="Factors Impacting Clinicians' Adoption of a Clinical Photo Documentation App and its Implications for Clinical Workflows and Quality of Care: Qualitative Case Study", journal="JMIR Mhealth Uhealth", year="2020", month="Sep", day="23", volume="8", number="9", pages="e20203", keywords="mHealth", keywords="mobile health", keywords="telehealth", keywords="eHealth", keywords="health tech", keywords="digital health", keywords="user-engagement", keywords="dermatology", keywords="wound care", keywords="mobile phone", abstract="Background: Mobile health (mHealth) tools have shown promise in clinical photo and wound documentation for their potential to improve workflows, expand access to care, and improve the quality of patient care. However, some barriers to adoption persist. Objective: This study aims to understand the social, organizational, and technical factors affecting clinicians' adoption of a clinical photo documentation mHealth app and its implications for clinical workflows and quality of care. Methods: A qualitative case study of a clinical photo and wound documentation app called imitoCam was conducted. The data were collected through 20 in-depth interviews with mHealth providers, clinicians, and medical informatics experts from 8 clinics and hospitals in Switzerland and Germany. Results: According to the study participants, the use of mHealth in clinical photo and wound documentation provides numerous benefits such as time-saving and efficacy, better patient safety and quality of care, enhanced data security and validation, and better accessibility. The clinical workflow may also improve when the app is a good fit, resulting in better collaboration and transparency, streamlined daily work, clinician empowerment, and improved quality of care. The findings included important factors that may contribute to or hinder adoption. Factors may be related to the material nature of the tool, such as the perceived usefulness, ease of use, interoperability, cost, or security of the app, or social aspects such as personal experience, attitudes, awareness, or culture. Organizational and policy barriers include the available clinical practice infrastructure, workload and resources, the complexity of decision making, training, and ambiguity or lack of regulations. User engagement in the development and implementation process is a vital contributor to the successful adoption of mHealth apps. Conclusions: The promising potential of mHealth in clinical photo and wound documentation is clear and may enhance clinical workflow and quality of care; however, the factors affecting adoption go beyond the technical features of the tool itself to embrace significant social and organizational elements. Technology providers, clinicians, and decision makers should work together to carefully address any barriers to improve adoption and harness the potential of these tools. 
", doi="10.2196/20203", url="http://mhealth.jmir.org/2020/9/e20203/", url="http://www.ncbi.nlm.nih.gov/pubmed/32965232" } @Article{info:doi/10.2196/18846, author="Dallora, Luiza Ana and Kvist, Ola and Berglund, Sanmartin Johan and Ruiz, Diaz Sandra and Boldt, Martin and Flodmark, Carl-Erik and Anderberg, Peter", title="Chronological Age Assessment in Young Individuals Using Bone Age Assessment Staging and Nonradiological Aspects: Machine Learning Multifactorial Approach", journal="JMIR Med Inform", year="2020", month="Sep", day="21", volume="8", number="9", pages="e18846", keywords="chronological age assessment", keywords="bone age", keywords="skeletal maturity", keywords="machine learning", keywords="magnetic resonance imaging", keywords="radius", keywords="distal tibia", keywords="proximal tibia", keywords="distal femur", keywords="calcaneus", abstract="Background: Bone age assessment (BAA) is used in numerous pediatric clinical settings as well as in legal settings when entities need an estimate of chronological age (CA) when valid documents are lacking. The latter case presents itself as critical as the law is harsher for adults and granted rights along with imputability changes drastically if the individual is a minor. Traditional BAA methods have drawbacks such as exposure of minors to radiation, they do not consider factors that might affect the bone age, and they mostly focus on a single region. Given the critical scenarios in which BAA can affect the lives of young individuals, it is important to focus on the drawbacks of the traditional methods and investigate the potential of estimating CA through BAA. Objective: This study aims to investigate CA estimation through BAA in young individuals aged 14-21 years with machine learning methods, addressing the drawbacks of research using magnetic resonance imaging (MRI), assessment of multiple regions of interest, and other factors that may affect the bone age. Methods: MRI examinations of the radius, distal tibia, proximal tibia, distal femur, and calcaneus were performed on 465 men and 473 women (aged 14-21 years). Measures of weight and height were taken from the subjects, and a questionnaire was given for additional information (self-assessed Tanner Scale, physical activity level, parents' origin, and type of residence during upbringing). Two pediatric radiologists independently assessed the MRI images to evaluate their stage of bone development (blinded to age, gender, and each other). All the gathered information was used in training machine learning models for CA estimation and minor versus adult classification (threshold of 18 years). Different machine learning methods were investigated. Results: The minor versus adult classification produced accuracies of 0.90 and 0.84 for male and female subjects, respectively, with high recalls for the classification of minors. The CA estimation for the 8 age groups (aged 14-21 years) achieved mean absolute errors of 0.95 years and 1.24 years for male and female subjects, respectively. However, for the latter, a lower error occurred only for the ages of 14 and 15 years. Conclusions: This study investigates CA estimation through BAA using machine learning methods in 2 ways: minor versus adult classification and CA estimation in 8 age groups (aged 14-21 years), while addressing the drawbacks in the research on BAA. The first achieved good results; however, for the second case, the BAA was not precise enough for the classification. 
", doi="10.2196/18846", url="http://medinform.jmir.org/2020/9/e18846/", url="http://www.ncbi.nlm.nih.gov/pubmed/32955457" } @Article{info:doi/10.2196/21983, author="Bang, Seok Chang and Lee, Jun Jae and Baik, Ho Gwang", title="Artificial Intelligence for the Prediction of Helicobacter Pylori Infection in Endoscopic Images: Systematic Review and Meta-Analysis Of Diagnostic Test Accuracy", journal="J Med Internet Res", year="2020", month="Sep", day="16", volume="22", number="9", pages="e21983", keywords="artificial intelligence", keywords="convolutional neural network", keywords="deep learning", keywords="machine learning", keywords="endoscopy", keywords="Helicobacter pylori", abstract="Background: Helicobacter pylori plays a central role in the development of gastric cancer, and prediction of H pylori infection by visual inspection of the gastric mucosa is an important function of endoscopy. However, there are currently no established methods of optical diagnosis of H pylori infection using endoscopic images. Definitive diagnosis requires endoscopic biopsy. Artificial intelligence (AI) has been increasingly adopted in clinical practice, especially for image recognition and classification. Objective: This study aimed to evaluate the diagnostic test accuracy of AI for the prediction of H pylori infection using endoscopic images. Methods: Two independent evaluators searched core databases. The inclusion criteria included studies with endoscopic images of H pylori infection and with application of AI for the prediction of H pylori infection presenting diagnostic performance. Systematic review and diagnostic test accuracy meta-analysis were performed. Results: Ultimately, 8 studies were identified. Pooled sensitivity, specificity, diagnostic odds ratio, and area under the curve of AI for the prediction of H pylori infection were 0.87 (95\% CI 0.72-0.94), 0.86 (95\% CI 0.77-0.92), 40 (95\% CI 15-112), and 0.92 (95\% CI 0.90-0.94), respectively, in the 1719 patients (385 patients with H pylori infection vs 1334 controls). Meta-regression showed methodological quality and included the number of patients in each study for the purpose of heterogeneity. There was no evidence of publication bias. The accuracy of the AI algorithm reached 82\% for discrimination between noninfected images and posteradication images. Conclusions: An AI algorithm is a reliable tool for endoscopic diagnosis of H pylori infection. The limitations of lacking external validation performance and being conducted only in Asia should be overcome. Trial Registration: PROSPERO CRD42020175957; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=175957 ", doi="10.2196/21983", url="http://www.jmir.org/2020/9/e21983/", url="http://www.ncbi.nlm.nih.gov/pubmed/32936088" } @Article{info:doi/10.2196/18091, author="Maron, C. Roman and Utikal, S. Jochen and Hekler, Achim and Hauschild, Axel and Sattler, Elke and Sondermann, Wiebke and Haferkamp, Sebastian and Schilling, Bastian and Heppt, V. Markus and Jansen, Philipp and Reinholz, Markus and Franklin, Cindy and Schmitt, Laurenz and Hartmann, Daniela and Krieghoff-Henning, Eva and Schmitt, Max and Weichenthal, Michael and von Kalle, Christof and Fr{\"o}hling, Stefan and Brinker, J. 
Titus", title="Artificial Intelligence and Its Effect on Dermatologists' Accuracy in Dermoscopic Melanoma Image Classification: Web-Based Survey Study", journal="J Med Internet Res", year="2020", month="Sep", day="11", volume="22", number="9", pages="e18091", keywords="artificial intelligence", keywords="machine learning", keywords="deep learning", keywords="neural network", keywords="dermatology", keywords="diagnosis", keywords="nevi", keywords="melanoma", keywords="skin neoplasm", abstract="Background: Early detection of melanoma can be lifesaving but this remains a challenge. Recent diagnostic studies have revealed the superiority of artificial intelligence (AI) in classifying dermoscopic images of melanoma and nevi, concluding that these algorithms should assist a dermatologist's diagnoses. Objective: The aim of this study was to investigate whether AI support improves the accuracy and overall diagnostic performance of dermatologists in the dichotomous image--based discrimination between melanoma and nevus. Methods: Twelve board-certified dermatologists were presented disjoint sets of 100 unique dermoscopic images of melanomas and nevi (total of 1200 unique images), and they had to classify the images based on personal experience alone (part I) and with the support of a trained convolutional neural network (CNN, part II). Additionally, dermatologists were asked to rate their confidence in their final decision for each image. Results: While the mean specificity of the dermatologists based on personal experience alone remained almost unchanged (70.6\% vs 72.4\%; P=.54) with AI support, the mean sensitivity and mean accuracy increased significantly (59.4\% vs 74.6\%; P=.003 and 65.0\% vs 73.6\%; P=.002, respectively) with AI support. Out of the 10\% (10/94; 95\% CI 8.4\%-11.8\%) of cases where dermatologists were correct and AI was incorrect, dermatologists on average changed to the incorrect answer for 39\% (4/10; 95\% CI 23.2\%-55.6\%) of cases. When dermatologists were incorrect and AI was correct (25/94, 27\%; 95\% CI 24.0\%-30.1\%), dermatologists changed their answers to the correct answer for 46\% (11/25; 95\% CI 33.1\%-58.4\%) of cases. Additionally, the dermatologists' average confidence in their decisions increased when the CNN confirmed their decision and decreased when the CNN disagreed, even when the dermatologists were correct. Reported values are based on the mean of all participants. Whenever absolute values are shown, the denominator and numerator are approximations as every dermatologist ended up rating a varying number of images due to a quality control step. Conclusions: The findings of our study show that AI support can improve the overall accuracy of the dermatologists in the dichotomous image--based discrimination between melanoma and nevus. This supports the argument for AI-based tools to aid clinicians in skin lesion classification and provides a rationale for studies of such classifiers in real-life settings, wherein clinicians can integrate additional information such as patient age and medical history into their decisions. 
", doi="10.2196/18091", url="https://www.jmir.org/2020/9/e18091", url="http://www.ncbi.nlm.nih.gov/pubmed/32915161" } @Article{info:doi/10.2196/19673, author="Aljondi, Rowa and Alghamdi, Salem", title="Diagnostic Value of Imaging Modalities for COVID-19: Scoping Review", journal="J Med Internet Res", year="2020", month="Aug", day="19", volume="22", number="8", pages="e19673", keywords="diagnostic imaging", keywords="radiology", keywords="COVID-19", keywords="respiratory infection", keywords="pneumonia", keywords="imaging", keywords="CT", keywords="infectious disease", keywords="diagnosis", keywords="review", abstract="Background: Coronavirus disease (COVID-19) is a serious infectious disease that causes severe respiratory illness. This pandemic represents a serious public health risk. Therefore, early and accurate diagnosis is essential to control disease progression. Radiological examination plays a crucial role in the early identification and management of infected patients. Objective: The aim of this review was to identify the diagnostic value of different imaging modalities used for diagnosis of COVID-19. Methods: A comprehensive literature search was conducted using the PubMed, Scopus, Web of Science, and Google Scholar databases. The keywords diagnostic imaging, radiology, respiratory infection, pneumonia, coronavirus infection and COVID-19 were used to identify radiology articles focusing on the diagnosis of COVID-19 and to determine the diagnostic value of various imaging modalities, including x-ray, computed tomography (CT), ultrasound, and nuclear medicine for identification and management of infected patients. Results: We identified 50 articles in the literature search. Studies that investigated the diagnostic roles and imaging features of patients with COVID-19, using either chest CT, lung ultrasound, chest x-ray, or positron emission topography/computed tomography (PET/CT) scan, were discussed. Of these imaging modalities, chest x-ray and CT scan are the most commonly used for diagnosis and management of COVID-19 patients, with chest CT scan being more accurate and sensitive in identifying COVID-19 at early stages. Only a few studies have investigated the roles of ultrasound and PET/CT scan in diagnosing COVID-19. Conclusions: Chest CT scan remains the most sensitive imaging modality in initial diagnosis and management of suspected and confirmed patients with COVID-19. Other diagnostic imaging modalities could add value in evaluating disease progression and monitoring critically ill patients with COVID-19. ", doi="10.2196/19673", url="http://www.jmir.org/2020/8/e19673/", url="http://www.ncbi.nlm.nih.gov/pubmed/32716893" } @Article{info:doi/10.2196/16709, author="Yu, Kun-Hsing and Lee, Michael Tsung-Lu and Yen, Ming-Hsuan and Kou, C. S. and Rosen, Bruce and Chiang, Jung-Hsien and Kohane, S. Isaac", title="Reproducible Machine Learning Methods for Lung Cancer Detection Using Computed Tomography Images: Algorithm Development and Validation", journal="J Med Internet Res", year="2020", month="Aug", day="5", volume="22", number="8", pages="e16709", keywords="computed tomography, spiral", keywords="lung cancer", keywords="machine learning", keywords="early detection of cancer", keywords="reproducibility of results", abstract="Background: Chest computed tomography (CT) is crucial for the detection of lung cancer, and many automated CT evaluation methods have been proposed. 
Due to the divergent software dependencies of the reported approaches, the developed methods are rarely compared or reproduced. Objective: The goal of the research was to generate reproducible machine learning modules for lung cancer detection and compare the approaches and performances of the award-winning algorithms developed in the Kaggle Data Science Bowl. Methods: We obtained the source codes of all award-winning solutions of the Kaggle Data Science Bowl Challenge, where participants developed automated CT evaluation methods to detect lung cancer (training set n=1397, public test set n=198, final test set n=506). The performance of the algorithms was evaluated by the log-loss function, and the Spearman correlation coefficient of the performance in the public and final test sets was computed. Results: Most solutions implemented distinct image preprocessing, segmentation, and classification modules. Variants of U-Net, VGGNet, and residual net were commonly used in nodule segmentation, and transfer learning was used in most of the classification algorithms. Substantial performance variations in the public and final test sets were observed (Spearman correlation coefficient = .39 among the top 10 teams). To ensure the reproducibility of results, we generated a Docker container for each of the top solutions. Conclusions: We compared the award-winning algorithms for lung cancer detection and generated reproducible Docker images for the top solutions. Although convolutional neural networks achieved decent accuracy, there is plenty of room for improvement regarding model generalizability. ", doi="10.2196/16709", url="https://www.jmir.org/2020/8/e16709", url="http://www.ncbi.nlm.nih.gov/pubmed/32755895" } @Article{info:doi/10.2196/18089, author="Jang, Ryoungwoo and Kim, Namkug and Jang, Miso and Lee, Hwa Kyung and Lee, Min Sang and Lee, Hee Kyung and Noh, Na Han and Seo, Beom Joon", title="Assessment of the Robustness of Convolutional Neural Networks in Labeling Noise by Using Chest X-Ray Images From Multiple Centers", journal="JMIR Med Inform", year="2020", month="Aug", day="4", volume="8", number="8", pages="e18089", keywords="deep learning", keywords="convolutional neural network", keywords="NIH dataset", keywords="CheXpert dataset", keywords="robustness", abstract="Background: Computer-aided diagnosis on chest x-ray images using deep learning is a widely studied modality in medicine. Many studies are based on public datasets, such as the National Institutes of Health (NIH) dataset and the Stanford CheXpert dataset. However, these datasets are preprocessed by classical natural language processing, which may cause a certain extent of label errors. Objective: This study aimed to investigate the robustness of deep convolutional neural networks (CNNs) for binary classification of posteroanterior chest x-ray through random incorrect labeling. Methods: We trained and validated the CNN architecture with different noise levels of labels in 3 datasets, namely, Asan Medical Center-Seoul National University Bundang Hospital (AMC-SNUBH), NIH, and CheXpert, and tested the models with each test set. Diseases of each chest x-ray in our dataset were confirmed by a thoracic radiologist using computed tomography (CT). Receiver operating characteristic (ROC) and area under the curve (AUC) were evaluated in each test. Randomly chosen chest x-rays of public datasets were evaluated by 3 physicians and 1 thoracic radiologist. 
Results: In comparison with the public datasets of NIH and CheXpert, where AUCs did not significantly drop until 16\% label noise, the AUC of the AMC-SNUBH dataset significantly decreased from 2\% label noise. Evaluation of the public datasets by 3 physicians and 1 thoracic radiologist showed an accuracy of 65\%-80\%. Conclusions: The deep learning--based computer-aided diagnosis model is sensitive to label noise, and computer-aided diagnosis with inaccurate labels is not credible. Furthermore, open datasets such as NIH and CheXpert need to be distilled before being used for deep learning--based computer-aided diagnosis. ", doi="10.2196/18089", url="https://medinform.jmir.org/2020/8/e18089", url="http://www.ncbi.nlm.nih.gov/pubmed/32749222" } @Article{info:doi/10.2196/17480, author="Wintergerst, M. Maximilian W. and Jansen, G. Linus and Holz, G. Frank and Finger, P. Robert", title="A Novel Device for Smartphone-Based Fundus Imaging and Documentation in Clinical Practice: Comparative Image Analysis Study", journal="JMIR Mhealth Uhealth", year="2020", month="Jul", day="29", volume="8", number="7", pages="e17480", keywords="smartphone-based fundus imaging", keywords="smartphone-based funduscopy", keywords="smartphone", keywords="retinal imaging", keywords="mHealth", keywords="mobile phone", keywords="smartphone imaging", keywords="smartphone funduscopy", keywords="smartphone ophthalmoscope", abstract="Background: Smartphone-based fundus imaging allows for mobile and inexpensive fundus examination with the potential to revolutionize eye care, particularly in lower-resource settings. However, most smartphone-based fundus imaging adapters convey image quality not comparable to conventional fundus imaging. Objective: The purpose of this study was to evaluate a novel smartphone-based fundus imaging device for documentation of a variety of retinal/vitreous pathologies in a patient sample with wide refraction and age ranges. Methods: Participants' eyes were dilated and imaged with the iC2 funduscope (HEINE Optotechnik) using an Apple iPhone 6 in single-image acquisition (image resolution of 2448 {\texttimes} 3264 pixels) or video mode (1248 {\texttimes} 1664 pixels) and a subgroup of participants was also examined by conventional fundus imaging (Zeiss VISUCAM 500). Smartphone-based image quality was compared to conventional fundus imaging in terms of sharpness (focus), reflex artifacts, contrast, and illumination on semiquantitative scales. Results: A total of 47 eyes from 32 participants (age: mean 62.3, SD 19.8 years; range 7-93; spherical equivalent: mean --0.78, SD 3.21 D; range: --7.88 to +7.0 D) were included in the study. Mean (SD) visual acuity (logMAR) was 0.48 (0.66; range 0-2.3); 30\% (14/47) of the eyes were pseudophakic. Image quality was sufficient in all eyes irrespective of refraction. Images acquired with conventional fundus imaging were sharper and had fewer reflex artifacts, and there was no significant difference in contrast and illumination (P<.001, P=.03, and P=.10, respectively). When comparing image quality at the posterior pole, the mid periphery, and the far periphery, glare increased as images were acquired from a more peripheral part of the retina. Reflex artifacts were more frequent in pseudophakic eyes. Image acquisition was also possible in children. Documentation of deep optic nerve cups in video mode conveyed a mock 3D impression. 
Conclusions: Image quality of conventional fundus imaging was superior to that of smartphone-based fundus imaging, although this novel smartphone-based fundus imaging device achieved image quality high enough to document various fundus pathologies including only subtle findings. High-quality smartphone-based fundus imaging might represent a mobile alternative for fundus documentation in clinical practice. ", doi="10.2196/17480", url="https://mhealth.jmir.org/2020/7/e17480", url="http://www.ncbi.nlm.nih.gov/pubmed/32723717" } @Article{info:doi/10.2196/17220, author="Wickerson, Lisa and Fujioka, K. Jamie and Kishimoto, Vanessa and Jamieson, Trevor and Fine, Ben and Bhatia, Sacha R. and Desveaux, Laura", title="Utility and Perceived Value of a Provincial Digital Diagnostic Imaging Repository: Multimethod Study", journal="JMIR Form Res", year="2020", month="Jul", day="27", volume="4", number="7", pages="e17220", keywords="diagnostic imaging", keywords="eHealth", keywords="health care delivery", abstract="Background: Timely and comprehensive diagnostic image sharing across institutional and regional boundaries can produce multiple benefits while supporting integrated models of care. In Ontario, Canada, the Diagnostic Imaging Common Service (DICS) was created as a centralized imaging repository to enable the sharing and viewing of diagnostic images and associated reports across hospital-based and community-based clinicians throughout the province. Objective: The aims of this study were as follows: (1) to explore real-world utilization and perceived clinical value of the DICS following the provision of system-wide access and (2) to identify strategies to optimize the technology platform functionality and encourage adoption. Methods: This multimethod study included semistructured interviews with physicians and administrative stakeholders and descriptive analysis of the current DICS usage data. Results: In this study, 41 participants were interviewed, that is, 34 physicians and 7 administrative stakeholders. The following 4 key themes emerged: (1) utilization of the DICS depended on the awareness of the technology and the preferred channels for accessing images, which varied widely, (2) clinical responsibilities and available institutional resources were the drivers of utilization (or lack thereof), (3) centralized image repositories were perceived to offer value at the patient, clinician, and health care system levels, and (4) the enabling factors to realize value included aspects of technology infrastructure (ie, available functionality) alongside policy supports. High-volume DICS usage was not evenly distributed throughout the province. Conclusions: Suboptimal adoption of the DICS was driven by poor awareness and variations in the clinical workflow. Alignment with physician workflow, policy supports, and investment in key technological features and infrastructure would improve functionality and data comprehensiveness, thereby optimizing health system performance, patient and provider experience, population health, and health care costs. 
", doi="10.2196/17220", url="https://formative.jmir.org/2020/7/e17220", url="http://www.ncbi.nlm.nih.gov/pubmed/32459644" } @Article{info:doi/10.2196/16843, author="Kim, Hee and Ganslandt, Thomas and Miethke, Thomas and Neumaier, Michael and Kittel, Maximilian", title="Deep Learning Frameworks for Rapid Gram Stain Image Data Interpretation: Protocol for a Retrospective Data Analysis", journal="JMIR Res Protoc", year="2020", month="Jul", day="13", volume="9", number="7", pages="e16843", keywords="high performance computing", keywords="rapid Gram stain classification", keywords="image data analysis", keywords="deep learning", keywords="convolutional neural network", abstract="Background: In recent years, remarkable progress has been made in deep learning technology and successful use cases have been introduced in the medical domain. However, not many studies have considered high-performance computing to fully appreciate the capability of deep learning technology. Objective: This paper aims to design a solution to accelerate an automated Gram stain image interpretation by means of a deep learning framework without additional hardware resources. Methods: We will apply and evaluate 3 methodologies, namely fine-tuning, an integer arithmetic--only framework, and hyperparameter tuning. Results: The choice of pretrained models and the ideal setting for layer tuning and hyperparameter tuning will be determined. These results will provide an empirical yet reproducible guideline for those who consider a rapid deep learning solution for Gram stain image interpretation. The results are planned to be announced in the first quarter of 2021. Conclusions: Making a balanced decision between modeling performance and computational performance is the key for a successful deep learning solution. Otherwise, highly accurate but slow deep learning solutions can add value to routine care. International Registered Report Identifier (IRRID): DERR1-10.2196/16843 ", doi="10.2196/16843", url="http://www.researchprotocols.org/2020/7/e16843/", url="http://www.ncbi.nlm.nih.gov/pubmed/32673276" } @Article{info:doi/10.2196/19569, author="Ko, Hoon and Chung, Heewon and Kang, Seong Wu and Kim, Won Kyung and Shin, Youngbin and Kang, Ji Seung and Lee, Hoon Jae and Kim, Jun Young and Kim, Yeol Nan and Jung, Hyunseok and Lee, Jinseok", title="COVID-19 Pneumonia Diagnosis Using a Simple 2D Deep Learning Framework With a Single Chest CT Image: Model Development and Validation", journal="J Med Internet Res", year="2020", month="Jun", day="29", volume="22", number="6", pages="e19569", keywords="COVID-19", keywords="deep learning", keywords="convolutional neural networks, transfer learning", keywords="chest CT", keywords="CT", keywords="neural network", keywords="pneumonia", keywords="artificial intelligence", keywords="diagnosis", keywords="scan", abstract="Background: Coronavirus disease (COVID-19) has spread explosively worldwide since the beginning of 2020. According to a multinational consensus statement from the Fleischner Society, computed tomography (CT) is a relevant screening tool due to its higher sensitivity for detecting early pneumonic changes. However, physicians are extremely occupied fighting COVID-19 in this era of worldwide crisis. Thus, it is crucial to accelerate the development of an artificial intelligence (AI) diagnostic tool to support physicians. 
Objective: We aimed to rapidly develop an AI technique to diagnose COVID-19 pneumonia in CT images and differentiate it from non--COVID-19 pneumonia and nonpneumonia diseases. Methods: A simple 2D deep learning framework, named the fast-track COVID-19 classification network (FCONet), was developed to diagnose COVID-19 pneumonia based on a single chest CT image. FCONet was developed by transfer learning using one of four state-of-the-art pretrained deep learning models (VGG16, ResNet-50, Inception-v3, or Xception) as a backbone. For training and testing of FCONet, we collected 3993 chest CT images of patients with COVID-19 pneumonia, other pneumonia, and nonpneumonia diseases from Wonkwang University Hospital, Chonnam National University Hospital, and the Italian Society of Medical and Interventional Radiology public database. These CT images were split into a training set and a testing set at a ratio of 8:2. For the testing data set, the diagnostic performance of the four pretrained FCONet models to diagnose COVID-19 pneumonia was compared. In addition, we tested the FCONet models on an external testing data set extracted from embedded low-quality chest CT images of COVID-19 pneumonia in recently published papers. Results: Among the four pretrained models of FCONet, ResNet-50 showed excellent diagnostic performance (sensitivity 99.58\%, specificity 100.00\%, and accuracy 99.87\%) and outperformed the other three pretrained models in the testing data set. In the additional external testing data set using low-quality CT images, the detection accuracy of the ResNet-50 model was the highest (96.97\%), followed by Xception, Inception-v3, and VGG16 (90.71\%, 89.38\%, and 87.12\%, respectively). Conclusions: FCONet, a simple 2D deep learning framework based on a single chest CT image, provides excellent diagnostic performance in detecting COVID-19 pneumonia. Based on our testing data set, the FCONet model based on ResNet-50 appears to be the best model, as it outperformed other FCONet models based on VGG16, Xception, and Inception-v3. ", doi="10.2196/19569", url="http://www.jmir.org/2020/6/e19569/", url="http://www.ncbi.nlm.nih.gov/pubmed/32568730" } @Article{info:doi/10.2196/11839, author="Knopp, U. Melanie and Binzel, Katherine and Wright, L. Chadwick and Zhang, Jun and Knopp, V. Michael", title="Enhancing Patient Experience With Internet Protocol Addressable Digital Light-Emitting Diode Lighting in Imaging Environments: A Phase I Study", journal="J Med Internet Res", year="2020", month="Jun", day="12", volume="22", number="6", pages="e11839", keywords="ambient lighting", keywords="patient comfort", keywords="medical imaging", keywords="color perception", keywords="health care environment", keywords="internet protocol--based light-emitting diode lighting", abstract="Background: Conventional approaches to improve the quality of clinical patient imaging studies focus predominantly on updating or replacing imaging equipment; however, it is often not considered that patients can also highly influence the diagnostic quality of clinical imaging studies. Patient-specific artifacts can limit the diagnostic image quality, especially when patients are uncomfortable, anxious, or agitated. Imaging facility or environmental conditions can also influence the patient's comfort and willingness to participate in diagnostic imaging studies, especially when performed in visually unesthetic, anxiety-inducing, and technology-intensive imaging centers. 
When given the opportunity to change a single aspect of the environmental or imaging facility experience, patients feel much more in control of the otherwise unfamiliar and uncomfortable setting. Incorporating commercial, easily adaptable, ambient lighting products within clinical imaging environments allows patients to individually customize their environment for a more personalized and comfortable experience. Objective: The aim of this pilot study was to use a customizable colored light-emitting diode (LED) lighting system within a clinical imaging environment and demonstrate the feasibility and initial findings of enabling healthy subjects to customize the ambient lighting and color. Improving the patient experience within clinical imaging environments with patient-preferred ambient lighting and color may improve overall patient comfort, compliance, and participation in the imaging study and indirectly contribute to improving diagnostic image quality. Methods: We installed consumer-based internet protocol addressable LED lights using the ZigBee standard in different imaging rooms within a clinical imaging environment. We recruited healthy volunteers (n=35) to generate pilot data in order to develop a subsequent clinical trial. The visual perception assessment procedure utilized questionnaires with preprogrammed light/color settings and further assessed how subjects preferred ambient light and color within a clinical imaging setting. Results: Technical implementation using programmable LED lights was performed without any hardware or electrical modifications to the existing clinical imaging environment. Subject testing revealed substantial variabilities in color perception; however, clear trends in subject color preference were noted. In terms of the color hue of the imaging environment, 43\% (15/35) found blue and 31\% (11/35) found yellow to be the most relaxing. Conversely, 69\% (24/35) found red, 17\% (6/35) found yellow, and 11\% (4/35) found green to be the least relaxing. Conclusions: With the majority of subjects indicating that colored lighting within a clinical imaging environment would contribute to an improved patient experience, we predict that enabling patients to customize environmental factors like lighting and color to individual preferences will improve patient comfort and patient satisfaction. Improved patient comfort in clinical imaging environments may also help to minimize patient-specific imaging artifacts that can otherwise limit diagnostic image quality. Trial Registration: ClinicalTrials.gov NCT03456895; https://clinicaltrials.gov/ct2/show/NCT03456895 ", doi="10.2196/11839", url="http://www.jmir.org/2020/6/e11839/", url="http://www.ncbi.nlm.nih.gov/pubmed/32530434" } @Article{info:doi/10.2196/15893, author="Sakai, Kenichiro and Komatsu, Teppei and Iguchi, Yasuyuki and Takao, Hiroyuki and Ishibashi, Toshihiro and Murayama, Yuichi", title="Reliability of Smartphone for Diffusion-Weighted Imaging--Alberta Stroke Program Early Computed Tomography Scores in Acute Ischemic Stroke Patients: Diagnostic Test Accuracy Study", journal="J Med Internet Res", year="2020", month="Jun", day="9", volume="22", number="6", pages="e15893", keywords="smartphone app", keywords="DWI", keywords="ASPECTS", abstract="Background: High-quality neuroimages can be viewed using a medical app installed on a smartphone. 
Although interdevice agreement between smartphone and desktop PC monitor was found to be favorable for evaluating computed tomography images, there are no interdevice agreement data for diffusion-weighted imaging (DWI). Objective: The aim of our study was to compare DWI interpretation using the Join smartphone app with that using a desktop PC monitor, in terms of interdevice and interrater agreement and elapsed interpretation time. Methods: The ischemic change in the DWI of consecutive patients with acute stroke in the middle cerebral artery territory was graded by 2 vascular neurologists using the Join smartphone app and a desktop PC monitor. The vascular neurologists were blinded to all patient information. Each image was categorized as either Diffusion-Weighted Imaging--Alberta Stroke Program Early Computed Tomography Scores (DWI-ASPECTS) $\geq$7 or DWI-ASPECTS <7 according to the Japanese Society for Neuroendovascular Therapy. We analyzed interdevice agreement and interrater agreement with respect to DWI-ASPECTS. Elapsed interpretation time was compared between DWI-ASPECTS evaluated by the Join smartphone app and a desktop PC monitor. Results: We analyzed the images of 111 patients (66\% male; median age=69 years; median National Institutes of Health Stroke Scale score on admission=4). Interdevice agreement regarding DWI-ASPECTS between the smartphone and the desktop PC monitor was favorable (vascular neurologist 1: $\kappa$=0.777, P<.001, vascular neurologist 2: $\kappa$=0.787, P<.001). Interrater agreement was also satisfactory for the smartphone ($\kappa$=0.710, P<.001) and the desktop PC monitor ($\kappa$=0.663, P<.001). Median elapsed interpretation time was similar between the smartphone and the desktop PC monitor (vascular neurologist 1: 1.7 min vs 1.6 min; P=.64; vascular neurologist 2: 2.4 min vs 2.0 min; P=.14). Conclusions: The use of a smartphone app enables vascular neurologists to estimate DWI-ASPECTS accurately and rapidly. The Join medical smartphone app shows great promise in the management of acute stroke. ", doi="10.2196/15893", url="https://www.jmir.org/2020/6/e15893", url="http://www.ncbi.nlm.nih.gov/pubmed/32515744" } @Article{info:doi/10.2196/17252, author="Akbarian, Sina and Montazeri Ghahjaverestan, Nasim and Yadollahi, Azadeh and Taati, Babak", title="Distinguishing Obstructive Versus Central Apneas in Infrared Video of Sleep Using Deep Learning: Validation Study", journal="J Med Internet Res", year="2020", month="May", day="22", volume="22", number="5", pages="e17252", keywords="noncontact monitoring", keywords="sleep apnea", keywords="motion analysis", keywords="computer vision", keywords="obstructive apnea", keywords="central apnea", keywords="machine learning", keywords="deep learning", abstract="Background: Sleep apnea is a respiratory disorder characterized by an intermittent reduction (hypopnea) or cessation (apnea) of breathing during sleep. Depending on the presence of a breathing effort, sleep apnea is divided into obstructive sleep apnea (OSA) and central sleep apnea (CSA) based on the different pathologies involved. If the majority of apneas in a person are obstructive, they will be diagnosed as OSA or otherwise as CSA. In addition, as it is challenging and highly controversial to divide hypopneas into central or obstructive, the decision about sleep apnea type (OSA vs CSA) is made based on apneas only. Choosing the appropriate treatment relies on distinguishing between obstructive apnea (OA) and central apnea (CA). 
Objective: The objective of this study was to develop a noncontact method to distinguish between OAs and CAs. Methods: Five different computer vision-based algorithms were used to process infrared (IR) video data to track and analyze body movements to differentiate different types of apnea (OA vs CA). In the first two methods, supervised classifiers were trained to process optical flow information. In the remaining three methods, a convolutional neural network (CNN) was designed to extract distinctive features from optical flow and to distinguish OA from CA. Results: Overnight sleeping data of 42 participants (mean age 53, SD 15 years; mean BMI 30, SD 7 kg/m2; 27 men and 15 women; mean number of OA 16, SD 30; mean number of CA 3, SD 7; mean apnea-hypopnea index 27, SD 31 events/hour; mean sleep duration 5 hours, SD 1 hour) were collected for this study. The test and train data were recorded in two separate laboratory rooms. The best-performing model (3D-CNN) obtained 95\% accuracy and an F1 score of 89\% in differentiating OA vs CA. Conclusions: In this study, the first vision-based method was developed that differentiates apnea types (OA vs CA). The developed algorithm tracks and analyses chest and abdominal movements captured via an IR video camera. Unlike previously developed approaches, this method does not require any attachment to a user that could potentially alter the sleeping condition. ", doi="10.2196/17252", url="http://www.jmir.org/2020/5/e17252/", url="http://www.ncbi.nlm.nih.gov/pubmed/32441656" } @Article{info:doi/10.2196/18438, author="Ray, Arnab and Gupta, Aman and Al, Amutha", title="Skin Lesion Classification With Deep Convolutional Neural Network: Process Development and Validation", journal="JMIR Dermatol", year="2020", month="May", day="7", volume="3", number="1", pages="e18438", keywords="deep convolutional neural network", keywords="VGG16, Inceptionv3", keywords="Inception ResNet V2", keywords="DenseNet", keywords="skin cancer", keywords="cancer", keywords="neural network", keywords="machine learning", keywords="melanoma", abstract="Background: Skin cancer is the most common cancer and is often ignored by people at an early stage. There are 5.4 million new cases of skin cancer worldwide every year. Deaths due to skin cancer could be prevented by early detection of the mole. Objective: We propose a skin lesion classification system that has the ability to detect such moles at an early stage and is able to easily differentiate between a cancerous and noncancerous mole. Using this system, we would be able to save time and resources for both patients and practitioners. Methods: We created a deep convolutional neural network using an Inceptionv3 and DenseNet-201 pretrained model. Results: We found that using the concepts of fine-tuning and the ensemble learning model yielded superior results. Furthermore, fine-tuning the whole model helped models converge faster compared to fine-tuning only the top layers, giving better accuracy overall. Conclusions: Based on our research, we conclude that deep learning algorithms are highly suitable for classifying skin cancer images. 
", doi="10.2196/18438", url="http://derma.jmir.org/2020/1/e18438/" } @Article{info:doi/10.2196/18251, author="Yongping, Liang and Juan, Zhang and Zhou, Ping and Yongfeng, Zhao and Liu, Wengang and Shi, Yifan", title="Evaluation of the Quadri-Planes Method in Computer-Aided Diagnosis of Breast Lesions by Ultrasonography: Prospective Single-Center Study", journal="JMIR Med Inform", year="2020", month="May", day="5", volume="8", number="5", pages="e18251", keywords="ultrasonography", keywords="breast neoplasm", keywords="breast imaging reporting and data system (bi-rads)", keywords="breast neoplasm diagnosis", keywords="cancer screening", keywords="computer-aided diagnosis", keywords="breast cancer", abstract="Background: Computer-aided diagnosis (CAD) is a tool that can help radiologists diagnose breast lesions by ultrasonography. Previous studies have demonstrated that CAD can help reduce the incidence of missed diagnoses by radiologists. However, the optimal method to apply CAD to breast lesions using diagnostic planes has not been assessed. Objective: The aim of this study was to compare the performance of radiologists with different levels of experience when using CAD with the quadri-planes method to detect breast tumors. Methods: From November 2018 to October 2019, we enrolled patients in the study who had a breast mass as their most prominent symptom. We assigned 2 ultrasound radiologists (with 1 and 5 years of experience, respectively) to read breast ultrasonography images without CAD and then to perform a second reading while applying CAD with the quadri-planes method. We then compared the diagnostic performance of the readers for the 2 readings (without and with CAD). The McNemar test for paired data was used for statistical analysis. Results: A total of 331 patients were included in this study (mean age 43.88 years, range 17-70, SD 12.10), including 512 lesions (mean diameter 1.85 centimeters, SD 1.19; range 0.26-9.5); 200/512 (39.1\%) were malignant, and 312/512 (60.9\%) were benign. For CAD, the area under the receiver operating characteristic curve (AUC) improved significantly from 0.76 (95\% CI 0.71-0.79) with the cross-planes method to 0.84 (95\% CI 0.80-0.88; P<.001) with the quadri-planes method. For the novice reader, the AUC significantly improved from 0.73 (95\% CI 0.69-0.78) for the without-CAD mode to 0.83 (95\% CI 0.80-0.87; P<.001) for the combined-CAD mode with the quadri-planes method. For the experienced reader, the AUC improved from 0.85 (95\% CI 0.81-0.88) to 0.87 (95\% CI 0.84-0.91; P=.15). The kappa indicating consistency between the experienced reader and the novice reader for the combined-CAD mode was 0.63. For the novice reader, the sensitivity significantly improved from 60.0\% for the without-CAD mode to 79.0\% for the combined-CAD mode (P=.004). The specificity, negative predictive value, positive predictive value, and accuracy improved from 84.9\% to 87.8\% (P=.53), 76.8\% to 86.7\% (P=.07), 71.9\% to 80.6\% (P=.13), and 75.2\% to 84.4\% (P=.12), respectively. For the experienced reader, the sensitivity improved significantly from 76.0\% for the without-CAD mode to 87.0\% for the combined-CAD mode (P=.045). The NPV and accuracy moderately improved from 85.8\% and 86.3\% to 91.0\% (P=.27) and 87.0\% (P=.84), respectively. The specificity and positive predictive value decreased from 87.4\% to 81.3\% (P=.25) and from 87.2\% to 93.0\% (P=.16), respectively. 
Conclusions: S-Detect is a feasible diagnostic tool that can improve the sensitivity, accuracy, and AUC of the quadri-planes method for both novice and experienced readers while also improving the specificity for the novice reader. It demonstrates important application value in the clinical diagnosis of breast cancer. Trial Registration: ChiCTR.org.cn 1800019649; http://www.chictr.org.cn/showproj.aspx?proj=33094 ", doi="10.2196/18251", url="https://medinform.jmir.org/2020/5/e18251", url="http://www.ncbi.nlm.nih.gov/pubmed/32369039" } @Article{info:doi/10.2196/16225, author="Chun, Jaehyeong and Kim, Youngjun and Shin, Yoon Kyoung and Han, Hyup Sun and Oh, Yeul Sei and Chung, Tae-Young and Park, Kyung-Ah and Lim, Hui Dong", title="Deep Learning--Based Prediction of Refractive Error Using Photorefraction Images Captured by a Smartphone: Model Development and Validation Study", journal="JMIR Med Inform", year="2020", month="May", day="5", volume="8", number="5", pages="e16225", keywords="amblyopia", keywords="cycloplegic refraction", keywords="deep learning", keywords="deep convolutional neural network", keywords="mobile phone", keywords="photorefraction", keywords="refractive error", keywords="screening", abstract="Background: Accurately predicting refractive error in children is crucial for detecting amblyopia, which can lead to permanent visual impairment, but is potentially curable if detected early. Various tools have been adopted to more easily screen a large number of patients for amblyopia risk. Objective: For efficient screening, easy access to screening tools and an accurate prediction algorithm are the most important factors. In this study, we developed an automated deep learning--based system to predict the range of refractive error in children (mean age 4.32 years, SD 1.87 years) using 305 eccentric photorefraction images captured with a smartphone. Methods: Photorefraction images were divided into seven classes according to their spherical values as measured by cycloplegic refraction. Results: The trained deep learning model had an overall accuracy of 81.6\%, with the following accuracies for each refractive error class: 80.0\% for $\leq$--5.0 diopters (D), 77.8\% for >--5.0 D and $\leq$--3.0 D, 82.0\% for >--3.0 D and $\leq$--0.5 D, 83.3\% for >--0.5 D and <+0.5 D, 82.8\% for $\geq$+0.5 D and <+3.0 D, 79.3\% for $\geq$+3.0 D and <+5.0 D, and 75.0\% for $\geq$+5.0 D. These results indicate that our deep learning--based system performed sufficiently accurately. Conclusions: This study demonstrated the potential of precise smartphone-based prediction systems for refractive error using deep learning and further yielded a robust collection of pediatric photorefraction images. 
", doi="10.2196/16225", url="https://medinform.jmir.org/2020/5/e16225", url="http://www.ncbi.nlm.nih.gov/pubmed/32369035" } @Article{info:doi/10.2196/18149, author="Eapen, Raj Bell and Archer, Norm and Sartipi, Kamran", title="LesionMap: A Method and Tool for the Semantic Annotation of Dermatological Lesions for Documentation and Machine Learning", journal="JMIR Dermatol", year="2020", month="Apr", day="20", volume="3", number="1", pages="e18149", keywords="LesionMap", keywords="LesionMapper", keywords="digital imaging", keywords="machine learning", keywords="dermatology", doi="10.2196/18149", url="http://derma.jmir.org/2020/1/e18149/" } @Article{info:doi/10.2196/15963, author="Wu, Yi-Ying and Huang, Tzu-Chuan and Ye, Ren-Hua and Fang, Wen-Hui and Lai, Shiue-Wei and Chang, Ping-Ying and Liu, Wei-Nung and Kuo, Tai-Yu and Lee, Cho-Hao and Tsai, Wen-Chiuan and Lin, Chin", title="A Hematologist-Level Deep Learning Algorithm (BMSNet) for Assessing the Morphologies of Single Nuclear Balls in Bone Marrow Smears: Algorithm Development", journal="JMIR Med Inform", year="2020", month="Apr", day="8", volume="8", number="4", pages="e15963", keywords="artificial intelligence", keywords="bone marrow examination", keywords="leukemia", keywords="myelodysplastic syndrome", keywords="deep learning", abstract="Background: Bone marrow aspiration and biopsy remain the gold standard for the diagnosis of hematological diseases despite the development of flow cytometry (FCM) and molecular and gene analyses. However, the interpretation of the results is laborious and operator dependent. Furthermore, the obtained results exhibit inter- and intravariations among specialists. Therefore, it is important to develop a more objective and automated analysis system. Several deep learning models have been developed and applied in medical image analysis but not in the field of hematological histology, especially for bone marrow smear applications. Objective: The aim of this study was to develop a deep learning model (BMSNet) for assisting hematologists in the interpretation of bone marrow smears for faster diagnosis and disease monitoring. Methods: From January 1, 2016, to December 31, 2018, 122 bone marrow smears were photographed and divided into a development cohort (N=42), a validation cohort (N=70), and a competition cohort (N=10). The development cohort included 17,319 annotated cells from 291 high-resolution photos. In total, 20 photos were taken for each patient in the validation cohort and the competition cohort. This study included eight annotation categories: erythroid, blasts, myeloid, lymphoid, plasma cells, monocyte, megakaryocyte, and unable to identify. BMSNet is a convolutional neural network with the YOLO v3 architecture, which detects and classifies single cells in a single model. Six visiting staff members participated in a human-machine competition, and the results from the FCM were regarded as the ground truth. Results: In the development cohort, according to 6-fold cross-validation, the average precision of the bounding box prediction without consideration of the classification is 67.4\%. After removing the bounding box prediction error, the precision and recall of BMSNet were similar to those of the hematologists in most categories. In detecting more than 5\% of blasts in the validation cohort, the area under the curve (AUC) of BMSNet (0.948) was higher than the AUC of the hematologists (0.929) but lower than the AUC of the pathologists (0.985). 
In detecting more than 20\% of blasts, the AUCs of the hematologists (0.981) and pathologists (0.980) were similar and were higher than the AUC of BMSNet (0.942). Further analysis showed that the performance difference could be attributed to the myelodysplastic syndrome cases. In the competition cohort, the mean value of the correlations between BMSNet and FCM was 0.960, and the mean values of the correlations between the visiting staff and FCM ranged between 0.952 and 0.990. Conclusions: Our deep learning model can assist hematologists in interpreting bone marrow smears by facilitating and accelerating the detection of hematopoietic cells. However, a detailed morphological interpretation still requires trained hematologists. ", doi="10.2196/15963", url="http://medinform.jmir.org/2020/4/e15963/", url="http://www.ncbi.nlm.nih.gov/pubmed/32267237" } @Article{info:doi/10.2196/16334, author="Yongping, Liang and Zhou, Ping and Juan, Zhang and Yongfeng, Zhao and Liu, Wengang and Shi, Yifan", title="Performance of Computer-Aided Diagnosis in Ultrasonography for Detection of Breast Lesions Less and More Than 2 cm: Prospective Comparative Study", journal="JMIR Med Inform", year="2020", month="Mar", day="2", volume="8", number="3", pages="e16334", keywords="ultrasonography", keywords="breast neoplasm", keywords="breast imaging reporting and data system (BI-RADS)", keywords="breast neoplasms diagnosis", keywords="cancer screening", keywords="computer diagnostic aid", abstract="Background: Computer-aided diagnosis (CAD) is used as an aid tool by radiologists on breast lesion diagnosis in ultrasonography. Previous studies demonstrated that CAD can improve the diagnosis performance of radiologists. However, the optimal use of CAD on breast lesions according to size (below or above 2 cm) has not been assessed. Objective: The aim of this study was to compare the performance of different radiologists using CAD to detect breast tumors less and more than 2 cm in size. Methods: We prospectively enrolled 261 consecutive patients (mean age 43 years; age range 17-70 years), including 398 lesions (148 lesions >2 cm, 79 malignant and 69 benign; 250 lesions $\leq$2 cm, 71 malignant and 179 benign) with breast mass as the prominent symptom. One novice radiologist with 1 year of ultrasonography experience and one experienced radiologist with 5 years of ultrasonography experience were each assigned to read the ultrasonography images without CAD, and then again at a second reading while applying the CAD S-Detect. We then compared the diagnostic performance of the readers in the two readings (without and combined with CAD) with breast imaging. The McNemar test for paired data was used for statistical analysis. Results: For the novice reader, the area under the receiver operating characteristic curve (AUC) improved from 0.74 (95\% CI 0.67-0.82) at the without-CAD mode to 0.88 (95\% CI 0.83-0.93; P<.001) at the combined-CAD mode in lesions $\leq$2 cm. For the experienced reader, the AUC improved from 0.84 (95\% CI 0.77-0.90) to 0.90 (95\% CI 0.86-0.94; P=.002). In lesions >2 cm, the AUC moderately decreased from 0.81 to 0.80 (novice reader) and from 0.90 to 0.82 (experienced reader). The sensitivity of the novice and experienced reader in lesions $\leq$2 cm improved from 61.97\% and 73.23\% at the without-CAD mode to 90.14\% and 97.18\% (both P<.001) at the combined-CAD mode, respectively. 
Conclusions: S-Detect is a feasible diagnostic tool that can improve the sensitivity for both novice and experienced readers, while also improving the negative predictive value and AUC for lesions $\leq$2 cm, demonstrating important application value in the clinical diagnosis of breast cancer. Trial Registration: Chinese Clinical Trial Registry ChiCTR1800019649; http://www.chictr.org.cn/showprojen.aspx?proj=33094 ", doi="10.2196/16334", url="https://medinform.jmir.org/2020/3/e16334", url="http://www.ncbi.nlm.nih.gov/pubmed/32130149" } @Article{info:doi/10.2196/16291, author="Dallora, Luiza Ana and Berglund, Sanmartin Johan and Brogren, Martin and Kvist, Ola and Diaz Ruiz, Sandra and D{\"u}bbel, Andr{\'e} and Anderberg, Peter", title="Age Assessment of Youth and Young Adults Using Magnetic Resonance Imaging of the Knee: A Deep Learning Approach", journal="JMIR Med Inform", year="2019", month="Dec", day="5", volume="7", number="4", pages="e16291", keywords="age assessment", keywords="bone age", keywords="skeletal maturity", keywords="deep learning", keywords="convolutional neural networks", keywords="transfer learning", keywords="machine learning", keywords="magnetic resonance imaging", keywords="medical imaging", keywords="knee", abstract="Background: Bone age assessment (BAA) is an important tool for diagnosis and in determining the time of treatment in a number of pediatric clinical scenarios, as well as in legal settings where it is used to estimate the chronological age of an individual where valid documents are lacking. Traditional methods for BAA suffer from drawbacks, such as exposing juveniles to radiation, intra- and interrater variability, and the time spent on the assessment. The employment of automated methods such as deep learning and the use of magnetic resonance imaging (MRI) can address these drawbacks and improve the assessment of age. Objective: The aim of this paper is to propose an automated approach for age assessment of youth and young adults in the age range when the length growth ceases and growth zones are closed (14-21 years of age) by employing deep learning using MRI of the knee. Methods: This study carried out MRI examinations of the knee of 402 volunteer subjects---221 males (55.0\%) and 181 (45.0\%) females---aged 14-21 years. The method comprised two convolutional neural network (CNN) models: the first one selected the most informative images of an MRI sequence, concerning age-assessment purposes; these were then used in the second module, which was responsible for the age estimation. Different CNN architectures were tested, both training from scratch and employing transfer learning. Results: The CNN architecture that provided the best results was GoogLeNet pretrained on the ImageNet database. The proposed method was able to assess the age of male subjects in the range of 14-20.5 years, with a mean absolute error (MAE) of 0.793 years, and of female subjects in the range of 14-19.5 years, with an MAE of 0.988 years. Regarding the classification of minors---with the threshold of 18 years of age---an accuracy of 98.1\% for male subjects and 95.0\% for female subjects was achieved. Conclusions: The proposed method was able to assess the age of youth and young adults from 14 to 20.5 years of age for male subjects and 14 to 19.5 years of age for female subjects in a fully automated manner, without the use of ionizing radiation, addressing the drawbacks of traditional methods. 
", doi="10.2196/16291", url="http://medinform.jmir.org/2019/4/e16291/", url="http://www.ncbi.nlm.nih.gov/pubmed/31804183" } @Article{info:doi/10.2196/14919, author="Cambron, C. Julia and Wyatt, D. Kirk and Lohse, M. Christine and Underwood, Y. Page and Hellmich, R. Thomas", title="Medical Videography Using a Mobile App: Retrospective Analysis", journal="JMIR Mhealth Uhealth", year="2019", month="Dec", day="3", volume="7", number="12", pages="e14919", keywords="photography", keywords="video recording", keywords="telemedicine", keywords="medical informatics applications", abstract="Background: As mobile devices and apps grow in popularity, they are increasingly being used by health care providers to aid clinical care. At our institution, we developed and implemented a point-of-care clinical photography app that also permitted the capture of video recordings; however, the clinical findings it was used to capture and the outcomes that resulted following video recording were unclear. Objective: The study aimed to assess the use of a mobile clinical video recording app at our institution and its impact on clinical care. Methods: A single reviewer retrospectively reviewed video recordings captured between April 2016 and July 2017, associated metadata, and patient records. Results: We identified 362 video recordings that were eligible for inclusion. Most video recordings (54.1\%; 190/351) were captured by attending physicians. Specialties recording a high number of video recordings included orthopedic surgery (33.7\%; 122/362), neurology (21.3\%; 77/362), and ophthalmology (15.2\%; 55/362). Consent was clearly documented in the medical record in less than one-third (31.8\%; 115/362) of the records. People other than the patient were incidentally captured in 29.6\% (107/362) of video recordings. Although video recordings were infrequently referenced in notes corresponding to the clinical encounter (12.2\%; 44/362), 7.7\% (22/286) of patients were video recorded in subsequent clinical encounters, with 82\% (18/22) of these corresponding to the same finding seen in the index video. Store-and-forward telemedicine was documented in clinical notes in only 2 cases (0.5\%; 2/362). Videos appeared to be of acceptable quality for clinical purposes. Conclusions: Video recordings were captured in a variety of clinical settings. Documentation of consent was inconsistent, and other individuals were incidentally included in videos. Although clinical impact was not always clearly evident through retrospective review because of limited documentation, potential uses include documentation for future reference and store-and-forward telemedicine. Repeat video recordings of the same finding provide evidence of use to track the findings over time. Clinical video recordings have the potential to support clinical care; however, documentation of consent requires standardization. 
", doi="10.2196/14919", url="https://mhealth.jmir.org/2019/12/e14919", url="http://www.ncbi.nlm.nih.gov/pubmed/31793894" } @Article{info:doi/10.2196/14310, author="Shin, Youngbin and Kim, Won Kyung and Lee, Junghyun Amy and Sung, Sub Yu and Ahn, Suah and Koo, Hwan Ja and Choi, Gyu Chang and Ko, Yousun and Kim, Sung Ho and Park, Ho Seong", title="A Good Practice--Compliant Clinical Trial Imaging Management System for Multicenter Clinical Trials: Development and Validation Study", journal="JMIR Med Inform", year="2019", month="Aug", day="30", volume="7", number="3", pages="e14310", keywords="clinical trial", keywords="information technology", keywords="diagnostic imaging", keywords="regulation", keywords="computerized system validation", abstract="Background: With the rapid increase in utilization of imaging endpoints in multicenter clinical trials, the amount of data and workflow complexity have also increased. A Clinical Trial Imaging Management System (CTIMS) is required to comprehensively support imaging processes in clinical trials. The US Food and Drug Administration (FDA) issued a guidance protocol in 2018 for appropriate use of medical imaging in accordance with many regulations including the Good Clinical Practice (GCP) guidelines. Existing research on CTIMS, however, has mainly focused on functions and structures of systems rather than regulation and compliance. Objective: We aimed to develop a comprehensive CTIMS to meet the current regulatory guidelines and various required functions. We also aimed to perform computerized system validation focusing on the regulatory compliance of our CTIMS. Methods: Key regulatory requirements of CTIMS were extracted thorough review of many related regulations and guidelines including International Conference on Harmonization-GCP E6, FDA 21 Code of Federal Regulations parts 11 and 820, Good Automated Manufacturing Practice, and Clinical Data Interchange Standards Consortium. The system architecture was designed in accordance with these regulations by a multidisciplinary team including radiologists, engineers, clinical trial specialists, and regulatory medicine professionals. Computerized system validation of the developed CTIMS was performed internally and externally. Results: Our CTIMS (AiCRO) was developed based on a two-layer design composed of the server system and the client system, which is efficient at meeting the regulatory and functional requirements. The server system manages system security, data archive, backup, and audit trail. The client system provides various functions including deidentification, image transfer, image viewer, image quality control, and electronic record. Computerized system validation was performed internally using a V-model and externally by a global quality assurance company to demonstrate that AiCRO meets all regulatory and functional requirements. Conclusions: We developed a Good Practice--compliant CTIMS---AiCRO system---to manage large amounts of image data and complexity of imaging management processes in clinical trials. Our CTIMS adopts and adheres to all regulatory and functional requirements and has been thoroughly validated. ", doi="10.2196/14310", url="http://medinform.jmir.org/2019/3/e14310/", url="http://www.ncbi.nlm.nih.gov/pubmed/31471962" } @Article{info:doi/10.2196/13423, author="Hyde, Lynne Lisa and Boyes, W. Allison and Mackenzie, J. 
Lisa and Leigh, Lucy and Oldmeadow, Christopher and Riveros, Carlos and Sanson-Fisher, Rob", title="Electronic Health Literacy Among Magnetic Resonance Imaging and Computed Tomography Medical Imaging Outpatients: Cluster Analysis", journal="J Med Internet Res", year="2019", month="Aug", day="28", volume="21", number="8", pages="e13423", keywords="internet", keywords="health", keywords="literacy", keywords="cluster analysis", keywords="medical imaging", abstract="Background: Variations in an individual's electronic health (eHealth) literacy may influence the degree to which health consumers can benefit from eHealth. The eHealth Literacy Scale (eHEALS) is a common measure of eHealth literacy. However, the lack of guidelines for the standardized interpretation of eHEALS scores limits its research and clinical utility. Cut points are often arbitrarily applied at the eHEALS item or global level, which assumes a dichotomy of high and low eHealth literacy. This approach disregards scale constructs and results in inaccurate and inconsistent conclusions. Cluster analysis is an exploratory technique, which can be used to overcome these issues, by identifying classes of patients reporting similar eHealth literacy without imposing data cut points. Objective: The aim of this cross-sectional study was to identify classes of patients reporting similar eHealth literacy and assess characteristics associated with class membership. Methods: Medical imaging outpatients were recruited consecutively in the waiting room of one major public hospital in New South Wales, Australia. Participants completed a self-report questionnaire assessing their sociodemographic characteristics and eHealth literacy, using the eHEALS. Latent class analysis was used to explore eHealth literacy clusters identified by a distance-based cluster analysis, and to identify characteristics associated with class membership. Results: Of the 268 eligible and consenting participants, 256 (95.5\%) completed the eHEALS. Consistent with distance-based findings, 4 latent classes were identified, which were labeled as low (21.1\%, 54/256), moderate (26.2\%, 67/256), high (32.8\%, 84/256), and very high (19.9\%, 51/256) eHealth literacy. Compared with the low class, participants who preferred to receive a lot of health information reported significantly higher odds of moderate eHealth literacy (odds ratio 16.67, 95\% CI 1.67-100.00; P=.02), and those who used the internet at least daily reported significantly higher odds of high eHealth literacy (odds ratio 4.76, 95\% CI 1.59-14.29; P=.007). Conclusions: The identification of multiple classes of eHealth literacy, using both distance-based and latent class analyses, highlights the limitations of using the eHEALS global score as a dichotomous measurement tool. The findings suggest that eHealth literacy support needs vary in this population. The identification of low and moderate eHealth literacy classes indicate that the design of eHealth resources should be tailored to patients' varying levels of eHealth literacy. eHealth literacy improvement interventions are needed, and these should be targeted based on individuals' internet use frequency and health information amount preferences. ", doi="10.2196/13423", url="http://www.jmir.org/2019/8/e13423/", url="http://www.ncbi.nlm.nih.gov/pubmed/31464188" } @Article{info:doi/10.2196/10010, author="Shen, Jiayi and Zhang, P. Casper J. 
and Jiang, Bangsheng and Chen, Jiebin and Song, Jian and Liu, Zherui and He, Zonglin and Wong, Yi Sum and Fang, Po-Han and Ming, Wai-Kit", title="Artificial Intelligence Versus Clinicians in Disease Diagnosis: Systematic Review", journal="JMIR Med Inform", year="2019", month="Aug", day="16", volume="7", number="3", pages="e10010", keywords="artificial intelligence", keywords="deep learning", keywords="diagnosis", keywords="diagnostic imaging", keywords="image interpretation, computer-assisted", keywords="patient-centered care", abstract="Background: Artificial intelligence (AI) has been extensively used in a range of medical fields to promote therapeutic development. The development of diverse AI techniques has also contributed to early detections, disease diagnoses, and referral management. However, concerns about the value of advanced AI in disease diagnosis have been raised by health care professionals, medical service providers, and health policy decision makers. Objective: This review aimed to systematically examine the literature, in particular, focusing on the performance comparison between advanced AI and human clinicians to provide an up-to-date summary regarding the extent of the application of AI to disease diagnoses. By doing so, this review discussed the relationship between the current advanced AI development and clinicians with respect to disease diagnosis and thus therapeutic development in the long run. Methods: We systematically searched articles published between January 2000 and March 2019 following the Preferred Reporting Items for Systematic reviews and Meta-Analysis in the following databases: Scopus, PubMed, CINAHL, Web of Science, and the Cochrane Library. According to the preset inclusion and exclusion criteria, only articles comparing the medical performance between advanced AI and human experts were considered. Results: A total of 9 articles were identified. A convolutional neural network was the commonly applied advanced AI technology. Owing to the variation in medical fields, there is a distinction between individual studies in terms of classification, labeling, training process, dataset size, and algorithm validation of AI. Performance indices reported in articles included diagnostic accuracy, weighted errors, false-positive rate, sensitivity, specificity, and the area under the receiver operating characteristic curve. The results showed that the performance of AI was at par with that of clinicians and exceeded that of clinicians with less experience. Conclusions: Current AI development has a diagnostic performance that is comparable with medical experts, especially in image recognition-related fields. Further studies can be extended to other types of medical imaging such as magnetic resonance imaging and other medical practices unrelated to images. With the continued development of AI-assisted technologies, the clinical implications underpinned by clinicians' experience and guided by patient-centered health care principle should be constantly considered in future AI-related and other technology-based medical research. 
", doi="10.2196/10010", url="http://medinform.jmir.org/2019/3/e10010/", url="http://www.ncbi.nlm.nih.gov/pubmed/31420959" } @Article{info:doi/10.2196/12660, author="Masud, Rafia and Al-Rei, Mona and Lokker, Cynthia", title="Computer-Aided Detection for Breast Cancer Screening in Clinical Settings: Scoping Review", journal="JMIR Med Inform", year="2019", month="Jul", day="18", volume="7", number="3", pages="e12660", keywords="computer-aided detection", keywords="machine learning", keywords="screening mammography", keywords="breast cancer", keywords="radiology", keywords="implementation", abstract="Background: With the growth of machine learning applications, the practice of medicine is evolving. Computer-aided detection (CAD) is a software technology that has become widespread in radiology practices, particularly in breast cancer screening for improving detection rates at earlier stages. Many studies have investigated the diagnostic accuracy of CAD, but its implementation in clinical settings has been largely overlooked. Objective: The aim of this scoping review was to summarize recent literature on the adoption and implementation of CAD during breast cancer screening by radiologists and to describe barriers and facilitators for CAD use. Methods: The MEDLINE database was searched for English, peer-reviewed articles that described CAD implementation, including barriers or facilitators, in breast cancer screening and were published between January 2010 and March 2018. Articles describing the diagnostic accuracy of CAD for breast cancer detection were excluded. The search returned 526 citations, which were reviewed in duplicate through abstract and full-text screening. Reference lists and cited references in the included studies were reviewed. Results: A total of nine articles met the inclusion criteria. The included articles showed that there is a tradeoff between the facilitators and barriers for CAD use. Facilitators for CAD use were improved breast cancer detection rates, increased profitability of breast imaging, and time saved by replacing double reading. Identified barriers were less favorable perceptions of CAD compared to double reading by radiologists, an increase in recall rates of patients for further testing, increased costs, and unclear effect on patient outcomes. Conclusions: There is a gap in the literature between CAD's well-established diagnostic accuracy and its implementation and use by radiologists. Generally, the perceptions of radiologists have not been considered and details of implementation approaches for adoption of CAD have not been reported. The cost-effectiveness of CAD has not been well established for breast cancer screening in various populations. Further research is needed on how to best facilitate CAD in radiology practices in order to optimize patient outcomes, and the views of radiologists need to be better considered when advancing CAD use. 
", doi="10.2196/12660", url="http://medinform.jmir.org/2019/3/e12660/", url="http://www.ncbi.nlm.nih.gov/pubmed/31322128" } @Article{info:doi/10.2196/12595, author="Halaska, Ciarra and Sachs, Peter and Sanfilippo, Kate and Lin, Chen-Tan", title="Patient Attitudes About Viewing Their Radiology Images Online: Preintervention Survey", journal="J Med Internet Res", year="2019", month="Jul", day="18", volume="21", number="7", pages="e12595", keywords="connected health", keywords="electronic health records", keywords="information transparency with patients", keywords="online patient-physician communication", keywords="online patient portal", keywords="radiology images", keywords="second opinion", keywords="social media", keywords="test result management", abstract="Background: Although patient data is available through electronic portals, little information exists about the benefits and/or challenges of providing patients with online access to their radiology images. Objective: The aims of this quality improvement project were to understand patient attitudes toward being able to view their radiology images online and determine how information should be presented to ensure the images are helpful to the patients, rather than causing confusion and anxiety. Methods: An online survey of consumers was conducted to evaluate attitudes toward online access to personal radiological images. Results: A total of 105 responses were received from 686 community members (15.3\%). Of 105 consumers, 94 (89.5\%) reported a desire to have access to the radiology images within their online patient portal; 86.7\% (91/105) believed it would help them better understand their medical conditions and 81.0\% (85/105) said this would help them feel more in control of their care. Most respondents (74/105, 70.5\%) said it would help them feel reassured that their doctor was doing the right thing, and 63.8\% (67/105) said it would increase their level of trust in their doctor. Among surveyed patients, 78.1\% (82/105) valued viewing their radiology images online, while 92.4\% (97/105) valued their online radiology reports. Most patients (69/105, 65.7\%) wished to discuss their results with their ordering clinician, 29.5\% (31/105) wished to discuss with their interpreting radiologist, and 3.8\% (4/105) wished to share their images on social media. The biggest potential concern among 23.8\% (25/105) was that the images would be confusing. Conclusions: A large majority of surveyed patients desired the ability to view their radiology images online and anticipated many benefits and few risks. Health care organizations with electronic health records and online patient portals should consider augmenting their existing portals with this highly desired feature. To avoid the biggest patient concern, radiology reports should accompany images. Patients wanted to discuss their results with their ordering physician and their interpreting radiologist. Some even would like to share results on social media. Further research on the actual experience with such a tool will be needed. ", doi="10.2196/12595", url="http://www.jmir.org/2019/7/e12595/", url="http://www.ncbi.nlm.nih.gov/pubmed/31322124" } @Article{info:doi/10.2196/12109, author="Fu, Sunyang and Leung, Y. Lester and Wang, Yanshan and Raulli, Anne-Olivia and Kallmes, F. David and Kinsman, A. Kristin and Nelson, B. Kristoff and Clark, S. Michael and Luetmer, H. Patrick and Kingsbury, R. Paul and Kent, M. 
David and Liu, Hongfang", title="Natural Language Processing for the Identification of Silent Brain Infarcts From Neuroimaging Reports", journal="JMIR Med Inform", year="2019", month="Apr", day="21", volume="7", number="2", pages="e12109", keywords="natural language processing", keywords="neuroimaging", keywords="electronic health records", abstract="Background: Silent brain infarction (SBI) is defined as the presence of 1 or more brain lesions, presumed to be because of vascular occlusion, found by neuroimaging (magnetic resonance imaging or computed tomography) in patients without clinical manifestations of stroke. It is more common than stroke and can be detected in 20\% of healthy elderly people. Early detection of SBI may mitigate the risk of stroke by offering preventative treatment plans. Natural language processing (NLP) techniques offer an opportunity to systematically identify SBI cases from electronic health records (EHRs) by extracting, normalizing, and classifying SBI-related incidental findings interpreted by radiologists from neuroimaging reports. Objective: This study aimed to develop NLP systems to determine individuals with incidentally discovered SBIs from neuroimaging reports at 2 sites: Mayo Clinic and Tufts Medical Center. Methods: Both rule-based and machine learning approaches were adopted in developing the NLP system. The rule-based system was implemented using the open source NLP pipeline MedTagger, developed by Mayo Clinic. Features for rule-based systems, including significant words and patterns related to SBI, were generated using pointwise mutual information. The machine learning models included a convolutional neural network (CNN), random forest, support vector machine, and logistic regression. The performance of the NLP algorithm was compared with a manually created gold standard. The gold standard dataset includes 1000 radiology reports randomly retrieved from the 2 study sites (Mayo and Tufts) corresponding to patients with no prior or current diagnosis of stroke or dementia. Of the 1000 reports, 400 were randomly sampled and double read to determine interannotator agreement. The gold standard dataset was split equally into 3 subsets for training, development, and testing. Results: Among the 400 reports selected to determine interannotator agreement, 5 reports were removed due to invalid scan types. The interannotator agreements across Mayo and Tufts neuroimaging reports were 0.87 and 0.91, respectively. The rule-based system yielded the best performance in predicting SBI with an accuracy, sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) of 0.991, 0.925, 1.000, 1.000, and 0.990, respectively. The CNN achieved the best score in predicting white matter disease (WMD) with an accuracy, sensitivity, specificity, PPV, and NPV of 0.994, 0.994, 0.994, 0.994, and 0.994, respectively. Conclusions: We adopted a standardized data abstraction and modeling process to develop NLP techniques (rule-based and machine learning) to detect incidental SBIs and WMDs from annotated neuroimaging reports. Validation statistics suggested a high feasibility of detecting SBIs and WMDs from EHRs using NLP. 
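The validation statistics quoted in the preceding abstract (accuracy, sensitivity, specificity, PPV, and NPV) all derive from a 2 x 2 confusion matrix. The short Python sketch below shows those standard formulas applied to hypothetical counts chosen only for illustration; the counts are not the study's data.

def binary_classification_metrics(tp, fp, tn, fn):
    """Standard metrics derived from a 2 x 2 confusion matrix."""
    return {
        "accuracy": (tp + tn) / (tp + fp + tn + fn),
        "sensitivity": tp / (tp + fn),  # true-positive rate (recall)
        "specificity": tn / (tn + fp),  # true-negative rate
        "ppv": tp / (tp + fp),          # positive predictive value (precision)
        "npv": tn / (tn + fn),          # negative predictive value
    }

# Hypothetical counts for a report-level classifier, chosen only for illustration.
print(binary_classification_metrics(tp=45, fp=5, tn=40, fn=10))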
", doi="10.2196/12109", url="http://medinform.jmir.org/2019/2/e12109/", url="http://www.ncbi.nlm.nih.gov/pubmed/31066686" } @Article{info:doi/10.2196/12870, author="Hanna-Pladdy, Brenda and Gullapalli, Rao and Chen, Hegang", title="Functional Magnetic Resonance Imaging Biomarkers Predicting Cognitive Progression in Parkinson Disease: Protocol for a Prospective Longitudinal Cohort Study", journal="JMIR Res Protoc", year="2019", month="Apr", day="29", volume="8", number="4", pages="e12870", keywords="Parkinson disease", keywords="cognition", keywords="disease progression", keywords="dementia", keywords="mild cognitive impairment", keywords="biomarkers", keywords="functional neuroimaging", abstract="Background: Cardinal features of Parkinson disease (PD) are motor symptoms, but nonmotor features such as mild cognitive impairment (MCI) are common early in the disease process. MCI can progress and convert to dementia in advanced stages, creating significant disability and reduced quality of life. The primary pathological substrate for cognitive decline in PD is unclear, and there are no reliable biomarkers predicting the risk of conversion to dementia. A subgroup of PD patients with visual hallucinations may display more rapid conversion to dementia, suggesting that regional markers of visuoperceptual dysfunction may be sensitive to pathologic density in posterior cortical regions. Objective: The purpose of this project is to characterize PD-MCI and evaluate the utility of genetic and neuroimaging biomarkers in predicting cognitive outcomes with a prospective longitudinal study. We will evaluate whether accelerated cognitive progression may be reflected in biomarkers of early posterior cortical changes reflective of $\alpha$-synuclein deposition. Methods: We will evaluate a cohort of early-stage PD patients with the following methods to predict cognitive progression: (1) serial neuropsychological evaluations including detailed visuoperceptual functioning across 4 years; (2) genetic analysis of SNCA ($\alpha$-synuclein), MAPT (microtubule-associated tau), and APOE (apolipoprotein E); (3) an event-related functional magnetic resonance imaging paradigm of object recognition memory; and (4) anatomical and regional brain activation changes (resting-state functional magnetic resonance imaging) across 4 years. Results: The project received funding from the National Institutes of Health in August 2017, and data collection began in February 2018. Enrollment is ongoing, and subjects will be evaluated annually for 4 years extended across a 5-year project including data analysis and image processing. Conclusions: Cognitive, genetic, and structural and functional magnetic resonance imaging will characterize neural network changes predictive of cognitive progression in PD across 4 years. Identification of biomarkers with sensitivity for early prediction and estimation of risk for conversion to dementia in PD will pave the way for effective intervention with neuroprotective therapies during the critical stage when treatment can have the greatest impact. International Registered Report Identifier (IRRID): DERR1-10.2196/12870 ", doi="10.2196/12870", url="http://www.researchprotocols.org/2019/4/e12870/", url="http://www.ncbi.nlm.nih.gov/pubmed/31033450" } @Article{info:doi/10.2196/13822, author="Tariq, Qandeel and Fleming, Lanyon Scott and Schwartz, Nicole Jessey and Dunlap, Kaitlyn and Corbin, Conor and Washington, Peter and Kalantarian, Haik and Khan, Z. Naila and Darmstadt, L. 
Gary and Wall, Paul Dennis", title="Detecting Developmental Delay and Autism Through Machine Learning Models Using Home Videos of Bangladeshi Children: Development and Validation Study", journal="J Med Internet Res", year="2019", month="Apr", day="24", volume="21", number="4", pages="e13822", keywords="autism", keywords="autism spectrum disorder", keywords="machine learning", keywords="developmental delays", keywords="clinical resources", keywords="Bangladesh", keywords="Biomedical Data Science", abstract="Background: Autism spectrum disorder (ASD) is currently diagnosed using qualitative methods that measure between 20-100 behaviors, can span multiple appointments with trained clinicians, and take several hours to complete. In our previous work, we demonstrated the efficacy of machine learning classifiers to accelerate the process by collecting home videos of US-based children, identifying a reduced subset of behavioral features that are scored by untrained raters using a machine learning classifier to determine children's ``risk scores'' for autism. We achieved an accuracy of 92\% (95\% CI 88\%-97\%) on US videos using a classifier built on five features. Objective: Using videos of Bangladeshi children collected from Dhaka Shishu Children's Hospital, we aim to scale our pipeline to another culture and other developmental delays, including speech and language conditions. Methods: Although our previously published and validated pipeline and set of classifiers perform reasonably well on Bangladeshi videos (75\% accuracy, 95\% CI 71\%-78\%), this work improves on that accuracy through the development and application of a powerful new technique for adaptive aggregation of crowdsourced labels. We enhance both the utility and performance of our model by building two classification layers: The first layer distinguishes between typical and atypical behavior, and the second layer distinguishes between ASD and non-ASD. In each of the layers, we use a unique rater weighting scheme to aggregate classification scores from different raters based on their expertise. We also determine Shapley values for the most important features in the classifier to understand how the classifiers' process aligns with clinical intuition. Results: Using these techniques, we achieved an accuracy (area under the curve [AUC]) of 76\% (SD 3\%) and sensitivity of 76\% (SD 4\%) for identifying atypical children from among developmentally delayed children, and an accuracy (AUC) of 85\% (SD 5\%) and sensitivity of 76\% (SD 6\%) for identifying children with ASD from those predicted to have other developmental delays. Conclusions: These results show promise for using a mobile video-based and machine learning--directed approach for early and remote detection of autism in Bangladeshi children. This strategy could provide important resources for developmental health in developing countries with few clinical resources for diagnosis, helping children get access to care at an early age. Future research aimed at extending the application of this approach to identify a range of other conditions and determine the population-level burden of developmental disabilities and impairments will be of high value. ", doi="10.2196/13822", url="http://www.jmir.org/2019/4/e13822/", url="http://www.ncbi.nlm.nih.gov/pubmed/31017583" } @Article{info:doi/10.2196/12539, author="Vidal-Alaball, Josep and Royo Fibla, D{\'i}dac and Zapata, A. Miguel and Marin-Gomez, X. 
Francesc and Solans Fernandez, Oscar", title="Artificial Intelligence for the Detection of Diabetic Retinopathy in Primary Care: Protocol for Algorithm Development", journal="JMIR Res Protoc", year="2019", month="Feb", day="01", volume="8", number="2", pages="e12539", keywords="diabetes mellitus", keywords="diabetic retinopathy", keywords="fundus oculi", keywords="artificial intelligence", keywords="computer assisted diagnosis", keywords="neural network computer", abstract="Background: Diabetic retinopathy (DR) is one of the most important causes of blindness worldwide, especially in developed countries. In diabetic patients, periodic examination of the back of the eye using a nonmydriatic camera has been widely demonstrated to be an effective system to control and prevent the onset of DR. Convolutional neural networks have been used to detect DR, achieving very high sensitivities and specificities. Objective: The objective of this paper was to develop an artificial intelligence (AI) algorithm for the detection of signs of DR in diabetic patients and to scientifically validate the algorithm to be used as a screening tool in primary care. Methods: Under this project, 2 studies will be conducted concurrently: (1) Development of an algorithm with AI to detect signs of DR in patients with diabetes and (2) A prospective study comparing the diagnostic capacity of the AI algorithm with respect to the actual system of family physicians evaluating the images. The standard reference to compare with will be a blinded double reading conducted by retina specialists. For the development of the AI algorithm, different iterations and training runs will be performed on the same set of data. Before starting each new training run, the dataset will be randomly divided into 2 groups. A group with 80\% of the images will be used during the training (training dataset), and the remaining 20\% of the images will be used to validate the results (validation dataset) of each cycle (epoch). During the prospective study, true-positive, true-negative, false-positive, and false-negative values will be calculated again. From here, we will obtain the resulting confusion matrix and other indicators to measure the performance of the algorithm. Results: Transfer of the images began at the end of 2018. The development of the AI algorithm is estimated to last about 3 to 4 months. Inclusion of patients in the cohort will start in early 2019 and is expected to last 3 to 4 months. Preliminary results are expected to be published by the end of 2019. Conclusions: The study will allow the development of an algorithm based on AI that can demonstrate an equal or superior performance and that constitutes a complement or an alternative to the current screening of DR in diabetic patients. 
International Registered Report Identifier (IRRID): PRR1-10.2196/12539 ", doi="10.2196/12539", url="http://www.researchprotocols.org/2019/2/e12539/", url="http://www.ncbi.nlm.nih.gov/pubmed/30707105" } @Article{info:doi/10.2196/10513, author="Zhang, Youshan and Allem, Jon-Patrick and Unger, Beth Jennifer and Boley Cruz, Tess", title="Automated Identification of Hookahs (Waterpipes) on Instagram: An Application in Feature Extraction Using Convolutional Neural Network and Support Vector Machine Classification", journal="J Med Internet Res", year="2018", month="Nov", day="21", volume="20", number="11", pages="e10513", keywords="convolutional neural network", keywords="feature extraction", keywords="image classification", keywords="Instagram", keywords="social media", keywords="support vector machine", abstract="Background: Instagram, with millions of posts per day, can be used to inform public health surveillance targets and policies. However, current research using image-based data often relies on hand coding of images, which is time-consuming and costly, ultimately limiting the scope of the study. Current best practices in automated image classification (eg, support vector machine [SVM], backpropagation neural network, and artificial neural network) are limited in their capacity to accurately distinguish between objects within images. Objective: This study aimed to demonstrate how a convolutional neural network (CNN) can be used to extract unique features within an image and how SVM can then be used to classify the image. Methods: Images of waterpipes or hookahs (an emerging tobacco product possessing harms similar to those of cigarettes) were collected from Instagram and used in the analyses (N=840). A CNN was used to extract unique features from images identified to contain waterpipes. An SVM classifier was built to distinguish between images with and without waterpipes. Methods for image classification were then compared to show how a CNN+SVM classifier could improve accuracy. Results: As the number of validated training images increased, the total number of extracted features increased. In addition, as the number of features learned by the SVM classifier increased, the average level of accuracy increased. Overall, 99.5\% (418/420) of images classified were correctly identified as either hookah or nonhookah images. This level of accuracy was an improvement over earlier methods that used SVM, CNN, or bag-of-features alone. Conclusions: A CNN extracts more features of images, allowing an SVM classifier to be better informed, resulting in higher accuracy compared with methods that extract fewer features. Future research can use this method to grow the scope of image-based studies. The methods presented here might help detect increases in the popularity of certain tobacco products over time on social media. By taking images of waterpipes from Instagram, we place our methods in a context that can be utilized to inform health researchers analyzing social media to understand user experience with emerging tobacco products and inform public health surveillance targets and policies. 
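The two-stage pattern described in the preceding abstract (a CNN used only as a fixed feature extractor, followed by an SVM as the final classifier) can be sketched roughly as follows. This is a minimal illustration assuming a pretrained torchvision ResNet-18 backbone, a recent torchvision release, and scikit-learn's SVC; the backbone choice, image paths, and labels are assumptions for illustration, not the authors' actual pipeline or data.

import numpy as np
import torch
from torchvision import models, transforms
from sklearn.svm import SVC
from PIL import Image

# Pretrained CNN used only as a fixed feature extractor (backbone choice is an assumption).
backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
backbone.fc = torch.nn.Identity()  # drop the classification head, keep the 512-dim pooled features
backbone.eval()

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def extract_features(paths):
    """Run each image through the frozen CNN and return a feature matrix."""
    feats = []
    with torch.no_grad():
        for p in paths:
            x = preprocess(Image.open(p).convert("RGB")).unsqueeze(0)
            feats.append(backbone(x).squeeze(0).numpy())
    return np.stack(feats)

# Hypothetical file lists and labels (1 = waterpipe present, 0 = absent).
train_paths, train_labels = ["img1.jpg", "img2.jpg"], [1, 0]
clf = SVC(kernel="linear").fit(extract_features(train_paths), train_labels)
print(clf.predict(extract_features(["new_post.jpg"])))

The design point the abstract makes is that the richer CNN features, rather than hand-crafted descriptors, are what let the SVM separate the classes more accurately.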
", doi="10.2196/10513", url="http://www.jmir.org/2018/11/e10513/", url="http://www.ncbi.nlm.nih.gov/pubmed/30452385" } @Article{info:doi/10.2196/11144, author="Zhang, Kai and Liu, Xiyang and Liu, Fan and He, Lin and Zhang, Lei and Yang, Yahan and Li, Wangting and Wang, Shuai and Liu, Lin and Liu, Zhenzhen and Wu, Xiaohang and Lin, Haotian", title="An Interpretable and Expandable Deep Learning Diagnostic System for Multiple Ocular Diseases: Qualitative Study", journal="J Med Internet Res", year="2018", month="Nov", day="14", volume="20", number="11", pages="e11144", keywords="deep learning", keywords="object localization", keywords="multiple ocular diseases", keywords="interpretable and expandable diagnosis framework", keywords="making medical decisions", abstract="Background: Although artificial intelligence performs promisingly in medicine, few automatic disease diagnosis platforms can clearly explain why a specific medical decision is made. Objective: We aimed to devise and develop an interpretable and expandable diagnosis framework for automatically diagnosing multiple ocular diseases and providing treatment recommendations for the particular illness of a specific patient. Methods: As the diagnosis of ocular diseases highly depends on observing medical images, we chose ophthalmic images as research material. All medical images were labeled to 4 types of diseases or normal (total 5 classes); each image was decomposed into different parts according to the anatomical knowledge and then annotated. This process yields the positions and primary information on different anatomical parts and foci observed in medical images, thereby bridging the gap between medical image and diagnostic process. Next, we applied images and the information produced during the annotation process to implement an interpretable and expandable automatic diagnostic framework with deep learning. Results: This diagnosis framework comprises 4 stages. The first stage identifies the type of disease (identification accuracy, 93\%). The second stage localizes the anatomical parts and foci of the eye (localization accuracy: images under natural light without fluorescein sodium eye drops, 82\%; images under cobalt blue light or natural light with fluorescein sodium eye drops, 90\%). The third stage carefully classifies the specific condition of each anatomical part or focus with the result from the second stage (average accuracy for multiple classification problems, 79\%-98\%). The last stage provides treatment advice according to medical experience and artificial intelligence, which is merely involved with pterygium (accuracy, >95\%). Based on this, we developed a telemedical system that can show detailed reasons for a particular diagnosis to doctors and patients to help doctors with medical decision making. This system can carefully analyze medical images and provide treatment advices according to the analysis results and consultation between a doctor and a patient. Conclusions: The interpretable and expandable medical artificial intelligence platform was successfully built; this system can identify the disease, distinguish different anatomical parts and foci, discern the diagnostic information relevant to the diagnosis of diseases, and provide treatment suggestions. During this process, the whole diagnostic flow becomes clear and understandable to both doctors and their patients. Moreover, other diseases can be seamlessly integrated into this system without any influence on existing modules or diseases. 
Furthermore, this framework can assist in the clinical training of junior doctors. Owing to the scarcity of high-grade medical resources, it is impossible for everyone to receive high-quality professional diagnosis and treatment services. This framework can not only be applied in hospitals with insufficient medical resources to decrease the pressure on experienced doctors but also deployed in remote areas to help doctors diagnose common ocular diseases. ", doi="10.2196/11144", url="http://www.jmir.org/2018/11/e11144/", url="http://www.ncbi.nlm.nih.gov/pubmed/30429111" } @Article{info:doi/10.2196/medinform.9957, author="Richardson, Safiya and Solomon, Philip and O'Connell, Alexander and Khan, Sundas and Gong, Jonathan and Makhnevich, Alex and Qiu, Guang and Zhang, Meng and McGinn, Thomas", title="A Computerized Method for Measuring Computed Tomography Pulmonary Angiography Yield in the Emergency Department: Validation Study", journal="JMIR Med Inform", year="2018", month="Oct", day="25", volume="6", number="4", pages="e44", keywords="health informatics", keywords="pulmonary embolism", keywords="electronic health record", keywords="quality improvement", keywords="clinical decision support systems", abstract="Background: Use of computed tomography pulmonary angiography (CTPA) in the assessment of pulmonary embolism (PE) has markedly increased over the past two decades. While this technology has improved the accuracy of radiological testing for PE, CTPA also carries the risk of substantial iatrogenic harm. Each CTPA carries a 14\% risk of contrast-induced nephropathy and a lifetime malignancy risk that can be as high as 2.76\%. The appropriate use of CTPA can be estimated by monitoring the CTPA yield, the percentage of tests positive for PE. This is the first study to propose and validate a computerized method for measuring the CTPA yield in the emergency department (ED). Objective: The objective of our study was to assess the validity of a novel computerized method of calculating the CTPA yield in the ED. Methods: The electronic health record databases at two tertiary care academic hospitals were queried for CTPA orders completed in the ED over 1-month periods. These visits were linked with an inpatient admission with a discharge diagnosis of PE based on the International Classification of Diseases codes. The computerized CTPA yield was calculated as the number of CTPA orders with an associated inpatient discharge diagnosis of PE divided by the total number of orders for completed CTPA. This computerized method was then validated by 2 independent reviewers performing a manual chart review, which included reading the free-text radiology reports for each CTPA. Results: A total of 349 CTPA orders were completed during the 1-month periods at the two institutions. Of them, acute PE was diagnosed on CTPA in 28 studies, with a CTPA yield of 7.7\%. The computerized method correctly identified 27 of 28 scans positive for PE. The one discordant scan was tied to a patient who was discharged directly from the ED and, as a result, never received an inpatient discharge diagnosis. Conclusions: This is the first successful validation study of a computerized method for calculating the CTPA yield in the ED. This method for data extraction allows for an accurate determination of the CTPA yield and is more efficient than manual chart review. With this ability, health care systems can monitor the appropriate use of CTPA and the effect of interventions to reduce overuse and decrease preventable iatrogenic harm. 
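The yield measure in the preceding abstract is simply the fraction of completed CTPA orders linked to an inpatient discharge diagnosis of PE. The sketch below illustrates that calculation over hypothetical order records; the field names and ICD-10 codes are illustrative assumptions, not the authors' schema.

# Hypothetical CTPA order records linked to inpatient discharge diagnoses;
# field names and diagnosis codes are illustrative only.
ctpa_orders = [
    {"order_id": 1, "discharge_dx": ["I26.99"]},  # PE coded at inpatient discharge
    {"order_id": 2, "discharge_dx": ["J18.9"]},   # pneumonia, no PE
    {"order_id": 3, "discharge_dx": []},          # discharged from the ED, no inpatient diagnosis
]

PE_CODES = {"I26.09", "I26.99"}  # example ICD-10 codes for acute pulmonary embolism

positive = sum(1 for order in ctpa_orders if PE_CODES & set(order["discharge_dx"]))
yield_pct = 100 * positive / len(ctpa_orders)
print(f"CTPA yield: {yield_pct:.1f}%")  # 33.3% for these toy records

As the abstract's one discordant case shows, a patient discharged directly from the ED never receives an inpatient discharge diagnosis, so a linkage of this kind will miss that scan.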
", doi="10.2196/medinform.9957", url="http://medinform.jmir.org/2018/4/e44/", url="http://www.ncbi.nlm.nih.gov/pubmed/30361200" } @Article{info:doi/10.2196/medinform.9171, author="Zarinabad, Niloufar and Meeus, M. Emma and Manias, Karen and Foster, Katharine and Peet, Andrew", title="Automated Modular Magnetic Resonance Imaging Clinical Decision Support System (MIROR): An Application in Pediatric Cancer Diagnosis", journal="JMIR Med Inform", year="2018", month="May", day="02", volume="6", number="2", pages="e30", keywords="clinical decision support", keywords="real-time systems", keywords="magnetic resonance imaging", abstract="Background: Advances in magnetic resonance imaging and the introduction of clinical decision support systems has underlined the need for an analysis tool to extract and analyze relevant information from magnetic resonance imaging data to aid decision making, prevent errors, and enhance health care. Objective: The aim of this study was to design and develop a modular medical image region of interest analysis tool and repository (MIROR) for automatic processing, classification, evaluation, and representation of advanced magnetic resonance imaging data. Methods: The clinical decision support system was developed and evaluated for diffusion-weighted imaging of body tumors in children (cohort of 48 children, with 37 malignant and 11 benign tumors). Mevislab software and Python have been used for the development of MIROR. Regions of interests were drawn around benign and malignant body tumors on different diffusion parametric maps, and extracted information was used to discriminate the malignant tumors from benign tumors. Results: Using MIROR, the various histogram parameters derived for each tumor case when compared with the information in the repository provided additional information for tumor characterization and facilitated the discrimination between benign and malignant tumors. Clinical decision support system cross-validation showed high sensitivity and specificity in discriminating between these tumor groups using histogram parameters. Conclusions: MIROR, as a diagnostic tool and repository, allowed the interpretation and analysis of magnetic resonance imaging images to be more accessible and comprehensive for clinicians. It aims to increase clinicians' skillset by introducing newer techniques and up-to-date findings to their repertoire and make information from previous cases available to aid decision making. The modular-based format of the tool allows integration of analyses that are not readily available clinically and streamlines the future developments. ", doi="10.2196/medinform.9171", url="http://medinform.jmir.org/2018/2/e30/", url="http://www.ncbi.nlm.nih.gov/pubmed/29720361" } @Article{info:doi/10.2196/medinform.9063, author="Lizarraga, Gabriel and Li, Chunfei and Cabrerizo, Mercedes and Barker, Warren and Loewenstein, A. 
David and Duara, Ranjan and Adjouadi, Malek", title="A Neuroimaging Web Services Interface as a Cyber Physical System for Medical Imaging and Data Management in Brain Research: Design Study", journal="JMIR Med Inform", year="2018", month="Apr", day="26", volume="6", number="2", pages="e26", keywords="neuroimaging", keywords="multimodal imaging", keywords="magnetic resonance imaging", keywords="image processing", keywords="positron-emission tomography", keywords="diffusion tensor imaging", keywords="information storage and retrieval", keywords="diagnostic imaging", abstract="Background: Structural and functional brain images are essential imaging modalities for medical experts to study brain anatomy. These images are typically visually inspected by experts. To analyze images without any bias, they must be first converted to numeric values. Many software packages are available to process the images, but they are complex and difficult to use. The software packages are also hardware intensive. The results obtained after processing vary depending on the native operating system used and its associated software libraries; data processed in one system cannot typically be combined with data on another system. Objective: The aim of this study was to fulfill the neuroimaging community's need for a common platform to store, process, explore, and visualize their neuroimaging data and results using Neuroimaging Web Services Interface: a series of processing pipelines designed as a cyber physical system for neuroimaging and clinical data in brain research. Methods: Neuroimaging Web Services Interface accepts magnetic resonance imaging, positron emission tomography, diffusion tensor imaging, and functional magnetic resonance imaging. These images are processed using existing and custom software packages. The output is then stored as image files, tabulated files, and MySQL tables. The system, made up of a series of interconnected servers, is password-protected and is securely accessible through a Web interface and allows (1) visualization of results and (2) downloading of tabulated data. Results: All results were obtained using our processing servers in order to maintain data validity and consistency. The design is responsive and scalable. The processing pipeline started from a FreeSurfer reconstruction of Structural magnetic resonance imaging images. The FreeSurfer and regional standardized uptake value ratio calculations were validated using Alzheimer's Disease Neuroimaging Initiative input images, and the results were posted at the Laboratory of Neuro Imaging data archive. Notable leading researchers in the field of Alzheimer's Disease and epilepsy have used the interface to access and process the data and visualize the results. Tabulated results with unique visualization mechanisms help guide more informed diagnosis and expert rating, providing a truly unique multimodal imaging platform that combines magnetic resonance imaging, positron emission tomography, diffusion tensor imaging, and resting state functional magnetic resonance imaging. A quality control component was reinforced through expert visual rating involving at least 2 experts. Conclusions: To our knowledge, there is no validated Web-based system offering all the services that Neuroimaging Web Services Interface offers. The intent of Neuroimaging Web Services Interface is to create a tool for clinicians and researchers with keen interest on multimodal neuroimaging. 
More importantly, Neuroimaging Web Services Interface significantly augments the Alzheimer's Disease Neuroimaging Initiative data, especially since our data contain a large cohort of Hispanic normal controls and Alzheimer's Disease patients. The obtained results could be scrutinized visually or through the tabulated forms, informing researchers on subtle changes that characterize the different stages of the disease. ", doi="10.2196/medinform.9063", url="http://medinform.jmir.org/2018/2/e26/", url="http://www.ncbi.nlm.nih.gov/pubmed/29699962" } @Article{info:doi/10.2196/medinform.5703, author="Buabbas, Jassem Ali and Al-Shamali, Ameer Dawood and Sharma, Prem and Haidar, Salwa and Al-Shawaf, Hamza", title="Users' Perspectives on a Picture Archiving and Communication System (PACS): An In-Depth Study in a Teaching Hospital in Kuwait", journal="JMIR Med Inform", year="2016", month="Jun", day="15", volume="4", number="2", pages="e21", keywords="PACS evaluation", keywords="user perspective", keywords="IS success", keywords="imaging informatics", keywords="radiology", abstract="Background: Picture archiving and communication system (PACS) is a well-known imaging informatics application in health care organizations, specifically designed for the radiology department. Health care providers have exhibited willingness toward evaluating PACS in hospitals to ascertain the critical success and failure of the technology, considering that evaluation is a basic requirement. Objective: This study aimed at evaluating the success of a PACS in a regional teaching hospital of Kuwait, from users' perspectives, using information systems success criteria. Methods: An in-depth study was conducted by using quantitative and qualitative methods. This mixed-method study was based on: (1) questionnaires, distributed to all radiologists and technologists and (2) interviews, conducted with PACS administrators. Results: In all, 60 questionnaires were received from the respondents. These included 39 radiologists (75\% response rate) and 21 technologists (62\% response rate), with the results showing almost three-quarters (74\%, 44 of 59) of the respondents rating PACS positively and as user friendly. This study's findings revealed that the demographic data, including computer experience, was an insignificant factor, having no influence on the users' responses. The findings were further substantiated by the administrators' interview responses, which supported the benefits of PACS, indicating the need for developing a unified policy aimed at streamlining and improving the departmental workflow. Conclusions: The PACS had a positive and productive impact on the radiologists' and technologists' work performance. They were endeavoring to resolve current problems while keeping abreast of advances in PACS technology, including teleradiology and mobile image viewer, which is steadily increasing in usage in the Kuwaiti health system. ", doi="10.2196/medinform.5703", url="http://medinform.jmir.org/2016/2/e21/", url="http://www.ncbi.nlm.nih.gov/pubmed/27307046" } @Article{info:doi/10.2196/medinform.4923, author="Kim, Jae Young and Park, Won Ji and Kim, Wan Jong and Park, Chan-Soo and Gonzalez, S. 
John Paul and Lee, Hyun Seung and Kim, Gi Kwang and Oh, Hwan Jae", title="Computerized Automated Quantification of Subcutaneous and Visceral Adipose Tissue From Computed Tomography Scans: Development and Validation Study", journal="JMIR Med Inform", year="2016", month="Feb", day="04", volume="4", number="1", pages="e2", keywords="obesity", keywords="visceral adipose tissue", keywords="subcutaneous adipose tissue", keywords="computed tomography", keywords="computer-assisted image analysis", abstract="Background: Computed tomography (CT) is often viewed as one of the most accurate methods for measuring visceral adipose tissue (VAT). However, measuring VAT and subcutaneous adipose tissue (SAT) from CT is a time-consuming and tedious process. Thus, evaluating patients' obesity levels during clinical trials using CT scans is both cumbersome and limiting. Objective: To describe an image-processing-based and automated method for measuring adipose tissue in the entire abdominal region. Methods: The method detects SAT and VAT levels using a separation mask based on muscles of the human body. The separation mask is the region that minimizes the unnecessary space between a closed path and muscle area. In addition, a correction mask, based on bones, corrects the error in VAT. Results: To validate the method, the volume of total adipose tissue (TAT), SAT, and VAT were measured for a total of 100 CTs using the automated method, and the results compared with those from manual measurements obtained by 2 experts. Dice's similarity coefficients (DSCs) between the first manual measurement and the automated result for TAT, SAT, and VAT are 0.99, 0.98, and 0.97, respectively. The DSCs between the second manual measurement and the automated result for TAT, SAT, and VAT are 0.98, 0.98, and 0.97, respectively. Moreover, intraclass correlation coefficients (ICCs) between the automated method and the results of the manual measurements indicate high reliability as the ICCs for the items are all .99 (P<.001). Conclusions: The results described in this paper confirm the accuracy and reliability of the proposed method. The method is expected to be both convenient and useful in the clinical evaluation and study of obesity in patients who require SAT and VAT measurements. ", doi="10.2196/medinform.4923", url="http://medinform.jmir.org/2016/1/e2/", url="http://www.ncbi.nlm.nih.gov/pubmed/26846251" }
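The Dice similarity coefficient used to validate the adipose tissue segmentation in the final entry measures the overlap between the automated and manual binary masks as DSC = 2|A intersect B| / (|A| + |B|). A minimal sketch of that computation on toy masks follows; the arrays are illustrative only, not the study's CT data.

import numpy as np

def dice_coefficient(auto_mask, manual_mask):
    """Dice similarity coefficient between two binary masks: 2*|A & B| / (|A| + |B|)."""
    a = np.asarray(auto_mask, dtype=bool)
    b = np.asarray(manual_mask, dtype=bool)
    total = a.sum() + b.sum()
    return 1.0 if total == 0 else 2.0 * np.logical_and(a, b).sum() / total

# Toy 2-D masks standing in for automated and manual adipose tissue segmentations.
auto = np.zeros((4, 4), dtype=int)
manual = np.zeros((4, 4), dtype=int)
auto[1:3, 1:3] = 1    # 4 "voxels" labeled by the automated method
manual[1:3, 1:4] = 1  # 6 "voxels" labeled manually, 4 of them shared
print(dice_coefficient(auto, manual))  # 2*4 / (4 + 6) = 0.8

A DSC of 1.0 indicates perfect overlap, so values of 0.97-0.99, as reported in that abstract, indicate near-complete agreement between the automated and manual measurements.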