@Article{info:doi/10.2196/68704, author="Remaki, Adam and Ung, Jacques and Pages, Pierre and Wajsburt, Perceval and Liu, Elise and Faure, Guillaume and Petit-Jean, Thomas and Tannier, Xavier and G{\'e}rardin, Christel", title="Improving Phenotyping of Patients With Immune-Mediated Inflammatory Diseases Through Automated Processing of Discharge Summaries: Multicenter Cohort Study", journal="JMIR Med Inform", year="2025", month="Apr", day="9", volume="13", pages="e68704", keywords="secondary use of clinical data for research and surveillance", keywords="clinical informatics", keywords="clinical data warehouse", keywords="electronic health record", keywords="data science", keywords="artificial intelligence", keywords="AI", keywords="natural language processing", keywords="ontologies", keywords="classifications", keywords="coding", keywords="tools", keywords="programs and algorithms", keywords="immune-mediated inflammatory diseases", abstract="Background: Valuable insights gathered by clinicians during their inquiries and documented in textual reports are often unavailable in the structured data recorded in electronic health records (EHRs). Objective: This study aimed to highlight that mining unstructured textual data with natural language processing techniques complements the available structured data and enables more comprehensive patient phenotyping. A proof-of-concept for patients diagnosed with specific autoimmune diseases is presented, in which the extraction of information on laboratory tests and drug treatments is performed. Methods: We collected EHRs available in the clinical data warehouse of the Greater Paris University Hospitals from 2012 to 2021 for patients hospitalized and diagnosed with 1 of 4 immune-mediated inflammatory diseases: systemic lupus erythematosus, systemic sclerosis, antiphospholipid syndrome, and Takayasu arteritis. Then, we built, trained, and validated natural language processing algorithms on 103 discharge summaries selected from the cohort and annotated by a clinician. Finally, all discharge summaries in the cohort were processed with the algorithms, and the extracted data on laboratory tests and drug treatments were compared with the structured data. Results: Named entity recognition followed by normalization yielded F1-scores of 71.1 (95\% CI 63.6-77.8) for the laboratory tests and 89.3 (95\% CI 85.9-91.6) for the drugs. Application of the algorithms to 18,604 EHRs increased the detection of antibody results and drug treatments. For instance, among patients in the systemic lupus erythematosus cohort with positive antinuclear antibodies, the rate increased from 18.34\% (752/4102) to 71.87\% (2949/4102), making the results more consistent with the literature. Conclusions: While challenges remain in standardizing laboratory tests, particularly with abbreviations, this work, based on secondary use of clinical data, demonstrates that automated processing of discharge summaries enriched the information available in structured data and facilitated more comprehensive patient profiling. 
", doi="10.2196/68704", url="https://medinform.jmir.org/2025/1/e68704" } @Article{info:doi/10.2196/62942, author="Isaradech, Natthanaphop and Sirikul, Wachiranun and Buawangpong, Nida and Siviroj, Penprapa and Kitro, Amornphat", title="Machine Learning Models for Frailty Classification of Older Adults in Northern Thailand: Model Development and Validation Study", journal="JMIR Aging", year="2025", month="Apr", day="2", volume="8", pages="e62942", keywords="aged care", keywords="gerontology", keywords="geriatric", keywords="old", keywords="aging", keywords="clinical decision support", keywords="delivering health information and knowledge to the public", keywords="diagnostic systems", keywords="digital health", keywords="epidemiology", keywords="surveillance", keywords="diagnosis", keywords="frailty", keywords="machine learning", keywords="prediction", keywords="predictive", keywords="AI", keywords="artificial intelligence", keywords="Thailand", keywords="community dwelling", keywords="health care intervention", keywords="patient care", abstract="Background: Frailty is defined as a clinical state of increased vulnerability due to the age-associated decline of an individual's physical function resulting in increased morbidity and mortality when exposed to acute stressors. Early identification and management can reverse individuals with frailty to being robust once more. However, we found no integration of machine learning (ML) tools and frailty screening and surveillance studies in Thailand despite the abundance of evidence of frailty assessment using ML globally and in Asia. Objective: We propose an approach for early diagnosis of frailty in community-dwelling older individuals in Thailand using an ML model generated from individual characteristics and anthropometric data. Methods: Datasets including 2692 community-dwelling Thai older adults in Lampang from 2016 and 2017 were used for model development and internal validation. The derived models were externally validated with a dataset of community-dwelling older adults in Chiang Mai from 2021. The ML algorithms implemented in this study include the k-nearest neighbors algorithm, random forest ML algorithms, multilayer perceptron artificial neural network, logistic regression models, gradient boosting classifier, and linear support vector machine classifier. Results: Logistic regression showed the best overall discrimination performance with a mean area under the receiver operating characteristic curve of 0.81 (95\% CI 0.75?0.86) in the internal validation dataset and 0.75 (95\% CI 0.71?0.78) in the external validation dataset. The model was also well-calibrated to the expected probability of the external validation dataset. Conclusions: Our findings showed that our models have the potential to be utilized as a screening tool using simple, accessible demographic and explainable clinical variables in Thai community-dwelling older persons to identify individuals with frailty who require early intervention to become physically robust. ", doi="10.2196/62942", url="https://aging.jmir.org/2025/1/e62942" } @Article{info:doi/10.2196/69048, author="Oliveira, Fonseca Juliane and Vasconcelos, O. Adriano and Alencar, L. Andr{\^e}za and Cunha, L. Maria C{\'e}lia S. 
and Marcilio, Izabel and Barral-Netto, Manoel and P Ramos, Ivan Pablo", title="Balancing Human Mobility and Health Care Coverage in Sentinel Surveillance of Brazilian Indigenous Areas: Mathematical Optimization Approach", journal="JMIR Public Health Surveill", year="2025", month="Apr", day="1", volume="11", pages="e69048", keywords="representative sentinel surveillance", keywords="early pathogen detection", keywords="indigenous health", keywords="human mobility", keywords="surveillance network optimization", keywords="infectious disease surveillance", keywords="public health strategy", keywords="Brazil", abstract="Background: Optimizing sentinel surveillance site allocation for early pathogen detection remains a challenge, particularly in ensuring coverage of vulnerable and underserved populations. Objective: This study evaluates the current respiratory pathogen surveillance network in Brazil and proposes an optimized sentinel site distribution that balances Indigenous population coverage and national human mobility patterns. Methods: We compiled Indigenous Special Health District (Portuguese: Distrito Sanit{\'a}rio Especial Ind{\'i}gena [DSEI]) locations from the Brazilian Ministry of Health and estimated national mobility routes by using the Ford-Fulkerson algorithm, incorporating air, road, and water transportation data. To optimize sentinel site selection, we implemented a linear optimization algorithm that maximizes (1) Indigenous region representation and (2) human mobility coverage. We validated our approach by comparing results with Brazil's current influenza sentinel network and analyzing the health attraction index from the Brazilian Institute of Geography and Statistics to assess the feasibility and potential benefits of our optimized surveillance network. Results: The current Brazilian network includes 199 municipalities, representing 3.6\% (199/5570) of the country's cities. The optimized sentinel site design, while keeping the same number of municipalities, ensures 100\% coverage of all 34 DSEI regions while rearranging 108 (54.3\%) of the 199 cities from the existing flu sentinel system. This would result in a more representative sentinel network, addressing gaps in 9 of 34 previously uncovered DSEI regions, which span 750,515 km{\texttwosuperior} and have a population of 1.11 million. Mobility coverage would improve by 16.8 percentage points, from 52.4\% (4,598,416 paths out of 8,780,046 total paths) to 69.2\% (6,078,747 paths out of 8,780,046 total paths). Additionally, all newly selected cities serve as hubs for medium- or high-complexity health care, ensuring feasibility for pathogen surveillance. Conclusions: The proposed framework optimizes sentinel site allocation to enhance disease surveillance and early detection. By maximizing DSEI coverage and integrating human mobility patterns, this approach provides a more effective and equitable surveillance network, which would particularly benefit underserved Indigenous regions. 
", doi="10.2196/69048", url="https://publichealth.jmir.org/2025/1/e69048" } @Article{info:doi/10.2196/63983, author="Lee, Heonyi and Kim, Yi-Jun and Kim, Jin-Hong and Kim, Soo-Kyung and Jeong, Tae-Dong", title="Optimizing Initial Vancomycin Dosing in Hospitalized Patients Using Machine Learning Approach for Enhanced Therapeutic Outcomes: Algorithm Development and Validation Study", journal="J Med Internet Res", year="2025", month="Mar", day="31", volume="27", pages="e63983", keywords="algorithm", keywords="machine learning", keywords="therapeutic drug monitoring", keywords="vancomycin", keywords="area under curve", keywords="pharmacokinetics", keywords="vancomycin dosing", abstract="Background: Vancomycin is commonly dosed using standard weight--based methods before dose adjustments are made through therapeutic drug monitoring (TDM). However, variability in initial dosing can lead to suboptimal therapeutic outcomes. A predictive model that personalizes initial dosing based on patient-specific pharmacokinetic factors prior to administration may enhance target attainment and minimize the need for subsequent dose adjustments. Objective: This study aimed to develop and evaluate a machine learning (ML)--based algorithm to predict whether an initial vancomycin dose falls within the therapeutic range of the 24-hour area under the curve to minimum inhibitory concentration, thereby optimizing the initial vancomycin dosage. Methods: A retrospective cohort study was conducted using hospitalized patients who received intravenous vancomycin and underwent pharmacokinetic TDM consultation (n=415). The cohort was randomly divided into training and testing datasets in a 7:3 ratio, and multiple ML techniques were used to develop an algorithm for optimizing initial vancomycin dosing. The optimal algorithm, referred to as the OPTIVAN algorithm, was selected and validated using an external cohort (n=268). We evaluated the performance of 4 ML models: gradient boosting machine, random forest (RF), support vector machine (SVM), and eXtreme gradient boosting (XGB). Additionally, a web-based clinical support tool was developed to facilitate real-time vancomycin TDM application in clinical practice. Results: The SVM algorithm demonstrated the best predictive performance, achieving an area under the receiver operating characteristic curve (AUROC) of 0.832 (95\% CI 0.753-0.900) for the training dataset and 0.720 (95\% CI 0.654-0.783) for the external validation dataset. The gradient boosting machine followed closely with AUROC scores of 0.802 (95\% CI 0.667-0.857) for the training dataset and 0.689 (95\% CI 0.596-0.733) for the validation dataset. In contrast, both XGB and RF exhibited relatively lower performance. XGB achieved AUROC values of 0.769 (95\% CI 0.671-0.853) for the training set and 0.707 (95\% CI 0.644-0.772) for the validation set, while RF recorded AUROC scores of 0.759 (95\% CI 0.656-0.846) for the test dataset and 0.693 (95\% CI 0.625-0.757) for the external validation set. The SVM model incorporated 7 covariates: age, BMI, glucose, blood urea nitrogen, estimated glomerular filtration rate, hematocrit, and daily dose per body weight. Subgroup analyses demonstrated consistent performance across different patient categories, such as renal function, sex, and BMI. A web-based TDM analysis tool was developed using the OPTIVAN algorithm. Conclusions: The OPTIVAN algorithm represents a significant advancement in personalized initial vancomycin dosing, addressing the limitations of current TDM practices. 
By optimizing the initial dose, this algorithm may reduce the need for subsequent dosage adjustments. The algorithm's web-based app is easy to use, making it a practical tool for clinicians. This study highlights the potential of ML to enhance the effectiveness of vancomycin treatment. ", doi="10.2196/63983", url="https://www.jmir.org/2025/1/e63983" } @Article{info:doi/10.2196/65371, author="Hasegawa, Tatsuki and Kizaki, Hayato and Ikegami, Keisho and Imai, Shungo and Yanagisawa, Yuki and Yada, Shuntaro and Aramaki, Eiji and Hori, Satoko", title="Improving Systematic Review Updates With Natural Language Processing Through Abstract Component Classification and Selection: Algorithm Development and Validation", journal="JMIR Med Inform", year="2025", month="Mar", day="27", volume="13", pages="e65371", keywords="systematic review", keywords="natural language processing", keywords="guideline updates", keywords="bidirectional encoder representations from transformer", keywords="screening model", keywords="literature", keywords="efficiency", keywords="updating systematic reviews", keywords="language model", abstract="Background: A challenge in updating systematic reviews is the workload in screening the articles. Many screening models using natural language processing technology have been implemented to scrutinize articles based on titles and abstracts. While these approaches show promise, traditional models typically treat abstracts as uniform text. We hypothesize that selective training on specific abstract components could enhance model performance for systematic review screening. Objective: We evaluated the efficacy of a novel screening model that selects specific components from abstracts to improve performance and developed an automatic systematic review update model using an abstract component classifier to categorize abstracts based on their components. Methods: A screening model was created based on the included and excluded articles in the existing systematic review and used as the scheme for the automatic update of the systematic review. A prior publication was selected for the systematic review, and articles included or excluded in the article screening process were used as training data. The titles and abstracts were classified into 5 categories (Title, Introduction, Methods, Results, and Conclusion). Thirty-one component-composition datasets were created by combining 5 component datasets. We implemented 31 screening models using the component-composition datasets and compared their performances. Comparisons were conducted using 3 pretrained models: Bidirectional Encoder Representations from Transformers (BERT), BioLinkBERT, and BioM-ELECTRA (Efficiently Learning an Encoder that Classifies Token Replacements Accurately). Moreover, to automate the component selection of abstracts, we developed the Abstract Component Classifier Model and created component datasets classified by this model. Using the component datasets classified by the Abstract Component Classifier Model, we created the 10 component-composition datasets that had produced the top 10 highest-performing screening models when the manually classified component datasets were used. Ten screening models were implemented using these datasets, and their performances were compared with those of models developed using manually classified component-composition datasets. The primary evaluation metric was the recall-weighted F10-score.
Results: A total of 256 included articles and 1261 excluded articles were extracted from the selected systematic review. In the screening models implemented using manually classified datasets, the performance of some surpassed that of models trained on all components (BERT: 9 models, BioLinkBERT: 6 models, and BioM-ELECTRA: 21 models). In models implemented using datasets classified by the Abstract Component Classifier Model, the performances of some models (BERT: 7 models and BioM-ELECTRA: 9 models) surpassed that of the models trained on all components. These models achieved an 88.6\% reduction in manual screening workload while maintaining high recall (0.93). Conclusions: Component selection from the title and abstract can improve the performance of screening models and substantially reduce the manual screening workload in systematic review updates. Future research should focus on validating this approach across different systematic review domains. ", doi="10.2196/65371", url="https://medinform.jmir.org/2025/1/e65371" } @Article{info:doi/10.2196/69820, author="Waaler, Niklas Per and Hussain, Musarrat and Molchanov, Igor and Bongo, Ailo Lars and Elvev{\aa}g, Brita", title="Prompt Engineering an Informational Chatbot for Education on Mental Health Using a Multiagent Approach for Enhanced Compliance With Prompt Instructions: Algorithm Development and Validation", journal="JMIR AI", year="2025", month="Mar", day="26", volume="4", pages="e69820", keywords="schizophrenia", keywords="mental health", keywords="prompt engineering", keywords="AI in health care", keywords="AI safety", keywords="self-reflection", keywords="limiting scope of AI", keywords="large language model", keywords="LLM", keywords="GPT-4", keywords="AI transparency", keywords="adaptive learning", abstract="Background: People with schizophrenia often present with cognitive impairments that may hinder their ability to learn about their condition. Education platforms powered by large language models (LLMs) have the potential to improve the accessibility of mental health information. However, the black-box nature of LLMs raises ethical and safety concerns regarding the controllability of chatbots. In particular, prompt-engineered chatbots may drift from their intended role as the conversation progresses and become more prone to hallucinations. Objective: This study aimed to develop and evaluate a critical analysis filter (CAF) system that ensures that an LLM-powered prompt-engineered chatbot reliably complies with its predefined instructions and scope while delivering validated mental health information. Methods: For a proof of concept, we prompt engineered an educational chatbot for schizophrenia powered by GPT-4 that could dynamically access information from a schizophrenia manual written for people with schizophrenia and their caregivers. In the CAF, a team of prompt-engineered LLM agents was used to critically analyze and refine the chatbot's responses and deliver real-time feedback to the chatbot. To assess the ability of the CAF to re-establish the chatbot's adherence to its instructions, we generated 3 conversations (by conversing with the chatbot with the CAF disabled) wherein the chatbot started to drift from its instructions toward various unintended roles. We used these checkpoint conversations to initialize automated conversations between the chatbot and adversarial chatbots designed to entice it toward unintended roles. Conversations were repeatedly sampled with the CAF enabled and disabled. 
In total, 3 human raters independently rated each chatbot response according to criteria developed to measure the chatbot's integrity, specifically, its transparency (such as admitting when a statement lacked explicit support from its scripted sources) and its tendency to faithfully convey the scripted information in the schizophrenia manual. Results: In total, 36 responses (3 different checkpoint conversations, 3 conversations per checkpoint, and 4 adversarial queries per conversation) were rated for compliance with the CAF enabled and disabled. Activating the CAF resulted in a compliance score that was considered acceptable (≥2) in 81\% (29/36) of the responses, compared to only 8.3\% (3/36) when the CAF was deactivated. Conclusions: Although more rigorous testing in realistic scenarios is needed, our results suggest that self-reflection mechanisms could enable LLMs to be used effectively and safely in educational mental health platforms. This approach harnesses the flexibility of LLMs while reliably constraining their scope to appropriate and accurate interactions. ", doi="10.2196/69820", url="https://ai.jmir.org/2025/1/e69820", url="http://www.ncbi.nlm.nih.gov/pubmed/39992720" } @Article{info:doi/10.2196/65456, author="Helgeson, A. Scott and Quicksall, S. Zachary and Johnson, W. Patrick and Lim, G. Kaiser and Carter, E. Rickey and Lee, S. Augustine", title="Estimation of Static Lung Volumes and Capacities From Spirometry Using Machine Learning: Algorithm Development and Validation", journal="JMIR AI", year="2025", month="Mar", day="24", volume="4", pages="e65456", keywords="artificial intelligence", keywords="machine learning", keywords="pulmonary function test", keywords="spirometry", keywords="total lung capacity", keywords="AI", keywords="ML", keywords="lung", keywords="lung volume", keywords="lung capacity", keywords="spirometer", keywords="lung disease", keywords="database", keywords="respiratory", keywords="pulmonary", abstract="Background: Spirometry can be performed in an office setting or remotely using portable spirometers. Although basic spirometry is used for diagnosis of obstructive lung disease, clinically relevant information such as restriction, hyperinflation, and air trapping requires additional testing, such as body plethysmography, which is not as readily available. We hypothesize that spirometry data contain information that can allow estimation of static lung volumes in certain circumstances by leveraging machine learning techniques. Objective: The aim of the study was to develop artificial intelligence-based algorithms for estimating lung volumes and capacities using spirometry measures. Methods: This study obtained spirometry and lung volume measurements from the Mayo Clinic pulmonary function test database for patient visits between February 19, 2001, and December 16, 2022. Preprocessing was performed, and various machine learning algorithms were applied, including a generalized linear model with regularization, random forests, extremely randomized trees, gradient-boosted trees, and XGBoost for both classification and regression cohorts. Results: A total of 121,498 pulmonary function tests were used in this study, with 85,017 allotted for exploratory data analysis and model development (ie, training dataset) and 36,481 tests reserved for model evaluation (ie, testing dataset). The median age of the cohort was 64.7 years (IQR 18-119.6), with a balanced distribution between genders, consisting of 48.2\% (n=58,607) female and 51.8\% (n=62,889) male patients.
The classification models showed a robust performance overall, with relatively low root mean square error and mean absolute error values observed across all predicted lung volumes. Across all lung volume categories, the models demonstrated strong discriminatory capacity, as indicated by the high area under the receiver operating characteristic curve values ranging from 0.85 to 0.99 in the training set and 0.81 to 0.98 in the testing set. Conclusions: Overall, the models demonstrate robust performance across lung volume measurements, underscoring their potential utility in clinical practice for accurate diagnosis and prognosis of respiratory conditions, particularly in settings where access to body plethysmography or other lung volume measurement modalities is limited. ", doi="10.2196/65456", url="https://ai.jmir.org/2025/1/e65456" } @Article{info:doi/10.2196/69150, author="Twumasi, Clement and Aktas, Mikail and Santoni, Nicholas", title="Kinetic Pattern Recognition in Home-Based Knee Rehabilitation Using Machine Learning Clustering Methods on the Slider Digital Physiotherapy Device: Prospective Observational Study", journal="JMIR Form Res", year="2025", month="Mar", day="18", volume="9", pages="e69150", keywords="machine learning", keywords="cluster analysis", keywords="force measurement", keywords="knee replacement", keywords="musculoskeletal", keywords="physical therapy", keywords="Slider device", keywords="knee osteoarthritis", keywords="digital health", keywords="telerehabilitation", abstract="Background: Recent advancements in rehabilitation sciences have progressively used computational techniques to improve diagnostic and treatment approaches. However, the analysis of high-dimensional, time-dependent data continues to pose a significant problem. Prior research has used clustering techniques on rehabilitation data to identify movement patterns and forecast recovery outcomes. Nonetheless, these initiatives have not yet used force or motion datasets obtained outside a clinical setting, thereby limiting the capacity for therapeutic decisions. Biomechanical data analysis has demonstrated considerable potential in bridging these gaps and improving clinical decision-making in rehabilitation settings. Objective: This study presents a comprehensive clustering analysis of multidimensional movement datasets captured using a novel home exercise device, the ``Slider''. The aim is to identify clinically relevant movement patterns and provide answers to open research questions for the first time to inform personalized rehabilitation protocols, predict individual recovery trajectories, and assess the risks of potential postoperative complications. Methods: High-dimensional, time-dependent, bilateral knee kinetic datasets were independently analyzed from 32 participants using four unsupervised clustering techniques: k-means, hierarchical clustering, partition around medoids, and CLARA (Clustering Large Applications). The data comprised force, laser-measured distance, and optical tracker coordinates from lower limb activities. The optimal clusters identified through the unsupervised clustering methods were further evaluated and compared using silhouette analysis to quantify their performance. Key determinants of cluster membership were assessed, including demographic factors (eg, gender, BMI, and age) and pain levels, by using a logistic regression model with analysis of covariance adjustment. Results: Three distinct, time-varying movement patterns or clusters were identified for each knee. 
Hierarchical clustering performed best for the right knee datasets (with an average silhouette score of 0.637), while CLARA was the most effective for the left knee datasets (with an average silhouette score of 0.598). Key predictors of the movement cluster membership were discovered for both knees. BMI was the most influential determinant of cluster membership for the right knee, where higher BMI decreased the odds of cluster-2 membership (odds ratio [OR] 0.95, 95\% CI 0.94-0.96; P<.001) but increased the odds for cluster-3 assignment relative to cluster 1 (OR 1.05, 95\% CI 1.03-1.06; P<.001). For the left knee, all predictors of cluster-2 membership were significant (.001≤P≤.008), whereas BMI (P=.81) was the only predictor that could not predict the likelihood of an individual belonging to cluster 3 compared to cluster 1. Gender was the strongest determinant for the left knee, with male participants significantly more likely to belong to cluster 3 (OR 3.52, 95\% CI 2.91-4.27; P<.001). Conclusions: These kinetic patterns offer significant insights for creating personalized rehabilitation procedures, potentially improving patient outcomes. These findings underscore the efficacy of unsupervised clustering techniques in the analysis of biomechanical data for clinical rehabilitation applications. ", doi="10.2196/69150", url="https://formative.jmir.org/2025/1/e69150" } @Article{info:doi/10.2196/67239, author="Tzeng, Jing-Tong and Li, Jeng-Lin and Chen, Huan-Yu and Huang, Chu-Hsiang and Chen, Chi-Hsin and Fan, Cheng-Yi and Huang, Pei-Chuan Edward and Lee, Chi-Chun", title="Improving the Robustness and Clinical Applicability of Automatic Respiratory Sound Classification Using Deep Learning--Based Audio Enhancement: Algorithm Development and Validation", journal="JMIR AI", year="2025", month="Mar", day="13", volume="4", pages="e67239", keywords="respiratory sound", keywords="lung sound", keywords="audio enhancement", keywords="noise robustness", keywords="clinical applicability", keywords="artificial intelligence", keywords="AI", abstract="Background: Deep learning techniques have shown promising results in the automatic classification of respiratory sounds. However, accurately distinguishing these sounds in real-world noisy conditions poses challenges for clinical deployment. In addition, predicting signals with only background noise could undermine user trust in the system. Objective: This study aimed to investigate the feasibility and effectiveness of incorporating a deep learning--based audio enhancement preprocessing step into automatic respiratory sound classification systems to improve robustness and clinical applicability. Methods: We conducted extensive experiments using various audio enhancement model architectures, including time-domain and time-frequency--domain approaches, in combination with multiple classification models to evaluate the effectiveness of the audio enhancement module in an automatic respiratory sound classification system. The classification performance was compared against the baseline noise injection data augmentation method. These experiments were carried out on 2 datasets: the International Conference on Biomedical and Health Informatics (ICBHI) respiratory sound dataset, which contains 5.5 hours of recordings, and the Formosa Archive of Breath Sound dataset, which comprises 14.6 hours of recordings. Furthermore, a physician validation study involving 7 senior physicians was conducted to assess the clinical utility of the system.
Results: The integration of the audio enhancement module resulted in a 21.88\% increase (P<.001) in the ICBHI classification score on the ICBHI dataset and a 4.1\% improvement (P<.001) on the Formosa Archive of Breath Sound dataset in multi-class noisy scenarios. Quantitative analysis from the physician validation study revealed improvements in efficiency, diagnostic confidence, and trust during model-assisted diagnosis, with workflows that integrated enhanced audio leading to an 11.61\% increase in diagnostic sensitivity and facilitating high-confidence diagnoses. Conclusions: Incorporating an audio enhancement algorithm significantly enhances the robustness and clinical utility of automatic respiratory sound classification systems, improving performance in noisy environments and fostering greater trust among medical professionals. ", doi="10.2196/67239", url="https://ai.jmir.org/2025/1/e67239" } @Article{info:doi/10.2196/64354, author="Ehrig, Molly and Bullock, S. Garrett and Leng, Iris Xiaoyan and Pajewski, M. Nicholas and Speiser, Lynn Jaime", title="Imputation and Missing Indicators for Handling Missing Longitudinal Data: Data Simulation Analysis Based on Electronic Health Record Data", journal="JMIR Med Inform", year="2025", month="Mar", day="13", volume="13", pages="e64354", keywords="missing indicator method", keywords="missing data", keywords="imputation", keywords="longitudinal data", keywords="electronic health record data", keywords="electronic health records", keywords="EHR", keywords="simulation study", keywords="clinical prediction model", keywords="prediction model", keywords="older adults", keywords="falls", keywords="logistic regression", keywords="prediction modeling", abstract="Background: Missing data in electronic health records are highly prevalent and result in analytical concerns such as heterogeneous sources of bias and loss of statistical power. One simple analytic method for addressing missing or unknown covariate values is to treat missingness for a particular variable as a category unto itself, which we refer to as the missing indicator method. For cross-sectional analyses, recent work suggested that there was minimal benefit to the missing indicator method; however, it is unclear how this approach performs in the setting of longitudinal data, in which correlation among clustered repeated measures may be leveraged for potentially improved model performance. Objectives: This study aims to conduct a simulation study to evaluate whether the missing indicator method improves model performance and imputation accuracy for longitudinal data, mimicking an application of developing a clinical prediction model for falls in older adults based on electronic health record data. Methods: We simulated a longitudinal binary outcome using mixed effects logistic regression that emulated a falls assessment at annual follow-up visits. Using multivariate imputation by chained equations, we simulated time-invariant predictors such as sex and medical history, as well as dynamic predictors such as physical function, BMI, and medication use. We induced missing data in predictors under scenarios that had both random (missing at random) and dependent missingness (missing not at random). We evaluated aggregate performance using the area under the receiver operating characteristic curve (AUROC) for models with and without missing indicators as predictors, as well as complete case analysis, across simulation replicates.
We evaluated imputation quality using normalized root-mean-square error for continuous variables and percent falsely classified for categorical variables. Results: Independent of the mechanism used to simulate missing data (missing at random or missing not at random), overall model performance via AUROC was similar regardless of whether missing indicators were included in the model. The root-mean-square error and percent falsely classified measures were similar for models including missing indicators versus those without missing indicators. Model performance and imputation quality were similar regardless of whether the outcome was related to missingness. Imputation with or without missing indicators had similar mean values of AUROC compared with complete case analysis, although complete case analysis had the largest range of values. Conclusions: The results of this study suggest that the inclusion of missing indicators in longitudinal data modeling neither improves nor worsens overall performance or imputation accuracy. Future research is needed to address whether the inclusion of missing indicators is useful in prediction modeling with longitudinal data in different settings, such as high-dimensional data analysis. ", doi="10.2196/64354", url="https://medinform.jmir.org/2025/1/e64354" } @Article{info:doi/10.2196/63216, author="Yang, Zhongbao and Xu, Shan-Shan and Liu, Xiaozhu and Xu, Ningyuan and Chen, Yuqing and Wang, Shuya and Miao, Ming-Yue and Hou, Mengxue and Liu, Shuai and Zhou, Yi-Min and Zhou, Jian-Xin and Zhang, Linlin", title="Large Language Model--Based Critical Care Big Data Deployment and Extraction: Descriptive Analysis", journal="JMIR Med Inform", year="2025", month="Mar", day="12", volume="13", pages="e63216", keywords="big data", keywords="critical care--related databases", keywords="database deployment", keywords="large language model", keywords="database extraction", keywords="intensive care unit", keywords="ICU", keywords="GPT", keywords="artificial intelligence", keywords="AI", keywords="LLM", abstract="Background: Publicly accessible critical care--related databases contain enormous amounts of clinical data, but their utilization often requires advanced programming skills. The growing complexity of large databases and unstructured data presents challenges for clinicians who need programming or data analysis expertise to utilize these systems directly. Objective: This study aims to simplify critical care--related database deployment and extraction via large language models. Methods: The development of this platform was a 2-step process. First, we enabled automated database deployment using Docker container technology, incorporating the web-based analytics interfaces Metabase and Superset. Second, we developed the intensive care unit--generative pretrained transformer (ICU-GPT), a large language model fine-tuned on intensive care unit (ICU) data that integrated LangChain and Microsoft AutoGen. Results: The automated deployment platform was designed with user-friendliness in mind, enabling clinicians to deploy 1 or multiple databases in local, cloud, or remote environments without the need for manual setup. After successfully overcoming GPT's token limit and supporting multischema data, ICU-GPT could generate Structured Query Language (SQL) queries and extract insights from ICU datasets based on request input. A front-end user interface was developed for clinicians to achieve code-free SQL generation on the web-based client.
Conclusions: By harnessing the power of our automated deployment platform and ICU-GPT model, clinicians are empowered to easily visualize, extract, and arrange critical care--related databases more efficiently and flexibly than manual methods. Our research could decrease the time and effort spent on complex bioinformatics methods and advance clinical research. ", doi="10.2196/63216", url="https://medinform.jmir.org/2025/1/e63216" } @Article{info:doi/10.2196/64705, author="Mast, H. Nicholas and Oeste, L. Clara and Hens, Dries", title="Assessing Total Hip Arthroplasty Outcomes and Generating an Orthopedic Research Outcome Database via a Natural Language Processing Pipeline: Development and Validation Study", journal="JMIR Med Inform", year="2025", month="Mar", day="12", volume="13", pages="e64705", keywords="total hip arthroplasty", keywords="THA", keywords="direct anterior approach", keywords="electronic health records", keywords="EHR", keywords="natural language processing", keywords="NLP", keywords="complication rate", keywords="single-surgeon registry", keywords="hip arthroplasty", keywords="orthopedic", keywords="validation", keywords="surgeon", keywords="outpatient visits", keywords="hospitalizations", keywords="surgery", abstract="Background: Processing data from electronic health records (EHRs) to build research-grade databases is a lengthy and expensive process. Modern arthroplasty practice commonly uses multiple sites of care, including clinics and ambulatory care centers. However, most private data systems prevent obtaining usable insights for clinical practice. Objective: This study aims to create an automated natural language processing (NLP) pipeline for extracting clinical concepts from EHRs related to orthopedic outpatient visits, hospitalizations, and surgeries in a multicenter, single-surgeon practice. The pipeline was also used to assess therapies and complications after total hip arthroplasty (THA). Methods: EHRs of 1290 patients undergoing primary THA from January 1, 2012, to December 31, 2019 (operated and followed by the same surgeon) were processed using artificial intelligence (AI)--based models (NLP and machine learning). In addition, 3 independent medical reviewers generated a gold standard using 100 randomly selected EHRs. The algorithm processed the entire database from different EHR systems, generating an aggregated clinical data warehouse. An additional manual control arm was used for data quality control. Results: The algorithm was as accurate as human reviewers (0.95 vs 0.94; P=.01), achieving a database-wide average F1-score of 0.92 (SD 0.09; range 0.67-0.99), validating its use as an automated data extraction tool. During the first year after direct anterior THA, 92.1\% (1188/1290) of our population had a complication-free recovery. In 7.9\% (102/1290) of cases where surgery or recovery was not uneventful, lateral femoral cutaneous nerve sensitivity (47/1290, 3.6\%), intraoperative fractures (13/1290, 1\%), and hematoma (9/1290, 0.7\%) were the most common complications. Conclusions: Algorithm evaluation of this dataset accurately represented key clinical information swiftly, compared with human reviewers. This technology may provide substantial value for future surgeon practice and patient counseling. Furthermore, the low early complication rate of direct anterior THA in this surgeon's hands was supported by the dataset, which included data from all treated patients in a multicenter practice.
", doi="10.2196/64705", url="https://medinform.jmir.org/2025/1/e64705" } @Article{info:doi/10.2196/59377, author="Gao, Jing and Jie, Xu and Yao, Yujun and Xue, Jingdong and Chen, Lei and Chen, Ruiyao and Chen, Jiayuan and Cheng, Weiwei", title="Fetal Birth Weight Prediction in the Third Trimester: Retrospective Cohort Study and Development of an Ensemble Model", journal="JMIR Pediatr Parent", year="2025", month="Mar", day="10", volume="8", pages="e59377", keywords="fetal birthweight", keywords="ensemble learning model", keywords="machine learning", keywords="prediction model", keywords="ultrasonography", keywords="macrosomia", keywords="low birth weight", keywords="birth weight", keywords="fetal", keywords="AI", keywords="artificial intelligence", keywords="prenatal", keywords="prenatal care", keywords="Shanghai", keywords="neonatal", keywords="maternal", keywords="parental", abstract="Background: Accurate third-trimester birth weight prediction is vital for reducing adverse outcomes, and machine learning (ML) offers superior precision over traditional ultrasound methods. Objective: This study aims to develop an ML model on the basis of clinical big data for accurate prediction of birth weight in the third trimester of pregnancy, which can help reduce adverse maternal and fetal outcomes. Methods: From January 1, 2018 to December 31, 2019, a retrospective cohort study involving 16,655 singleton live births without congenital anomalies (>28 weeks of gestation) was conducted in a tertiary first-class hospital in Shanghai. The initial set of data was divided into a train set for algorithm development and a test set on which the algorithm was divided in a ratio of 4:1. We extracted maternal and neonatal delivery outcomes, as well as parental demographics, obstetric clinical data, and sonographic fetal biometry, from electronic medical records. A total of 5 basic ML algorithms, including Ridge, SVM, Random Forest, extreme gradient boosting (XGBoost), and Multi-Layer Perceptron, were used to develop the prediction model, which was then averaged into an ensemble learning model. The models were compared using accuracy, mean squared error, root mean squared error, and mean absolute error. International Peace Maternity and Child Health Hospital's Research Ethics Committee granted ethical approval for the usage of patient information (GKLW2021-20). Results: Train and test sets contained a total of 13,324 and 3331 cases, respectively. From a total of 59 variables, we selected 17 variables that were readily available for the ``few feature model,'' which achieved high predictive power with an accuracy of 81\% and significantly exceeded ultrasound formula methods. In addition, our model maintained superior performance for low birth weight and macrosomic fetal populations. Conclusions: Our research investigated an innovative artificial intelligence model for predicting fetal birth weight and maximizing health care resource use. In the era of big data, our model improves maternal and fetal outcomes and promotes precision medicine. ", doi="10.2196/59377", url="https://pediatrics.jmir.org/2025/1/e59377" } @Article{info:doi/10.2196/65590, author="Malik, Salma and Dorothea, Pana Zoi and Argyropoulos, D. Christos and Themistocleous, Sophia and Macken, J. Alan and Valdenmaiier, Olena and Scheckenbach, Frank and Bardach, Elena and Pfeiffer, Andrea and Loens, Katherine and Ochando, Cano Jordi and Cornely, A. 
Oliver and Demotes-Mainard, Jacques and Contrino, Sergio and Felder, Gerd", title="Data Interoperability in COVID-19 Vaccine Trials: Methodological Approach in the VACCELERATE Project", journal="JMIR Med Inform", year="2025", month="Mar", day="7", volume="13", pages="e65590", keywords="interoperability", keywords="metadata", keywords="data management", keywords="clinical trials", keywords="protocol", keywords="harmonization", keywords="adult", keywords="pediatric", keywords="systems", keywords="standards", abstract="Background: Data standards are not only key to making data processing efficient but also fundamental to ensuring data interoperability. When clinical trial data are structured according to international standards, they become significantly easier to analyze, reducing the efforts required for data cleaning, preprocessing, and secondary use. A common language and a shared set of expectations facilitate interoperability between systems and devices. Objective: The main objectives of this study were to identify commonalities and differences in clinical trial metadata, protocols, and data collection systems/items within the VACCELERATE project. Methods: To assess the degree of interoperability achieved in the project and suggest methodological improvements, interoperable points were identified based on the core outcome areas---immunogenicity, safety, and efficacy (clinical/physiological). These points were emphasized in the development of the master protocol template and were manually compared in the following ways: (1) summaries, objectives, and end points in the protocols of 3 VACCELERATE clinical trials (EU-COVAT-1\_AGED, EU-COVAT-2\_BOOSTAVAC, and EU-COVPT-1\_CoVacc) against the master protocol template; (2) metadata of all 3 clinical trials; and (3) evaluations from a questionnaire survey regarding differences in data management systems and structures that enabled data exchange within the VACCELERATE network. Results: The noncommonalities identified in the protocols and metadata were attributed to differences in populations, variations in protocol design, and vaccination patterns. The detailed metadata released for all 3 vaccine trials were clearly structured using internal standards, terminology, and the general approach of Clinical Data Acquisition Standards Harmonisation (CDASH) for data collection (eg, on electronic case report forms). VACCELERATE benefited significantly from the selection of the Clinical Trials Centre Cologne as the sole data management provider. With system database development coordinated by a single individual and no need for coordination among different trial units, a high degree of uniformity was achieved automatically. The harmonized transfer of data to all sites, using well-established methods, enabled quick exchanges and provided a relatively secure means of data transfer. Conclusions: This study demonstrated that using master protocols can significantly enhance trial operational efficiency and data interoperability, provided that similar infrastructure and data management procedures are adopted across multiple trials. To further improve data interoperability and facilitate interpretation and analysis, shared data should be structured, described, formatted, and stored using widely recognized data and metadata standards. 
Trial Registration: EudraCT 2021-004526-29; https://www.clinicaltrialsregister.eu/ctr-search/trial/2021-004526-29/DE/; 2021-004889-35; https://www.clinicaltrialsregister.eu/ctr-search/search?query=eudract\_number:2021-004889-35; and 2021-004526-29; https://www.clinicaltrialsregister.eu/ctr-search/search?query=eudract\_number:2021-004526-29 ", doi="10.2196/65590", url="https://medinform.jmir.org/2025/1/e65590" } @Article{info:doi/10.2196/59801, author="Park, Adam and Jung, Young Se and Yune, Ilha and Lee, Ho-Young", title="Applying Robotic Process Automation to Monitor Business Processes in Hospital Information Systems: Mixed Method Approach", journal="JMIR Med Inform", year="2025", month="Mar", day="7", volume="13", pages="e59801", keywords="robotic process automation", keywords="RPA", keywords="electronic medical records", keywords="EMR", keywords="system monitoring", keywords="health care information systems", keywords="user-centric monitoring", keywords="performance evaluation", keywords="business process management", keywords="BPM", keywords="healthcare technology", keywords="mixed methods research", keywords="process automation in health care", abstract="Background: Electronic medical records (EMRs) have undergone significant changes due to advancements in technology, including artificial intelligence, the Internet of Things, and cloud services. The increasing complexity within health care systems necessitates enhanced process reengineering and system monitoring approaches. Robotic process automation (RPA) provides a user-centric approach to monitoring system complexity by mimicking end user interactions, thus presenting potential improvements in system performance and monitoring. Objective: This study aimed to explore the application of RPA in monitoring the complexities of EMR systems within a hospital environment, focusing on RPA's ability to perform end-to-end performance monitoring that closely reflects real-time user experiences. Methods: The research was conducted at Seoul National University Bundang Hospital using a mixed methods approach. It included the iterative development and integration of RPA bots programmed to simulate and monitor typical user interactions with the hospital's EMR system. Quantitative data from RPA process outputs and qualitative insights from interviews with system engineers and managers were used to evaluate the effectiveness of RPA in system monitoring. Results: RPA bots effectively identified and reported system inefficiencies and failures, providing a bridge between end user experiences and engineering assessments. The bots were particularly useful in detecting delays and errors immediately following system updates or interactions with external services. Over 3 years, RPA monitoring highlighted discrepancies between user-reported experiences and traditional engineering metrics, with the bots frequently identifying critical system issues that were not evident from standard component-level monitoring. Conclusions: RPA enhances system monitoring by providing insights that reflect true end user experiences, which are often overlooked by traditional monitoring methods. The study confirms the potential of RPA to act as a comprehensive monitoring tool within complex health care systems, suggesting that RPA can significantly contribute to the maintenance and improvement of EMR systems by providing a more accurate and timely reflection of system performance and user satisfaction. 
", doi="10.2196/59801", url="https://medinform.jmir.org/2025/1/e59801", url="http://www.ncbi.nlm.nih.gov/pubmed/40053771" } @Article{info:doi/10.2196/64279, author="Rajaram, Akshay and Judd, Michael and Barber, David", title="Deep Learning Models to Predict Diagnostic and Billing Codes Following Visits to a Family Medicine Practice: Development and Validation Study", journal="JMIR AI", year="2025", month="Mar", day="7", volume="4", pages="e64279", keywords="machine learning", keywords="ML", keywords="artificial intelligence", keywords="algorithm", keywords="predictive model", keywords="predictive analytics", keywords="predictive system", keywords="family medicine", keywords="primary care", keywords="family doctor", keywords="family physician", keywords="income", keywords="billing code", keywords="electronic notes", keywords="electronic health record", keywords="electronic medical record", keywords="EMR", keywords="patient record", keywords="health record", keywords="personal health record", abstract="Background: Despite significant time spent on billing, family physicians routinely make errors and miss billing opportunities. In other disciplines, machine learning models have predicted Current Procedural Terminology codes with high accuracy. Objective: Our objective was to derive machine learning models capable of predicting diagnostic and billing codes from notes recorded in the electronic medical record. Methods: We conducted a retrospective algorithm development and validation study involving an academic family medicine practice. Visits between July 1, 2015, and June 30, 2020, containing a physician-authored note and an invoice in the electronic medical record were eligible for inclusion. We trained 2 deep learning models and compared their predictions to codes submitted for reimbursement. We calculated accuracy, recall, precision, F1-score, and area under the receiver operating characteristic curve. Results: Of the 245,045 visits eligible for inclusion, 198,802 (81\%) were included in model development. Accuracy was 99.8\% and 99.5\% for the diagnostic and billing code models, respectively. Recall was 49.4\% and 70.3\% for the diagnostic and billing code models, respectively. Precision was 55.3\% and 76.7\% for the diagnostic and billing code models, respectively. The area under the receiver operating characteristic curve was 0.983 for the diagnostic code model and 0.993 for the billing code model. Conclusions: We developed models capable of predicting diagnostic and billing codes from electronic notes following visits to a family medicine practice. The billing code model outperformed the diagnostic code model in terms of recall and precision, likely due to fewer codes being predicted. Work is underway to further enhance model performance and assess the generalizability of these models to other family medicine practices. 
", doi="10.2196/64279", url="https://ai.jmir.org/2025/1/e64279" } @Article{info:doi/10.2196/66821, author="El Kababji, Samer and Mitsakakis, Nicholas and Jonker, Elizabeth and Beltran-Bless, Ana-Alicia and Pond, Gregory and Vandermeer, Lisa and Radhakrishnan, Dhenuka and Mosquera, Lucy and Paterson, Alexander and Shepherd, Lois and Chen, Bingshu and Barlow, William and Gralow, Julie and Savard, Marie-France and Fesl, Christian and Hlauschek, Dominik and Balic, Marija and Rinnerthaler, Gabriel and Greil, Richard and Gnant, Michael and Clemons, Mark and El Emam, Khaled", title="Augmenting Insufficiently Accruing Oncology Clinical Trials Using Generative Models: Validation Study", journal="J Med Internet Res", year="2025", month="Mar", day="5", volume="27", pages="e66821", keywords="generative models", keywords="study accrual", keywords="recruitment", keywords="clinical trial replication", keywords="oncology", keywords="validation", keywords="simulated patient", keywords="simulation", keywords="retrospective", keywords="dataset", keywords="patient", keywords="artificial intelligence", keywords="machine learning", abstract="Background: Insufficient patient accrual is a major challenge in clinical trials and can result in underpowered studies, as well as exposing study participants to toxicity and additional costs, with limited scientific benefit. Real-world data can provide external controls, but insufficient accrual affects all arms of a study, not just controls. Studies that used generative models to simulate more patients were limited in the accrual scenarios considered, replicability criteria, number of generative models, and number of clinical trials evaluated. Objective: This study aimed to perform a comprehensive evaluation on the extent generative models can be used to simulate additional patients to compensate for insufficient accrual in clinical trials. Methods: We performed a retrospective analysis using 10 datasets from 9 fully accrued, completed, and published cancer trials. For each trial, we removed the latest recruited patients (from 10\% to 50\%), trained a generative model on the remaining patients, and simulated additional patients to replace the removed ones using the generative model to augment the available data. We then replicated the published analysis on this augmented dataset to determine if the findings remained the same. Four different generative models were evaluated: sequential synthesis with decision trees, Bayesian network, generative adversarial network, and a variational autoencoder. These generative models were compared to sampling with replacement (ie, bootstrap) as a simple alternative. Replication of the published analyses used 4 metrics: decision agreement, estimate agreement, standardized difference, and CI overlap. Results: Sequential synthesis performed well on the 4 replication metrics for the removal of up to 40\% of the last recruited patients (decision agreement: 88\% to 100\% across datasets, estimate agreement: 100\%, cannot reject standardized difference null hypothesis: 100\%, and CI overlap: 0.8-0.92). Sampling with replacement was the next most effective approach, with decision agreement varying from 78\% to 89\% across all datasets. There was no evidence of a monotonic relationship in the estimated effect size with recruitment order across these studies. 
This suggests that patients recruited earlier in a trial were not systematically different from those recruited later, at least partially explaining why generative models trained on early data can effectively simulate patients recruited later in a trial. The fidelity of the generated data relative to the training data, as measured by the Hellinger distance, was high in all cases. Conclusions: For an oncology study that has accrued as few as 60\% of its target recruitment, sequential synthesis can simulate the full dataset that would have been available had the study continued accruing patients and can be an alternative to drawing conclusions from an underpowered study. These results provide evidence demonstrating the potential for generative models to rescue poorly accruing clinical trials, but additional studies are needed to confirm these findings and to generalize them to other diseases. ", doi="10.2196/66821", url="https://www.jmir.org/2025/1/e66821", url="http://www.ncbi.nlm.nih.gov/pubmed/40053790" } @Article{info:doi/10.2196/52119, author="Tighe, Carlos and Ngongalah, Lem and Sent{\'i}s, Alexis and Orchard, Francisco and Pacurar, Gheorghe-Aurel and Hayes, Conor and Hayes, S. Jessica and Toader, Adrian and Connolly, A. M{\'a}ire", title="Building and Developing a Tool (PANDEM-2 Dashboard) to Strengthen Pandemic Management: Participatory Design Study", journal="JMIR Public Health Surveill", year="2025", month="Mar", day="5", volume="11", pages="e52119", keywords="pandemic preparedness and response", keywords="COVID-19", keywords="cross-border collaboration", keywords="surveillance", keywords="data collection", keywords="data standardization", keywords="data sharing", keywords="dashboard", keywords="IT system", keywords="IT tools", abstract="Background: The COVID-19 pandemic exposed challenges in pandemic management, particularly in real-time data sharing and effective decision-making. Data protection concerns and the lack of data interoperability and standardization hindered the collection, analysis, and interpretation of critical information. Effective data visualization and customization are essential to facilitate decision-making. Objective: This study describes the development of the PANDEM-2 dashboard, a system providing a standardized and interactive platform for decision-making in pandemic management. It outlines the participatory approaches used to involve expert end users in its development and addresses key considerations of privacy, data protection, and ethical and social issues. Methods: Development was informed by a review of 25 publicly available COVID-19 dashboards, leading to the creation of a visualization catalog. User requirements were gathered through workshops and consultations with 20 experts from various health care and public health professions in 13 European Union countries. These were further refined by mapping variables and indicators required to fulfill the identified needs. Through a participatory design process, end users interacted with a preprototype platform, explored potential interface designs, and provided feedback to refine the system's components. Potential privacy, data protection, and ethical and social risks associated with the technology, along with mitigation strategies, were identified through an iterative impact assessment. Results: Key variables incorporated into the PANDEM-2 dashboard included case rates, number of deaths, mortality rates, hospital resources, hospital admissions, testing, contact tracing, and vaccination uptake.
Cases, deaths, and vaccination uptake were prioritized as the most relevant and readily available variables. However, data gaps, particularly in contact tracing and mortality rates, highlighted the need for better data collection and reporting mechanisms. User feedback emphasized the importance of diverse data visualization formats combining different data types, as well as analyzing data across various time frames. Users also expressed interest in generating custom visualizations and reports, especially on the impact of government interventions. Participants noted challenges in data reporting, such as inconsistencies in reporting levels, time intervals, the need for standardization between member states, and General Data Protection Regulation concerns for data sharing. Identified risks included ethical concerns (accessibility, user autonomy, responsible use, transparency, and accountability), privacy and data protection (security and access controls and data reidentification), and social issues (unintentional bias, data quality and accuracy, dependency on technology, and collaborative development). Mitigation measures focused on designing user-friendly interfaces, implementing robust security protocols, and promoting cross-member state collaboration. Conclusions: The PANDEM-2 dashboard provides an adaptable, user-friendly platform for pandemic preparedness and response. Our findings highlight the critical role of data interoperability, cross-border collaboration, and custom IT tools in strengthening future health crisis management. They also offer valuable insights into the challenges and opportunities in developing IT solutions to support pandemic preparedness. ", doi="10.2196/52119", url="https://publichealth.jmir.org/2025/1/e52119", url="http://www.ncbi.nlm.nih.gov/pubmed/40053759" } @Article{info:doi/10.2196/68863, author="Ohno, Yukiko and Aomori, Tohru and Nishiyama, Tomohiro and Kato, Riri and Fujiki, Reina and Ishikawa, Haruki and Kiyomiya, Keisuke and Isawa, Minae and Mochizuki, Mayumi and Aramaki, Eiji and Ohtani, Hisakazu", title="Performance Improvement of a Natural Language Processing Tool for Extracting Patient Narratives Related to Medical States From Japanese Pharmaceutical Care Records by Increasing the Amount of Training Data: Natural Language Processing Analysis and Validation Study", journal="JMIR Med Inform", year="2025", month="Mar", day="4", volume="13", pages="e68863", keywords="natural language processing", keywords="NLP", keywords="named entity recognition", keywords="NER", keywords="deep learning", keywords="pharmaceutical care record", keywords="electronic medical record", keywords="EMR", keywords="Japanese", abstract="Background: Patients' oral expressions serve as valuable sources of clinical information to improve pharmacotherapy. Natural language processing (NLP) is a useful approach for analyzing unstructured text data, such as patient narratives. However, few studies have focused on using NLP for narratives in the Japanese language. Objective: We aimed to develop a high-performance NLP system for extracting clinical information from patient narratives by examining the performance progression with a gradual increase in the amount of training data. Methods: We used subjective texts from the pharmaceutical care records of Keio University Hospital from April 1, 2018, to March 31, 2019, comprising 12,004 records from 6559 cases. After preprocessing, we annotated diseases and symptoms within the texts. 
We then trained and evaluated a deep learning model (bidirectional encoder representations from transformers combined with a conditional random field [BERT-CRF]) through 10-fold cross-validation. The annotated data were divided into 10 subsets, and the amount of training data was progressively increased over 10 steps. We also analyzed the causes of errors. Finally, we applied the developed system to the analysis of case report texts to evaluate its usability for texts from other sources. Results: The F1-score of the system improved from 0.67 to 0.82 as the amount of training data increased from 1200 to 12,004 records. The F1-score reached 0.78 with 3600 records and was largely similar thereafter. As performance improved, errors from incorrect extractions decreased significantly, which resulted in an increase in precision. For case reports, the F1-score also increased from 0.34 to 0.41 as the training dataset expanded from 1200 to 12,004 records. Performance was lower for extracting symptoms from case report texts compared with pharmaceutical care records, suggesting that this system is more specialized for analyzing subjective data from pharmaceutical care records. Conclusions: We successfully developed a high-performance system specialized in analyzing subjective data from pharmaceutical care records by training a large dataset, with near-complete saturation of system performance with about 3600 training records. This system will be useful for monitoring symptoms, offering benefits for both clinical practice and research. ", doi="10.2196/68863", url="https://medinform.jmir.org/2025/1/e68863", url="http://www.ncbi.nlm.nih.gov/pubmed/40053805" } @Article{info:doi/10.2196/68354, author="Huang, Pinjie and Yang, Jirong and Zhao, Dizhou and Ran, Taojia and Luo, Yuheng and Yang, Dong and Zheng, Xueqin and Zhou, Shaoli and Chen, Chaojin", title="Machine Learning--Based Prediction of Early Complications Following Surgery for Intestinal Obstruction: Multicenter Retrospective Study", journal="J Med Internet Res", year="2025", month="Mar", day="3", volume="27", pages="e68354", keywords="postoperative complications", keywords="intestinal obstruction", keywords="machine learning", keywords="early intervention", keywords="risk calculator", keywords="prediction model", keywords="Shapley additive explanations", abstract="Background: Early complications increase in-hospital stay and mortality after intestinal obstruction surgery. It is important to identify the risk of postoperative early complications for patients with intestinal obstruction at a sufficiently early stage, which would allow preemptive individualized enhanced therapy to be conducted to improve the prognosis of patients with intestinal obstruction. A risk predictive model based on machine learning is helpful for early diagnosis and timely intervention. Objective: This study aimed to construct an online risk calculator for early postoperative complications in patients after intestinal obstruction surgery based on machine learning algorithms. Methods: A total of 396 patients undergoing intestinal obstruction surgery from April 2013 to April 2021 at an independent medical center were enrolled as the training cohort. Overall, 7 machine learning methods were used to establish prediction models, with their performance appraised via the area under the receiver operating characteristic curve (AUROC), accuracy, sensitivity, specificity, and F1-score. 
The best model was validated using data from 2 independent medical centers, a publicly available perioperative dataset (the Informative Surgical Patient dataset for Innovative Research Environment [INSPIRE]), and a mixed cohort consisting of the above 3 datasets, involving 50, 66, 48, and 164 cases, respectively. Shapley Additive Explanations were measured to identify risk factors. Results: The incidence of postoperative complications in the training cohort was 47.44\% (176/371), while the incidences in the 4 external validation cohorts were 34\% (17/50), 56.06\% (37/66), 52.08\% (25/48), and 48.17\% (79/164), respectively. Postoperative complications were associated with 8 features: the physiological score of the Physiological and Operative Severity Score for the enUmeration of Mortality and Morbidity (POSSUM), the amount of colloid infusion, shock index before anesthesia induction, ASA (American Society of Anesthesiologists) classification, the percentage of neutrophils, shock index at the end of surgery, age, and total protein. The random forest model showed the best overall performance, with an AUROC of 0.788 (95\% CI 0.709-0.869), accuracy of 0.756, sensitivity of 0.695, specificity of 0.810, and F1-score of 0.727 in the training cohort. The random forest model also achieved a comparable AUROC of 0.755 (95\% CI 0.652-0.839) in validation cohort 1, a greater AUROC of 0.817 (95\% CI 0.695-0.913) in validation cohort 2, a similar AUROC of 0.786 (95\% CI 0.628-0.902) in validation cohort 3, and a comparable AUROC of 0.720 (95\% CI 0.671-0.768) in validation cohort 4. We visualized the random forest model and created a web-based online risk calculator. Conclusions: We have developed and validated a generalizable random forest model to predict postoperative early complications in patients undergoing intestinal obstruction surgery, enabling clinicians to screen high-risk patients and implement early individualized interventions. An online risk calculator for early postoperative complications was developed to make the random forest model accessible to clinicians around the world. ", doi="10.2196/68354", url="https://www.jmir.org/2025/1/e68354", url="http://www.ncbi.nlm.nih.gov/pubmed/40053794" } @Article{info:doi/10.2196/63312, author="Borg, Alexander and Georg, Carina and Jobs, Benjamin and Huss, Viking and Waldenlind, Kristin and Ruiz, Mini and Edelbring, Samuel and Skantze, Gabriel and Parodis, Ioannis", title="Virtual Patient Simulations Using Social Robotics Combined With Large Language Models for Clinical Reasoning Training in Medical Education: Mixed Methods Study", journal="J Med Internet Res", year="2025", month="Mar", day="3", volume="27", pages="e63312", keywords="virtual patients", keywords="clinical reasoning", keywords="large language models", keywords="social robotics", keywords="medical education", keywords="sustainable learning", keywords="medical students", abstract="Background: Virtual patients (VPs) are computer-based simulations of clinical scenarios used in health professions education to address various learning outcomes, including clinical reasoning (CR). CR is a crucial skill for health care practitioners, and its inadequacy can compromise patient safety. Recent advancements in large language models (LLMs) and social robots have introduced new possibilities for enhancing VP interactivity and realism. However, their application in VP simulations has been limited, and no studies have investigated the effectiveness of combining LLMs with social robots for CR training.
Objective: The aim of the study is to explore the potential added value of a social robotic VP platform combined with an LLM compared to a conventional computer-based VP modality for CR training of medical students. Methods: A Swedish explorative proof-of-concept study was conducted between May and July 2023, combining quantitative and qualitative methodology. In total, 15 medical students from Karolinska Institutet and an international exchange program completed a VP case in a social robotic platform and a computer-based semilinear platform. Students' self-perceived VP experience focusing on CR training was assessed using a previously developed index, and paired 2-tailed t test was used to compare mean scores (scales from 1 to 5) between the platforms. Moreover, in-depth interviews were conducted with 8 medical students. Results: The social robotic platform was perceived as more authentic (mean 4.5, SD 0.7 vs mean 3.9, SD 0.5; odds ratio [OR] 2.9, 95\% CI 0.0-1.0; P=.04) and provided a beneficial overall learning effect (mean 4.4, SD 0.6 versus mean 4.1, SD 0.6; OR 3.7, 95\% CI 0.1-0.5; P=.01) compared with the computer-based platform. Qualitative analysis revealed 4 themes, wherein students experienced the social robot as superior to the computer-based platform in training CR, communication, and emotional skills. Limitations related to technical and user-related aspects were identified, and suggestions for improvements included enhanced facial expressions and VP cases simulating multiple personalities. Conclusions: A social robotic platform enhanced by an LLM may provide an authentic and engaging learning experience for medical students in the context of VP simulations for training CR. Beyond its limitations, several aspects of potential improvement were identified for the social robotic platform, lending promise for this technology as a means toward the attainment of learning outcomes within medical education curricula. ", doi="10.2196/63312", url="https://www.jmir.org/2025/1/e63312", url="http://www.ncbi.nlm.nih.gov/pubmed/40053778" } @Article{info:doi/10.2196/65565, author="Owoyemi, Ayomide and Osuchukwu, Joanne and Salwei, E. Megan and Boyd, Andrew", title="Checklist Approach to Developing and Implementing AI in Clinical Settings: Instrument Development Study", journal="JMIRx Med", year="2025", month="Feb", day="20", volume="6", pages="e65565", keywords="artificial intelligence", keywords="machine learning", keywords="algorithm", keywords="model", keywords="analytics", keywords="AI deployment", keywords="human-AI interaction", keywords="AI integration", keywords="checklist", keywords="clinical workflow", keywords="clinical setting", keywords="literature review", abstract="Background: The integration of artificial intelligence (AI) in health care settings demands a nuanced approach that considers both technical performance and sociotechnical factors. Objective: This study aimed to develop a checklist that addresses the sociotechnical aspects of AI deployment in health care and provides a structured, holistic guide for teams involved in the life cycle of AI systems. Methods: A literature synthesis identified 20 relevant studies, forming the foundation for the Clinical AI Sociotechnical Framework checklist. A modified Delphi study was then conducted with 35 global health care professionals. 
Participants assessed the checklist's relevance across 4 stages: ``Planning,'' ``Design,'' ``Development,'' and ``Proposed Implementation.'' A consensus threshold of 80\% was established for each item. IQRs and Cronbach $\alpha$ were calculated to assess agreement and reliability. Results: The initial checklist had 45 questions. Following participant feedback, the checklist was refined to 34 items, and a final round saw 100\% consensus on all items (mean score >0.8, IQR 0). Based on the outcome of the Delphi study, a final checklist was outlined, with 1 more question added to make 35 questions in total. Conclusions: The Clinical AI Sociotechnical Framework checklist provides a comprehensive, structured approach to developing and implementing AI in clinical settings, addressing technical and social factors critical for adoption and success. This checklist is a practical tool that aligns AI development with real-world clinical needs, aiming to enhance patient outcomes and integrate smoothly into health care workflows. ", doi="10.2196/65565", url="https://xmed.jmir.org/2025/1/e65565" } @Article{info:doi/10.2196/68436, author="Chen, Yuan-Hsin and Lin, Ching-Hsuan and Fan, Chiao-Hsin and Long, Jim An and Scholl, Jeremiah and Kao, Yen-Pin and Iqbal, Usman and Li, Jack Yu-Chuan", title="Machine Learning Approach to Identifying Wrong-Site Surgeries Using Centers for Medicare and Medicaid Services Dataset: Development and Validation Study", journal="JMIR Form Res", year="2025", month="Feb", day="13", volume="9", pages="e68436", keywords="patient safety", keywords="wrong site surgery", keywords="medical errors", keywords="machine learning", keywords="claim data", abstract="Background: Wrong-site surgery (WSS) is a critical but preventable medical error, often resulting in severe patient harm and substantial financial costs. While protocols exist to reduce wrong-site surgery, underreporting and inconsistent documentation continue to contribute to its persistence. Machine learning (ML) models, which have shown success in detecting medication errors, may offer a solution by identifying unusual procedure-diagnosis combinations. This study investigated whether an ML approach can effectively adapt to detect surgical errors. Objective: This study aimed to evaluate the transferability and effectiveness of an ML-based model for detecting inconsistencies within surgical documentation, particularly focusing on laterality discrepancies. Methods: We used claims data from the Centers for Medicare and Medicaid Services Limited Data Set (CMS-LDS) from 2017 to 2020, focusing on surgical procedures with documented laterality. We developed an adapted Association Outlier Pattern (AOP) ML model to identify uncommon procedure-diagnosis combinations, specifically targeting discrepancies in laterality. The model was trained on data from 2017 to 2019 and tested on 2020 orthopedic procedures, using ICD-10-PCS (International Classification of Diseases, Tenth Revision, Procedure Coding System) codes to distinguish body part and laterality. Test cases were classified based on alignment between procedural and diagnostic laterality, with 2 key subgroups (right-left and left-right mismatches) identified for evaluation. Model performance was assessed by comparing precision-recall curves and accuracy against rule-based methods. Results: The analysis included 346,382 claims, of which 2170 demonstrated significant laterality discrepancies between procedures and diagnoses.
Among cases with left-side procedures and right-side diagnoses, 54.5\% (603/1106) were confirmed as errors after clinical review. For right-side procedures with left-side diagnoses, 50.8\% (541/1064) were classified as errors. The AOP model identified 697 and 655 potentially unusual combinations in the left-right and right-left subgroups, respectively, with over 80\% of these cases confirmed as errors following clinical review. Most confirmed errors involved discrepancies in laterality for the same body part, while nonerror cases typically involved general diagnoses without specified laterality. Conclusions: This investigation showed that the AOP model effectively detects inconsistencies between surgical procedures and diagnoses using CMS-LDS data. The AOP model outperformed traditional rule-based methods, offering higher accuracy in identifying errors. Moreover, the model's transferability from medication-disease associations to procedure-diagnosis verification highlights its broad applicability. By improving the precision of identifying laterality discrepancies, the AOP model can reduce surgical errors, particularly in orthopedic care. These findings suggest that the model enhances patient safety and has the potential to improve clinical decision-making and outcomes. ", doi="10.2196/68436", url="https://formative.jmir.org/2025/1/e68436" } @Article{info:doi/10.2196/68135, author="Peasley, Dale and Kuplicki, Rayus and Sen, Sandip and Paulus, Martin", title="Leveraging Large Language Models and Agent-Based Systems for Scientific Data Analysis: Validation Study", journal="JMIR Ment Health", year="2025", month="Feb", day="13", volume="12", pages="e68135", keywords="LLM", keywords="agent-based systems", keywords="scientific data analysis", keywords="data contextualization", keywords="AI-driven research tools", keywords="large language model", keywords="scientific data", keywords="analysis", keywords="contextualization", keywords="AI", keywords="artificial intelligence", keywords="research tool", abstract="Background: Large language models have shown promise in transforming how complex scientific data are analyzed and communicated, yet their application to scientific domains remains challenged by issues of factual accuracy and domain-specific precision. The Laureate Institute for Brain Research--Tulsa University (LIBR-TU) Research Agent (LITURAt) leverages a sophisticated agent-based architecture to mitigate these limitations, using external data retrieval and analysis tools to ensure reliable, context-aware outputs that make scientific information accessible to both experts and nonexperts. Objective: The objective of this study was to develop and evaluate LITURAt to enable efficient analysis and contextualization of complex scientific datasets for diverse user expertise levels. Methods: An agent-based system based on large language models was designed to analyze and contextualize complex scientific datasets using a ``plan-and-solve'' framework. The system dynamically retrieves local data and relevant PubMed literature, performs statistical analyses, and generates comprehensive, context-aware summaries to answer user queries with high accuracy and consistency. Results: Our experiments demonstrated that LITURAt achieved an internal consistency rate of 94.8\% and an external consistency rate of 91.9\% across repeated and rephrased queries.
Additionally, GPT-4 evaluations rated 80.3\% (171/213) of the system's answers as accurate and comprehensive, with 23.5\% (50/213) receiving the highest rating of 5 for completeness and precision. Conclusions: These findings highlight the potential of LITURAt to significantly enhance the accessibility and accuracy of scientific data analysis, achieving high consistency and strong performance in complex query resolution. Despite existing limitations, such as model stability for highly variable queries, LITURAt demonstrates promise as a robust tool for democratizing data-driven insights across diverse scientific domains. ", doi="10.2196/68135", url="https://mental.jmir.org/2025/1/e68135" } @Article{info:doi/10.2196/59961, author="Lu, An-Tai and Liou, Chong-Sin and Lai, Chia-Hsin and Shian, Bo-Tsz and Li, Ming-Ta and Sun, Chih-Yen and Kao, Hao-Yun and Dai, Hong-Jie and Tsai, Ming-Ju", title="Application of Clinical Department--Specific AI-Assisted Coding Using Taiwan Diagnosis-Related Groups: Retrospective Validation Study", journal="JMIR Hum Factors", year="2025", month="Feb", day="12", volume="12", pages="e59961", keywords="diagnosis-related group", keywords="artificial intelligence coding", keywords="International Classification of Diseases, Tenth Revision, Clinical Modification", keywords="ICD-10-CM", keywords="coding professionals", abstract="Background: The accuracy of the ICD-10-CM (International Classification of Diseases, Tenth Revision, Clinical Modification) procedure coding system (PCS) is crucial for generating correct Taiwan diagnosis-related groups (DRGs), as coding errors can lead to financial losses for hospitals. Objective: The study aimed to determine the consistency between an artificial intelligence (AI)-assisted coding module and manual coding, as well as to identify clinical specialties suitable for implementing the developed AI-assisted coding module. Methods: This study examined the AI-assisted coding module from the perspective of health care professionals. The research period started in February 2023. The study excluded cases outside of Taiwan DRGs, those with incomplete medical records, and cases with Taiwan DRG disposals based on ICD-10 (International Statistical Classification of Diseases, Tenth Revision) PCS. Data collection was conducted through retrospective medical record review. The AI-assisted module was constructed using a hierarchical attention network. The verification of the Taiwan DRGs results from the AI-assisted coding model focused on the major diagnostic categories (MDCs). Statistical computations were conducted using SPSS version 19. Research variables consisted of categorical variables, represented by MDC, and continuous variables, represented by the relative weight of Taiwan DRGs. Results: A total of 2632 discharge records meeting the research criteria were collected from February to April 2023. In terms of inferential statistics, $\kappa$ statistics were used for MDC analysis. The infectious and parasitic diseases MDC, as well as the respiratory diseases MDC, had $\kappa$ values exceeding 0.8. Clinical inpatient specialties were statistically analyzed using the Wilcoxon signed rank test. There was no difference in coding results among the 23 clinical departments, which included the Division of Cardiology, the Division of Nephrology, and the Department of Urology. Conclusions: With the assistance of the ICD-10-CM AI-assisted coding system, human coders' work time is reduced.
Additionally, strengthening knowledge in clinical documentation enables human coders to maximize their role. This positions them to become clinical documentation experts, preparing them for further career development. Future research will apply the same method to validate the ICD-10 AI-assisted coding module. ", doi="10.2196/59961", url="https://humanfactors.jmir.org/2025/1/e59961" } @Article{info:doi/10.2196/58107, author="Kim, Sunyoung and Park, Jaeyu and Son, Yejun and Lee, Hojae and Woo, Selin and Lee, Myeongcheol and Lee, Hayeon and Sang, Hyunji and Yon, Keon Dong and Rhee, Youl Sang", title="Development and Validation of a Machine Learning Algorithm for Predicting Diabetes Retinopathy in Patients With Type 2 Diabetes: Algorithm Development Study", journal="JMIR Med Inform", year="2025", month="Feb", day="7", volume="13", pages="e58107", keywords="type 2 diabetes", keywords="diabetes retinopathy", keywords="algorithm", keywords="machine learning", keywords="prediction", keywords="comorbidities", keywords="retinal", keywords="ophthalmology", abstract="Background: Diabetic retinopathy (DR) is the leading cause of preventable blindness worldwide. Machine learning (ML) systems can enhance DR screening in community-based settings. However, the predictive power, usability, and performance of such models are still being determined. Objective: This study used data from 3 university hospitals in South Korea to conduct a simple and accurate assessment of ML-based risk prediction for the development of DR that can be universally applied to adults with type 2 diabetes mellitus (T2DM). Methods: DR was predicted using data from 2 independent electronic medical records: a discovery cohort (one hospital, n=14,694) and a validation cohort (2 hospitals, n=1856). The primary outcome was the presence of DR at 3 years. Different ML-based models were selected through hyperparameter tuning in the discovery cohort, and the area under the receiver operating characteristic (ROC) curve was analyzed in both cohorts. Results: Among 14,694 patients screened for inclusion, 348 (2.37\%) were diagnosed with DR. For DR, the extreme gradient boosting (XGBoost) system had an accuracy of 75.13\% (95\% CI 74.10-76.17), a sensitivity of 71.00\% (95\% CI 66.83-75.17), and a specificity of 75.23\% (95\% CI 74.16-76.31) in the original dataset. In the validation datasets, XGBoost had an accuracy of 65.14\%, a sensitivity of 64.96\%, and a specificity of 65.15\%. The most common feature in the XGBoost model was dyslipidemia, followed by cancer, hypertension, chronic kidney disease, neuropathy, and cardiovascular disease. Conclusions: This approach shows the potential to enhance patient outcomes by enabling timely interventions in patients with T2DM, improving our understanding of contributing factors, and reducing DR-related complications. The proposed prediction model is expected to be both competitive and cost-effective, particularly for primary care settings in South Korea.
", doi="10.2196/58107", url="https://medinform.jmir.org/2025/1/e58107" } @Article{info:doi/10.2196/58779, author="Liu, Guanghao and Zheng, Shixiang and He, Jun and Zhang, Zi-Mei and Wu, Ruoqiong and Yu, Yingying and Fu, Hao and Han, Li and Zhu, Haibo and Xu, Yichang and Shao, Huaguo and Yan, Haidan and Chen, Ting and Shen, Xiaopei", title="An Easy and Quick Risk-Stratified Early Forewarning Model for Septic Shock in the Intensive Care Unit: Development, Validation, and Interpretation Study", journal="J Med Internet Res", year="2025", month="Feb", day="6", volume="27", pages="e58779", keywords="septic shock", keywords="early forewarning", keywords="risk stratification", keywords="machine learning", abstract="Background: Septic shock (SS) is a syndrome with high mortality. Early forewarning and diagnosis of SS, which are critical in reducing mortality, are still challenging in clinical management. Objective: We propose a simple and fast risk-stratified forewarning model for SS to help physicians recognize patients in time. Moreover, further insights can be gained from the application of the model to improve our understanding of SS. Methods: A total of 5125 patients with sepsis from the Medical Information Mart for Intensive Care-IV (MIMIC-IV) database were divided into training, validation, and test sets. In addition, 2180 patients with sepsis from the eICU Collaborative Research Database (eICU) were used as an external validation set. We developed a simplified risk-stratified early forewarning model for SS based on the weight of evidence and logistic regression, which was compared with multi-feature complex models, and clinical characteristics among risk groups were evaluated. Results: Using only vital signs and rapid arterial blood gas test features according to feature importance, we constructed the Septic Shock Risk Predictor (SORP), with an area under the curve (AUC) of 0.9458 in the test set, which is only slightly lower than that of the optimal multi-feature complex model (0.9651). A median forewarning time of 13 hours was calculated for SS patients. 4 distinct risk groups (high, medium, low, and ultralow) were identified by the SORP 6 hours before onset, and the incidence rates of SS in the 4 risk groups in the postonset interval were 88.6\% (433/489), 34.5\% (262/760), 2.5\% (67/2707), and 0.3\% (4/1301), respectively. The severity increased significantly with increasing risk in both clinical features and survival. Clustering analysis demonstrated a high similarity of pathophysiological characteristics between the high-risk patients without SS diagnosis (NS\_HR) and the SS patients, while a significantly worse overall survival was shown in NS\_HR patients. On further exploring the characteristics of the treatment and comorbidities of the NS\_HR group, these patients demonstrated a significantly higher incidence of mean blood pressure <65 mmHg, significantly lower vasopressor use and infused volume, and more severe renal dysfunction. The above findings were further validated by multicenter eICU data. Conclusions: The SORP demonstrated accurate forewarning and a reliable risk stratification capability. Among patients forewarned as high risk, similar pathophysiological phenotypes and high mortality were observed in both those subsequently diagnosed as having SS and those without such a diagnosis. 
NS\_HR patients, overlooked by the Sepsis-3 definition, may provide further insights into the pathophysiological processes of SS onset and help to complement its diagnosis and precise management. The importance of precise fluid resuscitation management in SS patients with renal dysfunction is further highlighted. For convenience, an online service for the SORP has been provided. ", doi="10.2196/58779", url="https://www.jmir.org/2025/1/e58779" } @Article{info:doi/10.2196/56880, author="Wac, Marceli and Santos-Rodriguez, Raul and McWilliams, Chris and Bourdeaux, Christopher", title="Capturing Requirements for a Data Annotation Tool for Intensive Care: Experimental User-Centered Design Study", journal="JMIR Hum Factors", year="2025", month="Feb", day="5", volume="12", pages="e56880", keywords="ICU", keywords="intensive care", keywords="machine learning", keywords="data annotation", keywords="data labeling", keywords="annotation software", keywords="capturing software requirements", abstract="Background: Increasing use of computational methods in health care provides opportunities to address previously unsolvable problems. Machine learning techniques applied to routinely collected data can enhance clinical tools and improve patient outcomes, but their effective deployment comes with significant challenges. While some tasks can be addressed by training machine learning models directly on the collected data, more complex problems require additional input in the form of data annotations. Data annotation is a complex and time-consuming problem that requires domain expertise and, frequently, technical proficiency. With clinicians' time being an extremely limited resource, existing tools fail to provide an effective workflow for deployment in health care. Objective: This paper investigates the approach of intensive care unit staff to the task of data annotation. Specifically, it aims to (1) understand how clinicians approach data annotation and (2) capture the requirements for a digital annotation tool for the health care setting. Methods: We conducted an experimental activity involving annotation of the printed excerpts of real time-series admission data with 7 intensive care unit clinicians. Each participant annotated an identical set of admissions with the periods of weaning from mechanical ventilation during a single 45-minute workshop. Participants were observed during task completion and their actions were analyzed within Norman's Interaction Cycle model to identify the software requirements. Results: Clinicians followed a cyclic process of investigation, annotation, data reevaluation, and label refinement. A variety of techniques were used to investigate data and create annotations. We identified 11 requirements for the digital tool across 4 domains: annotation of individual admissions (n=5), semiautomated annotation (n=3), operational constraints (n=2), and use of labels in machine learning (n=1). Conclusions: Effective data annotation in a clinical setting relies on flexibility in analysis and label creation and workflow continuity across multiple admissions. There is a need to ensure a seamless transition between data investigation, annotation, and refinement of the labels.
", doi="10.2196/56880", url="https://humanfactors.jmir.org/2025/1/e56880" } @Article{info:doi/10.2196/64972, author="Xu, Qian and Cai, Xue and Yu, Ruicong and Zheng, Yueyue and Chen, Guanjie and Sun, Hui and Gao, Tianyun and Xu, Cuirong and Sun, Jing", title="Machine Learning--Based Risk Factor Analysis and Prediction Model Construction for the Occurrence of Chronic Heart Failure: Health Ecologic Study", journal="JMIR Med Inform", year="2025", month="Jan", day="31", volume="13", pages="e64972", keywords="machine learning, chronic heart failure, risk of occurrence", keywords="prediction model, health ecology", abstract="Background: Chronic heart failure (CHF) is a serious threat to human health, with high morbidity and mortality rates, imposing a heavy burden on the health care system and society. With the abundance of medical data and the rapid development of machine learning (ML) technologies, new opportunities are provided for in-depth investigation of the mechanisms of CHF and the construction of predictive models. The introduction of health ecology research methodology enables a comprehensive dissection of CHF risk factors from a wider range of environmental, social, and individual factors. This not only helps to identify high-risk groups at an early stage but also provides a scientific basis for the development of precise prevention and intervention strategies. Objective: This study aims to use ML to construct a predictive model of the risk of occurrence of CHF and analyze the risk of CHF from a health ecology perspective. Methods: This study sourced data from the Jackson Heart Study database. Stringent data preprocessing procedures were implemented, which included meticulous management of missing values and the standardization of data. Principal component analysis and random forest (RF) were used as feature selection techniques. Subsequently, several ML models, namely decision tree, RF, extreme gradient boosting, adaptive boosting (AdaBoost), support vector machine, naive Bayes model, multilayer perceptron, and bootstrap forest, were constructed, and their performance was evaluated. The effectiveness of the models was validated through internal validation using a 10-fold cross-validation approach on the training and validation sets. In addition, the performance metrics of each model, including accuracy, precision, sensitivity, F1-score, and area under the curve (AUC), were compared. After selecting the best model, we used hyperparameter optimization to construct a better model. Results: RF-selected features (21 in total) had an average root mean square error of 0.30, outperforming principal component analysis. Synthetic Minority Oversampling Technique and Edited Nearest Neighbors showed better accuracy in data balancing. The AdaBoost model was most effective with an AUC of 0.86, accuracy of 75.30\%, precision of 0.86, sensitivity of 0.69, and F1-score of 0.76. Validation on the training and validation sets through 10-fold cross-validation gave an AUC of 0.97, an accuracy of 91.27\%, a precision of 0.94, a sensitivity of 0.92, and an F1-score of 0.94. After random search processing, the accuracy and AUC of AdaBoost improved. Its accuracy was 77.68\% and its AUC was 0.86. Conclusions: This study offered insights into CHF risk prediction. Future research should focus on prospective studies, diverse data, advanced techniques, longitudinal studies, and exploring factor interactions for better CHF prevention and management. 
", doi="10.2196/64972", url="https://medinform.jmir.org/2025/1/e64972" } @Article{info:doi/10.2196/58760, author="Li, Yanong and He, Yixuan and Liu, Yawei and Wang, Bingchen and Li, Bo and Qiu, Xiaoguang", title="Identification of Intracranial Germ Cell Tumors Based on Facial Photos: Exploratory Study on the Use of Deep Learning for Software Development", journal="J Med Internet Res", year="2025", month="Jan", day="30", volume="27", pages="e58760", keywords="deep learning", keywords="facial recognition", keywords="intracranial germ cell tumors", keywords="endocrine indicators", keywords="software development", keywords="artificial intelligence", keywords="machine learning models", keywords="software engineering", keywords="neural networks", keywords="algorithms", keywords="cohort studies", abstract="Background: Primary intracranial germ cell tumors (iGCTs) are highly malignant brain tumors that predominantly occur in children and adolescents, with an incidence rate ranking third among primary brain tumors in East Asia (8\%-15\%). Due to their insidious onset and impact on critical functional areas of the brain, these tumors often result in irreversible abnormalities in growth and development, as well as cognitive and motor impairments in affected children. Therefore, early diagnosis through advanced screening techniques is vital for improving patient outcomes and quality of life. Objective: This study aimed to investigate the application of facial recognition technology in the early detection of iGCTs in children and adolescents. Early diagnosis through advanced screening techniques is vital for improving patient outcomes and quality of life. Methods: A multicenter, phased approach was adopted for the development and validation of a deep learning model, GVisageNet, dedicated to the screening of midline brain tumors from normal controls (NCs) and iGCTs from other midline brain tumors. The study comprised the collection and division of datasets into training (n=847, iGCTs=358, NCs=300, other midline brain tumors=189) and testing (n=212, iGCTs=79, NCs=70, other midline brain tumors=63), with an additional independent validation dataset (n=336, iGCTs=130, NCs=100, other midline brain tumors=106) sourced from 4 medical institutions. A regression model using clinically relevant, statistically significant data was developed and combined with GVisageNet outputs to create a hybrid model. This integration sought to assess the incremental value of clinical data. The model's predictive mechanisms were explored through correlation analyses with endocrine indicators and stratified evaluations based on the degree of hypothalamic-pituitary-target axis damage. Performance metrics included area under the curve (AUC), accuracy, sensitivity, and specificity. Results: On the independent validation dataset, GVisageNet achieved an AUC of 0.938 (P<.01) in distinguishing midline brain tumors from NCs. Further, GVisageNet demonstrated significant diagnostic capability in distinguishing iGCTs from the other midline brain tumors, achieving an AUC of 0.739, which is superior to the regression model alone (AUC=0.632, P<.001) but less than the hybrid model (AUC=0.789, P=.04). Significant correlations were found between the GVisageNet's outputs and 7 endocrine indicators. Performance varied with hypothalamic-pituitary-target axis damage, indicating a further understanding of the working mechanism of GVisageNet. 
Conclusions: GVisageNet, capable of high accuracy both independently and with clinical data, shows substantial potential for early iGCT detection, highlighting the importance of combining deep learning with clinical insights for personalized health care. ", doi="10.2196/58760", url="https://www.jmir.org/2025/1/e58760" } @Article{info:doi/10.2196/53542, author="Demuth, Stanislas and De S{\`e}ze, J{\'e}r{\^o}me and Edan, Gilles and Ziemssen, Tjalf and Simon, Fran{\c{c}}oise and Gourraud, Pierre-Antoine", title="Digital Representation of Patients as Medical Digital Twins: Data-Centric Viewpoint", journal="JMIR Med Inform", year="2025", month="Jan", day="28", volume="13", pages="e53542", keywords="digital twin", keywords="artificial intelligence", keywords="data architecture", keywords="synthetic data", keywords="computational modeling", keywords="precision medicine", keywords="conceptual clarification", keywords="conceptual", keywords="patient", keywords="medicine", keywords="health record", keywords="digital records", keywords="synthetic patient", doi="10.2196/53542", url="https://medinform.jmir.org/2025/1/e53542" } @Article{info:doi/10.2196/59452, author="Willem, Theresa and Wollek, Alessandro and Cheslerean-Boghiu, Theodor and Kenney, Martha and Buyx, Alena", title="The Social Construction of Categorical Data: Mixed Methods Approach to Assessing Data Features in Publicly Available Datasets", journal="JMIR Med Inform", year="2025", month="Jan", day="28", volume="13", pages="e59452", keywords="machine learning", keywords="categorical data", keywords="social context dependency", keywords="mixed methods", keywords="dermatology", keywords="dataset analysis", abstract="Background: In data-sparse areas such as health care, computer scientists aim to leverage as much available information as possible to increase the accuracy of their machine learning models' outputs. As a standard, categorical data, such as patients' gender, socioeconomic status, or skin color, are used to train models in fusion with other data types, such as medical images and text-based medical information. However, the effects of including categorical data features for model training in such data-scarce areas are underexamined, particularly regarding models intended to serve individuals equitably in a diverse population. Objective: This study aimed to explore categorical data's effects on machine learning model outputs, root these effects in the data collection and dataset publication processes, and propose a mixed methods approach to examining datasets' data categories before using them for machine learning training. Methods: Against the theoretical background of the social construction of categories, we suggest a mixed methods approach to assess categorical data's utility for machine learning model training. As an example, we applied our approach to a Brazilian dermatological dataset (Dermatological and Surgical Assistance Program at the Federal University of Esp{\'i}rito Santo [PAD-UFES] 20). We first present an exploratory, quantitative study that assesses the effects when including or excluding each of the unique categorical data features of the PAD-UFES 20 dataset for training a transformer-based model using a data fusion algorithm. We then pair our quantitative analysis with a qualitative examination of the data categories based on interviews with the dataset authors. Results: Our quantitative study suggests scattered effects of including categorical data for machine learning model training across predictive classes.
Our qualitative analysis gives insights into how the categorical data were collected and why they were published, explaining some of the quantitative effects that we observed. Our findings highlight the social constructedness of categorical data in publicly available datasets, meaning that the data in a category heavily depend on both how these categories are defined by the dataset creators and the sociomedical context in which the data are collected. This reveals relevant limitations of using publicly available datasets in contexts different from those in which their data were collected. Conclusions: We caution against using data features of publicly available datasets without reflection on the social construction and context dependency of their categorical data features, particularly in data-sparse areas. We conclude that social scientific, context-dependent analysis of available data features using both quantitative and qualitative methods is helpful in judging the utility of categorical data for the population for which a model is intended. ", doi="10.2196/59452", url="https://medinform.jmir.org/2025/1/e59452" } @Article{info:doi/10.2196/67969, author="Subramanian, Ajan and Cao, Rui and Naeini, Kasaeyan Emad and Aqajari, Hossein Seyed Amir and Hughes, D. Thomas and Calderon, Michael-David and Zheng, Kai and Dutt, Nikil and Liljeberg, Pasi and Salanter{\"a}, Sanna and Nelson, M. Ariana and Rahmani, M. Amir", title="Multimodal Pain Recognition in Postoperative Patients: Machine Learning Approach", journal="JMIR Form Res", year="2025", month="Jan", day="27", volume="9", pages="e67969", keywords="pain intensity recognition", keywords="multimodal information fusion", keywords="signal processing", keywords="weak supervision", keywords="health care", keywords="pain intensity", keywords="pain recognition", keywords="machine learning approach", keywords="acute pain", keywords="pain assessment", keywords="behavioral pain", keywords="pain measurement", keywords="pain monitoring", keywords="multimodal machine learning--based framework", keywords="machine learning--based framework", keywords="electrocardiogram", keywords="electromyogram", keywords="electrodermal activity", keywords="self-reported pain level", keywords="clinical pain management", abstract="Background: Acute pain management is critical in postoperative care, especially in vulnerable patient populations that may be unable to self-report pain levels effectively. Current methods of pain assessment often rely on subjective patient reports or behavioral pain observation tools, which can lead to inconsistencies in pain management. Multimodal pain assessment, integrating physiological and behavioral data, presents an opportunity to create more objective and accurate pain measurement systems. However, most previous work has focused on healthy subjects in controlled environments, with limited attention to real-world postoperative pain scenarios. This gap necessitates the development of robust, multimodal approaches capable of addressing the unique challenges associated with assessing pain in clinical settings, where factors like motion artifacts, imbalanced label distribution, and sparse data further complicate pain monitoring. Objective: This study aimed to develop and evaluate a multimodal machine learning--based framework for the objective assessment of pain in postoperative patients in real clinical settings using biosignals such as electrocardiogram, electromyogram, electrodermal activity, and respiration rate (RR) signals.
Methods: The iHurt study was conducted on 25 postoperative patients at the University of California, Irvine Medical Center. The study captured multimodal biosignals during light physical activities, with concurrent self-reported pain levels using the Numerical Rating Scale. Data preprocessing involved noise filtering, feature extraction, and combining handcrafted and automatic features through convolutional and long short-term memory autoencoders. Machine learning classifiers, including support vector machine, random forest, adaptive boosting, and k-nearest neighbors, were trained using weak supervision and minority oversampling to handle sparse and imbalanced pain labels. Pain levels were categorized into baseline and 3 levels of pain intensity (1-3). Results: The multimodal pain recognition models achieved an average balanced accuracy of over 80\% across the different pain levels. RR models consistently outperformed other single modalities, particularly for lower pain intensities, while facial muscle activity (electromyogram) was most effective for distinguishing higher pain intensities. Although single-modality models, especially RR, generally provided higher performance compared to multimodal approaches, our multimodal framework still delivered results that surpassed most previous works in terms of overall accuracy. Conclusions: This study presents a novel, multimodal machine learning framework for objective pain recognition in postoperative patients. The results highlight the potential of integrating multiple biosignal modalities for more accurate pain assessment, with particular value in real-world clinical settings. ", doi="10.2196/67969", url="https://formative.jmir.org/2025/1/e67969" } @Article{info:doi/10.2196/60250, author="Li, Aoyu and Li, Jingwen and Hu, Yishan and Geng, Yan and Qiang, Yan and Zhao, Juanjuan", title="A Dynamic Adaptive Ensemble Learning Framework for Noninvasive Mild Cognitive Impairment Detection: Development and Validation Study", journal="JMIR Med Inform", year="2025", month="Jan", day="20", volume="13", pages="e60250", keywords="mild cognitive impairment", keywords="ensemble learning", keywords="harmony search", keywords="combination optimization", keywords="digital cognitive assessment", keywords="physiological signal", keywords="cognitive impairment", keywords="detection", keywords="machine learning", keywords="cognitive metrics", keywords="photoplethysmography", keywords="neurodegenerative", keywords="Alzheimer", keywords="cognitive decline", abstract="Background: The prompt and accurate identification of mild cognitive impairment (MCI) is crucial for preventing its progression into more severe neurodegenerative diseases. However, current diagnostic solutions, such as biomarkers and cognitive screening tests, prove costly, time-consuming, and invasive, hindering patient compliance and the accessibility of these tests. Therefore, exploring a more cost-effective, efficient, and noninvasive method to aid clinicians in detecting MCI is necessary. Objective: This study aims to develop an ensemble learning framework that adaptively integrates multimodal physiological data collected from wearable wristbands and digital cognitive metrics recorded on tablets, thereby improving the accuracy and practicality of MCI detection.
Methods: We recruited 843 participants aged 60 years and older from the geriatrics and neurology departments of our collaborating hospitals, who were randomly divided into a development dataset (674/843 participants) and an internal test dataset (169/843 participants) at a 4:1 ratio. In addition, 226 older adults were recruited from 3 external centers to form an external test dataset. We measured their physiological signals (eg, electrodermal activity and photoplethysmography) and digital cognitive parameters (eg, reaction time and test scores) using the clinically certified Empatica E4 wristband and a tablet cognitive screening tool. The collected data underwent rigorous preprocessing, during which features in the time, frequency, and nonlinear domains were extracted from individual physiological signals. To address the challenges (eg, the curse of dimensionality and increased model complexity) posed by high-dimensional features, we developed a dynamic adaptive feature selection optimization algorithm to identify the most impactful subset of features for classification performance. Finally, the accuracy and efficiency of the classification model were improved by optimizing the combination of base learners. Results: The experimental results indicate that the proposed MCI detection framework achieved classification accuracies of 88.4\%, 85.5\%, and 84.5\% on the development, internal test, and external test datasets, respectively. The area under the curve for the binary classification task was 0.945 (95\% CI 0.903-0.986), 0.912 (95\% CI 0.859-0.965), and 0.904 (95\% CI 0.846-0.962) on these datasets. Furthermore, a statistical analysis of feature subsets during the iterative modeling process revealed that the decay time of skin conductance response, the percentage of continuous normal-to-normal intervals exceeding 50 milliseconds, the ratio of low-frequency to high-frequency (LF/HF) components in heart rate variability, and cognitive time features emerged as the most prevalent and effective indicators. Specifically, compared with healthy individuals, patients with MCI exhibited a longer skin conductance response decay time during cognitive testing (P<.001), a lower percentage of continuous normal-to-normal intervals exceeding 50 milliseconds (P<.001), and higher LF/HF (P<.001), accompanied by greater variability. Similarly, patients with MCI took longer to complete cognitive tests than healthy individuals (P<.001). Conclusions: The developed MCI detection framework has demonstrated exemplary performance and stability in large-scale validations. It establishes a new benchmark for noninvasive, effective early MCI detection that can be integrated into routine wearable and tablet-based assessments. Furthermore, the framework enables continuous and convenient self-screening within home or nonspecialized settings, effectively mitigating constraints related to underresourced health care and geographic location, making it an essential tool in the current fight against neurodegenerative diseases.
", doi="10.2196/60250", url="https://medinform.jmir.org/2025/1/e60250" } @Article{info:doi/10.2196/57715, author="Fong, Allan and Boxley, Christian and Schubel, Laura and Gallagher, Christopher and AuBuchon, Katarina and Arem, Hannah", title="Identifying Complex Scheduling Patterns Among Patients With Cancer With Transportation and Housing Needs: Feasibility Pilot Study", journal="JMIR Cancer", year="2025", month="Jan", day="17", volume="11", pages="e57715", keywords="patient scheduling", keywords="scheduling complexities", keywords="temporal data mining", keywords="dataset", keywords="breast cancer", keywords="social determinant of health", keywords="oncology", keywords="metastasis", keywords="cancer patient", keywords="social support", keywords="community health worker", keywords="housing need", keywords="care", keywords="transportation", keywords="algorithm", abstract="Background: Patients with cancer frequently encounter complex treatment pathways, often characterized by challenges with coordinating and scheduling appointments at various specialty services and locations. Identifying patients who might benefit from scheduling and social support from community health workers or patient navigators is largely determined on a case-by-case basis and is resource intensive. Objective: This study aims to propose a novel algorithm to use scheduling data to identify complex scheduling patterns among patients with transportation and housing needs. Methods: We present a novel algorithm to calculate scheduling complexity from patient scheduling data. We define patient scheduling complexity as an aggregation of sequence, resolution, and facility components. Schedule sequence complexity is the degree to which appointments are scheduled and arrived to in a nonchronological order. Resolution complexity is the degree of no shows or canceled appointments. Location complexity reflects the proportion of appointment dates at 2 or more different locations. Schedule complexity captures deviations from chronological order, unresolved appointments, and coordination across multiple locations. We apply the scheduling complexity algorithm to scheduling data from 38 patients with breast cancer enrolled in a 6-month comorbidity management intervention at an urban hospital in the Washington, DC area that serves low-income patients. We compare the scheduling complexity metric with count-based metrics: arrived ratio, rescheduled ratio, canceled ratio, and no-show ratio. We defined an aggregate count-based adjustment metric as the harmonic mean of rescheduled ratio, canceled ratio, and no-show ratio. A low count-based adjustment metric would indicate that a patient has fewer disruptions or changes in their appointment scheduling. Results: The patients had a median of 88 unique appointments (IQR 60.3), 62 arrived appointments (IQR 47.8), 13 rescheduled appointments (IQR 13.5), 9 canceled appointments (IQR 10), and 1.5 missed appointments (IQR 5). There was no statistically significant difference in count-based adjustments and scheduling complexity bins ($\chi$24=6.296, P=.18). In total, 5 patients exhibited high scheduling complexity with low count-based adjustments. A total of 2 patients exhibited high count-based adjustments with low scheduling complexity. 
Out of the 15 patients who indicated transportation or housing insecurity issues in conversations with community health workers, 86.7\% (13/15) were identified as having medium or high scheduling complexity, while 60\% (9/15) were identified as having medium or high count-based adjustments. Conclusions: Scheduling complexity identifies patients with complex but nonchronological scheduling behaviors who would be missed by traditional count-based metrics. This study shows a potential link between transportation and housing needs and schedule complexity. Scheduling complexity can complement count-based metrics when identifying patients who might need additional care coordination support, especially as it relates to transportation and housing needs. Trial Registration: ClinicalTrials.gov NCT04836221; https://clinicaltrials.gov/study/NCT04836221 ", doi="10.2196/57715", url="https://cancer.jmir.org/2025/1/e57715" } @Article{info:doi/10.2196/55046, author="Ding, Zhendong and Zhang, Linan and Zhang, Yihan and Yang, Jing and Luo, Yuheng and Ge, Mian and Yao, Weifeng and Hei, Ziqing and Chen, Chaojin", title="A Supervised Explainable Machine Learning Model for Perioperative Neurocognitive Disorder in Liver-Transplantation Patients and External Validation on the Medical Information Mart for Intensive Care IV Database: Retrospective Study", journal="J Med Internet Res", year="2025", month="Jan", day="15", volume="27", pages="e55046", keywords="machine learning", keywords="risk factors", keywords="liver transplantation", keywords="perioperative neurocognitive disorders", keywords="MIMIC-IV database", keywords="external validation", abstract="Background: Patients undergoing liver transplantation (LT) are at risk of perioperative neurocognitive dysfunction (PND), which significantly affects the patients' prognosis. Objective: This study used machine learning (ML) algorithms to extract critical predictors and develop an ML model to predict PND among LT recipients. Methods: In this retrospective study, data from 958 patients who underwent LT between January 2015 and January 2020 were extracted from the Third Affiliated Hospital of Sun Yat-sen University. Six ML algorithms were used to predict post-LT PND, and model performance was evaluated using the area under the receiver operating characteristic curve (AUC), accuracy, sensitivity, specificity, and F1-scores. The best-performing model was additionally validated using a temporal external dataset including 309 LT cases from February 2020 to August 2022, and an independent external dataset extracted from the Medical Information Mart for Intensive Care IV (MIMIC-IV) database including 325 patients. Results: In the development cohort, 201 out of 751 (33.5\%) patients were diagnosed with PND. The logistic regression model achieved the highest AUC (0.799) in the internal validation set, with comparable AUCs in the temporal external (0.826) and MIMIC-IV (0.72) validation sets. The top 3 features contributing to post-LT PND diagnosis were the preoperative overt hepatic encephalopathy, platelet level, and postoperative sequential organ failure assessment score, as revealed by the Shapley additive explanations method. Conclusions: A real-time logistic regression model-based online predictor of post-LT PND was developed, providing a highly interoperable tool for use across medical institutions to support early risk stratification and decision making for LT recipients.
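The study above ranks predictors with Shapley additive explanations; as a lighter-weight stand-in for that explanation step, the following Python sketch ranks features of a logistic regression model by permutation importance on synthetic data (permutation importance, not SHAP, is used here, and all data are simulated).

```python
# Sketch of a model-explanation step. The cited study used SHAP values;
# sklearn's permutation importance is shown instead as a simpler stand-in.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=958, n_features=10, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)

model = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
result = permutation_importance(model, X_te, y_te, scoring="roc_auc",
                                n_repeats=20, random_state=0)
# Report the 3 features whose shuffling hurts test AUC the most.
for i in np.argsort(result.importances_mean)[::-1][:3]:
    print(f"feature {i}: mean AUC drop {result.importances_mean[i]:.3f}")
```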
", doi="10.2196/55046", url="https://www.jmir.org/2025/1/e55046", url="http://www.ncbi.nlm.nih.gov/pubmed/39813086" } @Article{info:doi/10.2196/58073, author="Saito, Chihiro and Nakatani, Eiji and Sasaki, Hatoko and E Katsuki, Naoko and Tago, Masaki and Harada, Kiyoshi", title="Predictive Factors and the Predictive Scoring System for Falls in Acute Care Inpatients: Retrospective Cohort Study", journal="JMIR Hum Factors", year="2025", month="Jan", day="13", volume="12", pages="e58073", keywords="falls", keywords="inpatient falls", keywords="acute care hospital", keywords="predictive factor", keywords="risk factors", abstract="Background: Falls in hospitalized patients are a serious problem, resulting in physical injury, secondary complications, impaired activities of daily living, prolonged hospital stays, and increased medical costs. Establishing a fall prediction scoring system to identify patients most likely to fall can help prevent falls among hospitalized patients. Objectives: This study aimed to identify predictive factors of falls in acute care hospital patients, develop a scoring system, and evaluate its validity. Methods: This single-center, retrospective cohort study involved patients aged 20 years or older admitted to Shizuoka General Hospital between April 2019 and September 2020. Demographic data, candidate predictors at admission, and fall occurrence reports were collected from medical records. The outcome was the time from admission to a fall requiring medical resources. Two-thirds of cases were randomly selected as the training set for analysis, and univariable and multivariable Cox regression analyses were used to identify factors affecting fall risk. We scored the fall risk based on the estimated hazard ratios (HRs) and constructed a fall prediction scoring system. The remaining one-third of cases was used as the test set to evaluate the predictive performance of the new scoring system. Results: A total of 13,725 individuals were included. During the study period, 2.4\% (326/13,725) of patients experienced a fall. In the training dataset (n=9150), Cox regression analysis identified sex (male: HR 1.60, 95\% CI 1.21?2.13), age (65 to <80 years: HR 2.26, 95\% CI 1.48?3.44; ?80 years: HR 2.50, 95\% CI 1.60?3.92 vs 20-<65 years), BMI (18.5 to <25 kg/m{\texttwosuperior}: HR 1.36, 95\% CI 0.94?1.97; <18.5 kg/m{\texttwosuperior}: HR 1.57, 95\% CI 1.01?2.44 vs ?25 kg/m{\texttwosuperior}), independence degree of daily living for older adults with disabilities (bedriddenness rank A: HR 1.81, 95\% CI 1.26?2.60; rank B: HR 2.03, 95\% CI 1.31?3.14; rank C: HR 1.23, 95\% CI 0.83?1.83 vs rank J), department (internal medicine: HR 1.23, 95\% CI 0.92?1.64; emergency department: HR 1.81, 95\% CI 1.26?2.60 vs department of surgery), and history of falls within 1 year (yes: HR 1.66, 95\% CI 1.21?2.27) as predictors of falls. Using these factors, we developed a fall prediction scoring system categorizing patients into 3 risk groups: low risk (0-4 points), intermediate risk (5-9 points), and high risk (10-15 points). The c-index indicating predictive performance in the test set (n=4575) was 0.733 (95\% CI 0.684?0.782). Conclusions: We developed a new fall prediction scoring system for patients admitted to acute care hospitals by identifying predictors of falls in Japan. This system may be useful for preventive interventions in patient populations with a high likelihood of falling in acute care settings. 
", doi="10.2196/58073", url="https://humanfactors.jmir.org/2025/1/e58073" } @Article{info:doi/10.2196/58509, author="Kurita, Junko and Hori, Motomi and Yamaguchi, Sumiyo and Ogiwara, Aiko and Saito, Yurina and Sugiyama, Minako and Sunadori, Asami and Hayashi, Tomoko and Hara, Akane and Kawana, Yukari and Itoi, Youichi and Sugawara, Tamie and Sugishita, Yoshiyuki and Irie, Fujiko and Sakurai, Naomi", title="Effectiveness of the Facility for Elderly Surveillance System (FESSy) in Two Public Health Center Jurisdictions in Japan: Prospective Observational Study", journal="JMIR Med Inform", year="2025", month="Jan", day="10", volume="13", pages="e58509", keywords="early detection", keywords="facility for older people", keywords="outbreak", keywords="public health center", keywords="syndromic surveillance", keywords="Japan", keywords="older adults", abstract="Background: Residents of facilities for older people are vulnerable to COVID-19 outbreaks. Nevertheless, timely recognition of outbreaks at facilities for older people at public health centers has been impossible in Japan since May 8, 2023, when the Japanese government discontinued aggressive countermeasures against COVID-19 because of the waning severity of the dominant Omicron strain. The Facility for Elderly Surveillance System (FESSy) has been developed to improve information collection. Objective: This study examined FESSy experiences and effectiveness in two public health center jurisdictions in Japan. Methods: This study assessed the use by public health centers of the detection mode of an automated AI detection system (ie, FESSy AI), as well as manual detection by the public health centers' staff (ie, FESSy staff) and direct reporting by facilities to the public health centers. We considered the following aspects: (1) diagnoses or symptoms, (2) numbers of patients as of their detection date, and (3) ultimate numbers of patients involved in incidents. Subsequently, effectiveness was assessed and compared based on detection modes. The study lasted from June 1, 2023, through January 2024. Results: In both areas, this study examined 31 facilities at which 87 incidents were detected. FESSy (AI or staff) detected significantly fewer patients than non-FESSy methods, that is, direct reporting to the public health center of the detection date and ultimate number of patients. Conclusions: FESSy was superior to direct reporting from facilities for the number of patients as of the detection date and for the ultimate outbreak size. 
", doi="10.2196/58509", url="https://medinform.jmir.org/2025/1/e58509" } @Article{info:doi/10.2196/67056, author="Kim, Sanghwan and Jang, Sowon and Kim, Borham and Sunwoo, Leonard and Kim, Seok and Chung, Jin-Haeng and Nam, Sejin and Cho, Hyeongmin and Lee, Donghyoung and Lee, Keehyuck and Yoo, Sooyoung", title="Automated Pathologic TN Classification Prediction and Rationale Generation From Lung Cancer Surgical Pathology Reports Using a Large Language Model Fine-Tuned With Chain-of-Thought: Algorithm Development and Validation Study", journal="JMIR Med Inform", year="2024", month="Dec", day="20", volume="12", pages="e67056", keywords="AJCC Cancer Staging Manual 8th edition", keywords="American Joint Committee on Cancer", keywords="large language model", keywords="chain-of-thought", keywords="rationale", keywords="lung cancer", keywords="report analysis", keywords="AI", keywords="surgery", keywords="pathology reports", keywords="tertiary hospital", keywords="generative language models", keywords="efficiency", keywords="accuracy", keywords="automated", abstract="Background: Traditional rule-based natural language processing approaches in electronic health record systems are effective but are often time-consuming and prone to errors when handling unstructured data. This is primarily due to the substantial manual effort required to parse and extract information from diverse types of documentation. Recent advancements in large language model (LLM) technology have made it possible to automatically interpret medical context and support pathologic staging. However, existing LLMs encounter challenges in rapidly adapting to specialized guideline updates. In this study, we fine-tuned an LLM specifically for lung cancer pathologic staging, enabling it to incorporate the latest guidelines for pathologic TN classification. Objective: This study aims to evaluate the performance of fine-tuned generative language models in automatically inferring pathologic TN classifications and extracting their rationale from lung cancer surgical pathology reports. By addressing the inefficiencies and extensive parsing efforts associated with rule-based methods, this approach seeks to enable rapid and accurate reclassification aligned with the latest cancer staging guidelines. Methods: We conducted a comparative performance evaluation of 6 open-source LLMs for automated TN classification and rationale generation, using 3216 deidentified lung cancer surgical pathology reports based on the American Joint Committee on Cancer (AJCC) Cancer Staging Manual8th edition, collected from a tertiary hospital. The dataset was preprocessed by segmenting each report according to lesion location and morphological diagnosis. Performance was assessed using exact match ratio (EMR) and semantic match ratio (SMR) as evaluation metrics, which measure classification accuracy and the contextual alignment of the generated rationales, respectively. Results: Among the 6 models, the Orca2\_13b model achieved the highest performance with an EMR of 0.934 and an SMR of 0.864. The Orca2\_7b model also demonstrated strong performance, recording an EMR of 0.914 and an SMR of 0.854. In contrast, the Llama2\_7b model achieved an EMR of 0.864 and an SMR of 0.771, while the Llama2\_13b model showed an EMR of 0.762 and an SMR of 0.690. The Mistral\_7b and Llama3\_8b models, on the other hand, showed lower performance, with EMRs of 0.572 and 0.489, and SMRs of 0.377 and 0.456, respectively. 
Overall, the Orca2 models consistently outperformed the others in both TN stage classification and rationale generation. Conclusions: The generative language model approach presented in this study has the potential to enhance and automate TN classification in complex cancer staging, supporting both clinical practice and oncology data curation. With additional fine-tuning based on cancer-specific guidelines, this approach can be effectively adapted to other cancer types. ", doi="10.2196/67056", url="https://medinform.jmir.org/2024/1/e67056" } @Article{info:doi/10.2196/60665, author="Cao, Lang and Sun, Jimeng and Cross, Adam", title="An Automatic and End-to-End System for Rare Disease Knowledge Graph Construction Based on Ontology-Enhanced Large Language Models: Development Study", journal="JMIR Med Inform", year="2024", month="Dec", day="18", volume="12", pages="e60665", keywords="rare disease", keywords="clinical informatics", keywords="LLM", keywords="natural language processing", keywords="machine learning", keywords="artificial intelligence", keywords="large language models", keywords="data extraction", keywords="ontologies", keywords="knowledge graphs", keywords="text mining", abstract="Background: Rare diseases affect millions worldwide but sometimes face limited research focus individually due to low prevalence. Many rare diseases do not have specific International Classification of Diseases, Ninth Edition (ICD-9) and Tenth Edition (ICD-10), codes and therefore cannot be reliably extracted from granular fields like ``Diagnosis'' and ``Problem List'' entries, which complicates tasks that require identification of patients with these conditions, including clinical trial recruitment and research efforts. Recent advancements in large language models (LLMs) have shown promise in automating the extraction of medical information, offering the potential to improve medical research, diagnosis, and management. However, most LLMs lack professional medical knowledge, especially concerning specific rare diseases, and cannot effectively manage rare disease data in its various ontological forms, making them unsuitable for these tasks. Objective: Our aim is to create an end-to-end system called automated rare disease mining (AutoRD), which automates the extraction of rare disease--related information from medical text, focusing on entities and their relations to other medical concepts, such as signs and symptoms. AutoRD integrates up-to-date ontologies with other structured knowledge and demonstrates superior performance in rare disease extraction tasks. We conducted various experiments to evaluate AutoRD's performance, aiming to surpass common LLMs and traditional methods. Methods: AutoRD is a pipeline system that involves data preprocessing, entity extraction, relation extraction, entity calibration, and knowledge graph construction. We implemented this system using GPT-4 and medical knowledge graphs developed from the open-source Human Phenotype Ontology and Orphanet, using techniques such as chain-of-thought reasoning and prompt engineering. We quantitatively evaluated our system's performance in entity extraction, relation extraction, and knowledge graph construction. The experiment used the well-curated dataset RareDis2023, which contains medical literature focused on rare disease entities and their relations, making it an ideal dataset for training and testing our methodology.
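Evaluations like the one just described typically score extraction output against gold annotations with entity-level precision, recall, and F1. Here is a minimal Python sketch of that scoring step; the example (span, type) annotations are invented for illustration, not drawn from RareDis2023.

```python
# Minimal entity-level precision/recall/F1 over (text, type) pairs.
# Gold and predicted annotations below are hypothetical examples.
def prf1(gold, pred):
    tp = len(gold & pred)
    precision = tp / len(pred) if pred else 0.0
    recall = tp / len(gold) if gold else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    return precision, recall, f1

gold = {("Fabry disease", "RAREDISEASE"), ("acroparesthesia", "SIGN")}
pred = {("Fabry disease", "RAREDISEASE"), ("pain", "SYMPTOM")}
print("P=%.2f R=%.2f F1=%.2f" % prf1(gold, pred))
```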
Results: On the RareDis2023 dataset, AutoRD achieved an overall entity extraction F1-score of 56.1\% and a relation extraction F1-score of 38.6\%, marking a 14.4\% improvement over the baseline LLM. Notably, the F1-score for rare disease entity extraction reached 83.5\%, indicating high precision and recall in identifying rare disease mentions. These results demonstrate the effectiveness of integrating LLMs with medical ontologies in extracting complex rare disease information. Conclusions: AutoRD is an automated end-to-end system for extracting rare disease information from text to build knowledge graphs, addressing critical limitations of existing LLMs by improving identification of these diseases and connecting them to related clinical features. This work underscores the significant potential of LLMs in transforming health care, particularly in the rare disease domain. By leveraging ontology-enhanced LLMs, AutoRD constructs a robust medical knowledge base that incorporates up-to-date rare disease information, facilitating improved identification of patients and resulting in more inclusive research and trial candidacy efforts. ", doi="10.2196/60665", url="https://medinform.jmir.org/2024/1/e60665" } @Article{info:doi/10.2196/60231, author="Silvey, Scott and Liu, Jinze", title="Sample Size Requirements for Popular Classification Algorithms in Tabular Clinical Data: Empirical Study", journal="J Med Internet Res", year="2024", month="Dec", day="17", volume="26", pages="e60231", keywords="medical informatics", keywords="machine learning", keywords="sample size", keywords="research design", keywords="decision trees", keywords="classification algorithm", keywords="clinical research", keywords="learning-curve analysis", keywords="analysis", keywords="analyses", keywords="guidelines", keywords="ML", keywords="decision making", keywords="algorithm", keywords="curve analysis", keywords="dataset", abstract="Background: The performance of a classification algorithm eventually reaches a point of diminishing returns, where additional samples do not improve the results. Thus, there is a need to determine an optimal sample size that maximizes performance while accounting for computational burden or budgetary concerns. Objective: This study aimed to determine optimal sample sizes and the relationships between sample size and dataset-level characteristics over a variety of binary classification algorithms. Methods: A total of 16 large open-source datasets were collected, each containing a binary clinical outcome. Furthermore, 4 machine learning algorithms were assessed: XGBoost (XGB), random forest (RF), logistic regression (LR), and neural networks (NNs). For each dataset, the cross-validated area under the curve (AUC) was calculated at increasing sample sizes, and learning curves were fit. Sample sizes needed to reach the observed full-dataset AUC minus 2 points (0.02) were calculated from the fitted learning curves and compared across the datasets and algorithms. Dataset-level characteristics (minority class proportion, full-dataset AUC, number of features, type of features, and degree of nonlinearity) were examined. Negative binomial regression models were used to quantify relationships between these characteristics and expected sample sizes within each algorithm. A total of 4 multivariable models were constructed, which selected the best-fitting combination of dataset-level characteristics.
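The learning-curve analysis just described can be illustrated with a short Python sketch: fit a curve of cross-validated AUC against sample size and invert it to estimate the sample size reaching the full-dataset AUC minus 0.02. The functional form (an inverse power law) and the AUC points below are assumptions for illustration; the paper's exact fitting procedure may differ.

```python
# Sketch: fit an inverse power-law learning curve AUC(n) = a - b * n**-c,
# then solve for the n reaching the observed full-dataset AUC minus 0.02.
# The observed AUC values are synthetic.
import numpy as np
from scipy.optimize import curve_fit

def learning_curve(n, a, b, c):
    return a - b * np.power(n, -c)   # AUC approaches asymptote a as n grows

n_obs = np.array([100, 250, 500, 1000, 2500, 5000, 10000], dtype=float)
auc_obs = np.array([0.62, 0.67, 0.71, 0.74, 0.76, 0.77, 0.775])

(a, b, c), _ = curve_fit(learning_curve, n_obs, auc_obs,
                         p0=(0.8, 1.0, 0.5), maxfev=10000)
target = auc_obs[-1] - 0.02                # full-dataset AUC minus 2 points
n_required = (b / (a - target)) ** (1 / c) # invert a - b*n**-c = target
print(f"asymptote={a:.3f}, estimated n to reach {target:.3f}: {n_required:,.0f}")
```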
Results: Among the 16 datasets (full-dataset sample sizes ranging from 70,000 to 1,000,000), median sample sizes were 9960 (XGB), 3404 (RF), 696 (LR), and 12,298 (NN) to reach AUC stability. For all 4 algorithms, more balanced classes (multiplier: 0.93-0.96 for a 1\% increase in minority class proportion) were associated with decreased sample size. Other characteristics varied in importance across algorithms; in general, more features, weaker features, and more complex relationships between the predictors and the response increased expected sample sizes. In multivariable analysis, the top selected predictors were minority class proportion among all 4 algorithms assessed, full-dataset AUC (XGB, RF, and NN), and dataset nonlinearity (XGB, RF, and NN). For LR, the top predictors were minority class proportion, percentage of strong linear features, and number of features. Final multivariable sample size models had high goodness-of-fit, with dataset-level predictors explaining a majority (66.5\%-84.5\%) of the total deviance in the data among all 4 models. Conclusions: The sample sizes needed to reach AUC stability among 4 popular classification algorithms vary by dataset and method and are associated with dataset-level characteristics that can be influenced or estimated before the start of a research study. ", doi="10.2196/60231", url="https://www.jmir.org/2024/1/e60231" } @Article{info:doi/10.2196/58423, author="Du, Jianchao and Ding, Junyao and Wu, Yuan and Chen, Tianyan and Lian, Jianqi and Shi, Lei and Zhou, Yun", title="A Pathological Diagnosis Method for Fever of Unknown Origin Based on Multipath Hierarchical Classification: Model Design and Validation", journal="JMIR Form Res", year="2024", month="Dec", day="9", volume="8", pages="e58423", keywords="fever of unknown origin", keywords="FUO", keywords="intelligent diagnosis", keywords="machine learning", keywords="hierarchical classification", keywords="feature selection", keywords="model design", keywords="validation", keywords="diagnostic", keywords="prediction model", abstract="Background: Fever of unknown origin (FUO) is a significant challenge for the medical community due to its association with a wide range of diseases, the complexity of diagnosis, and the likelihood of misdiagnosis. Machine learning can extract valuable information from the extensive data of patient indicators, aiding doctors in diagnosing the underlying cause of FUO. Objective: Because the etiology of FUO is hierarchically structured, this study aims to design a multipath hierarchical classification algorithm for its diagnosis. In addition, a feature selection mechanism is added to the model to improve its diagnostic performance. Methods: The case data of patients with FUO admitted to the First Affiliated Hospital of Xi'an Jiaotong University in China between 2011 and 2020 were used as the dataset for model training and validation. The hierarchical structure tree was then characterized according to etiology. The structure included 3 layers, with the top layer representing the FUO, the middle layer dividing the FUO into 5 categories of etiology (bacterial infection, viral infection, other infection, autoimmune diseases, and other noninfection), and the last layer further refining them into 16 etiologies. Finally, ablation experiments were set up to determine the optimal structure of the proposed method, and comparison experiments were run to verify the diagnostic performance.
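As a rough illustration of the top-down hierarchical idea in the 3-layer structure above, the following Python sketch trains one classifier for the middle-layer category and a per-category classifier for the leaf etiology. The label hierarchy and data are synthetic, and the paper's multipath mechanism (keeping several candidate paths) and feature selection step are omitted for brevity.

```python
# Simplified top-down hierarchical classification on synthetic data.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, leaf = make_classification(n_samples=500, n_features=12, n_informative=6,
                              n_classes=5, n_clusters_per_class=1,
                              random_state=0)
middle = np.where(leaf <= 2, 0, 1)        # eg, infection vs noninfection
HIERARCHY = {0: [0, 1, 2], 1: [3, 4]}     # middle category -> leaf labels

top = LogisticRegression(max_iter=1000).fit(X, middle)
refiners = {m: LogisticRegression(max_iter=1000).fit(X[middle == m],
                                                     leaf[middle == m])
            for m in HIERARCHY}

def predict(x):
    m = top.predict(x.reshape(1, -1))[0]  # choose one path at the middle layer
    return refiners[m].predict(x.reshape(1, -1))[0]

print("predicted leaf etiology:", predict(X[0]))
```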
Results: According to the ablation experiments, the model achieved the best performance, with an accuracy of 76.08\%, when the number of middle paths was 3 and 25\% of the features were selected. According to the comparison experiments, the proposed model outperformed the comparison methods from the perspectives of both feature selection and hierarchical classification. Specifically, brucellosis had an accuracy of 100\%, and liver abscess, viral infection, and lymphoma all had accuracies of more than 80\%. Conclusions: In this study, a novel multipath feature selection and hierarchical classification model was designed for the diagnosis of FUO and was quantitatively evaluated. Despite some limitations, this model enriches the exploration of FUO in machine learning and assists physicians in their work. ", doi="10.2196/58423", url="https://formative.jmir.org/2024/1/e58423" } @Article{info:doi/10.2196/63834, author="AboArab, A. Mohammed and Potsika, T. Vassiliki and Theodorou, Alexis and Vagena, Sylvia and Gravanis, Miltiadis and Sigala, Fragiska and Fotiadis, I. Dimitrios", title="Advancing Progressive Web Applications to Leverage Medical Imaging for Visualization of Digital Imaging and Communications in Medicine and Multiplanar Reconstruction: Software Development and Validation Study", journal="JMIR Med Inform", year="2024", month="Dec", day="9", volume="12", pages="e63834", keywords="medical image visualization", keywords="peripheral artery computed tomography imaging", keywords="multiplanar reconstruction", keywords="progressive web applications", abstract="Background: In medical imaging, 3D visualization is vital for displaying volumetric organs, enhancing diagnosis and analysis. Multiplanar reconstruction (MPR) improves visual and diagnostic capabilities by transforming 2D images from computed tomography (CT) and magnetic resonance imaging into 3D representations. Web-based Digital Imaging and Communications in Medicine (DICOM) viewers integrated into picture archiving and communication systems facilitate access to images and interaction with remote data. However, the adoption of progressive web applications (PWAs) for web-based DICOM and MPR visualization remains limited. This paper addresses this gap by leveraging PWAs for their offline access and enhanced performance. Objective: This study aims to evaluate the integration of DICOM and MPR visualization into the web using PWAs, addressing challenges related to cross-platform compatibility, integration capabilities, and high-resolution image reconstruction for medical image visualization. Methods: Our paper introduces a PWA that uses a modular design for enhancing DICOM and MPR visualization in web-based medical imaging. By integrating React.js and Cornerstone.js, the application offers seamless DICOM image processing, ensures cross-browser compatibility, and delivers a responsive user experience across multiple devices. It uses advanced interpolation techniques to improve the accuracy of volume reconstructions, enhancing MPR analysis and visualization in a web environment and promising a substantial advance in medical imaging analysis. Results: In our approach, the performance of DICOM- and MPR-based PWAs for medical image visualization and reconstruction was evaluated through comprehensive experiments. The application excelled in terms of loading time and volume reconstruction, particularly in Google Chrome, whereas Firefox showed superior performance in viewing slices.
This study uses a dataset comprising 22 CT scans of patients with peripheral artery disease to demonstrate the application's robust performance, with Google Chrome outperforming other browsers in both local area network and wide area network settings. In addition, the application's accuracy in MPR reconstructions was validated with an error margin of <0.05 mm, and it outperformed state-of-the-art methods by 84\% to 98\% in loading and volume rendering time. Conclusions: This paper highlights advancements in DICOM and MPR visualization using PWAs, addressing the gaps in web-based medical imaging. By exploiting PWA features such as offline access and improved performance, we have significantly advanced medical imaging technology, focusing on cross-platform compatibility, integration efficiency, and speed. Our application outperforms existing platforms for handling complex MPR analyses and accurate analysis of medical imaging, as validated through peripheral artery CT imaging. ", doi="10.2196/63834", url="https://medinform.jmir.org/2024/1/e63834" } @Article{info:doi/10.2196/55231, author="Gnadlinger, Florian and Werminghaus, Maika and Selmanagi{\'c}, Andr{\'e} and Filla, Tim and Richter, G. Jutta and Kriglstein, Simone and Klenzner, Thomas", title="Incorporating an Intelligent Tutoring System Into a Game-Based Auditory Rehabilitation Training for Adult Cochlear Implant Recipients: Algorithm Development and Validation", journal="JMIR Serious Games", year="2024", month="Dec", day="3", volume="12", pages="e55231", keywords="cochlear implant", keywords="eHealth", keywords="evidence-centered design", keywords="hearing rehabilitation", keywords="adaptive learning", keywords="intelligent tutoring system", keywords="game-based learning", abstract="Background: Cochlear implants are implanted hearing devices; instead of amplifying sounds like common hearing aids, this technology delivers preprocessed sound information directly to the hearing (ie, auditory) nerve. After surgery and the first cochlear implant activation, patients must practice interpreting the new auditory sensations, especially for language comprehension. This rehabilitation process is accompanied by hearing therapy through face-to-face training with a therapist, self-directed training, and computer-based auditory training. Objective: In general, self-directed, computer-based auditory training tasks have already shown advantages. However, compliance of cochlear implant recipients is still a major challenge, especially for self-directed training at home. Hence, we aimed to explore the combination of 2 techniques to enhance learner motivation in this context: adaptive learning (in the form of an intelligent tutoring system) and game-based learning (in the form of a serious game). Methods: Following the suggestions of the evidence-centered design framework, a domain analysis of hearing therapy was conducted, allowing us to partially describe human hearing skill as a probabilistic competence model (Bayesian network). We developed an algorithm that uses such a model to estimate the current competence level of a patient and create training recommendations. For training, our task system was based on 7 language comprehension task types that act as a blueprint for automatically generating tasks of diverse difficulty. To achieve this, 1053 audio assets with meta-information labels were created. We embedded the adaptive task system into a graphic novel--like mobile serious game.
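To make the competence-estimation idea above concrete, here is a toy Python sketch that updates a belief over discrete competence levels from task outcomes with Bayes' rule. It collapses the paper's Bayesian network to a single node, and the three levels and success probabilities are invented for illustration.

```python
# Toy single-node simplification of a probabilistic competence model:
# update P(competence level) after each observed task outcome.
import numpy as np

levels = ["low", "medium", "high"]
p_correct = np.array([0.3, 0.6, 0.9])      # assumed P(solved | level)
belief = np.array([1 / 3, 1 / 3, 1 / 3])   # uniform prior over levels

for solved in [True, True, False, True]:   # observed task outcomes
    likelihood = p_correct if solved else 1 - p_correct
    belief = belief * likelihood
    belief /= belief.sum()                 # Bayes rule: normalize posterior

print({lv: round(p, 3) for lv, p in zip(levels, belief)})
# A tutoring system could then recommend tasks near the most probable level.
```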
German-speaking cochlear implant recipients used the system during a 4-week feasibility study. Results: The 23 adult participants (20 women, 3 men) completed 2259 tasks. In total, 2004 (90.5\%) tasks were solved correctly, and 255 (9.5\%) tasks were solved incorrectly. A generalized additive model analysis of these tasks indicated that the system adapted to the estimated competency levels of the cochlear implant recipients more quickly at the beginning than at the end. Compared with a uniform distribution of all task types, the recommended task types differed ($\chi${\texttwosuperior}6=86.713; P<.001), indicating that the system selected specific task types for each patient. This is underlined by the identified categories for the error proportions of the task types. Conclusions: This contribution demonstrates the feasibility of combining an intelligent tutoring system with a serious game in cochlear implant rehabilitation therapies. The findings presented here could lead to further advances in cochlear implant care and aural rehabilitation in general. Trial Registration: German Clinical Trials Register (DRKS) DRKS00022860; https://drks.de/search/en/trial/DRKS00022860 ", doi="10.2196/55231", url="https://games.jmir.org/2024/1/e55231" } @Article{info:doi/10.2196/63195, author="Gariepy, Genevieve and Zahan, Rifat and Osgood, D. Nathaniel and Yeoh, Benjamin and Graham, Eva and Orpana, Heather", title="Dynamic Simulation Models of Suicide and Suicide-Related Behaviors: Systematic Review", journal="JMIR Public Health Surveill", year="2024", month="Dec", day="2", volume="10", pages="e63195", keywords="suicide", keywords="agent-based modeling", keywords="complex system", keywords="complexity science", keywords="discrete-event simulation", keywords="dynamic modeling", keywords="microsimulation", keywords="system dynamics", keywords="systems science", keywords="qualitative study", keywords="dynamic simulation", keywords="database", keywords="depression", keywords="mental state", keywords="systematic review", keywords="stress", abstract="Background: Suicide remains a public health priority worldwide with over 700,000 deaths annually, ranking as a leading cause of death among young adults. Traditional research methodologies have often fallen short in capturing the multifaceted nature of suicide, focusing on isolated risk factors rather than the complex interplay of individual, social, and environmental influences. Given these limitations, there is growing recognition of the value of dynamic simulation modeling to inform suicide prevention planning. Objective: This systematic review aims to provide a comprehensive overview of existing dynamic models of population-level suicide and suicide-related behaviors, and to summarize their methodologies, applications, and outcomes. Methods: Eight databases were searched, including MEDLINE, Embase, PsycINFO, Scopus, Compendex, ACM Digital Library, IEEE Xplore, and medRxiv, from inception to July 2023. We developed a search strategy in consultation with a research librarian. Two reviewers independently conducted the title and abstract and full-text screenings, including studies using dynamic modeling methods (eg, System Dynamics and agent-based modeling) for suicide or suicide-related behaviors at the population level and excluding studies on microbiology, bioinformatics, pharmacology, nondynamic modeling methods, and nonprimary modeling reports (eg, editorials and reviews).
Reviewers extracted the data using a standardized form and assessed the quality of reporting using the STRESS (Strengthening the Reporting of Empirical Simulation Studies) guidelines. A narrative synthesis was conducted for the included studies. Results: The search identified 1574 studies, with 22 studies meeting the inclusion criteria, including 15 System Dynamics models, 6 agent-based models, and 1 microsimulation model. The studies primarily targeted populations in Australia and the United States, with some focusing on hypothetical scenarios. The models addressed various interventions ranging from specific clinical and health service interventions, such as mental health service capacity increases, to broader social determinants, including employment programs and reduction in access to means of suicide. The studies demonstrated the utility of dynamic models in identifying the synergistic effects of combined interventions and understanding the temporal dynamics of intervention impacts. Conclusions: Dynamic modeling of suicide and suicide-related behaviors, though still an emerging area, is expanding rapidly, adapting to a range of questions, settings, and contexts. While the quality of reporting was overall adequate, some studies lacked detailed reporting on model transparency and reproducibility. This review highlights the potential of dynamic modeling as a tool to support decision-making and to further our understanding of the complex dynamics of suicide and its related behaviors. Trial Registration: PROSPERO CRD42022346617; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=346617 ", doi="10.2196/63195", url="https://publichealth.jmir.org/2024/1/e63195" } @Article{info:doi/10.2196/57705, author="Gosselin, Laura and Maes, Alexandre and Eyer, Kevin and Dahamna, Badisse and Disson, Flavien and Darmoni, Stefan and Wils, Julien and Grosjean, Julien", title="Design and Implementation of a Dashboard for Drug Interactions Mediated by Cytochromes Using a Health Care Data Warehouse in a University Hospital Center: Development Study", journal="JMIR Med Inform", year="2024", month="Nov", day="28", volume="12", pages="e57705", keywords="drug-drug interaction", keywords="adverse", keywords="interaction", keywords="information system", keywords="warehouse", keywords="warehousing", keywords="cytochrome", keywords="personalized medicine", keywords="dashboard", keywords="drugs", keywords="pharmacy", keywords="pharmacology", keywords="pharmacotherapy", keywords="pharmaceutic", keywords="pharmaceutical", keywords="medication", keywords="visualization", keywords="develop", keywords="development", keywords="design", abstract="Background: The cytochrome P450 (CYP450) enzymatic system is a group of liver enzymes involved in the metabolism of drugs. The literature records instances of underdosing when a drug is coadministered with another drug that strongly induces the cytochrome for which the first drug is a substrate, and of overdosing due to strong inhibition. IT solutions have been proposed to raise awareness among prescribers and mitigate these interactions. Objective: This study aimed to develop a drug interaction dashboard for cytochrome-mediated drug interactions (DIDC) using a health care data warehouse to display results that are easily readable and interpretable by clinical experts. Methods: The initial step involved defining requirements with expert pharmacologists. An existing model of interactions involving CYP450 was used.
A program for the automatic detection of cytochrome-mediated drug interactions (DIs) was developed. Finally, the development and visualization of the DIDC were carried out by an IT engineer, and the tool was then evaluated. Results: The development of the DIDC was successfully completed. It automatically compiled cytochrome-mediated DIs in a comprehensive table and provided a dedicated dashboard for each potential DI. The most frequent interaction involved paracetamol and carbamazepine with CYP450 3A4 (n=50 patients). The prescription of tacrolimus with CYP3A5 genotyping pertained to 675 patients. Two experts qualitatively evaluated the tool, resulting in overall satisfaction scores of 6 and 5 out of 7, respectively. Conclusions: At our hospital, measurement of drug concentrations that could be altered by cytochrome-mediated DIs is not systematic. These DIs can lead to serious clinical consequences. The purpose of this DIDC is to provide an overall view and raise awareness among prescribers about the importance of measuring concentrations of specific drugs and metabolites. Ultimately, the tool could lead to an individualized approach and become a prescription support tool if integrated into prescription assistance software. ", doi="10.2196/57705", url="https://medinform.jmir.org/2024/1/e57705" } @Article{info:doi/10.2196/59396, author="Oh, Soyeon Sarah and Kang, Bada and Hong, Dahye and Kim, Ivy Jennifer and Jeong, Hyewon and Song, Jinyeop and Jeon, Minkyu", title="A Multivariable Prediction Model for Mild Cognitive Impairment and Dementia: Algorithm Development and Validation", journal="JMIR Med Inform", year="2024", month="Nov", day="22", volume="12", pages="e59396", keywords="mild cognitive impairment", keywords="machine learning algorithms", keywords="sociodemographic factors", keywords="gerontology", keywords="geriatrics", keywords="older people", keywords="aging", keywords="MCI", keywords="dementia", keywords="Alzheimer", keywords="cognitive", keywords="machine learning", keywords="prediction", keywords="algorithm", abstract="Background: Mild cognitive impairment (MCI) poses significant challenges in early diagnosis and timely intervention. Underdiagnosis, coupled with the economic and social burden of dementia, necessitates more precise detection methods. Machine learning (ML) algorithms show promise in managing complex data for MCI and dementia prediction. Objective: This study assessed the predictive accuracy of ML models in identifying the onset of MCI and dementia using the Korean Longitudinal Study of Aging (KLoSA) dataset. Methods: This study used data from the KLoSA, a comprehensive biennial survey that tracks the demographic, health, and socioeconomic aspects of middle-aged and older Korean adults from 2018 to 2020. Among the 6171 initial households, 4975 eligible older adult participants aged 60 years or older were selected after excluding individuals based on age and missing data. The identification of MCI and dementia relied on self-reported diagnoses, with sociodemographic and health-related variables serving as key covariates. The dataset was categorized into training and test sets, and MCI and dementia were predicted using multiple models, including logistic regression, light gradient-boosting machine, XGBoost (extreme gradient boosting), CatBoost, random forest, gradient boosting, AdaBoost, support vector classifier, and k-nearest neighbors; the training and test sets were used to develop the models and evaluate predictive performance.
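A multi-model comparison of the kind just described follows a standard pattern; the Python sketch below fits a subset of the named classifier families with class weighting on an imbalanced synthetic dataset and compares test-set AUC. Synthetic data stand in for the KLoSA variables, and the model settings are illustrative assumptions.

```python
# Sketch: compare several classifiers on imbalanced synthetic data by AUC.
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=4975, n_features=15, weights=[0.9, 0.1],
                           random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)

models = {
    "logistic regression": LogisticRegression(max_iter=1000,
                                              class_weight="balanced"),
    "random forest": RandomForestClassifier(class_weight="balanced",
                                            random_state=0),
    "AdaBoost": AdaBoostClassifier(random_state=0),  # no class_weight option
}
for name, model in models.items():
    model.fit(X_tr, y_tr)
    auc = roc_auc_score(y_te, model.predict_proba(X_te)[:, 1])
    print(f"{name}: AUC={auc:.3f}")
```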
The performance was assessed using the area under the receiver operating characteristic curve (AUC). Class imbalances were addressed via class weights. Shapley additive explanation values were used to determine the contribution of each feature to the prediction rate. Results: Among the 4975 participants, the best model for predicting MCI onset was random forest, with a median AUC of 0.6729 (IQR 0.3883-0.8152), followed by k-nearest neighbors with a median AUC of 0.5576 (IQR 0.4555-0.6761) and support vector classifier with a median AUC of 0.5067 (IQR 0.3755-0.6389). For dementia onset prediction, the best model was XGBoost, achieving a median AUC of 0.8185 (IQR 0.8085-0.8285), closely followed by light gradient-boosting machine with a median AUC of 0.8069 (IQR 0.7969-0.8169) and AdaBoost with a median AUC of 0.8007 (IQR 0.7907-0.8107). The Shapley values highlighted pain in everyday life, being widowed, living alone, exercising, and living with a partner as the strongest predictors of MCI. For dementia, the most predictive features were other contributing factors, education at the high school level, education at the middle school level, exercising, and monthly social engagement. Conclusions: ML algorithms, especially XGBoost, exhibited potential for predicting MCI onset using KLoSA data. However, no model demonstrated robust accuracy in predicting both MCI and dementia. Sociodemographic and health-related factors are crucial in the onset of cognitive conditions, emphasizing the need for multifaceted predictive models for early identification and intervention. These findings underscore the potential and limitations of ML in predicting cognitive impairment in community-dwelling older adults. ", doi="10.2196/59396", url="https://medinform.jmir.org/2024/1/e59396" } @Article{info:doi/10.2196/64806, author="C Areias, Anabela and G Moulder, Robert and Molinos, Maria and Janela, Dora and Bento, Virg{\'i}lio and Moreira, Carolina and Yanamadala, Vijay and P Cohen, Steven and Dias Correia, Fernando and Costa, Fab{\'i}ola", title="Predicting Pain Response to a Remote Musculoskeletal Care Program for Low Back Pain Management: Development of a Prediction Tool", journal="JMIR Med Inform", year="2024", month="Nov", day="19", volume="12", pages="e64806", keywords="telerehabilitation", keywords="predictive modeling", keywords="personalized medicine", keywords="rehabilitation", keywords="clinical decision support", keywords="machine learning", keywords="artificial intelligence", abstract="Background: Low back pain (LBP) presents with diverse manifestations, necessitating personalized treatment approaches that recognize various phenotypes within the same diagnosis, which could be achieved through precision medicine. Although prediction strategies have been explored, including those employing artificial intelligence (AI), they still lack scalability and real-time capabilities. Digital care programs (DCPs) facilitate seamless data collection through the Internet of Things and cloud storage, creating an ideal environment for developing and implementing an AI predictive tool to assist clinicians in dynamically optimizing treatment. Objective: This study aims to develop an AI tool that continuously assists physical therapists in predicting an individual's potential for achieving clinically significant pain relief by the end of the program. A secondary aim was to identify predictors of pain nonresponse to guide treatment adjustments.
Methods: Data collected actively (eg, demographic and clinical information) and passively in real time (eg, range of motion, exercise performance, and socioeconomic data from public data sources) from 6125 patients enrolled in a remote digital musculoskeletal intervention program were stored in the cloud. Two machine learning techniques, recurrent neural networks (RNNs) and light gradient boosting machine (LightGBM), continuously analyzed session updates up to session 7 to predict the likelihood of achieving significant pain relief at the program end. Model performance was assessed using the area under the receiver operating characteristic curve (ROC-AUC), precision-recall curves, specificity, and sensitivity. Model explainability was assessed using SHapley Additive exPlanations values. Results: At each session, the model provided a prediction about the potential of being a pain responder, with performance improving over time (P<.001). By session 7, the RNN achieved an ROC-AUC of 0.70 (95\% CI 0.65-0.71), and the LightGBM achieved an ROC-AUC of 0.71 (95\% CI 0.67-0.72). Both models demonstrated high specificity in scenarios prioritizing high precision. The key predictive features were pain-associated domains, exercise performance, motivation, and compliance, informing continuous treatment adjustments to maximize response rates. Conclusions: This study underscores the potential of an AI predictive tool within a DCP to enhance the management of LBP, supporting physical therapists in redirecting care pathways early and throughout the treatment course. This approach is particularly important for addressing the heterogeneous phenotypes observed in LBP. Trial Registration: ClinicalTrials.gov NCT04092946; https://clinicaltrials.gov/ct2/show/NCT04092946 and NCT05417685; https://clinicaltrials.gov/ct2/show/NCT05417685 ", doi="10.2196/64806", url="https://medinform.jmir.org/2024/1/e64806" } @Article{info:doi/10.2196/51865, author="Lange-Drenth, Lukas and Schulz, Holger and Suck, Isabell and Bleich, Christiane", title="Barriers, Facilitators, and Requirements for a Telerehabilitation Aftercare Program for Patients After Occupational Injuries: Semistructured Interviews With Key Stakeholders", journal="JMIR Form Res", year="2024", month="Nov", day="8", volume="8", pages="e51865", keywords="telerehabilitation", keywords="rehabilitation", keywords="eHealth development", keywords="value specification", keywords="stakeholder participation", keywords="occupational injuries", keywords="vocational rehabilitation", keywords="aftercare", keywords="mobile phone", abstract="Background: Patients with occupational injuries often receive multidisciplinary rehabilitation for a rapid return to work. Rehabilitation aftercare programs give patients the opportunity to apply the progress they have made during rehabilitation to their everyday activities. Telerehabilitation aftercare programs can help reduce barriers, such as lack of time due to other commitments, because they can be used regardless of time or location. Careful identification of barriers, facilitators, and design requirements with key stakeholders is a critical step in developing a telerehabilitation aftercare program. Objective: This study aims to identify barriers, facilitators, and design requirements for a future telerehabilitation aftercare program for patients with occupational injuries from the perspective of the key stakeholders. Methods: We used a literature review and expert recommendations to identify key stakeholders.
We conducted semistructured interviews in person and via real-time video calls with 27 key stakeholders to collect data. Interviews were transcribed verbatim, and thematic analysis was applied. We selected key stakeholder statements about facilitators and barriers and categorized them as individual, technical, environmental, and organizational facilitators and barriers. We identified expressions that captured aspects that the telerehabilitation aftercare program should fulfill and clustered them into attributes and overarching values. We translated the attributes into one or more requirements and grouped them into content, functional, service, user experience, and work context requirements. Results: The key stakeholders identified can be grouped into the following categories: patients, health care professionals, administrative personnel, and members of the telerehabilitation program design and development team. The most frequently reported facilitators of a future telerehabilitation aftercare program were time savings for patients, high motivation of the patients to participate in the telerehabilitation aftercare program, high usability of the program, and regular in-person therapy meetings during the telerehabilitation aftercare program. The most frequently reported barriers were low digital affinity and skills of the patients and personnel, patients' lack of trust in and acceptance of the telerehabilitation aftercare program, slow internet speed, program functionality problems (eg, application crashes or freezes), and the inability of telerehabilitation to deliver certain elements of in-person rehabilitation aftercare, such as monitoring exercise performance. In our study, the most common design requirements were reducing barriers and implementing facilitators. The 2 most frequently discussed overarching values were tailoring of telerehabilitation, such as a tailored exercise plan and tailored injury-related information, and social interaction, such as real-time psychotherapy and digital and in-person rehabilitation aftercare in a blended care approach. Conclusions: Key stakeholders reported on facilitators, barriers, and design requirements that should be considered throughout the development process. Tailoring telerehabilitation content was the key value for stakeholders to ensure the program could meet the needs of patients with different types of occupational injuries. ", doi="10.2196/51865", url="https://formative.jmir.org/2024/1/e51865" } @Article{info:doi/10.2196/55614, author="Sullivan, Sean Patrick and Mera-Giler, M.
Robertino and Bush, Staci and Shvachko, Valentina and Sarkodie, Eleanor and O'Farrell, Daniel and Dubose, Stephanie and Magnuson, David", title="Claims-Based Algorithm to Identify Pre-Exposure Prophylaxis Indications for Tenofovir Disoproxil Fumarate and Emtricitabine Prescriptions (2012-2014): Validation Study", journal="JMIR Form Res", year="2024", month="Nov", day="4", volume="8", pages="e55614", keywords="pre-exposure prophylaxis", keywords="PrEP", keywords="classification", keywords="electronic medical record", keywords="EMR", keywords="algorithm", keywords="electronic health record", keywords="EHR", keywords="drug", keywords="pharmacology", keywords="pharmacotherapy", keywords="pharmaceutical", keywords="medication", keywords="monotherapy", keywords="HIV", keywords="prevention", abstract="Background: To monitor the use of tenofovir disoproxil fumarate and emtricitabine (TDF/FTC) and related medicines for pre-exposure prophylaxis (PrEP) as HIV prevention using commercial pharmacy data, it is necessary to determine whether TDF/FTC prescriptions are used for PrEP or for some other clinical indication. Objective: This study aimed to validate an algorithm to distinguish use of TDF/FTC for HIV prevention from use for infectious disease treatment. Methods: An algorithm was developed to identify whether TDF/FTC prescriptions were for PrEP or for other indications from large-scale administrative databases. The algorithm identifies TDF/FTC prescriptions and then excludes patients with International Classification of Diseases, Ninth Revision (ICD-9) diagnostic codes, medications, or procedures that suggest indications other than PrEP (eg, documentation of HIV infection, chronic hepatitis B, or use of TDF/FTC for postexposure prophylaxis). For evaluation, we collected data by clinician assessment of medical records for patients with TDF/FTC prescriptions and compared the indication identified by clinician review with that identified by the algorithm. The algorithm was then applied and evaluated in a large, urban, community-based sexual health clinic. Results: The PrEP algorithm demonstrated high sensitivity and moderate specificity (99.6\% and 49.6\%, respectively) in the electronic medical record database and high sensitivity and specificity (99\% and 87\%) in data from the urban community health clinic. Conclusions: The PrEP algorithm classified the indication for PrEP in most patients treated with TDF/FTC with sufficient accuracy to be useful for surveillance purposes. The methods described can serve as a basis for developing a robust and evolving case definition for antiretroviral prescriptions for HIV prevention purposes. ", doi="10.2196/55614", url="https://formative.jmir.org/2024/1/e55614", url="http://www.ncbi.nlm.nih.gov/pubmed/39141024" } @Article{info:doi/10.2196/54246, author="Paiva, Bruno and Gon{\c{c}}alves, Andr{\'e} Marcos and da Rocha, Dutra Leonardo Chaves and Marcolino, Soriano Milena and Lana, Barbosa Fernanda Cristina and Souza-Silva, Rego Maira Viana and Almeida, M.
Jussara and Pereira, Delfino Polianna and de Andrade, Valiense Claudio Mois{\'e}s and Gomes, Reis Ang{\'e}lica Gomides dos and Ferreira, Pires Maria Ang{\'e}lica and Bartolazzi, Frederico and Sacioto, Furtado Manuela and Boscato, Paula Ana and Guimar{\~a}es-J{\'u}nior, Henriques Milton and dos Reis, Pereira Priscilla and Costa, Roberto Fel{\'i}cio and Jorge, Oliveira Alzira de and Coelho, Reis Laryssa and Carneiro, Marcelo and Sales, Souza Tha{\'i}s Lorenna and Ara{\'u}jo, Ferreira Silvia and Silveira, Vit{\'o}rio Daniel and Ruschel, Brasil Karen and Santos, Veloso Fernanda Caldeira and Cenci, Almeida Evelin Paola de and Menezes, Monteiro Luanna Silva and Anschau, Fernando and Bicalho, Camargos Maria Aparecida and Manenti, Fernandes Euler Roberto and Finger, Goulart Renan and Ponce, Daniela and de Aguiar, Carrilho Filipe and Marques, Margoto Luiza and de Castro, C{\'e}sar Lu{\'i}s and Vietta, Gr{\"u}newald Giovanna and Godoy, de Mariana Frizzo and Vila{\c{c}}a, Nascimento Mariana do and Morais, Costa Vivian", title="A New Natural Language Processing--Inspired Methodology (Detection, Initial Characterization, and Semantic Characterization) to Investigate Temporal Shifts (Drifts) in Health Care Data: Quantitative Study", journal="JMIR Med Inform", year="2024", month="Oct", day="28", volume="12", pages="e54246", keywords="health care", keywords="machine learning", keywords="data drifts", keywords="temporal drifts", abstract="Background: Proper analysis and interpretation of health care data can significantly improve patient outcomes by enhancing services and revealing the impacts of new technologies and treatments. Understanding the substantial impact of temporal shifts in these data is crucial. For example, COVID-19 vaccination initially lowered the mean age of at-risk patients and later changed the characteristics of those who died. This highlights the importance of understanding these shifts for assessing factors that affect patient outcomes. Objective: This study aims to propose detection, initial characterization, and semantic characterization (DIS), a new methodology for analyzing changes in health outcomes and variables over time while discovering contextual changes for outcomes in large volumes of data. Methods: The DIS methodology involves 3 steps: detection, initial characterization, and semantic characterization. Detection uses metrics such as Jensen-Shannon divergence to identify significant data drifts. Initial characterization offers a global analysis of changes in data distribution and predictive feature significance over time. Semantic characterization uses natural language processing--inspired techniques to understand the local context of these changes, helping identify factors driving changes in patient outcomes. By integrating the outcomes of these 3 steps, DIS can identify specific factors (eg, interventions and modifications in health care practices) that drive changes in patient outcomes. DIS was applied to the Brazilian COVID-19 Registry and the Medical Information Mart for Intensive Care, version IV (MIMIC-IV) data sets. Results: Our approach allowed us to (1) identify drifts effectively, especially using metrics such as the Jensen-Shannon divergence, and (2) uncover reasons for the decline in overall mortality in both the COVID-19 and MIMIC-IV data sets, as well as changes in the cooccurrence between different diseases and this particular outcome.
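The detection step named above (Jensen-Shannon divergence between time windows) can be sketched in a few lines of Python; the simulated windows and the alert threshold below are illustrative assumptions, not values from the study.

```python
# Sketch: flag a drift when the Jensen-Shannon distance between a variable's
# distributions in two time windows exceeds a threshold. Data are simulated.
import numpy as np
from scipy.spatial.distance import jensenshannon

rng = np.random.default_rng(0)
window_a = rng.normal(60, 10, 1000)   # eg, patient age in an early period
window_b = rng.normal(48, 12, 1000)   # later period after a population shift

# Histogram both windows on shared bins to get comparable distributions.
bins = np.histogram_bin_edges(np.concatenate([window_a, window_b]), bins=20)
p, _ = np.histogram(window_a, bins=bins, density=True)
q, _ = np.histogram(window_b, bins=bins, density=True)

jsd = jensenshannon(p, q)  # 0 means identical; larger means more dissimilar
print(f"JSD={jsd:.3f} -> {'drift detected' if jsd > 0.1 else 'stable'}")
```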
Factors such as vaccination during the COVID-19 pandemic and reduced iatrogenic events and cancer-related deaths in MIMIC-IV were highlighted. The methodology also pinpointed shifts in patient demographics and disease patterns, providing insights into the evolving health care landscape during the study period. Conclusions: We developed a novel methodology combining machine learning and natural language processing techniques to detect, characterize, and understand temporal shifts in health care data. This understanding can enhance predictive algorithms, improve patient outcomes, and optimize health care resource allocation, ultimately improving the effectiveness of machine learning predictive algorithms applied to health care data. Our methodology can be applied to a variety of scenarios beyond those discussed in this paper. ", doi="10.2196/54246", url="https://medinform.jmir.org/2024/1/e54246" } @Article{info:doi/10.2196/54839, author="Wernli, Boris and Verloo, Henk and von Gunten, Armin and Pereira, Filipa", title="Using Existing Clinical Data to Measure Older Adult Inpatients' Frailty at Admission and Discharge: Hospital Patient Register Study", journal="JMIR Aging", year="2024", month="Oct", day="28", volume="7", pages="e54839", keywords="frailty", keywords="frailty assessment", keywords="electronic patient records", keywords="functional independence measure", keywords="routinely collected data", keywords="hospital register", keywords="patient records", keywords="medical records", keywords="clinical data", keywords="older adults", keywords="cluster analysis", keywords="hierarchical clustering", abstract="Background: Frailty is a widespread geriatric syndrome among older adults, including hospitalized older inpatients. Some countries use electronic frailty measurement tools to identify frailty at the primary care level, but this method has rarely been investigated during hospitalization in acute care hospitals. An electronic frailty measurement instrument based on population-based hospital electronic health records could effectively detect frailty, frailty-related problems, and complications as well as serve as a clinical alert. Identifying frailty among older adults using existing patient health data would greatly aid the management and support of frailty identification and could provide a valuable public health instrument without additional costs. Objective: We aim to explore a data-driven frailty measurement instrument for older adult inpatients using data routinely collected at hospital admission and discharge. Methods: A retrospective electronic patient register study included inpatients aged ≥65 years admitted to and discharged from a public hospital between 2015 and 2017. A dataset of 53,690 hospitalizations was used to customize this data-driven frailty measurement instrument inspired by the Edmonton Frailty Scale developed by Rolfson et al. A 2-step hierarchical cluster procedure was applied to compute e-Frail-CH (Switzerland) scores at hospital admission and discharge. Prevalence, central tendency, comparative, and validation statistics were computed. Results: Mean patient age at admission was 78.4 (SD 7.9) years, with more women admitted (28,018/53,690, 52.18\%) than men (25,672/53,690, 47.81\%). Our 2-step hierarchical clustering approach computed 46,743 inputs of hospital admissions and 47,361 for discharges. Clustering solutions scored from 0.5 to 0.8 on a scale from 0 to 1. Patients considered frail comprised 42.02\% (n=19,643) of admissions and 48.23\% (n=22,845) of discharges.
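Purely as an illustration of the kind of clustering pass the e-Frail-CH abstract describes (this is not the authors' code, the study's actual 2-step procedure is more involved, and the features below are synthetic stand-ins):

import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(1)
admissions = rng.normal(size=(500, 6))  # hypothetical admission features
X = StandardScaler().fit_transform(admissions)

Z = linkage(X, method="ward")  # agglomerative dendrogram
labels = fcluster(Z, t=2, criterion="maxclust")  # cut into 2 clusters
print(np.bincount(labels)[1:])  # cluster sizes (labels start at 1)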
Within e-Frail-CH's 0-12 range, a score ≥6 indicated frailty. We found a statistically significant mean e-Frail-CH score change between hospital admission (5.3, SD 2.6) and discharge (5.75, SD 2.7; P<.001). Sensitivity and specificity cut point values were 0.82 and 0.88, respectively. The area under the receiver operating characteristic curve was 0.85. Comparing the e-Frail-CH instrument to the existing Functional Independence Measure (FIM) instrument, FIM scores indicating severe dependence equated to e-Frail-CH scores of ≥9, with a sensitivity and specificity of 0.97 and 0.88, respectively. The area under the receiver operating characteristic curve was 0.92. There was a strong negative association between e-Frail-CH scores at hospital discharge and FIM scores (rs=--0.844; P<.001). Conclusions: An electronic frailty measurement instrument was constructed and validated using patient data routinely collected during hospitalization, especially at admission and discharge. The mean e-Frail-CH score was higher at discharge than at admission. The routine calculation of e-Frail-CH scores during hospitalization could provide very useful clinical alerts on the health trajectories of older adults and help select interventions for preventing or mitigating frailty. ", doi="10.2196/54839", url="https://aging.jmir.org/2024/1/e54839" } @Article{info:doi/10.2196/54710, author="Alam, Ashraful Md and Sajib, Zaman Md Refat Uz and Rahman, Fariya and Ether, Saraban and Hanson, Molly and Sayeed, Abu and Akter, Ema and Nusrat, Nowrin and Islam, Tahrin Tanjeena and Raza, Sahar and Tanvir, M. K. and Chisti, Jobayer Mohammod and Rahman, Sadeq-ur Qazi and Hossain, Akm and Layek, MA and Zaman, Asaduz and Rana, Juwel and Rahman, Moshfiqur Syed and Arifeen, El Shams and Rahman, Ehsanur Ahmed and Ahmed, Anisuddin", title="Implications of Big Data Analytics, AI, Machine Learning, and Deep Learning in the Health Care System of Bangladesh: Scoping Review", journal="J Med Internet Res", year="2024", month="Oct", day="28", volume="26", pages="e54710", keywords="machine learning", keywords="deep learning", keywords="artificial intelligence", keywords="big data analytics", keywords="public health", keywords="health care", keywords="mobile phone", keywords="Bangladesh", abstract="Background: The rapid advancement of digital technologies, particularly in big data analytics (BDA), artificial intelligence (AI), machine learning (ML), and deep learning (DL), is reshaping the global health care system, including in Bangladesh. The increased adoption of these technologies in health care delivery within Bangladesh has sparked their integration into health care and public health research, resulting in a noticeable surge in related studies. However, a critical gap exists, as there is a lack of comprehensive evidence regarding the research landscape; regulatory challenges; use cases; and the application and adoption of BDA, AI, ML, and DL in the health care system of Bangladesh. This gap impedes the attainment of optimal results. As Bangladesh is a leading implementer of digital technologies, bridging this gap is urgent for the effective use of these advancing technologies. Objective: This scoping review aims to collate (1) the existing research in Bangladesh's health care system, using the aforementioned technologies and synthesizing their findings, and (2) the limitations faced by researchers in integrating the aforementioned technologies into health care research.
Methods: MEDLINE (via PubMed), IEEE Xplore, Scopus, and Embase databases were searched to identify published research articles between January 1, 2000, and September 10, 2023, meeting the following inclusion criteria: (1) any study using any of the BDA, AI, ML, and DL technologies and health care and public health datasets for predicting health issues and forecasting any kind of outbreak; (2) studies primarily focusing on health care and public health issues in Bangladesh; and (3) original research articles published in peer-reviewed journals and conference proceedings written in English. Results: With the initial search, we identified 1653 studies. Following the inclusion and exclusion criteria and full-text review, 4.66\% (77/1653) of the articles were finally included in this review. There was a substantial increase in studies over the last 5 years (2017-2023). Among the 77 studies, the majority (n=65, 84\%) used ML models. A smaller proportion of studies incorporated AI (4/77, 5\%), DL (7/77, 9\%), and BDA (1/77, 1\%) technologies. Among the reviewed articles, 52\% (40/77) relied on primary data, while the remaining 48\% (37/77) used secondary data. The primary research areas of focus were infectious diseases (15/77, 19\%), noncommunicable diseases (23/77, 30\%), child health (11/77, 14\%), and mental health (9/77, 12\%). Conclusions: This scoping review highlights remarkable progress in leveraging BDA, AI, ML, and DL within Bangladesh's health care system. The observed surge in studies over the last 5 years underscores the increasing significance of AI and related technologies in health care research. Notably, most (65/77, 84\%) studies focused on ML models, unveiling opportunities for advancements in predictive modeling. This review encapsulates the current state of technological integration and propels us into a promising era for the future of digital Bangladesh. ", doi="10.2196/54710", url="https://www.jmir.org/2024/1/e54710", url="http://www.ncbi.nlm.nih.gov/pubmed/39466315" } @Article{info:doi/10.2196/56681, author="Accorsi, Duenhas Tarso Augusto and Eduardo, Aires Anderson and Baptista, Guilherme Carlos and Moreira, Tocci Flavio and Morbeck, Albaladejo Renata and K{\"o}hler, Francine Karen and Lima, Amicis Karine de and Pedrotti, Sartorato Carlos Henrique", title="The Impact of International Classification of Disease--Triggered Prescription Support on Telemedicine: Observational Analysis of Efficiency and Guideline Adherence", journal="JMIR Med Inform", year="2024", month="Oct", day="25", volume="12", pages="e56681", keywords="telemedicine", keywords="clinical decision support systems", keywords="electronic prescriptions", keywords="guideline adherence", keywords="consultation efficiency", keywords="International Classification of Disease--coded prescriptions", keywords="teleheath", keywords="eHealth", abstract="Background: Integrating decision support systems into telemedicine may optimize consultation efficiency and adherence to clinical guidelines; however, the extent of such effects remains underexplored. Objective: This study aims to evaluate the use of ICD (International Classification of Disease)-coded prescription decision support systems (PDSSs) and the effects of these systems on consultation duration and guideline adherence during telemedicine encounters. Methods: In this retrospective, single-center, observational study conducted from October 2021 to March 2022, adult patients who sought urgent digital care via direct-to-consumer video consultations were included. 
Physicians had access to current guidelines and could use an ICD-triggered PDSS (which was introduced in January 2022 after a preliminary test in the preceding month) for 26 guideline-based conditions. This study analyzed the impact of implementing automated prescription systems and compared these systems to manual prescription processes in terms of consultation duration and guideline adherence. Results: This study included 10,485 telemedicine encounters involving 9644 patients, with 12,346 prescriptions issued by 290 physicians. Automated prescriptions were used in 5022 (40.67\%) of the consultations following system integration. Before introducing decision support, 4497 (36.42\%) prescriptions were issued, which increased to 7849 (63.57\%) postimplementation. The physician's average consultation time decreased significantly to 9.5 (SD 5.5) minutes from 11.2 (SD 5.9) minutes after PDSS implementation (P<.001). Of the 12,346 prescriptions, 8683 (70.34\%) were aligned with disease-specific international guidelines tailored for telemedicine encounters. Primary medication adherence in accordance with existing guidelines was significantly greater in the decision support group than in the manual group (n=4697, 93.53\% vs n=1389, 49.14\%; P<.001). Conclusions: Most of the physicians adopted the PDSS, and the results demonstrated the use of the ICD-code system in reducing consultation times and increasing guideline adherence. These systems appear to be valuable for enhancing the efficiency and quality of telemedicine consultations by supporting evidence-based clinical decision-making. ", doi="10.2196/56681", url="https://medinform.jmir.org/2024/1/e56681", url="http://www.ncbi.nlm.nih.gov/pubmed/39453703" } @Article{info:doi/10.2196/57035, author="van Maurik, S. I. and Doodeman, J. H. and Veeger-Nuijens, W. B. and M{\"o}hringer, M. R. P. and Sudiono, R. D. and Jongbloed, W. and van Soelen, E.", title="Targeted Development and Validation of Clinical Prediction Models in Secondary Care Settings: Opportunities and Challenges for Electronic Health Record Data", journal="JMIR Med Inform", year="2024", month="Oct", day="24", volume="12", pages="e57035", keywords="clinical prediction model", keywords="electronic health record", keywords="targeted validation", keywords="EHR", keywords="EMR", keywords="prediction models", keywords="validation", keywords="CPM", keywords="secondary care", keywords="machine learning", keywords="artificial intelligence", keywords="AI", doi="10.2196/57035", url="https://medinform.jmir.org/2024/1/e57035" } @Article{info:doi/10.2196/57940, author="Ortiz-Barrios, Miguel and Cleland, Ian and Donnelly, Mark and Gul, Muhammet and Yucesan, Melih and Jim{\'e}nez-Delgado, Isabel Genett and Nugent, Chris and Madrid-Sierra, Stephany", title="Integrated Approach Using Intuitionistic Fuzzy Multicriteria Decision-Making to Support Classifier Selection for Technology Adoption in Patients with Parkinson Disease: Algorithm Development and Validation", journal="JMIR Rehabil Assist Technol", year="2024", month="Oct", day="22", volume="11", pages="e57940", keywords="Parkinson disease", keywords="technology adoption", keywords="intuitionistic fuzzy analytic hierarchy process", keywords="intuitionistic fuzzy decision-making trial and evaluation laboratory", keywords="combined compromise solution", abstract="Background: Parkinson disease (PD) is reported to be among the most prevalent neurodegenerative diseases globally, presenting ongoing challenges and increasing burden on health care systems. 
In an effort to support patients with PD, their carers, and the wider health care sector to manage this incurable condition, the focus has begun to shift away from traditional treatments. One of the most contemporary treatments includes prescribing assistive technologies (ATs), which are viewed as a way to promote independent living and deliver remote care. However, the uptake of these ATs is varied, with some users not ready or willing to accept all forms of AT and others only willing to adopt low-technology solutions. Consequently, to manage both the demands on resources and the efficiency with which ATs are deployed, new approaches are needed to automatically assess or predict a user's likelihood to accept and adopt a particular AT before it is prescribed. Classification algorithms can be used to automatically consider the range of factors impacting AT adoption likelihood, thereby potentially supporting more effective AT allocation. From a computational perspective, different classification algorithms and selection criteria offer various opportunities and challenges to address this need. Objective: This paper presents a novel hybrid multicriteria decision-making approach to support classifier selection in technology adoption processes involving patients with PD. Methods: First, the intuitionistic fuzzy analytic hierarchy process (IF-AHP) was implemented to calculate the relative priorities of criteria and subcriteria considering experts' knowledge and uncertainty. Second, the intuitionistic fuzzy decision-making trial and evaluation laboratory (IF-DEMATEL) was applied to evaluate the cause-effect relationships among criteria/subcriteria. Finally, the combined compromise solution (CoCoSo) was used to rank the candidate classifiers based on their capability to model the technology adoption. Results: We conducted a study involving a mobile smartphone solution to validate the proposed methodology. Structure (F5) was identified as the factor with the highest relative priority (overall weight=0.214), while adaptability (F4) (D-R=1.234) was found to be the most influencing aspect when selecting classifiers for technology adoption in patients with PD. In this case, the most appropriate algorithm for supporting technology adoption in patients with PD was the A3 - J48 decision tree (M3=2.5592). The results obtained by comparing the CoCoSo method in the proposed approach with 2 alternative methods (simple additive weighting and technique for order of preference by similarity to ideal solution) support the accuracy and applicability of the proposed methodology. It was observed that the final scores of the algorithms in each method were highly correlated (Pearson correlation coefficient >0.8). Conclusions: The IF-AHP-IF-DEMATEL-CoCoSo approach helped to identify classification algorithms that do not just discriminate between good and bad adopters of assistive technologies within the Parkinson population but also consider technology-specific features like design, quality, and compatibility that make these classifiers easily implementable by clinicians in the health care system. 
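A small illustrative sketch of the agreement check reported above, correlating the final classifier scores produced by two multicriteria ranking methods; the score vectors are invented placeholders, not values from the study:

from scipy.stats import pearsonr

cocoso_scores = [2.56, 1.98, 2.31, 1.40, 2.10]  # hypothetical CoCoSo output
topsis_scores = [0.81, 0.62, 0.74, 0.41, 0.66]  # hypothetical TOPSIS output

r, p_value = pearsonr(cocoso_scores, topsis_scores)
print(f"Pearson r = {r:.3f} (P = {p_value:.3f})")  # r > 0.8 suggests the rankings agree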
", doi="10.2196/57940", url="https://rehab.jmir.org/2024/1/e57940" } @Article{info:doi/10.2196/59782, author="Liu, Shengyu and Wang, Anran and Xiu, Xiaolei and Zhong, Ming and Wu, Sizhu", title="Evaluating Medical Entity Recognition in Health Care: Entity Model Quantitative Study", journal="JMIR Med Inform", year="2024", month="Oct", day="17", volume="12", pages="e59782", keywords="natural language processing", keywords="NLP", keywords="model evaluation", keywords="macrofactors", keywords="medical named entity recognition models", abstract="Background: Named entity recognition (NER) models are essential for extracting structured information from unstructured medical texts by identifying entities such as diseases, treatments, and conditions, enhancing clinical decision-making and research. Innovations in machine learning, particularly those involving Bidirectional Encoder Representations From Transformers (BERT)--based deep learning and large language models, have significantly advanced NER capabilities. However, their performance varies across medical datasets due to the complexity and diversity of medical terminology. Previous studies have often focused on overall performance, neglecting specific challenges in medical contexts and the impact of macrofactors like lexical composition on prediction accuracy. These gaps hinder the development of optimized NER models for medical applications. Objective: This study aims to meticulously evaluate the performance of various NER models in the context of medical text analysis, focusing on how complex medical terminology affects entity recognition accuracy. Additionally, we explored the influence of macrofactors on model performance, seeking to provide insights for refining NER models and enhancing their reliability for medical applications. Methods: This study comprehensively evaluated 7 NER models---hidden Markov models, conditional random fields, BERT for Biomedical Text Mining, Big Transformer Models for Efficient Long-Sequence Attention, Decoding-enhanced BERT with Disentangled Attention, Robustly Optimized BERT Pretraining Approach, and Gemma---across 3 medical datasets: Revised Joint Workshop on Natural Language Processing in Biomedicine and its Applications (JNLPBA), BioCreative V CDR, and Anatomical Entity Mention (AnatEM). The evaluation focused on prediction accuracy, resource use (eg, central processing unit and graphics processing unit use), and the impact of fine-tuning hyperparameters. The macrofactors affecting model performance were also screened using the multilevel factor elimination algorithm. Results: The fine-tuned BERT for Biomedical Text Mining, with balanced resource use, generally achieved the highest prediction accuracy across the Revised JNLPBA and AnatEM datasets, with microaverage (AVG\_MICRO) scores of 0.932 and 0.8494, respectively, highlighting its superior proficiency in identifying medical entities. Gemma, fine-tuned using the low-rank adaptation technique, achieved the highest accuracy on the BioCreative V CDR dataset with an AVG\_MICRO score of 0.9962 but exhibited variability across the other datasets (AVG\_MICRO scores of 0.9088 on the Revised JNLPBA and 0.8029 on AnatEM), indicating a need for further optimization. In addition, our analysis revealed that 2 macrofactors, entity phrase length and the number of entity words in each entity phrase, significantly influenced model performance. 
Conclusions: This study highlights the essential role of NER models in medical informatics, emphasizing the imperative for model optimization via precise data targeting and fine-tuning. The insights from this study will notably improve clinical decision-making and facilitate the creation of more sophisticated and effective medical NER models. ", doi="10.2196/59782", url="https://medinform.jmir.org/2024/1/e59782" } @Article{info:doi/10.2196/58463, author="Tan, Kuan Joshua and Quan, Le and Salim, Mohamed Nur Nasyitah and Tan, Hong Jen and Goh, Su-Yen and Thumboo, Julian and Bee, Mong Yong", title="Machine Learning--Based Prediction for High Health Care Utilizers by Using a Multi-Institutional Diabetes Registry: Model Training and Evaluation", journal="JMIR AI", year="2024", month="Oct", day="17", volume="3", pages="e58463", keywords="diabetes mellitus", keywords="type 2 diabetes", keywords="health care utilization", keywords="population health management", keywords="population health", keywords="machine learning", keywords="artificial intelligence", keywords="predictive model", keywords="predictive system", keywords="practical model", abstract="Background: The cost of health care in many countries is increasing rapidly. There is a growing interest in using machine learning for predicting high health care utilizers for population health initiatives. Previous studies have focused on individuals who contribute to the highest financial burden. However, this group is small and represents a limited opportunity for long-term cost reduction. Objective: We developed a collection of models that predict future health care utilization at various thresholds. Methods: We utilized data from a multi-institutional diabetes database from the year 2019 to develop binary classification models. These models predict health care utilization in the subsequent year across 6 different outcomes: patients having a length of stay of ≥7, ≥14, and ≥30 days and emergency department attendance of ≥3, ≥5, and ≥10 visits. To address class imbalance, random and synthetic minority oversampling techniques were employed. The models were then applied to unseen data from 2020 and 2021 to predict health care utilization in the following year. A portfolio of performance metrics, with priority on area under the receiver operating characteristic curve, sensitivity, and positive predictive value, was used for comparison. Explainability analyses were conducted on the best performing models. Results: When trained with random oversampling, 4 models, that is, logistic regression, multivariate adaptive regression splines, boosted trees, and multilayer perceptron consistently achieved high area under the receiver operating characteristic curve (>0.80) and sensitivity (>0.60) across training-validation and test data sets. Correcting for class imbalance proved critical for model performance. Important predictors for all outcomes included age, number of emergency department visits in the present year, chronic kidney disease stage, inpatient bed days in the present year, and mean hemoglobin A1c levels. Explainability analyses using partial dependence plots demonstrated that for the best performing models, the learned patterns were consistent with real-world knowledge, thereby supporting the validity of the models. Conclusions: We successfully developed machine learning models capable of predicting high service level utilization with strong performance and valid explainability.
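As a hedged illustration of the class-imbalance correction described above, assuming the third-party imbalanced-learn package and wholly synthetic data (a real pipeline would rebalance only the training split):

from imblearn.over_sampling import SMOTE, RandomOverSampler
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# 5% positive class, mimicking a rare high-utilization outcome.
X, y = make_classification(n_samples=2000, weights=[0.95, 0.05], random_state=0)

X_ros, y_ros = RandomOverSampler(random_state=0).fit_resample(X, y)  # random oversampling
X_sm, y_sm = SMOTE(random_state=0).fit_resample(X, y)  # synthetic minority oversampling

model = LogisticRegression(max_iter=1000).fit(X_sm, y_sm)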
These models can be integrated into wider diabetes-related population health initiatives. ", doi="10.2196/58463", url="https://ai.jmir.org/2024/1/e58463", url="http://www.ncbi.nlm.nih.gov/pubmed/39418089" } @Article{info:doi/10.2196/44494, author="Liu, Siqi and Xu, Qianyi and Xu, Zhuoyang and Liu, Zhuo and Sun, Xingzhi and Xie, Guotong and Feng, Mengling and See, Choong Kay", title="Reinforcement Learning to Optimize Ventilator Settings for Patients on Invasive Mechanical Ventilation: Retrospective Study", journal="J Med Internet Res", year="2024", month="Oct", day="16", volume="26", pages="e44494", keywords="mechanical ventilation", keywords="reinforcement learning", keywords="artificial intelligence", keywords="validation study", keywords="critical care", keywords="treatment", keywords="intensive care unit", keywords="critically ill", keywords="patient", keywords="monitoring", keywords="database", keywords="mortality rate", keywords="decision support", keywords="support tool", keywords="survival", keywords="prognosis", keywords="respiratory support", abstract="Background: One of the significant changes in intensive care medicine over the past 2 decades is the acknowledgment that improper mechanical ventilation settings substantially contribute to pulmonary injury in critically ill patients. Artificial intelligence (AI) solutions can optimize mechanical ventilation settings in intensive care units (ICUs) and improve patient outcomes. Specifically, machine learning algorithms can be trained on large datasets of patient information and mechanical ventilation settings. These algorithms can then predict patient responses to different ventilation strategies and suggest personalized ventilation settings for individual patients. Objective: In this study, we aimed to design and evaluate an AI solution that could tailor an optimal ventilator strategy for each critically ill patient who requires mechanical ventilation. Methods: We proposed a reinforcement learning--based AI solution using observational data from multiple ICUs in the United States. The primary outcome was hospital mortality. Secondary outcomes were the proportion of optimal oxygen saturation and the proportion of optimal mean arterial blood pressure. We trained our AI agent to recommend low, medium, and high levels of 3 ventilator settings---positive end-expiratory pressure, fraction of inspired oxygen, and ideal body weight--adjusted tidal volume---according to patients' health conditions. We defined a policy as rules guiding ventilator setting changes given specific clinical scenarios. Off-policy evaluation metrics were applied to evaluate the AI policy. Results: We studied 21,595 and 5105 patients' ICU stays from the e-Intensive Care Unit Collaborative Research (eICU) and Medical Information Mart for Intensive Care IV (MIMIC-IV) databases, respectively. Using the learned AI policy, we estimated the hospital mortality rate (eICU 12.1\%, SD 3.1\%; MIMIC-IV 29.1\%, SD 0.9\%), the proportion of optimal oxygen saturation (eICU 58.7\%, SD 4.7\%; MIMIC-IV 49\%, SD 1\%), and the proportion of optimal mean arterial blood pressure (eICU 31.1\%, SD 4.5\%; MIMIC-IV 41.2\%, SD 1\%). Based on multiple quantitative and qualitative evaluation metrics, our proposed AI solution outperformed observed clinical practice. Conclusions: Our study found that customizing ventilation settings for individual patients led to lower estimated hospital mortality rates compared to actual rates. 
This highlights the potential effectiveness of using reinforcement learning methodology to develop AI models that analyze complex clinical data for optimizing treatment parameters. Additionally, our findings suggest the integration of this model into a clinical decision support system for refining ventilation settings, supporting the need for prospective validation trials. ", doi="10.2196/44494", url="https://www.jmir.org/2024/1/e44494", url="http://www.ncbi.nlm.nih.gov/pubmed/39219230" } @Article{info:doi/10.2196/60601, author="Yang, Rui and Zeng, Qingcheng and You, Keen and Qiao, Yujie and Huang, Lucas and Hsieh, Chia-Chun and Rosand, Benjamin and Goldwasser, Jeremy and Dave, Amisha and Keenan, Tiarnan and Ke, Yuhe and Hong, Chuan and Liu, Nan and Chew, Emily and Radev, Dragomir and Lu, Zhiyong and Xu, Hua and Chen, Qingyu and Li, Irene", title="Ascle---A Python Natural Language Processing Toolkit for Medical Text Generation: Development and Evaluation Study", journal="J Med Internet Res", year="2024", month="Oct", day="3", volume="26", pages="e60601", keywords="natural language processing", keywords="machine learning", keywords="deep learning", keywords="generative artificial intelligence", keywords="large language models", keywords="retrieval-augmented generation", keywords="healthcare", abstract="Background: Medical texts present significant domain-specific challenges, and manually curating these texts is a time-consuming and labor-intensive process. To address this, natural language processing (NLP) algorithms have been developed to automate text processing. In the biomedical field, various toolkits for text processing exist, which have greatly improved the efficiency of handling unstructured text. However, these existing toolkits tend to emphasize different perspectives, and none of them offer generation capabilities, leaving a significant gap in the current offerings. Objective: This study aims to describe the development and preliminary evaluation of Ascle. Ascle is tailored for biomedical researchers and clinical staff with an easy-to-use, all-in-one solution that requires minimal programming expertise. For the first time, Ascle provides 4 advanced and challenging generative functions: question-answering, text summarization, text simplification, and machine translation. In addition, Ascle integrates 12 essential NLP functions, along with query and search capabilities for clinical databases. Methods: We fine-tuned 32 domain-specific language models and evaluated them thoroughly on 27 established benchmarks. In addition, for the question-answering task, we developed a retrieval-augmented generation (RAG) framework for large language models that incorporated a medical knowledge graph with ranking techniques to enhance the reliability of generated answers. Additionally, we conducted a physician validation to assess the quality of generated content beyond automated metrics. Results: The fine-tuned models and RAG framework consistently enhanced text generation tasks. For example, the fine-tuned models improved the machine translation task by 20.27 in terms of BLEU score. In the question-answering task, the RAG framework raised the ROUGE-L score by 18\% over the vanilla models. Physician validation of generated answers showed high scores for readability (4.95/5) and relevancy (4.43/5), with a lower score for accuracy (3.90/5) and completeness (3.31/5). 
Conclusions: This study introduces the development and evaluation of Ascle, a user-friendly NLP toolkit designed for medical text generation. All code is publicly available through the Ascle GitHub repository. All fine-tuned language models can be accessed through Hugging Face. ", doi="10.2196/60601", url="https://www.jmir.org/2024/1/e60601" } @Article{info:doi/10.2196/58978, author="Uhl, Laura and Augusto, Vincent and Dalmas, Benjamin and Alexandre, Youenn and Bercelli, Paolo and Jardinaud, Fanny and Aloui, Saber", title="Evaluating the Bias in Hospital Data: Automatic Preprocessing of Patient Pathways Algorithm Development and Validation Study", journal="JMIR Med Inform", year="2024", month="Sep", day="23", volume="12", pages="e58978", keywords="preprocessing", keywords="framework", keywords="health care data", keywords="patient pathway", keywords="bed management", abstract="Background: The optimization of patient care pathways is crucial for hospital managers in the context of a scarcity of medical resources. Assuming unlimited capacities, the pathway of a patient would only be governed by pure medical logic to meet at best the patient's needs. However, logistical limitations (eg, resources such as inpatient beds) are often associated with delayed treatments and may ultimately affect patient pathways. This is especially true for unscheduled patients---when a patient in the emergency department needs to be admitted to another medical unit without disturbing the flow of planned hospitalizations. Objective: In this study, we proposed a new framework to automatically detect activities in patient pathways that may be unrelated to patients' needs but rather induced by logistical limitations. Methods: The scientific contribution lies in a method that transforms a database of historical pathways with bias into 2 databases: a labeled pathway database where each activity is labeled as relevant (related to a patient's needs) or irrelevant (induced by logistical limitations) and a corrected pathway database where each activity corresponds to the activity that would occur assuming unlimited resources. The labeling algorithm was assessed through medical expertise. In total, 2 case studies quantified the impact of our method of preprocessing health care data using process mining and discrete event simulation. Results: Focusing on unscheduled patient pathways, we collected data covering 12 months of activity at the Groupe Hospitalier Bretagne Sud in France. Our algorithm had 87\% accuracy and demonstrated its usefulness for preprocessing traces and obtaining a clean database. The 2 case studies showed the importance of our preprocessing step before any analysis. The process graphs of the processed data had, on average, 40\% (SD 10\%) fewer variants than the raw data. The simulation revealed that 30\% of the medical units had >1 bed difference in capacity between the processed and raw data. Conclusions: Patient pathway data reflect the actual activity of hospitals that is governed by medical requirements and logistical limitations. Before using these data, these limitations should be identified and corrected. We anticipate that our approach can be generalized to obtain unbiased analyses of patient pathways for other hospitals. 
", doi="10.2196/58978", url="https://medinform.jmir.org/2024/1/e58978", url="http://www.ncbi.nlm.nih.gov/pubmed/39312289" } @Article{info:doi/10.2196/56729, author="Raff, Daniel and Stewart, Kurtis and Yang, Christie Michelle and Shang, Jessie and Cressman, Sonya and Tam, Roger and Wong, Jessica and Tammem{\"a}gi, C. Martin and Ho, Kendall", title="Improving Triage Accuracy in Prehospital Emergency Telemedicine: Scoping Review of Machine Learning--Enhanced Approaches", journal="Interact J Med Res", year="2024", month="Sep", day="11", volume="13", pages="e56729", keywords="telemedicine", keywords="machine learning", keywords="emergency medicine", keywords="artificial intelligence", keywords="chatbot", keywords="triage", keywords="scoping review", keywords="prehospital", abstract="Background: Prehospital telemedicine triage systems combined with machine learning (ML) methods have the potential to improve triage accuracy and safely redirect low-acuity patients from attending the emergency department. However, research in prehospital settings is limited but needed; emergency department overcrowding and adverse patient outcomes are increasingly common. Objective: In this scoping review, we sought to characterize the existing methods for ML-enhanced telemedicine emergency triage. In order to support future research, we aimed to delineate what data sources, predictors, labels, ML models, and performance metrics were used, and in which telemedicine triage systems these methods were applied. Methods: A scoping review was conducted, querying multiple databases (MEDLINE, PubMed, Scopus, and IEEE Xplore) through February 24, 2023, to identify potential ML-enhanced methods, and for those eligible, relevant study characteristics were extracted, including prehospital triage setting, types of predictors, ground truth labeling method, ML models used, and performance metrics. Inclusion criteria were restricted to the triage of emergency telemedicine services using ML methods on an undifferentiated (disease nonspecific) population. Only primary research studies in English were considered. Furthermore, only those studies using data collected remotely (as opposed to derived from physical assessments) were included. In order to limit bias, we exclusively included articles identified through our predefined search criteria and had 3 researchers (DR, JS, and KS) independently screen the resulting studies. We conducted a narrative synthesis of findings to establish a knowledge base in this domain and identify potential gaps to be addressed in forthcoming ML-enhanced methods. Results: A total of 165 unique records were screened for eligibility and 15 were included in the review. Most studies applied ML methods during emergency medical dispatch (7/15, 47\%) or used chatbot applications (5/15, 33\%). Patient demographics and health status variables were the most common predictors, with a notable absence of social variables. Frequently used ML models included support vector machines and tree-based methods. ML-enhanced models typically outperformed conventional triage algorithms, and we found a wide range of methods used to establish ground truth labels. Conclusions: This scoping review observed heterogeneity in dataset size, predictors, clinical setting (triage process), and reported performance metrics. 
The recurring use of standard structured predictors, including age, sex, and comorbidities, across articles suggests the importance of these inputs; however, there was a notable absence of other potentially useful data, including medications, social variables, and health system exposure. Ground truth labeling practices should be reported in a standard fashion as the true model performance hinges on these labels. This review calls for future work to form a standardized framework, thereby supporting consistent reporting and performance comparisons across ML-enhanced prehospital triage systems. ", doi="10.2196/56729", url="https://www.i-jmr.org/2024/1/e56729" } @Article{info:doi/10.2196/57949, author="Zheng, Chengyi and Ackerson, Bradley and Qiu, Sijia and Sy, S. Lina and Daily, Vega Leticia I. and Song, Jeannie and Qian, Lei and Luo, Yi and Ku, H. Jennifer and Cheng, Yanjun and Wu, Jun and Tseng, Fu Hung", title="Natural Language Processing Versus Diagnosis Code--Based Methods for Postherpetic Neuralgia Identification: Algorithm Development and Validation", journal="JMIR Med Inform", year="2024", month="Sep", day="10", volume="12", pages="e57949", keywords="postherpetic neuralgia", keywords="herpes zoster", keywords="natural language processing", keywords="electronic health record", keywords="real-world data", keywords="artificial intelligence", keywords="development", keywords="validation", keywords="diagnosis", keywords="EHR", keywords="algorithm", keywords="EHR data", keywords="sensitivity", keywords="specificity", keywords="validation data", keywords="neuralgia", keywords="recombinant zoster vaccine", abstract="Background: Diagnosis codes and prescription data are used in algorithms to identify postherpetic neuralgia (PHN), a debilitating complication of herpes zoster (HZ). Because of the questionable accuracy of codes and prescription data, manual chart review is sometimes used to identify PHN in electronic health records (EHRs), which can be costly and time-consuming. Objective: This study aims to develop and validate a natural language processing (NLP) algorithm for automatically identifying PHN from unstructured EHR data and to compare its performance with that of code-based methods. Methods: This retrospective study used EHR data from Kaiser Permanente Southern California, a large integrated health care system that serves over 4.8 million members. The source population included members aged ≥50 years who received an incident HZ diagnosis and accompanying antiviral prescription between 2018 and 2020 and had ≥1 encounter within 90-180 days of the incident HZ diagnosis. The study team manually reviewed the EHR and identified PHN cases. For NLP development and validation, 500 and 800 random samples from the source population were selected, respectively. The sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV), F-score, and Matthews correlation coefficient (MCC) of NLP and the code-based methods were evaluated using chart-reviewed results as the reference standard. Results: The NLP algorithm identified PHN cases with a 90.9\% sensitivity, 98.5\% specificity, 82\% PPV, and 99.3\% NPV. The composite scores of the NLP algorithm were 0.89 (F-score) and 0.85 (MCC). The prevalences of PHN in the validation data were 6.9\% (reference standard), 7.6\% (NLP), and 5.4\%-13.1\% (code-based). The code-based methods achieved a 52.7\%-61.8\% sensitivity, 89.8\%-98.4\% specificity, 27.6\%-72.1\% PPV, and 96.3\%-97.1\% NPV.
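For readers unfamiliar with the metrics quoted in these results, the following illustrative snippet computes all of them from a 2x2 confusion matrix; the counts are invented, not the study's:

import math

tp, fp, fn, tn = 90, 20, 9, 681  # hypothetical chart-review counts

sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
ppv = tp / (tp + fp)  # positive predictive value
npv = tn / (tn + fn)  # negative predictive value
f_score = 2 * tp / (2 * tp + fp + fn)
mcc = (tp * tn - fp * fn) / math.sqrt(
    (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
print(f"Se={sensitivity:.3f} Sp={specificity:.3f} PPV={ppv:.3f} "
      f"NPV={npv:.3f} F={f_score:.3f} MCC={mcc:.3f}")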
The F-scores and MCCs ranged between 0.45 and 0.59 and between 0.32 and 0.61, respectively. Conclusions: The automated NLP-based approach identified PHN cases from the EHR with good accuracy. This method could be useful in population-based PHN research. ", doi="10.2196/57949", url="https://medinform.jmir.org/2024/1/e57949" } @Article{info:doi/10.2196/55613, author="Lopes, Henrique and Baptista-Leite, Ricardo and Hermenegildo, Catarina and Atun, Rifat", title="Digital Gamification Tool (Let's Control Flu) to Increase Vaccination Coverage Rates: Proposal for Algorithm Development", journal="JMIR Res Protoc", year="2024", month="Sep", day="10", volume="13", pages="e55613", keywords="influenza", keywords="gamification", keywords="public health policies", keywords="vaccination coverage rates", keywords="health promotion", abstract="Background: Influenza represents a critical public health challenge, disproportionately affecting at-risk populations, including older adults and those with chronic conditions, often compounded by socioeconomic factors. Innovative strategies, such as gamification, are essential for augmenting risk communication and community engagement efforts to address this threat. Objective: This study aims to introduce the ``Let's Control Flu'' (LCF) tool, a gamified, interactive platform aimed at simulating the impact of various public health policies (PHPs) on influenza vaccination coverage rates and health outcomes. The tool aligns with the World Health Organization's goal of achieving a 75\% influenza vaccination rate by 2030, facilitating strategic decision-making to enhance vaccination uptake. Methods: The LCF tool integrates a selection of 13 PHPs from an initial set proposed in another study, targeting specific population groups to evaluate 7 key health outcomes. A prioritization mechanism accounts for societal resistance and the synergistic effects of PHPs, projecting the potential policy impacts from 2022 to 2031. This methodology enables users to assess how PHPs could influence public health strategies within distinct target groups. Results: The LCF project began in February 2021 and is scheduled to end in December 2024. The model creation phase and its application to the pilot country, Sweden, took place between May 2021 and May 2023, with subsequent application to other European countries. The pilot phase demonstrated the tool's potential, indicating a promising increase in the national influenza vaccination coverage rate, with uniform improvements across all targeted demographic groups. These initial findings highlight the tool's capacity to model the effects of PHPs on improving vaccination rates and mitigating the health impact of influenza. Conclusions: By incorporating gamification into the analysis of PHPs, the LCF tool offers an innovative and accessible approach to supporting health decision makers and patient advocacy groups. It enhances the comprehension of policy impacts, promoting more effective influenza prevention and control strategies. This paper underscores the critical need for adaptable and engaging tools in PHP planning and implementation. 
International Registered Report Identifier (IRRID): RR1-10.2196/55613 ", doi="10.2196/55613", url="https://www.researchprotocols.org/2024/1/e55613", url="http://www.ncbi.nlm.nih.gov/pubmed/39255031" } @Article{info:doi/10.2196/54638, author="Liu, Jingkun and Tai, Jiaojiao and Han, Junying and Zhang, Meng and Li, Yang and Yang, Hongjuan and Yan, Ziqiang", title="Constructing a Hospital Department Development--Level Assessment Model: Machine Learning and Expert Consultation Approach in Complex Hospital Data Environments", journal="JMIR Form Res", year="2024", month="Sep", day="4", volume="8", pages="e54638", keywords="machine algorithms", keywords="hospital management", keywords="model construction", keywords="support vector machine", keywords="clustering", abstract="Background: Every hospital manager aims to build harmonious, mutually beneficial, and steady-state departments. Therefore, it is important to explore a hospital department development assessment model based on objective hospital data. Objective: This study aims to use a novel machine learning algorithm to identify key evaluation indexes for hospital departments, offering insights for strategic planning and resource allocation in hospital management. Methods: Data related to the development of a hospital department over the past 3 years were extracted from various hospital information systems. The resulting data set was mined using neural machine algorithms to assess the possible role of hospital departments in the development of a hospital. A questionnaire was used to consult senior experts familiar with the hospital to assess the actual work in each hospital department and the impact of each department's development on overall hospital discipline. We used the results from this questionnaire to verify the accuracy of the departmental risk scores calculated by the machine learning algorithm. Results: Deep machine learning was performed and modeled on the hospital system training data set. The model successfully leveraged the hospital's training data set to learn, predict, and evaluate the working and development of hospital departments. A comparison of the questionnaire results with the risk ranking set from the departments machine learning algorithm using the cosine similarity algorithm and Pearson correlation analysis showed a good match. This indicates that the department development assessment model and risk score based on the objective data of hospital systems are relatively accurate and objective. Conclusions: This study demonstrated that our machine learning algorithm provides an accurate and objective assessment model for hospital department development. The strong alignment of the model's risk assessments with expert opinions, validated through statistical analysis, highlights its reliability and potential to guide strategic hospital management decisions. 
", doi="10.2196/54638", url="https://formative.jmir.org/2024/1/e54638", url="http://www.ncbi.nlm.nih.gov/pubmed/39230941" } @Article{info:doi/10.2196/62866, author="Zhou, Huan and Fang, Cheng and Pan, Yifeng", title="Development of a System for Predicting Hospitalization Time for Patients With Traumatic Brain Injury Based on Machine Learning Algorithms: User-Centered Design Case Study", journal="JMIR Hum Factors", year="2024", month="Aug", day="30", volume="11", pages="e62866", keywords="machine learning", keywords="traumatic brain injury", keywords="support vector regression machine", keywords="predictive model", keywords="hospitalization", abstract="Background: Currently, the treatment and care of patients with traumatic brain injury (TBI) are intractable health problems worldwide and greatly increase the medical burden in society. However, machine learning--based algorithms and the use of a large amount of data accumulated in the clinic in the past can predict the hospitalization time of patients with brain injury in advance, so as to design a reasonable arrangement of resources and effectively reduce the medical burden of society. Especially in China, where medical resources are so tight, this method has important application value. Objective: We aimed to develop a system based on a machine learning model for predicting the length of hospitalization of patients with TBI, which is available to patients, nurses, and physicians. Methods: We collected information on 1128 patients who received treatment at the Neurosurgery Center of the Second Affiliated Hospital of Anhui Medical University from May 2017 to May 2022, and we trained and tested the machine learning model using 5 cross-validations to avoid overfitting; 28 types of independent variables were used as input variables in the machine learning model, and the length of hospitalization was used as the output variables. Once the models were trained, we obtained the error and goodness of fit (R2) of each machine learning model from the 5 rounds of cross-validation and compared them to select the best predictive model to be encapsulated in the developed system. In addition, we externally tested the models using clinical data related to patients treated at the First Affiliated Hospital of Anhui Medical University from June 2021 to February 2022. Results: Six machine learning models were built, including support vector regression machine, convolutional neural network, back propagation neural network, random forest, logistic regression, and multilayer perceptron. Among them, the support vector regression has the smallest error of 10.22\% on the test set, the highest goodness of fit of 90.4\%, and all performances are the best among the 6 models. In addition, we used external datasets to verify the experimental results of these 6 models in order to avoid experimental chance, and the support vector regression machine eventually performed the best in the external datasets. Therefore, we chose to encapsulate the support vector regression machine into our system for predicting the length of stay of patients with traumatic brain trauma. Finally, we made the developed system available to patients, nurses, and physicians, and the satisfaction questionnaire showed that patients, nurses, and physicians agreed that the system was effective in providing clinical decisions to help patients, nurses, and physicians. 
Conclusions: This study shows that the support vector regression machine model developed using machine learning methods can accurately predict the length of hospitalization of patients with TBI, and the developed prediction system has strong clinical utility. ", doi="10.2196/62866", url="https://humanfactors.jmir.org/2024/1/e62866" } @Article{info:doi/10.2196/56734, author="Sood, Dua Priyanka and Liu, Star and Lehmann, Harold and Kharrazi, Hadi", title="Assessing the Effect of Electronic Health Record Data Quality on Identifying Patients With Type 2 Diabetes: Cross-Sectional Study", journal="JMIR Med Inform", year="2024", month="Aug", day="27", volume="12", pages="e56734", keywords="electronic health record", keywords="EHR", keywords="EHRs", keywords="record", keywords="records", keywords="computable", keywords="phenotyping", keywords="phenotype", keywords="phenotypes", keywords="computable phenotypes", keywords="data quality", keywords="data science", keywords="chronic", keywords="identify", keywords="identification", keywords="data types---diagnosis data, medication data, laboratory data", keywords="type-2 diabetes", keywords="diabetes", keywords="diabetic", keywords="DM", keywords="type 2", keywords="hospital system", keywords="clinical research and trial", keywords="diagnosis", keywords="diagnoses", keywords="diagnose", keywords="diagnostic", keywords="diagnostics", keywords="phenotypic", abstract="Background: Increasing and substantial reliance on electronic health records (EHRs) and data types (ie, diagnosis, medication, and laboratory data) demands assessment of their data quality as a fundamental approach, especially since there is a need to identify appropriate denominator populations with chronic conditions, such as type 2 diabetes (T2D), using commonly available computable phenotype definitions (ie, phenotypes). Objective: To bridge this gap, our study aims to assess how issues of EHR data quality and variations and robustness (or lack thereof) in phenotypes may affect the identification of denominator populations. Methods: Approximately 208,000 patients with T2D were included in our study, which used retrospective EHR data from the Johns Hopkins Medical Institution (JHMI) during 2017-2019. Our assessment included 4 published phenotypes and 1 definition from a panel of experts at Hopkins. We conducted descriptive analyses of demographics (ie, age, sex, race, and ethnicity), use of health care (inpatient and emergency room visits), and the average Charlson Comorbidity Index score of each phenotype. We then used different methods to induce or simulate data quality issues of completeness, accuracy, and timeliness separately across each phenotype. For induced data incompleteness, our model randomly dropped diagnosis, medication, and laboratory codes independently at increments of 10\%; for induced data inaccuracy, our model randomly replaced a diagnosis or medication code with another code of the same data type and induced 2\% incremental change from -100\% to +10\% in laboratory result values; and lastly, for induced timeliness issues, record dates were shifted incrementally by 30 to 365 days. Results: Less than a quarter (n=47,326, 23\%) of the population overlapped across all phenotypes using EHRs. The population identified by each phenotype varied across all combinations of data types.
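The induced-incompleteness experiment described above can be mimicked in a few lines: randomly drop diagnosis rows at increasing rates and recount the patients still matched by a diagnosis-code-only phenotype. Codes and records below are invented:

import random

random.seed(0)
T2D_CODES = {"E11.9", "E11.65"}  # hypothetical phenotype definition
records = [(pid, random.choice(["E11.9", "E11.65", "I10", "J45"]))
           for pid in range(1000) for _ in range(3)]  # (patient, dx code)

for drop_rate in [i / 10 for i in range(11)]:
    kept = [r for r in records if random.random() >= drop_rate]
    matched = {pid for pid, code in kept if code in T2D_CODES}
    print(f"{drop_rate:.0%} dropped -> {len(matched)} patients identified")

At a 100% drop rate no records survive, which mirrors the zero-patient behavior the results below report for a diagnosis-only phenotype.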
Induced incompleteness identified fewer patients with each increment; for example, at 100\% diagnostic incompleteness, the Chronic Conditions Data Warehouse phenotype identified zero patients, as its phenotypic characteristics included only diagnosis codes. Induced inaccuracy and timeliness similarly demonstrated variations in performance of each phenotype, therefore resulting in fewer patients being identified with each incremental change. Conclusions: We used EHR data with diagnosis, medication, and laboratory data types from a large tertiary hospital system to understand T2D phenotypic differences and performance. We used induced data quality methods to learn how data quality issues may impact identification of the denominator populations upon which clinical (eg, clinical research and trials, population health evaluations) and financial or operational decisions are made. The novel results from our study may inform future approaches to shaping a common T2D computable phenotype definition that can be applied to clinical informatics, managing chronic conditions, and additional industry-wide efforts in health care. ", doi="10.2196/56734", url="https://medinform.jmir.org/2024/1/e56734" } @Article{info:doi/10.2196/56042, author="Kar, Debasish and Taylor, S. Kathryn and Joy, Mark and Venkatesan, Sudhir and Meeraus, Wilhelmine and Taylor, Sylvia and Anand, N. Sneha and Ferreira, Filipa and Jamie, Gavin and Fan, Xuejuan and de Lusignan, Simon", title="Creating a Modified Version of the Cambridge Multimorbidity Score to Predict Mortality in People Older Than 16 Years: Model Development and Validation", journal="J Med Internet Res", year="2024", month="Aug", day="26", volume="26", pages="e56042", keywords="pandemics", keywords="COVID-19", keywords="multimorbidity", keywords="prevalence", keywords="predictive model", keywords="discrimination", keywords="calibration", keywords="systematized nomenclature of medicine", keywords="computerized medical records", keywords="systems", abstract="Background: No single multimorbidity measure is validated for use in NHS (National Health Service) England's General Practice Extraction Service Data for Pandemic Planning and Research (GDPPR), the nationwide primary care data set created for COVID-19 pandemic research. The Cambridge Multimorbidity Score (CMMS) is a validated tool for predicting mortality risk, with 37 conditions defined by Read Codes. The GDPPR uses the more internationally used Systematized Nomenclature of Medicine clinical terms (SNOMED CT). We previously developed a modified version of the CMMS using SNOMED CT, but the number of terms for the GDPPR data set is limited, making it impossible to use this version. Objective: We aimed to develop and validate a modified version of CMMS using the clinical terms available for the GDPPR. Methods: We used pseudonymized data from the Oxford-Royal College of General Practitioners Research and Surveillance Centre (RSC), which has an extensive SNOMED CT list. From the 37 conditions in the original CMMS model, we selected conditions either with (1) high prevalence ratio (≥85\%), calculated as the prevalence in the RSC data set but using the GDPPR set of SNOMED CT codes, divided by the prevalence included in the RSC SNOMED CT codes or (2) conditions with lower prevalence ratios but with high predictive value.
The resulting set of conditions was included in Cox proportional hazard models to determine the 1-year mortality risk in a development data set (n=500,000) and construct a new CMMS model, following the methods for the original CMMS study, with variable reduction and parsimony, achieved by backward elimination and the Akaike information stopping criterion. Model validation involved obtaining 1-year mortality estimates for a synchronous data set (n=250,000) and 1-year and 5-year mortality estimates for an asynchronous data set (n=250,000). We compared the performance with that of the original CMMS and the modified CMMS that we previously developed using RSC data. Results: The initial model contained 22 conditions and our final model included 17 conditions. The conditions overlapped with those of the modified CMMS using the more extensive SNOMED CT list. For 1-year mortality, discrimination was high in both the derivation and validation data sets (Harrell C=0.92) and 5-year mortality was slightly lower (Harrell C=0.90). Calibration was reasonable following an adjustment for overfitting. The performance was similar to that of both the original and previous modified CMMS models. Conclusions: The new modified version of the CMMS can be used on the GDPPR, a nationwide primary care data set of 54 million people, to enable adjustment for multimorbidity in predicting mortality in people in real-world vaccine effectiveness, pandemic planning, and other research studies. It requires 17 variables to produce a comparable performance with our previous modification of CMMS to enable it to be used in routine data using SNOMED CT. ", doi="10.2196/56042", url="https://www.jmir.org/2024/1/e56042" } @Article{info:doi/10.2196/46936, author="Straw, Isabel and Rees, Geraint and Nachev, Parashkev", title="Sex-Based Performance Disparities in Machine Learning Algorithms for Cardiac Disease Prediction: Exploratory Study", journal="J Med Internet Res", year="2024", month="Aug", day="26", volume="26", pages="e46936", keywords="artificial intelligence", keywords="machine learning", keywords="cardiology", keywords="health care", keywords="health equity", keywords="medicine", keywords="cardiac", keywords="quantitative evaluation", keywords="inequality", keywords="cardiac disease", keywords="performance", keywords="sex", keywords="management", keywords="heart failure", abstract="Background: The presence of bias in artificial intelligence has garnered increased attention, with inequities in algorithmic performance being exposed across the fields of criminal justice, education, and welfare services. In health care, the inequitable performance of algorithms across demographic groups may widen health inequalities. Objective: Here, we identify and characterize bias in cardiology algorithms, looking specifically at algorithms used in the management of heart failure. Methods: Stage 1 involved a literature search of PubMed and Web of Science for key terms relating to cardiac machine learning (ML) algorithms. Papers that built ML models to predict cardiac disease were evaluated for their focus on demographic bias in model performance, and open-source data sets were retained for our investigation. Two open-source data sets were identified: (1) the University of California Irvine Heart Failure data set and (2) the University of California Irvine Coronary Artery Disease data set. 
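The sex-stratified error-rate audit described next can be illustrated with a short sketch; the synthetic data and random forest below are stand-ins for the paper's reproduced algorithms, and real audits would add significance testing of the per-group differences.

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Toy stand-in for the UCI heart-disease data: features X, labels y, sex indicator.
X, y = make_classification(n_samples=1000, n_features=12, random_state=0)
sex = np.random.default_rng(0).integers(0, 2, size=1000)  # 0 = male, 1 = female

X_tr, X_te, y_tr, y_te, sex_tr, sex_te = train_test_split(
    X, y, sex, test_size=0.3, random_state=0)
pred = RandomForestClassifier(random_state=0).fit(X_tr, y_tr).predict(X_te)

def error_rates(y_true, y_pred):
    """False negative and false positive rates for one subgroup."""
    fn = ((y_true == 1) & (y_pred == 0)).sum()
    fp = ((y_true == 0) & (y_pred == 1)).sum()
    fnr = fn / max((y_true == 1).sum(), 1)
    fpr = fp / max((y_true == 0).sum(), 1)
    return fnr, fpr

for group, name in [(0, "male"), (1, "female")]:
    m = sex_te == group
    fnr, fpr = error_rates(y_te[m], pred[m])
    print(f"{name}: FNR={fnr:.3f}, FPR={fpr:.3f}")
```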
We reproduced existing algorithms that have been reported for these data sets, tested them for sex biases in algorithm performance, and assessed a range of remediation techniques for their efficacy in reducing inequities. Particular attention was paid to the false negative rate (FNR), due to the clinical significance of underdiagnosis and missed opportunities for treatment. Results: In stage 1, our literature search returned 127 papers, with 60 meeting the criteria for a full review and only 3 papers highlighting sex differences in algorithm performance. In the papers that reported sex, there was a consistent underrepresentation of female patients in the data sets. No papers investigated racial or ethnic differences. In stage 2, we reproduced algorithms reported in the literature, achieving mean accuracies of 84.24\% (SD 3.51\%) for data set 1 and 85.72\% (SD 1.75\%) for data set 2 (random forest models). For data set 1, the FNR was significantly higher for female patients in 13 out of 16 experiments, meeting the threshold of statistical significance (--17.81\% to --3.37\%; P<.05). A smaller disparity in the false positive rate was significant for male patients in 13 out of 16 experiments (--0.48\% to +9.77\%; P<.05). We observed an overprediction of disease for male patients (higher false positive rate) and an underprediction of disease for female patients (higher FNR). Sex differences in feature importance suggest that feature selection needs to be demographically tailored. Conclusions: Our research exposes a significant gap in cardiac ML research, highlighting that the underperformance of algorithms for female patients has been overlooked in the published literature. Our study quantifies sex disparities in algorithmic performance and explores several sources of bias. We found an underrepresentation of female patients in the data sets used to train algorithms, identified sex biases in model error rates, and demonstrated that a series of remediation techniques were unable to address the inequities present. ", doi="10.2196/46936", url="https://www.jmir.org/2024/1/e46936" } @Article{info:doi/10.2196/56035, author="Ni, Jiali and Huang, Yong and Xiang, Qiangqiang and Zheng, Qi and Xu, Xiang and Qin, Zhiwen and Sheng, Guoping and Li, Lanjuan", title="Establishment and Evaluation of a Noninvasive Metabolism-Related Fatty Liver Screening and Dynamic Monitoring Model: Cross-Sectional Study", journal="Interact J Med Res", year="2024", month="Aug", day="22", volume="13", pages="e56035", keywords="metabolic-associated fatty liver disease", keywords="nonalcoholic fatty liver disease", keywords="nonalcoholic steatohepatitis", keywords="body fat mass", keywords="waist-height ratio", keywords="basal metabolic rate", keywords="liver", abstract="Background: Metabolically associated fatty liver disease (MAFLD) insidiously affects people's health, and many models have been proposed for the evaluation of liver fibrosis. However, there is still a lack of noninvasive and sensitive models to screen MAFLD in high-risk populations. Objective: The purpose of this study was to explore a new method for early screening of the public and establish a home-based tool for regular self-assessment and monitoring of MAFLD. Methods: In this cross-sectional study, there were 1758 eligible participants in the training set and 200 eligible participants in the testing set. Routine blood, blood biochemistry, and FibroScan tests were performed, and body composition was analyzed using a body composition instrument. 
Additionally, we recorded multiple factors including disease-related risk factors, the Forns index score, the hepatic steatosis index (HSI), the triglyceride glucose index, total body water (TBW), body fat mass (BFM), visceral fat area, waist-height ratio (WHtR), and basal metabolic rate. Binary logistic regression analysis was performed to explore the potential anthropometric indicators that have a predictive ability to screen for MAFLD. A new model, named the MAFLD Screening Index (MFSI), was established using binary logistic regression analysis, and BFM, WHtR, and TBW were included. A simple rating table, named the MAFLD Rating Table (MRT), was also established using these indicators. Results: The performance of the HSI (area under the curve [AUC]=0.873, specificity=76.8\%, sensitivity=81.4\%), WHtR (AUC=0.866, specificity=79.8\%, sensitivity=80.8\%), and BFM (AUC=0.842, specificity=76.9\%, sensitivity=76.2\%) in discriminating between the MAFLD group and non-fatty liver group was evaluated (P<.001). The AUC of the combined model including WHtR, HSI, and BFM values was 0.900 (specificity=81.8\%, sensitivity=85.6\%; P<.001). The MFSI was established based on better performance at screening MAFLD patients in the training set (AUC=0.896, specificity=83.8\%, sensitivity=82.1\%) and was confirmed in the testing set (AUC=0.917, specificity=89.8\%, sensitivity=84.4\%; P<.001). Conclusions: The novel MFSI model was built using WHtR, BFM, and TBW to screen for early MAFLD. These body parameters can be easily obtained using a body fat scale at home, and the mobile device software can record specific values and perform calculations. MFSI had better performance than other models for early MAFLD screening. The new model showed strong power and stability and shows promise in the area of MAFLD detection and self-assessment. The MRT was a practical tool to assess disease alterations in real time. ", doi="10.2196/56035", url="https://www.i-jmr.org/2024/1/e56035", url="http://www.ncbi.nlm.nih.gov/pubmed/39172506" } @Article{info:doi/10.2196/53714, author="Razavi, Moein and Ziyadidegan, Samira and Mahmoudzadeh, Ahmadreza and Kazeminasab, Saber and Baharlouei, Elaheh and Janfaza, Vahid and Jahromi, Reza and Sasangohar, Farzan", title="Machine Learning, Deep Learning, and Data Preprocessing Techniques for Detecting, Predicting, and Monitoring Stress and Stress-Related Mental Disorders: Scoping Review", journal="JMIR Ment Health", year="2024", month="Aug", day="21", volume="11", pages="e53714", keywords="machine learning", keywords="deep learning", keywords="data preprocessing", keywords="stress detection", keywords="stress prediction", keywords="stress monitoring", keywords="mental disorders", abstract="Background: Mental stress and its consequent mental health disorders (MDs) constitute a significant public health issue. With the advent of machine learning (ML), there is potential to harness computational techniques for better understanding and addressing mental stress and MDs. This comprehensive review seeks to elucidate the current ML methodologies used in this domain to pave the way for enhanced detection, prediction, and analysis of mental stress and its subsequent MDs. Objective: This review aims to investigate the scope of ML methodologies used in the detection, prediction, and analysis of mental stress and its consequent MDs. 
Methods: Using a rigorous scoping review process with PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) guidelines, this investigation delves into the latest ML algorithms, preprocessing techniques, and data types used in the context of stress and stress-related MDs. Results: A total of 98 peer-reviewed publications were examined for this review. The findings highlight that support vector machine, neural network, and random forest models consistently exhibited superior accuracy and robustness among all ML algorithms examined. Physiological parameters such as heart rate measurements and skin response are prevalently used as stress predictors due to their rich explanatory information concerning stress and stress-related MDs, as well as the relative ease of data acquisition. The application of dimensionality reduction techniques, including mappings, feature selection, filtering, and noise reduction, is frequently observed as a crucial step preceding the training of ML algorithms. Conclusions: The synthesis of this review identified significant research gaps and outlines future directions for the field. These encompass areas such as model interpretability, model personalization, the incorporation of naturalistic settings, and real-time processing capabilities for the detection and prediction of stress and stress-related MDs. ", doi="10.2196/53714", url="https://mental.jmir.org/2024/1/e53714" } @Article{info:doi/10.2196/52730, author="Mutnuri, Kumar Maruthi and Stelfox, Thomas Henry and Forkert, Daniel Nils and Lee, Joon", title="Using Domain Adaptation and Inductive Transfer Learning to Improve Patient Outcome Prediction in the Intensive Care Unit: Retrospective Observational Study", journal="J Med Internet Res", year="2024", month="Aug", day="21", volume="26", pages="e52730", keywords="transfer learning", keywords="patient outcome prediction", keywords="intensive care", keywords="deep learning", keywords="electronic health record", abstract="Background: Accurate patient outcome prediction in the intensive care unit (ICU) can potentially lead to more effective and efficient patient care. Deep learning models are capable of learning from data to accurately predict patient outcomes, but they typically require large amounts of data and computational resources. Transfer learning (TL) can help in scenarios where data and computational resources are scarce by leveraging pretrained models. While TL has been widely used in medical imaging and natural language processing, it has been rare in electronic health record (EHR) analysis. Furthermore, domain adaptation (DA) has been the most common TL method in general, whereas inductive transfer learning (ITL) has been rare. To the best of our knowledge, DA and ITL have never been studied in-depth in the context of EHR-based ICU patient outcome prediction. Objective: This study investigated DA, as well as rarely researched ITL, in EHR-based ICU patient outcome prediction under simulated, varying levels of data scarcity. Methods: Two patient cohorts were used in this study: (1) eCritical, a multicenter ICU data set of 55,689 unique admission records from 48,672 unique patients admitted to 15 medical-surgical ICUs in Alberta, Canada, between March 2013 and December 2019, and (2) Medical Information Mart for Intensive Care III, a single-center, publicly available ICU data set from Boston, Massachusetts, acquired between 2001 and 2012, containing 61,532 admission records from 46,476 patients.
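The inductive TL idea evaluated here, pretraining on a large source cohort and fine-tuning on a small target subset, can be sketched as follows. Synthetic data replace the ICU cohorts, and scikit-learn's warm_start mechanism stands in for the paper's fully connected networks.

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier

# Toy source (large, eCritical-like) and target (shifted, MIMIC-like) cohorts.
Xs, ys = make_classification(n_samples=5000, n_features=20, random_state=1)
Xt, yt = make_classification(n_samples=5000, n_features=20, shift=0.3, random_state=2)

rng = np.random.default_rng(0)
idx = rng.choice(len(Xt), size=int(0.01 * len(Xt)), replace=False)  # 1% target subset

# Inductive TL: pretrain on the source cohort, then fine-tune on the tiny target set.
model = MLPClassifier(hidden_layer_sizes=(32,), max_iter=200,
                      warm_start=True, random_state=0)
model.fit(Xs, ys)              # pretraining
model.fit(Xt[idx], yt[idx])    # fine-tuning (weights carried over via warm_start)

baseline = MLPClassifier(hidden_layer_sizes=(32,), max_iter=200, random_state=0)
baseline.fit(Xt[idx], yt[idx])  # no TL: train from scratch on the scarce data

print("TL accuracy:      ", model.score(Xt, yt))
print("baseline accuracy:", baseline.score(Xt, yt))
```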
We compared DA and ITL models with baseline models (without TL) of fully connected neural networks, logistic regression, and lasso regression in the prediction of 30-day mortality, acute kidney injury, ICU length of stay, and hospital length of stay. Random subsets of training data, ranging from 1\% to 75\%, as well as the full data set, were used to compare the performances of DA and ITL with the baseline models at various levels of data scarcity. Results: Overall, the ITL models outperformed the baseline models in 55 of 56 comparisons (all P values <.001). The DA models outperformed the baseline models in 45 of 56 comparisons (all P values <.001). ITL resulted in better performance than DA in terms of the number of times and the margin with which it outperformed the baseline models. In 11 of 16 cases (8 of 8 for ITL and 3 of 8 for DA), TL models outperformed baseline models when trained using 1\% data subset. Conclusions: TL-based ICU patient outcome prediction models are useful in data-scarce scenarios. The results of this study can be used to estimate ICU outcome prediction performance at different levels of data scarcity, with and without TL. The publicly available pretrained models from this study can serve as building blocks in further research for the development and validation of models in other ICU cohorts and outcomes. ", doi="10.2196/52730", url="https://www.jmir.org/2024/1/e52730" } @Article{info:doi/10.2196/57097, author="De Ram{\'o}n Fern{\'a}ndez, Alberto and Ruiz Fern{\'a}ndez, Daniel and Garc{\'i}a Ja{\'e}n, Miguel and Cortell-Tormo, M. Juan", title="Recognition of Daily Activities in Adults With Wearable Inertial Sensors: Deep Learning Methods Study", journal="JMIR Med Inform", year="2024", month="Aug", day="9", volume="12", pages="e57097", keywords="activities of daily living", keywords="ADL", keywords="ADLs", keywords="deep learning", keywords="deep learning models", keywords="wearable inertial sensors", keywords="clinical evaluation", keywords="patient's rehabilitation", keywords="rehabilitation", keywords="movement", keywords="accelerometers", keywords="accelerometer", keywords="accelerometry", keywords="wearable", keywords="wearables", keywords="sensor", keywords="sensors", keywords="gyroscopes", keywords="gyroscope", keywords="monitor", keywords="monitoring", abstract="Background: Activities of daily living (ADL) are essential for independence and personal well-being, reflecting an individual's functional status. Impairment in executing these tasks can limit autonomy and negatively affect quality of life. The assessment of physical function during ADL is crucial for the prevention and rehabilitation of movement limitations. Still, its traditional evaluation based on subjective observation has limitations in precision and objectivity. Objective: The primary objective of this study is to use innovative technology, specifically wearable inertial sensors combined with artificial intelligence techniques, to objectively and accurately evaluate human performance in ADL. It is proposed to overcome the limitations of traditional methods by implementing systems that allow dynamic and noninvasive monitoring of movements during daily activities. The approach seeks to provide an effective tool for the early detection of dysfunctions and the personalization of treatment and rehabilitation plans, thus promoting an improvement in the quality of life of individuals. 
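For the wearable-sensor study that follows, a minimal sketch of the kind of deep network used for inertial-sensor classification is given below, assuming 6-channel IMU windows (triaxial accelerometer plus triaxial gyroscope) and the study's 9 movement classes; the architecture details are illustrative, not the authors'.

```python
import torch
import torch.nn as nn

# Toy batch of IMU windows: (batch, channels, time), 128 samples per window.
x = torch.randn(8, 6, 128)

class ConvADL(nn.Module):
    """Minimal 1D-CNN for activity recognition from inertial windows."""
    def __init__(self, channels=6, num_classes=9):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv1d(channels, 32, kernel_size=5, padding=2), nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(32, 64, kernel_size=5, padding=2), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),   # global pooling over time
        )
        self.head = nn.Linear(64, num_classes)

    def forward(self, x):
        return self.head(self.features(x).squeeze(-1))

logits = ConvADL()(x)
print(logits.shape)   # torch.Size([8, 9])
```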
Methods: To monitor movements, wearable inertial sensors were developed, which include accelerometers and triaxial gyroscopes. The developed sensors were used to create a proprietary database with 6 movements related to the shoulder and 3 related to the back. We registered 53,165 activity records in the database (consisting of accelerometer and gyroscope measurements), which were reduced to 52,600 after processing to remove null or abnormal values. Finally, 4 deep learning (DL) models were created by combining various processing layers to explore different approaches in ADL recognition. Results: The results revealed high performance of the 4 proposed models, with levels of accuracy, precision, recall, and F1-score ranging between 95\% and 97\% for all classes and an average loss of 0.10. These results indicate the great capacity of the models to accurately identify a variety of activities, with a good balance between precision and recall. Both the convolutional and bidirectional approaches achieved slightly superior results, although the bidirectional model reached convergence in a smaller number of epochs. Conclusions: The DL models implemented have demonstrated solid performance, indicating an effective ability to identify and classify various daily activities related to the shoulder and lumbar region. These results were achieved with minimal sensorization---being noninvasive and practically imperceptible to the user---which does not affect their daily routine and promotes acceptance and adherence to continuous monitoring, thus improving the reliability of the data collected. This research has the potential to have a significant impact on the clinical evaluation and rehabilitation of patients with movement limitations, by providing an objective and advanced tool to detect key movement patterns and joint dysfunctions. ", doi="10.2196/57097", url="https://medinform.jmir.org/2024/1/e57097", url="http://www.ncbi.nlm.nih.gov/pubmed/39121473" } @Article{info:doi/10.2196/50043, author="Pulantara, Wayan I. and Wang, Yuhan and Burke, E. Lora and Sereika, M. Susan and Bizhanova, Zhadyra and Kariuki, K. Jacob and Cheng, Jessica and Beatrice, Britney and Loar, India and Cedillo, Maribel and Conroy, B. 
Molly and Parmanto, Bambang", title="Data Collection and Management of mHealth, Wearables, and Internet of Things in Digital Behavioral Health Interventions With the Awesome Data Acquisition Method (ADAM): Development of a Novel Informatics Architecture", journal="JMIR Mhealth Uhealth", year="2024", month="Aug", day="7", volume="12", pages="e50043", keywords="integrated system", keywords="IoT integration", keywords="wearable", keywords="mHealth Fitbit", keywords="Nokia", keywords="clinical trial management", keywords="research study management", keywords="study tracking", keywords="remote assessment", keywords="tracking", keywords="Fitbit", keywords="wearable devices", keywords="device", keywords="management", keywords="data analysis", keywords="behavioral", keywords="data collection", keywords="Internet of Things", keywords="IoT", keywords="mHealth", keywords="mobile health", doi="10.2196/50043", url="https://mhealth.jmir.org/2024/1/e50043" } @Article{info:doi/10.2196/56924, author="Katzburg, Omer and Roimi, Michael and Frenkel, Amit and Ilan, Roy and Bitan, Yuval", title="The Impact of Information Relevancy and Interactivity on Intensivists' Trust in a Machine Learning--Based Bacteremia Prediction System: Simulation Study", journal="JMIR Hum Factors", year="2024", month="Aug", day="1", volume="11", pages="e56924", keywords="user-interface design", keywords="user-interface designs", keywords="user interface", keywords="human-automation interaction", keywords="human-automation interactions", keywords="trust in automation", keywords="automation", keywords="human-computer interaction", keywords="human-computer interactions", keywords="human-ML", keywords="human-ML interaction", keywords="human-ML interactions", keywords="decision making", keywords="decision support system", keywords="clinical decision support", keywords="decision support", keywords="decision support systems", keywords="machine learning", keywords="ML", keywords="artificial intelligence", keywords="AI", keywords="machine learning algorithm", keywords="machine learning algorithms", keywords="digitization", keywords="digitization of information", abstract="Background: The exponential growth in computing power and the increasing digitization of information have substantially advanced the machine learning (ML) research field. However, ML algorithms are often considered ``black boxes,'' and this fosters distrust. In medical domains, in which mistakes can result in fatal outcomes, practitioners may be especially reluctant to trust ML algorithms. Objective: The aim of this study is to explore the effect of user-interface design features on intensivists' trust in an ML-based clinical decision support system. Methods: A total of 47 physicians from critical care specialties were presented with 3 patient cases of bacteremia in the setting of an ML-based simulation system. Three conditions of the simulation were tested according to combinations of information relevancy and interactivity. Participants' trust in the system was assessed by their agreement with the system's prediction and a postexperiment questionnaire. Linear regression models were applied to measure the effects. Results: Participants' agreement with the system's prediction did not differ according to the experimental conditions. However, in the postexperiment questionnaire, higher information relevancy ratings and interactivity ratings were associated with higher perceived trust in the system (P<.001 for both). 
The explicit visual presentation of the features of the ML algorithm on the user interface resulted in lower trust among the participants (P=.05). Conclusions: Information relevancy and interactivity features should be considered in the design of the user interface of ML-based clinical decision support systems to enhance intensivists' trust. This study sheds light on the connection between information relevancy, interactivity, and trust in human-ML interaction, specifically in the intensive care unit environment. ", doi="10.2196/56924", url="https://humanfactors.jmir.org/2024/1/e56924" } @Article{info:doi/10.2196/52896, author="Ghasemi, Peyman and Lee, Joon", title="Unsupervised Feature Selection to Identify Important ICD-10 and ATC Codes for Machine Learning on a Cohort of Patients With Coronary Heart Disease: Retrospective Study", journal="JMIR Med Inform", year="2024", month="Jul", day="26", volume="12", pages="e52896", keywords="unsupervised feature selection", keywords="ICD-10", keywords="International Classification of Diseases", keywords="ATC", keywords="Anatomical Therapeutic Chemical", keywords="concrete autoencoder", keywords="Laplacian score", keywords="unsupervised feature selection for multicluster data", keywords="autoencoder-inspired unsupervised feature selection", keywords="principal feature analysis", keywords="machine learning", keywords="artificial intelligence", keywords="case study", keywords="coronary artery disease", keywords="artery disease", keywords="patient cohort", keywords="artery", keywords="mortality prediction", keywords="mortality", keywords="data set", keywords="interpretability", keywords="International Classification of Diseases, Tenth Revision", abstract="Background: The application of machine learning in health care often necessitates the use of hierarchical codes such as the International Classification of Diseases (ICD) and Anatomical Therapeutic Chemical (ATC) systems. These codes classify diseases and medications, respectively, thereby forming extensive data dimensions. Unsupervised feature selection tackles the ``curse of dimensionality'' and helps to improve the accuracy and performance of supervised learning models by reducing the number of irrelevant or redundant features and avoiding overfitting. Techniques for unsupervised feature selection, such as filter, wrapper, and embedded methods, are implemented to select the most important features with the most intrinsic information. However, they face challenges due to the sheer volume of ICD and ATC codes and the hierarchical structures of these systems. Objective: The objective of this study was to compare several unsupervised feature selection methods for ICD and ATC code databases of patients with coronary artery disease in different aspects of performance and complexity and select the best set of features representing these patients. Methods: We compared several unsupervised feature selection methods for 2 ICD and 1 ATC code databases of 51,506 patients with coronary artery disease in Alberta, Canada. Specifically, we used the Laplacian score, unsupervised feature selection for multicluster data, autoencoder-inspired unsupervised feature selection, principal feature analysis, and concrete autoencoders with and without ICD or ATC tree weight adjustment to select the 100 best features from over 9000 ICD and 2000 ATC codes. We assessed the selected features based on their ability to reconstruct the initial feature space and predict 90-day mortality following discharge. 
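One of the techniques compared in this study, the Laplacian score, is compact enough to sketch directly; the random matrix below stands in for the binary ICD/ATC code matrix, and lower scores indicate features that better preserve local structure.

```python
import numpy as np
from sklearn.neighbors import kneighbors_graph

def laplacian_scores(X, k=5):
    """Laplacian score per feature (lower = better preserves local structure)."""
    W = kneighbors_graph(X, n_neighbors=k, mode="connectivity", include_self=False)
    W = 0.5 * (W + W.T).toarray()      # symmetrize the kNN affinity graph
    D = W.sum(axis=1)                  # degree vector
    L = np.diag(D) - W                 # unnormalized graph Laplacian
    scores = np.empty(X.shape[1])
    for j in range(X.shape[1]):
        f = X[:, j]
        f = f - (f @ D) / D.sum()      # remove the trivial constant component
        denom = f @ (D * f)
        scores[j] = (f @ L @ f) / denom if denom > 0 else np.inf
    return scores

X = np.random.default_rng(0).random((200, 30))   # toy stand-in for the code matrix
top = np.argsort(laplacian_scores(X))[:10]       # e.g., keep the top-ranked features
print(top)
```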
We also compared the complexity of the selected features by mean code level in the ICD or ATC tree and the interpretability of the features in the mortality prediction task using Shapley analysis. Results: In feature space reconstruction and mortality prediction, the concrete autoencoder--based methods outperformed other techniques. Particularly, a weight-adjusted concrete autoencoder variant demonstrated improved reconstruction accuracy and significant predictive performance enhancement, confirmed by DeLong and McNemar tests (P<.05). Concrete autoencoders preferred more general codes, and they consistently reconstructed all features accurately. Additionally, features selected by weight-adjusted concrete autoencoders yielded higher Shapley values in mortality prediction than most alternatives. Conclusions: This study scrutinized 5 feature selection methods in ICD and ATC code data sets in an unsupervised context. Our findings underscore the superiority of the concrete autoencoder method in selecting salient features that represent the entire data set, offering a potential asset for subsequent machine learning research. We also present a novel weight adjustment approach for the concrete autoencoders specifically tailored for ICD and ATC code data sets to enhance the generalizability and interpretability of the selected features. ", doi="10.2196/52896", url="https://medinform.jmir.org/2024/1/e52896" } @Article{info:doi/10.2196/49865, author="Bellmann, Louis and Wiederhold, Johannes Alexander and Tr{\"u}be, Leona and Twerenbold, Raphael and {\"U}ckert, Frank and Gottfried, Karl", title="Introducing Attribute Association Graphs to Facilitate Medical Data Exploration: Development and Evaluation Using Epidemiological Study Data", journal="JMIR Med Inform", year="2024", month="Jul", day="24", volume="12", pages="e49865", keywords="data exploration", keywords="cohort studies", keywords="data visualization", keywords="big data", keywords="statistical models", keywords="medical knowledge", keywords="data analysis", keywords="cardiovascular diseases", keywords="usability", abstract="Background: Interpretability and intuitive visualization facilitate medical knowledge generation through big data. In addition, robustness to high-dimensional and missing data is a requirement for statistical approaches in the medical domain. A method tailored to the needs of physicians must meet all the abovementioned criteria. Objective: This study aims to develop an accessible tool for visual data exploration without the need for programming knowledge, adjusting complex parameterizations, or handling missing data. We sought to use statistical analysis using the setting of disease and control cohorts familiar to clinical researchers. We aimed to guide the user by identifying and highlighting data patterns associated with disease and reveal relations between attributes within the data set. Methods: We introduce the attribute association graph, a novel graph structure designed for visual data exploration using robust statistical metrics. The nodes capture frequencies of participant attributes in disease and control cohorts as well as deviations between groups. The edges represent conditional relations between attributes. The graph is visualized using the Neo4j (Neo4j, Inc) data platform and can be interactively explored without the need for technical knowledge. Nodes with high deviations between cohorts and edges of noticeable conditional relationship are highlighted to guide the user during the exploration. 
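A minimal sketch of such a graph structure using networkx is shown below; the attribute names, frequencies, and the conditional-edge definition are illustrative assumptions rather than the published specification.

```python
import networkx as nx
import pandas as pd

# Toy binary attribute table with a disease/control flag (hypothetical attributes).
df = pd.DataFrame({
    "hypertension": [1, 1, 0, 1, 0, 1, 0, 0],
    "smoker":       [1, 0, 0, 1, 0, 1, 1, 0],
    "disease":      [1, 1, 0, 1, 0, 1, 0, 0],
})
attrs = ["hypertension", "smoker"]
G = nx.DiGraph()

for a in attrs:
    p_case = df.loc[df.disease == 1, a].mean()
    p_ctrl = df.loc[df.disease == 0, a].mean()
    # Node captures cohort frequencies and their deviation, as in the graph design.
    G.add_node(a, p_case=p_case, p_ctrl=p_ctrl, deviation=p_case - p_ctrl)

for a in attrs:
    for b in attrs:
        if a != b and df[a].sum() > 0:
            # Directed edge weight: conditional frequency P(b=1 | a=1).
            G.add_edge(a, b, cond=df.loc[df[a] == 1, b].mean())

print(G.nodes(data=True))
print(G.edges(data=True))
```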
The graph is accompanied by a dashboard visualizing variable distributions. For evaluation, we applied the graph and dashboard to the Hamburg City Health Study data set, a large cohort study conducted in the city of Hamburg, Germany. All data structures can be accessed freely by researchers, physicians, and patients. In addition, we developed a user test conducted with physicians incorporating the System Usability Scale, individual questions, and user tasks. Results: We evaluated the attribute association graph and dashboard through an exemplary data analysis of participants with a general cardiovascular disease in the Hamburg City Health Study data set. All results extracted from the graph structure and dashboard are in accordance with findings from the literature, except for unusually low cholesterol levels in participants with cardiovascular disease, which could be induced by medication. In addition, 95\% CIs of Pearson correlation coefficients were calculated for all associations identified during the data analysis, confirming the results. In addition, a user test with 10 physicians assessing the usability of the proposed methods was conducted. A System Usability Scale score of 70.5\% and average successful task completion of 81.4\% were reported. Conclusions: The proposed attribute association graph and dashboard enable intuitive visual data exploration. They are robust to high-dimensional as well as missing data and require no parameterization. The usability for clinicians was confirmed via a user test, and the validity of the statistical results was confirmed by associations known from literature and standard statistical inference. ", doi="10.2196/49865", url="https://medinform.jmir.org/2024/1/e49865" } @Article{info:doi/10.2196/49142, author="Lee, Hsin-Ying and Kuo, Po-Chih and Qian, Frank and Li, Chien-Hung and Hu, Jiun-Ruey and Hsu, Wan-Ting and Jhou, Hong-Jie and Chen, Po-Huang and Lee, Cho-Hao and Su, Chin-Hua and Liao, Po-Chun and Wu, I-Ju and Lee, Chien-Chang", title="Prediction of In-Hospital Cardiac Arrest in the Intensive Care Unit: Machine Learning--Based Multimodal Approach", journal="JMIR Med Inform", year="2024", month="Jul", day="23", volume="12", pages="e49142", keywords="cardiac arrest", keywords="machine learning", keywords="intensive care", keywords="mortality", keywords="medical emergency team", keywords="early warning scores", abstract="Background: Early identification of impending in-hospital cardiac arrest (IHCA) improves clinical outcomes but remains elusive for practicing clinicians. Objective: We aimed to develop a multimodal machine learning algorithm based on ensemble techniques to predict the occurrence of IHCA. Methods: Our model was developed by the Multiparameter Intelligent Monitoring of Intensive Care (MIMIC)--IV database and validated in the Electronic Intensive Care Unit Collaborative Research Database (eICU-CRD). Baseline features consisting of patient demographics, presenting illness, and comorbidities were collected to train a random forest model. Next, vital signs were extracted to train a long short-term memory model. A support vector machine algorithm then stacked the results to form the final prediction model. Results: Of 23,909 patients in the MIMIC-IV database and 10,049 patients in the eICU-CRD database, 452 and 85 patients, respectively, had IHCA. 
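The stacked multimodal design described above can be sketched briefly. In the snippet below, synthetic data replace MIMIC-IV, logistic regression stands in for the long short-term memory model on vital signs, and, for brevity, base predictions are taken in-sample; out-of-fold predictions would be used in practice to avoid leakage.

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Toy stand-ins: X_static ~ demographics/comorbidities, X_vitals ~ vital-sign features.
X, y = make_classification(n_samples=2000, n_features=30, weights=[0.95], random_state=0)
X_static, X_vitals = X[:, :10], X[:, 10:]

Xs_tr, Xs_te, Xv_tr, Xv_te, y_tr, y_te = train_test_split(
    X_static, X_vitals, y, test_size=0.3, random_state=0)

rf = RandomForestClassifier(random_state=0).fit(Xs_tr, y_tr)       # baseline features
seq = LogisticRegression(max_iter=1000).fit(Xv_tr, y_tr)          # vitals (LSTM stand-in)

# Stack the two base predictions and let an SVM make the final call.
stack_tr = np.column_stack([rf.predict_proba(Xs_tr)[:, 1], seq.predict_proba(Xv_tr)[:, 1]])
stack_te = np.column_stack([rf.predict_proba(Xs_te)[:, 1], seq.predict_proba(Xv_te)[:, 1]])
svm = SVC(probability=True).fit(stack_tr, y_tr)
print("stacked probabilities:", svm.predict_proba(stack_te)[:3, 1])
```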
At 13 hours in advance of an IHCA event, our algorithm had already demonstrated an area under the receiver operating characteristic curve of 0.85 (95\% CI 0.815-0.885) in the MIMIC-IV database. External validation with the eICU-CRD and National Taiwan University Hospital databases also presented satisfactory results, showing area under the receiver operating characteristic curve values of 0.81 (95\% CI 0.763-0.851) and 0.945 (95\% CI 0.934-0.956), respectively. Conclusions: Using only vital signs and information available in the electronic medical record, our model demonstrates it is possible to detect a trajectory of clinical deterioration up to 13 hours in advance. This predictive tool, which has undergone external validation, could forewarn and help clinicians identify patients in need of assessment to improve their overall prognosis. ", doi="10.2196/49142", url="https://medinform.jmir.org/2024/1/e49142" } @Article{info:doi/10.2196/56893, author="Suh, Jungyo and Lee, Garam and Kim, Woo Jung and Shin, Junbum and Kim, Yi-Jun and Lee, Sang-Wook and Kim, Sulgi", title="Privacy-Preserving Prediction of Postoperative Mortality in Multi-Institutional Data: Development and Usability Study", journal="JMIR Med Inform", year="2024", month="Jul", day="5", volume="12", pages="e56893", keywords="machine learning", keywords="privacy", keywords="in-hospital mortality", keywords="homomorphic encryption", keywords="multi-institutional system", abstract="Background: To circumvent regulatory barriers that limit medical data exchange due to personal information security concerns, we use homomorphic encryption (HE) technology, enabling computation on encrypted data and enhancing privacy. Objective: This study explores whether using HE to integrate encrypted multi-institutional data enhances predictive power in research, focusing on the integration feasibility across institutions and determining the optimal size of hospital data sets for improved prediction models. Methods: We used data from 341,007 individuals aged 18 years and older who underwent noncardiac surgeries across 3 medical institutions. The study focused on predicting in-hospital mortality within 30 days postoperatively, using secure logistic regression based on HE as the prediction model. We compared the predictive performance of this model using plaintext data from a single institution against a model using encrypted data from multiple institutions. Results: The predictive model using encrypted data from all 3 institutions exhibited the best performance based on area under the receiver operating characteristic curve (0.941); the model combining Asan Medical Center (AMC) and Seoul National University Hospital (SNUH) data exhibited the best predictive performance based on area under the precision-recall curve (0.132). Both Ewha Womans University Medical Center and SNUH demonstrated improvement in predictive power for their own institutions upon their respective data's addition to the AMC data. Conclusions: Prediction models using multi-institutional data sets processed with HE outperformed those using single-institution data sets, especially when our model adaptation approach was applied, which was further validated on a smaller host hospital with a limited data set.
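A minimal sketch of HE-based logistic inference is shown below using TenSEAL, an open-source CKKS library; the study does not name its implementation, so the library choice, encryption parameters, and the polynomial sigmoid approximation are all illustrative assumptions.

```python
import tenseal as ts

# CKKS context (parameters illustrative, not the study's configuration).
ctx = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=8192,
                 coeff_mod_bit_sizes=[60, 40, 40, 60])
ctx.global_scale = 2 ** 40
ctx.generate_galois_keys()

w, b = [0.8, -0.4, 1.1], 0.2                     # plaintext model held by the server
enc_x = ts.ckks_vector(ctx, [0.5, 1.2, -0.3])    # encrypted patient features

enc_logit = enc_x.dot(w) + b
# The sigmoid cannot be evaluated exactly under HE; a low-degree polynomial
# approximation (0.5 + 0.197x - 0.004x^3) is a common substitute.
enc_prob = enc_logit.polyval([0.5, 0.197, 0, -0.004])
print(enc_prob.decrypt())   # only the secret-key holder can decrypt
```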
", doi="10.2196/56893", url="https://medinform.jmir.org/2024/1/e56893" } @Article{info:doi/10.2196/55834, author="Sato, Daisuke and Ikarashi, Koyuki and Nakajima, Fumiko and Fujimoto, Tomomi", title="Novel Methodology for Identifying the Occurrence of Ovulation by Estimating Core Body Temperature During Sleeping: Validity and Effectiveness Study", journal="JMIR Form Res", year="2024", month="Jul", day="5", volume="8", pages="e55834", keywords="menstrual cycle", keywords="ovulation", keywords="biphasic temperature shift", keywords="estimation method", keywords="women", abstract="Background: Body temperature is the most-used noninvasive biomarker to determine menstrual cycle and ovulation. However, issues related to its low accuracy are still under discussion. Objective: This study aimed to improve the accuracy of identifying the presence or absence of ovulation within a menstrual cycle. We investigated whether core body temperature (CBT) estimation can improve the accuracy of temperature biphasic shift discrimination in the menstrual cycle. The study consisted of 2 parts: experiment 1 assessed the validity of the CBT estimation method, while experiment 2 focused on the effectiveness of the method in discriminating biphasic temperature shifts. Methods: In experiment 1, healthy women aged between 18 and 40 years had their true CBT measured using an ingestible thermometer and their CBT estimated from skin temperature and ambient temperature measured during sleep in both the follicular and luteal phases of their menstrual cycles. This study analyzed the differences between these 2 measurements, the variations in temperature between the 2 phases, and the repeated measures correlation between the true and estimated CBT. Experiment 2 followed a similar methodology, but focused on evaluating the diagnostic accuracy of these 2 temperature measurement approaches (estimated CBT and traditional oral basal body temperature [BBT]) for identifying ovulatory cycles. This was performed using urine luteinizing hormone (LH) as the reference standard. Menstrual cycles were categorized based on the results of the LH tests, and a temperature shift was identified using a specific criterion called the ``three-over-six rule.'' This rule and the nested design of the study facilitated the assessment of diagnostic measures, such as sensitivity and specificity. Results: The main findings showed that CBT estimated from skin temperature and ambient temperature during sleep was consistently lower than directly measured CBT in both the follicular and luteal phases of the menstrual cycle. Despite this, the pattern of temperature variation between these phases was comparable for both the estimated and true CBT measurements, suggesting that the estimated CBT accurately reflected the cyclical variations in the true CBT. Significantly, the CBT estimation method showed higher sensitivity and specificity for detecting the occurrence of ovulation than traditional oral BBT measurements, highlighting its potential as an effective tool for reproductive health monitoring. The current method for estimating the CBT provides a practical and noninvasive method for monitoring CBT, which is essential for identifying biphasic shifts in the BBT throughout the menstrual cycle. 
Conclusions: This study demonstrated that the estimated CBT derived from skin temperature and ambient temperature during sleep accurately captures variations in true CBT and is more accurate in determining the presence or absence of ovulation than traditional oral BBT measurements. This method holds promise for improving reproductive health monitoring and understanding of menstrual cycle dynamics. ", doi="10.2196/55834", url="https://formative.jmir.org/2024/1/e55834", url="http://www.ncbi.nlm.nih.gov/pubmed/38967967" } @Article{info:doi/10.2196/51397, author="Duggan, M. Nicole and Jin, Mike and Duran Mendicuti, Alejandra Maria and Hallisey, Stephen and Bernier, Denie and Selame, A. Lauren and Asgari-Targhi, Ameneh and Fischetti, E. Chanel and Lucassen, Ruben and Samir, E. Anthony and Duhaime, Erik and Kapur, Tina and Goldsmith, J. Andrew", title="Gamified Crowdsourcing as a Novel Approach to Lung Ultrasound Data Set Labeling: Prospective Analysis", journal="J Med Internet Res", year="2024", month="Jul", day="4", volume="26", pages="e51397", keywords="crowdsource", keywords="crowdsourced", keywords="crowdsourcing", keywords="machine learning", keywords="artificial intelligence", keywords="point-of-care ultrasound", keywords="POCUS", keywords="lung ultrasound", keywords="B-lines", keywords="gamification", keywords="gamify", keywords="gamified", keywords="label", keywords="labels", keywords="labeling", keywords="classification", keywords="lung", keywords="pulmonary", keywords="respiratory", keywords="ultrasound", keywords="imaging", keywords="medical image", keywords="diagnostic", keywords="diagnose", keywords="diagnosis", keywords="data science", abstract="Background: Machine learning (ML) models can yield faster and more accurate medical diagnoses; however, developing ML models is limited by a lack of high-quality labeled training data. Crowdsourced labeling is a potential solution but can be constrained by concerns about label quality. Objective: This study aims to examine whether a gamified crowdsourcing platform with continuous performance assessment, user feedback, and performance-based incentives could produce expert-quality labels on medical imaging data. Methods: In this diagnostic comparison study, 2384 lung ultrasound clips were retrospectively collected from 203 emergency department patients. A total of 6 lung ultrasound experts classified 393 of these clips as having no B-lines, one or more discrete B-lines, or confluent B-lines to create 2 sets of reference standard data sets (195 training clips and 198 test clips). Sets were respectively used to (1) train users on a gamified crowdsourcing platform and (2) compare the concordance of the resulting crowd labels to the concordance of individual experts to reference standards. Crowd opinions were sourced from DiagnosUs (Centaur Labs) iOS app users over 8 days, filtered based on past performance, aggregated using majority rule, and analyzed for label concordance compared with a hold-out test set of expert-labeled clips. The primary outcome was comparing the labeling concordance of collated crowd opinions to trained experts in classifying B-lines on lung ultrasound clips. Results: Our clinical data set included patients with a mean age of 60.0 (SD 19.0) years; 105 (51.7\%) patients were female and 114 (56.1\%) patients were White. Over the 195 training clips, the expert-consensus label distribution was 114 (58\%) no B-lines, 56 (29\%) discrete B-lines, and 25 (13\%) confluent B-lines. 
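The performance-filtered, majority-rule aggregation described in the methods of this crowdsourcing study can be sketched in a few lines; the user accuracies, labels, and 0.8 cutoff below are hypothetical.

```python
from collections import Counter

# Toy crowd opinions: clip_id -> list of (user_id, label) pairs.
opinions = {
    "clip_01": [("u1", "none"), ("u2", "none"), ("u3", "discrete")],
    "clip_02": [("u1", "confluent"), ("u2", "confluent"), ("u4", "none")],
}
# Hypothetical per-user accuracy on reference-standard training clips.
user_accuracy = {"u1": 0.92, "u2": 0.88, "u3": 0.55, "u4": 0.90}

def crowd_label(pairs, min_accuracy=0.8):
    """Filter opinions by past performance, then take the majority vote."""
    votes = [label for user, label in pairs
             if user_accuracy.get(user, 0) >= min_accuracy]
    return Counter(votes).most_common(1)[0][0] if votes else None

for clip, pairs in opinions.items():
    print(clip, "->", crowd_label(pairs))
```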
Over the 198 test clips, expert-consensus label distribution was 138 (70\%) no B-lines, 36 (18\%) discrete B-lines, and 24 (12\%) confluent B-lines. In total, 99,238 opinions were collected from 426 unique users. On a test set of 198 clips, the mean labeling concordance of individual experts relative to the reference standard was 85.0\% (SE 2.0), compared with 87.9\% crowdsourced label concordance (P=.15). When individual experts' opinions were compared with reference standard labels created by majority vote excluding their own opinion, crowd concordance was higher than the mean concordance of individual experts to reference standards (87.4\% vs 80.8\%, SE 1.6 for expert concordance; P<.001). Clips with discrete B-lines had the most disagreement from both the crowd consensus and individual experts with the expert consensus. Using randomly sampled subsets of crowd opinions, 7 quality-filtered opinions were sufficient to achieve near the maximum crowd concordance. Conclusions: Crowdsourced labels for B-line classification on lung ultrasound clips via a gamified approach achieved expert-level accuracy. This suggests a strategic role for gamified crowdsourcing in efficiently generating labeled image data sets for training ML systems. ", doi="10.2196/51397", url="https://www.jmir.org/2024/1/e51397" } @Article{info:doi/10.2196/55964, author="Gabarron, Elia and Larbi, Dillys and Rivera-Romero, Octavio and Denecke, Kerstin", title="Human Factors in AI-Driven Digital Solutions for Increasing Physical Activity: Scoping Review", journal="JMIR Hum Factors", year="2024", month="Jul", day="3", volume="11", pages="e55964", keywords="machine learning", keywords="ML", keywords="artificial intelligence", keywords="AI", keywords="algorithm", keywords="algorithms", keywords="predictive model", keywords="predictive models", keywords="predictive analytics", keywords="predictive system", keywords="practical model", keywords="practical models", keywords="deep learning", keywords="human factors", keywords="physical activity", keywords="physical exercise", keywords="healthy living", keywords="active lifestyle", keywords="exercise", keywords="physically active", keywords="digital health", keywords="mHealth", keywords="mobile health", keywords="app", keywords="apps", keywords="application", keywords="applications", keywords="digital technology", keywords="digital intervention", keywords="digital interventions", keywords="smartphone", keywords="smartphones", keywords="PRISMA", abstract="Background: Artificial intelligence (AI) has the potential to enhance physical activity (PA) interventions. However, human factors (HFs) play a pivotal role in the successful integration of AI into mobile health (mHealth) solutions for promoting PA. Understanding and optimizing the interaction between individuals and AI-driven mHealth apps is essential for achieving the desired outcomes. Objective: This study aims to review and describe the current evidence on the HFs in AI-driven digital solutions for increasing PA. Methods: We conducted a scoping review by searching for publications containing terms related to PA, HFs, and AI in the titles and abstracts across 3 databases---PubMed, Embase, and IEEE Xplore---and Google Scholar. Studies were included if they were primary studies describing an AI-based solution aimed at increasing PA, and results from testing the solution were reported. Studies that did not meet these criteria were excluded. Additionally, we searched the references in the included articles for relevant research. 
The following data were extracted from included studies and incorporated into a qualitative synthesis: bibliographic information, study characteristics, population, intervention, comparison, outcomes, and AI-related information. The certainty of the evidence in the included studies was evaluated using GRADE (Grading of Recommendations Assessment, Development, and Evaluation). Results: A total of 15 studies published between 2015 and 2023 involving 899 participants aged approximately between 19 and 84 years, 60.7\% (546/899) of whom were female participants, were included in this review. The interventions lasted between 2 and 26 weeks in the included studies. Recommender systems were the most commonly used AI technology in digital solutions for PA (10/15 studies), followed by conversational agents (4/15 studies). User acceptability and satisfaction were the HFs most frequently evaluated (5/15 studies each), followed by usability (4/15 studies). Regarding automated data collection for personalization and recommendation, most systems involved fitness trackers (5/15 studies). The certainty of the evidence analysis indicates moderate certainty of the effectiveness of AI-driven digital technologies in increasing PA (eg, number of steps, distance walked, or time spent on PA). Furthermore, AI-driven technology, particularly recommender systems, seems to positively influence changes in PA behavior, although with very low certainty evidence. Conclusions: Current research highlights the potential of AI-driven technologies to enhance PA, though the evidence remains limited. Longer-term studies are necessary to assess the sustained impact of AI-driven technologies on behavior change and habit formation. While AI-driven digital solutions for PA hold significant promise, further exploration into optimizing AI's impact on PA and effectively integrating AI and HFs is crucial for broader benefits. Thus, the implications for innovation management involve conducting long-term studies, prioritizing diversity, ensuring research quality, focusing on user experience, and understanding the evolving role of AI in PA promotion. ", doi="10.2196/55964", url="https://humanfactors.jmir.org/2024/1/e55964" } @Article{info:doi/10.2196/58058, author="Goldstein, D. Neal and Jones, Justin and Kahal, Deborah and Burstyn, Igor", title="Inferring Population HIV Viral Load From a Single HIV Clinic's Electronic Health Record: Simulation Study With a Real-World Example", journal="Online J Public Health Inform", year="2024", month="Jul", day="3", volume="16", pages="e58058", keywords="HIV", keywords="human immunodeficiency virus", keywords="viral load", keywords="population viral load", keywords="electronic health record", keywords="EHR", keywords="electronic health records", keywords="EHRs", keywords="electric medical record", keywords="EMR", keywords="electric medical records", keywords="EMRs", keywords="patient record", keywords="health record", keywords="health records", keywords="personal health record", keywords="PHR", keywords="selection weights", keywords="sampling", keywords="sampling bias", keywords="Bayes", abstract="Background: Population viral load (VL), the most comprehensive measure of the HIV transmission potential, cannot be directly measured due to lack of complete sampling of all people with HIV. Objective: A given HIV clinic's electronic health record (EHR), a biased sample of this population, may be used to attempt to impute this measure. 
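The weighting step described in the Methods that follow can be sketched on simulated data; the selection model and weights below are illustrative, and the study additionally applied a Bayesian adjustment on top of the weighted estimate.

```python
import numpy as np

rng = np.random.default_rng(0)

# Toy population of log10 viral loads with geometric mean near 4449 copies/mL.
log_vl = rng.normal(loc=np.log10(4449), scale=0.8, size=10_000)

# Biased "clinic" sample: patients retained in care tend to have lower VL.
p_sampled = 1 / (1 + np.exp(2.0 * (log_vl - np.log10(4449))))
sampled = rng.random(10_000) < 0.05 * p_sampled / p_sampled.mean()
clinic = log_vl[sampled]

# Inverse-probability (sampling) weights, assuming selection probabilities
# could be estimated, eg, from surveillance data.
weights = 1 / p_sampled[sampled]
weighted_gm = 10 ** np.average(clinic, weights=weights)
naive_gm = 10 ** clinic.mean()
print(f"naive GM: {naive_gm:,.0f} copies/mL; weighted GM: {weighted_gm:,.0f} copies/mL")
```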
Methods: We simulated a population of 10,000 individuals with VL calibrated to surveillance data with a geometric mean of 4449 copies/mL. We sampled 3 hypothetical EHRs from (A) the source population, (B) those diagnosed, and (C) those retained in care. Our analysis imputed population VL from each EHR using sampling weights followed by Bayesian adjustment. These methods were then tested using EHR data from an HIV clinic in Delaware. Results: Following weighting, the estimates moved in the direction of the population value with correspondingly wider 95\% intervals as follows: clinic A: 4364 (95\% interval 1963-11,132) copies/mL; clinic B: 4420 (95\% interval 1913-10,199) copies/mL; and clinic C: 242 (95\% interval 113-563) copies/mL. Bayesian-adjusted weighting further improved the estimate. Conclusions: These findings suggest that methodological adjustments are ineffective for estimating population VL from a single clinic's EHR without the resource-intensive elucidation of an informative prior. ", doi="10.2196/58058", url="https://ojphi.jmir.org/2024/1/e58058" } @Article{info:doi/10.2196/59680, author="Herman Bernardim Andrade, Gabriel and Yada, Shuntaro and Aramaki, Eiji", title="Is Boundary Annotation Necessary? Evaluating Boundary-Free Approaches to Improve Clinical Named Entity Annotation Efficiency: Case Study", journal="JMIR Med Inform", year="2024", month="Jul", day="2", volume="12", pages="e59680", keywords="natural language processing", keywords="named entity recognition", keywords="information extraction", keywords="text annotation", keywords="entity boundaries", keywords="lenient annotation", keywords="case reports", keywords="annotation", keywords="case study", keywords="medical case report", keywords="efficiency", keywords="model", keywords="model performance", keywords="dataset", keywords="Japan", keywords="Japanese", keywords="entity", keywords="clinical domain", keywords="clinical", abstract="Background: Named entity recognition (NER) is a fundamental task in natural language processing. However, it is typically preceded by named entity annotation, which poses several challenges, especially in the clinical domain. For instance, determining entity boundaries is one of the most common sources of disagreements between annotators due to questions such as whether modifiers or peripheral words should be annotated. If unresolved, these can induce inconsistency in the produced corpora, yet, on the other hand, strict guidelines or adjudication sessions can further prolong an already slow and convoluted process. Objective: The aim of this study is to address these challenges by evaluating 2 novel annotation methodologies, lenient span and point annotation, aiming to mitigate the difficulty of precisely determining entity boundaries. Methods: We evaluate their effects through an annotation case study on a Japanese medical case report data set. We compare annotation time, annotator agreement, and the quality of the produced labeling and assess the impact on the performance of an NER system trained on the annotated corpus. Results: We saw significant improvements in the labeling process efficiency, with up to a 25\% reduction in overall annotation time and even a 10\% improvement in annotator agreement compared to the traditional boundary-strict approach. However, even the best-achieved NER model presented some drop in performance compared to the traditional annotation methodology. Conclusions: Our findings demonstrate a balance between annotation speed and model performance. 
Although disregarding boundary information affects model performance to some extent, this is counterbalanced by significant reductions in the annotator's workload and notable improvements in the speed of the annotation process. These benefits may prove valuable in various applications, offering an attractive compromise for developers and researchers. ", doi="10.2196/59680", url="https://medinform.jmir.org/2024/1/e59680" } @Article{info:doi/10.2196/55118, author="Akiya, Ippei and Ishihara, Takuma and Yamamoto, Keiichi", title="Comparison of Synthetic Data Generation Techniques for Control Group Survival Data in Oncology Clinical Trials: Simulation Study", journal="JMIR Med Inform", year="2024", month="Jun", day="18", volume="12", pages="e55118", keywords="oncology clinical trial", keywords="survival analysis", keywords="synthetic patient data", keywords="machine learning", keywords="SPD", keywords="simulation", abstract="Background: Synthetic patient data (SPD) generation for survival analysis in oncology trials holds significant potential for accelerating clinical development. Various machine learning methods, including classification and regression trees (CART), random forest (RF), Bayesian network (BN), and conditional tabular generative adversarial network (CTGAN), have been used for this purpose, but their performance in reflecting actual patient survival data remains under investigation. Objective: The aim of this study was to determine the most suitable SPD generation method for oncology trials, specifically focusing on both progression-free survival (PFS) and overall survival (OS), which are the primary evaluation end points in oncology trials. To achieve this goal, we conducted a comparative simulation of 4 generation methods, including CART, RF, BN, and the CTGAN, and the performance of each method was evaluated. Methods: Using multiple clinical trial data sets, 1000 data sets were generated by using each method for each clinical trial data set and evaluated as follows: (1) median survival time (MST) of PFS and OS; (2) hazard ratio distance (HRD), which indicates the similarity between the actual survival function and a synthetic survival function; and (3) visual analysis of Kaplan-Meier (KM) plots. Each method's ability to mimic the statistical properties of real patient data was evaluated from these multiple angles. Results: In most simulation cases, CART demonstrated high percentages of MSTs for synthetic data falling within the 95\% CI range of the MST of the actual data. These percentages ranged from 88.8\% to 98.0\% for PFS and from 60.8\% to 96.1\% for OS. In the evaluation of HRD, CART revealed that HRD values were concentrated at approximately 0.9. Conversely, for the other methods, no consistent trend was observed for either PFS or OS. CART demonstrated better similarity than RF: CART tends to overfit the source data, whereas RF (a kind of ensemble learning approach) prevents overfitting; in SPD generation, statistical properties close to those of the actual data should be the focus, not a well-generalized prediction model. Neither the BN nor the CTGAN method can accurately reflect the statistical properties of the actual data because these approaches are not suited to small data sets. Conclusions: As a method for generating SPD for survival data from small data sets, such as clinical trial data, CART was demonstrated to be the most effective method compared to RF, BN, and CTGAN.
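The MST comparison used in this evaluation can be sketched with the lifelines library on toy exponential survival data; the study additionally used hazard ratio distance and visual KM review, which are omitted here.

```python
import numpy as np
from lifelines import KaplanMeierFitter

rng = np.random.default_rng(0)

# Toy actual vs synthetic PFS data: durations (months) and event indicators.
actual_t, actual_e = rng.exponential(12, 300), rng.random(300) < 0.8
synth_t, synth_e = rng.exponential(11.5, 300), rng.random(300) < 0.8

kmf_a = KaplanMeierFitter().fit(actual_t, actual_e, label="actual")
kmf_s = KaplanMeierFitter().fit(synth_t, synth_e, label="synthetic")

print("actual MST:   ", kmf_a.median_survival_time_)
print("synthetic MST:", kmf_s.median_survival_time_)
# The study judged synthetic data acceptable when the synthetic MST fell
# within the 95% CI of the actual MST.
```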
Additionally, it is possible to improve CART-based generation methods by incorporating feature engineering and other methods in future work. ", doi="10.2196/55118", url="https://medinform.jmir.org/2024/1/e55118" } @Article{info:doi/10.2196/56529, author="Rubin, Matan and Arnon, Hadar and Huppert, D. Jonathan and Perry, Anat", title="Considering the Role of Human Empathy in AI-Driven Therapy", journal="JMIR Ment Health", year="2024", month="Jun", day="11", volume="11", pages="e56529", keywords="empathy", keywords="empathetic", keywords="empathic", keywords="artificial empathy", keywords="AI", keywords="artificial intelligence", keywords="mental health", keywords="machine learning", keywords="algorithm", keywords="algorithms", keywords="predictive model", keywords="predictive models", keywords="predictive analytics", keywords="predictive system", keywords="practical model", keywords="practical models", keywords="model", keywords="models", keywords="therapy", keywords="mental illness", keywords="mental illnesses", keywords="mental disease", keywords="mental diseases", keywords="mood disorder", keywords="mood disorders", keywords="emotion", keywords="emotions", keywords="e-mental health", keywords="digital mental health", keywords="internet-based therapy", doi="10.2196/56529", url="https://mental.jmir.org/2024/1/e56529", url="http://www.ncbi.nlm.nih.gov/pubmed/38861302" } @Article{info:doi/10.2196/57678, author="Yin, Ziming and Kuang, Zhongling and Zhang, Haopeng and Guo, Yu and Li, Ting and Wu, Zhengkun and Wang, Lihua", title="Explainable AI Method for Tinnitus Diagnosis via Neighbor-Augmented Knowledge Graph and Traditional Chinese Medicine: Development and Validation Study", journal="JMIR Med Inform", year="2024", month="Jun", day="10", volume="12", pages="e57678", keywords="knowledge graph", keywords="syndrome differentiation", keywords="tinnitus", keywords="traditional Chinese medicine", keywords="explainable", keywords="ear", keywords="audiology", keywords="TCM", keywords="algorithm", keywords="diagnosis", keywords="AI", keywords="artificial intelligence", abstract="Background: Tinnitus diagnosis poses a challenge in otolaryngology owing to an extremely complex pathogenesis, lack of effective objectification methods, and factor-affected diagnosis. There is currently a lack of explainable auxiliary diagnostic tools for tinnitus in clinical practice. Objective: This study aims to develop a diagnostic model using an explainable artificial intelligence (AI) method to address the issue of low accuracy in tinnitus diagnosis. Methods: In this study, a knowledge graph--based tinnitus diagnostic method was developed by combining clinical medical knowledge with electronic medical records. Electronic medical record data from 1267 patients were integrated with traditional Chinese clinical medical knowledge to construct a tinnitus knowledge graph. Subsequently, weights were introduced, which measured patient similarity in the knowledge graph based on mutual information values. Finally, a collaborative neighbor algorithm was proposed, which scored patient similarity to obtain the recommended diagnosis. We conducted 2 group experiments and 1 case derivation to explore the effectiveness of our models and compared the models with state-of-the-art graph algorithms and other explainable machine learning models. 
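To make the mutual information weighting and collaborative neighbor scoring described in the Methods above more concrete, here is a minimal, self-contained sketch (an editor's illustration, not the authors' implementation; the toy patients, feature names, and the simplified information-style weight are all invented):

```python
import math
from collections import defaultdict

# Toy patient -> feature sets standing in for shared knowledge-graph
# neighbors (symptoms, signs, TCM syndrome elements). All names invented.
PATIENTS = {
    "p1": {"tinnitus_left", "insomnia", "kidney_deficiency"},
    "p2": {"tinnitus_left", "insomnia"},
    "p3": {"tinnitus_both", "phlegm_fire"},
}

def feature_weights(patients):
    """Weight each feature by a simple information-style score -p*log(p),
    where p is the fraction of patients showing the feature, so rare,
    informative features count more than ubiquitous ones."""
    n = len(patients)
    counts = defaultdict(int)
    for feats in patients.values():
        for f in feats:
            counts[f] += 1
    return {f: -(c / n) * math.log(c / n) for f, c in counts.items()}

def similarity(a, b, weights):
    """Collaborative-neighbor-style score: total weight of shared features."""
    return sum(weights[f] for f in PATIENTS[a] & PATIENTS[b])

weights = feature_weights(PATIENTS)
query = "p1"
ranked = sorted((p for p in PATIENTS if p != query),
                key=lambda p: similarity(query, p, weights), reverse=True)
print(ranked)  # ['p2', 'p3']: p2 shares 2 weighted features with p1
```

In a system of this kind, a diagnosis would then be recommended from the diagnoses of the top-ranked neighbors, and the shared graph neighbors themselves provide the explanation for each recommendation.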
Results: The experimental results indicate that the method achieved 99.4\% accuracy, 98.5\% sensitivity, 99.6\% specificity, 98.7\% precision, 98.6\% F1-score, and 99\% area under the receiver operating characteristic curve for the inference of 5 tinnitus subtypes among 253 test patients. Additionally, it demonstrated good interpretability. The topological structure of knowledge graphs provides transparency that can explain the reasons for the similarity between patients. Conclusions: This method provides doctors with a reliable and explainable diagnostic tool that is expected to improve tinnitus diagnosis accuracy. ", doi="10.2196/57678", url="https://medinform.jmir.org/2024/1/e57678", url="http://www.ncbi.nlm.nih.gov/pubmed/38857077" } @Article{info:doi/10.2196/51323, author="Hopcroft, EM Lisa and Curtis, J. Helen and Croker, Richard and Pretis, Felix and Inglesby, Peter and Evans, David and Bacon, Sebastian and Goldacre, Ben and Walker, J. Alex and MacKenna, Brian", title="Data-Driven Identification of Potentially Successful Intervention Implementations Using 5 Years of Opioid Prescribing Data: Retrospective Database Study", journal="JMIR Public Health Surveill", year="2024", month="Jun", day="5", volume="10", pages="e51323", keywords="electronic health records", keywords="primary care", keywords="general practice", keywords="opioid analgesics", keywords="data science", keywords="implementation science", keywords="data-driven", keywords="identification", keywords="intervention", keywords="implementations", keywords="proof of concept", keywords="opioid", keywords="unbiased", keywords="prescribing data", keywords="analysis tool", abstract="Background: We have previously demonstrated that opioid prescribing increased by 127\% between 1998 and 2016. New policies aimed at tackling this increasing trend have been recommended by public health bodies, and there is some evidence that progress is being made. Objective: We sought to extend our previous work and develop a data-driven approach to identify general practices and clinical commissioning groups (CCGs) whose prescribing data suggest that interventions to reduce the prescribing of opioids may have been successfully implemented. Methods: We analyzed 5 years of prescribing data (December 2014 to November 2019) for 3 opioid prescribing measures---total opioid prescribing as oral morphine equivalent per 1000 registered population, the number of high-dose opioids prescribed per 1000 registered population, and the number of high-dose opioids as a percentage of total opioids prescribed. Using a data-driven approach, we applied a modified version of our change detection Python library to identify reductions in these measures over time, which may be consistent with the successful implementation of an intervention to reduce opioid prescribing. This analysis was carried out for general practices and CCGs, and organizations were ranked according to the change in prescribing rate. Results: We identified a reduction in total opioid prescribing in 94 (49.2\%) out of 191 CCGs, with a median reduction of 15.1 (IQR 11.8-18.7; range 9.0-32.8) in total oral morphine equivalence per 1000 patients. We present data for the 3 CCGs and practices demonstrating the biggest reduction in opioid prescribing for each of the 3 opioid prescribing measures. 
We observed a 40\% proportional drop (8.9\% absolute reduction) in the regular prescribing of high-dose opioids (measured as a percentage of regular opioids) in the highest-ranked CCG (North Tyneside); a 99\% drop in this same measure was found in several practices (44\%-95\% absolute reduction). Decile plots demonstrate that CCGs exhibiting large reductions in opioid prescribing do so via slow and gradual reductions over a long period (typically about 2 years); in contrast, practices exhibiting large reductions do so rapidly, over a much shorter period. Conclusions: By applying 1 of our existing analysis tools to a national data set, we were able to identify rapid and maintained changes in opioid prescribing within practices and CCGs and rank organizations by the magnitude of reduction. Highly ranked organizations are candidates for further qualitative research into intervention design and implementation. ", doi="10.2196/51323", url="https://publichealth.jmir.org/2024/1/e51323", url="http://www.ncbi.nlm.nih.gov/pubmed/38838327" } @Article{info:doi/10.2196/56333, author="Wang, Rui and Liu, Guangtian and Jing, Liwei and Zhang, Jing and Li, Chenyang and Gong, Lichao", title="Finite Element Analysis of Pelvic Floor Biomechanical Models to Elucidate the Mechanism for Improving Urination and Defecation Dysfunction in Older Adults: Protocol for a Model Development and Validation Study", journal="JMIR Res Protoc", year="2024", month="May", day="31", volume="13", pages="e56333", keywords="elderly", keywords="older adults", keywords="pelvic cavity", keywords="finite element analysis", keywords="biomechanical model", keywords="protocol", keywords="urination", keywords="incontinence", keywords="aging", keywords="bowel dysfunction", abstract="Background: The population is constantly aging, and most older adults will experience many potential physiological changes as they age, leading to functional decline. Urinary and bowel dysfunction is the most common obstacle in older people. At present, the pelvic floor histological changes related to aging have not been fully elucidated, and the mechanism of improving intestinal control ability in older people is still unclear. Objective: The purpose of this study is to describe how the finite element method will be used to understand the mechanical characteristics of and physiological changes in the pelvic cavity during the rehabilitation process, providing theoretical support for the mechanism for improving urination and defecation dysfunction in older individuals. Methods: We will collect magnetic resonance imaging (MRI) and computed tomography (CT) data of the pelvic cavity of one male and one female volunteer older than 60 years and use the finite element method to construct a 3D computer simulation model of the pelvic cavity. By simulating different physiological states, such as the Valsalva maneuver and bowel movement, we will verify the accuracy of the constructed model, investigate the effects of different neuromuscular functional changes, and quantify the relative contributions of the pelvic floor muscle group, core muscle group, and sacral nerve. Results: At present, we have registered the study in the Chinese Clinical Trial Registry and collected MRI and CT data for an older male and an older female patient. Next, the construction and analysis of the finite element model will be accomplished according to the study plan.
We expect to complete the construction and analysis of the finite element model by July 2024 and publish the research results by October 2025. Conclusions: Our study will build finite element models of the pelvic floor of older men and older women, and we will elucidate the relationship between the muscles of the pelvic floor, back, abdomen, and hips and the ability of older adults to control bowel movements. The results of this study will provide theoretical support for elucidating the mechanism for improving urination and defecation dysfunction through rehabilitation. Trial Registration: Chinese Clinical Trial Registry ChiCTR2400080749; https://www.chictr.org.cn/showproj.html?proj=193428 International Registered Report Identifier (IRRID): DERR1-10.2196/56333 ", doi="10.2196/56333", url="https://www.researchprotocols.org/2024/1/e56333", url="http://www.ncbi.nlm.nih.gov/pubmed/38820582" } @Article{info:doi/10.2196/55121, author="Jiang, Yuyan and Liu, Xue-li and Zhang, Zixuan and Yang, Xinru", title="Evaluation and Comparison of Academic Impact and Disruptive Innovation Level of Medical Journals: Bibliometric Analysis and Disruptive Evaluation", journal="J Med Internet Res", year="2024", month="May", day="31", volume="26", pages="e55121", keywords="medical journals", keywords="journal evaluation", keywords="innovative evaluation", keywords="journal disruption index", keywords="disruptive innovation", keywords="academic impact", keywords="peer review", abstract="Background: Medical journals are an important platform for researchers to present their academic findings, and there is a close relationship between a journal's evaluation orientation and the value orientation of the research it publishes. However, the differences between the academic impact and the level of disruptive innovation of medical journals have not yet been examined. Objective: This study aims to compare the relationships and differences between the academic impact, disruptive innovation levels, and peer review results of medical journals and published research papers. We also analyzed the similarities and differences in the impact evaluations, disruptive innovations, and peer reviews for different types of medical research papers and the underlying reasons. Methods: The general and internal medicine Science Citation Index Expanded (SCIE) journals in 2018 were chosen as the objects of study to explore the differences in the academic impact and level of disruptive innovation of medical journals based on the OpenCitations Index of PubMed open PMID-to-PMID citations (POCI) and H1Connect databases, respectively, and compared them with the results of peer review. Results: First, the correlation coefficients of the Journal Disruption Index (JDI) with the Journal Cumulative Citation for 5 years (JCC5), Journal Impact Factor (JIF), and Journal Citation Indicator (JCI) were 0.677, 0.585, and 0.621, respectively. The correlation coefficient of the absolute disruption index (Dz) with the Cumulative Citation for 5 years (CC5) was 0.635. However, the average difference in the disruptive innovation and academic influence rankings of journals reached 20 places (about 17.5\%). The average difference in the disruptive innovation and influence rankings of research papers reached about 2700 places (about 17.7\%). The differences reflect the essential difference between the 2 evaluation systems. Second, the top 7 journals selected based on JDI, JCC5, JIF, and JCI were the same, and all of them were H-journals.
However, only 8 (8/15, 53\%), 96 (96/150, 64\%), and 880 (880/1500, 58.67\%) of the top 0.1\%, top 1\%, and top 10\% papers selected based on Dz and CC5, respectively, were the same. Third, research papers with the ``changes clinical practice'' tag showed only moderate innovation (4.96) and impact (241.67) levels but had high levels of peer-reviewed recognition (6.00) and attention (2.83). Conclusions: The results of the study show that research evaluation based on innovative indicators is detached from the traditional impact evaluation system. The 3 evaluation systems (impact evaluation, disruptive innovation evaluation, and peer review) have high consistency only for authoritative journals and top papers. Neither a single impact indicator nor an innovative indicator can directly reflect the impact of medical research on clinical practice. How to establish an integrated, comprehensive, scientific, and reasonable journal evaluation system that improves on the existing evaluation of medical journals still requires further research. ", doi="10.2196/55121", url="https://www.jmir.org/2024/1/e55121", url="http://www.ncbi.nlm.nih.gov/pubmed/38820583" } @Article{info:doi/10.2196/52655, author="Invernici, Francesco and Bernasconi, Anna and Ceri, Stefano", title="Searching COVID-19 Clinical Research Using Graph Queries: Algorithm Development and Validation", journal="J Med Internet Res", year="2024", month="May", day="30", volume="26", pages="e52655", keywords="big data corpus", keywords="clinical research", keywords="co-occurrence network", keywords="COVID-19 Open Research Dataset", keywords="CORD-19", keywords="graph search", keywords="Named Entity Recognition", keywords="Neo4j", keywords="text mining", abstract="Background: Since the beginning of the COVID-19 pandemic, >1 million studies have been collected within the COVID-19 Open Research Dataset, a corpus of manuscripts created to accelerate research against the disease. Their related abstracts hold a wealth of information that remains largely unexplored and difficult to search due to its unstructured nature. Keyword-based search is the standard approach, which allows users to retrieve the documents of a corpus that contain (all or some of) the words in a target list. This type of search, however, does not provide visual support to the task and is not suited to expressing complex queries or compensating for missing specifications. Objective: This study aims to consider small graphs of concepts and exploit them for expressing graph searches over existing COVID-19--related literature, leveraging the increasing use of graphs to represent and query scientific knowledge and providing a user-friendly search and exploration experience. Methods: We considered the COVID-19 Open Research Dataset corpus and summarized its content by annotating the publications' abstracts using terms selected from the Unified Medical Language System and the Ontology of Coronavirus Infectious Disease. Then, we built a co-occurrence network that includes all relevant concepts mentioned in the corpus, establishing connections when their mutual information is relevant. A sophisticated graph query engine was built to allow the identification of the best matches of graph queries on the network. It also supports partial matches and suggests potential query completions using shortest paths.
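As a rough illustration of the co-occurrence network and shortest-path query completion described above, the following sketch builds a tiny network from hypothetical concept-annotated abstracts (the mini-corpus, the simplified pointwise-mutual-information edge criterion, and the concept names are invented; the study's engine is built on Neo4j and is far more sophisticated):

```python
import itertools
import math
import networkx as nx

# Hypothetical mini-corpus: each abstract reduced to its annotated concepts.
DOCS = [
    {"covid-19", "fever"},
    {"covid-19", "fever", "pneumonia"},
    {"pneumonia", "ventilation"},
    {"covid-19", "anosmia"},
]

def build_network(docs):
    """Connect 2 concepts when they co-occur at least as often as expected
    by chance (pointwise mutual information >= 0), weighting edges by PMI."""
    n = len(docs)
    g = nx.Graph()
    concepts = {c for d in docs for c in d}
    for a, b in itertools.combinations(concepts, 2):
        pa = sum(a in d for d in docs) / n
        pb = sum(b in d for d in docs) / n
        pab = sum(a in d and b in d for d in docs) / n
        if pab > 0 and pab >= pa * pb:
            g.add_edge(a, b, weight=math.log(pab / (pa * pb)))
    return g

g = build_network(DOCS)
# Query completion: suggest the concepts that connect 2 query terms.
print(nx.shortest_path(g, "fever", "ventilation"))
# ['fever', 'pneumonia', 'ventilation']
```

The suggested intermediate concept ("pneumonia" here) plays the role of a potential query completion between terms the user has already placed on the query canvas.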
Results: We built a large co-occurrence network, consisting of 128,249 entities and 47,198,965 relationships; the GRAPH-SEARCH interface allows users to explore the network by formulating or adapting graph queries; it produces a bibliography of publications, which are globally ranked; and each publication is further associated with the specific parts of the query that it explains, thereby allowing the user to understand each aspect of the matching. Conclusions: Our approach supports the process of query formulation and evidence search upon a large text corpus; it can be reapplied to any scientific domain where documents corpora and curated ontologies are made available. ", doi="10.2196/52655", url="https://www.jmir.org/2024/1/e52655", url="http://www.ncbi.nlm.nih.gov/pubmed/38814687" } @Article{info:doi/10.2196/51013, author="Bandiera, Carole and Pasquier, J{\'e}r{\^o}me and Locatelli, Isabella and Schneider, P. Marie", title="Using a Semiautomated Procedure (CleanADHdata.R Script) to Clean Electronic Adherence Monitoring Data: Tutorial", journal="JMIR Form Res", year="2024", month="May", day="22", volume="8", pages="e51013", keywords="medication adherence", keywords="digital technology", keywords="digital pharmacy", keywords="electronic adherence monitoring", keywords="data management", keywords="data cleaning", keywords="research methodology", keywords="algorithms", keywords="R", keywords="semiautomated", keywords="code", keywords="coding", keywords="computer science", keywords="computer programming", keywords="medications", keywords="computer script", abstract="Background: Patient adherence to medications can be assessed using interactive digital health technologies such as electronic monitors (EMs). Changes in treatment regimens and deviations from EM use over time must be characterized to establish the actual level of medication adherence. Objective: We developed the computer script CleanADHdata.R to clean raw EM adherence data, and this tutorial is a guide for users. Methods: In addition to raw EM data, we collected adherence start and stop monitoring dates and identified the prescribed regimens, the expected number of EM openings per day based on the prescribed regimen, EM use deviations, and patients' demographic data. The script formats the data longitudinally and calculates each day's medication implementation. Results: We provided a simulated data set for 10 patients, for which 15 EMs were used over a median period of 187 (IQR 135-342) days. The median patient implementation before and after EM raw data cleaning was 83.3\% (IQR 71.5\%-93.9\%) and 97.3\% (IQR 95.8\%-97.6\%), respectively ($\Delta$+14\%). This difference is substantial enough to consider EM data cleaning to be capable of avoiding data misinterpretation and providing a cleaned data set for the adherence analysis in terms of implementation and persistence. Conclusions: The CleanADHdata.R script is a semiautomated procedure that increases standardization and reproducibility. This script has broader applicability within the realm of digital health, as it can be used to clean adherence data collected with diverse digital technologies. 
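For readers unfamiliar with implementation calculations, a minimal sketch of the day-by-day computation described above follows; it is written in Python for brevity and is not the CleanADHdata.R code, and the monitoring dates, regimen, and opening counts are invented:

```python
from datetime import date, timedelta

# Hypothetical raw EM timestamps collapsed to opening counts per day.
openings = {
    date(2024, 1, 1): 2, date(2024, 1, 2): 1, date(2024, 1, 4): 2,
}
expected_per_day = 2          # from the prescribed regimen
start, stop = date(2024, 1, 1), date(2024, 1, 4)  # monitoring period

# Daily implementation: did the number of EM openings meet the regimen?
days = (stop - start).days + 1
implemented = [
    openings.get(start + timedelta(d), 0) >= expected_per_day
    for d in range(days)
]
implementation = 100 * sum(implemented) / days
print(f"implementation over {days} days: {implementation:.1f}%")  # 50.0%
```

Cleaning steps of the kind the tutorial describes (adjusting monitoring start and stop dates, regimen changes, and EM use deviations) amount to correcting the inputs to this computation before the percentage is taken.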
", doi="10.2196/51013", url="https://formative.jmir.org/2024/1/e51013", url="http://www.ncbi.nlm.nih.gov/pubmed/38776539" } @Article{info:doi/10.2196/57026, author="Zhang, Jinbo and Yang, Pingping and Zeng, Lu and Li, Shan and Zhou, Jiamei", title="Ventilator-Associated Pneumonia Prediction Models Based on AI: Scoping Review", journal="JMIR Med Inform", year="2024", month="May", day="14", volume="12", pages="e57026", keywords="artificial intelligence", keywords="machine learning", keywords="ventilator-associated pneumonia", keywords="prediction", keywords="scoping", keywords="PRISMA", keywords="Preferred Reporting Items for Systematic Reviews and Meta-Analyses", abstract="Background: Ventilator-associated pneumonia (VAP) is a serious complication of mechanical ventilation therapy that affects patients' treatments and prognoses. Owing to its excellent data mining capabilities, artificial intelligence (AI) has been increasingly used to predict VAP. Objective: This paper reviews VAP prediction models that are based on AI, providing a reference for the early identification of high-risk groups in future clinical practice. Methods: A scoping review was conducted in accordance with the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) guidelines. The Wanfang database, the Chinese Biomedical Literature Database, Cochrane Library, Web of Science, PubMed, MEDLINE, and Embase were searched to identify relevant articles. Study selection and data extraction were independently conducted by 2 reviewers. The data extracted from the included studies were synthesized narratively. Results: Of the 137 publications retrieved, 11 were included in this scoping review. The included studies reported the use of AI for predicting VAP. All 11 studies predicted VAP occurrence, and studies on VAP prognosis were excluded. Further, these studies used text data, and none of them involved imaging data. Public databases were the primary sources of data for model building (studies: 6/11, 55\%), and 5 studies had sample sizes of <1000. Machine learning was the primary approach used to build the VAP prediction models. However, deep learning and large language models were not used to construct VAP prediction models. The random forest model was the most commonly used model (studies: 5/11, 45\%). All studies only performed internal validations, and none of them addressed how to implement and apply the final model in real-life clinical settings. Conclusions: This review presents an overview of studies that used AI to predict and diagnose VAP. AI models have better predictive performance than traditional methods and are expected to provide indispensable tools for VAP risk prediction in the future. However, the current research is in the model construction and validation stage, and the implementation of and guidance for clinical VAP prediction require further research. ", doi="10.2196/57026", url="https://medinform.jmir.org/2024/1/e57026" } @Article{info:doi/10.2196/50679, author="Gandrup, Julie and Selby, A. David and Dixon, G.
William", title="Classifying Self-Reported Rheumatoid Arthritis Flares Using Daily Patient-Generated Data From a Smartphone App: Exploratory Analysis Applying Machine Learning Approaches", journal="JMIR Form Res", year="2024", month="May", day="14", volume="8", pages="e50679", keywords="rheumatoid arthritis", keywords="flare", keywords="patient-generated health data", keywords="smartphone", keywords="mobile health", keywords="machine learning", keywords="arthritis", keywords="rheumatic", keywords="rheumatism", keywords="joint", keywords="joints", keywords="arthritic", keywords="musculoskeletal", keywords="flares", keywords="classify", keywords="classification", keywords="symptom", keywords="symptoms", keywords="mobile phone", abstract="Background: The ability to predict rheumatoid arthritis (RA) flares between clinic visits based on real-time, longitudinal patient-generated data could potentially allow for timely interventions to avoid disease worsening. Objective: This exploratory study aims to investigate the feasibility of using machine learning methods to classify self-reported RA flares based on a small data set of daily symptom data collected on a smartphone app. Methods: Daily symptoms and weekly flares reported on the Remote Monitoring of Rheumatoid Arthritis (REMORA) smartphone app from 20 patients with RA over 3 months were used. Predictors were several summary features of the daily symptom scores (eg, pain and fatigue) collected in the week leading up to the flare question. We fitted 3 binary classifiers: logistic regression with and without elastic net regularization, a random forest, and naive Bayes. Performance was evaluated according to the area under the curve (AUC) of the receiver operating characteristic curve. For the best-performing model, we considered sensitivity and specificity for different thresholds in order to illustrate different ways in which the predictive model could behave in a clinical setting. Results: The data comprised an average of 60.6 daily reports and 10.5 weekly reports per participant. Participants reported a median of 2 (IQR 0.75-4.25) flares each over a median follow-up time of 81 (IQR 79-82) days. AUCs were broadly similar between models, but logistic regression with elastic net regularization had the highest AUC of 0.82. At a cutoff requiring specificity to be 0.80, the corresponding sensitivity to detect flares was 0.60 for this model. The positive predictive value (PPV) in this population was 53\%, and the negative predictive value (NPV) was 85\%. Given the prevalence of flares, the best PPV achieved meant only around 2 of every 3 positive predictions were correct (PPV 0.65). By prioritizing a higher NPV, the model correctly predicted over 9 in every 10 non-flare weeks, but the accuracy of predicted flares fell to only 1 in 2 being correct (NPV and PPV of 0.92 and 0.51, respectively). Conclusions: Predicting self-reported flares based on daily symptom scorings in the preceding week using machine learning methods was feasible. The observed predictive accuracy might improve as we obtain more data, and these exploratory results need to be validated in an external cohort. In the future, analysis of frequently collected patient-generated data may allow us to predict flares before they unfold, opening opportunities for just-in-time adaptative interventions. Depending on the nature and implication of an intervention, different cutoff values for an intervention decision need to be considered, as well as the level of predictive certainty required. 
", doi="10.2196/50679", url="https://formative.jmir.org/2024/1/e50679", url="http://www.ncbi.nlm.nih.gov/pubmed/38743480" } @Article{info:doi/10.2196/44805, author="Granviken, Fredrik and Vasseljen, Ottar and Bach, Kerstin and Jaiswal, Amar and Meisingset, Ingebrigt", title="Decision Support for Managing Common Musculoskeletal Pain Disorders: Development of a Case-Based Reasoning Application", journal="JMIR Form Res", year="2024", month="May", day="10", volume="8", pages="e44805", keywords="case-based reasoning", keywords="musculoskeletal pain", keywords="physiotherapy", keywords="decision support", keywords="primary care", keywords="artificial intelligence", abstract="Background: Common interventions for musculoskeletal pain disorders either lack evidence to support their use or have small to modest or short-term effects. Given the heterogeneity of patients with musculoskeletal pain disorders, treatment guidelines and systematic reviews have limited transferability to clinical practice. A problem-solving method in artificial intelligence, case-based reasoning (CBR), where new problems are solved based on experiences from past similar problems, might offer guidance in such situations. Objective: This study aims to use CBR to build a decision support system for patients with musculoskeletal pain disorders seeking physiotherapy care. This study describes the development of the CBR system SupportPrim PT and demonstrates its ability to identify similar patients. Methods: Data from physiotherapy patients in primary care in Norway were collected to build a case base for SupportPrim PT. We used the local-global principle in CBR to identify similar patients. The global similarity measures are attributes used to identify similar patients and consisted of prognostic attributes. They were weighted in terms of prognostic importance and choice of treatment, where the weighting represents the relevance of the different attributes. For the local similarity measures, the degree of similarity within each attribute was based on minimal clinically important differences and expert knowledge. The SupportPrim PT's ability to identify similar patients was assessed by comparing the similarity scores of all patients in the case base with the scores on an established screening tool (the short form {\"O}rebro Musculoskeletal Pain Screening Questionnaire [{\"O}MSPQ]) and an outcome measure (the Musculoskeletal Health Questionnaire [MSK-HQ]) used in musculoskeletal pain. We also assessed the same in a more extensive case base. Results: The original case base contained 105 patients with musculoskeletal pain (mean age 46, SD 15 years; 77/105, 73.3\% women). The SupportPrim PT consisted of 29 weighted attributes with local similarities. When comparing the similarity scores for all patients in the case base, one at a time, with the {\"O}MSPQ and MSK-HQ, the most similar patients had a mean absolute difference from the query patient of 9.3 (95\% CI 8.0-10.6) points on the {\"O}MSPQ and a mean absolute difference of 5.6 (95\% CI 4.6-6.6) points on the MSK-HQ. For both {\"O}MSPQ and MSK-HQ, the absolute score difference increased as the rank of most similar patients decreased. Patients retrieved from a more extensive case base (N=486) had a higher mean similarity score and were slightly more similar to the query patients in {\"O}MSPQ and MSK-HQ compared with the original smaller case base. Conclusions: This study describes the development of a CBR system, SupportPrim PT, for musculoskeletal pain in primary care. 
The SupportPrim PT identified similar patients according to an established screening tool and an outcome measure for patients with musculoskeletal pain. ", doi="10.2196/44805", url="https://formative.jmir.org/2024/1/e44805", url="http://www.ncbi.nlm.nih.gov/pubmed/38728686" } @Article{info:doi/10.2196/50035, author="Kluge, Felix and Brand, E. Yonatan and Mic{\'o}-Amigo, Encarna M. and Bertuletti, Stefano and D'Ascanio, Ilaria and Gazit, Eran and Bonci, Tecla and Kirk, Cameron and K{\"u}derle, Arne and Palmerini, Luca and Paraschiv-Ionescu, Anisoara and Salis, Francesca and Soltani, Abolfazl and Ullrich, Martin and Alcock, Lisa and Aminian, Kamiar and Becker, Clemens and Brown, Philip and Buekers, Joren and Carsin, Anne-Elie and Caruso, Marco and Caulfield, Brian and Cereatti, Andrea and Chiari, Lorenzo and Echevarria, Carlos and Eskofier, Bjoern and Evers, Jordi and Garcia-Aymerich, Judith and Hache, Tilo and Hansen, Clint and Hausdorff, M. Jeffrey and Hiden, Hugo and Hume, Emily and Keogh, Alison and Koch, Sarah and Maetzler, Walter and Megaritis, Dimitrios and Niessen, Martijn and Perlman, Or and Schwickert, Lars and Scott, Kirsty and Sharrack, Basil and Singleton, David and Vereijken, Beatrix and Vogiatzis, Ioannis and Yarnall, Alison and Rochester, Lynn and Mazz{\`a}, Claudia and Del Din, Silvia and Mueller, Arne", title="Real-World Gait Detection Using a Wrist-Worn Inertial Sensor: Validation Study", journal="JMIR Form Res", year="2024", month="May", day="1", volume="8", pages="e50035", keywords="digital mobility outcomes", keywords="validation", keywords="wearable sensor", keywords="walking", keywords="digital health", keywords="inertial measurement unit", keywords="accelerometer", keywords="Mobilise-D", abstract="Background: Wrist-worn inertial sensors are used in digital health for evaluating mobility in real-world environments. Preceding the estimation of spatiotemporal gait parameters within long-term recordings, gait detection is an important step to identify regions of interest where gait occurs, which requires robust algorithms due to the complexity of arm movements. While algorithms exist for other sensor positions, a comparative validation of algorithms applied to the wrist position on real-world data sets across different disease populations is missing. Furthermore, gait detection performance differences between the wrist and lower back position have not yet been explored but could yield valuable information regarding sensor position choice in clinical studies. Objective: The aim of this study was to validate gait sequence (GS) detection algorithms developed for the wrist position against reference data acquired in a real-world context. In addition, this study aimed to compare the performance of algorithms applied to the wrist position to those applied to lower back--worn inertial sensors. Methods: Participants with Parkinson disease, multiple sclerosis, proximal femoral fracture (hip fracture recovery), chronic obstructive pulmonary disease, and congestive heart failure and healthy older adults (N=83) were monitored for 2.5 hours in the real-world using inertial sensors on the wrist, lower back, and feet including pressure insoles and infrared distance sensors as reference. In total, 10 algorithms for wrist-based gait detection were validated against a multisensor reference system and compared to gait detection performance using lower back--worn inertial sensors. 
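To illustrate the general idea of gait sequence detection from a wrist-worn accelerometer, here is a deliberately simple sketch (an editor's toy example, not one of the 10 validated algorithms; the thresholds, window length, and simulated signal are invented):

```python
import numpy as np

def detect_gait_sequences(acc, fs, win_s=3.0, band=(0.5, 3.0), thr=0.5):
    """Toy gait-sequence detector: flag windows whose acceleration magnitude
    is both active enough and dominated by a step-like frequency."""
    mag = np.linalg.norm(acc, axis=1)
    mag -= mag.mean()                          # crude gravity/offset removal
    win = int(win_s * fs)
    flags = []
    for start in range(0, len(mag) - win + 1, win):
        seg = mag[start:start + win]
        freqs = np.fft.rfftfreq(win, d=1 / fs)
        dom = freqs[np.argmax(np.abs(np.fft.rfft(seg))[1:]) + 1]  # skip DC
        flags.append(band[0] <= dom <= band[1] and seg.std() > thr)
    return flags

# Simulated 1-minute recording at 50 Hz: 30 s rest, then 30 s of ~2 Hz swing.
fs = 50
t = np.arange(0, 60, 1 / fs)
acc = np.zeros((len(t), 3))
acc[:, 2] = 9.81                               # gravity on the vertical axis
walk = t >= 30
acc[walk, 2] += 3.0 * np.sin(2 * np.pi * 2.0 * t[walk])
print(detect_gait_sequences(acc, fs))          # False at rest, True while walking
```

Real wrist algorithms must additionally cope with arm movements unrelated to walking, which is precisely why the validation against a multisensor reference described above is needed.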
Results: The best-performing GS detection algorithm for the wrist showed a mean (per disease group) sensitivity ranging between 0.55 (SD 0.29) and 0.81 (SD 0.09) and a mean (per disease group) specificity ranging between 0.95 (SD 0.06) and 0.98 (SD 0.02). The mean relative absolute error of estimated walking time ranged between 8.9\% (SD 7.1\%) and 32.7\% (SD 19.2\%) per disease group for this algorithm as compared to the reference system. Gait detection performance from the best algorithm applied to the wrist inertial sensors was lower than for the best algorithms applied to the lower back, which yielded mean sensitivity between 0.71 (SD 0.12) and 0.91 (SD 0.04), mean specificity between 0.96 (SD 0.03) and 0.99 (SD 0.01), and a mean relative absolute error of estimated walking time between 6.3\% (SD 5.4\%) and 23.5\% (SD 13\%). Performance was lower in disease groups with major gait impairments (eg, patients recovering from hip fracture) and for patients using bilateral walking aids. Conclusions: Algorithms applied to the wrist position can detect GSs with high performance in real-world environments. These periods of interest in real-world recordings can facilitate gait parameter extraction and allow quantification of the distribution of gait duration in everyday life. Our findings support informed decisions on alternative sensor positions for gait recording in clinical studies and public health. Trial Registration: ISRCTN Registry 12246987; https://www.isrctn.com/ISRCTN12246987 International Registered Report Identifier (IRRID): RR2-10.1136/bmjopen-2021-050785 ", doi="10.2196/50035", url="https://formative.jmir.org/2024/1/e50035", url="http://www.ncbi.nlm.nih.gov/pubmed/38691395" } @Article{info:doi/10.2196/51612, author="Tremoulet, D. Patrice and Lobo, F. Andrea and Simmons, A. Christina and Baliga, Ganesh and Brady, Matthew", title="Assessing the Usability and Feasibility of Digital Assistant Tools for Direct Support Professionals: Participatory Design and Pilot-Testing", journal="JMIR Hum Factors", year="2024", month="Apr", day="25", volume="11", pages="e51612", keywords="technology prototype", keywords="data collection", keywords="documentation", keywords="direct support professionals", keywords="intellectual and developmental disabilities", keywords="pilot test", keywords="mobile phone", abstract="Background: The United States is experiencing a direct support professional (DSP) crisis, with demand far exceeding supply. Although generating documentation is a critical responsibility, it is one of the most wearisome aspects of DSPs' jobs. Technology that enables DSPs to log informal time-stamped notes throughout their shift could help reduce the burden of end-of-shift documentation and increase job satisfaction, which in turn could improve the quality of life of the individuals with intellectual and developmental disabilities (IDDs) whom DSPs support. However, DSPs, with varied ages, levels of education, and comfort using technology, are not likely to adopt tools that detract from caregiving responsibilities or increase workload; therefore, technological tools for them must be relatively simple and extremely intuitive and must provide highly valued capabilities. Objective: This paper describes the development and pilot-testing of a digital assistant tool (DAT) that enables DSPs to create informal notes throughout their shifts and use these notes to facilitate end-of-shift documentation. The purpose of the pilot study was to assess the usability and feasibility of the DAT.
Methods: The research team applied an established user-centered participatory design process to design, develop, and test the DAT prototypes between May 2020 and April 2023. Pilot-testing entailed having 14 DSPs who support adults with IDDs use the first full implementation of the DAT prototypes during 2 or 3 successive work shifts and fill out demographic and usability questionnaires. Results: Participants used the DAT prototypes to create notes and help generate end-of-shift reports. The System Usability Scale score of 81.79 indicates that they found the prototypes easy to use. Survey responses imply that using the DAT made it easier for participants to produce required documentation and suggest that they would adopt the DAT if this tool were available for daily use. Conclusions: Simple technologies such as the DAT prototypes, which enable DSPs to use mobile devices to log time-stamped notes throughout their shift with minimal effort and use the notes to help write reports, have the potential to both reduce the burden associated with producing documentation and enhance the quality (level of detail and accuracy) of this documentation. This could help to increase job satisfaction and reduce turnover in DSPs, both of which would help improve the quality of life of the individuals with IDDs whom they support. The pilot test results indicate that DSPs found the DAT easy to use. Next steps include (1) producing more robust versions of the DAT with additional capabilities, such as storing data locally on mobile devices when Wi-Fi is not available; and (2) eliciting input from agency directors, families, and others who use data about adults with IDDs to help care for them to ensure that data produced by DSPs are relevant and useful. ", doi="10.2196/51612", url="https://humanfactors.jmir.org/2024/1/e51612", url="http://www.ncbi.nlm.nih.gov/pubmed/38662420" } @Article{info:doi/10.2196/49445, author="Pilgram, Lisa and Meurers, Thierry and Malin, Bradley and Schaeffner, Elke and Eckardt, Kai-Uwe and Prasser, Fabian and ", title="The Costs of Anonymization: Case Study Using Clinical Data", journal="J Med Internet Res", year="2024", month="Apr", day="24", volume="26", pages="e49445", keywords="data sharing", keywords="anonymization", keywords="deidentification", keywords="privacy-utility trade-off", keywords="privacy-enhancing technologies", keywords="medical informatics", keywords="privacy", keywords="anonymized", keywords="security", keywords="identification", keywords="confidentiality", keywords="data science", abstract="Background: Sharing data from clinical studies can accelerate scientific progress, improve transparency, and increase the potential for innovation and collaboration. However, privacy concerns remain a barrier to data sharing. Certain concerns, such as reidentification risk, can be addressed through the application of anonymization algorithms, whereby data are altered so that they are no longer reasonably related to a person. Yet, such alterations have the potential to influence the data set's statistical properties, such that the privacy-utility trade-off must be considered. This has been studied in theory, but evidence based on real-world individual-level clinical data is rare, and anonymization has not broadly been adopted in clinical practice.
Objective: The goal of this study is to contribute to a better understanding of anonymization in the real world by comprehensively evaluating the privacy-utility trade-off of differently anonymized data using data and scientific results from the German Chronic Kidney Disease (GCKD) study. Methods: The GCKD data set extracted for this study consists of 5217 records and 70 variables. A 2-step procedure was followed to determine which variables constituted reidentification risks. To capture a large portion of the risk-utility space, we decided on risk thresholds ranging from 0.02 to 1. The data were then transformed via generalization and suppression, and the anonymization process was varied using a generic and a use case--specific configuration. To assess the utility of the anonymized GCKD data, general-purpose metrics (ie, data granularity and entropy), as well as use case--specific metrics (ie, reproducibility), were applied. Reproducibility was assessed by measuring the overlap of the 95\% CI lengths between anonymized and original results. Results: Reproducibility measured by 95\% CI overlap was higher than utility obtained from general-purpose metrics. For example, granularity varied between 68.2\% and 87.6\%, and entropy varied between 25.5\% and 46.2\%, whereas the average 95\% CI overlap was above 90\% for all risk thresholds applied. A nonoverlapping 95\% CI was detected in 6 estimates across all analyses, but the overwhelming majority of estimates exhibited an overlap over 50\%. The use case--specific configuration outperformed the generic one in terms of actual utility (ie, reproducibility) at the same level of privacy. Conclusions: Our results illustrate the challenges that anonymization faces when aiming to support multiple likely and possibly competing uses, while use case--specific anonymization can provide greater utility. This aspect should be taken into account when evaluating the associated costs of anonymized data and attempting to maintain sufficiently high levels of privacy for anonymized data. 
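The CI overlap reproducibility metric described in the Methods lends itself to a compact sketch. One plausible formalization is shown below; the normalization by the mean CI length and the example hazard-ratio intervals are assumptions made for illustration, and the paper's exact definition may differ:

```python
def ci_overlap(original, anonymized):
    """Length of the intersection of 2 confidence intervals, relative to
    their mean length; 1.0 means identical CIs, 0.0 means no overlap."""
    lo = max(original[0], anonymized[0])
    hi = min(original[1], anonymized[1])
    inter = max(0.0, hi - lo)
    mean_len = ((original[1] - original[0])
                + (anonymized[1] - anonymized[0])) / 2
    return inter / mean_len

# Hypothetical 95% CIs for the same estimate from the original and an
# anonymized data set.
print(ci_overlap((1.10, 1.45), (1.08, 1.40)))  # high overlap, ~0.90
print(ci_overlap((1.10, 1.45), (1.50, 1.90)))  # nonoverlapping -> 0.0
```

Averaging such per-estimate overlaps across all replicated analyses yields a use case-specific utility score of the kind the study contrasts with general-purpose metrics such as granularity and entropy.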
Trial Registration: German Clinical Trials Register DRKS00003971; https://drks.de/search/en/trial/DRKS00003971 International Registered Report Identifier (IRRID): RR2-10.1093/ndt/gfr456 ", doi="10.2196/49445", url="https://www.jmir.org/2024/1/e49445", url="http://www.ncbi.nlm.nih.gov/pubmed/38657232" } @Article{info:doi/10.2196/55318, author="Sivarajkumar, Sonish and Kelley, Mark and Samolyk-Mazzanti, Alyssa and Visweswaran, Shyam and Wang, Yanshan", title="An Empirical Evaluation of Prompting Strategies for Large Language Models in Zero-Shot Clinical Natural Language Processing: Algorithm Development and Validation Study", journal="JMIR Med Inform", year="2024", month="Apr", day="8", volume="12", pages="e55318", keywords="large language model", keywords="LLM", keywords="LLMs", keywords="natural language processing", keywords="NLP", keywords="in-context learning", keywords="prompt engineering", keywords="evaluation", keywords="zero-shot", keywords="few shot", keywords="prompting", keywords="GPT", keywords="language model", keywords="language", keywords="models", keywords="machine learning", keywords="clinical data", keywords="clinical information", keywords="extraction", keywords="BARD", keywords="Gemini", keywords="LLaMA-2", keywords="heuristic", keywords="prompt", keywords="prompts", keywords="ensemble", abstract="Background: Large language models (LLMs) have shown remarkable capabilities in natural language processing (NLP), especially in domains where labeled data are scarce or expensive, such as the clinical domain. However, to unlock the clinical knowledge hidden in these LLMs, we need to design effective prompts that can guide them to perform specific clinical NLP tasks without any task-specific training data. This is known as in-context learning, which is an art and science that requires understanding the strengths and weaknesses of different LLMs and prompt engineering approaches. Objective: The objective of this study is to assess the effectiveness of various prompt engineering techniques, including 2 newly introduced types (heuristic and ensemble prompts), for zero-shot and few-shot clinical information extraction using pretrained language models. Methods: This comprehensive experimental study evaluated different prompt types (simple prefix, simple cloze, chain of thought, anticipatory, heuristic, and ensemble) across 5 clinical NLP tasks: clinical sense disambiguation, biomedical evidence extraction, coreference resolution, medication status extraction, and medication attribute extraction. The performance of these prompts was assessed using 3 state-of-the-art language models: GPT-3.5 (OpenAI), Gemini (Google), and LLaMA-2 (Meta). The study contrasted zero-shot with few-shot prompting and explored the effectiveness of ensemble approaches. Results: The study revealed that task-specific prompt tailoring is vital for high performance of LLMs in zero-shot clinical NLP. With heuristic prompts, GPT-3.5 achieved an accuracy of 0.96 in clinical sense disambiguation and 0.94 in biomedical evidence extraction. Heuristic prompts, alongside chain of thought prompts, were highly effective across tasks. Few-shot prompting improved performance in complex scenarios, and ensemble approaches capitalized on multiple prompt strengths. GPT-3.5 consistently outperformed Gemini and LLaMA-2 across tasks and prompt types.
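To make the ensemble prompting idea concrete, here is a minimal sketch of majority voting over several prompt styles. The templates, the stubbed call_llm function, and the task framing are invented for illustration and are not taken from the paper:

```python
from collections import Counter

def call_llm(prompt: str) -> str:
    """Stub standing in for a real chat-completion client so the sketch runs
    end to end; replace this with an actual API call."""
    return "stopped" if prompt.endswith("___.") else "active"

# Invented prompt templates for a single task (medication status extraction).
TEMPLATES = {
    "prefix": "Extract the status (active or stopped) of the medication:\n{text}",
    "heuristic": ("You are a clinical pharmacist. Read the note and state "
                  "whether the medication is active or stopped:\n{text}"),
    "cloze": "{text}\nThe status of the medication above is ___.",
}

def ensemble_extract(text: str) -> str:
    """Ensemble prompting: query the model once per prompt style and
    majority-vote the normalized answers."""
    answers = [call_llm(t.format(text=text)).strip().lower()
               for t in TEMPLATES.values()]
    return Counter(answers).most_common(1)[0][0]

print(ensemble_extract("Patient continues metformin 500 mg twice daily."))
# 'active' (2 of the 3 stubbed prompt styles agree)
```

The voting step is what lets an ensemble capitalize on the complementary strengths of individual prompt types, at the cost of one model call per template.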
Conclusions: This study provides a rigorous evaluation of prompt engineering methodologies and introduces innovative techniques for clinical information extraction, demonstrating the potential of in-context learning in the clinical domain. These findings offer clear guidelines for future prompt-based clinical NLP research, facilitating engagement by non-NLP experts in clinical NLP advancements. To the best of our knowledge, this is one of the first works on the empirical evaluation of different prompt engineering approaches for clinical NLP in this era of generative artificial intelligence, and we hope that it will inspire and inform future research in this area. ", doi="10.2196/55318", url="https://medinform.jmir.org/2024/1/e55318", url="http://www.ncbi.nlm.nih.gov/pubmed/38587879" } @Article{info:doi/10.2196/52289, author="Sivarajkumar, Sonish and Gao, Fengyi and Denny, Parker and Aldhahwani, Bayan and Visweswaran, Shyam and Bove, Allyn and Wang, Yanshan", title="Mining Clinical Notes for Physical Rehabilitation Exercise Information: Natural Language Processing Algorithm Development and Validation Study", journal="JMIR Med Inform", year="2024", month="Apr", day="3", volume="12", pages="e52289", keywords="natural language processing", keywords="electronic health records", keywords="rehabilitation", keywords="physical exercise", keywords="ChatGPT", keywords="artificial intelligence", keywords="stroke", keywords="physical rehabilitation", keywords="rehabilitation therapy", keywords="exercise", keywords="machine learning", abstract="Background: The rehabilitation of a patient who had a stroke requires precise, personalized treatment plans. Natural language processing (NLP) offers the potential to extract valuable exercise information from clinical notes, aiding in the development of more effective rehabilitation strategies. Objective: This study aims to develop and evaluate a variety of NLP algorithms to extract and categorize physical rehabilitation exercise information from the clinical notes of patients who had a stroke treated at the University of Pittsburgh Medical Center. Methods: A cohort of 13,605 patients diagnosed with stroke was identified, and their clinical notes containing rehabilitation therapy notes were retrieved. A comprehensive clinical ontology was created to represent various aspects of physical rehabilitation exercises. State-of-the-art NLP algorithms were then developed and compared, including rule-based, machine learning--based algorithms (support vector machine, logistic regression, gradient boosting, and AdaBoost) and large language model (LLM)--based algorithms (ChatGPT [OpenAI]). The study focused on key performance metrics, particularly F1-scores, to evaluate algorithm effectiveness. Results: The analysis was conducted on a data set comprising 23,724 notes with detailed demographic and clinical characteristics. The rule-based NLP algorithm demonstrated superior performance in most areas, particularly in detecting the ``Right Side'' location with an F1-score of 0.975, outperforming gradient boosting by 0.063. Gradient boosting excelled in ``Lower Extremity'' location detection (F1-score: 0.978), surpassing rule-based NLP by 0.023. It also showed notable performance in the ``Passive Range of Motion'' detection with an F1-score of 0.970, a 0.032 improvement over rule-based NLP. The rule-based algorithm efficiently handled ``Duration,'' ``Sets,'' and ``Reps'' with F1-scores up to 0.65. 
LLM-based NLP, particularly ChatGPT with few-shot prompts, achieved high recall but generally lower precision and F1-scores. However, it notably excelled in ``Backward Plane'' motion detection, achieving an F1-score of 0.846, surpassing the rule-based algorithm's 0.720. Conclusions: The study successfully developed and evaluated multiple NLP algorithms, revealing the strengths and weaknesses of each in extracting physical rehabilitation exercise information from clinical notes. The detailed ontology and the robust performance of the rule-based and gradient boosting algorithms demonstrate significant potential for enhancing precision rehabilitation. These findings contribute to the ongoing efforts to integrate advanced NLP techniques into health care, moving toward predictive models that can recommend personalized rehabilitation treatments for optimal patient outcomes. ", doi="10.2196/52289", url="https://medinform.jmir.org/2024/1/e52289", url="http://www.ncbi.nlm.nih.gov/pubmed/38568736" } @Article{info:doi/10.2196/45754, author="Slade, Emily and Rennick-Egglestone, Stefan and Ng, Fiona and Kotera, Yasuhiro and Llewellyn-Beardsley, Joy and Newby, Chris and Glover, Tony and Keppens, Jeroen and Slade, Mike", title="The Implementation of Recommender Systems for Mental Health Recovery Narratives: Evaluation of Use and Performance", journal="JMIR Ment Health", year="2024", month="Mar", day="29", volume="11", pages="e45754", keywords="recommender system", keywords="mean absolute error", keywords="precision", keywords="intralist diversity", keywords="item space coverage", keywords="fairness across users", keywords="psychosis", keywords="Narrative Experiences Online trial", keywords="NEON trial", keywords="lived experience narrative", keywords="recovery story", abstract="Background: Recommender systems help narrow down a large range of items to a smaller, personalized set. NarraGive is a first-in-field hybrid recommender system for mental health recovery narratives, recommending narratives based on their content and narrator characteristics (using content-based filtering) and on narratives beneficially impacting other similar users (using collaborative filtering). NarraGive is integrated into the Narrative Experiences Online (NEON) intervention, a web application providing access to the NEON Collection of recovery narratives. Objective: This study aims to analyze the 3 recommender system algorithms used in NarraGive to inform future interventions using recommender systems for lived experience narratives. Methods: Using a recently published framework for evaluating recommender systems to structure the analysis, we compared the content-based filtering algorithm and collaborative filtering algorithms by evaluating the accuracy (how close the predicted ratings are to the true ratings), precision (the proportion of the recommended narratives that are relevant), diversity (how diverse the recommended narratives are), coverage (the proportion of all available narratives that can be recommended), and unfairness (whether the algorithms produce less accurate predictions for disadvantaged participants) across gender and ethnicity. We used data from all participants in 2 parallel-group, waitlist control clinical trials of the NEON intervention (NEON trial: N=739; NEON for other [eg, nonpsychosis] mental health problems [NEON-O] trial: N=1023). Both trials included people with self-reported mental health problems who had and had not used statutory mental health services. 
In addition, NEON trial participants had experienced self-reported psychosis in the previous 5 years. Our evaluation used a database of Likert-scale narrative ratings provided by trial participants in response to validated narrative feedback questions. Results: Participants from the NEON and NEON-O trials provided 2288 and 1896 narrative ratings, respectively. Each rated narrative had a median of 3 ratings in the NEON trial and 2 ratings in the NEON-O trial. For the NEON trial, the content-based filtering algorithm performed better for coverage; the collaborative filtering algorithms performed better for accuracy, diversity, and unfairness across both gender and ethnicity; and neither algorithm performed better for precision. For the NEON-O trial, the content-based filtering algorithm did not perform better on any metric; the collaborative filtering algorithms performed better on accuracy and unfairness across both gender and ethnicity; and neither algorithm performed better for precision, diversity, or coverage. Conclusions: Clinical population may be associated with recommender system performance. Recommender systems are susceptible to a wide range of undesirable biases. Approaches to mitigating these include providing enough initial data for the recommender system (to prevent overfitting), ensuring that items can be accessed outside the recommender system (to prevent a feedback loop between accessed items and recommended items), and encouraging participants to provide feedback on every narrative they interact with (to prevent participants from only providing feedback when they have strong opinions). ", doi="10.2196/45754", url="https://mental.jmir.org/2024/1/e45754", url="http://www.ncbi.nlm.nih.gov/pubmed/38551630" } @Article{info:doi/10.2196/52482, author="Nguyen, Duy-Anh and Li, Minyi and Lambert, Gavin and Kowalczyk, Ryszard and McDonald, Rachael and Vo, Bao Quoc", title="Efficient Machine Reading Comprehension for Health Care Applications: Algorithm Development and Validation of a Context Extraction Approach", journal="JMIR Form Res", year="2024", month="Mar", day="25", volume="8", pages="e52482", keywords="question answering", keywords="machine reading comprehension", keywords="context extraction", keywords="covid19", keywords="health care", abstract="Background: Extractive methods for machine reading comprehension (MRC) tasks have achieved accuracy comparable to or better than human performance on benchmark data sets. However, such models are not as successful when adapted to complex domains such as health care. One of the main reasons is that the context that the MRC model needs to process when operating in a complex domain can be much larger than an average open-domain context. This causes the MRC model to make less accurate and slower predictions. A potential solution to this problem is to reduce the input context of the MRC model by extracting only the necessary parts from the original context. Objective: This study aims to develop a method for extracting useful contexts from long articles as an additional component to the question answering task, enabling the MRC model to work more efficiently and accurately. Methods: Existing approaches to context extraction in MRC are based on sentence selection strategies, in which the models are trained to find the sentences containing the answer. We found that using only the sentences containing the answer was insufficient for the MRC model to predict correctly.
We conducted a series of empirical studies and observed a strong relationship between the usefulness of the context and the confidence score output of the MRC model. Our investigation showed that a precise input context can boost the prediction correctness of the MRC and greatly reduce inference time. We proposed a method to estimate the utility of each sentence in a context in answering the question and then extract a new, shorter context according to these estimations. We generated a data set to train 2 models for estimating sentence utility, based on which we selected more precise contexts that improved the MRC model's performance. Results: We demonstrated our approach on the Question Answering Data Set for COVID-19 and Biomedical Semantic Indexing and Question Answering data sets and showed that the approach benefits the downstream MRC model. First, the method substantially reduced the inference time of the entire question answering system by 6 to 7 times. Second, our approach helped the MRC model predict the answer more correctly compared with using the original context (F1-score increased from 0.724 to 0.744 for the Question Answering Data Set for COVID-19 and from 0.651 to 0.704 for the Biomedical Semantic Indexing and Question Answering). We also found a potential problem where extractive transformer MRC models predict poorly despite being given a more precise context in some cases. Conclusions: The proposed context extraction method allows the MRC model to achieve improved prediction correctness and a significantly reduced MRC inference time. This approach works technically with any MRC model and has potential in tasks involving processing long texts. ", doi="10.2196/52482", url="https://formative.jmir.org/2024/1/e52482", url="http://www.ncbi.nlm.nih.gov/pubmed/38526545" } @Article{info:doi/10.2196/52967, author="Peng, Yuan and Bathelt, Franziska and Gebler, Richard and G{\"o}tt, Robert and Heidenreich, Andreas and Henke, Elisa and Kadioglu, Dennis and Lorenz, Stephan and Vengadeswaran, Abishaa and Sedlmayr, Martin", title="Use of Metadata-Driven Approaches for Data Harmonization in the Medical Domain: Scoping Review", journal="JMIR Med Inform", year="2024", month="Feb", day="14", volume="12", pages="e52967", keywords="ETL", keywords="ELT", keywords="Extract-Load-Transform", keywords="Extract-Transform-Load", keywords="interoperability", keywords="metadata-driven", keywords="medical domain", keywords="data harmonization", abstract="Background: Multisite clinical studies are increasingly using real-world data to gain real-world evidence. However, due to the heterogeneity of source data, it is difficult to analyze such data in a unified way across clinics. Therefore, the implementation of Extract-Transform-Load (ETL) or Extract-Load-Transform (ELT) processes for harmonizing local health data is necessary, in order to guarantee the data quality for research. However, the development of such processes is time-consuming and unsustainable. A promising way to ease this is the generalization of ETL/ELT processes. Objective: In this work, we investigate existing possibilities for the development of generic ETL/ELT processes. Particularly, we focus on approaches with low development complexity by using descriptive metadata and structural metadata. Methods: We conducted a literature review following the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines. 
We used 4 publication databases (ie, PubMed, IEEE Xplore, Web of Science, and BioMed Central) to search for relevant publications from 2012 to 2022. The PRISMA flow was then visualized using an R-based tool (Evidence Synthesis Hackathon). All relevant contents of the publications were extracted into a spreadsheet for further analysis and visualization. Results: Following the PRISMA guidelines, we included 33 publications in this literature review. All included publications were categorized into 7 different focus groups (ie, medicine, data warehouse, big data, industry, geoinformatics, archaeology, and military). Based on the extracted data, ontology-based and rule-based approaches were the 2 most used approaches in different thematic categories. Different approaches and tools were chosen to achieve different purposes within the use cases. Conclusions: Our literature review shows that using metadata-driven (MDD) approaches to develop an ETL/ELT process can serve different purposes in different thematic categories. The results show that it is promising to implement an ETL/ELT process by applying an MDD approach to automate the data transformation from Fast Healthcare Interoperability Resources to Observational Medical Outcomes Partnership Common Data Model. However, determining an appropriate MDD approach and tool to implement such an ETL/ELT process remains a challenge. This is due to the lack of comprehensive insight into the characterizations of the MDD approaches presented in this study. Therefore, our next step is to evaluate the MDD approaches presented in this study and to determine the most appropriate MDD approaches and the way to integrate them into the ETL/ELT process. This could verify the ability to use MDD approaches to generalize the ETL process for harmonizing medical data. ", doi="10.2196/52967", url="https://medinform.jmir.org/2024/1/e52967", url="http://www.ncbi.nlm.nih.gov/pubmed/38354027" } @Article{info:doi/10.2196/42271, author="Li, Angie and Mullin, Sarah and Elkin, L. Peter", title="Improving Prediction of Survival for Extremely Premature Infants Born at 23 to 29 Weeks Gestational Age in the Neonatal Intensive Care Unit: Development and Evaluation of Machine Learning Models", journal="JMIR Med Inform", year="2024", month="Feb", day="14", volume="12", pages="e42271", keywords="reproductive informatics", keywords="pregnancy complications", keywords="premature birth", keywords="neonatal mortality", keywords="machine learning", keywords="clinical decision support", keywords="preterm", keywords="pediatrics", keywords="intensive care unit outcome", keywords="health care outcome", keywords="survival prediction", keywords="maternal health", keywords="decision tree model", keywords="socioeconomic", abstract="Background: Infants born at extremely preterm gestational ages are typically admitted to the neonatal intensive care unit (NICU) after initial resuscitation. The subsequent hospital course can be highly variable, and despite counseling aided by available risk calculators, there are significant challenges with shared decision-making regarding life support and transition to end-of-life care. Improving predictive models can help providers and families navigate these unique challenges. Objective: Machine learning methods have previously demonstrated added predictive value for determining intensive care unit outcomes, and their use allows consideration of a greater number of factors that potentially influence newborn outcomes, such as maternal characteristics.
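As an illustrative aside: a minimal sketch, in Python with scikit-learn, of the kind of model comparison this entry describes, ie, a random forest predicting NICU survival from tabular maternal and newborn features. The data and feature names below are simulated placeholders, not the authors' MIMIC-III extraction.

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import roc_auc_score
    from sklearn.model_selection import train_test_split

    rng = np.random.default_rng(0)
    # Hypothetical columns: gestational age, birth weight, 5-min APGAR,
    # initial oxygenation, maternal age.
    X = rng.normal(size=(459, 5))
    y = (X[:, 0] + 0.5 * X[:, 1] + rng.normal(size=459) > -1.5).astype(int)

    X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)
    rf = RandomForestClassifier(n_estimators=200, random_state=0).fit(X_tr, y_tr)
    print('AUC:', roc_auc_score(y_te, rf.predict_proba(X_te)[:, 1]))
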
Machine learning--based models were analyzed for their ability to predict the survival of extremely preterm neonates at initial admission. Methods: Maternal and newborn information was extracted from the health records of infants born between 23 and 29 weeks of gestation in the Medical Information Mart for Intensive Care III (MIMIC-III) critical care database. Applicable machine learning models predicting survival during the initial NICU admission were developed and compared. The same type of model was also examined using only features that would be available prepartum for the purpose of survival prediction prior to an anticipated preterm birth. Features most correlated with the predicted outcome were determined when possible for each model. Results: Of included patients, 37 of 459 (8.1\%) expired. The resulting random forest model showed higher predictive performance than the frequently used Score for Neonatal Acute Physiology With Perinatal Extension II (SNAPPE-II) NICU model when considering extremely preterm infants of very low birth weight. Several other machine learning models were found to have good performance but did not show a statistically significant difference from previously available models in this study. Feature importance varied by model, and those of greater importance included gestational age; birth weight; initial oxygenation level; elements of the APGAR (appearance, pulse, grimace, activity, and respiration) score; and amount of blood pressure support. Important prepartum features also included maternal age, steroid administration, and the presence of pregnancy complications. Conclusions: Machine learning methods have the potential to provide robust prediction of survival in the context of extremely preterm births and allow for consideration of additional factors such as maternal clinical and socioeconomic information. Evaluation of larger, more diverse data sets may provide additional clarity on comparative performance. ", doi="10.2196/42271", url="https://medinform.jmir.org/2024/1/e42271", url="http://www.ncbi.nlm.nih.gov/pubmed/38354033" } @Article{info:doi/10.2196/49007, author="Mehra, Tarun and Wekhof, Tobias and Keller, Iris Dagmar", title="Additional Value From Free-Text Diagnoses in Electronic Health Records: Hybrid Dictionary and Machine Learning Classification Study", journal="JMIR Med Inform", year="2024", month="Jan", day="17", volume="12", pages="e49007", keywords="electronic health records", keywords="free text", keywords="natural language processing", keywords="NLP", keywords="artificial intelligence", keywords="AI", abstract="Background: Physicians are hesitant to forgo the opportunity of entering unstructured clinical notes for structured data entry in electronic health records. Does free text increase informational value in comparison with structured data? Objective: This study aims to compare information from unstructured text-based chief complaints harvested and processed by a natural language processing (NLP) algorithm with clinician-entered structured diagnoses in terms of their potential utility for automated improvement of patient workflows. Methods: Electronic health records of 293,298 patient visits at the emergency department of a Swiss university hospital from January 2014 to October 2021 were analyzed. 
Using emergency department overcrowding as a case in point, we compared supervised NLP-based keyword dictionaries of symptom clusters from unstructured clinical notes and clinician-entered chief complaints from a structured drop-down menu with the following 2 outcomes: hospitalization and high Emergency Severity Index (ESI) score. Results: Of 12 symptom clusters, the NLP cluster was substantial in predicting hospitalization in 11 (92\%) clusters; 8 (67\%) clusters remained significant even after controlling for the cluster of clinician-determined chief complaints in the model. All 12 NLP symptom clusters were significant in predicting a low ESI score, of which 9 (75\%) remained significant when controlling for clinician-determined chief complaints. The correlation between NLP clusters and chief complaints was low (r=-0.04 to 0.6), indicating complementarity of information. Conclusions: The NLP-derived features and clinicians' knowledge were complementary in explaining patient outcome heterogeneity. They can provide an efficient approach to patient flow management, for example, in an emergency medicine setting. We further demonstrated the feasibility of creating extensive and precise keyword dictionaries with NLP by medical experts without requiring programming knowledge. Using the dictionary, we could classify short and unstructured clinical texts into diagnostic categories defined by the clinician. ", doi="10.2196/49007", url="https://medinform.jmir.org/2024/1/e49007", url="http://www.ncbi.nlm.nih.gov/pubmed/38231569" } @Article{info:doi/10.2196/50738, author="Benacek, Jiri and Lawal, Nimotalai and Ong, Tommy and Tomasik, Jakub and Martin-Key, A. Nayra and Funnell, L. Erin and Barton-Owen, Giles and Olmert, Tony and Cowell, Dan and Bahn, Sabine", title="Identification of Predictors of Mood Disorder Misdiagnosis and Subsequent Help-Seeking Behavior in Individuals With Depressive Symptoms: Gradient-Boosted Tree Machine Learning Approach", journal="JMIR Ment Health", year="2024", month="Jan", day="11", volume="11", pages="e50738", keywords="misdiagnosis", keywords="help-seeking", keywords="gradient-boosted trees", keywords="machine learning", keywords="depression", keywords="bipolar disorder", keywords="diagnose", keywords="diagnosis", keywords="mood", keywords="mental health", keywords="mental disorder", keywords="mental disorders", keywords="depressive", keywords="predict", keywords="predictive", keywords="prediction", keywords="depressed", keywords="algorithm", keywords="algorithms", abstract="Background: Misdiagnosis and delayed help-seeking cause significant burden for individuals with mood disorders such as major depressive disorder and bipolar disorder. Misdiagnosis can lead to inappropriate treatment, while delayed help-seeking can result in more severe symptoms, functional impairment, and poor treatment response. Such challenges are common in individuals with major depressive disorder and bipolar disorder due to the overlap of symptoms with other mental and physical health conditions, as well as stigma and insufficient understanding of these disorders. Objective: In this study, we aimed to identify factors that may contribute to mood disorder misdiagnosis and delayed help-seeking. Methods: Participants with current depressive symptoms were recruited online, and data were collected using an extensive digital mental health questionnaire, with the World Health Organization World Mental Health Composite International Diagnostic Interview delivered via telephone.
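As an illustrative aside: a minimal scikit-learn sketch of the gradient-boosted tree modeling described in the next sentences, with simulated questionnaire features standing in for the study's digital mental health questionnaire items.

    import numpy as np
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.model_selection import cross_val_score

    rng = np.random.default_rng(1)
    X = rng.normal(size=(924, 6))        # eg, mood severity, age at diagnosis
    y = rng.binomial(1, 0.4, size=924)   # 1 = misdiagnosed (simulated)

    gbt = GradientBoostingClassifier(random_state=0)
    print(cross_val_score(gbt, X, y, scoring='roc_auc', cv=5).mean())
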
A series of predictive gradient-boosted tree algorithms were trained and validated to identify the most important predictors of misdiagnosis and subsequent help-seeking in misdiagnosed individuals. Results: The analysis included data from 924 symptomatic individuals for predicting misdiagnosis and from a subset of 379 misdiagnosed participants who provided follow-up information when predicting help-seeking. Models achieved good predictive power, with area under the receiver operating characteristic curve of 0.75 and 0.71 for misdiagnosis and help-seeking, respectively. The most predictive features with respect to misdiagnosis were high severity of depressed mood, instability of self-image, the involvement of a psychiatrist in diagnosing depression, higher age at depression diagnosis, and reckless spending. Regarding help-seeking behavior, the strongest predictors included shorter time elapsed since last speaking to a general practitioner about mental health, sleep problems disrupting daily tasks, taking antidepressant medication, and being diagnosed with depression at younger ages. Conclusions: This study provides a novel, machine learning--based approach to understand the interplay of factors that may contribute to the misdiagnosis and subsequent help-seeking in patients experiencing low mood. The present findings can inform the development of targeted interventions to improve early detection and appropriate treatment of individuals with mood disorders. ", doi="10.2196/50738", url="https://mental.jmir.org/2024/1/e50738", url="http://www.ncbi.nlm.nih.gov/pubmed/38206660" } @Article{info:doi/10.2196/48892, author="Schapranow, Matthieu-P and Bayat, Mozhgan and Rasheed, Aadil and Naik, Marcel and Graf, Verena and Schmidt, Danilo and Budde, Klemens and Cardinal, H{\'e}lo{\"i}se and Sapir-Pichhadze, Ruth and Fenninger, Franz and Sherwood, Karen and Keown, Paul and G{\"u}nther, P. Oliver and Pandl, D. Konstantin and Leiser, Florian and Thiebes, Scott and Sunyaev, Ali and Niemann, Matthias and Schimanski, Andreas and Klein, Thomas", title="NephroCAGE---German-Canadian Consortium on AI for Improved Kidney Transplantation Outcome: Protocol for an Algorithm Development and Validation Study", journal="JMIR Res Protoc", year="2023", month="Dec", day="22", volume="12", pages="e48892", keywords="posttransplant risks", keywords="kidney transplantation", keywords="federated learning infrastructure", keywords="clinical prediction model", keywords="donor-recipient matching", keywords="multinational transplant data set", abstract="Background: Recent advances in hardware and software enabled the use of artificial intelligence (AI) algorithms for analysis of complex data in a wide range of daily-life use cases. We aim to explore the benefits of applying AI to a specific use case in transplant nephrology: risk prediction for severe posttransplant events. For the first time, we combine multinational real-world transplant data, which require specific legal and technical protection measures. Objective: The German-Canadian NephroCAGE consortium aims to develop and evaluate specific processes, software tools, and methods to (1) combine transplant data of more than 8000 cases over the past decades from leading transplant centers in Germany and Canada, (2) implement specific measures to protect sensitive transplant data, and (3) use multinational data as a foundation for developing high-quality prognostic AI models. 
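As an illustrative aside: a minimal FedAvg-style sketch of the paradigm switch this entry's methods describe, in which model parameters travel between sites while raw transplant records stay local. This is a generic federated-averaging toy, not NephroCAGE's blockchain-backed protocol.

    import numpy as np

    def local_update(w, X, y, lr=0.1):
        # One gradient step of logistic regression on one site's local data.
        p = 1.0 / (1.0 + np.exp(-X @ w))
        return w - lr * X.T @ (p - y) / len(y)

    rng = np.random.default_rng(2)
    sites = [(rng.normal(size=(100, 5)), rng.binomial(1, 0.3, size=100))
             for _ in range(3)]          # 3 simulated clinical sites

    w = np.zeros(5)
    for _ in range(50):                  # federated rounds
        updates = [local_update(w, X, y) for X, y in sites]
        w = np.mean(updates, axis=0)     # the server only sees model weights
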
Methods: To protect sensitive transplant data addressing the first and second objectives, we aim to implement a decentralized NephroCAGE federated learning infrastructure upon a private blockchain. Our NephroCAGE federated learning infrastructure enables a switch of paradigms: instead of pooling sensitive data into a central database for analysis, it enables the transfer of clinical prediction models (CPMs) to clinical sites for local data analyses. Thus, sensitive transplant data reside protected in their original sites while the comparatively small algorithms are exchanged instead. For our third objective, we will compare the performance of selected AI algorithms, for example, random forest and extreme gradient boosting, as the foundation for CPMs to predict severe short- and long-term posttransplant risks, for example, graft failure or mortality. The CPMs will be trained on donor and recipient data from retrospective cohorts of kidney transplant patients. Results: We received initial funding for NephroCAGE in February 2021. All clinical partners have applied for and received ethics approval as of 2022. The process of exploring the clinical transplant databases for variable extraction started at all the centers in 2022. In total, 8120 patient records have been retrieved as of August 2023. The development and validation of CPMs are ongoing as of 2023. Conclusions: For the first time, we will (1) combine kidney transplant data from nephrology centers in Germany and Canada, (2) implement federated learning as a foundation to use such real-world transplant data as a basis for the training of CPMs in a privacy-preserving way, and (3) develop a learning software system to investigate population specifics, for example, to understand population heterogeneity, treatment specificities, and individual impact on selected posttransplant outcomes. International Registered Report Identifier (IRRID): DERR1-10.2196/48892 ", doi="10.2196/48892", url="https://www.researchprotocols.org/2023/1/e48892", url="http://www.ncbi.nlm.nih.gov/pubmed/38133915" } @Article{info:doi/10.2196/50017, author="Renner, Christopher and Reimer, Niklas and Christoph, Jan and Busch, Hauke and Metzger, Patrick and Boerries, Melanie and Ustjanzew, Arsenij and Boehm, Dominik and Unberath, Philipp", title="Extending cBioPortal for Therapy Recommendation Documentation in Molecular Tumor Boards: Development and Usability Study", journal="JMIR Med Inform", year="2023", month="Dec", day="11", volume="11", pages="e50017", keywords="molecular tumor board", keywords="documentation platform", keywords="usability evaluation", keywords="cBioPortal", keywords="precision medicine", keywords="genomics", keywords="health information interoperability", keywords="tumor", keywords="implementation", keywords="cancer", keywords="tool", keywords="platform", keywords="development", keywords="precision", keywords="use", keywords="user-centered", abstract="Background: In molecular tumor boards (MTBs), patients with rare or advanced cancers are discussed by a multidisciplinary team of health care professionals. Software support for MTBs is lacking; in particular, tools for preparing and documenting MTB therapy recommendations need to be developed. Objective: We aimed to implement an extension to cBioPortal to provide a tool for the documentation of therapy recommendations from MTB sessions in a secure and standardized manner. The developed extension should be embedded in the patient view of cBioPortal to enable easy documentation during MTB sessions.
The resulting architecture for storing therapy recommendations should be integrable into various hospital information systems. Methods: On the basis of a requirements analysis and technology analysis for authentication techniques, a prototype was developed and iteratively refined through a user-centered development process. Finally, the tool was assessed via a usability evaluation, including interviews, structured questionnaires, and the System Usability Scale. Results: The patient view of cBioPortal was extended with a new tab that enables users to document MTB sessions and therapy recommendations. The role-based access control was expanded to allow for a finer distinction among the rights to view, edit, and delete data. The usability evaluation showed overall good usability and a System Usability Scale score of 83.57. Conclusions: This study demonstrates how cBioPortal can be extended to not only visualize MTB patient data but also be used as a documentation platform for therapy recommendations. ", doi="10.2196/50017", url="https://medinform.jmir.org/2023/1/e50017", url="http://www.ncbi.nlm.nih.gov/pubmed/38079196" } @Article{info:doi/10.2196/44773, author="Peine, Arne and Gronholz, Maike and Seidl-Rathkopf, Katharina and Wolfram, Thomas and Hallawa, Ahmed and Reitz, Annika and Celi, Anthony Leo and Marx, Gernot and Martin, Lukas", title="Standardized Comparison of Voice-Based Information and Documentation Systems to Established Systems in Intensive Care: Crossover Study", journal="JMIR Med Inform", year="2023", month="Nov", day="28", volume="11", pages="e44773", keywords="artificial intelligence", keywords="documentation", keywords="ICU", keywords="intensive care medicine", keywords="speech-recognition", keywords="user perception", keywords="workload", abstract="Background: The medical teams in intensive care units (ICUs) spend increasing amounts of time at computer systems for data processing, input, and interpretation purposes. As each patient creates about 1000 data points per hour, the available information is abundant, making the interpretation difficult and time-consuming. This data flood leads to a decrease in time for evidence-based, patient-centered care. Information systems, such as patient data management systems (PDMSs), are increasingly used at ICUs. However, they often create new challenges arising from the increasing documentation burden. Objective: New concepts, such as artificial intelligence (AI)--based assistant systems, are hence introduced to the workflow to cope with these challenges. However, there is a lack of standardized, published metrics in order to compare the various data input and management systems in the ICU setting. The objective of this study is to compare established documentation and retrieval processes with newer methods, such as PDMSs and voice information and documentation systems (VIDSs). Methods: In this crossover study, we compare traditional, paper-based documentation systems with PDMSs and newer AI-based VIDSs in terms of performance (required time), accuracy, mental workload, and user experience in an intensive care setting. Performance is assessed on a set of 6 standardized, typical ICU tasks, ranging from documentation to medical interpretation. Results: A total of 60 ICU-experienced medical professionals participated in the study. The VIDS showed a statistically significant advantage compared to the other 2 systems.
The tasks were completed significantly faster with the VIDS than with the PDMS (1-tailed $t_{59}$=12.48; Cohen d=1.61; P<.001) or paper documentation ($t_{59}$=20.41; Cohen d=2.63; P<.001). Significantly fewer errors were made with VIDS than with the PDMS ($t_{59}$=3.45; Cohen d=0.45; P=.03) and paper-based documentation ($t_{59}$=11.2; Cohen d=1.45; P<.001). The analysis of the mental workload of VIDS and PDMS showed no statistically significant difference (P=.06). However, the analysis of subjective user perception showed a statistically significant perceived benefit of the VIDS compared to the PDMS (P<.001) and paper documentation (P<.001). Conclusions: The results of this study show that the VIDS reduced error rate, documentation time, and mental workload regarding the set of 6 standardized typical ICU tasks. Overall, this indicates that AI-based systems such as the VIDS tested in this study have the potential to reduce this workload and improve evidence-based and safe patient care. ", doi="10.2196/44773", url="https://medinform.jmir.org/2023/1/e44773", url="http://www.ncbi.nlm.nih.gov/pubmed/38015593" } @Article{info:doi/10.2196/48933, author="Schopow, Nikolas and Osterhoff, Georg and Baur, David", title="Applications of the Natural Language Processing Tool ChatGPT in Clinical Practice: Comparative Study and Augmented Systematic Review", journal="JMIR Med Inform", year="2023", month="Nov", day="28", volume="11", pages="e48933", keywords="natural language processing", keywords="clinical practice", keywords="systematic review", keywords="healthcare", keywords="health care", keywords="GPT-3", keywords="GPT-4", keywords="large language models", keywords="artificial intelligence", keywords="machine learning", keywords="clinical decision support systems", keywords="language model", keywords="NLP", keywords="ChatGPT", keywords="systematic", keywords="review methods", keywords="review methodology", keywords="text", keywords="unstructured", keywords="extract", keywords="extraction", abstract="Background: This research integrates a comparative analysis of the performance of human researchers and OpenAI's ChatGPT in systematic review tasks and describes an assessment of the application of natural language processing (NLP) models in clinical practice through a review of 5 studies. Objective: This study aimed to evaluate the reliability between ChatGPT and human researchers in extracting key information from clinical articles, and to investigate the practical use of NLP in clinical settings as evidenced by selected studies. Methods: The study design comprised a systematic review of clinical articles executed independently by human researchers and ChatGPT. The level of agreement between and within raters for parameter extraction was assessed using the Fleiss and Cohen $\kappa$ statistics. Results: The comparative analysis revealed a high degree of concordance between ChatGPT and human researchers for most parameters, with less agreement for study design, clinical task, and clinical implementation. The review identified 5 significant studies that demonstrated the diverse applications of NLP in clinical settings. These studies' findings highlight the potential of NLP to improve clinical efficiency and patient outcomes in various contexts, from enhancing allergy detection and classification to improving quality metrics in psychotherapy treatments for veterans with posttraumatic stress disorder.
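As an illustrative aside: the Cohen $\kappa$ agreement named above can be computed in one line with scikit-learn; the labels below are invented stand-ins for one extracted parameter (Fleiss $\kappa$, for more than 2 raters, is available in, eg, statsmodels).

    from sklearn.metrics import cohen_kappa_score

    # Invented study-design labels extracted by ChatGPT vs a human rater.
    chatgpt = ['rct', 'cohort', 'rct', 'case-control', 'cohort', 'rct']
    human   = ['rct', 'cohort', 'cohort', 'case-control', 'cohort', 'rct']
    print(cohen_kappa_score(chatgpt, human))
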
Conclusions: Our findings underscore the potential of NLP models, including ChatGPT, in performing systematic reviews and other clinical tasks. Despite certain limitations, NLP models present a promising avenue for enhancing health care efficiency and accuracy. Future studies must focus on broadening the range of clinical applications and exploring the ethical considerations of implementing NLP applications in health care settings. ", doi="10.2196/48933", url="https://medinform.jmir.org/2023/1/e48933", url="http://www.ncbi.nlm.nih.gov/pubmed/38015610" } @Article{info:doi/10.2196/47833, author="Liu, Kui and Li, Linyi and Ma, Yifei and Jiang, Jun and Liu, Zhenhua and Ye, Zichen and Liu, Shuang and Pu, Chen and Chen, Changsheng and Wan, Yi", title="Machine Learning Models for Blood Glucose Level Prediction in Patients With Diabetes Mellitus: Systematic Review and Network Meta-Analysis", journal="JMIR Med Inform", year="2023", month="Nov", day="20", volume="11", pages="e47833", keywords="machine learning", keywords="diabetes", keywords="hypoglycemia", keywords="blood glucose", keywords="blood glucose management", abstract="Background: Machine learning (ML) models provide more choices to patients with diabetes mellitus (DM) to more properly manage blood glucose (BG) levels. However, because of numerous types of ML algorithms, choosing an appropriate model is vitally important. Objective: In a systematic review and network meta-analysis, this study aimed to comprehensively assess the performance of ML models in predicting BG levels. In addition, we assessed ML models used to detect and predict adverse BG (hypoglycemia) events by calculating pooled estimates of sensitivity and specificity. Methods: PubMed, Embase, Web of Science, and Institute of Electrical and Electronics Engineers (IEEE) Xplore databases were systematically searched for studies on predicting BG levels and predicting or detecting adverse BG events using ML models, from inception to November 2022. Studies that assessed the performance of different ML models in predicting or detecting BG levels or adverse BG events of patients with DM were included. Studies with no derivation or performance metrics of ML models were excluded. The Quality Assessment of Diagnostic Accuracy Studies tool was applied to assess the quality of included studies. Primary outcomes were the relative ranking of ML models for predicting BG levels in different prediction horizons (PHs) and pooled estimates of the sensitivity and specificity of ML models in detecting or predicting adverse BG events. Results: In total, 46 eligible studies were included for meta-analysis. Regarding ML models for predicting BG levels, the means of the absolute root mean square error (RMSE) in a PH of 15, 30, 45, and 60 minutes were 18.88 (SD 19.71), 21.40 (SD 12.56), 21.27 (SD 5.17), and 30.01 (SD 7.23) mg/dL, respectively. The neural network model (NNM) showed the highest relative performance in different PHs. Furthermore, the pooled estimates of the positive likelihood ratio and the negative likelihood ratio of ML models were 8.3 (95\% CI 5.7-12.0) and 0.31 (95\% CI 0.22-0.44), respectively, for predicting hypoglycemia and 2.4 (95\% CI 1.6-3.7) and 0.37 (95\% CI 0.29-0.46), respectively, for detecting hypoglycemia. Conclusions: Statistically significant high heterogeneity was detected in all subgroups, with different sources of heterogeneity.
For predicting precise BG levels, the RMSE increases with a rise in the PH, and the NNM shows the highest relative performance among all the ML models. Meanwhile, current ML models have sufficient ability to predict adverse BG events, while their ability to detect adverse BG events needs to be enhanced. Trial Registration: PROSPERO CRD42022375250; https://www.crd.york.ac.uk/prospero/display\_record.php?RecordID=375250 ", doi="10.2196/47833", url="https://medinform.jmir.org/2023/1/e47833", url="http://www.ncbi.nlm.nih.gov/pubmed/37983072" } @Article{info:doi/10.2196/50872, author="Berry, Michael and Taylor, Lauren and Huang, Zhuoran and Chwyl, Christina and Kerrigan, Stephanie and Forman, Evan", title="Automated Messaging Delivered Alongside Behavioral Treatment for Weight Loss: Qualitative Study", journal="JMIR Form Res", year="2023", month="Nov", day="6", volume="7", pages="e50872", keywords="mobile health technology", keywords="weight loss", keywords="tailored messaging", keywords="lifestyle modification", keywords="mobile health", keywords="mHealth", keywords="messaging", keywords="intervention", keywords="overweight", keywords="obesity", keywords="qualitative", keywords="thematic analysis", abstract="Background: Mobile health interventions for weight loss frequently use automated messaging. However, this intervention modality appears to have limited weight loss efficacy. Furthermore, data on users' subjective experiences while receiving automated messaging--based interventions for weight loss are scarce, especially for more advanced messaging systems providing users with individually tailored, data-informed feedback. Objective: The purpose of this study was to characterize the experiences of individuals with overweight or obesity who received automated messages for 6-12 months as part of a behavioral weight loss trial. Methods: Participants (n=40) provided Likert-scale ratings of messaging acceptability and completed a structured qualitative interview (n=39) focused on their experiences with the messaging system and generating suggestions for improvement. Interview data were analyzed using thematic analysis. Results: Participants found the messages most useful for summarizing goal progress and least useful for suggesting new behavioral strategies. Overall message acceptability was moderate (2.67 out of 5). From the interviews, 2 meta-themes emerged. Participants indicated that although the messages provided useful reminders of intervention goals and skills, they did not adequately capture their lived experiences while losing weight. Conclusions: Many participants found the automated messages insufficiently tailored to their personal weight loss experiences. Future studies should explore alternative methods for message tailoring (eg, allowing for a higher degree of participant input and interactivity) that may boost treatment engagement and efficacy. 
Trial Registration: ClinicalTrials.gov NCT05231824; https://clinicaltrials.gov/study/NCT05231824 ", doi="10.2196/50872", url="https://formative.jmir.org/2023/1/e50872", url="http://www.ncbi.nlm.nih.gov/pubmed/37930786" } @Article{info:doi/10.2196/46547, author="Liang, Xueping and Zhao, Juan and Chen, Yan and Bandara, Eranga and Shetty, Sachin", title="Architectural Design of a Blockchain-Enabled, Federated Learning Platform for Algorithmic Fairness in Predictive Health Care: Design Science Study", journal="J Med Internet Res", year="2023", month="Oct", day="30", volume="25", pages="e46547", keywords="fairness", keywords="federated learning", keywords="bias", keywords="health care", keywords="blockchain", keywords="software", keywords="proof of concept", keywords="implementation", keywords="privacy", abstract="Background: Developing effective and generalizable predictive models is critical for disease prediction and clinical decision-making, often requiring diverse samples to mitigate population bias and address algorithmic fairness. However, a major challenge is to retrieve learning models across multiple institutions without bringing in local biases and inequity, while preserving individual patients' privacy at each site. Objective: This study aims to understand the issues of bias and fairness in the machine learning process used in the predictive health care domain. We proposed a software architecture that integrates federated learning and blockchain to improve fairness, while maintaining acceptable prediction accuracy and minimizing overhead costs. Methods: We improved existing federated learning platforms by integrating blockchain through an iterative design approach. We used the design science research method, which involves 2 design cycles (federated learning for bias mitigation and decentralized architecture). The design involves a bias-mitigation process within the blockchain-empowered federated learning framework based on a novel architecture. Under this architecture, multiple medical institutions can jointly train predictive models using their privacy-protected data effectively and efficiently and ultimately achieve fairness in decision-making in the health care domain. Results: We designed and implemented our solution using the Aplos smart contract, microservices, Rahasak blockchain, and Apache Cassandra--based distributed storage. By conducting 20,000 local model training iterations and 1000 federated model training iterations across 5 simulated medical centers as peers in the Rahasak blockchain network, we demonstrated how our solution with an improved fairness mechanism can enhance the accuracy of predictive diagnosis. Conclusions: Our study identified the technical challenges of prediction biases faced by existing predictive models in the health care domain. To overcome these challenges, we presented an innovative design solution using federated learning and blockchain, along with the adoption of a unique distributed architecture for a fairness-aware system. We have illustrated how this design can address privacy, security, prediction accuracy, and scalability challenges, ultimately improving fairness and equity in the predictive health care domain. ", doi="10.2196/46547", url="https://www.jmir.org/2023/1/e46547", url="http://www.ncbi.nlm.nih.gov/pubmed/37902833" } @Article{info:doi/10.2196/47881, author="Amig{\'o}-Vega, Joaqu{\'i}n and Ottenhoff, C. 
Maarten and Verwoert, Maxime and Kubben, Pieter and Herff, Christian", title="The Easy and Versatile Neural Recording Platform (T-REX): Design and Development Study", journal="JMIR Neurotech", year="2023", month="Oct", day="24", volume="2", pages="e47881", keywords="recording", keywords="platform", keywords="flexible", keywords="data recording", keywords="neurotechnology", keywords="experiments", abstract="Background: Recording time in invasive neuroscientific research is limited and must be used as efficiently as possible. Time is often lost due to a long setup time and errors by the researcher, driven by the number of manually performed steps. Currently, recording solutions that automate experimental overhead are either custom-made by researchers or provided as a submodule in comprehensive neuroscientific toolboxes, and there are no platforms focused explicitly on recording. Objective: Minimizing the number of manual actions may reduce error rates and experimental overhead. However, automation should avoid reducing the flexibility of the system. Therefore, we developed a software package named T-REX (Standalone Recorder of Experiments) that specifically simplifies the recording of experiments while focusing on retaining flexibility. Methods: The proposed solution is a standalone webpage that the researcher can provide without an active internet connection. It is built using Bootstrap5 for the frontend and the Python package Flask for the backend. Only Python 3.7+ and a few dependencies are required to start the different experiments. Data synchronization is implemented using Lab Streaming Layer, an open-source networked synchronization ecosystem, enabling all major programming languages and toolboxes to be used for developing and executing the experiments. Additionally, T-REX runs on Windows, Linux, and macOS. Results: The system reduces experimental overhead during recordings to a minimum. Multiple experiments are centralized in a simple local web interface that reduces an experiment's setup, start, and stop to a single button press. In principle, any type of experiment, regardless of the scientific field (eg, behavioral or cognitive sciences, and electrophysiology), can be executed with the platform. T-REX includes an easy-to-use interface that can be adjusted to specific recording modalities, amplifiers, and participants. Because of the automated setup, easy recording, and easy-to-use interface, participants may even start and stop experiments by themselves, thus potentially providing data without the researcher's presence. Conclusions: We developed a new recording platform that is operating system independent, user friendly, and robust. We provide researchers with a solution that can greatly increase the time spent on recording instead of setting up (with its possible errors). ", doi="10.2196/47881", url="https://neuro.jmir.org/2023/1/e47881" } @Article{info:doi/10.2196/44332, author="Pfisterer, J. Kaylen and Lohani, Raima and Janes, Elizabeth and Ng, Denise and Wang, Dan and Bryant-Lukosius, Denise and Rendon, Ricardo and Berlin, Alejandro and Bender, Jacqueline and Brown, Ian and Feifer, Andrew and Gotto, Geoffrey and Saha, Shumit and Cafazzo, A. 
Joseph and Pham, Quynh", title="An Actionable Expert-System Algorithm to Support Nurse-Led Cancer Survivorship Care: Algorithm Development Study", journal="JMIR Cancer", year="2023", month="Oct", day="4", volume="9", pages="e44332", keywords="prostate cancer", keywords="patient-reported outcomes", keywords="nurse-led model of care", keywords="expert system", keywords="artificial intelligence--powered decision support", keywords="digital health", keywords="nursing", keywords="algorithm development", keywords="cancer treatment", keywords="AI", keywords="survivorship", keywords="cancer", abstract="Background: Comprehensive models of survivorship care are necessary to improve access to and coordination of care. New models of care provide the opportunity to address the complexity of physical and psychosocial problems and long-term health needs experienced by patients following cancer treatment. Objective: This paper presents our expert-informed, rules-based survivorship algorithm to build a nurse-led model of survivorship care to support men living with prostate cancer (PCa). The algorithm is called No Evidence of Disease (Ned) and supports timelier decision-making, enhanced safety, and continuity of care. Methods: An initial rule set was developed and refined through working groups with clinical experts across Canada (eg, nurse experts, physician experts, and scientists; n=20), and patient partners (n=3). Algorithm priorities were defined through a multidisciplinary consensus meeting with clinical nurse specialists, nurse scientists, nurse practitioners, urologic oncologists, urologists, and radiation oncologists (n=17). The system was refined and validated using the nominal group technique. Results: Four levels of alert classification were established, initiated by responses on the Expanded Prostate Cancer Index Composite for Clinical Practice survey, and mediated by changes in minimal clinically important difference alert thresholds, alert history, and clinical urgency with patient autonomy influencing clinical acuity. Patient autonomy was supported through tailored education as a first line of response, and alert escalation depending on a patient-initiated request for a nurse consultation. Conclusions: The Ned algorithm is positioned to facilitate PCa nurse-led care models with a high nurse-to-patient ratio. This novel expert-informed PCa survivorship care algorithm contains a defined escalation pathway for clinically urgent symptoms while honoring patient preference. Though further validation is required through a pragmatic trial, we anticipate the Ned algorithm will support timelier decision-making and enhance continuity of care through more frequent automated checkpoints, while empowering patients to self-manage their symptoms more effectively than standard care.
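As an illustrative aside: a toy, rules-based alert classifier in the spirit of the 4-level escalation this entry describes. Level names, thresholds, and inputs are all hypothetical; the real rule set was defined by the study's expert working groups.

    def classify_alert(score_change, mcid, repeat_alert, nurse_requested):
        # score_change: worsening on one symptom domain of the survey;
        # mcid: that domain's minimal clinically important difference.
        if nurse_requested:
            return 'urgent review'       # patient-initiated escalation
        if score_change >= 2 * mcid:
            return 'clinical review'
        if score_change >= mcid:
            return 'repeat education' if repeat_alert else 'tailored education'
        return 'no alert'

    print(classify_alert(6, 4, repeat_alert=False, nurse_requested=False))
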
International Registered Report Identifier (IRRID): RR2-10.1136/bmjopen-2020-045806 ", doi="10.2196/44332", url="https://cancer.jmir.org/2023/1/e44332", url="http://www.ncbi.nlm.nih.gov/pubmed/37792435" } @Article{info:doi/10.2196/48425, author="Kim, Hyeonseong and Jeong, Seohyun and Hwang, Inae and Sung, Kiyoung and Moon, Woori and Shin, Min-Sup", title="Validation of a Brief Internet-Based Self-Report Measure of Maladaptive Personality and Interpersonal Schema: Confirmatory Factor Analysis", journal="Interact J Med Res", year="2023", month="Sep", day="29", volume="12", pages="e48425", keywords="maladaptive schema", keywords="measure of schema", keywords="self-report measure", keywords="internet-based measure", keywords="digital mental health care", keywords="interpersonal schema", abstract="Background: Existing digital mental health interventions mainly focus on the symptoms of specific mental disorders, but do not focus on Maladaptive Personalities and Interpersonal Schemas (MPISs). As an initial step toward considering personalities and schemas in intervention programs, there is a need for the development of tools for measuring core personality traits and interpersonal schemas known to cause psychological discomfort among potential users of digital mental health interventions. Thus, the MPIS was developed. Objective: The objectives of this study are to validate the MPIS by comparing 2 models of the MPIS factor structure and to understand the characteristics of the MPIS by assessing its correlations with other measures. Methods: Data were collected from 234 participants who were using web-based community sites in South Korea, including university students, graduate students, working professionals, and homemakers. All the data were gathered through web-based surveys. Confirmatory factor analysis was used to compare a single-factor model with a 5-factor model. Reliability and correlation analyses with other scales were performed. Results: The results of confirmatory factor analysis indicated that the 5-factor model ($\chi^2_{550}$=1278.1; Tucker-Lewis index=0.80; comparative fit index=0.81; and root mean square error of approximation=0.07) was more suitable than the single-factor model ($\chi^2_{560}$=2341.5; Tucker-Lewis index=0.52; comparative fit index=0.54; and root mean square error of approximation=0.11) for measuring maladaptive personality traits and interpersonal relationship patterns. The internal consistency of each factor of the MPIS was good (Cronbach $\alpha$=.71-.88), and the correlations with existing measures were statistically significant. The MPIS is a validated 35-item tool for measuring 5 essential personality traits and interpersonal schemas in adults aged 18-39 years. Conclusions: This study introduced the MPIS, a concise and effective questionnaire capable of measuring maladaptive personality traits and interpersonal relationship schemas. Through analysis, the MPIS was shown to reliably assess these psychological constructs and validate them. Its web-based accessibility and reduced item count make it a valuable tool for mental health assessment. Future applications include its integration into digital mental health care services, allowing easy web-based administration and aiding in the classification of psychological therapy programs based on the obtained results.
Trial Registration: ClinicalTrials.gov NCT05952063; https://www.clinicaltrials.gov/study/NCT05952063 ", doi="10.2196/48425", url="https://www.i-jmr.org/2023/1/e48425", url="http://www.ncbi.nlm.nih.gov/pubmed/37773606" } @Article{info:doi/10.2196/49438, author="Wac, Marceli and Craddock, Ian and Chantziara, Sofia and Campbell, Tabitha and Santos-Rodriguez, Raul and Davidson, Brittany and McWilliams, Chris", title="Design and Evaluation of an Intensive Care Unit Dashboard Built in Response to the COVID-19 Pandemic: Semistructured Interview Study", journal="JMIR Hum Factors", year="2023", month="Sep", day="26", volume="10", pages="e49438", keywords="software engineering", keywords="dashboard", keywords="interactive display", keywords="COVID-19", keywords="intensive care", keywords="critical care", keywords="intensive care unit", keywords="ICU", keywords="human-centered design", keywords="participatory design", keywords="health", keywords="design", keywords="interview", keywords="electronic health record", keywords="EHR", keywords="electronic patient record", keywords="EPR", keywords="clinical information system", keywords="CIS", keywords="thematic analysis", abstract="Background: Dashboards and interactive displays are becoming increasingly prevalent in most health care settings and have the potential to streamline access to information, consolidate disparate data sources and deliver new insights. Our research focuses on intensive care units (ICUs) which are heavily instrumented, critical care environments that generate vast amounts of data and frequently require individualized support for each patient. Consequently, clinicians experience a high cognitive load, which can translate to suboptimal performance. The global COVID-19 pandemic exacerbated this problem by generating a large number of additional hospitalizations, which necessitated a new tool that would help manage ICUs' census. In a previous study, we interviewed clinicians at the University Hospitals Bristol and Weston National Health Service Foundation Trust to capture the requirements for bespoke dashboards that would alleviate this problem. Objective: This study aims to design, implement, and evaluate an ICU dashboard to allow for monitoring of the high volume of patients in need of critical care, particularly tailored to high-demand situations, such as those seen during the COVID-19 pandemic. Methods: Building upon the previously gathered requirements, we developed a dashboard, integrated it within the ICU of a National Health Service trust, and allowed all staff to access our tool. For evaluation purposes, participants were recruited and interviewed following a 25-day period during which they were able to use the dashboard clinically. The semistructured interviews followed a topic guide aimed at capturing the usability of the dashboard, supplemented with additional questions asked post hoc to probe themes established during the interview. Interview transcripts were analyzed using a thematic analysis framework that combined inductive and deductive approaches and integrated the Technology Acceptance Model. Results: A total of 10 participants with 4 different roles in the ICU (6 consultants, 2 junior doctors, 1 nurse, and 1 advanced clinical practitioner) participated in the interviews. 
Our analysis generated 4 key topics that prevailed across the data: our dashboard met the usability requirements of the participants and was found useful and intuitive; participants perceived that it impacted their delivery of patient care by improving the access to the information and better equipping them to do their job; the tool was used in a variety of ways and for different reasons and tasks; and there were barriers to integration of our dashboard into practice, including familiarity with existing systems, which stifled the adoption of our tool. Conclusions: Our findings show that the perceived utility of the dashboard had a positive impact on the clinicians' workflows in the ICU. Improving access to information translated into more efficient patient care and transformed some of the existing processes. The introduction of our tool was met with positive reception, but its integration during the COVID-19 pandemic limited its adoption into practice. ", doi="10.2196/49438", url="https://humanfactors.jmir.org/2023/1/e49438", url="http://www.ncbi.nlm.nih.gov/pubmed/37751239" } @Article{info:doi/10.2196/45510, author="Butler, Mark and D'Angelo, Stefani and Ahn, Heejoon and Chandereng, Thevaa and Miller, Danielle and Perrin, Alexandra and Romain, N. Anne-Marie and Scatoni, Ava and Friel, P. Ciaran and Cheung, Ying-Kuen and Davidson, W. Karina", title="A Series of Personalized Virtual Light Therapy Interventions for Fatigue: Feasibility Randomized Crossover Trial for N-of-1 Treatment", journal="JMIR Form Res", year="2023", month="Sep", day="18", volume="7", pages="e45510", keywords="virtual light therapy interventions", keywords="fatigue", keywords="light therapy", keywords="primary care", keywords="feasibility", keywords="acceptability", keywords="effectiveness", keywords="usability", keywords="seasonal affective disorder", keywords="phototherapy", keywords="photoradiation", keywords="photochemotherapy", keywords="color therapy", keywords="heliotherapy", keywords="photothermal therapy", keywords="UV therapy", keywords="chromotherapy", keywords="color light therapy", keywords="mobile phone", abstract="Background: Fatigue is one of the most common symptoms treated in primary care and can lead to deficits in mental health and functioning. Light therapy can be an effective treatment for symptoms of fatigue; however, the feasibility, scalability, and individual-level heterogeneity of light therapy for fatigue are unknown. Objective: This study aimed to evaluate the feasibility, acceptability, and effectiveness of a series of personalized (N-of-1) interventions for the virtual delivery of bright light (BL) therapy and dim light (DL) therapy versus usual care (UC) treatment for fatigue in 60 participants. Methods: Participants completed satisfaction surveys comprising the System Usability Scale (SUS) and items assessing satisfaction with the components of the personalized trial. Symptoms of fatigue were measured using the Patient-Reported Outcomes Measurement Information System (PROMIS) daily, PROMIS weekly, and ecological momentary assessment (EMA) questionnaires delivered 3 times daily. Comparisons of fatigue between the BL, DL, and UC treatment periods were conducted using generalized linear mixed model analyses between participants and generalized least squares analyses within individual participants. 
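As an illustrative aside: a minimal statsmodels sketch of the between-participant comparison named just above, fitting a linear mixed model of daily fatigue by treatment period with random intercepts per participant; all column names and data are simulated.

    import numpy as np
    import pandas as pd
    import statsmodels.formula.api as smf

    rng = np.random.default_rng(3)
    n, days = 60, 30
    df = pd.DataFrame({
        'pid': np.repeat(np.arange(n), days),
        'condition': pd.Categorical(
            rng.choice(['UC', 'BL', 'DL'], size=n * days),
            categories=['UC', 'BL', 'DL']),   # UC is the reference level
        'fatigue': rng.normal(50, 8, size=n * days),
    })
    fit = smf.mixedlm('fatigue ~ condition', df, groups=df['pid']).fit()
    print(fit.summary())
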
Results: Participants rated the usability of the personalized trial as acceptable (average SUS score=78.9, SD 15.6), and 92\% (49/53) of those who completed satisfaction surveys stated that they would recommend the trial to others. The levels of fatigue symptoms measured using the PROMIS daily fatigue measure were lower or improved in the BL (B=-1.63, 95\% CI -2.63 to -0.63) and DL (B=-1.44, 95\% CI -2.50 to -0.38) periods relative to UC. The treatment effects of BL and DL on the PROMIS daily measure varied among participants. Similar findings were demonstrated for the PROMIS weekly and EMA measures of fatigue symptoms. Conclusions: The participant scores on the SUS and satisfaction surveys suggest that personalized N-of-1 trials of light therapy for fatigue symptoms are both feasible and acceptable. Both interventions produced significant (P<.05) reductions in participant-reported PROMIS and EMA fatigue symptoms relative to UC. However, the heterogeneity of these treatment effects across participants indicated that the effect of light therapy was not uniform. This heterogeneity along with high ratings of usability and satisfaction support the use of personalized N-of-1 research designs in evaluating the effect of light therapy on fatigue for each patient. Furthermore, the results of this trial provide additional support for the use of a series of personalized N-of-1 research trials. Trial Registration: ClinicalTrials.gov NCT04707846; https://clinicaltrials.gov/ct2/show/NCT04707846 ", doi="10.2196/45510", url="https://formative.jmir.org/2023/1/e45510", url="http://www.ncbi.nlm.nih.gov/pubmed/37721795" } @Article{info:doi/10.2196/42047, author="Fernandes, J. Glenn and Choi, Arthur and Schauer, Michael Jacob and Pfammatter, F. Angela and Spring, J. Bonnie and Darwiche, Adnan and Alshurafa, I. Nabil", title="An Explainable Artificial Intelligence Software Tool for Weight Management Experts (PRIMO): Mixed Methods Study", journal="J Med Internet Res", year="2023", month="Sep", day="6", volume="25", pages="e42047", keywords="explainable artificial intelligence", keywords="explainable AI", keywords="machine learning", keywords="ML", keywords="interpretable ML", keywords="random forest", keywords="decision-making", keywords="weight loss prediction", keywords="mobile phone", abstract="Background: Predicting the likelihood of success of weight loss interventions using machine learning (ML) models may enhance intervention effectiveness by enabling timely and dynamic modification of intervention components for nonresponders to treatment. However, a lack of understanding and trust in these ML models impacts adoption among weight management experts. Recent advances in the field of explainable artificial intelligence enable the interpretation of ML models, yet it is unknown whether they enhance model understanding, trust, and adoption among weight management experts. Objective: This study aimed to build and evaluate an ML model that can predict 6-month weight loss success (ie, $\geq$7\% weight loss) from 5 engagement and diet-related features collected over the initial 2 weeks of an intervention, to assess whether providing ML-based explanations increases weight management experts' agreement with ML model predictions, and to inform factors that influence the understanding and trust of ML models to advance explainability in early prediction of weight loss among weight management experts.
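As an illustrative aside: a sketch pairing the RF model described in the methods that follow with a simple post hoc explanation. PRIMO itself reasons over prime implicants; scikit-learn's permutation importance is used here only as a much simpler, generic stand-in, on simulated features.

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.inspection import permutation_importance

    rng = np.random.default_rng(4)
    X = rng.normal(size=(419, 5))    # 5 early engagement/diet features
    y = (X[:, 0] + rng.normal(size=419) > 0).astype(int)  # 1 = >=7% loss

    rf = RandomForestClassifier(random_state=0).fit(X, y)
    result = permutation_importance(rf, X, y, n_repeats=10, random_state=0)
    print(result.importances_mean)   # rough global feature relevance
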
Methods: We trained an ML model using the random forest (RF) algorithm and data from a 6-month weight loss intervention (N=419). We leveraged findings from existing explainability metrics to develop Prime Implicant Maintenance of Outcome (PRIMO), an interactive tool to understand predictions made by the RF model. We asked 14 weight management experts to predict hypothetical participants' weight loss success before and after using PRIMO. We compared PRIMO with 2 other explainability methods, one based on feature ranking and the other based on conditional probability. We used generalized linear mixed-effects models to evaluate participants' agreement with ML predictions and conducted likelihood ratio tests to examine the relationship between explainability methods and outcomes for nested models. We conducted guided interviews and thematic analysis to study the impact of our tool on experts' understanding and trust in the model. Results: Our RF model had 81\% accuracy in the early prediction of weight loss success. Weight management experts were significantly more likely to agree with the model when using PRIMO ($\chi^2$=7.9; P=.02) compared with the other 2 methods with odds ratios of 2.52 (95\% CI 0.91-7.69) and 3.95 (95\% CI 1.50-11.76). From our study, we inferred that our software not only influenced experts' understanding and trust but also impacted decision-making. Several themes were identified through interviews: preference for multiple explanation types, need to visualize uncertainty in explanations provided by PRIMO, and need for model performance metrics on similar participant test instances. Conclusions: Our results show the potential for weight management experts to agree with the ML-based early prediction of success in weight loss treatment programs, enabling timely and dynamic modification of intervention components to enhance intervention effectiveness. Our findings provide methods for advancing the understandability and trust of ML models among weight management experts. ", doi="10.2196/42047", url="https://www.jmir.org/2023/1/e42047", url="http://www.ncbi.nlm.nih.gov/pubmed/37672333" } @Article{info:doi/10.2196/47284, author="Bostr{\o}m, Katrine and B{\o}r{\o}sund, Elin and Eide, Hilde and Varsi, Cecilie and Kristjansdottir, Birna {\'O}l{\"o}f and Schreurs, G. Karlein M. and Waxenberg, B. Lori and Weiss, E. Karen and Morrison, J. Eleshia and Stavenes St{\o}le, Hanne and Cvancarova Sm{\aa}stuen, Milada and Stubhaug, Audun and Solberg Nes, Lise", title="Short-Term Findings From Testing EPIO, a Digital Self-Management Program for People Living With Chronic Pain: Randomized Controlled Trial", journal="J Med Internet Res", year="2023", month="Aug", day="25", volume="25", pages="e47284", keywords="chronic pain", keywords="self-management", keywords="digital health", keywords="efficacy", keywords="cognitive behavioral therapy", keywords="acceptance and commitment therapy", abstract="Background: Chronic pain conditions involve numerous physical and psychological challenges, and while psychosocial self-management interventions can be of benefit for people living with chronic pain, such in-person treatment is not always accessible. Digital self-management approaches could improve this disparity, potentially bolstering outreach and providing easy, relatively low-cost access to pain self-management interventions.
Objective: This randomized controlled trial aimed to evaluate the short-term efficacy of EPIO (ie, inspired by the Greek goddess for the soothing of pain, Epione), a digital self-management intervention, for people living with chronic pain. Methods: Patients (N=266) were randomly assigned to either the EPIO intervention (n=132) or a care-as-usual control group (n=134). Outcome measures included pain interference (Brief Pain Inventory; primary outcome measure), anxiety and depression (Hospital Anxiety and Depression Scale), self-regulatory fatigue (Self-Regulatory Fatigue 18 scale), health-related quality of life (SF-36 Short Form Health Survey), pain catastrophizing (Pain Catastrophizing Scale), and pain acceptance (Chronic Pain Acceptance Questionnaire). Linear regression models used change scores as the dependent variables. Results: The participants were primarily female (210/259, 81.1\%), with a median age of 49 (range 22-78) years and a variety of pain conditions. Analyses (n=229) after 3 months revealed no statistically significant changes for the primary outcome of pain interference (P=.84), but significant reductions in the secondary outcomes of depression (mean difference -0.90; P=.03) and self-regulatory fatigue (mean difference -2.76; P=.008) in favor of the intervention group. No other statistically significant changes were observed at 3 months (all P>.05). Participants described EPIO as useful (ie, totally agree or agree; 95/109, 87.2\%) and easy to use (101/109, 92.7\%), with easily understandable exercises (106/109, 97.2\%). Conclusions: Evidence-informed, user-centered digital pain self-management interventions such as EPIO may have the potential to effectively support self-management and improve psychological functioning in the form of reduced symptoms of depression and improved capacity to regulate thoughts, feelings, and behavior for people living with chronic pain. Trial Registration: ClinicalTrials.gov NCT03705104; https://clinicaltrials.gov/ct2/show/NCT03705104 ", doi="10.2196/47284", url="https://www.jmir.org/2023/1/e47284", url="http://www.ncbi.nlm.nih.gov/pubmed/37624622" } @Article{info:doi/10.2196/46322, author="Liu, Leibo and Perez-Concha, Oscar and Nguyen, Anthony and Bennett, Vicki and Blake, Victoria and Gallego, Blanca and Jorm, Louisa", title="Web-Based Application Based on Human-in-the-Loop Deep Learning for Deidentifying Free-Text Data in Electronic Medical Records: Development and Usability Study", journal="Interact J Med Res", year="2023", month="Aug", day="25", volume="12", pages="e46322", keywords="web-based system", keywords="deidentification", keywords="electronic medical records", keywords="deep learning", keywords="narrative free text", keywords="human in the loop", keywords="free text", keywords="unstructured data", keywords="electronic health records", keywords="machine learning", abstract="Background: The narrative free-text data in electronic medical records (EMRs) contain valuable clinical information for analysis and research to inform better patient care. However, the release of free text for secondary use is hindered by concerns surrounding personally identifiable information (PII), as protecting individuals' privacy is paramount. Therefore, it is necessary to deidentify free text to remove PII. Manual deidentification is a time-consuming and labor-intensive process. Numerous automated deidentification approaches and systems have been attempted to overcome this challenge over the past decade.
Objective: We sought to develop an accurate, web-based system for deidentifying free text (DEFT), which can be readily and easily adopted in real-world settings for deidentification of free text in EMRs. The system has several key features including a simple and task-focused web user interface, customized PII types, use of a state-of-the-art deep learning model for tagging PII from free text, preannotation by an interactive learning loop, rapid manual annotation with autosave, support for project management and team collaboration, user access control, and central data storage. Methods: DEFT comprises frontend and backend modules and communicates with central data storage through filesystem path access. The frontend web user interface provides end users with a user-friendly workspace for managing and annotating free text. The backend module processes the requests from the frontend and performs relevant persistence operations. DEFT manages the deidentification workflow as a project, which can contain one or more data sets. Customized PII types and user access control can also be configured. The deep learning model is based on a Bidirectional Long Short-Term Memory-Conditional Random Field (BiLSTM-CRF) with RoBERTa as the word embedding layer. The interactive learning loop is further integrated into DEFT to speed up the deidentification process and increase its performance over time. Results: DEFT has many advantages over existing deidentification systems in terms of its support for project management, user access control, data management, and an interactive learning process. In experiments on the 2014 i2b2 data set, DEFT achieved the highest performance compared with 5 benchmark models, with a microaverage strict entity--level recall and F1-score of 0.9563 and 0.9627, respectively. In a real-world use case of deidentifying clinical notes, extracted from 1 referral hospital in Sydney, New South Wales, Australia, DEFT achieved a high microaverage strict entity--level F1-score of 0.9507 on a corpus of 600 annotated clinical notes. Moreover, the manual annotation process with preannotation demonstrated a 43\% increase in work efficiency compared to the process without preannotation. Conclusions: DEFT is designed for health domain researchers and data custodians to easily deidentify free text in EMRs. DEFT supports an interactive learning loop, and end users with minimal technical knowledge can perform the deidentification work with only a shallow learning curve. ", doi="10.2196/46322", url="https://www.i-jmr.org/2023/1/e46322", url="http://www.ncbi.nlm.nih.gov/pubmed/37624624" } @Article{info:doi/10.2196/45651, author="Yang, Dan and Su, Zihan and Mu, Runqing and Diao, Yingying and Zhang, Xin and Liu, Yusi and Wang, Shuo and Wang, Xu and Zhao, Lei and Wang, Hongyi and Zhao, Min", title="Effects of Using Different Indirect Techniques on the Calculation of Reference Intervals: Observational Study", journal="J Med Internet Res", year="2023", month="Jul", day="17", volume="25", pages="e45651", keywords="comparative study", keywords="data transformation", keywords="indirect method", keywords="outliers", keywords="reference interval", keywords="clinical decision-making", keywords="complete blood count", keywords="red blood cells", keywords="white blood cells", keywords="platelets", keywords="laboratory", keywords="clinical", abstract="Background: Reference intervals (RIs) play an important role in clinical decision-making.
However, due to the time, labor, and financial costs involved in establishing RIs using direct means, the use of indirect methods, based on big data previously obtained from clinical laboratories, is receiving increasing attention. Different indirect techniques combined with different data transformation methods and outlier removal might cause differences in the calculation of RIs. However, there are few systematic evaluations of this. Objective: This study used data derived from direct methods as reference standards and evaluated the accuracy of combinations of different data transformation, outlier removal, and indirect techniques in establishing complete blood count (CBC) RIs for large-scale data. Methods: The CBC data of populations aged ≥18 years undergoing physical examination from January 2010 to December 2011 were retrieved from the First Affiliated Hospital of China Medical University in northern China. After exclusion of repeated individuals, we performed parametric, nonparametric, Hoffmann, Bhattacharya, and truncation points and Kolmogorov--Smirnov distance (kosmic) indirect methods, combined with log or Box-Cox transformation, and Reed--Dixon, Tukey, and iterative mean (3SD) outlier removal methods in order to derive the RIs of 8 CBC parameters and compared the results with those previously established by direct methods. Furthermore, bias ratios (BRs) were calculated to assess which combination of indirect technique, data transformation pattern, and outlier removal method is preferable. Results: Raw data showed that the degrees of skewness of the white blood cell (WBC) count, platelet (PLT) count, mean corpuscular hemoglobin (MCH), mean corpuscular hemoglobin concentration (MCHC), and mean corpuscular volume (MCV) were much more obvious than those of other CBC parameters. After log or Box-Cox transformation combined with Tukey or iterative mean (3SD) processing, the distribution types of these data were close to a Gaussian distribution. Tukey-based outlier removal yielded the maximum number of outliers. The lower-limit bias of WBC (male), PLT (male), hemoglobin (HGB; male), MCH (male/female), and MCV (female) was greater than that of the corresponding upper limit for more than half of 30 indirect methods. Computational indirect choices of CBC parameters for males and females were inconsistent. The RIs of MCHC established by the direct method for females were narrow. For this, the kosmic method was markedly superior, which contrasted with the RI calculation of CBC parameters with high |BR| qualification rates for males. Among the top 10 methodologies for the WBC count, PLT count, HGB, MCV, and MCHC with a high-BR qualification rate among males, the Bhattacharya, Hoffmann, and parametric methods were superior to the other 2 indirect methods. Conclusions: Compared to results derived by the direct method, outlier removal methods and indirect techniques markedly influence the final RIs, whereas data transformation has negligible effects, except for obviously skewed data. Specifically, the outlier removal efficiency of Tukey and iterative mean (3SD) methods is almost equivalent. Furthermore, the choice of indirect techniques depends more on the characteristics of the studied analyte itself. This study provides scientific evidence for clinical laboratories to use their previous data sets to establish RIs.
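To make the preceding comparison concrete, the following minimal Python sketch combines one of the method families evaluated in the study: Box-Cox transformation, Tukey outlier removal, and a simple parametric limit estimate. The function name and the simplifications are ours; the Hoffmann, Bhattacharya, and kosmic techniques compared in the study are considerably more involved.

import numpy as np
from scipy import stats
from scipy.special import inv_boxcox

def indirect_ri(values, z=1.96):
    """Estimate a central 95% reference interval from mixed routine results."""
    x = np.asarray(values, dtype=float)
    x = x[x > 0]                                   # Box-Cox requires positive data
    xt, lam = stats.boxcox(x)                      # transform toward normality
    q1, q3 = np.percentile(xt, [25, 75])           # Tukey fences on transformed data
    iqr = q3 - q1
    kept = xt[(xt >= q1 - 1.5 * iqr) & (xt <= q3 + 1.5 * iqr)]
    mu, sd = kept.mean(), kept.std(ddof=1)         # parametric limits
    return inv_boxcox(mu - z * sd, lam), inv_boxcox(mu + z * sd, lam)

For instance, indirect_ri(np.random.lognormal(0, 0.25, 100000)) recovers approximately the central 95\% range of the simulated population.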
", doi="10.2196/45651", url="https://www.jmir.org/2023/1/e45651", url="http://www.ncbi.nlm.nih.gov/pubmed/37459170" } @Article{info:doi/10.2196/44331, author="Nelson, Walter and Khanna, Nityan and Ibrahim, Mohamed and Fyfe, Justin and Geiger, Maxwell and Edwards, Keith and Petch, Jeremy", title="Optimizing Patient Record Linkage in a Master Patient Index Using Machine Learning: Algorithm Development and Validation", journal="JMIR Form Res", year="2023", month="Jun", day="29", volume="7", pages="e44331", keywords="medical record linkage", keywords="electronic health records", keywords="medical record systems", keywords="computerized", keywords="machine learning", keywords="quality of care", keywords="health care system", keywords="open-source software", keywords="Bayesian optimization", keywords="pilot", keywords="data linkage", keywords="master patient index", keywords="master index", keywords="record link", keywords="matching algorithm", keywords="FEBRL", abstract="Background: To provide quality care, modern health care systems must match and link data about the same patient from multiple sources, a function often served by master patient index (MPI) software. Record linkage in the MPI is typically performed manually by health care providers, guided by automated matching algorithms. These matching algorithms must be configured in advance, such as by setting the weights of patient attributes, usually by someone with knowledge of both the matching algorithm and the patient population being served. Objective: We aimed to develop and evaluate a machine learning--based software tool, which automatically configures a patient matching algorithm by learning from pairs of patient records previously linked by humans already present in the database. Methods: We built a free and open-source software tool to optimize record linkage algorithm parameters based on historical record linkages. The tool uses Bayesian optimization to identify the set of configuration parameters that lead to optimal matching performance in a given patient population, by learning from prior record linkages by humans. The tool is written assuming only the existence of a minimal HTTP application programming interface (API), and so is agnostic to the choice of MPI software, record linkage algorithm, and patient population. As a proof of concept, we integrated our tool with Sant{\'e}MPI, an open-source MPI. We validated the tool using several synthetic patient populations in Sant{\'e}MPI by comparing the performance of the optimized configuration in held-out data to Sant{\'e}MPI's default matching configuration using sensitivity and specificity. Results: The machine learning--optimized configurations correctly detect over 90\% of true record linkages as definite matches, with 100\% specificity and positive predictive value in all data sets, whereas the baseline detects none. In the largest data set examined, the baseline matching configuration detects possible record linkages with a sensitivity of 90.2\% (95\% CI 88.4\%-92.0\%) and specificity of 100\%. By comparison, the machine learning--optimized matching configuration attains a sensitivity of 100\%, with a decreased specificity of 95.9\% (95\% CI 95.9\%-96.0\%). We report significant gains in sensitivity in all data sets examined, at the cost of only marginally decreased specificity. The configuration optimization tool, data, and data set generator have been made freely available.
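As a sketch of the configuration-optimization idea described above, the snippet below tunes 3 hypothetical attribute weights with Gaussian process Bayesian optimization via scikit-optimize; score_linkage is a toy stand-in for replaying historical, human-linked pairs through an MPI's matching API and is not the actual Sant{\'e}MPI interface.

from skopt import gp_minimize
from skopt.space import Real

def score_linkage(weights):
    # Stand-in for re-running the matcher on historical, human-linked record
    # pairs and returning an F1-score; a toy surrogate with a known optimum.
    target = [0.8, 0.9, 0.6]                       # illustrative "ideal" weights
    return 1.0 - sum((w - t) ** 2 for w, t in zip(weights, target)) / 3.0

space = [Real(0.0, 1.0, name="given_name_weight"),
         Real(0.0, 1.0, name="family_name_weight"),
         Real(0.0, 1.0, name="birth_date_weight")]

# Minimize 1 - F1 so that better matching configurations score lower.
result = gp_minimize(lambda w: 1.0 - score_linkage(w), space,
                     n_calls=40, random_state=0)
print("best weights:", result.x, "best F1:", 1.0 - result.fun)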
Conclusions: Our machine learning software tool can be used to significantly improve the performance of existing record linkage algorithms, without knowledge of the algorithm being used or specific details of the patient population being served. ", doi="10.2196/44331", url="https://formative.jmir.org/2023/1/e44331", url="http://www.ncbi.nlm.nih.gov/pubmed/37384382" } @Article{info:doi/10.2196/43633, author="Brankovic, Aida and Hendrie, A. Gilly and Baird, L. Danielle and Khanna, Sankalp", title="Predicting Disengagement to Better Support Outcomes in a Web-Based Weight Loss Program Using Machine Learning Models: Cross-Sectional Study", journal="J Med Internet Res", year="2023", month="Jun", day="26", volume="25", pages="e43633", keywords="web-based weight loss program", keywords="predicting engagement", keywords="machine learning--driven intervention", keywords="machine learning", keywords="artificial intelligence", abstract="Background: Engagement is key to interventions that achieve successful behavior change and improvements in health. There is limited literature on the application of predictive machine learning (ML) models to data from commercially available weight loss programs to predict disengagement. Such data could help participants achieve their goals. Objective: This study aimed to use explainable ML to predict the risk of member disengagement week by week over 12 weeks on a commercially available web-based weight loss program. Methods: Data were available from 59,686 adults who participated in the weight loss program between October 2014 and September 2019. Data included year of birth, sex, height, weight, motivation to join the program, use statistics (eg, weight entries, entries into the food diary, views of the menu, and program content), program type, and weight loss. Random forest, extreme gradient boosting, and logistic regression with L1 regularization models were developed and validated using a 10-fold cross-validation approach. In addition, temporal validation was performed on a test cohort of 16,947 members who participated in the program between April 2018 and September 2019, and the remaining data were used for model development. Shapley values were used to identify globally relevant features and explain individual predictions. Results: The average age of the participants was 49.60 (SD 12.54) years, the average starting BMI was 32.43 (SD 6.19), and 81.46\% (39,594/48,604) of the participants were female. The class distributions (active and inactive members) changed from 39,369 and 9235 in week 2 to 31,602 and 17,002 in week 12, respectively. With 10-fold cross-validation, extreme gradient boosting models had the best predictive performance, which ranged from 0.85 (95\% CI 0.84-0.85) to 0.93 (95\% CI 0.93-0.93) for area under the receiver operating characteristic curve and from 0.57 (95\% CI 0.56-0.58) to 0.95 (95\% CI 0.95-0.96) for area under the precision-recall curve (across 12 weeks of the program). They also showed good calibration. Results obtained with temporal validation ranged from 0.51 to 0.95 for the area under the precision-recall curve and 0.84 to 0.93 for the area under the receiver operating characteristic curve across the 12 weeks. There was a considerable improvement of 20\% in the area under the precision-recall curve in week 3 of the program.
On the basis of the computed Shapley values, the most important features for predicting disengagement in the following week were those related to the total activity on the platform and entering a weight in the previous weeks. Conclusions: This study showed the potential of applying ML predictive algorithms to help predict and understand participants' disengagement with a web-based weight loss program. Given the association between engagement and health outcomes, these findings can prove valuable in providing better support to individuals to enhance their engagement and potentially achieve greater weight loss. ", doi="10.2196/43633", url="https://www.jmir.org/2023/1/e43633", url="http://www.ncbi.nlm.nih.gov/pubmed/37358890" } @Article{info:doi/10.2196/44042, author="Sigle, Manuel and Berliner, Leon and Richter, Erich and van Iersel, Mart and Gorgati, Eleonora and Hubloue, Ives and Bamberg, Maximilian and Grasshoff, Christian and Rosenberger, Peter and Wunderlich, Robert", title="Development of an Anticipatory Triage-Ranking Algorithm Using Dynamic Simulation of the Expected Time Course of Patients With Trauma: Modeling and Simulation Study", journal="J Med Internet Res", year="2023", month="Jun", day="15", volume="25", pages="e44042", keywords="novel triage algorithm", keywords="patient with trauma", keywords="dynamic patient simulation", keywords="mathematic model", keywords="artificial patient database", keywords="semisupervised generation of patients with artificial trauma", keywords="high-dimensional analysis of patient database", keywords="Germany", keywords="algorithm", keywords="trauma", keywords="proof-of-concept", keywords="model", keywords="emergency", keywords="triage", keywords="simulation", keywords="urgency", keywords="urgent", keywords="severity", keywords="rank", keywords="vital sign", abstract="Background: In cases of terrorism, disasters, or mass casualty incidents, far-reaching life-and-death decisions about prioritizing patients are currently made using triage algorithms that focus solely on the patient's current health status rather than their prognosis, thus leaving a fatal gap of patients who are under- or overtriaged. Objective: The aim of this proof-of-concept study is to demonstrate a novel approach for triage that no longer classifies patients into triage categories but ranks their urgency according to the anticipated survival time without intervention. Using this approach, we aim to improve the prioritization of casualties by respecting individual injury patterns and vital signs, survival likelihoods, and the availability of rescue resources. Methods: We designed a mathematical model that allows dynamic simulation of the time course of a patient's vital parameters, depending on individual baseline vital signs and injury severity. The 2 variables were integrated using the well-established Revised Trauma Score (RTS) and the New Injury Severity Score (NISS). An artificial patient database of unique patients with trauma (N=82,277) was then generated and used for analysis of the time course modeling and triage classification. Comparative performance analysis of different triage algorithms was performed. In addition, we applied a sophisticated, state-of-the-art clustering method using the Gower distance to visualize patient cohorts at risk for mistriage. Results: The proposed triage algorithm realistically modeled the time course of a patient's life, depending on injury severity and current vital parameters. 
Different casualties were ranked by their anticipated time course, reflecting their priority for treatment. Regarding the identification of patients at risk for mistriage, the model outperformed not only the Simple Triage And Rapid Treatment triage algorithm but also exclusive stratification by the RTS or the NISS. Multidimensional analysis separated patients with similar patterns of injuries and vital parameters into clusters with different triage classifications. In this large-scale analysis, our algorithm confirmed the previously mentioned conclusions during simulation and descriptive analysis and underlined the significance of this novel approach to triage. Conclusions: The findings of this study suggest the feasibility and relevance of our model, which is unique in terms of its ranking system, prognosis outline, and time course anticipation. The proposed triage-ranking algorithm could offer an innovative triage method with a wide range of applications in prehospital, disaster, and emergency medicine, as well as simulation and research. ", doi="10.2196/44042", url="https://www.jmir.org/2023/1/e44042", url="http://www.ncbi.nlm.nih.gov/pubmed/37318826" } @Article{info:doi/10.2196/44567, author="Kusejko, Katharina and Smith, Daniel and Scherrer, Alexandra and Paioni, Paolo and Kohns Vasconcelos, Malte and Aebi-Popp, Karoline and Kouyos, D. Roger and G{\"u}nthard, F. Huldrych and Kahlert, R. Christian and ", title="Migrating a Well-Established Longitudinal Cohort Database From Oracle SQL to Research Electronic Data Entry (REDCap): Data Management Research and Design Study", journal="JMIR Form Res", year="2023", month="May", day="31", volume="7", pages="e44567", keywords="REDCap", keywords="cohort study", keywords="data collection", keywords="electronic case report forms", keywords="eCRF", keywords="software", keywords="digital solution", keywords="electronic data entry", keywords="HIV", abstract="Background: Providing user-friendly electronic data collection tools for large multicenter studies is key for obtaining high-quality research data. Research Electronic Data Capture (REDCap) is a software solution developed for setting up research databases with integrated graphical user interfaces for electronic data entry. The Swiss Mother and Child HIV Cohort Study (MoCHiV) is a longitudinal cohort study with around 2 million data entries dating back to the early 1980s. Until 2022, data collection in MoCHiV was paper-based. Objective: The objective of this study was to provide a user-friendly graphical interface for electronic data entry for physicians and study nurses reporting MoCHiV data. Methods: MoCHiV collects information on obstetric events among women living with HIV and children born to mothers living with HIV. Until 2022, MoCHiV data were stored in an Oracle SQL relational database. In this project, R and REDCap were used to develop an electronic data entry platform for MoCHiV with migration of already collected data. Results: The key steps for providing an electronic data entry option for MoCHiV were (1) design, (2) data cleaning and formatting, (3) migration and compliance, and (4) add-on features. In the first step, the database structure was defined in REDCap, including the specification of primary and foreign keys, definition of study variables, and the hierarchy of questions (termed ``branching logic''). In the second step, data stored in Oracle were cleaned and formatted to adhere to the defined database structure.
Systematic data checks ensured compliance with all branching logic and levels of categorical variables. REDCap-specific variables and numbering of repeated events for enabling a relational data structure in REDCap were generated using R. In the third step, data were imported to REDCap and then systematically compared to the original data. In the last step, add-on features, such as data access groups, redirections, and summary reports, were integrated to facilitate data entry in the multicenter MoCHiV study. Conclusions: By combining different software tools---Oracle SQL, R, and REDCap---and building a systematic pipeline for data cleaning, formatting, and comparison, we were able to migrate a multicenter longitudinal cohort study from Oracle SQL to REDCap. REDCap offers a flexible way for developing customized study designs, even in the case of longitudinal studies with different study arms (ie, obstetric events, women, and mother-child pairs). However, REDCap does not offer built-in tools for preprocessing large data sets before data import. Additional software is needed (eg, R) for data formatting and cleaning to achieve the predefined REDCap data structure. ", doi="10.2196/44567", url="https://formative.jmir.org/2023/1/e44567", url="http://www.ncbi.nlm.nih.gov/pubmed/37256686" } @Article{info:doi/10.2196/43871, author="Benis, Arriel and Haghi, Mostafa and Deserno, M. Thomas and Tamburis, Oscar", title="One Digital Health Intervention for Monitoring Human and Animal Welfare in Smart Cities: Viewpoint and Use Case", journal="JMIR Med Inform", year="2023", month="May", day="19", volume="11", pages="e43871", keywords="One Health", keywords="Digital Health", keywords="One Digital Health", keywords="accident and emergency informatics", keywords="eHealth", keywords="informatics", keywords="medicine", keywords="veterinary medicine", keywords="environmental monitoring", keywords="education", keywords="patient engagement", keywords="citizen science", keywords="data science", keywords="pets", keywords="human-animal bond", keywords="intervention", keywords="ambulatory monitoring", keywords="health monitoring", keywords="Internet of Things", keywords="smart environment", keywords="mobile phone", doi="10.2196/43871", url="https://medinform.jmir.org/2023/1/e43871", url="http://www.ncbi.nlm.nih.gov/pubmed/36305540" } @Article{info:doi/10.2196/45156, author="Peretz, Gal and Taylor, Barr C. and Ruzek, I. Josef and Jefroykin, Samuel and Sadeh-Sharvit, Shiri", title="Machine Learning Model to Predict Assignment of Therapy Homework in Behavioral Treatments: Algorithm Development and Validation", journal="JMIR Form Res", year="2023", month="May", day="15", volume="7", pages="e45156", keywords="deep learning", keywords="empirically-based practice", keywords="natural language processing", keywords="behavioral treatment", keywords="machine learning", keywords="homework", keywords="treatment fidelity", keywords="artificial intelligence", keywords="intervention", keywords="therapy", keywords="mental health", keywords="mHealth", abstract="Background: Therapeutic homework is a core element of cognitive and behavioral interventions, and greater homework compliance predicts improved treatment outcomes. To date, research in this area has relied mostly on therapists' and clients' self-reports or studies carried out in academic settings, and there is little knowledge of how homework is used as a treatment intervention in routine clinical care.
Objective: This study tested whether a machine learning (ML) model using natural language processing could identify homework assignments in behavioral health sessions. By leveraging this technology, we sought to develop a more objective and accurate method for detecting the presence of homework in therapy sessions. Methods: We analyzed 34,497 audio-recorded treatment sessions delivered in 8 behavioral health care programs via an artificial intelligence (AI) platform for therapy developed by Eleos Health. Therapist and client utterances were captured and analyzed via the AI platform. Experts reviewed the homework assigned in 100 sessions to create classifications. Next, we sampled 4000 sessions and labeled therapist-client microdialogues that suggested homework to train an unsupervised sentence embedding model. This model was trained on 2.83 million therapist-client microdialogues. Results: An analysis of 100 random sessions found that homework was assigned in 61\% (n=61) of sessions, and in 34\% (n=21) of these cases, more than one homework assignment was provided. Homework addressed practicing skills (n=34, 37\%), taking action (n=26, 28.5\%), journaling (n=17, 19\%), and learning new skills (n=14, 15\%). Our classifier reached a 72\% F1-score, outperforming state-of-the-art ML models. The therapists reviewing the microdialogues agreed in 90\% (n=90) of cases on whether or not homework was assigned. Conclusions: The findings of this study demonstrate the potential of ML and natural language processing to improve the detection of therapeutic homework assignments in behavioral health sessions. Our findings highlight the importance of accurately capturing homework in real-world settings and the potential for AI to support therapists in providing evidence-based care and increasing fidelity with science-backed interventions. By identifying areas where AI can facilitate homework assignments and tracking, such as reminding therapists to prescribe homework and reducing the charting associated with homework, we can ultimately improve the overall quality of behavioral health care. Additionally, our approach can be extended to investigate the impact of homework assignments on therapeutic outcomes, providing insights into the effectiveness of specific types of homework. ", doi="10.2196/45156", url="https://formative.jmir.org/2023/1/e45156", url="http://www.ncbi.nlm.nih.gov/pubmed/37184927" } @Article{info:doi/10.2196/44644, author="Jing, Xia and Patel, L. Vimla and Cimino, J. James and Shubrook, H. Jay and Zhou, Yuchun and Draghi, N. Brooke and Ernst, A. Mytchell and Liu, Chang and De Lacalle, Sonsoles", title="A Visual Analytic Tool (VIADS) to Assist the Hypothesis Generation Process in Clinical Research: Mixed Methods Usability Study", journal="JMIR Hum Factors", year="2023", month="Apr", day="27", volume="10", pages="e44644", keywords="usability", keywords="VIADS", keywords="data-driven hypothesis generation", keywords="visualization", keywords="clinical research", keywords="SUS", keywords="mixed methods study", abstract="Background: Visualization can be a powerful tool to comprehend data sets, especially when they can be represented via hierarchical structures. Enhanced comprehension can facilitate the development of scientific hypotheses. However, the inclusion of excessive data can make visualizations overwhelming. Objective: We developed a visual interactive analytic tool for filtering and summarizing large health data sets coded with hierarchical terminologies (VIADS).
In this study, we evaluated the usability of VIADS for visualizing data sets of patient diagnoses and procedures coded in the International Classification of Diseases, Ninth Revision, Clinical Modification (ICD-9-CM). Methods: We used mixed methods in the study. A group of 12 clinical researchers participated in the generation of data-driven hypotheses using the same data sets and time frame (a 1-hour training session and a 2-hour study session), utilizing VIADS while following the think-aloud protocol. The audio and screen activities were recorded remotely. A modified version of the System Usability Scale (SUS) survey and a brief survey with open-ended questions were administered after the study to assess the usability of VIADS and to capture participants' intensive usage experience with VIADS. Results: The range of SUS scores was 37.5 to 87.5. The mean SUS score for VIADS was 71.88 (out of a possible 100, SD 14.62), and the median SUS was 75. The participants unanimously agreed that VIADS offers new perspectives on data sets (12/12, 100\%), while 75\% (8/12) agreed that VIADS facilitates understanding, presentation, and interpretation of underlying data sets. The comments on the utility of VIADS were positive and aligned well with the design objectives of VIADS. The answers to the open-ended questions in the modified SUS provided specific suggestions regarding potential improvements for VIADS, and the identified problems with usability were used to update the tool. Conclusions: This usability study demonstrates that VIADS is a usable tool for analyzing secondary data sets, with a good average SUS score and favorable utility. Currently, VIADS accepts data sets with hierarchical codes and their corresponding frequencies. Consequently, only specific types of use cases are supported by the analytical results. Participants agreed, however, that VIADS provides new perspectives on data sets and is relatively easy to use. The VIADS functionalities most appreciated by participants were the ability to filter, summarize, compare, and visualize data. International Registered Report Identifier (IRRID): RR2-10.2196/39414 ", doi="10.2196/44644", url="https://humanfactors.jmir.org/2023/1/e44644", url="http://www.ncbi.nlm.nih.gov/pubmed/37011112" } @Article{info:doi/10.2196/43664, author="Shen, Alexander and Francisco, Luke and Sen, Srijan and Tewari, Ambuj", title="Exploring the Relationship Between Privacy and Utility in Mobile Health: Algorithm Development and Validation via Simulations of Federated Learning, Differential Privacy, and External Attacks", journal="J Med Internet Res", year="2023", month="Apr", day="20", volume="25", pages="e43664", keywords="privacy", keywords="data protection", keywords="machine learning", keywords="federated learning", keywords="neural networks", keywords="mobile health", keywords="mHealth", keywords="wearable electronic devices", keywords="differential privacy", keywords="learning", keywords="evidence", keywords="feasibility", keywords="applications", keywords="training", keywords="technology", keywords="mobile phone", abstract="Background: Although evidence supporting the feasibility of large-scale mobile health (mHealth) systems continues to grow, privacy protection remains an important implementation challenge. The potential scale of publicly available mHealth applications and the sensitive nature of the data involved will inevitably attract unwanted attention from adversarial actors seeking to compromise user privacy.
Although privacy-preserving technologies such as federated learning (FL) and differential privacy (DP) offer strong theoretical guarantees, it is not clear how such technologies actually perform under real-world conditions. Objective: Using data from the University of Michigan Intern Health Study (IHS), we assessed the privacy protection capabilities of FL and DP against the trade-offs in the associated model's accuracy and training time. Using a simulated external attack on a target mHealth system, we aimed to measure the effectiveness of such an attack under various levels of privacy protection on the target system and measure the costs to the target system's performance associated with the chosen levels of privacy protection. Methods: A neural network classifier that attempts to predict IHS participant daily mood ecological momentary assessment score from sensor data served as our target system. An external attacker attempted to identify participants whose average mood ecological momentary assessment score is lower than the global average. The attack followed techniques in the literature, given the relevant assumptions about the abilities of the attacker. For measuring attack effectiveness, we collected attack success metrics (area under the curve [AUC], positive predictive value, and sensitivity), and for measuring privacy costs, we calculated the target model training time and measured the model utility metrics. Both sets of metrics are reported under varying degrees of privacy protection on the target. Results: We found that FL alone does not provide adequate protection against the privacy attack proposed above, where the attacker's AUC in determining which participants exhibit lower than average mood is over 0.90 in the worst-case scenario. However, under the highest level of DP tested in this study, the attacker's AUC fell to approximately 0.59 with only a 10 percentage point decrease in the target's R2 and a 43\% increase in model training time. Attack positive predictive value and sensitivity followed similar trends. Finally, we showed that participants in the IHS most likely to require strong privacy protection are also most at risk from this particular privacy attack and subsequently stand to benefit the most from these privacy-preserving technologies. Conclusions: Our results demonstrated both the necessity of proactive privacy protection research and the feasibility of the current FL and DP methods implemented in a real mHealth scenario. Our simulation methods characterized the privacy-utility trade-off in our mHealth setup using highly interpretable metrics, providing a framework for future research into privacy-preserving technologies in data-driven health and medical applications. ", doi="10.2196/43664", url="https://www.jmir.org/2023/1/e43664", url="http://www.ncbi.nlm.nih.gov/pubmed/37079370" } @Article{info:doi/10.2196/43484, author="Velummailum, Ruthiran and McKibbon, Chelsea and Brenner, R.
Darren and Stringer, Ann Elizabeth and Ekstrom, Leeland and Dron, Louis", title="Data Challenges for Externally Controlled Trials: Viewpoint", journal="J Med Internet Res", year="2023", month="Apr", day="5", volume="25", pages="e43484", keywords="external control arm", keywords="synthetic control arm", keywords="single-arm trial", keywords="real-world evidence", keywords="regulatory approval", keywords="data", keywords="clinical", keywords="decision-making", keywords="efficacy", keywords="rare conditions", keywords="trial", doi="10.2196/43484", url="https://www.jmir.org/2023/1/e43484", url="http://www.ncbi.nlm.nih.gov/pubmed/37018021" } @Article{info:doi/10.2196/43316, author="Howell, Pamella and Aryal, Arun and Wu, Crystal", title="Web-Based Patient Recommender Systems for Preventive Care: Protocol for Empirical Research Propositions", journal="JMIR Res Protoc", year="2023", month="Mar", day="30", volume="12", pages="e43316", keywords="recommender systems", keywords="preventive care", keywords="health information systems", keywords="eHealth", keywords="clinical quality measures", abstract="Background: Preventive care helps patients identify and address medical issues early when they are easy to treat. The internet offers vast information about preventive measures, but the sheer volume of data can be overwhelming for individuals to process. To help individuals navigate this information, recommender systems filter and recommend relevant information to specific users. Despite their popularity in other fields, such as e-commerce, recommender systems have yet to be extensively studied as tools to support the implementation of prevention strategies in health care. This underexplored area presents an opportunity for recommender systems to serve as a complementary tool for medical professionals to enhance patient-centered decision-making and for patients to access health information. Thus, these systems can potentially improve the delivery of preventive care. Objective: This study proposes practical, evidence-based propositions. It aims to identify the key factors influencing patients' use of recommender systems and outlines a study design, methods for creating a survey, and techniques for conducting an analysis. Methods: This study proposes a 6-stage approach to examine user perceptions of the factors that may influence the use of recommender systems for preventive care. First, we formulate 6 research propositions that can be developed later into hypotheses for empirical testing. Second, we will create a survey instrument by collecting items from extant literature and then verify their relevance using expert analysis. This stage will continue with content and face validity testing to ensure the robustness of the selected items. Using Qualtrics (Qualtrics), the survey can be customized and prepared for deployment on Amazon Mechanical Turk. Third, we will obtain institutional review board approval because this is a human subject study. In the fourth stage, we propose using the survey to collect data from approximately 600 participants on Amazon Mechanical Turk and then using R to analyze the research model. This platform will serve as a recruitment tool and the method of obtaining informed consent. In our fifth stage, we will perform principal component analysis, Harman Single Factor test, exploratory factor analysis, and correlational analysis; examine the reliability and convergent validity of individual items; test if multicollinearity exists; and complete a confirmatory factor analysis. 
Results: Data collection and analysis will begin after institutional review board approval is obtained. Conclusions: In pursuit of better health outcomes, low costs, and improved patient and provider experiences, the integration of recommender systems with health care services can extend the reach and scale of preventive care. Examining recommender systems for preventive care can be vital in achieving the quadruple aims by advancing the steps toward precision medicine and applying best practices. International Registered Report Identifier (IRRID): PRR1-10.2196/43316 ", doi="10.2196/43316", url="https://www.researchprotocols.org/2023/1/e43316", url="http://www.ncbi.nlm.nih.gov/pubmed/36995747" } @Article{info:doi/10.2196/43988, author="Schmidt, Christian and Keszty{\"u}s, Dorothea and Haag, Martin and Wilhelm, Manfred and Keszty{\"u}s, Tibor", title="Proposal of a Method for Transferring High-Quality Scientific Literature Data to Virtual Patient Cases Using Categorical Data Generated by Bernoulli-Distributed Random Values: Development and Prototypical Implementation", journal="JMIR Med Educ", year="2023", month="Mar", day="9", volume="9", pages="e43988", keywords="medical education", keywords="computer programs and programming", keywords="probability", keywords="rare diseases", keywords="diagnosis", keywords="medical literature", keywords="automation", keywords="automated", keywords="virtual patient", keywords="simulation", keywords="computer based", keywords="Bernoulli", abstract="Background: Teaching medicine is a complex task because medical teachers are also involved in clinical practice and research and the availability of cases with rare diseases is very restricted. Automatic creation of virtual patient cases would be a great benefit, saving time and providing a wider choice of virtual patient cases for student training. Objective: This study explored whether the medical literature provides usable quantifiable information on rare diseases. The study implemented a computerized method that simulates basic clinical patient cases utilizing probabilities of symptom occurrence for a disease. Methods: Medical literature was searched for suitable rare diseases and the required information on the respective probabilities of specific symptoms. We developed a statistical script that delivers basic virtual patient cases with random symptom complexes generated by Bernoulli experiments, according to probabilities reported in the literature. The number of runs and thus the number of patient cases generated are arbitrary. Results: We illustrated the function of our generator with the exemplary diagnosis ``brain abscess'' with the related symptoms ``headache, mental status change, focal neurologic deficit, fever, seizure, nausea and vomiting, nuchal rigidity, and papilledema'' and the respective probabilities from the literature. With a growing number of repetitions of the Bernoulli experiment, the relative frequencies of occurrence increasingly converged toward the probabilities from the literature. For example, the relative frequency for headache after 10,000 repetitions was 0.7267 and, after rounding, equaled the mean value of the probability range of 0.73 reported in the literature. The same applied to the other symptoms. Conclusions: The medical literature provides specific information on characteristics of rare diseases that can be transferred to probabilities. The results of our computerized method suggest that automated creation of virtual patient cases based on these probabilities is possible.
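A minimal Python sketch of such a Bernoulli-based case generator follows; only the headache probability (0.73) is taken from the abstract, and the remaining symptom probabilities are illustrative placeholders.

import numpy as np

rng = np.random.default_rng(seed=1)
symptom_probabilities = {
    "headache": 0.73,                  # from the abstract; the rest are assumed
    "mental status change": 0.60,
    "fever": 0.50,
    "seizure": 0.30,
}

def generate_cases(n):
    """Draw n virtual patients; each symptom is an independent Bernoulli trial."""
    return {s: rng.random(n) < p for s, p in symptom_probabilities.items()}

cases = generate_cases(10_000)
for symptom, present in cases.items():
    # Relative frequencies converge toward the configured probabilities.
    print(f"{symptom}: {present.mean():.4f}")

With 10,000 draws, the printed relative frequency for headache lands near 0.73, mirroring the convergence described above.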
With additional information provided in the literature, an extension of the generator can be implemented in further research. ", doi="10.2196/43988", url="https://mededu.jmir.org/2023/1/e43988", url="http://www.ncbi.nlm.nih.gov/pubmed/36892938" } @Article{info:doi/10.2196/41153, author="Diaz, Claudio and Caillaud, Corinne and Yacef, Kalina", title="Mining Sensor Data to Assess Changes in Physical Activity Behaviors in Health Interventions: Systematic Review", journal="JMIR Med Inform", year="2023", month="Mar", day="6", volume="11", pages="e41153", keywords="activity tracker", keywords="wearable electronic devices", keywords="fitness trackers", keywords="data mining", keywords="artificial intelligence", keywords="health", keywords="education", keywords="behavior change", keywords="physical activity", keywords="wearable devices", keywords="trackers", keywords="health education", keywords="sensor data", abstract="Background: Sensors are increasingly used in health interventions to unobtrusively and continuously capture participants' physical activity in free-living conditions. The rich granularity of sensor data offers great potential for analyzing patterns and changes in physical activity behaviors. The use of specialized machine learning and data mining techniques to detect, extract, and analyze these patterns has increased, helping to better understand how participants' physical activity evolves. Objective: The aim of this systematic review was to identify and present the various data mining techniques employed to analyze changes in physical activity behaviors from sensor-derived data in health education and health promotion intervention studies. We addressed two main research questions: (1) What are the current techniques used for mining physical activity sensor data to detect behavior changes in health education or health promotion contexts? (2) What are the challenges and opportunities in mining physical activity sensor data for detecting physical activity behavior changes? Methods: The systematic review was performed in May 2021 using the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines. We queried the Association for Computing Machinery (ACM), IEEE Xplore, ProQuest, Scopus, Web of Science, Education Resources Information Center (ERIC), and Springer literature databases for peer-reviewed references related to wearable machine learning to detect physical activity changes in health education. A total of 4388 references were initially retrieved from the databases. After removing duplicates and screening titles and abstracts, 285 references were subjected to full-text review, resulting in 19 articles included for analysis. Results: All studies used accelerometers, sometimes in combination with another sensor (37\%). Data were collected over a period ranging from 4 days to 1 year (median 10 weeks) from a cohort size ranging between 10 and 11,615 (median 74). Data preprocessing was mainly carried out using proprietary software, generally resulting in step counts and time spent in physical activity aggregated predominantly at the daily or minute level. The main features used as input for the data mining models were descriptive statistics of the preprocessed data. The most common data mining methods were classifiers, clustering, and decision-making algorithms, and these focused on personalization (58\%) and analysis of physical activity behaviors (42\%).
Conclusions: Mining sensor data offers great opportunities to analyze physical activity behavior changes, build models to better detect and interpret behavior changes, and allow for personalized feedback and support for participants, especially where larger sample sizes and longer recording times are available. Exploring different data aggregation levels can help detect subtle and sustained behavior changes. However, the literature suggests that there is still work remaining to improve the transparency, explicitness, and standardization of the data preprocessing and mining processes to establish best practices and make the detection methods easier to understand, scrutinize, and reproduce. ", doi="10.2196/41153", url="https://medinform.jmir.org/2023/1/e41153", url="http://www.ncbi.nlm.nih.gov/pubmed/36877559" } @Article{info:doi/10.2196/43092, author="Single, Michael and Bruhin, C. Lena and Sch{\"u}tz, Narayan and Naef, C. Aileen and Hegi, Heinz and Reuse, Pascal and Schindler, A. Kaspar and Krack, Paul and Wiest, Roland and Chan, Andrew and Nef, Tobias and Gerber, M. Stephan", title="Development of an Open-source and Lightweight Sensor Recording Software System for Conducting Biomedical Research: Technical Report", journal="JMIR Form Res", year="2023", month="Feb", day="17", volume="7", pages="e43092", keywords="sensor recording software", keywords="on-demand deployment", keywords="digital measures", keywords="sensor platform", keywords="biomedical research", abstract="Background: Digital sensing devices have become an increasingly important component of modern biomedical research, as they help provide objective insights into individuals' everyday behavior in terms of changes in motor and nonmotor symptoms. However, there are significant barriers to the adoption of sensor-enhanced biomedical solutions in terms of both technical expertise and associated costs. The currently available solutions neither allow easy integration of custom sensing devices nor offer a practicable methodology in cases of limited resources. This has become particularly relevant, given the need for real-time sensor data that could help lower health care costs by reducing the frequency of clinical assessments performed by specialists and improve access to health assessments (eg, for people living in remote areas or older adults living at home). Objective: The objective of this paper is to detail the end-to-end development of a novel sensor recording software system that supports the integration of heterogeneous sensor technologies, runs as an on-demand service on consumer-grade hardware to build sensor systems, and can be easily used to reliably record longitudinal sensor measurements in research settings. Methods: The proposed software system is based on a server-client architecture, consisting of multiple self-contained microservices that communicated with each other (eg, the web server transfers data to a database instance) and were implemented as Docker containers. The design of the software is based on state-of-the-art open-source technologies (eg, Node.js or MongoDB), which fulfill nonfunctional requirements and reduce associated costs. A series of programs to facilitate the use of the software were documented. To demonstrate performance, the software was tested in 3 studies (2 gait studies and 1 behavioral study assessing activities of daily living) that ran between 2 and 225 days, with a total of 114 participants. 
We used descriptive statistics to evaluate longitudinal measurements for reliability, error rates, throughput rates, latency, and usability (with the System Usability Scale [SUS] and the Post-Study System Usability Questionnaire [PSSUQ]). Results: Three qualitative features (event annotation program, sample delay analysis program, and monitoring dashboard) were elaborated and realized as integrated programs. Our quantitative findings demonstrate that the system operates reliably on consumer-grade hardware, even across multiple months (>420 days), providing high throughput (2000 requests per second) with a low latency and error rate (<0.002\%). In addition, the results of the usability tests indicate that the system is effective, efficient, and satisfactory to use (mean usability ratings for the SUS and PSSUQ were 89.5 and 1.62, respectively). Conclusions: Overall, this sensor recording software could be leveraged to test sensor devices, as well as to develop and validate algorithms that are able to extract digital measures (eg, gait parameters or actigraphy). The proposed software could help significantly reduce barriers related to sensor-enhanced biomedical research and allow researchers to focus on the research questions at hand rather than on developing recording technologies. ", doi="10.2196/43092", url="https://formative.jmir.org/2023/1/e43092", url="http://www.ncbi.nlm.nih.gov/pubmed/36800219" } @Article{info:doi/10.2196/41344, author="Kinast, Benjamin and Ulrich, Hannes and Bergh, Bj{\"o}rn and Schreiweis, Bj{\"o}rn", title="Functional Requirements for Medical Data Integration into Knowledge Management Environments: Requirements Elicitation Approach Based on Systematic Literature Analysis", journal="J Med Internet Res", year="2023", month="Feb", day="9", volume="25", pages="e41344", keywords="data integration", keywords="requirements engineering", keywords="requirements", keywords="knowledge management", keywords="software engineering", abstract="Background: In patient care, data are historically generated and stored in heterogeneous databases that are domain specific and often noninteroperable or isolated. As the amount of health data increases, the number of isolated data silos is also expected to grow, limiting the accessibility of the collected data. Medical informatics is developing ways to move from siloed data to a more harmonized arrangement in information architectures. This paradigm shift will allow future research to integrate medical data at various levels and from various sources. Currently, comprehensive requirements engineering is working on data integration projects in both patient care-- and research-oriented contexts, and it is significantly contributing to the success of such projects. In addition to various stakeholder-based methods, document-based requirement elicitation is a valid method for improving the scope and quality of requirements. Objective: Our main objective was to provide a general catalog of functional requirements for integrating medical data into knowledge management environments. We aimed to identify where integration projects intersect to derive consistent and representative functional requirements from the literature. On the basis of these findings, we identified which functional requirements for data integration exist in the literature and thus provide a general catalog of requirements. Methods: This work began by conducting a literature-based requirement elicitation based on a broad requirement engineering approach. 
Thus, in the first step, we performed a web-based systematic literature review to identify published articles that dealt with the requirements for medical data integration. We identified and analyzed the available literature by applying the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines. In the second step, we screened the results for functional requirements using the requirements engineering method of document analysis and transferred the requirements into a uniform requirement syntax. Finally, we classified the elicited requirements into a category scheme that represents the data life cycle. Results: Our 2-step requirements elicitation approach yielded 821 articles, of which 61 (7.4\%) were included in the requirement elicitation process. In this process, we identified 220 requirements, which were covered by 314 references. We assigned the requirements to different data life cycle categories as follows: 25\% (55/220) to data acquisition, 35.9\% (79/220) to data processing, 12.7\% (28/220) to data storage, 9.1\% (20/220) to data analysis, 6.4\% (14/220) to metadata management, 2.3\% (5/220) to data lineage, 3.2\% (7/220) to data traceability, and 5.5\% (12/220) to data security. Conclusions: The aim of this study was to present a cross-section of functional data integration--related requirements defined in the literature by other researchers. The aim was achieved with 220 distinct requirements from 61 publications. We concluded that scientific publications are, in principle, a reliable source of information for functional requirements with respect to medical data integration. Finally, we provide a broad catalog to support other scientists in the requirement elicitation phase. ", doi="10.2196/41344", url="https://www.jmir.org/2023/1/e41344", url="http://www.ncbi.nlm.nih.gov/pubmed/36757764" } @Article{info:doi/10.2196/41614, author="Kozak, Karol and Seidel, Andr{\'e} and Matvieieva, Nataliia and Neupetsch, Constanze and Teicher, Uwe and Lemme, Gordon and Ben Achour, Anas and Barth, Martin and Ihlenfeldt, Steffen and Drossel, Welf-Guntram", title="Unique Device Identification--Based Linkage of Hierarchically Accessible Data Domains in Prospective Surgical Hospital Data Ecosystems: User-Centered Design Approach", journal="JMIR Med Inform", year="2023", month="Jan", day="27", volume="11", pages="e41614", keywords="electronic health record", keywords="unique device identification", keywords="cyber-physical production systems", keywords="mHealth", keywords="data integration ecosystem", keywords="hierarchical data access", keywords="shell embedded role model", abstract="Background: The electronic health record (EHR) targets systematized collection of patient-specific, electronically stored health data. The EHR is an evolving concept driven by ongoing developments and open or unclear legal issues concerning medical technologies, cross-domain data integration, and unclear access roles. Consequently, an interdisciplinary discourse based on representative pilot scenarios is required to connect previously unconnected domains. Objective: We address cross-domain data integration including access control using the specific example of a unique device identification (UDI)--expanded hip implant. In fact, the integration of technical focus data into the hospital information system (HIS) is considered based on surgically relevant information.
Moreover, the acquisition of social focus data based on mobile health (mHealth) is addressed, covering data integration and networking with therapeutic intervention and acute diagnostics data. Methods: In addition to the additive manufacturing of a hip implant with the integration of a UDI, we built a database that combines database technology with a wrapper layer known from extract, transform, load (ETL) systems, bringing the data into an SQL database, a web application programming interface (API) layer (back end), an interface layer (REST API), and a front end. It also provides semantic integration through connection mechanisms between data elements. Results: A hip implant is approached by design, production, and verification while linking operation-relevant specifics like implant-bone fit by merging patient-specific image material (computed tomography, magnetic resonance imaging, or a biomodel) and the digital implant twin for well-founded selection pairing. This decision-facilitating linkage, which improves surgical planning, relates to patient-specific postoperative influencing factors during the healing phase. A unique product identification approach is presented, allowing a postoperative readout with state-of-the-art hospital technology while enabling future access scenarios for patient and implant data. The latter was considered from the manufacturing perspective using the process manufacturing chain for a (patient-specific) implant to identify quality-relevant data for later access. In addition, sensor concepts were identified to monitor the patient-implant interaction during the healing phase, for example, using wearables. A data aggregation and integration concept for heterogeneous data sources from the considered focus domains is also presented. Finally, a hierarchical data access concept is shown, protecting sensitive patient data from misuse using existing scenarios. Conclusions: Personalized medicine requires cross-domain linkage of data, which, in turn, requires an appropriate data infrastructure and adequate hierarchical data access solutions in a shared and federated data space. The hip implant is used as an example for the usefulness of cross-domain data linkage since it bundles social, medical, and technical aspects of the implantation. It is necessary to open existing databases using interfaces for secure integration of data from end devices and to assure availability through suitable access models while guaranteeing long-term, independent data persistence. A suitable strategy requires the combination of technical solutions from the areas of identity and trust, federated data storage, cryptographic procedures, and software engineering as well as organizational changes.
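As a loose illustration (not the authors' implementation) of UDI-based linkage across the manufacturing, clinical, and mHealth focus domains with a crude hierarchical access rule, consider the following toy pandas sketch; all table and column names are our assumptions.

import pandas as pd

# One toy record per focus domain, all keyed by the same device identifier.
implants = pd.DataFrame({"udi": ["UDI-001"], "lot": ["L42"]})               # manufacturing
clinical = pd.DataFrame({"udi": ["UDI-001"], "patient_id": [17],
                         "ct_series": ["CT-9031"]})                         # HIS / diagnostics
wearables = pd.DataFrame({"udi": ["UDI-001"], "mean_daily_steps": [3400]})  # mHealth

linked = implants.merge(clinical, on="udi").merge(wearables, on="udi")

def view_for(role, df):
    """Crude hierarchical access: hide patient identifiers from non-clinical roles."""
    return df if role == "clinician" else df.drop(columns=["patient_id", "ct_series"])

print(view_for("manufacturer", linked))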
", doi="10.2196/41614", url="https://medinform.jmir.org/2023/1/e41614", url="http://www.ncbi.nlm.nih.gov/pubmed/36705946" } @Article{info:doi/10.2196/38861, author="Jahn, Franziska and Ammenwerth, Elske and Dornauer, Verena and H{\"o}ffner, Konrad and Bindel, Michelle and Karopka, Thomas and Winter, Alfred", title="A Linked Open Data--Based Terminology to Describe Libre/Free and Open-source Software: Incremental Development Study", journal="JMIR Med Inform", year="2023", month="Jan", day="20", volume="11", pages="e38861", keywords="health informatics", keywords="ontology", keywords="free/libre open-source software", keywords="software applications", keywords="health IT", keywords="terminology", abstract="Background: There is a variety of libre/free and open-source software (LIFOSS) products for medicine and health care. To support health care and IT professionals select an appropriate software product for given tasks, several comparison studies and web platforms, such as Medfloss.org, are available. However, due to the lack of a uniform terminology for health informatics, ambiguous or imprecise terms are used to describe the functionalities of LIFOSS. This makes comparisons of LIFOSS difficult and may lead to inappropriate software selection decisions. Using Linked Open Data (LOD) promises to address these challenges. Objective: We describe LIFOSS systematically with the help of the underlying Health Information Technology Ontology (HITO). We publish HITO and HITO-based software product descriptions using LOD to obtain the following benefits: (1) linking and reusing existing terminologies and (2) using Semantic Web tools for viewing and querying the LIFOSS data on the World Wide Web. Methods: HITO was incrementally developed and implemented. First, classes for the description of software products in health IT evaluation studies were identified. Second, requirements for describing LIFOSS were elicited by interviewing domain experts. Third, to describe domain-specific functionalities of software products, existing catalogues of features and enterprise functions were analyzed and integrated into the HITO knowledge base. As a proof of concept, HITO was used to describe 25 LIFOSS products. Results: HITO provides a defined set of classes and their relationships to describe LIFOSS in medicine and health care. With the help of linked or integrated catalogues for languages, programming languages, licenses, features, and enterprise functions, the functionalities of LIFOSS can be precisely described and compared. We publish HITO and the LIFOSS descriptions as LOD; they can be queried and viewed using different Semantic Web tools, such as Resource Description Framework (RDF) browsers, SPARQL Protocol and RDF Query Language (SPARQL) queries, and faceted searches. The advantages of providing HITO as LOD are demonstrated by practical examples. Conclusions: HITO is a building block to achieving unambiguous communication among health IT professionals and researchers. Providing LIFOSS product information as LOD enables barrier-free and easy access to data that are often hidden in user manuals of software products or are not available at all. Efforts to establish a unique terminology of medical and health informatics should be further supported and continued. 
", doi="10.2196/38861", url="https://medinform.jmir.org/2023/1/e38861", url="http://www.ncbi.nlm.nih.gov/pubmed/36662569" } @Article{info:doi/10.2196/38590, author="Chen, Xiaojie and Chen, Han and Nan, Shan and Kong, Xiangtian and Duan, Huilong and Zhu, Haiyan", title="Dealing With Missing, Imbalanced, and Sparse Features During the Development of a Prediction Model for Sudden Death Using Emergency Medicine Data: Machine Learning Approach", journal="JMIR Med Inform", year="2023", month="Jan", day="20", volume="11", pages="e38590", keywords="emergency medicine", keywords="prediction model", keywords="data preprocessing", keywords="imbalanced data", keywords="missing value interpolation", keywords="sparse features", keywords="clinical informatics", keywords="machine learning", keywords="medical informatics", abstract="Background: In emergency departments (EDs), early diagnosis and timely rescue, which are supported by prediction modes using ED data, can increase patients' chances of survival. Unfortunately, ED data usually contain missing, imbalanced, and sparse features, which makes it challenging to build early identification models for diseases. Objective: This study aims to propose a systematic approach to deal with the problems of missing, imbalanced, and sparse features for developing sudden-death prediction models using emergency medicine (or ED) data. Methods: We proposed a 3-step approach to deal with data quality issues: a random forest (RF) for missing values, k-means for imbalanced data, and principal component analysis (PCA) for sparse features. For continuous and discrete variables, the decision coefficient R2 and the $\kappa$ coefficient were used to evaluate performance, respectively. The area under the receiver operating characteristic curve (AUROC) and the area under the precision-recall curve (AUPRC) were used to estimate the model's performance. To further evaluate the proposed approach, we carried out a case study using an ED data set obtained from the Hainan Hospital of Chinese PLA General Hospital. A logistic regression (LR) prediction model for patient condition worsening was built. Results: A total of 1085 patients with rescue records and 17,959 patients without rescue records were selected and significantly imbalanced. We extracted 275, 402, and 891 variables from laboratory tests, medications, and diagnosis, respectively. After data preprocessing, the median R2 of the RF continuous variable interpolation was 0.623 (IQR 0.647), and the median of the $\kappa$ coefficient for discrete variable interpolation was 0.444 (IQR 0.285). The LR model constructed using the initial diagnostic data showed poor performance and variable separation, which was reflected in the abnormally high odds ratio (OR) values of the 2 variables of cardiac arrest and respiratory arrest (201568034532 and 1211118945, respectively) and an abnormal 95\% CI. Using processed data, the recall of the model reached 0.746, the F1-score was 0.73, and the AUROC was 0.708. Conclusions: The proposed systematic approach is valid for building a prediction model for emergency patients. 
", doi="10.2196/38590", url="https://medinform.jmir.org/2023/1/e38590", url="http://www.ncbi.nlm.nih.gov/pubmed/36662548" } @Article{info:doi/10.2196/38266, author="Upadhyaya, Pulakesh and Zhang, Kai and Li, Can and Jiang, Xiaoqian and Kim, Yejin", title="Scalable Causal Structure Learning: Scoping Review of Traditional and Deep Learning Algorithms and New Opportunities in Biomedicine", journal="JMIR Med Inform", year="2023", month="Jan", day="17", volume="11", pages="e38266", keywords="causal inference", keywords="causal structure discovery", keywords="deep learning", keywords="biomedicine", keywords="networks", abstract="Background: Causal structure learning refers to a process of identifying causal structures from observational data, and it can have multiple applications in biomedicine and health care. Objective: This paper provides a practical review and tutorial on scalable causal structure learning models with examples of real-world data to help health care audiences understand and apply them. Methods: We reviewed traditional (combinatorial and score-based) methods for causal structure discovery and machine learning--based schemes. Various traditional approaches have been studied to tackle this problem, the most important among these being the Peter Spirtes and Clark Glymour algorithms. This was followed by analyzing the literature on score-based methods, which are computationally faster. Owing to the continuous constraint on acyclicity, there are new deep learning approaches to the problem in addition to traditional and score-based methods. Such methods can also offer scalability, particularly when there is a large amount of data involving multiple variables. Using our own evaluation metrics and experiments on linear, nonlinear, and benchmark Sachs data, we aimed to highlight the various advantages and disadvantages associated with these methods for the health care community. We also highlighted recent developments in biomedicine where causal structure learning can be applied to discover structures such as gene networks, brain connectivity networks, and those in cancer epidemiology. Results: We also compared the performance of traditional and machine learning--based algorithms for causal discovery over some benchmark data sets. Directed Acyclic Graph-Graph Neural Network has the lowest structural hamming distance (19) and false positive rate (0.13) based on the Sachs data set, whereas Greedy Equivalence Search and Max-Min Hill Climbing have the best false discovery rate (0.68) and true positive rate (0.56), respectively. Conclusions: Machine learning--based approaches, including deep learning, have many advantages over traditional approaches, such as scalability, including a greater number of variables, and potentially being applied in a wide range of biomedical applications, such as genetics, if sufficient data are available. Furthermore, these models are more flexible than traditional models and are poised to positively affect many applications in the future. 
", doi="10.2196/38266", url="https://medinform.jmir.org/2023/1/e38266", url="http://www.ncbi.nlm.nih.gov/pubmed/36649070" } @Article{info:doi/10.2196/42379, author="G{\'e}rardin, Christel and Mageau, Arthur and M{\'e}kinian, Ars{\`e}ne and Tannier, Xavier and Carrat, Fabrice", title="Construction of Cohorts of Similar Patients From Automatic Extraction of Medical Concepts: Phenotype Extraction Study", journal="JMIR Med Inform", year="2022", month="Dec", day="19", volume="10", number="12", pages="e42379", keywords="natural language processing", keywords="similar patient cohort", keywords="phenotype", keywords="systemic disease", keywords="NLP", keywords="algorithm", keywords="automatic extraction", keywords="automated extraction", keywords="named entity", keywords="MeSH", keywords="medical subject heading", keywords="data extraction", keywords="text extraction", abstract="Background: Reliable and interpretable automatic extraction of clinical phenotypes from large electronic medical record databases remains a challenge, especially in a language other than English. Objective: We aimed to provide an automated end-to-end extraction of cohorts of similar patients from electronic health records for systemic diseases. Methods: Our multistep algorithm includes a named-entity recognition step, a multilabel classification using medical subject headings ontology, and the computation of patient similarity. A selection of cohorts of similar patients on a priori annotated phenotypes was performed. Six phenotypes were selected for their clinical significance: P1, osteoporosis; P2, nephritis in systemic erythematosus lupus; P3, interstitial lung disease in systemic sclerosis; P4, lung infection; P5, obstetric antiphospholipid syndrome; and P6, Takayasu arteritis. We used a training set of 151 clinical notes and an independent validation set of 256 clinical notes, with annotated phenotypes, both extracted from the Assistance Publique-H{\^o}pitaux de Paris data warehouse. We evaluated the precision of the 3 patients closest to the index patient for each phenotype with precision-at-3 and recall and average precision. Results: For P1-P4, the precision-at-3 ranged from 0.85 (95\% CI 0.75-0.95) to 0.99 (95\% CI 0.98-1), the recall ranged from 0.53 (95\% CI 0.50-0.55) to 0.83 (95\% CI 0.81-0.84), and the average precision ranged from 0.58 (95\% CI 0.54-0.62) to 0.88 (95\% CI 0.85-0.90). P5-P6 phenotypes could not be analyzed due to the limited number of phenotypes. Conclusions: Using a method close to clinical reasoning, we built a scalable and interpretable end-to-end algorithm for extracting cohorts of similar patients. ", doi="10.2196/42379", url="https://medinform.jmir.org/2022/12/e42379", url="http://www.ncbi.nlm.nih.gov/pubmed/36534446" } @Article{info:doi/10.2196/23422, author="Lin, Chen and Yousefi, Safoora and Kahoro, Elvis and Karisani, Payam and Liang, Donghai and Sarnat, Jeremy and Agichtein, Eugene", title="Detecting Elevated Air Pollution Levels by Monitoring Web Search Queries: Algorithm Development and Validation", journal="JMIR Form Res", year="2022", month="Dec", day="19", volume="6", number="12", pages="e23422", keywords="nowcasting of air pollution", keywords="web-based public health surveillance", keywords="neural network sequence modeling", keywords="search engine log analysis", keywords="air pollution exposure assessment", keywords="mobile phone", abstract="Background: Real-time air pollution monitoring is a valuable tool for public health and environmental surveillance. 
In recent years, there has been a dramatic increase in air pollution forecasting and monitoring research using artificial neural networks. Most prior work relied on modeling pollutant concentrations collected from ground-based monitors and meteorological data for long-term forecasting of outdoor ozone (O3), oxides of nitrogen, and fine particulate matter (PM2.5). Given that traditional, highly sophisticated air quality monitors are expensive and not universally available, these models cannot adequately serve those not living near pollutant monitoring sites. Furthermore, because prior models were built based on physical measurement data collected from sensors, they may not be suitable for predicting the public health effects of pollution exposure. Objective: This study aimed to develop and validate models to nowcast the observed pollution levels using web search data, which are publicly available in near real time from major search engines. Methods: We developed novel machine learning--based models using both traditional supervised classification methods and state-of-the-art deep learning methods to detect elevated air pollution levels at the US city level by using generally available meteorological data and aggregate web-based search volume data derived from Google Trends. We validated the performance of these methods by predicting 3 critical air pollutants (O3, nitrogen dioxide, and PM2.5) across 10 major US metropolitan statistical areas in 2017 and 2018. We also explored different variations of the long short-term memory model and proposed a novel search term dictionary learner-long short-term memory model to learn sequential patterns across multiple search terms for prediction. Results: The top-performing model was a deep neural sequence model (long short-term memory) using meteorological and web search data, and reached an accuracy of 0.82 (F1-score 0.51) for O3, 0.74 (F1-score 0.41) for nitrogen dioxide, and 0.85 (F1-score 0.27) for PM2.5, when used for detecting elevated pollution levels. Compared with using only meteorological data, the proposed method achieved superior accuracy by incorporating web search data. Conclusions: The results show that incorporating web search data with meteorological data improves the nowcasting performance for all 3 pollutants and suggest promising novel applications for tracking global physical phenomena using web search data. ", doi="10.2196/23422", url="https://formative.jmir.org/2022/12/e23422", url="http://www.ncbi.nlm.nih.gov/pubmed/36534457" } @Article{info:doi/10.2196/35712, author="Domingueti, Daniel and Barbosa Feres Carvalho, Darlinton and Colombo Dias, Roberto Diego and Oliveira, Concei{\c{c}}{\~a}o Val{\'e}ria", title="Software-Based Simulation on a 3D Environment for Vaccination Teaching and Learning: Design Science Research", journal="JMIR Med Educ", year="2022", month="Dec", day="2", volume="8", number="4", pages="e35712", keywords="software simulation", keywords="vaccination room", keywords="immunization", keywords="teaching", keywords="training", keywords="evaluation", keywords="virtual world", keywords="Unity3D", keywords="SUS", keywords="UTAUT2", abstract="Background: Student training requires specific laboratories for vaccination practice, which are usually limited, and even professionals' continuing education regularly lacks proper care. Thus, new methodologies, concepts, and technologies, such as software-based simulations, are in high demand.
Objective: This work aims to develop a 3D virtual environment to support teaching activities in the vaccination room. The software-based simulation must contribute positively to teaching considering a variable set of scenarios. Methods: We applied the design science research method to guide the work. First, the concepts and opportunities were raised, which we used to build the simulation (ie, the proposed technological artifact). The development was assisted by a specialist, in which we sought to create a vaccination room according to Brazilian standards. The artifact evaluation was achieved in 2 stages: (1) an evaluation to validate the design with experts through the Delphi method; and (2) a field evaluation with nursing students to validate aspects of usability (System Usability Scale [SUS]) and technology acceptance and use (Unified Theory of Acceptance and Use of Technology version 2). Results: We built the simulation software using the Unity game engine. An additional module was also developed to create simulation scenarios and view the students' performance reports. The design evaluation showed that the proposed solution is adequate. Students' evaluations confirm good usability (SUS score of 81.4), besides highlighting Performance Expectation as the most positively influential factor of Behavioral Intention. Effort Expectancy is positively affected by younger users. Both evaluation audiences cited the high relevance of the proposed artifact for teaching. Points for improvement are also reported. Conclusions: The research accomplished its goal of creating a software-based simulation to support teaching scenarios in the vaccination room. The evaluations still reveal desirable improvements and user behavior toward this kind of technological artifact. ", doi="10.2196/35712", url="https://mededu.jmir.org/2022/4/e35712", url="http://www.ncbi.nlm.nih.gov/pubmed/36459390" } @Article{info:doi/10.2196/38783, author="Hardin, Jill and Murray, Gayle and Swerdel, Joel", title="Phenotype Algorithms to Identify Hidradenitis Suppurativa Using Real-World Data: Development and Validation Study", journal="JMIR Dermatol", year="2022", month="Nov", day="30", volume="5", number="4", pages="e38783", keywords="dermatology", keywords="hidradenitis suppurativa", keywords="medical dermatology", keywords="observational data", keywords="phenotype", keywords="inflammation", keywords="skin disease", keywords="epidemiology", keywords="algorithm", abstract="Background: Hidradenitis suppurativa (HS) is a potentially debilitating, chronic, recurring inflammatory disease. Observational databases provide opportunities to study the epidemiology of HS. Objective: This study's objective was to develop phenotype algorithms for HS suitable for epidemiological studies based on a network of observational databases. Methods: A data-driven approach was used to develop 4 HS algorithms. A literature search identified prior HS algorithms. Standardized databases from the Observational Medical Outcomes Partnership (n=9) were used to develop 2 incident and 2 prevalent HS phenotype algorithms. Two open-source diagnostic tools, CohortDiagnostics and PheValuator, were used to evaluate and generate phenotype performance metric estimates, including sensitivity, specificity, positive predictive value (PPV), and negative predictive value. Results: We developed 2 prevalent and 2 incident HS algorithms. Validation showed that PPV estimates were highest (mean 86\%) for the prevalent HS algorithm requiring at least two HS diagnosis codes. 
Sensitivity estimates were highest (mean 58\%) for the prevalent HS algorithm requiring at least one HS code. Conclusions: This study illustrates the evaluation process and provides performance metrics for 2 incident and 2 prevalent HS algorithms across 9 observational databases. The use of a rigorous data-driven approach applied to a large number of databases provides confidence that the HS algorithms can correctly identify HS subjects. ", doi="10.2196/38783", url="https://derma.jmir.org/2022/4/e38783", url="http://www.ncbi.nlm.nih.gov/pubmed/37632892" } @Article{info:doi/10.2196/36340, author="Heiden, Emily and Jones, Tom and Brogaard Maczka, Annika and Kapoor, Melissa and Chauhan, Milan and Wiffen, Laura and Barham, Helen and Holland, Jeremy and Saxena, Manish and Wegerif, Simon and Brown, Thomas and Lomax, Mitch and Massey, Heather and Rostami, Shahin and Pearce, Laurence and Chauhan, Anoop", title="Measurement of Vital Signs Using Lifelight Remote Photoplethysmography: Results of the VISION-D and VISION-V Observational Studies", journal="JMIR Form Res", year="2022", month="Nov", day="14", volume="6", number="11", pages="e36340", keywords="general practice", keywords="vital signs/methods", keywords="vital signs/standards", keywords="photoplethysmography", keywords="remote photoplethysmography", keywords="Lifelight", keywords="contactless", keywords="software", keywords="algorithm development", keywords="algorithm", keywords="blood pressure", keywords="health monitoring", keywords="health technology", keywords="remote monitoring", abstract="Background: The detection of early changes in vital signs (VSs) enables timely intervention; however, the measurement of VSs requires hands-on technical expertise and is often time-consuming. The contactless measurement of VSs is beneficial to prevent infection, such as during the COVID-19 pandemic. Lifelight is a novel software being developed to measure VSs by remote photoplethysmography based on video captures of the face via the integral camera on mobile phones and tablets. We report two early studies in the development of Lifelight. Objective: The objective of the Vital Sign Comparison Between Lifelight and Standard of Care: Development (VISION-D) study (NCT04763746) was to measure respiratory rate (RR), pulse rate (PR), and blood pressure (BP) simultaneously by using the current standard of care manual methods and the Lifelight software to iteratively refine the software algorithms. The objective of the Vital Sign Comparison Between Lifelight and Standard of Care: Validation (VISION-V) study (NCT03998098) was to validate the use of Lifelight software to accurately measure VSs. Methods: BP, PR, and RR were measured simultaneously using Lifelight, a sphygmomanometer (BP and PR), and the manual counting of RR. Accuracy performance targets for each VS were defined from a systematic literature review of the performance of state-of-the-art VSs technologies. Results: The VISION-D data set (17,233 measurements from 8585 participants) met the accuracy targets for RR (mean error 0.3, SD 3.6 vs target mean error 2.3, SD 5.0; n=7462), PR (mean error 0.3, SD 4.0 vs mean error 2.2, SD 9.2; n=10,214), and diastolic BP (mean error -0.4, SD 8.5 vs mean error 5.5, SD 8.9; n=8951); for systolic BP, the mean error target was met but not the SD (mean error 3.5, SD 16.8 vs mean error 6.7, SD 15.3; n=9233). Fitzpatrick skin type did not affect accuracy.
The VISION-V data set (679 measurements from 127 participants) met all the standards: mean error -0.1, SD 3.4 for RR; mean error 1.4, SD 3.8 for PR; mean error 2.8, SD 14.5 for systolic BP; and mean error -0.3, SD 7.0 for diastolic BP. Conclusions: At this early stage in development, Lifelight demonstrates sufficient accuracy in the measurement of VSs to support certification for a Level 1 Conformit{\'e} Europ{\'e}enne mark. As the use of Lifelight does not require specific training or equipment, the software is potentially useful for the contactless measurement of VSs by nonclinical staff in residential and home care settings. Work is continuing to enhance data collection and processing to achieve the robustness and accuracy required for routine clinical use. International Registered Report Identifier (IRRID): RR2-10.2196/14326 ", doi="10.2196/36340", url="https://formative.jmir.org/2022/11/e36340", url="http://www.ncbi.nlm.nih.gov/pubmed/36374541" } @Article{info:doi/10.2196/41342, author="Chen, Pei-Fu and He, Tai-Liang and Lin, Sheng-Che and Chu, Yuan-Chia and Kuo, Chen-Tsung and Lai, Feipei and Wang, Ssu-Ming and Zhu, Wan-Xuan and Chen, Kuan-Chih and Kuo, Lu-Cheng and Hung, Fang-Ming and Lin, Yu-Cheng and Tsai, I-Chang and Chiu, Chi-Hao and Chang, Shu-Chih and Yang, Chi-Yu", title="Training a Deep Contextualized Language Model for International Classification of Diseases, 10th Revision Classification via Federated Learning: Model Development and Validation Study", journal="JMIR Med Inform", year="2022", month="Nov", day="10", volume="10", number="11", pages="e41342", keywords="federated learning", keywords="International Classification of Diseases", keywords="machine learning", keywords="natural language processing", keywords="multilabel text classification", abstract="Background: The automatic coding of clinical text documents by using the International Classification of Diseases, 10th Revision (ICD-10) can be performed for statistical analyses and reimbursements. With the development of natural language processing models, new transformer architectures with attention mechanisms have outperformed previous models. Although multicenter training may increase a model's performance and external validity, the privacy of clinical documents should be protected. We used federated learning to train a model with multicenter data, without sharing data per se. Objective: This study aims to train a classification model via federated learning for ICD-10 multilabel classification. Methods: Text data from discharge notes in electronic medical records were collected from the following three medical centers: Far Eastern Memorial Hospital, National Taiwan University Hospital, and Taipei Veterans General Hospital. After comparing the performance of different variants of bidirectional encoder representations from transformers (BERT), PubMedBERT was chosen for the word embeddings. With regard to preprocessing, the nonalphanumeric characters were retained because the model's performance decreased after the removal of these characters. To explain the outputs of our model, we added a label attention mechanism to the model architecture. The model was trained with data from each of the three hospitals separately and via federated learning. The models trained via federated learning and the models trained with local data were compared on a testing set that was composed of data from the three hospitals. The micro F1 score was used to evaluate model performance across all 3 centers.
Results: The F1 scores of PubMedBERT, RoBERTa (Robustly Optimized BERT Pretraining Approach), ClinicalBERT, and BioBERT (BERT for Biomedical Text Mining) were 0.735, 0.692, 0.711, and 0.721, respectively. The F1 score of the model that retained nonalphanumeric characters was 0.8120, whereas the F1 score after removing these characters was 0.7875---a decrease of 0.0245 (3.11\%). The F1 scores on the testing set were 0.6142, 0.4472, 0.5353, and 0.2522 for the federated learning, Far Eastern Memorial Hospital, National Taiwan University Hospital, and Taipei Veterans General Hospital models, respectively. The explainable predictions were displayed with highlighted input words via the label attention architecture. Conclusions: Federated learning was used to train the ICD-10 classification model on multicenter clinical text while protecting data privacy. The model's performance was better than that of models that were trained locally. ", doi="10.2196/41342", url="https://medinform.jmir.org/2022/11/e41342", url="http://www.ncbi.nlm.nih.gov/pubmed/36355417" } @Article{info:doi/10.2196/29404, author="Ye, Chao and Hu, Wenxing and Gaeta, Bruno", title="Prediction of Antibody-Antigen Binding via Machine Learning: Development of Data Sets and Evaluation of Methods", journal="JMIR Bioinform Biotech", year="2022", month="Oct", day="28", volume="3", number="1", pages="e29404", keywords="DNA sequencing", keywords="DNA", keywords="DNA sequence", keywords="sequence data", keywords="molecular biology", keywords="genomic", keywords="random forest", keywords="nearest neighbor", keywords="immunoglobulin", keywords="genetics", keywords="antibody-antigen binding", keywords="antigen", keywords="antibody", keywords="structural biology", keywords="machine learning", keywords="protein modeling", keywords="protein", keywords="proteomic", abstract="Background: The mammalian immune system is able to generate antibodies against a huge variety of antigens, including bacteria, viruses, and toxins. The ultradeep DNA sequencing of rearranged immunoglobulin genes has considerable potential in furthering our understanding of the immune response, but it is limited by the lack of a high-throughput, sequence-based method for predicting the antigen(s) that a given immunoglobulin recognizes. Objective: As a step toward the prediction of antibody-antigen binding from sequence data alone, we aimed to compare a range of machine learning approaches that were applied to a collated data set of antibody-antigen pairs in order to predict antibody-antigen binding from sequence data. Methods: Data for training and testing were extracted from the Protein Data Bank and the Coronavirus Antibody Database, and additional antibody-antigen pair data were generated by using a molecular docking protocol. Several machine learning methods, including the weighted nearest neighbor method, the nearest neighbor method with the BLOSUM62 matrix, and the random forest method, were applied to the problem. Results: The final data set contained 1157 antibodies and 57 antigens that were combined in 5041 antibody-antigen pairs. The best performance for the prediction of interactions was obtained by using the nearest neighbor method with the BLOSUM62 matrix, which resulted in around 82\% accuracy on the full data set. These results provide a useful frame of reference, as well as protocols and considerations, for machine learning and data set creation in the prediction of antibody-antigen binding. 
Conclusions: Several machine learning approaches were compared to predict antibody-antigen interaction from protein sequences. Both the data set (in CSV format) and the machine learning program (coded in Python) are freely available for download on GitHub. ", doi="10.2196/29404", url="https://bioinform.jmir.org/2022/1/e29404" } @Article{info:doi/10.2196/38450, author="van der Ploeg, Tjeerd and Gobbens, J. Robbert J.", title="Prediction of COVID-19 Infections for Municipalities in the Netherlands: Algorithm Development and Interpretation", journal="JMIR Public Health Surveill", year="2022", month="Oct", day="20", volume="8", number="10", pages="e38450", keywords="municipality properties", keywords="data merging", keywords="modeling technique", keywords="variable selection", keywords="prediction model", keywords="public health", keywords="COVID-19", keywords="surveillance", keywords="static data", keywords="Dutch public domain", keywords="pandemic", keywords="Wuhan", keywords="virus", keywords="public", keywords="infections", keywords="fever", keywords="cough", keywords="congestion", keywords="fatigue", keywords="symptoms", keywords="pneumonia", keywords="dyspnea", keywords="death", abstract="Background: COVID-19 was first identified in December 2019 in the city of Wuhan, China. The virus quickly spread and was declared a pandemic on March 11, 2020. After infection, symptoms such as fever, a (dry) cough, nasal congestion, and fatigue can develop. In some cases, the virus causes severe complications such as pneumonia and dyspnea and could result in death. The virus also spread rapidly in the Netherlands, a small and densely populated country with an aging population. Health care in the Netherlands is of a high standard, but there were nevertheless problems with hospital capacity, such as the number of available beds and staff. There were also regions and municipalities that were hit harder than others. In the Netherlands, there are important data sources available for daily COVID-19 numbers and information about municipalities. Objective: We aimed to predict the cumulative number of confirmed COVID-19 infections per 10,000 inhabitants per municipality in the Netherlands, using a data set with the properties of 355 municipalities in the Netherlands and advanced modeling techniques. Methods: We collected relevant static data per municipality from data sources that were available in the Dutch public domain and merged these data with the dynamic daily number of infections from January 1, 2020, to May 9, 2021, resulting in a data set with 355 municipalities in the Netherlands and variables grouped into 20 topics. The modeling techniques random forest and multiple fractional polynomials were used to construct a prediction model for predicting the cumulative number of confirmed COVID-19 infections per 10,000 inhabitants per municipality in the Netherlands. Results: The final prediction model had an R2 of 0.63. Important properties for predicting the cumulative number of confirmed COVID-19 infections per 10,000 inhabitants in a municipality in the Netherlands were exposure to particulate matter with diameters <10 $\mu$m (PM10) in the air, the percentage of Labour party voters, and the number of children in a household. 
Conclusions: Data about municipality properties in relation to the cumulative number of confirmed infections in a municipality in the Netherlands can give insight into the most important properties of a municipality for predicting the cumulative number of confirmed COVID-19 infections per 10,000 inhabitants in a municipality. This insight can provide policy makers with tools to cope with COVID-19 and may also be of value in the event of a future pandemic, so that municipalities are better prepared. ", doi="10.2196/38450", url="https://publichealth.jmir.org/2022/10/e38450", url="http://www.ncbi.nlm.nih.gov/pubmed/36219835" } @Article{info:doi/10.2196/38936, author="Lamer, Antoine and Fruchart, Mathilde and Paris, Nicolas and Popoff, Benjamin and Payen, Ana{\"i}s and Balcaen, Thibaut and Gacquer, William and Bouzill{\'e}, Guillaume and Cuggia, Marc and Doutreligne, Matthieu and Chazard, Emmanuel", title="Standardized Description of the Feature Extraction Process to Transform Raw Data Into Meaningful Information for Enhancing Data Reuse: Consensus Study", journal="JMIR Med Inform", year="2022", month="Oct", day="17", volume="10", number="10", pages="e38936", keywords="feature extraction", keywords="data reuse", keywords="data warehouse", keywords="database", keywords="algorithm", keywords="Observation Medical Outcomes Partnership", abstract="Background: Despite the many opportunities data reuse offers, its implementation presents many difficulties, and raw data cannot be reused directly. Information is not always directly available in the source database and needs to be computed afterwards with raw data for defining an algorithm. Objective: The main purpose of this article is to present a standardized description of the steps and transformations required during the feature extraction process when conducting retrospective observational studies. A secondary objective is to identify how the features could be stored in the schema of a data warehouse. Methods: This study involved the following 3 main steps: (1) the collection of relevant study cases related to feature extraction and based on the automatic and secondary use of data; (2) the standardized description of raw data, steps, and transformations, which were common to the study cases; and (3) the identification of an appropriate table to store the features in the Observation Medical Outcomes Partnership (OMOP) common data model (CDM). Results: We interviewed 10 researchers from 3 French university hospitals and a national institution, who were involved in 8 retrospective and observational studies. Based on these studies, 2 states (track and feature) and 2 transformations (track definition and track aggregation) emerged. ``Track'' is a time-dependent signal or period of interest, defined by a statistical unit, a value, and 2 milestones (a start event and an end event). ``Feature'' is time-independent high-level information with dimensionality identical to the statistical unit of the study, defined by a label and a value. The time dimension has become implicit in the value or name of the variable. We propose the 2 tables ``TRACK'' and ``FEATURE'' to store variables obtained in feature extraction and extend the OMOP CDM. Conclusions: We propose a standardized description of the feature extraction process. The process combined the 2 steps of track definition and track aggregation. By dividing the feature extraction into these 2 steps, difficulty was managed during track definition. 
The standardization of tracks requires great expertise with regard to the data, but allows the application of an infinite number of complex transformations. By contrast, track aggregation is a very simple operation with a finite number of possibilities. A complete description of these steps could enhance the reproducibility of retrospective studies. ", doi="10.2196/38936", url="https://medinform.jmir.org/2022/10/e38936", url="http://www.ncbi.nlm.nih.gov/pubmed/36251369" } @Article{info:doi/10.2196/38464, author="Oates, John and Shafiabady, Niusha and Ambagtsheer, Rachel and Beilby, Justin and Seiboth, Chris and Dent, Elsa", title="Evolving Hybrid Partial Genetic Algorithm Classification Model for Cost-effective Frailty Screening: Investigative Study", journal="JMIR Aging", year="2022", month="Oct", day="7", volume="5", number="4", pages="e38464", keywords="machine learning", keywords="frailty screening", keywords="partial genetic algorithms", keywords="SVM", keywords="KNN", keywords="decision trees", keywords="frailty", keywords="algorithm", keywords="cost", keywords="model", keywords="index", keywords="database", keywords="ai", keywords="ageing", keywords="adults", keywords="older people", keywords="screening", keywords="tool", abstract="Background: A commonly used method for measuring frailty is the accumulation of deficits expressed as a frailty index (FI). FIs can be readily adapted to many databases, as the parameters to use are not prescribed but rather reflect a subset of extracted features (variables). Unfortunately, the structure of many databases does not permit the direct extraction of a suitable subset, requiring additional effort to determine and verify the value of features for each record and thus significantly increasing cost. Objective: Our objective is to describe how an artificial intelligence (AI) optimization technique called partial genetic algorithms can be used to refine the subset of features used to calculate an FI and favor features that have the least cost of acquisition. Methods: This is a secondary analysis of a residential care database compiled from 10 facilities in Queensland, Australia. The database comprises routinely collected administrative data and unstructured patient notes for 592 residents aged 75 years and over. The primary study derived an electronic frailty index (eFI) calculated from 36 suitable features. We then structurally modified a genetic algorithm to find an optimal predictor of the calculated eFI (0.21 threshold) from 2 sets of features. Partial genetic algorithms were used to optimize 4 underlying classification models: logistic regression, decision trees, random forest, and support vector machines. Results: Among the underlying models, logistic regression was found to produce the best models in almost all scenarios and feature set sizes. The best models were built using all the low-cost features and as few as 10 high-cost features, and they performed well enough (sensitivity 89\%, specificity 87\%) to be considered candidates for a low-cost frailty screening test. Conclusions: In this study, a systematic approach for selecting an optimal set of features with a low cost of acquisition and performance comparable to the eFI for detecting frailty was demonstrated on an aged care database. Partial genetic algorithms have proven useful in offering a trade-off between cost and accuracy to systematically identify frailty.
", doi="10.2196/38464", url="https://aging.jmir.org/2022/4/e38464", url="http://www.ncbi.nlm.nih.gov/pubmed/36206042" } @Article{info:doi/10.2196/37174, author="Bardia, Amit and Deshpande, Ranjit and Michel, George and Yanez, David and Dai, Feng and Pace, L. Nathan and Schuster, Kevin and Mathis, R. Michael and Kheterpal, Sachin and Schonberger, B. Robert", title="Demonstration and Performance Evaluation of Two Novel Algorithms for Removing Artifacts From Automated Intraoperative Temperature Data Sets: Multicenter, Observational, Retrospective Study", journal="JMIR Perioper Med", year="2022", month="Oct", day="5", volume="5", number="1", pages="e37174", keywords="temperature", keywords="intraoperative", keywords="artifacts", keywords="algorithms", keywords="perioperative", keywords="surgery", keywords="temperature probe", keywords="artifact reduction", keywords="data acquisition", keywords="accuracy", abstract="Background: The automated acquisition of intraoperative patient temperature data via temperature probes leads to the possibility of producing a number of artifacts related to probe positioning that may impact these probes' utility for observational research. Objective: We sought to compare the performance of two de novo algorithms for filtering such artifacts. Methods: In this observational retrospective study, the intraoperative temperature data of adults who received general anesthesia for noncardiac surgery were extracted from the Multicenter Perioperative Outcomes Group registry. Two algorithms were developed and then compared to the reference standard---anesthesiologists' manual artifact detection process. Algorithm 1 (a slope-based algorithm) was based on the linear curve fit of 3 adjacent temperature data points. Algorithm 2 (an interval-based algorithm) assessed for time gaps between contiguous temperature recordings. Sensitivity and specificity values for artifact detection were calculated for each algorithm, as were mean temperatures and areas under the curve for hypothermia (temperatures below 36 {\textdegree}C) for each patient, after artifact removal via each methodology. Results: A total of 27,683 temperature readings from 200 anesthetic records were analyzed. The overall agreement among the anesthesiologists was 92.1\%. Both algorithms had high specificity but moderate sensitivity (specificity: 99.02\% for algorithm 1 vs 99.54\% for algorithm 2; sensitivity: 49.13\% for algorithm 1 vs 37.72\% for algorithm 2; F-score: 0.65 for algorithm 1 vs 0.55 for algorithm 2). The areas under the curve for time {\texttimes} hypothermic temperature and the mean temperatures recorded for each case after artifact removal were similar between the algorithms and the anesthesiologists. Conclusions: The tested algorithms provide an automated way to filter intraoperative temperature artifacts that closely approximates manual sorting by anesthesiologists. Our study provides evidence demonstrating the efficacy of highly generalizable artifact reduction algorithms that can be readily used by observational studies that rely on automated intraoperative data acquisition. ", doi="10.2196/37174", url="https://periop.jmir.org/2022/1/e37174", url="http://www.ncbi.nlm.nih.gov/pubmed/36197702" } @Article{info:doi/10.2196/37894, author="Diehl, Ceci and Martins, Ana and Almeida, Ana and Silva, Telmo and Ribeiro, {\'O}scar and Santinha, Gon{\c{c}}alo and Rocha, Nelson and Silva, G. 
Anabela", title="Defining Recommendations to Guide User Interface Design: Multimethod Approach", journal="JMIR Hum Factors", year="2022", month="Sep", day="30", volume="9", number="3", pages="e37894", keywords="user interface design", keywords="usability principles", keywords="interaction paradigm", keywords="generic recommendations", keywords="specific recommendations", abstract="Background: For the development of digital solutions, different aspects of user interface design must be taken into consideration. Different technologies, interaction paradigms, user characteristics and needs, and interface design components are some of the aspects that designers and developers should pay attention to when designing a solution. Many user interface design recommendations for different digital solutions and user profiles are found in the literature, but these recommendations have numerous similarities, contradictions, and different levels of detail. A detailed critical analysis is needed that compares, evaluates, and validates existing recommendations and allows the definition of a practical set of recommendations. Objective: This study aimed to analyze and synthesize existing user interface design recommendations and propose a practical set of recommendations that guide the development of different technologies. Methods: Based on previous studies, a set of recommendations on user interface design was generated following 4 steps: (1) interview with user interface design experts; (2) analysis of the experts' feedback and drafting of a set of recommendations; (3) reanalysis of the shorter list of recommendations by a group of experts; and (4) refining and finalizing the list. Results: The findings allowed us to define a set of 174 recommendations divided into 12 categories, according to usability principles, and organized into 2 levels of hierarchy: generic (69 recommendations) and specific (105 recommendations). Conclusions: This study shows that user interface design recommendations can be divided according to usability principles and organized into levels of detail. Moreover, this study reveals that some recommendations, as they address different technologies and interaction paradigms, need further work. ", doi="10.2196/37894", url="https://humanfactors.jmir.org/2022/3/e37894", url="http://www.ncbi.nlm.nih.gov/pubmed/36178714" } @Article{info:doi/10.2196/33775, author="Li, Xiaochun and Xu, Huiping and Grannis, Shaun", title="The Data-Adaptive Fellegi-Sunter Model for Probabilistic Record Linkage: Algorithm Development and Validation for Incorporating Missing Data and Field Selection", journal="J Med Internet Res", year="2022", month="Sep", day="29", volume="24", number="9", pages="e33775", keywords="record linkage", keywords="Fellegi-Sunter model", keywords="latent class model", keywords="missing at random", keywords="matching field selection", abstract="Background: Quality patient care requires comprehensive health care data from a broad set of sources. However, missing data in medical records and matching field selection are 2 real-world challenges in patient-record linkage. Objective: In this study, we aimed to evaluate the extent to which incorporating the missing at random (MAR)--assumption in the Fellegi-Sunter model and using data-driven selected fields improve patient-matching accuracy using real-world use cases. 
Methods: We adapted the Fellegi-Sunter model to accommodate missing data using the MAR assumption and compared the adaptation to the common strategy of treating missing values as disagreement with matching fields specified by experts or selected by data-driven methods. We used 4 use cases, each containing a random sample of record pairs with match statuses ascertained by manual reviews. Use cases included health information exchange (HIE) record deduplication, linkage of public health registry records to HIE, linkage of Social Security Death Master File records to HIE, and deduplication of newborn screening records, which represent real-world clinical and public health scenarios. Matching performance was evaluated using the sensitivity, specificity, positive predictive value, negative predictive value, and F1-score. Results: Incorporating the MAR assumption in the Fellegi-Sunter model maintained or improved F1-scores, regardless of whether matching fields were expert-specified or selected by data-driven methods. Combining the MAR assumption and data-driven fields optimized the F1-scores in the 4 use cases. Conclusions: MAR is a reasonable assumption in real-world record linkage applications: it maintains or improves F1-scores regardless of whether matching fields are expert-specified or data-driven. Data-driven selection of fields coupled with MAR achieves the best overall performance, which can be especially useful in privacy-preserving record linkage. ", doi="10.2196/33775", url="https://www.jmir.org/2022/9/e33775", url="http://www.ncbi.nlm.nih.gov/pubmed/36173664" } @Article{info:doi/10.2196/33720, author="Kavianpour, Sanaz and Sutherland, James and Mansouri-Benssassi, Esma and Coull, Natalie and Jefferson, Emily", title="Next-Generation Capabilities in Trusted Research Environments: Interview Study", journal="J Med Internet Res", year="2022", month="Sep", day="20", volume="24", number="9", pages="e33720", keywords="data safe haven", keywords="health data analysis", keywords="trusted research environment", keywords="TRE", abstract="Background: A Trusted Research Environment (TRE; also known as a Safe Haven) is an environment supported by trained staff and agreed processes (principles and standards), providing access to data for research while protecting patient confidentiality. Accessing sensitive data without compromising the privacy and security of the data is a complex process. Objective: This paper presents the security measures, administrative procedures, and technical approaches adopted by TREs. Methods: We contacted 73 TRE operators, 22 (30\%) of whom, in the United Kingdom and internationally, agreed to be interviewed remotely under a nondisclosure agreement and to complete a questionnaire about their TRE. Results: We observed many similar processes and standards that TREs follow to adhere to the Seven Safes principles. The security processes and TRE capabilities for supporting observational studies using classical statistical methods were mature, and the requirements were well understood. However, we identified limitations in the security measures and capabilities of TREs to support ``next-generation'' requirements such as wide ranges of data types, ability to develop artificial intelligence algorithms and software within the environment, handling of big data, and timely import and export of data. 
Conclusions: We found a lack of software or other automation tools to support the community and limited knowledge of how to meet the next-generation requirements from the research community. Disclosure control for exporting artificial intelligence algorithms and software was found to be particularly challenging, and there is a clear need for additional controls to support this capability within TREs. ", doi="10.2196/33720", url="https://www.jmir.org/2022/9/e33720", url="http://www.ncbi.nlm.nih.gov/pubmed/36125859" } @Article{info:doi/10.2196/30094, author="Godia, Jordi and Pifarr{\'e}, Marc and Vilaplana, Jordi and Solsona, Francesc and Abella, Francesc and Calvo, Antoni and Mitjans, Anna and Gonzalez-Olmedo, Pau Maria", title="A Free App for Diagnosing Burnout (BurnOut App): Development Study", journal="JMIR Med Inform", year="2022", month="Sep", day="6", volume="10", number="9", pages="e30094", keywords="diagnose burnout", keywords="Android app", keywords="medical informatics", keywords="health care", keywords="health professionals", keywords="mobile health", keywords="digital health", keywords="health applications", keywords="online health", keywords="mobile phone", abstract="Background: Health specialists take care of us, but who takes care of them? These professionals are the most vulnerable to the increasingly common syndrome known as burnout. Burnout is a syndrome conceptualized as a result of chronic workplace stress that has not been successfully managed. Objective: This study aims to develop a useful app providing burnout self-diagnosis and tracking of burnout through a simple, intuitive, and user-friendly interface. Methods: We present the BurnOut app, an Android app developed using the Xamarin and MVVMCross platforms, which allows users to detect critical cases of psychological discomfort by implementing the Goldberg and Copenhagen Burnout Inventory tests. Results: The BurnOut app is robust, user-friendly, and efficient. The good performance of the app was demonstrated by comparing its features with those of similar apps in the literature. Conclusions: The BurnOut app is very useful for health specialists or users, in general, to detect burnout early and track its evolution. ", doi="10.2196/30094", url="https://medinform.jmir.org/2022/9/e30094", url="http://www.ncbi.nlm.nih.gov/pubmed/36066932" } @Article{info:doi/10.2196/38155, author="Han, Peijin and Fu, Sunyang and Kolis, Julie and Hughes, Richard and Hallstrom, R. Brian and Carvour, Martha and Maradit-Kremers, Hilal and Sohn, Sunghwan and Vydiswaran, Vinod V. G.", title="Multicenter Validation of Natural Language Processing Algorithms for the Detection of Common Data Elements in Operative Notes for Total Hip Arthroplasty: Algorithm Development and Validation", journal="JMIR Med Inform", year="2022", month="Aug", day="31", volume="10", number="8", pages="e38155", keywords="total hip arthroplasty", keywords="natural language processing", keywords="information extraction", keywords="model transferability", abstract="Background: Natural language processing (NLP) methods are powerful tools for extracting and analyzing critical information from free-text data. MedTaggerIE, an open-source NLP pipeline for information extraction based on text patterns, has been widely used in the annotation of clinical notes.
A rule-based system, MedTagger-total hip arthroplasty (THA), developed based on MedTaggerIE, was previously shown to correctly identify the surgical approach, fixation, and bearing surface from the THA operative notes at Mayo Clinic. Objective: This study aimed to assess the implementability, usability, and portability of MedTagger-THA at two external institutions, Michigan Medicine and the University of Iowa, and provide lessons learned for best practices. Methods: We conducted iterative test-apply-refinement processes with three involved sites---the development site (Mayo Clinic) and two deployment sites (Michigan Medicine and the University of Iowa). Mayo Clinic was the primary NLP development site, with the THA registry as the gold standard. The activities at the two deployment sites included the extraction of the operative notes, gold standard development (Michigan: registry data; Iowa: manual chart review), the refinement of NLP algorithms on training data, and the evaluation of test data. Error analyses were conducted to understand language variations across sites. To further assess the model specificity for approach and fixation, we applied the refined MedTagger-THA to arthroscopic hip procedures and periacetabular osteotomy cases, as neither of these operative notes should contain any approach or fixation keywords. Results: MedTagger-THA algorithms were implemented and refined independently for both sites. At Michigan, the study comprised THA-related notes for 2569 patient-date pairs. Before model refinement, MedTagger-THA algorithms demonstrated excellent accuracy for approach (96.6\%, 95\% CI 94.6\%-97.9\%) and fixation (95.7\%, 95\% CI 92.4\%-97.6\%). These results were comparable with internal accuracy at the development site (99.2\% for approach and 90.7\% for fixation). Model refinement improved accuracies slightly for both approach (99\%, 95\% CI 97.6\%-99.6\%) and fixation (98\%, 95\% CI 95.3\%-99.3\%). The specificity of approach identification was 88.9\% for arthroscopy cases, and the specificity of fixation identification was 100\% for both periacetabular osteotomy and arthroscopy cases. At the Iowa site, the study comprised an overall data set of 100 operative notes (50 training notes and 50 test notes). MedTagger-THA algorithms achieved moderate-high performance on the training data. After model refinement, the model achieved high performance for approach (100\%, 95\% CI 91.3\%-100\%), fixation (98\%, 95\% CI 88.3\%-100\%), and bearing surface (92\%, 95\% CI 80.5\%-97.3\%). Conclusions: High performance across centers was achieved for the MedTagger-THA algorithms, demonstrating that they were sufficiently implementable, usable, and portable to different deployment sites. This study provided important lessons learned during the model deployment and validation processes, and it can serve as a reference for transferring rule-based electronic health record models. ", doi="10.2196/38155", url="https://medinform.jmir.org/2022/8/e38155", url="http://www.ncbi.nlm.nih.gov/pubmed/36044253" } @Article{info:doi/10.2196/39057, author="Kiser, C. Amber and Eilbeck, Karen and Ferraro, P. Jeffrey and Skarda, E. David and Samore, H. 
Matthew and Bucher, Brian", title="Standard Vocabularies to Improve Machine Learning Model Transferability With Electronic Health Record Data: Retrospective Cohort Study Using Health Care--Associated Infection", journal="JMIR Med Inform", year="2022", month="Aug", day="30", volume="10", number="8", pages="e39057", keywords="standard vocabularies", keywords="machine learning", keywords="electronic health records", keywords="model transferability", keywords="data heterogeneity", abstract="Background: With the widespread adoption of electronic healthcare records (EHRs) by US hospitals, there is an opportunity to leverage this data for the development of predictive algorithms to improve clinical care. A key barrier in model development and implementation includes the external validation of model discrimination, which is rare and often results in worse performance. One reason why machine learning models are not externally generalizable is data heterogeneity. A potential solution to address the substantial data heterogeneity between health care systems is to use standard vocabularies to map EHR data elements. The advantage of these vocabularies is a hierarchical relationship between elements, which allows the aggregation of specific clinical features to more general grouped concepts. Objective: This study aimed to evaluate grouping EHR data using standard vocabularies to improve the transferability of machine learning models for the detection of postoperative health care--associated infections across institutions with different EHR systems. Methods: Patients who underwent surgery from the University of Utah Health and Intermountain Healthcare from July 2014 to August 2017 with complete follow-up data were included. The primary outcome was a health care--associated infection within 30 days of the procedure. EHR data from 0-30 days after the operation were mapped to standard vocabularies and grouped using the hierarchical relationships of the vocabularies. Model performance was measured using the area under the receiver operating characteristic curve (AUC) and F1-score in internal and external validations. To evaluate model transferability, a difference-in-difference metric was defined as the difference in performance drop between internal and external validations for the baseline and grouped models. Results: A total of 5775 patients from the University of Utah and 15,434 patients from Intermountain Healthcare were included. The prevalence of selected outcomes was from 4.9\% (761/15,434) to 5\% (291/5775) for surgical site infections, from 0.8\% (44/5775) to 1.1\% (171/15,434) for pneumonia, from 2.6\% (400/15,434) to 3\% (175/5775) for sepsis, and from 0.8\% (125/15,434) to 0.9\% (50/5775) for urinary tract infections. In all outcomes, the grouping of data using standard vocabularies resulted in a reduced drop in AUC and F1-score in external validation compared to baseline features (all P<.001, except urinary tract infection AUC: P=.002). The difference-in-difference metrics ranged from 0.005 to 0.248 for AUC and from 0.075 to 0.216 for F1-score. Conclusions: We demonstrated that grouping machine learning model features based on standard vocabularies improved model transferability between data sets across 2 institutions. Improving model transferability using standard vocabularies has the potential to improve the generalization of clinical prediction models across the health care system. 
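A minimal sketch of the vocabulary roll-up idea the entry describes, assuming a toy concept hierarchy; a real implementation would traverse SNOMED CT or LOINC relationship tables rather than this hypothetical PARENT map:

```python
# Toy child -> parent hierarchy (hypothetical labels, not real SNOMED content)
PARENT = {
    "E. coli UTI": "Urinary tract infection",
    "Klebsiella UTI": "Urinary tract infection",
    "Urinary tract infection": "Infectious disease",
    "Lobar pneumonia": "Pneumonia",
    "Pneumonia": "Infectious disease",
}

def roll_up(concept: str, levels: int = 1) -> str:
    """Aggregate a specific concept to a more general ancestor."""
    for _ in range(levels):
        concept = PARENT.get(concept, concept)  # stops at the hierarchy root
    return concept

raw_features = ["E. coli UTI", "Klebsiella UTI", "Lobar pneumonia"]
print({roll_up(f) for f in raw_features})  # {'Urinary tract infection', 'Pneumonia'}
```

Grouping in this way shrinks a site-specific feature space into shared concepts, which is what lets a model trained at one institution see familiar inputs at another.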
", doi="10.2196/39057", url="https://medinform.jmir.org/2022/8/e39057", url="http://www.ncbi.nlm.nih.gov/pubmed/36040784" } @Article{info:doi/10.2196/32319, author="Casanova, J. Isidoro and Campos, Manuel and Juarez, M. Jose and Gomariz, Antonio and Lorente-Ros, Marta and Lorente, A. Jose", title="Using the Diagnostic Odds Ratio to Select Patterns to Build an Interpretable Pattern-Based Classifier in a Clinical Domain: Multivariate Sequential Pattern Mining Study", journal="JMIR Med Inform", year="2022", month="Aug", day="10", volume="10", number="8", pages="e32319", keywords="sequential patterns", keywords="survival classification", keywords="diagnostic odds ratio", keywords="burn units", abstract="Background: It is important to exploit all available data on patients in settings such as intensive care burn units (ICBUs), where several variables are recorded over time. It is possible to take advantage of the multivariate patterns that model the evolution of patients to predict their survival. However, pattern discovery algorithms generate a large number of patterns, of which only some are relevant for classification. Objective: We propose to use the diagnostic odds ratio (DOR) to select multivariate sequential patterns used in the classification in a clinical domain, rather than employing frequency properties. Methods: We used data obtained from the ICBU at the University Hospital of Getafe, where 6 temporal variables for 465 patients were registered every day during 5 days, and to model the evolution of these clinical variables, we used multivariate sequential patterns by applying 2 different discretization methods for the continuous attributes. We compared 4 ways in which to employ the DOR for pattern selection: (1) we used it as a threshold to select patterns with a minimum DOR; (2) we selected patterns whose differential DORs are higher than a threshold with regard to their extensions; (3) we selected patterns whose DOR CIs do not overlap; and (4) we proposed the combination of threshold and nonoverlapping CIs to select the most discriminative patterns. As a baseline, we compared our proposals with Jumping Emerging Patterns, one of the most frequently used techniques for pattern selection that utilizes frequency properties. Results: We have compared the number and length of the patterns eventually selected, classification performance, and pattern and model interpretability. We show that discretization has a great impact on the accuracy of the classification model, but that a trade-off must be found between classification accuracy and the physicians' capacity to interpret the patterns obtained. We have also identified that the experiments combining threshold and nonoverlapping CIs (Option 4) obtained the fewest number of patterns but also with the smallest size, thus implying the loss of an acceptable accuracy with regard to clinician interpretation. The best classification model according to the trade-off is a JRIP classifier with only 5 patterns (20 items) that was built using unsupervised correlation preserving discretization and differential DOR in a beam search for the best pattern. It achieves a specificity of 56.32\% and an area under the receiver operating characteristic curve of 0.767. Conclusions: A method for the classification of patients' survival can benefit from the use of sequential patterns, as these patterns consider knowledge about the temporal evolution of the variables in the case of ICBU. 
We have proved that the DOR can be used in several ways, and that it is a suitable measure to select discriminative and interpretable quality patterns. ", doi="10.2196/32319", url="https://medinform.jmir.org/2022/8/e32319", url="http://www.ncbi.nlm.nih.gov/pubmed/35947437" } @Article{info:doi/10.2196/36687, author="Dixit, Abhishek and Lee, Michael", title="Quantification of Digital Body Maps for Pain: Development and Application of an Algorithm for Generating Pain Frequency Maps", journal="JMIR Form Res", year="2022", month="Jun", day="24", volume="6", number="6", pages="e36687", keywords="Scalable Vector Graphics", keywords="SVG", keywords="pain drawing", keywords="pain location", keywords="Body Pain Map", keywords="overlap computation", keywords="heat map", keywords="pain frequency map", keywords="algorithm", abstract="Background: Pain is an unpleasant sensation that signals potential or actual bodily injury. The locations of bodily pain can be communicated and recorded by freehand drawing on 2D or 3D (manikin) surface maps. Freehand pain drawings are often part of validated pain questionnaires (eg, the Brief Pain Inventory) and use 2D templates with undemarcated body outlines. The simultaneous analysis of drawings allows the generation of pain frequency maps that are clinically useful for identifying areas of common pain in a disease. The grid-based approach (dividing a template into cells) allows easy generation of pain frequency maps, but the grid's granularity influences data capture accuracy and end-user usability. The grid-free templates circumvent the problem related to grid creation and selection and provide an unbiased basis for drawings that most resemble paper drawings. However, the precise capture of drawn areas poses considerable challenges in producing pain frequency maps. While web-based applications and mobile-based apps for freehand digital drawings are widely available, tools for generating pain frequency maps from grid-free drawings are lacking. Objective: We sought to provide an algorithm that can process any number of freehand drawings on any grid-free 2D body template to generate a pain frequency map. We envisage the use of the algorithm in clinical or research settings to facilitate fine-grain comparisons of human pain anatomy between disease diagnosis or disorders or as an outcome metric to guide monitoring or discovery of treatments. Methods: We designed a web-based tool to capture freehand pain drawings using a grid-free 2D body template. Each drawing consisted of overlapping rectangles (Scalable Vector Graphics elements) created by scribbling in the same area of the body template. An algorithm was developed and implemented in Python to compute the overlap of rectangles and generate a pain frequency map. The utility of the algorithm was demonstrated on drawings obtained from 2 clinical data sets, one of which was a clinical drug trial (ISRCTN68734605). We also used simulated data sets of overlapping rectangles to evaluate the performance of the algorithm. Results: The algorithm produced nonoverlapping rectangles representing unique locations on the body template. Each rectangle carries an overlap frequency that denotes the number of participants with pain at that location. When transformed into an HTML file, the output is feasibly rendered as a pain frequency map on web browsers. The layout (vertical-horizontal) of the output rectangles can be specified based on the dimensions of the body regions. 
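A compact way to compute such overlap frequencies for axis-aligned rectangles is coordinate compression; the sketch below is a generic illustration and does not reproduce the published algorithm's SVG handling or output layout:

```python
from itertools import product

def overlap_counts(rects):
    """rects: list of (x1, y1, x2, y2). Returns non-overlapping cells with the
    number of input rectangles covering each cell (coordinate compression)."""
    xs = sorted({x for r in rects for x in (r[0], r[2])})
    ys = sorted({y for r in rects for y in (r[1], r[3])})
    cells = []
    for (x1, x2), (y1, y2) in product(zip(xs, xs[1:]), zip(ys, ys[1:])):
        n = sum(r[0] <= x1 and x2 <= r[2] and r[1] <= y1 and y2 <= r[3] for r in rects)
        if n:
            cells.append(((x1, y1, x2, y2), n))
    return cells

# Two participants' scribbles overlapping on the same body region:
for cell, freq in overlap_counts([(0, 0, 4, 4), (2, 2, 6, 6)]):
    print(cell, freq)  # the (2, 2, 4, 4) cell has frequency 2
```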
The output can also be exported to a CSV file for further analysis. Conclusions: Although further validation in much larger clinical data sets is required, the algorithm in its current form allows for the generation of pain frequency maps from any number of freehand drawings on any 2D body template. ", doi="10.2196/36687", url="https://formative.jmir.org/2022/6/e36687", url="http://www.ncbi.nlm.nih.gov/pubmed/35749160" } @Article{info:doi/10.2196/33847, author="Chatterjee, Ayan and Prinz, Andreas", title="Personalized Recommendations for Physical Activity e-Coaching (OntoRecoModel): Ontological Modeling", journal="JMIR Med Inform", year="2022", month="Jun", day="23", volume="10", number="6", pages="e33847", keywords="descriptive logic", keywords="ontology", keywords="e-coach", keywords="reasoning", keywords="recommendation generation", abstract="Background: Automatic e-coaching may motivate individuals to lead a healthy lifestyle with early health risk prediction, personalized recommendation generation, and goal evaluation. Multiple studies have reported on uninterrupted and automatic monitoring of behavioral aspects (such as sedentary time, amount, and type of physical activity); however, e-coaching and personalized feedback techniques are still in a nascent stage. Current intelligent coaching strategies are mostly based on handcrafted string messages that rarely individualize to each user's needs, context, and preferences. Therefore, more realistic, flexible, practical, sophisticated, and engaging strategies are needed to model personalized recommendations. Objective: This study aims to design and develop an ontology to model personalized recommendation message intent, components (such as suggestion, feedback, argument, and follow-ups), and contents (such as spatial and temporal content and objects relevant to perform the recommended activities). A reasoning technique will help to discover implied knowledge from the proposed ontology. Furthermore, recommendation messages can be classified into different categories in the proposed ontology. Methods: The ontology was created using Prot{\'e}g{\'e} (version 5.5.0) open-source software. We used the Java-based Jena Framework (version 3.16) to build a semantic web application as a proof of concept, which included Resource Description Framework application programming interface, World Wide Web Consortium Web Ontology Language application programming interface, native tuple database, and SPARQL Protocol and Resource Description Framework Query Language query engine. The HermiT (version 1.4.3.x) ontology reasoner available in Prot{\'e}g{\'e} 5.x verified the logical and structural consistency of the proposed ontology. To verify the proposed ontology model, we simulated data for 8 test cases. The personalized recommendation messages were generated based on the processing of personal activity data in combination with contextual weather data and personal preference data. The developed ontology was processed using a query engine against a rule base to generate personalized recommendations. Results: The proposed ontology was implemented in automatic activity coaching to generate and deliver meaningful, personalized lifestyle recommendations. The ontology can be visualized using OWLViz and OntoGraf. In addition, we developed an ontology verification module that behaves similarly to a rule-based decision support system to analyze the generation and delivery of personalized recommendation messages following a logical structure.
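The study's stack is Java based (Protégé, Jena, HermiT); purely as an illustration of the triple-store-and-SPARQL round trip it describes, here is an equivalent sketch in Python's rdflib, with a hypothetical namespace and message:

```python
from rdflib import Graph, Literal, Namespace, RDF

EX = Namespace("http://example.org/ecoach#")  # hypothetical namespace
g = Graph()
g.add((EX.msg1, RDF.type, EX.Recommendation))
g.add((EX.msg1, EX.hasComponent, EX.Suggestion))
g.add((EX.msg1, EX.hasContent, Literal("Take a 30-minute walk; the weather is clear.")))

# SPARQL: fetch the content of every message that carries a Suggestion component
q = """
SELECT ?content WHERE {
  ?m a ex:Recommendation ;
     ex:hasComponent ex:Suggestion ;
     ex:hasContent ?content .
}"""
for row in g.query(q, initNs={"ex": EX}):
    print(row.content)
```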
Conclusions: This study led to the creation of a meaningful ontology to generate and model personalized recommendation messages for physical activity coaching. ", doi="10.2196/33847", url="https://medinform.jmir.org/2022/6/e33847", url="http://www.ncbi.nlm.nih.gov/pubmed/35737439" } @Article{info:doi/10.2196/36914, author="Sun, Chen and Xu, Jing and Tao, Junxian and Dong, Yu and Chen, Haiyan and Jia, Zhe and Ma, Yingnan and Zhang, Mingming and Wei, Siyu and Tang, Guoping and Lyu, Hongchao and Jiang, Yongshuai", title="Mobile-Based and Self-Service Tool (iPed) to Collect, Manage, and Visualize Pedigree Data: Development Study", journal="JMIR Form Res", year="2022", month="Jun", day="23", volume="6", number="6", pages="e36914", keywords="pedigree", keywords="pedigree data", keywords="visualization", keywords="self-service", keywords="mobile-based", abstract="Background: Pedigree data (family history) are indispensable for genetics studies and the assessment of individuals' disease susceptibility. With the popularity of genetics testing, the collection of pedigree data is becoming more common. However, it can be time-consuming, laborious, and tedious for clinicians to investigate all pedigree data for each patient. A self-service robot could inquire about patients' family history in place of professional clinicians or genetic counselors. Objective: The aim of this study was to develop a mobile-based and self-service tool to collect and visualize pedigree data, not only for professionals but also for those who know little about genetics. Methods: There are 4 main aspects in the iPed construction, including interface building, data processing, data storage, and data visualization. The user interface was built using HTML, JavaScript libraries, and Cascading Style Sheets (version 3; Daniel Eden). Processing of the submitted data is carried out by PHP programming language. MySQL is used to document and manage the pedigree data. PHP calls the R script to accomplish the visualization. Results: iPed is freely available to all users through the iPed website. No software is required to be installed, no pedigree files need to be prepared, and no knowledge of genetics or programs is required. The users can easily complete their pedigree data collection and visualization on their own and through a dialogue with iPed. Meanwhile, iPed provides a database that stores all users' information. Therefore, when the users need to construct new pedigree trees for other genetic traits or modify the pedigree trees that have already been created, unnecessary duplication of operations can be avoided. Conclusions: iPed is a mobile-based and self-service tool that could be used by both professionals and nonprofessionals at any time and from any place. It reduces the amount of time required to collect, manage, and visualize pedigree data. ", doi="10.2196/36914", url="https://formative.jmir.org/2022/6/e36914", url="http://www.ncbi.nlm.nih.gov/pubmed/35737451" } @Article{info:doi/10.2196/34141, author="Cooper, R. Ian and Lindsay, Cameron and Fraser, Keaton and Hill, T. Tiffany and Siu, Andrew and Fletcher, Sarah and Klimas, Jan and Hamilton, Michee-Ana and Frazer, D. Amanda and Humphrys, Elka and Koepke, Kira and Hedden, Lindsay and Price, Morgan and McCracken, K. 
Rita", title="Finding Primary Care---Repurposing Physician Registration Data to Generate a Regionally Accurate List of Primary Care Clinics: Development and Validation of an Open-Source Algorithm", journal="JMIR Form Res", year="2022", month="Jun", day="22", volume="6", number="6", pages="e34141", keywords="physicians, primary care", keywords="primary health care", keywords="health services accessibility", keywords="practice patterns, physicians", keywords="physicians' offices", keywords="computing methodologies", keywords="algorithms", abstract="Background: Some Canadians have limited access to longitudinal primary care, despite its known advantages for population health. Current initiatives to transform primary care aim to increase access to team-based primary care clinics. However, many regions lack a reliable method to enumerate clinics, limiting estimates of clinical capacity and ongoing access gaps. A region-based complete clinic list is needed to effectively describe clinic characteristics and to compare primary care outcomes at the clinic level. Objective: The objective of this study is to show how publicly available data sources, including the provincial physician license registry, can be used to generate a verifiable, region-wide list of primary care clinics in British Columbia, Canada, using a process named the Clinic List Algorithm (CLA). Methods: The CLA has 10 steps: (1) collect data sets, (2) develop clinic inclusion and exclusion criteria, (3) process data sets, (4) consolidate data sets, (5) transform from list of physicians to initial list of clinics, (6) add additional metadata, (7) create working lists, (8) verify working lists, (9) consolidate working lists, and (10) adjust processing steps based on learnings. Results: The College of Physicians and Surgeons of British Columbia Registry contained 13,726 physicians, at 2915 unique addresses, 6942 (50.58\%) of whom were family physicians (FPs) licensed to practice in British Columbia. The CLA identified 1239 addresses where primary care was delivered by 4262 (61.39\%) FPs. Of the included addresses, 84.50\% (n=1047) were in urban locations, and there was a median of 2 (IQR 2-4, range 1-23) FPs at each unique address. Conclusions: The CLA provides a region-wide description of primary care clinics that improves on simple counts of primary care providers or self-report lists. It identifies the number and location of primary care clinics and excludes primary care providers who are likely not providing community-based primary care. Such information may be useful for estimates of capacity of primary care, as well as for policy planning and research in regions engaged in primary care evaluation or transformation. ", doi="10.2196/34141", url="https://formative.jmir.org/2022/6/e34141", url="http://www.ncbi.nlm.nih.gov/pubmed/35731556" } @Article{info:doi/10.2196/30890, author="Choudhury, Joydhriti and Ashraf, Bin Faisal", title="An Analysis of Different Distance-Linkage Methods for Clustering Gene Expression Data and Observing Pleiotropy: Empirical Study", journal="JMIR Bioinform Biotech", year="2022", month="Jun", day="17", volume="3", number="1", pages="e30890", keywords="gene clustering", keywords="gene expression", keywords="distance metric", keywords="linkage method", keywords="hierarchical clustering", keywords="pleiotropy", abstract="Background: Large amounts of biological data have been generated over the last few decades, encouraging scientists to look for connections between genes that cause various diseases. 
Clustering illustrates such a relationship between numerous species and genes. Finding an appropriate distance-linkage metric to construct clusters from diverse biological data sets has thus become critical. Pleiotropy is also important for a gene's expression to vary and create varied consequences in living things. Finding the pleiotropy of genes responsible for various diseases has become a major research challenge. Objective: Our goal was to establish the optimal distance-linkage strategy for creating reliable clusters from diverse data sets and identifying the common genes that cause various tumors to observe genes with pleiotropic effect. Methods: We considered 4 linking methods---single, complete, average, and ward---and 3 distance metrics---Euclidean, maximum, and Manhattan distance. For assessing the quality of different sets of clusters, we used a fitness function that combines silhouette width and within-cluster distance. Results: According to our findings, the maximum distance measure produces the highest-quality clusters. Moreover, for medium data set, the average linkage method, and for large data set, the ward linkage method works best. The outcome is not improved by using ensemble clustering. We also discovered genes that cause 3 different cancers and used gene enrichment to confirm our findings. Conclusions: Accuracy is crucial in clustering, and we investigated the accuracy of numerous clustering techniques in our research. Other studies may aid related works if the data set is similar to ours. ", doi="10.2196/30890", url="https://bioinform.jmir.org/2022/1/e30890" } @Article{info:doi/10.2196/35696, author="Husted, Skov Karina Louise and Brink-Kj{\ae}r, Andreas and Fogelstr{\o}m, Mathilde and Hulst, Pernille and Bleibach, Akita and Henneberg, Kaj-{\AA}ge and S{\o}rensen, Dissing Helge Bjarup and Dela, Flemming and Jacobsen, Brings Jens Christian and Helge, Wulff J{\o}rn", title="A Model for Estimating Biological Age From Physiological Biomarkers of Healthy Aging: Cross-sectional Study", journal="JMIR Aging", year="2022", month="May", day="10", volume="5", number="2", pages="e35696", keywords="biological age", keywords="model development", keywords="principal component analysis", keywords="healthy aging", keywords="biomarkers", keywords="aging", abstract="Background: Individual differences in the rate of aging and susceptibility to disease are not accounted for by chronological age alone. These individual differences are better explained by biological age, which may be estimated by biomarker prediction models. In the light of the aging demographics of the global population and the increase in lifestyle-related morbidities, it is interesting to invent a new biological age model to be used for health promotion. Objective: This study aims to develop a model that estimates biological age based on physiological biomarkers of healthy aging. Methods: Carefully selected physiological variables from a healthy study population of 100 women and men were used as biomarkers to establish an estimate of biological age. Principal component analysis was applied to the biomarkers and the first principal component was used to define the algorithm estimating biological age. 
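A minimal sketch of a PC1-based biological age score on simulated biomarkers; the published model uses 9 specific markers and a correction step not reproduced here:

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(1)
age = rng.uniform(20, 80, 100)
# Three invented biomarkers that drift with age at different rates
markers = np.column_stack([age * k + rng.normal(0, 10, 100) for k in (0.5, -0.3, 0.8)])

pc1 = PCA(n_components=1).fit_transform(StandardScaler().fit_transform(markers)).ravel()
if np.corrcoef(pc1, age)[0, 1] < 0:  # the sign of a PC is arbitrary; orient it with age
    pc1 = -pc1

# Rescale PC1 onto the chronological-age scale
bio_age = age.mean() + (pc1 - pc1.mean()) * age.std() / pc1.std()
print("corr(bio_age, age) =", round(np.corrcoef(bio_age, age)[0, 1], 2))
```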
Results: The first principal component accounted for 31\% in women and 25\% in men of the total variance in the biological age model combining mean arterial pressure, glycated hemoglobin, waist circumference, forced expiratory volume in 1 second, maximal oxygen consumption, adiponectin, high-density lipoprotein, total cholesterol, and soluble urokinase-type plasminogen activator receptor. The correlation between the corrected biological age and chronological age was r=0.86 (P<.001) and r=0.81 (P<.001) for women and men, respectively, and the agreement was high and unbiased. No difference was found between mean chronological age and mean biological age, and the slope of the regression line was near 1 for both sexes. Conclusions: Estimating biological age from these 9 biomarkers of aging can be used to assess general health compared with the healthy aging trajectory. This may be useful to evaluate health interventions and as an aid to enhance awareness of individual health risks and behavior when deviating from this trajectory. Trial Registration: ClinicalTrials.gov NCT03680768; https://clinicaltrials.gov/ct2/show/NCT03680768 International Registered Report Identifier (IRRID): RR2-10.2196/19209 ", doi="10.2196/35696", url="https://aging.jmir.org/2022/2/e35696", url="http://www.ncbi.nlm.nih.gov/pubmed/35536617" } @Article{info:doi/10.2196/33219, author="Cowie, Kathryn and Rahmatullah, Asad and Hardy, Nicole and Holub, Karl and Kallmes, Kevin", title="Web-Based Software Tools for Systematic Literature Review in Medicine: Systematic Search and Feature Analysis", journal="JMIR Med Inform", year="2022", month="May", day="2", volume="10", number="5", pages="e33219", keywords="software tools", keywords="feature analysis", keywords="systematic reviews", abstract="Background: Systematic reviews (SRs) are central to evaluating therapies but have high costs in terms of both time and money. Many software tools exist to assist with SRs, but most tools do not support the full process, and transparency and replicability of SR depend on performing and presenting evidence according to established best practices. Objective: This study aims to provide a basis for comparing and selecting between web-based software tools that support SR, by conducting a feature-by-feature comparison of SR tools. Methods: We searched for SR tools by reviewing any such tool listed in the SR Toolbox, previous reviews of SR tools, and qualitative Google searching. We included all SR tools that were currently functional and required no coding, and excluded reference managers, desktop applications, and statistical software. The list of features to assess was populated by combining all features assessed in 4 previous reviews of SR tools; we also added 5 features (manual addition, screening automation, dual extraction, living review, and public outputs) that were independently noted as best practices or enhancements of transparency and replicability. Then, 2 reviewers assigned binary present or absent assessments to all SR tools with respect to all features, and a third reviewer adjudicated all disagreements. Results: Of the 53 SR tools found, 55\% (29/53) were excluded, leaving 45\% (24/53) for assessment. In total, 30 features were assessed across 6 classes, and the interobserver agreement was 86.46\%. 
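For readers who want the agreement arithmetic, a toy illustration of raw percent agreement alongside the chance-corrected kappa; the reviewer ratings below are invented, and the 86.46% figure in the entry above is raw agreement:

```python
import numpy as np
from sklearn.metrics import cohen_kappa_score

# Two reviewers' binary assessments (1 = feature present) over 10 tool features
rev1 = np.array([1, 1, 0, 1, 0, 1, 1, 0, 0, 1])
rev2 = np.array([1, 0, 0, 1, 0, 1, 1, 1, 0, 1])
print("raw agreement:", (rev1 == rev2).mean())                 # 0.8
print("chance-corrected kappa:", round(cohen_kappa_score(rev1, rev2), 2))
```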
Giotto Compliance (27/30, 90\%), DistillerSR (26/30, 87\%), and Nested Knowledge (26/30, 87\%) support the most features, followed by EPPI-Reviewer Web (25/30, 83\%), LitStream (23/30, 77\%), JBI SUMARI (21/30, 70\%), and SRDB.PRO (VTS Software) (21/30, 70\%). Fewer than half of all the features assessed are supported by 7 tools: RobotAnalyst (National Centre for Text Mining), SRDR (Agency for Healthcare Research and Quality), SyRF (Systematic Review Facility), Data Abstraction Assistant (Center for Evidence Synthesis in Health), SR Accelerator (Institute for Evidence-Based Healthcare), RobotReviewer (RobotReviewer), and COVID-NMA (COVID-NMA). Notably, of the 24 tools, only 10 (42\%) support direct search, only 7 (29\%) offer dual extraction, and only 13 (54\%) offer living/updatable reviews. Conclusions: DistillerSR, Nested Knowledge, and EPPI-Reviewer Web each offer a high density of SR-focused web-based tools. By transparent comparison and discussion regarding SR tool functionality, the medical community can both choose among existing software offerings and note the areas of growth needed, most notably in the support of living reviews. ", doi="10.2196/33219", url="https://medinform.jmir.org/2022/5/e33219", url="http://www.ncbi.nlm.nih.gov/pubmed/35499859" } @Article{info:doi/10.2196/32645, author="McIntyre, F. Anne and Fellows, E. Ian and Gutreuter, Steve and Hladik, Wolfgang", title="Population Size Estimation From Capture-Recapture Studies Using shinyrecap: Design and Implementation of a Web-Based Graphical User Interface", journal="JMIR Public Health Surveill", year="2022", month="Apr", day="26", volume="8", number="4", pages="e32645", keywords="population size estimation", keywords="multiple-source capture-recapture", keywords="Bayesian models", keywords="latent-class models", keywords="Shiny", keywords="HIV", keywords="key populations", keywords="epidemiology", keywords="digital health", keywords="online health application", keywords="populations", keywords="risk factors", keywords="online communities", abstract="Background: Population size estimates (PSE) provide critical information in determining resource allocation for HIV services geared toward those at high risk of HIV, including female sex workers, men who have sex with men, and people who inject drugs. Capture-recapture (CRC) is often used to estimate the size of these often-hidden populations. Compared with the commonly used 2-source CRC, CRC relying on 3 (or more) samples (3S-CRC) can provide more robust PSE but involve far more complex statistical analysis. Objective: This study aims to design and describe the Shiny application (shinyrecap), a user-friendly interface that can be used by field epidemiologists to produce PSE. Methods: shinyrecap is built on the Shiny web application framework for R. This allows it to seamlessly integrate with the sophisticated CRC statistical packages (eg, Rcapture, dga, LCMCR). Additionally, the application may be accessed online or run locally on the user's machine. Results: The application enables users to engage in sample size calculation based on a simulation framework. It assists in the proper formatting of collected data by providing a tool to convert commonly used formats to that used by the analysis software. A wide variety of methodologies are supported by the analysis tool, including log-linear, Bayesian model averaging, and Bayesian latent class models. For each methodology, diagnostics and model checking interfaces are provided. 
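As a conceptual stepping stone to the 3-source models the app supports, the classical 2-source capture-recapture estimate is one line of arithmetic; the counts below are invented:

```python
def chapman_estimate(n1, n2, m):
    """Chapman's bias-corrected two-source capture-recapture estimator.
    n1, n2: sizes of the two capture samples; m: individuals seen in both."""
    return (n1 + 1) * (n2 + 1) / (m + 1) - 1

# e.g., 400 people reached in outreach round 1, 350 in round 2, 70 in both
print(round(chapman_estimate(400, 350, 70)))  # ~1981 estimated population size
```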
Conclusions: Through a use case, we demonstrated the broad utility of this powerful tool with 3S-CRC data to produce PSE for female sex workers in a subnational unit of a country in sub-Saharan Africa. ", doi="10.2196/32645", url="https://publichealth.jmir.org/2022/4/e32645", url="http://www.ncbi.nlm.nih.gov/pubmed/35471234" } @Article{info:doi/10.2196/36762, author="Adamowicz, Lukas and Christakis, Yiorgos and Czech, D. Matthew and Adamusiak, Tomasz", title="SciKit Digital Health: Python Package for Streamlined Wearable Inertial Sensor Data Processing", journal="JMIR Mhealth Uhealth", year="2022", month="Apr", day="21", volume="10", number="4", pages="e36762", keywords="wearable sensors", keywords="digital medicine", keywords="gait analysis", keywords="human movement analysis", keywords="digital biomarkers", keywords="uHealth", keywords="wearable", keywords="sensor", keywords="gait", keywords="movement", keywords="mobility", keywords="physical activity", keywords="sleep", keywords="Python", keywords="coding", keywords="open source", keywords="software package", keywords="algorithm", keywords="machine learning", keywords="data science", keywords="computer programming", doi="10.2196/36762", url="https://mhealth.jmir.org/2022/4/e36762", url="http://www.ncbi.nlm.nih.gov/pubmed/35353039" } @Article{info:doi/10.2196/32578, author="Chew, Jocelyn Han Shi", title="The Use of Artificial Intelligence--Based Conversational Agents (Chatbots) for Weight Loss: Scoping Review and Practical Recommendations", journal="JMIR Med Inform", year="2022", month="Apr", day="13", volume="10", number="4", pages="e32578", keywords="chatbot", keywords="conversational agent", keywords="artificial intelligence", keywords="weight loss", keywords="obesity", keywords="overweight", keywords="natural language processing", keywords="sentiment analysis", keywords="machine learning", keywords="behavior change", keywords="mobile phone", abstract="Background: Overweight and obesity have now reached a state of a pandemic despite the clinical and commercial programs available. Artificial intelligence (AI) chatbots have a strong potential in optimizing such programs for weight loss. Objective: This study aimed to review AI chatbot use cases for weight loss and to identify the essential components for prolonging user engagement. Methods: A scoping review was conducted using the 5-stage framework by Arksey and O'Malley. Articles were searched across nine electronic databases (ACM Digital Library, CINAHL, Cochrane Central, Embase, IEEE Xplore, PsycINFO, PubMed, Scopus, and Web of Science) until July 9, 2021. Gray literature, reference lists, and Google Scholar were also searched. Results: A total of 23 studies with 2231 participants were included and evaluated in this review. Most studies (8/23, 35\%) focused on using AI chatbots to promote both a healthy diet and exercise, 13\% (3/23) of the studies used AI chatbots solely for lifestyle data collection and obesity risk assessment whereas only 4\% (1/23) of the studies focused on promoting a combination of a healthy diet, exercise, and stress management. In total, 48\% (11/23) of the studies used only text-based AI chatbots, 52\% (12/23) operationalized AI chatbots through smartphones, and 39\% (9/23) integrated data collected through fitness wearables or Internet of Things appliances. The core functions of AI chatbots were to provide personalized recommendations (20/23, 87\%), motivational messages (18/23, 78\%), gamification (6/23, 26\%), and emotional support (6/23, 26\%). 
Study participants who experienced speech- and augmented reality--based chatbot interactions in addition to text-based chatbot interactions reported higher user engagement because of the convenience of hands-free interactions. Enabling conversations through multiple platforms (eg, SMS text messaging, Slack, Telegram, Signal, WhatsApp, or Facebook Messenger) and devices (eg, laptops, Google Home, and Amazon Alexa) was reported to increase user engagement. The human semblance of chatbots through verbal and nonverbal cues improved user engagement through interactivity and empathy. Other techniques used in text-based chatbots included personally and culturally appropriate colloquial tones and content; emojis that emulate human emotional expressions; positively framed words; citations of credible information sources; personification; validation; and the provision of real-time, fast, and reliable recommendations. Prevailing issues included privacy; accountability; user burden; and interoperability with other databases, third-party applications, social media platforms, devices, and appliances. Conclusions: AI chatbots should be designed to be human-like, personalized, contextualized, immersive, and enjoyable to enhance user experience, engagement, behavior change, and weight loss. These require the integration of health metrics (eg, based on self-reports and wearable trackers), personality and preferences (eg, based on goal achievements), circumstantial behaviors (eg, trigger-based overconsumption), and emotional states (eg, chatbot conversations and wearable stress detectors) to deliver personalized and effective recommendations for weight loss. ", doi="10.2196/32578", url="https://medinform.jmir.org/2022/4/e32578", url="http://www.ncbi.nlm.nih.gov/pubmed/35416791" } @Article{info:doi/10.2196/29982, author="Park, Yeongjun James and Hsu, Tzu-Chun and Hu, Jiun-Ruey and Chen, Chun-Yuan and Hsu, Wan-Ting and Lee, Matthew and Ho, Joshua and Lee, Chien-Chang", title="Predicting Sepsis Mortality in a Population-Based National Database: Machine Learning Approach", journal="J Med Internet Res", year="2022", month="Apr", day="13", volume="24", number="4", pages="e29982", keywords="sepsis", keywords="mortality", keywords="machine learning", keywords="SuperLearner", abstract="Background: Although machine learning (ML) algorithms have been applied to point-of-care sepsis prognostication, ML has not been used to predict sepsis mortality in an administrative database. Objective: The aim of this study is to examine the performance of common ML algorithms in predicting sepsis mortality in adult patients with sepsis and compare it with that of the conventional context knowledge--based logistic regression approach. Methods: We examined inpatient admissions for sepsis in the US National Inpatient Sample using hospitalizations in 2010-2013 as the training data set. We developed four ML models to predict in-hospital mortality: logistic regression with least absolute shrinkage and selection operator regularization, random forest, gradient-boosted decision tree, and deep neural network. To estimate their performance, we compared our models with the Super Learner model.
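The Super Learner is, in essence, cross-validated stacking; a hedged analogue using scikit-learn's StackingClassifier on synthetic data (the base learners and sample sizes are illustrative, not the study's configuration):

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Synthetic, class-imbalanced stand-in for an administrative cohort
X, y = make_classification(n_samples=4000, n_features=20, weights=[0.9], random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)

stack = StackingClassifier(
    estimators=[("rf", RandomForestClassifier(n_estimators=100, random_state=0)),
                ("gb", GradientBoostingClassifier(random_state=0))],
    final_estimator=LogisticRegression(max_iter=1000), cv=5)
stack.fit(X_tr, y_tr)
print(round(roc_auc_score(y_te, stack.predict_proba(X_te)[:, 1]), 3))
```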
Using hospitalizations in 2014 as the testing data set, we examined the models' area under the receiver operating characteristic curve (AUC), confusion matrix results, and net reclassification improvement. Results: Hospitalizations of 923,759 adults were included in the analysis. Compared with the reference logistic regression (AUC: 0.786, 95\% CI 0.783-0.788), all ML models showed superior discriminative ability (P<.001), including logistic regression with least absolute shrinkage and selection operator regularization (AUC: 0.878, 95\% CI 0.876-0.879), random forest (AUC: 0.878, 95\% CI 0.877-0.880), XGBoost (AUC: 0.888, 95\% CI 0.886-0.889), and neural network (AUC: 0.893, 95\% CI 0.891-0.895). All 4 ML models showed higher sensitivity, specificity, positive predictive value, and negative predictive value compared with the reference logistic regression model (P<.001). We obtained similar results from the Super Learner model (AUC: 0.883, 95\% CI 0.881-0.885). Conclusions: ML approaches can improve sensitivity, specificity, positive predictive value, negative predictive value, discrimination, and calibration in predicting in-hospital mortality in patients hospitalized with sepsis in the United States. These models need further validation and could be applied to develop more accurate models to compare risk-standardized mortality rates across hospitals and geographic regions, paving the way for research and policy initiatives studying disparities in sepsis care. ", doi="10.2196/29982", url="https://www.jmir.org/2022/4/e29982", url="http://www.ncbi.nlm.nih.gov/pubmed/35416785" } @Article{info:doi/10.2196/34274, author="Nicolet, Anna and Assouline, Dan and Le Pogam, Marie-Annick and Perraudin, Cl{\'e}mence and Bagnoud, Christophe and Wagner, Jo{\"e}l and Marti, Joachim and Peytremann-Bridevaux, Isabelle", title="Exploring Patient Multimorbidity and Complexity Using Health Insurance Claims Data: A Cluster Analysis Approach", journal="JMIR Med Inform", year="2022", month="Apr", day="4", volume="10", number="4", pages="e34274", keywords="multimorbidity", keywords="pharmacy cost groups", keywords="cluster analysis", keywords="claims data", keywords="patient complexity", keywords="health claims", keywords="informatics", abstract="Background: Although the trend of progressing morbidity is widely recognized, there are numerous challenges when studying multimorbidity and patient complexity. For multimorbid or complex patients, prone to fragmented care and high health care use, novel estimation approaches need to be developed. Objective: This study aims to investigate the patient multimorbidity and complexity of Swiss residents aged ≥50 years using clustering methodology in claims data. Methods: We adopted a clustering methodology based on random forests and used 34 pharmacy-based cost groups as the only input feature for the procedure. To detect clusters, we applied hierarchical density-based spatial clustering of applications with noise. The reasonable hyperparameters were chosen based on various metrics embedded in the algorithms (out-of-bag misclassification error, normalized stress, and cluster persistence) and the clinical relevance of the obtained clusters.
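A sketch of the random-forest-proximity idea feeding a density-based clusterer, assuming scikit-learn >= 1.3 for HDBSCAN; the synthetic data and the real-versus-permuted trick below stand in for the study's pharmacy-based cost groups:

```python
import numpy as np
from sklearn.cluster import HDBSCAN  # scikit-learn >= 1.3
from sklearn.ensemble import RandomForestClassifier

# Unsupervised RF: train a forest to separate real rows from a column-permuted
# copy, then cluster 1 - proximity (fraction of trees sharing a leaf) with HDBSCAN.
rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, (60, 10)), rng.normal(3, 1, (60, 10))])
X_fake = np.column_stack([rng.permutation(col) for col in X.T])

rf = RandomForestClassifier(n_estimators=200, random_state=0)
rf.fit(np.vstack([X, X_fake]), np.r_[np.ones(len(X)), np.zeros(len(X_fake))])

leaves = rf.apply(X)  # (n_samples, n_trees) leaf indices for the real rows
prox = (leaves[:, None, :] == leaves[None, :, :]).mean(axis=2)
labels = HDBSCAN(metric="precomputed", min_cluster_size=10).fit_predict(1 - prox)
print(np.unique(labels, return_counts=True))  # -1 marks the outlier group
```

Using 1 minus proximity as a dissimilarity is what lets a density-based method like HDBSCAN operate on purely categorical inputs such as cost-group flags.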
Results: Based on cluster analysis output for 18,732 individuals, we identified an outlier group and 7 clusters: individuals without diseases, patients with only hypertension-related diseases, patients with only mental diseases, complex high-cost high-need patients, slightly complex patients with inexpensive low-severity pharmacy-based cost groups, patients with 1 costly disease, and older high-risk patients. Conclusions: Our study demonstrated that cluster analysis based on pharmacy-based cost group information from claims-based data is feasible and highlights clinically relevant clusters. Such an approach allows expanding the understanding of multimorbidity beyond simple disease counts and can identify the population profiles with increased health care use and costs. This study may foster the development of integrated and coordinated care, which is high on the agenda in policy making, care planning, and delivery. ", doi="10.2196/34274", url="https://medinform.jmir.org/2022/4/e34274", url="http://www.ncbi.nlm.nih.gov/pubmed/35377334" } @Article{info:doi/10.2196/34096, author="McLeod, Graeme and Kennedy, Iain and Simpson, Eilidh and Joss, Judith and Goldmann, Katriona", title="Pilot Project for a Web-Based Dynamic Nomogram to Predict Survival 1 Year After Hip Fracture Surgery: Retrospective Observational Study", journal="Interact J Med Res", year="2022", month="Mar", day="30", volume="11", number="1", pages="e34096", keywords="hip fracture", keywords="survival", keywords="prediction", keywords="nomogram", keywords="web", keywords="surgery", keywords="postoperative", keywords="machine learning", keywords="model", keywords="mortality", keywords="hip", keywords="fracture", abstract="Background: Hip fracture is associated with high mortality. Identification of individual risk informs anesthetic and surgical decision-making and can reduce the risk of death. However, interpreting mathematical models and applying them in clinical practice can be difficult. There is a need to simplify risk indices for clinicians and laypeople alike. Objective: Our primary objective was to develop a web-based nomogram for prediction of survival up to 365 days after hip fracture surgery. Methods: We collected data from 329 patients. Our variables included sex; age; BMI; white cell count; levels of lactate, creatinine, hemoglobin, and C-reactive protein; physical status according to the American Society of Anesthesiologists Physical Status Classification System; socioeconomic status; duration of surgery; total time in the operating room; side of surgery; and procedure urgency. Thereafter, we internally calibrated and validated a Cox proportional hazards model of survival 365 days after hip fracture surgery; logistic regression models of survival 30, 120, and 365 days after surgery; and a binomial model. To present the models on a laptop, tablet, or mobile phone in a user-friendly way, we built an app using Shiny (RStudio). The app showed a drop-down box for model selection and horizontal sliders for data entry, model summaries, and prediction and survival plots. A slider represented patient follow-up over 365 days. Results: Of the 329 patients, 24 (7.3\%) died within 30 days of surgery, 65 (19.8\%) within 120 days, and 94 (28.6\%) within 365 days. In all models, the independent predictors of mortality were age, BMI, creatinine level, and lactate level. The logistic model also incorporated white cell count as a predictor. 
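A minimal Cox proportional hazards sketch in Python's lifelines on simulated data; the column names and effect sizes are invented, not the study's data set:

```python
import numpy as np
import pandas as pd
from lifelines import CoxPHFitter

rng = np.random.default_rng(0)
n = 300
df = pd.DataFrame({"age": rng.uniform(60, 95, n),
                   "lactate": rng.uniform(0.5, 4.0, n),
                   "creatinine": rng.uniform(50, 150, n)})
risk = 0.03 * df["age"] + 0.5 * df["lactate"]            # invented linear predictor
df["time"] = rng.exponential(365 / np.exp(risk - risk.mean()))
df["died"] = (df["time"] <= 365).astype(int)             # event indicator
df["time"] = df["time"].clip(upper=365)                  # administrative censoring at 1 year

cph = CoxPHFitter().fit(df, duration_col="time", event_col="died")
cph.print_summary(decimals=2)                            # hazard ratios are exp(coef)
```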
The Cox proportional hazards model showed that mortality differed as follows: age 80 vs 60 years had a hazard ratio (HR) of 0.6 (95\% CI 0.3-1.1), a plasma lactate level of 2 vs 1 mmol/L had an HR of 2.4 (95\% CI 1.5-3.9), and a plasma creatinine level of 60 vs 90 µmol/L had an HR of 2.3 (95\% CI 1.3-3.9). Conclusions: We provide an easy-to-read web-based nomogram that predicts survival up to 365 days after hip fracture. The Cox proportional hazards model and logistic models showed good discrimination, with concordance index values of 0.732 and 0.781, respectively. ", doi="10.2196/34096", url="https://www.i-jmr.org/2022/1/e34096", url="http://www.ncbi.nlm.nih.gov/pubmed/35238320" } @Article{info:doi/10.2196/26634, author="Zhang, Zheqing and Yang, Luqian and Han, Wentao and Wu, Yaoyu and Zhang, Linhui and Gao, Chun and Jiang, Kui and Liu, Yun and Wu, Huiqun", title="Machine Learning Prediction Models for Gestational Diabetes Mellitus: Meta-analysis", journal="J Med Internet Res", year="2022", month="Mar", day="16", volume="24", number="3", pages="e26634", keywords="digital health", keywords="gestational diabetes mellitus", keywords="machine learning", keywords="prediction model", keywords="prognostic model", abstract="Background: Gestational diabetes mellitus (GDM) is a common endocrine metabolic disease, involving a carbohydrate intolerance of variable severity during pregnancy. The incidence of GDM-related complications and adverse pregnancy outcomes has declined, in part, due to early screening. Machine learning (ML) models are increasingly used to identify risk factors and enable the early prediction of GDM. Objective: The aim of this study was to perform a meta-analysis and comparison of published prognostic models for predicting the risk of GDM and identify predictors applicable to the models. Methods: Four reliable electronic databases were searched for studies that developed ML prediction models for GDM in the general population instead of among high-risk groups only. The novel Prediction Model Risk of Bias Assessment Tool (PROBAST) was used to assess the risk of bias of the ML models. The Meta-DiSc software program (version 1.4) was used to perform the meta-analysis and determination of heterogeneity. To limit the influence of heterogeneity, we also performed sensitivity analyses, a meta-regression, and subgroup analysis. Results: A total of 25 studies that included women older than 18 years without a history of vital disease were analyzed. The pooled area under the receiver operating characteristic curve (AUROC) for ML models predicting GDM was 0.8492; the pooled sensitivity was 0.69 (95\% CI 0.68-0.69; P<.001; I2=99.6\%) and the pooled specificity was 0.75 (95\% CI 0.75-0.75; P<.001; I2=100\%). As one of the most commonly employed ML methods, logistic regression achieved an overall pooled AUROC of 0.8151, while non--logistic regression models performed better, with an overall pooled AUROC of 0.8891. Additionally, maternal age, family history of diabetes, BMI, and fasting blood glucose were the four most commonly used features of models established by the various feature selection methods. Conclusions: Compared to current screening strategies, ML methods are attractive for predicting GDM. To expand their use, the importance of quality assessments and unified diagnostic criteria should be further emphasized.
", doi="10.2196/26634", url="https://www.jmir.org/2022/3/e26634", url="http://www.ncbi.nlm.nih.gov/pubmed/35294369" } @Article{info:doi/10.2196/35768, author="Ma, Zhuo and Huang, Sijia and Wu, Xiaoqing and Huang, Yinying and Chan, Wai-Chi Sally and Lin, Yilan and Zheng, Xujuan and Zhu, Jiemin", title="Development of a Prognostic App (iCanPredict) to Predict Survival for Chinese Women With Breast Cancer: Retrospective Study", journal="J Med Internet Res", year="2022", month="Mar", day="9", volume="24", number="3", pages="e35768", keywords="app", keywords="breast cancer", keywords="survival prediction model", keywords="iCanPredict", abstract="Background: Accurate prediction of survival is crucial for both physicians and women with breast cancer to enable clinical decision making on appropriate treatments. The currently available survival prediction tools were developed based on demographic and clinical data obtained from specific populations and may underestimate or overestimate the survival of women with breast cancer in China. Objective: This study aims to develop and validate a prognostic app to predict the overall survival of women with breast cancer in China. Methods: Nine-year (January 2009-December 2017) clinical data of women with breast cancer who received surgery and adjuvant therapy from 2 hospitals in Xiamen were collected and matched against the death data from the Xiamen Center of Disease Control and Prevention. All samples were randomly divided (7:3 ratio) into a training set for model construction and a test set for model external validation. Multivariable Cox regression analysis was used to construct a survival prediction model. The model performance was evaluated by receiver operating characteristic (ROC) curve and Brier score. Finally, by running the survival prediction model in the app background thread, the prognostic app, called iCanPredict, was developed for women with breast cancer in China. Results: A total of 1592 samples were included for data analysis. The training set comprised 1114 individuals and the test set comprised 478 individuals. Age at diagnosis, clinical stage, molecular classification, operative type, axillary lymph node dissection, chemotherapy, and endocrine therapy were incorporated into the model, where age at diagnosis (hazard ratio [HR] 1.031, 95\% CI 1.011-1.051; P=.002), clinical stage (HR 3.044, 95\% CI 2.347-3.928; P<.001), and endocrine therapy (HR 0.592, 95\% CI 0.384-0.914; P=.02) significantly influenced the survival of women with breast cancer. The operative type (P=.81) and the other 4 variables (molecular classification [P=.91], breast reconstruction [P=.36], axillary lymph node dissection [P=.32], and chemotherapy [P=.84]) were not significant. The ROC curve of the training set showed that the model exhibited good discrimination for predicting 1- (area under the curve [AUC] 0.802, 95\% CI 0.713-0.892), 5- (AUC 0.813, 95\% CI 0.760-0.865), and 10-year (AUC 0.740, 95\% CI 0.672-0.808) overall survival. The Brier scores at 1, 5, and 10 years after diagnosis were 0.005, 0.055, and 0.103 in the training set, respectively, and were less than 0.25, indicating good predictive ability. The test set externally validated model discrimination and calibration. In the iCanPredict app, when physicians or women input women's clinical information and their choice of surgery and adjuvant therapy, the corresponding 10-year survival prediction will be presented. 
Conclusions: This survival prediction model provided good model discrimination and calibration. iCanPredict is the first tool of its kind in China to provide survival predictions to women with breast cancer. iCanPredict will increase women's awareness of the similar survival rate of different surgeries and the importance of adherence to endocrine therapy, ultimately helping women to make informed decisions regarding treatment for breast cancer. ", doi="10.2196/35768", url="https://www.jmir.org/2022/3/e35768", url="http://www.ncbi.nlm.nih.gov/pubmed/35262503" } @Article{info:doi/10.2196/30328, author="Evans, Richard and Burns, Jennifer and Damschroder, Laura and Annis, Ann and Freitag, B. Michelle and Raffa, Susan and Wiitala, Wyndy", title="Deriving Weight From Big Data: Comparison of Body Weight Measurement--Cleaning Algorithms", journal="JMIR Med Inform", year="2022", month="Mar", day="9", volume="10", number="3", pages="e30328", keywords="veterans", keywords="weight", keywords="algorithms", keywords="obesity", keywords="measurement", keywords="electronic health record", abstract="Background: Patient body weight is a frequently used measure in biomedical studies, yet there are no standard methods for processing and cleaning weight data. Conflicting documentation on constructing body weight measurements presents challenges for research and program evaluation. Objective: In this study, we aim to describe and compare methods for extracting and cleaning weight data from electronic health record databases to develop guidelines for standardized approaches that promote reproducibility. Methods: We conducted a systematic review of studies published from 2008 to 2018 that used Veterans Health Administration electronic health record weight data and documented the algorithms for constructing patient weight. We applied these algorithms to a cohort of veterans with at least one primary care visit in 2016. The resulting weight measures were compared at the patient and site levels. Results: We identified 496 studies and included 62 (12.5\%) that used weight as an outcome. Approximately 48\% (27/62) included a replicable algorithm. Algorithms varied from cutoffs of implausible weights to complex models using measures within patients over time. We found differences in the number of weight values after applying the algorithms (71,961/1,175,995, 6.12\% to 1,175,177/1,175,995, 99.93\% of raw data) but little difference in average weights across methods (93.3, SD 21.0 kg to 94.8, SD 21.8 kg). The percentage of patients with at least 5\% weight loss over 1 year ranged from 9.37\% (4933/52,642) to 13.99\% (3355/23,987). Conclusions: Contrasting algorithms provide similar results and, in some cases, the results are not different from using raw, unprocessed data despite algorithm complexity. Studies using point estimates of weight may benefit from a simple cleaning rule based on cutoffs of implausible values; however, research questions involving weight trajectories and other, more complex scenarios may benefit from a more nuanced algorithm that considers all available weight data. ", doi="10.2196/30328", url="https://medinform.jmir.org/2022/3/e30328", url="http://www.ncbi.nlm.nih.gov/pubmed/35262492" } @Article{info:doi/10.2196/30104, author="Ip, Wui and Prahalad, Priya and Palma, Jonathan and Chen, H. 
Jonathan", title="A Data-Driven Algorithm to Recommend Initial Clinical Workup for Outpatient Specialty Referral: Algorithm Development and Validation Using Electronic Health Record Data and Expert Surveys", journal="JMIR Med Inform", year="2022", month="Mar", day="3", volume="10", number="3", pages="e30104", keywords="recommender system", keywords="electronic health records", keywords="clinical decision support", keywords="specialty consultation", keywords="machine learning", keywords="EHR", keywords="algorithm", keywords="algorithm development", keywords="algorithm validation", keywords="automation", keywords="prediction", keywords="patient needs", abstract="Background: Millions of people have limited access to specialty care. The problem is exacerbated by ineffective specialty visits due to incomplete prereferral workup, leading to delays in diagnosis and treatment. Existing processes to guide prereferral diagnostic workup are labor-intensive (ie, building a consensus guideline between primary care doctors and specialists) and require the availability of the specialists (ie, electronic consultation). Objective: Using pediatric endocrinology as an example, we develop a recommender algorithm to anticipate patients' initial workup needs at the time of specialty referral and compare it to a reference benchmark using the most common workup orders. We also evaluate the clinical appropriateness of the algorithm recommendations. Methods: Electronic health record data were extracted from 3424 pediatric patients with new outpatient endocrinology referrals at an academic institution from 2015 to 2020. Using item co-occurrence statistics, we predicted the initial workup orders that would be entered by specialists and assessed the recommender's performance in a holdout data set based on what the specialists actually ordered. We surveyed endocrinologists to assess the clinical appropriateness of the predicted orders and to understand the initial workup process. Results: Specialists (n=12) indicated that <50\% of new patient referrals arrive with complete initial workup for common referral reasons. The algorithm achieved an area under the receiver operating characteristic curve of 0.95 (95\% CI 0.95-0.96). Compared to a reference benchmark using the most common orders, precision and recall improved from 37\% to 48\% (P<.001) and from 27\% to 39\% (P<.001) for the top 4 recommendations, respectively. The top 4 recommendations generated for common referral conditions (abnormal thyroid studies, obesity, amenorrhea) were considered clinically appropriate the majority of the time by specialists surveyed and practice guidelines reviewed. Conclusions: ?An item association--based recommender algorithm can predict appropriate specialists' workup orders with high discriminatory accuracy. This could support future clinical decision support tools to increase effectiveness and access to specialty referrals. Our study demonstrates important first steps toward a data-driven paradigm for outpatient specialty consultation with a tier of automated recommendations that proactively enable initial workup that would otherwise be delayed by awaiting an in-person visit. ", doi="10.2196/30104", url="https://medinform.jmir.org/2022/3/e30104", url="http://www.ncbi.nlm.nih.gov/pubmed/35238788" } @Article{info:doi/10.2196/34560, author="Bove, Riley and Schleimer, Erica and Sukhanov, Paul and Gilson, Michael and Law, M. Sindy and Barnecut, Andrew and Miller, L. Bruce and Hauser, L. Stephen and Sanders, J. Stephan and Rankin, P. 
Katherine", title="Building a Precision Medicine Delivery Platform for Clinics: The University of California, San Francisco, BRIDGE Experience", journal="J Med Internet Res", year="2022", month="Feb", day="15", volume="24", number="2", pages="e34560", keywords="precision medicine", keywords="clinical implementation", keywords="in silico trials", keywords="clinical dashboard", keywords="precision", keywords="implementation", keywords="dashboard", keywords="design", keywords="experience", keywords="analytic", keywords="tool", keywords="analysis", keywords="decision-making", keywords="real time", keywords="platform", keywords="human-centered design", doi="10.2196/34560", url="https://www.jmir.org/2022/2/e34560", url="http://www.ncbi.nlm.nih.gov/pubmed/35166689" } @Article{info:doi/10.2196/34932, author="Shara, Nawar and Anderson, M. Kelley and Falah, Noor and Ahmad, F. Maryam and Tavazoei, Darya and Hughes, M. Justin and Talmadge, Bethany and Crovatt, Samantha and Dempers, Ramon", title="Early Identification of Maternal Cardiovascular Risk Through Sourcing and Preparing Electronic Health Record Data: Machine Learning Study", journal="JMIR Med Inform", year="2022", month="Feb", day="10", volume="10", number="2", pages="e34932", keywords="electronic health record", keywords="maternal health", keywords="machine learning", keywords="maternal morbidity and mortality", keywords="cardiovascular risk", keywords="data transformation", keywords="extract", keywords="transform", keywords="load", keywords="artificial intelligence", keywords="electronic medical record", abstract="Background: Health care data are fragmenting as patients seek care from diverse sources. Consequently, patient care is negatively impacted by disparate health records. Machine learning (ML) offers a disruptive force in its ability to inform and improve patient care and outcomes. However, the differences that exist in each individual's health records, combined with the lack of health data standards, in addition to systemic issues that render the data unreliable and that fail to create a single view of each patient, create challenges for ML. Although these problems exist throughout health care, they are especially prevalent within maternal health and exacerbate the maternal morbidity and mortality crisis in the United States. Objective: This study aims to demonstrate that patient records extracted from the electronic health records (EHRs) of a large tertiary health care system can be made actionable for the goal of effectively using ML to identify maternal cardiovascular risk before evidence of diagnosis or intervention within the patient's record. Maternal patient records were extracted from the EHRs of a large tertiary health care system and made into patient-specific, complete data sets through a systematic method. Methods: We outline the effort that was required to define the specifications of the computational systems, the data set, and access to relevant systems, while ensuring that data security, privacy laws, and policies were met. Data acquisition included the concatenation, anonymization, and normalization of health data across multiple EHRs in preparation for their use by a proprietary risk stratification algorithm designed to establish patient-specific baselines to identify and establish cardiovascular risk based on deviations from the patient's baselines to inform early interventions. 
Results: Patient records can be made actionable for the goal of effectively using ML, specifically to identify cardiovascular risk in pregnant patients. Conclusions: Upon acquiring data, including their concatenation, anonymization, and normalization across multiple EHRs, the use of an ML-based tool can provide early identification of cardiovascular risk in pregnant patients. ", doi="10.2196/34932", url="https://medinform.jmir.org/2022/2/e34932", url="http://www.ncbi.nlm.nih.gov/pubmed/35142637" } @Article{info:doi/10.2196/31528, author="Renner, Simon and Marty, Tom and Khadhar, Micka{\"i}l and Foulqui{\'e}, Pierre and Voillot, Pam{\'e}la and Mebarki, Adel and Montagni, Ilaria and Texier, Nathalie and Sch{\"u}ck, St{\'e}phane", title="A New Method to Extract Health-Related Quality of Life Data From Social Media Testimonies: Algorithm Development and Validation", journal="J Med Internet Res", year="2022", month="Jan", day="28", volume="24", number="1", pages="e31528", keywords="health-related quality of life", keywords="social media use", keywords="measures", keywords="real world", keywords="natural language processing", keywords="social media", keywords="NLP", keywords="infoveillance", keywords="quality of life", keywords="digital health", keywords="social listening", abstract="Background: Monitoring social media has been shown to be a useful means to capture patients' opinions and feelings about medical issues, ranging from diseases to treatments. Health-related quality of life (HRQoL) is a useful indicator of patients' overall health, which can be captured online. Objective: This study aimed to describe a social media listening algorithm able to detect the impact of diseases or treatments on specific dimensions of HRQoL based on posts written by patients in social media and forums. Methods: Using a web crawler, 19 forums in France were harvested, and messages related to patients' experience with disease or treatment were specifically collected. The SF-36 (Short Form Health Survey) and EQ-5D (Euro Quality of Life 5 Dimensions) HRQoL surveys were mixed and adapted for a tailored social media listening system. This was carried out to better capture the variety of expression on social media, resulting in 5 dimensions of the HRQoL, which are physical, psychological, activity-based, social, and financial. Models were trained using cross-validation and hyperparameter optimization. Oversampling was used to increase the representation of infrequent dimensions: after annotation, SMOTE (synthetic minority oversampling technique) was used to balance the proportions of the dimensions among messages. Results: The training set was composed of 1399 messages, randomly taken from a batch of 20,000 health-related messages coming from forums. The algorithm was able to detect a general impact on HRQoL (sensitivity of 0.83 and specificity of 0.74), a physical impact (0.67 and 0.76), a psychic impact (0.82 and 0.60), an activity-related impact (0.73 and 0.78), a relational impact (0.73 and 0.70), and a financial impact (0.79 and 0.74). Conclusions: The development of an innovative method to extract health data from social media as a real-time assessment of patients' HRQoL is useful for patient-centered medical care. As a source of real-world data, social media provide a complementary point of view for understanding patients' concerns and unmet needs, as well as for shedding light on how diseases and treatments can be a burden in their daily lives.
", doi="10.2196/31528", url="https://www.jmir.org/2022/1/e31528", url="http://www.ncbi.nlm.nih.gov/pubmed/35089152" } @Article{info:doi/10.2196/28934, author="Liu, Yun-Chung and Cheng, Hao-Yuan and Chang, Tu-Hsuan and Ho, Te-Wei and Liu, Ting-Chi and Yen, Ting-Yu and Chou, Chia-Ching and Chang, Luan-Yin and Lai, Feipei", title="Evaluation of the Need for Intensive Care in Children With Pneumonia: Machine Learning Approach", journal="JMIR Med Inform", year="2022", month="Jan", day="27", volume="10", number="1", pages="e28934", keywords="child pneumonia", keywords="intensive care", keywords="machine learning", keywords="decision making", keywords="clinical index", abstract="Background: Timely decision-making regarding intensive care unit (ICU) admission for children with pneumonia is crucial for a better prognosis. Despite attempts to establish a guideline or triage system for evaluating ICU care needs, no clinically applicable paradigm is available. Objective: The aim of this study was to develop machine learning (ML) algorithms to predict ICU care needs for pediatric pneumonia patients within 24 hours of admission, evaluate their performance, and identify clinical indices for making decisions for pediatric pneumonia patients. Methods: Pneumonia patients admitted to National Taiwan University Hospital from January 2010 to December 2019 aged under 18 years were enrolled. Their underlying diseases, clinical manifestations, and laboratory data at admission were collected. The outcome of interest was ICU transfer within 24 hours of hospitalization. We compared clinically relevant features between early ICU transfer patients and patients without ICU care. ML algorithms were developed to predict ICU admission. The performance of the algorithms was evaluated using sensitivity, specificity, area under the receiver operating characteristic curve (AUC), and average precision. The relative feature importance of the best-performing algorithm was compared with physician-rated feature importance for explainability. Results: A total of 8464 pediatric hospitalizations due to pneumonia were recorded, and 1166 (1166/8464, 13.8\%) hospitalized patients were transferred to the ICU within 24 hours. Early ICU transfer patients were younger (P<.001), had higher rates of underlying diseases (eg, cardiovascular, neuropsychological, and congenital anomaly/genetic disorders; P<.001), had abnormal laboratory data, had higher pulse rates (P<.001), had higher breath rates (P<.001), had lower oxygen saturation (P<.001), and had lower peak body temperature (P<.001) at admission than patients without ICU transfer. The random forest (RF) algorithm achieved the best performance (sensitivity 0.94, 95\% CI 0.92-0.95; specificity 0.94, 95\% CI 0.92-0.95; AUC 0.99, 95\% CI 0.98-0.99; and average precision 0.93, 95\% CI 0.90-0.96). The lowest systolic blood pressure and presence of cardiovascular and neuropsychological diseases ranked in the top 10 in both RF relative feature importance and clinician judgment. Conclusions: The ML approach could provide a clinically applicable triage algorithm and identify important clinical indices, such as age, underlying diseases, abnormal vital signs, and laboratory data for evaluating the need for intensive care in children with pneumonia. 
", doi="10.2196/28934", url="https://medinform.jmir.org/2022/1/e28934", url="http://www.ncbi.nlm.nih.gov/pubmed/35084358" } @Article{info:doi/10.2196/28036, author="Yu, Jia-Ruei and Chen, Chun-Hsien and Huang, Tsung-Wei and Lu, Jang-Jih and Chung, Chia-Ru and Lin, Ting-Wei and Wu, Min-Hsien and Tseng, Yi-Ju and Wang, Hsin-Yao", title="Energy Efficiency of Inference Algorithms for Clinical Laboratory Data Sets: Green Artificial Intelligence Study", journal="J Med Internet Res", year="2022", month="Jan", day="25", volume="24", number="1", pages="e28036", keywords="medical informatics", keywords="machine learning", keywords="algorithms", keywords="energy consumption", keywords="artificial intelligence", keywords="energy efficient", keywords="medical domain", keywords="medical data sets", keywords="informatics", abstract="Background: The use of artificial intelligence (AI) in the medical domain has attracted considerable research interest. Inference applications in the medical domain require energy-efficient AI models. In contrast to other types of data in visual AI, data from medical laboratories usually comprise features with strong signals. Numerous energy optimization techniques have been developed to relieve the burden on the hardware required to deploy a complex learning model. However, the energy efficiency levels of different AI models used for medical applications have not been studied. Objective: The aim of this study was to explore and compare the energy efficiency levels of commonly used machine learning algorithms---logistic regression (LR), k-nearest neighbor, support vector machine, random forest (RF), and extreme gradient boosting (XGB) algorithms, as well as four different variants of neural network (NN) algorithms---when applied to clinical laboratory datasets. Methods: We applied the aforementioned algorithms to two distinct clinical laboratory data sets: a mass spectrometry data set regarding Staphylococcus aureus for predicting methicillin resistance (3338 cases; 268 features) and a urinalysis data set for predicting Trichomonas vaginalis infection (839,164 cases; 9 features). We compared the performance of the nine inference algorithms in terms of accuracy, area under the receiver operating characteristic curve (AUROC), time consumption, and power consumption. The time and power consumption levels were determined using performance counter data from Intel Power Gadget 3.5. Results: The experimental results indicated that the RF and XGB algorithms achieved the two highest AUROC values for both data sets (84.7\% and 83.9\%, respectively, for the mass spectrometry data set; 91.1\% and 91.4\%, respectively, for the urinalysis data set). The XGB and LR algorithms exhibited the shortest inference time for both data sets (0.47 milliseconds for both in the mass spectrometry data set; 0.39 and 0.47 milliseconds, respectively, for the urinalysis data set). Compared with the RF algorithm, the XGB and LR algorithms exhibited a 45\% and 53\%-60\% reduction in inference time for the mass spectrometry and urinalysis data sets, respectively. In terms of energy efficiency, the XGB algorithm exhibited the lowest power consumption for the mass spectrometry data set (9.42 Watts) and the LR algorithm exhibited the lowest power consumption for the urinalysis data set (9.98 Watts). Compared with a five-hidden-layer NN, the XGB and LR algorithms achieved 16\%-24\% and 9\%-13\% lower power consumption levels for the mass spectrometry and urinalysis data sets, respectively. 
In all experiments, the XGB algorithm exhibited the best performance in terms of accuracy, run time, and energy efficiency. Conclusions: The XGB algorithm achieved balanced performance levels in terms of AUROC, run time, and energy efficiency for the two clinical laboratory data sets. Considering the energy constraints in real-world scenarios, the XGB algorithm is ideal for medical AI applications. ", doi="10.2196/28036", url="https://www.jmir.org/2022/1/e28036", url="http://www.ncbi.nlm.nih.gov/pubmed/35076405" } @Article{info:doi/10.2196/28366, author="Yamanaka, Syunsuke and Goto, Tadahiro and Morikawa, Koji and Watase, Hiroko and Okamoto, Hiroshi and Hagiwara, Yusuke and Hasegawa, Kohei", title="Machine Learning Approaches for Predicting Difficult Airway and First-Pass Success in the Emergency Department: Multicenter Prospective Observational Study", journal="Interact J Med Res", year="2022", month="Jan", day="25", volume="11", number="1", pages="e28366", keywords="intubation", keywords="machine learning", keywords="difficult airway", keywords="first-pass success", abstract="Background: There is still room for improvement in the modified LEMON (look, evaluate, Mallampati, obstruction, neck mobility) criteria for difficult airway prediction and no prediction tool for first-pass success in the emergency department (ED). Objective: We applied modern machine learning approaches to predict difficult airways and first-pass success. Methods: In a multicenter prospective study that enrolled consecutive patients who underwent tracheal intubation in 13 EDs, we developed 7 machine learning models (eg, random forest model) using routinely collected data (eg, demographics, initial airway assessment). The outcomes were difficult airway and first-pass success. Model performance was evaluated using c-statistics, calibration slopes, and association measures (eg, sensitivity) in the test set (randomly selected 20\% of the data). Their performance was compared with the modified LEMON criteria for difficult airway prediction and a logistic regression model for first-pass success. Results: Of 10,741 patients who underwent intubation, 543 patients (5.1\%) had a difficult airway, and 7690 patients (71.6\%) had first-pass success. In predicting a difficult airway, machine learning models---except for k-point nearest neighbor and multilayer perceptron---had higher discrimination ability than the modified LEMON criteria (all, P<.001). For example, the ensemble method had the highest c-statistic (0.74 vs 0.62 with the modified LEMON criteria; P<.001). Machine learning models---except k-point nearest neighbor and random forest models---had higher discrimination ability for first-pass success. In particular, the ensemble model had the highest c-statistic (0.81 vs 0.76 with the reference regression; P<.001). Conclusions: Machine learning models demonstrated greater ability for predicting difficult airway and first-pass success in the ED. ", doi="10.2196/28366", url="https://www.i-jmr.org/2022/1/e28366", url="http://www.ncbi.nlm.nih.gov/pubmed/35076398" } @Article{info:doi/10.2196/31549, author="He, Fang and Page, H. John and Weinberg, R.
Kerry and Mishra, Anirban", title="The Development and Validation of Simplified Machine Learning Algorithms to Predict Prognosis of Hospitalized Patients With COVID-19: Multicenter, Retrospective Study", journal="J Med Internet Res", year="2022", month="Jan", day="21", volume="24", number="1", pages="e31549", keywords="COVID-19", keywords="predictive algorithm", keywords="prognostic model", keywords="machine learning", abstract="Background: The current COVID-19 pandemic is unprecedented; under resource-constrained settings, predictive algorithms can help to stratify disease severity, alerting physicians of high-risk patients; however, there are only few risk scores derived from a substantially large electronic health record (EHR) data set, using simplified predictors as input. Objective: The objectives of this study were to develop and validate simplified machine learning algorithms that predict COVID-19 adverse outcomes; to evaluate the area under the receiver operating characteristic curve (AUC), sensitivity, specificity, and calibration of the algorithms; and to derive clinically meaningful thresholds. Methods: We performed machine learning model development and validation via a cohort study using multicenter, patient-level, longitudinal EHRs from the Optum COVID-19 database that provides anonymized, longitudinal EHR from across the United States. The models were developed based on clinical characteristics to predict 28-day in-hospital mortality, intensive care unit (ICU) admission, respiratory failure, and mechanical ventilator usages at inpatient setting. Data from patients who were admitted from February 1, 2020, to September 7, 2020, were randomly sampled into development, validation, and test data sets; data collected from September 7, 2020, to November 15, 2020, were reserved as the postdevelopment prospective test data set. Results: Of the 3.7 million patients in the analysis, 585,867 patients were diagnosed or tested positive for SARS-CoV-2, and 50,703 adult patients were hospitalized with COVID-19 between February 1 and November 15, 2020. Among the study cohort (n=50,703), there were 6204 deaths, 9564 ICU admissions, 6478 mechanically ventilated or EMCO patients, and 25,169 patients developed acute respiratory distress syndrome or respiratory failure within 28 days since hospital admission. The algorithms demonstrated high accuracy (AUC 0.89, 95\% CI 0.89-0.89 on the test data set [n=10,752]), consistent prediction through the second wave of the pandemic from September to November (AUC 0.85, 95\% CI 0.85-0.86) on the postdevelopment prospective test data set [n=14,863], great clinical relevance, and utility. Besides, a comprehensive set of 386 input covariates from baseline or at admission were included in the analysis; the end-to-end pipeline automates feature selection and model development. The parsimonious model with only 10 input predictors produced comparably accurate predictions; these 10 predictors (age, blood urea nitrogen, SpO2, systolic and diastolic blood pressures, respiration rate, pulse, temperature, albumin, and major cognitive disorder excluding stroke) are commonly measured and concordant with recognized risk factors for COVID-19. Conclusions: The systematic approach and rigorous validation demonstrate consistent model performance to predict even beyond the period of data collection, with satisfactory discriminatory power and great clinical utility. 
Overall, the study offers an accurate, validated, and reliable prediction model based on only 10 clinical features as a prognostic tool to stratify patients with COVID-19 into intermediate-, high-, and very high-risk groups. This simple predictive tool is shared with the wider health care community to enable its use as an early warning system that alerts physicians to possible high-risk patients or as a resource triaging tool to optimize health care resources. ", doi="10.2196/31549", url="https://www.jmir.org/2022/1/e31549", url="http://www.ncbi.nlm.nih.gov/pubmed/34951865" } @Article{info:doi/10.2196/28953, author="Zeng, Siyang and Arjomandi, Mehrdad and Tong, Yao and Liao, C. Zachary and Luo, Gang", title="Developing a Machine Learning Model to Predict Severe Chronic Obstructive Pulmonary Disease Exacerbations: Retrospective Cohort Study", journal="J Med Internet Res", year="2022", month="Jan", day="6", volume="24", number="1", pages="e28953", keywords="chronic obstructive pulmonary disease", keywords="machine learning", keywords="forecasting", keywords="symptom exacerbation", keywords="patient care management", abstract="Background: Chronic obstructive pulmonary disease (COPD) poses a large burden on health care. Severe COPD exacerbations require emergency department visits or inpatient stays, often cause an irreversible decline in lung function and health status, and account for 90.3\% of the total medical cost related to COPD. Many severe COPD exacerbations are deemed preventable with appropriate outpatient care. Current models for predicting severe COPD exacerbations lack accuracy, making it difficult to effectively target patients at high risk for preventive care management to reduce severe COPD exacerbations and improve outcomes. Objective: The aim of this study is to develop a more accurate model to predict severe COPD exacerbations. Methods: We examined all patients with COPD who visited the University of Washington Medicine facilities between 2011 and 2019 and identified 278 candidate features. By performing secondary analysis on 43,576 University of Washington Medicine data instances from 2011 to 2019, we created a machine learning model to predict severe COPD exacerbations in the next year for patients with COPD. Results: The final model had an area under the receiver operating characteristic curve of 0.866. When using the top 9.99\% (752/7529) of the patients with the largest predicted risk to set the cutoff threshold for binary classification, the model achieved an accuracy of 90.33\% (6801/7529), a sensitivity of 56.6\% (103/182), and a specificity of 91.17\% (6698/7347). Conclusions: Our model provided a more accurate prediction of severe COPD exacerbations in the next year compared with prior published models. After further improvement of its performance measures (eg, by adding features extracted from clinical notes), our model could be used in a decision support tool to guide the identification of patients with COPD who are at high risk for care management to improve outcomes. International Registered Report Identifier (IRRID): RR2-10.2196/13783 ", doi="10.2196/28953", url="https://www.jmir.org/2022/1/e28953", url="http://www.ncbi.nlm.nih.gov/pubmed/34989686" } @Article{info:doi/10.2196/32635, author="Kumar, Bharat and Zetumer, Samuel and Swee, Melissa and Endelman, Keyser Ellen L.
and Suneja, Manish and Davis, Benjamin", title="Reducing Delays in Diagnosing Primary Immunodeficiency Through the Development and Implementation of a Clinical Decision Support Tool: Protocol for a Quality Improvement Project", journal="JMIR Res Protoc", year="2022", month="Jan", day="4", volume="11", number="1", pages="e32635", keywords="immunology", keywords="clinical decision support", keywords="diagnostic decision-making", abstract="Background: Primary immunodeficiencies (PIs) are a set of heterogeneous chronic disorders characterized by immune dysfunction. They are diagnostically challenging because of their clinical heterogeneity, knowledge gaps among primary care physicians, and continuing shortages of clinically trained immunologists. As a result, patients with undiagnosed PIs are at increased risk for recurrent infections, cancers, and autoimmune diseases. Objective: The aim of this research is to develop and implement a clinical decision support (CDS) tool for the identification of underlying PIs. Methods: We will develop and implement a CDS tool for the identification of underlying PIs among patients who receive primary care through a health care provider at the University of Iowa Hospitals and Clinics. The CDS tool will function through an algorithm that is based on the Immune Deficiency Foundation's 10 Warning Signs for Primary Immunodeficiency. Over the course of a year, we will use Lean Six Sigma principles and the Define, Measure, Analyze, Improve, and Control (DMAIC) framework to guide the project. The primary measure is the number of newly diagnosed PI patients per month. Secondary measures include the following: (1) the number of new patients identified by the CDS as being at high risk for PI, (2) the number of new PI cases in which immunoglobulin replacement or rotating antibiotics are started, (3) the cost of evaluation of each patient identified by the CDS tool as being at high risk for PIs, (4) the number of new consults not diagnosed with a PI, and (5) patient satisfaction with the process of referral to the Immunology Clinic. Results: This study was determined to not be Human Subjects Research by the Institutional Review Board at the University of Iowa. Data collection will begin in August 2021. Conclusions: The development and implementation of a CDS tool is a promising approach to identifying patients with underlying PI. This protocol assesses whether such an approach will be able to achieve its objective of reducing diagnostic delays. The disciplined approach, using Lean Six Sigma and the DMAIC framework, will guide implementation to maximize opportunities for a successful intervention that meets the study's goals and objectives as well as to allow for replication and adaptation of these methods at other sites. 
International Registered Report Identifier (IRRID): PRR1-10.2196/32635 ", doi="10.2196/32635", url="https://www.researchprotocols.org/2022/1/e32635", url="http://www.ncbi.nlm.nih.gov/pubmed/34587114" } @Article{info:doi/10.2196/34415, author="Ko, Hoon and Huh, Jimi and Kim, Won Kyung and Chung, Heewon and Ko, Yousun and Kim, Keun Jai and Lee, Hee Jei and Lee, Jinseok", title="A Deep Residual U-Net Algorithm for Automatic Detection and Quantification of Ascites on Abdominopelvic Computed Tomography Images Acquired in the Emergency Department: Model Development and Validation", journal="J Med Internet Res", year="2022", month="Jan", day="3", volume="24", number="1", pages="e34415", keywords="ascites", keywords="computed tomography", keywords="deep residual U-Net", keywords="artificial intelligence", abstract="Background: Detection and quantification of intra-abdominal free fluid (ie, ascites) on computed tomography (CT) images are essential processes for finding emergent or urgent conditions in patients. In an emergency department, automatic detection and quantification of ascites will be beneficial. Objective: We aimed to develop an artificial intelligence (AI) algorithm for the automatic detection and quantification of ascites simultaneously using a single deep learning model (DLM). Methods: We developed 2D DLMs based on deep residual U-Net, U-Net, bidirectional U-Net, and recurrent residual U-Net (R2U-Net) algorithms to segment areas of ascites on abdominopelvic CT images. Based on segmentation results, the DLMs detected ascites by classifying CT images into ascites images and nonascites images. The AI algorithms were trained using 6337 CT images from 160 subjects (80 with ascites and 80 without ascites) and tested using 1635 CT images from 40 subjects (20 with ascites and 20 without ascites). The performance of the AI algorithms was evaluated for diagnostic accuracy of ascites detection and for segmentation accuracy of ascites areas. Of these DLMs, we proposed an AI algorithm with the best performance. Results: The segmentation accuracy was the highest for the deep residual U-Net model with a mean intersection over union (mIoU) value of 0.87, followed by U-Net, bidirectional U-Net, and R2U-Net models (mIoU values of 0.80, 0.77, and 0.67, respectively). The detection accuracy was the highest for the deep residual U-Net model (0.96), followed by U-Net, bidirectional U-Net, and R2U-Net models (0.90, 0.88, and 0.82, respectively). The deep residual U-Net model also achieved high sensitivity (0.96) and high specificity (0.96). Conclusions: We propose a deep residual U-Net--based AI algorithm for automatic detection and quantification of ascites on abdominopelvic CT scans, which provides excellent performance. ", doi="10.2196/34415", url="https://www.jmir.org/2022/1/e34415", url="http://www.ncbi.nlm.nih.gov/pubmed/34982041" } @Article{info:doi/10.2196/31038, author="Zolnoori, Maryam and Song, Jiyoun and McDonald, V. Margaret and Barr{\'o}n, Yolanda and Cato, Kenrick and Sockolow, Paulina and Sridharan, Sridevi and Onorato, Nicole and Bowles, H. 
Kathryn and Topaz, Maxim", title="Exploring Reasons for Delayed Start-of-Care Nursing Visits in Home Health Care: Algorithm Development and Data Science Study", journal="JMIR Nursing", year="2021", month="Dec", day="30", volume="4", number="4", pages="e31038", keywords="delayed start-of-care nursing visit", keywords="home healthcare services", keywords="natural language processing", keywords="nursing note", keywords="NLP", keywords="nursing", keywords="eHealth", keywords="home care", keywords="clinical notes", keywords="classification", keywords="clinical informatics", abstract="Background: Delayed start-of-care nursing visits in home health care (HHC) can result in negative outcomes, such as hospitalization. No previous studies have investigated why start-of-care HHC nursing visits are delayed, in part because most reasons for delayed visits are documented in free-text HHC nursing notes. Objective: The aims of this study were to (1) develop and test a natural language processing (NLP) algorithm that automatically identifies reasons for delayed visits in HHC free-text clinical notes and (2) describe reasons for delayed visits in a large patient sample. Methods: This study was conducted at the Visiting Nurse Service of New York (VNSNY). We examined data available at the VNSNY on all new episodes of care started in 2019 (N=48,497). An NLP algorithm was developed and tested to automatically identify and classify reasons for delayed visits. Results: The performance of the NLP algorithm was 0.8, 0.75, and 0.77 for precision, recall, and F-score, respectively. A total of one-third of HHC episodes (n=16,244) had delayed start-of-care HHC nursing visits. The most prevalent identified category of reasons for delayed start-of-care nursing visits was no answer at the door or phone (3728/8051, 46.3\%), followed by patient/family request to postpone or refuse some HHC services (n=2858, 35.5\%), and administrative or scheduling issues (n=1465, 18.2\%). In 40\% (n=16,244) of HHC episodes, 2 or more reasons were documented. Conclusions: To avoid critical delays in start-of-care nursing visits, HHC organizations might examine and improve ways to effectively address the reasons for delayed visits, using effective interventions, such as educating patients or caregivers on the importance of a timely nursing visit and improving patients' intake procedures. ", doi="10.2196/31038", url="https://nursing.jmir.org/2021/4/e31038", url="http://www.ncbi.nlm.nih.gov/pubmed/34967749" } @Article{info:doi/10.2196/27008, author="Yao, Li-Hung and Leung, Ka-Chun and Tsai, Chu-Lin and Huang, Chien-Hua and Fu, Li-Chen", title="A Novel Deep Learning--Based System for Triage in the Emergency Department Using Electronic Medical Records: Retrospective Cohort Study", journal="J Med Internet Res", year="2021", month="Dec", day="27", volume="23", number="12", pages="e27008", keywords="emergency department", keywords="triage system", keywords="deep learning", keywords="hospital admission", keywords="data to text", keywords="electronic health record", abstract="Background: Emergency department (ED) crowding has resulted in delayed patient treatment and has become a universal health care problem. Although a triage system, such as the 5-level emergency severity index, somewhat improves the process of ED treatment, it still heavily relies on the nurse's subjective judgment and triages too many patients to emergency severity index level 3 in current practice. 
Hence, a system that can help clinicians accurately triage a patient's condition is imperative. Objective: This study aims to develop a deep learning--based triage system using patients' ED electronic medical records to predict clinical outcomes after ED treatments. Methods: We conducted a retrospective study using data from an open data set from the National Hospital Ambulatory Medical Care Survey from 2012 to 2016 and data from a local data set from the National Taiwan University Hospital from 2009 to 2015. In this study, we transformed structured data into text form and used convolutional neural networks combined with recurrent neural networks and attention mechanisms to accomplish the classification task. We evaluated our performance using area under the receiver operating characteristic curve (AUROC). Results: A total of 118,602 patients from the National Hospital Ambulatory Medical Care Survey were included in this study for predicting hospitalization, and the accuracy and AUROC were 0.83 and 0.87, respectively. In an external experiment using our own data set from the National Taiwan University Hospital, which included 745,441 patients, the accuracy and AUROC were similar: 0.83 and 0.88, respectively. Moreover, to effectively evaluate the prediction quality of our proposed system, we also applied the model to other clinical outcomes, including mortality and admission to the intensive care unit, and the results showed that our proposed method was approximately 3\% to 5\% higher in accuracy than other conventional methods. Conclusions: Our proposed method achieved better performance than the traditional method; its implementation is relatively easy, it includes commonly used variables, and it is better suited for real-world clinical settings. In future work, we will validate our novel deep learning--based triage algorithm in prospective clinical trials and, once validated, use it to guide resource allocation in a busy ED. ", doi="10.2196/27008", url="https://www.jmir.org/2021/12/e27008", url="http://www.ncbi.nlm.nih.gov/pubmed/34958305" } @Article{info:doi/10.2196/30805, author="Chua, Horng-Ruey and Zheng, Kaiping and Vathsala, Anantharaman and Ngiam, Kee-Yuan and Yap, Hui-Kim and Lu, Liangjian and Tiong, Ho-Yee and Mukhopadhyay, Amartya and MacLaren, Graeme and Lim, Shir-Lynn and Akalya, K. and Ooi, Beng-Chin", title="Health Care Analytics With Time-Invariant and Time-Variant Feature Importance to Predict Hospital-Acquired Acute Kidney Injury: Observational Longitudinal Study", journal="J Med Internet Res", year="2021", month="Dec", day="24", volume="23", number="12", pages="e30805", keywords="acute kidney injury", keywords="artificial intelligence", keywords="biomarkers", keywords="clinical deterioration", keywords="electronic health records", keywords="hospital medicine", keywords="machine learning", abstract="Background: Acute kidney injury (AKI) develops in 4\% of hospitalized patients and is a marker of clinical deterioration and nephrotoxicity. AKI onset is highly variable in hospitals, which makes it difficult to time biomarker assessment in all patients for preemptive care. Objective: The study sought to apply machine learning techniques to electronic health records and predict hospital-acquired AKI by a 48-hour lead time, with the aim to create an AKI surveillance algorithm that is deployable in real time.
Methods: The data were sourced from 20,732 case admissions in 16,288 patients over 1 year in our institution. We enhanced the bidirectional recurrent neural network model with a novel time-invariant and time-variant aggregated module to capture important clinical features temporal to AKI in every patient. Time-series features included laboratory parameters that preceded a 48-hour prediction window before AKI onset; the latter's corresponding reference was the final in-hospital serum creatinine performed in case admissions without AKI episodes. Results: The cohort was of mean age 53 (SD 25) years, of whom 29\%, 12\%, 12\%, and 53\% had diabetes, ischemic heart disease, cancers, and baseline eGFR <90 mL/min/1.73 m2, respectively. There were 911 AKI episodes in 869 patients. We derived and validated an algorithm in the testing data set with an AUROC of 0.81 (0.78-0.85) for predicting AKI. At a 15\% prediction threshold, our model generated 699 AKI alerts with 2 false positives for every true AKI and predicted 26\% of AKIs. A lowered 5\% prediction threshold improved the recall to 60\% but generated 3746 AKI alerts with 6 false positives for every true AKI. Representative interpretation results produced by our model pointed to the top-ranked features that predicted AKI, which could be categorized in association with sepsis, acute coronary syndrome, nephrotoxicity, or multiorgan injury, specific to each case at risk. Conclusions: We generated an accurate algorithm from electronic health records through machine learning that predicted AKI by a lead time of at least 48 hours. The prediction threshold could be adjusted during deployment to optimize recall and minimize alert fatigue, while its precision could potentially be augmented by targeted AKI biomarker assessment in the high-risk cohort identified. ", doi="10.2196/30805", url="https://www.jmir.org/2021/12/e30805", url="http://www.ncbi.nlm.nih.gov/pubmed/34951595" } @Article{info:doi/10.2196/32427, author="Fedoruk, Benjamin and Nelson, Harrison and Frost, Russell and Fucile Ladouceur, Kai", title="The Plebeian Algorithm: A Democratic Approach to Censorship and Moderation", journal="JMIR Form Res", year="2021", month="Dec", day="21", volume="5", number="12", pages="e32427", keywords="infodemiology", keywords="misinformation", keywords="algorithm", keywords="social media", keywords="plebeian", keywords="natural language processing", keywords="sentiment analysis", keywords="sentiment", keywords="trust", keywords="decision-making", keywords="COVID-19", abstract="Background: The infodemic created by the COVID-19 pandemic has given rise to several societal issues, including a rise in distrust between the public and health experts, and even a refusal of some to accept vaccination; some sources suggest that 1 in 4 Americans will refuse the vaccine. This social concern can be traced to the level of digitization today, particularly in the form of social media. Objective: The goal of the research is to determine an optimal social media algorithm, one which is able to reduce the number of cases of misinformation and which also ensures that certain individual freedoms (eg, the freedom of expression) are maintained. After performing the analysis described herein, an algorithm was abstracted; the purpose of the study was to discover a set of abstract aspects of an optimal social media algorithm.
Methods: As social media was the most significant contributing factor to the spread of misinformation, the team decided to examine infodemiology across various text-based platforms (Twitter, 4chan, Reddit, Parler, Facebook, and YouTube). This was done by using sentiment analysis to compare general posts with key terms flagged as misinformation (all of which concern COVID-19) to determine their verity. In gathering the data sets, both application programming interface clients (installed using Python's pip) and preexisting data compiled by standard scientific third parties were used. Results: The sentiment can be described using bimodal distributions for each platform, with a positive and negative peak, as well as a skewness. It was found that in some cases, misinforming posts can have up to 92.5\% more negative sentiment skew compared to accurate posts. Conclusions: From this, the novel Plebeian Algorithm is proposed, which uses sentiment analysis and post popularity as metrics to flag a post as misinformation. This algorithm diverges from that of the status quo, as the Plebeian Algorithm uses a democratic process to detect and remove misinformation. A method was constructed in which the removal of content deemed misinformation is determined by a randomly selected jury of anonymous users. This not only prevents these types of infodemics but also guarantees a more democratic way of using social media that is beneficial for repairing social trust and encouraging the public's evidence-informed decision-making. ", doi="10.2196/32427", url="https://formative.jmir.org/2021/12/e32427", url="http://www.ncbi.nlm.nih.gov/pubmed/34854812" } @Article{info:doi/10.2196/31232, author="Pronk, Yvette and Pilot, Peter and van der Weegen, Walter and Brinkman, Justus-Martijn and Schreurs, Willem Berend", title="A Patient-Reported Outcome Tool to Triage Total Hip Arthroplasty Patients to Hospital or Video Consultation: Pilot Study With Expert Panels and a Cohort of 1228 Patients", journal="JMIR Form Res", year="2021", month="Dec", day="20", volume="5", number="12", pages="e31232", keywords="PROMs", keywords="total hip arthroplasty", keywords="triage tool", keywords="video consultation", keywords="telemedicine", keywords="digital transformation", abstract="Background: The digital transformation in health care has been accelerated by the COVID-19 pandemic. Video consultation has become the alternative for hospital consultation. It remains unknown how to select patients suitable for video consultation. Objective: This study aimed to develop a tool based on patient-reported outcomes (PROs) to triage total hip arthroplasty (THA) patients to hospital or video consultation. Methods: A pilot study with expert panels and a retrospective cohort with prospectively collected data from 1228 THA patients was executed. The primary outcome was a PRO triage tool to allocate THA patients to hospital or video consultation 6 weeks postoperatively. Expert panels defined the criteria and selected the patient-reported outcome measure (PROM) questions including thresholds. Data were divided into training and test cohorts. Distribution, floor effect, correlation, responsiveness, PRO patient journey, and homogeneity of the selected questions were investigated in the training cohort. The test cohort was used to provide an unbiased evaluation of the final triage tool. Results: The expert panels selected moderate or severe pain and using 2 crutches as the triage tool criteria.
PROM questions included in the final triage tool were numeric rating scale (NRS) pain during activity, 3-level version of the EuroQol 5 dimensions (EQ-5D-3L) questions 1 and 4, and Oxford Hip Score (OHS) questions 6, 8, and 12. Of the training cohort, 201 (201/703, 28.6\%) patients needed a hospital consultation, which was not statistically different from the 150 (150/463, 32.4\%) patients in the test cohort who needed a hospital consultation (P=.19). Conclusions: A PRO triage tool based on moderate or severe pain and using 2 crutches was developed. Around 70\% of THA patients could safely have a video consultation, and 30\% needed a hospital consultation 6 weeks postoperatively. This tool is promising for selecting patients for video consultation while using an existing PROM infrastructure. ", doi="10.2196/31232", url="https://formative.jmir.org/2021/12/e31232", url="http://www.ncbi.nlm.nih.gov/pubmed/34931989" } @Article{info:doi/10.2196/30970, author="Paris, Nicolas and Lamer, Antoine and Parrot, Adrien", title="Transformation and Evaluation of the MIMIC Database in the OMOP Common Data Model: Development and Usability Study", journal="JMIR Med Inform", year="2021", month="Dec", day="14", volume="9", number="12", pages="e30970", keywords="data reuse", keywords="open data", keywords="OMOP", keywords="common data model", keywords="critical care", keywords="machine learning", keywords="big data", keywords="health informatics", keywords="health data", keywords="health database", keywords="electronic health records", keywords="open access database", keywords="digital health", keywords="intensive care", keywords="health care", abstract="Background: In the era of big data, the intensive care unit (ICU) is likely to benefit from real-time computer analysis and modeling based on close patient monitoring and electronic health record data. The Medical Information Mart for Intensive Care (MIMIC) is the first open access database in the ICU domain. Many studies have shown that common data models (CDMs) improve database searching by allowing code, tools, and experience to be shared. The Observational Medical Outcomes Partnership (OMOP) CDM is spreading all over the world. Objective: The objective was to transform MIMIC into an OMOP database and to evaluate the benefits of this transformation for analysts. Methods: We transformed MIMIC (version 1.4.21) into OMOP format (version 5.3.3.1) through semantic and structural mapping. The structural mapping aimed at moving the MIMIC data into the right place in OMOP, with some data transformations. The mapping was divided into 3 phases: conception, implementation, and evaluation. The semantic mapping aimed at aligning the MIMIC local terminologies to OMOP's standard ones. It consisted of 3 phases: integration, alignment, and evaluation. A documented, tested, versioned, exemplified, and open repository was set up to support the transformation and improvement of the MIMIC community's source code. The resulting data set was evaluated over a 48-hour datathon. Results: With an investment of 2 people for 500 hours, 64\% of the data items of the 26 MIMIC tables were standardized into the OMOP CDM and 78\% of the source concepts mapped to reference terminologies. The model proved its ability to support community contributions and was well received during the datathon, with 160 participants and 15,000 requests executed with a maximum duration of 1 minute.
Conclusions: The resulting MIMIC-OMOP data set is the first version of MIMIC in the OMOP format available free of charge, with real deidentified data ready for replicable intensive care research. This approach can be generalized to any medical field. ", doi="10.2196/30970", url="https://medinform.jmir.org/2021/12/e30970", url="http://www.ncbi.nlm.nih.gov/pubmed/34904958" } @Article{info:doi/10.2196/27363, author="Maile, Howard and Li, Olivia Ji-Peng and Gore, Daniel and Leucci, Marcello and Mulholland, Padraig and Hau, Scott and Szabo, Anita and Moghul, Ismail and Balaskas, Konstantinos and Fujinami, Kaoru and Hysi, Pirro and Davidson, Alice and Liskova, Petra and Hardcastle, Alison and Tuft, Stephen and Pontikos, Nikolas", title="Machine Learning Algorithms to Detect Subclinical Keratoconus: Systematic Review", journal="JMIR Med Inform", year="2021", month="Dec", day="13", volume="9", number="12", pages="e27363", keywords="artificial intelligence", keywords="machine learning", keywords="cornea", keywords="keratoconus", keywords="corneal tomography", keywords="subclinical", keywords="corneal imaging", keywords="decision support systems", keywords="corneal disease", keywords="keratometry", abstract="Background: Keratoconus is a disorder characterized by progressive thinning and distortion of the cornea. If detected at an early stage, corneal collagen cross-linking can prevent disease progression and further visual loss. Although advanced forms are easily detected, reliable identification of subclinical disease can be problematic. Several different machine learning algorithms have been used to improve the detection of subclinical keratoconus based on the analysis of multiple types of clinical measures, such as corneal imaging, aberrometry, or biomechanical measurements. Objective: The aim of this study is to survey and critically evaluate the literature on the algorithmic detection of subclinical keratoconus and equivalent definitions. Methods: For this systematic review, we performed a structured search of the following databases: MEDLINE, Embase, Web of Science, and Cochrane Library from January 1, 2010, to October 31, 2020. We included all full-text studies that have used algorithms for the detection of subclinical keratoconus and excluded studies that did not perform validation. This systematic review followed the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) recommendations. Results: We compared the measured parameters and the design of the machine learning algorithms reported in 26 papers that met the inclusion criteria. All salient information required for detailed comparison, including diagnostic criteria, demographic data, sample size, acquisition system, validation details, parameter inputs, machine learning algorithm, and key results, is reported in this study. Conclusions: Machine learning has the potential to improve the detection of subclinical keratoconus or early keratoconus in routine ophthalmic practice. Currently, there is no consensus regarding the corneal parameters that should be included for assessment and the optimal design for the machine learning algorithm. We have identified avenues for further research to improve early detection and stratification of patients for early treatment to prevent disease progression. ", doi="10.2196/27363", url="https://medinform.jmir.org/2021/12/e27363", url="http://www.ncbi.nlm.nih.gov/pubmed/34898463" } @Article{info:doi/10.2196/23440, author="Alanazi, M.
Eman and Abdou, Aalaa and Luo, Jake", title="Predicting Risk of Stroke From Lab Tests Using Machine Learning Algorithms: Development and Evaluation of Prediction Models", journal="JMIR Form Res", year="2021", month="Dec", day="2", volume="5", number="12", pages="e23440", keywords="stroke", keywords="lab tests", keywords="machine learning technology", keywords="predictive analytics", abstract="Background: Stroke, a cerebrovascular disease, is one of the major causes of death. It causes significant health and financial burdens for both patients and health care systems. One of the important risk factors for stroke is health-related behavior, which is becoming an increasingly important focus of prevention. Many machine learning models have been built to predict the risk of stroke or to automatically diagnose stroke, using predictors such as lifestyle factors or radiological imaging. However, there have been no models built using data from lab tests. Objective: The aim of this study was to apply computational methods using machine learning techniques to predict stroke from lab test data. Methods: We used the National Health and Nutrition Examination Survey data sets with three different data selection methods (ie, without data resampling, with data imputation, and with data resampling) to develop predictive models. We used four machine learning classifiers and six performance measures to evaluate the performance of the models. Results: We found that accurate and sensitive machine learning models can be created to predict stroke from lab test data. Our results show that the data resampling approach performed the best compared to the other two data selection techniques. Prediction with the random forest algorithm, which was the best algorithm tested, achieved an accuracy, sensitivity, specificity, positive predictive value, negative predictive value, and area under the curve of 0.96, 0.97, 0.96, 0.75, 0.99, and 0.97, respectively, when all of the attributes were used. Conclusions: The predictive model, built using data from lab tests, was easy to use and had high accuracy. In future studies, we aim to use data that reflect different types of stroke and to explore the data to build a prediction model for each type. ", doi="10.2196/23440", url="https://formative.jmir.org/2021/12/e23440", url="http://www.ncbi.nlm.nih.gov/pubmed/34860663" } @Article{info:doi/10.2196/30304, author="Majam, Mohammed and Phatsoane, Mothepane and Hanna, Keith and Faul, Charles and Arora, Lovkesh and Makthal, Sarvesh and Kumar, Akhil and Jois, Kashyap and Lalla-Edward, Tresha Samanta", title="Utility of a Machine-Guided Tool for Assessing Risk Behavior Associated With Contracting HIV in Three Sites in South Africa: Protocol for an In-Field Evaluation", journal="JMIR Res Protoc", year="2021", month="Dec", day="2", volume="10", number="12", pages="e30304", keywords="machine learning", keywords="predictive risk", keywords="modeling", keywords="algorithm", keywords="HIV status", keywords="HIV", keywords="risk assessment", keywords="South Africa", abstract="Background: Mobile technology has helped to advance health programs, and studies have shown that an automated risk prediction model can successfully be used to identify patients who exhibit a high probable risk of contracting human immunodeficiency virus (HIV). A machine-guided tool is an algorithm that takes a set of subjective and objective answers from a simple questionnaire and computes an HIV risk assessment score. 
Objective: The primary objective of this study is to establish that machine learning can be used to develop machine-guided tools and give us a deeper statistical understanding of the correlation between certain behavioral patterns and HIV. Methods: In total, 200 HIV-negative adult individuals will be recruited at each of three South African study sites (two semirural and one urban). Study processes will include (1) completing a series of questions (demographic, sexual behavior and history, personal, lifestyle, and symptoms) on an application system, unaided (assistance will only be provided upon user request); (2) undergoing two HIV tests (one per study visit) performed by a nurse/counselor according to South African national guidelines (to evaluate the prediction accuracy of the tool); and (3) communicating test results and completing a user experience survey questionnaire. The output metrics for this study will be computed by using the participants' risk assessment scores as ``predictions'' and the test results as the ``ground truth.'' Analyses will be completed after visit 1 and then again after visit 2. All risk assessment scores will be used to calculate the reliability of the machine-guided tool. Results: Ethical approval was received from the University of Witwatersrand Human Research Ethics Committee (HREC; ethics reference no. 200312) on August 20, 2020. This study is ongoing. Data collection has commenced and is expected to be completed in the second half of 2021. We will report on the machine-guided tool's performance and usability, together with user satisfaction and recommendations for improvement. Conclusions: Machine-guided risk assessment tools can provide a cost-effective alternative to large-scale HIV screening and help in providing targeted counseling and testing to prevent the spread of HIV. Trial Registration: South African National Clinical Trial Registry DOH-27-042021-679; https://sanctr.samrc.ac.za/TrialDisplay.aspx?TrialID=5545 International Registered Report Identifier (IRRID): DERR1-10.2196/30304 ", doi="10.2196/30304", url="https://www.researchprotocols.org/2021/12/e30304", url="http://www.ncbi.nlm.nih.gov/pubmed/34860679" } @Article{info:doi/10.2196/29398, author="Abdelkader, Wael and Navarro, Tamara and Parrish, Rick and Cotoi, Chris and Germini, Federico and Linkins, Lori-Ann and Iorio, Alfonso and Haynes, Brian R. and Ananiadou, Sophia and Chu, Lingyang and Lokker, Cynthia", title="A Deep Learning Approach to Refine the Identification of High-Quality Clinical Research Articles From the Biomedical Literature: Protocol for Algorithm Development and Validation", journal="JMIR Res Protoc", year="2021", month="Nov", day="29", volume="10", number="11", pages="e29398", keywords="bioinformatics", keywords="machine learning", keywords="evidence-based medicine", keywords="literature retrieval", keywords="medical informatics", keywords="natural language processing", keywords="NLP", keywords="biomedical", keywords="literature", keywords="literature surveillance", keywords="model development", abstract="Background: A barrier to practicing evidence-based medicine is the rapidly increasing body of biomedical literature. Use of method terms to limit the search can help reduce the burden of screening articles for clinical relevance; however, such terms are limited by their partial dependence on indexing terms and usually produce low precision, especially when high sensitivity is required.
Machine learning has been applied to the identification of high-quality literature with the potential to achieve high precision without sacrificing sensitivity. The use of artificial intelligence has shown promise to improve the efficiency of identifying sound evidence. Objective: The primary objective of this research is to derive and validate deep learning machine models using iterations of Bidirectional Encoder Representations from Transformers (BERT) to retrieve high-quality, high-relevance evidence for clinical consideration from the biomedical literature. Methods: Using the HuggingFace Transformers library, we will experiment with variations of BERT models, including BERT, BioBERT, BlueBERT, and PubMedBERT, to determine which have the best performance in article identification based on quality criteria. Our experiments will utilize a large data set of over 150,000 PubMed citations from 2012 to 2020 that have been manually labeled based on their methodological rigor for clinical use. We will evaluate and report on the performance of the classifiers in categorizing articles based on their likelihood of meeting quality criteria. We will report fine-tuning hyperparameters for each model, as well as their performance metrics, including recall (sensitivity), specificity, precision, accuracy, F-score, the number of articles that need to be read before finding one that is positive (meets criteria), and classification probability scores. Results: Initial model development is underway, with further development planned for early 2022. Performance testing is expected to start in February 2022. Results will be published in 2022. Conclusions: The experiments will aim to improve the precision of retrieving high-quality articles by applying a machine learning classifier to PubMed searching. International Registered Report Identifier (IRRID): DERR1-10.2196/29398 ", doi="10.2196/29398", url="https://www.researchprotocols.org/2021/11/e29398", url="http://www.ncbi.nlm.nih.gov/pubmed/34847061" } @Article{info:doi/10.2196/31366, author="Tan, Yi Ming and Goh, Enhui Charlene and Tan, Hon Hee", title="Contemporary English Pain Descriptors as Detected on Social Media Using Artificial Intelligence and Emotion Analytics Algorithms: Cross-sectional Study", journal="JMIR Form Res", year="2021", month="Nov", day="25", volume="5", number="11", pages="e31366", keywords="pain descriptors", keywords="social media", keywords="artificial intelligence", keywords="emotion analytics", keywords="McGill Pain Questionnaire", abstract="Background: Pain description is fundamental to health care. The McGill Pain Questionnaire (MPQ) has been validated as a tool for the multidimensional measurement of pain; however, its use relies heavily on language proficiency. Although the MPQ has remained unchanged since its inception, the English language has evolved significantly since then. The advent of the internet and social media has allowed for the generation of a staggering amount of publicly available data, allowing linguistic analysis at a scale never seen before. Objective: The aim of this study is to use social media data to examine the relevance of pain descriptors from the existing MPQ, identify novel contemporary English descriptors for pain among users of social media, and suggest a modification for a new MPQ for future validation and testing. Methods: All posts from social media platforms from January 1, 2019, to December 31, 2019, were extracted. 
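The Abdelkader et al protocol above (doi 10.2196/29398) fine-tunes BERT-family checkpoints with the HuggingFace Transformers library to flag methodologically sound citations. A minimal sketch of that fine-tuning loop, assuming the generic bert-base-uncased checkpoint and a two-example toy data set in place of the authors' 150,000 labeled citations:

```python
# Toy BERT fine-tuning sketch; checkpoint and data are stand-ins, not the study's setup.
import torch
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

texts = ["Randomized controlled trial of ...", "Narrative commentary on ..."]
labels = [1, 0]  # 1 = meets methodological quality criteria

tok = AutoTokenizer.from_pretrained("bert-base-uncased")
enc = tok(texts, truncation=True, padding=True, return_tensors="pt")

class CitationDataset(torch.utils.data.Dataset):
    def __len__(self):
        return len(labels)
    def __getitem__(self, i):
        item = {k: v[i] for k, v in enc.items()}
        item["labels"] = torch.tensor(labels[i])
        return item

model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
args = TrainingArguments(output_dir="out", num_train_epochs=1, per_device_train_batch_size=2)
Trainer(model=model, args=args, train_dataset=CitationDataset()).train()
```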
Artificial intelligence and emotion analytics algorithms (Crystalace and CrystalFeel) were used to measure the emotional properties of the text, including sarcasm, anger, fear, sadness, joy, and valence. Word2Vec was used to identify new pain descriptors associated with the original descriptors from the MPQ. Analysis of count and pain intensity formed the basis for proposing new pain descriptors and determining the order of pain descriptors within each subclass. Results: A total of 118 new associated words were found via Word2Vec. Of these 118 words, 49 (41.5\%) words had a count of at least 110, which corresponded to the count of the bottom 10\% (8/78) of the original MPQ pain descriptors. The count and intensity of pain descriptors were used to formulate the inclusion criteria for a new pain questionnaire. For the suggested new pain questionnaire, 11 existing pain descriptors were removed, 13 new descriptors were added to existing subclasses, and a new Psychological subclass comprising 9 descriptors was added. Conclusions: This study presents a novel methodology using social media data to identify new pain descriptors and can be repeated at regular intervals to ensure the relevance of pain questionnaires. The original MPQ contains several potentially outdated pain descriptors and is inadequate for reporting the psychological aspects of pain. Further research is needed to examine the reliability and validity of the revised MPQ. ", doi="10.2196/31366", url="https://formative.jmir.org/2021/11/e31366", url="http://www.ncbi.nlm.nih.gov/pubmed/34842554" } @Article{info:doi/10.2196/28620, author="May, B. Sarah and Giordano, P. Thomas and Gottlieb, Assaf", title="A Phenotyping Algorithm to Identify People With HIV in Electronic Health Record Data (HIV-Phen): Development and Evaluation Study", journal="JMIR Form Res", year="2021", month="Nov", day="25", volume="5", number="11", pages="e28620", keywords="phenotyping", keywords="algorithms", keywords="electronic health records", keywords="people with HIV", keywords="cohort identification", abstract="Background: Identification of people with HIV from electronic health record (EHR) data is an essential first step in the study of important HIV outcomes, such as risk assessment. This task has been historically performed via manual chart review, but the increased availability of large clinical data sets has led to the emergence of phenotyping algorithms to automate this process. Existing algorithms for identifying people with HIV rely on a combination of International Classification of Disease codes and laboratory tests or closely mimic clinical testing guidelines for HIV diagnosis. However, we found that existing algorithms in the literature missed a significant proportion of people with HIV in our data. Objective: The aim of this study is to develop and evaluate HIV-Phen, an updated criteria-based HIV phenotyping algorithm. Methods: We developed an algorithm using HIV-specific laboratory tests and medications and compared it with previously published algorithms in national and local data sets to identify cohorts of people with HIV. Cohort demographics were compared with those reported in the national and local surveillance data. Chart reviews were performed on a subsample of patients from the local database to calculate the sensitivity, specificity, positive predictive value, negative predictive value, and accuracy of the algorithm. 
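The pain-descriptor study above (doi 10.2196/31366) uses Word2Vec neighborhoods of existing MPQ descriptors to propose new descriptors. A small gensim sketch of that step, with a three-post stand-in corpus instead of a year of social media data:

```python
# Word2Vec neighborhood lookup; the corpus here is invented for illustration.
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess

posts = [
    "my back pain is burning and stabbing today",
    "this headache feels like throbbing and pounding",
    "searing burning ache in my shoulder again",
]
sentences = [simple_preprocess(p) for p in posts]
model = Word2Vec(sentences, vector_size=50, window=5, min_count=1, sg=1, epochs=50)

# Candidate new descriptors = nearest neighbors of an original MPQ descriptor.
print(model.wv.most_similar("burning", topn=5))
```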
Results: Our new algorithm identified substantially more people with HIV in both national (up to an 85.75\% increase) and local (up to an 83.20\% increase) EHR databases than the previously published algorithms. The demographic characteristics of people with HIV identified using our algorithm were similar to those reported in national and local HIV surveillance data. Our algorithm demonstrated improved sensitivity over existing algorithms (98\% vs 56\%-92\%) while maintaining a similar overall accuracy (96\% vs 80\%-96\%). Conclusions: We developed and evaluated an updated criteria-based phenotyping algorithm for identifying people with HIV in EHR data that demonstrates improved sensitivity over existing algorithms. ", doi="10.2196/28620", url="https://formative.jmir.org/2021/11/e28620", url="http://www.ncbi.nlm.nih.gov/pubmed/34842532" } @Article{info:doi/10.2196/26993, author="Bohlmann, Aaron and Mostafa, Javed and Kumar, Manish", title="Machine Learning and Medication Adherence: Scoping Review", journal="JMIRx Med", year="2021", month="Nov", day="24", volume="2", number="4", pages="e26993", keywords="machine learning", keywords="medication adherence", keywords="adherence monitoring", keywords="adherence prediction", keywords="medication compliance", keywords="health technology", abstract="Background: This is the first scoping review to focus broadly on the topics of machine learning and medication adherence. Objective: This review aims to categorize, summarize, and analyze literature focused on using machine learning for actions related to medication adherence. Methods: PubMed, Scopus, ACM Digital Library, IEEE, and Web of Science were searched to find works that meet the inclusion criteria. After full-text review, 43 works were included in the final analysis. Information of interest was systematically charted before inclusion in the final draft. Studies were placed into natural categories for additional analysis dependent upon the combination of actions related to medication adherence. The protocol for this scoping review was created using the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) guidelines. Results: Publications focused on predicting medication adherence have uncovered 20 strong predictors that were significant in two or more studies. A total of 13 studies that predicted medication adherence used either self-reported questionnaires or pharmacy claims data to determine medication adherence status. In addition, 13 studies that predicted medication adherence did so using either logistic regression, artificial neural networks, random forest, or support vector machines. Of the 15 studies that predicted medication adherence, 6 reported predictor accuracy, the lowest of which was 77.6\%. Of 13 monitoring systems, 12 determined medication administration using medication container sensors or sensors in consumer electronics, like smartwatches or smartphones. A total of 11 monitoring systems used logistic regression, artificial neural networks, support vector machines, or random forest algorithms to determine medication administration. The 4 systems that monitored inhaler administration reported a classification accuracy of 93.75\% or higher. The 2 systems that monitored medication status in patients with Parkinson disease reported a classification accuracy of 78\% or higher. A total of 3 studies monitored medication administration using only smartwatch sensors and reported a classification accuracy of 78.6\% or higher. 
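HIV-Phen (doi 10.2196/28620) is a criteria-based algorithm built from HIV-specific laboratory tests and medications. A toy pandas sketch of that style of rule; the thresholds, drug list, and column names are invented for illustration and are not the published criteria:

```python
# Hypothetical criteria-based phenotyping rule: flag patients via labs OR medications.
import pandas as pd

labs = pd.DataFrame({"patient_id": [1, 2, 3],
                     "test": ["HIV_VIRAL_LOAD", "CBC", "HIV_VIRAL_LOAD"],
                     "value": [54000, 7.1, 0]})
meds = pd.DataFrame({"patient_id": [2, 3], "drug": ["metformin", "dolutegravir"]})

ART_DRUGS = {"dolutegravir", "tenofovir", "emtricitabine"}  # illustrative subset only
lab_flag = labs.loc[(labs.test == "HIV_VIRAL_LOAD") & (labs.value > 0), "patient_id"]
med_flag = meds.loc[meds.drug.isin(ART_DRUGS), "patient_id"]
cohort = set(lab_flag) | set(med_flag)
print(sorted(cohort))  # -> [1, 3]
```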
Two systems that provided context-aware medication reminders helped patients to achieve an adherence level of 92\% or higher. Two conversational artificial intelligence reminder systems significantly improved adherence rates when compared against traditional reminder systems. Conclusions: Creation of systems that accurately predict medication adherence across multiple data sets may be possible due to predictors remaining strong across multiple studies. Higher quality measures of adherence should be adopted when possible so that prediction algorithms are based on accurate information. Currently, medication adherence can be predicted with a good level of accuracy, potentially allowing for the development of interventions aimed at preventing nonadherence. Monitoring systems that track inhaler use currently classify inhaler-related actions with an excellent level of accuracy, allowing for tracking of adherence and potentially proper inhaler technique. Systems that monitor medication states in patients with Parkinson disease can currently achieve a good level of classification accuracy and have the potential to inform medication therapy changes in the future. Medication administration monitoring systems that only use motion sensors in smartwatches can currently achieve a good level of classification accuracy but only when differentiating between a small number of possible activities. Context-aware reminder systems can help patients achieve high levels of medication adherence but are also intrusive, which may not be acceptable to users. Conversational artificial intelligence reminder systems can significantly improve adherence. ", doi="10.2196/26993", url="https://med.jmirx.org/2021/4/e26993", url="http://www.ncbi.nlm.nih.gov/pubmed/37725549" } @Article{info:doi/10.2196/29749, author="Jan, Zainab and Al-Ansari, Noor and Mousa, Osama and Abd-alrazaq, Alaa and Ahmed, Arfan and Alam, Tanvir and Househ, Mowafa", title="The Role of Machine Learning in Diagnosing Bipolar Disorder: Scoping Review", journal="J Med Internet Res", year="2021", month="Nov", day="19", volume="23", number="11", pages="e29749", keywords="machine learning", keywords="bipolar disorder", keywords="diagnosis", keywords="support vector machine", keywords="clinical data", keywords="mental health", keywords="scoping review", abstract="Background: Bipolar disorder (BD) is the 10th most common cause of frailty in young individuals and has triggered morbidity and mortality worldwide. Patients with BD have a life expectancy 9 to 17 years lower than that of normal people. BD is a predominant mental disorder, but it can be misdiagnosed as depressive disorder, which leads to difficulties in treating affected patients. Approximately 60\% of patients with BD are treated for depression. However, machine learning provides advanced skills and techniques for better diagnosis of BD. Objective: This review aims to explore the machine learning algorithms used for the detection and diagnosis of bipolar disorder and its subtypes. Methods: The study protocol adopted the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) guidelines. We explored 3 databases, namely Google Scholar, ScienceDirect, and PubMed. To enhance the search, we performed backward screening of all the references of the included studies. Based on the predefined selection criteria, 2 levels of screening were performed: title and abstract review, and full review of the articles that met the inclusion criteria. 
Data extraction was performed independently by all investigators. To synthesize the extracted data, a narrative synthesis approach was followed. Results: We retrieved 573 potential articles from the 3 databases. After preprocessing and screening, only 33 articles that met our inclusion criteria were identified. The most commonly used data belonged to the clinical category (19, 58\%). We identified different machine learning models used in the selected studies, including classification models (18, 55\%), regression models (5, 16\%), model-based clustering methods (2, 6\%), natural language processing (1, 3\%), clustering algorithms (1, 3\%), and deep learning--based models (3, 9\%). Magnetic resonance imaging data were most commonly used for classifying bipolar patients compared to other groups (11, 34\%), whereas microarray expression data sets and genomic data were the least commonly used. The maximum reported accuracy was 98\%, whereas the minimum was 64\%. Conclusions: This scoping review provides an overview of recent studies based on machine learning models used to diagnose patients with BD regardless of their demographics or if they were compared to patients with psychiatric diagnoses. Further research can be conducted to provide clinical decision support in the health industry. ", doi="10.2196/29749", url="https://www.jmir.org/2021/11/e29749", url="http://www.ncbi.nlm.nih.gov/pubmed/34806996" } @Article{info:doi/10.2196/30079, author="Wang, Huan and Wu, Wei and Han, Chunxia and Zheng, Jiaqi and Cai, Xinyu and Chang, Shimin and Shi, Junlong and Xu, Nan and Ai, Zisheng", title="Prediction Model of Osteonecrosis of the Femoral Head After Femoral Neck Fracture: Machine Learning--Based Development and Validation Study", journal="JMIR Med Inform", year="2021", month="Nov", day="19", volume="9", number="11", pages="e30079", keywords="femoral neck fracture", keywords="osteonecrosis of the femoral head", keywords="machine learning", keywords="interpretability", abstract="Background: The absolute number of femoral neck fractures (FNFs) is increasing; however, the prediction of traumatic femoral head necrosis remains difficult. Machine learning algorithms have the potential to be superior to traditional prediction methods for the prediction of traumatic femoral head necrosis. Objective: The aim of this study is to use machine learning to construct a model for the analysis of risk factors and prediction of osteonecrosis of the femoral head (ONFH) in patients with FNF after internal fixation. Methods: We retrospectively collected preoperative, intraoperative, and postoperative clinical data of patients with FNF in 4 hospitals in Shanghai and followed up the patients for more than 2.5 years. A total of 259 patients with 43 variables were included in the study. The data were randomly divided into a training set (181/259, 69.8\%) and a validation set (78/259, 30.1\%). External data (n=376) were obtained from a retrospective cohort study of patients with FNF in 3 other hospitals. Least absolute shrinkage and selection operator regression and the support vector machine algorithm were used for variable selection. Logistic regression, random forest, support vector machine, and eXtreme Gradient Boosting (XGBoost) were used to develop the model on the training set. The validation set was used to tune the model hyperparameters to determine the final prediction model, and the external data were used to compare and evaluate the model performance. 
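The ONFH study above (doi 10.2196/30079) trains an XGBoost classifier and explains it with Shapley values. A compact sketch of that train-then-explain workflow, assuming the xgboost and shap packages and synthetic data in place of the study's 11 selected clinical variables:

```python
# XGBoost + SHAP sketch on synthetic data; not the study's variables or settings.
import numpy as np
import shap
import xgboost as xgb
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=300, n_features=11, random_state=0)
model = xgb.XGBClassifier(n_estimators=100, max_depth=3)
model.fit(X, y)

# TreeExplainer yields per-feature contributions for each individual prediction.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
print(np.abs(shap_values).mean(axis=0))  # global importance as mean |SHAP|
```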
We compared the accuracy, discrimination, and calibration of the models to identify the best machine learning algorithm for predicting ONFH. Shapley additive explanations and local interpretable model-agnostic explanations were used to determine the interpretability of the black box model. Results: A total of 11 variables were selected for the models. The XGBoost model performed best on the validation set and external data. The accuracy, sensitivity, and area under the receiver operating characteristic curve of the model on the validation set were 0.987, 0.929, and 0.992, respectively. The accuracy, sensitivity, specificity, and area under the receiver operating characteristic curve of the model on the external data were 0.907, 0.807, 0.935, and 0.933, respectively, and the log-loss was 0.279. The calibration curve demonstrated good agreement between the predicted probability and actual risk. The interpretability of the features and individual predictions were realized using the Shapley additive explanations and local interpretable model-agnostic explanations algorithms. In addition, the XGBoost model was translated into a self-made web-based risk calculator to estimate an individual's probability of ONFH. Conclusions: Machine learning performs well in predicting ONFH after internal fixation of FNF. The 6-variable XGBoost model predicted the risk of ONFH well and had good generalization ability on the external data, which can be used for the clinical prediction of ONFH after internal fixation of FNF. ", doi="10.2196/30079", url="https://medinform.jmir.org/2021/11/e30079", url="http://www.ncbi.nlm.nih.gov/pubmed/34806984" } @Article{info:doi/10.2196/30066, author="Kim, Taewoo and Lee, Hyun Dong and Park, Eun-Kee and Choi, Sanghun", title="Deep Learning Techniques for Fatty Liver Using Multi-View Ultrasound Images Scanned by Different Scanners: Development and Validation Study", journal="JMIR Med Inform", year="2021", month="Nov", day="18", volume="9", number="11", pages="e30066", keywords="fatty liver", keywords="deep learning", keywords="transfer learning", keywords="classification", keywords="regression", keywords="magnetic resonance imaging--proton density fat fraction", keywords="multi-view ultrasound images", keywords="artificial intelligence", keywords="machine imaging", keywords="imaging", keywords="informatics", keywords="fatty liver disease", keywords="detection", keywords="diagnosis", abstract="Background: Fat fraction values obtained from magnetic resonance imaging (MRI) can be used to obtain an accurate diagnosis of fatty liver diseases. However, MRI is expensive and cannot be performed for everyone. Objective: In this study, we aim to develop multi-view ultrasound image--based convolutional deep learning models to detect fatty liver disease and yield fat fraction values. Methods: We extracted 90 ultrasound images of the right intercostal view and 90 ultrasound images of the right intercostal view containing the right renal cortex from 39 cases of fatty liver (MRI--proton density fat fraction [MRI--PDFF] ? 5\%) and 51 normal subjects (MRI--PDFF < 5\%), with MRI--PDFF values obtained from Good Gang-An Hospital. We obtained combined liver and kidney-liver (CLKL) images to train the deep learning models and developed classification and regression models based on the VGG19 model to classify fatty liver disease and yield fat fraction values. We employed the data augmentation techniques such as flip and rotation to prevent the deep learning model from overfitting. 
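The fatty liver study above (doi 10.2196/30066) builds VGG19-based classifiers with flip and rotation augmentation. A transfer-learning sketch consistent with that description, assuming tf.keras; the input size and classification head are guesses, not the authors' exact architecture:

```python
# Frozen VGG19 backbone with a new binary head; shapes and layers are illustrative.
import tensorflow as tf

base = tf.keras.applications.VGG19(include_top=False, weights="imagenet",
                                   input_shape=(224, 224, 3))
base.trainable = False  # keep pretrained convolutional features fixed

model = tf.keras.Sequential([
    base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),  # fatty liver probability
])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Flip/rotation augmentation, as mentioned in the abstract.
augment = tf.keras.Sequential([tf.keras.layers.RandomFlip("horizontal"),
                               tf.keras.layers.RandomRotation(0.1)])
```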
We evaluated the deep learning model using performance metrics such as accuracy, sensitivity, specificity, and the coefficient of determination (R2). Results: Demographic characteristics such as age and sex were similar between the two groups---fatty liver disease and normal subjects. In classification, the model trained on CLKL images achieved 80.1\% accuracy, 86.2\% precision, and 80.5\% specificity to detect fatty liver disease. In regression, the predicted fat fraction values of the regression model trained on CLKL images correlated with MRI--PDFF values (R2=0.633), indicating that the predicted fat fraction values were moderately estimated. Conclusions: With deep learning techniques and multi-view ultrasound images, it is potentially possible to replace MRI--PDFF values with deep learning predictions for detecting fatty liver disease and estimating fat fraction values. ", doi="10.2196/30066", url="https://medinform.jmir.org/2021/11/e30066", url="http://www.ncbi.nlm.nih.gov/pubmed/34792476" } @Article{info:doi/10.2196/28090, author="Garcia-Rudolph, Alejandro and Opisso, Eloy and Tormos, M. Jose and Madai, Istvan Vince and Frey, Dietmar and Becerra, Helard and Kelleher, D. John and Bernabeu Guitart, Montserrat and L{\'o}pez, Jaume", title="Toward Personalized Web-Based Cognitive Rehabilitation for Patients With Ischemic Stroke: Elo Rating Approach", journal="JMIR Med Inform", year="2021", month="Nov", day="10", volume="9", number="11", pages="e28090", keywords="cognitive rehabilitation", keywords="Elo rating", keywords="predictors", keywords="stroke rehabilitation", keywords="web-based tasks", abstract="Background: Stroke is a worldwide cause of disability; 40\% of stroke survivors sustain cognitive impairments, most of them following inpatient rehabilitation at specialized clinical centers. Web-based cognitive rehabilitation tasks are extensively used in clinical settings. The impact of task execution depends on the ratio between the skills of the treated patient and the challenges imposed by the task itself. Thus, treatment personalization requires a trade-off between patients' skills and task difficulties, which is still an open issue. In this study, we propose Elo ratings to support clinicians in task assignment and to represent patients' skills, with the aim of optimizing rehabilitation outcomes. Objective: This study aims to stratify patients with ischemic stroke at an early stage of rehabilitation into three levels according to their Elo rating; to show the relationships between the Elo rating levels, task difficulty levels, and rehabilitation outcomes; and to determine if the Elo rating obtained at early stages of rehabilitation is a significant predictor of rehabilitation outcomes. Methods: The PlayerRatings R library was used to obtain the Elo rating for each patient. Working memory was assessed using the DIGITS subtest of the Barcelona test, and the Rey Auditory Verbal Memory Test (RAVLT) was used to assess verbal memory. Three subtests of RAVLT were used: RAVLT learning (RAVLT075), free-recall memory (RAVLT015), and recognition (RAVLT015R). Memory predictors were identified using forward stepwise selection to add covariates to the models, which were evaluated by assessing discrimination using the area under the receiver operating characteristic curve (AUC) for logistic regressions and adjusted R2 for linear regressions. 
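The Elo approach above (doi 10.2196/28090) rates patients and tasks from task executions; the study used the PlayerRatings R package. The standard Elo update it relies on, in plain Python, with a task "defeated" when the patient completes it:

```python
# Standard Elo update applied to a patient-task execution; a plain-Python sketch.
def elo_update(r_patient, r_task, patient_won, k=20):
    expected = 1.0 / (1.0 + 10 ** ((r_task - r_patient) / 400.0))
    score = 1.0 if patient_won else 0.0
    delta = k * (score - expected)
    return r_patient + delta, r_task - delta

r_p, r_t = 1500.0, 1600.0            # patient skill vs task difficulty
r_p, r_t = elo_update(r_p, r_t, True)
print(round(r_p, 1), round(r_t, 1))  # patient gains more for beating a harder task
```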
Results: Three Elo levels (low, middle, and high) with the same number of patients (n=96) in each Elo group were obtained using the 50 initial task executions (from a total of 38,177) for N=288 adult patients consecutively admitted for inpatient rehabilitation in a clinical setting. The mid-Elo level showed the highest proportions of patients that improved in all four memory items: 56\% (54/96) of them improved in DIGITS, 67\% (64/96) in RAVLT075, 58\% (56/96) in RAVLT015, and 53\% (51/96) in RAVLT015R (P<.001). The proportions of patients from the mid-Elo level that performed tasks at difficulty levels 1, 2, and 3 were 32.1\% (3997/12,449), 31\% (3857/12,449), and 36.9\% (4595/12,449), respectively (P<.001), showing the highest match between skills (represented by Elo level) and task difficulties, considering the set of 38,177 task executions. Elo ratings were significant predictors in three of the four models and quasi-significant in the fourth. When predicting RAVLT075 and DIGITS at discharge, we obtained R2=0.54 and 0.43, respectively; meanwhile, we obtained AUC=0.73 (95\% CI 0.64-0.82) and AUC=0.81 (95\% CI 0.72-0.89) in RAVLT075 and DIGITS improvement predictions, respectively. Conclusions: Elo ratings can support clinicians in early rehabilitation stages in identifying cognitive profiles to be used for assigning task difficulty levels. ", doi="10.2196/28090", url="https://medinform.jmir.org/2021/11/e28090", url="http://www.ncbi.nlm.nih.gov/pubmed/34757325" } @Article{info:doi/10.2196/26914, author="Sung, MinDong and Cha, Dongchul and Park, Rang Yu", title="Local Differential Privacy in the Medical Domain to Protect Sensitive Information: Algorithm Development and Real-World Validation", journal="JMIR Med Inform", year="2021", month="Nov", day="8", volume="9", number="11", pages="e26914", keywords="privacy-preserving", keywords="differential privacy", keywords="medical informatics", keywords="medical data", keywords="privacy", keywords="electronic health record", keywords="algorithm", keywords="development", keywords="validation", keywords="big data", keywords="feasibility", keywords="machine learning", keywords="synthetic data", abstract="Background: Privacy is of increasing interest in the present big data era, particularly the privacy of medical data. Specifically, differential privacy has emerged as the standard method for preservation of privacy during data analysis and publishing. Objective: Using machine learning techniques, we applied differential privacy to medical data with diverse parameters and checked the feasibility of our algorithms with synthetic data as well as the balance between data privacy and utility. Methods: All data were normalized to a range between --1 and 1, and the bounded Laplacian method was applied to prevent the generation of out-of-bound values after applying the differential privacy algorithm. To preserve the cardinality of the categorical variables, we performed postprocessing via discretization. The algorithm was evaluated using both synthetic and real-world data (from the eICU Collaborative Research Database). We evaluated the difference between the original data and the perturbed data using misclassification rates and the mean squared error for categorical data and continuous data, respectively. Further, we compared the performance of classification models that predict in-hospital mortality using real-world data. 
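The local differential privacy study above (doi 10.2196/26914) perturbs values normalized to [--1, 1] with a bounded Laplacian. A simplified numpy sketch; clipping back to the bounds is one crude way to stay in range and may differ from the paper's bounded Laplacian mechanism:

```python
# Laplace perturbation on bounded values; clipping is an assumption of this sketch.
import numpy as np

def perturb(values, epsilon, lo=-1.0, hi=1.0):
    sensitivity = hi - lo                  # range of a bounded numeric value
    scale = sensitivity / epsilon          # Laplace scale b = sensitivity / epsilon
    noisy = values + np.random.laplace(0.0, scale, size=len(values))
    return np.clip(noisy, lo, hi)          # keep perturbed values in bounds

values = np.array([-0.4, 0.0, 0.9])
for eps in (0.1, 1.0, 10.0):
    print(eps, perturb(values, eps))       # noise shrinks as epsilon grows
```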
Results: The misclassification rate of categorical variables ranged between 0.49 and 0.85 when the value of $\epsilon$ was 0.1, and it converged to 0 as $\epsilon$ increased. When $\epsilon$ was between $10^2$ and $10^3$, the misclassification rate rapidly dropped to 0. Similarly, the mean squared error of the continuous variables decreased as $\epsilon$ increased. The performance of the model developed from perturbed data converged to that of the model developed from original data as $\epsilon$ increased. In particular, the accuracy of a random forest model developed from the original data was 0.801, and this value ranged from 0.757 to 0.81 when $\epsilon$ was $10^{-1}$ and $10^4$, respectively. Conclusions: We applied local differential privacy to medical domain data, which are diverse and high dimensional. Higher noise may offer enhanced privacy, but it simultaneously hinders utility. We should choose an appropriate degree of noise for data perturbation to balance privacy and utility depending on specific situations. ", doi="10.2196/26914", url="https://medinform.jmir.org/2021/11/e26914", url="http://www.ncbi.nlm.nih.gov/pubmed/34747711" } @Article{info:doi/10.2196/33047, author="Hatch, Gabe S. and Lobaina, Diana and Doss, D. Brian", title="Optimizing Coaching During Web-Based Relationship Education for Low-Income Couples: Protocol for Precision Medicine Research", journal="JMIR Res Protoc", year="2021", month="Nov", day="4", volume="10", number="11", pages="e33047", keywords="online relationship education", keywords="precision medicine", keywords="low-income couples", keywords="coaching", keywords="OurRelationship", keywords="ePREP", abstract="Background: In-person relationship education classes funded by the federal government tend to experience relatively high attrition rates and have only a limited effect on relationships. In contrast, low-income couples tend to report meaningful gains from web-based relationship education when provided with individualized coach contact. However, little is known about the method and intensity of practitioner contact that a couple requires to complete the web-based program and receive the intended benefit. Objective: The aim of this study is to use within-group models to create an algorithm to assign future couples to different programs and levels of coach contact, identify the most powerful predictors of treatment adherence and gains in relationship satisfaction within 3 different levels of coaching, and examine the most powerful predictors of treatment adherence and gains in relationship satisfaction among the 3 levels of coach contact. Methods: To accomplish these goals, this project intends to use data from a web-based Sequential Multiple Assignment Randomized Trial of the OurRelationship and web-based Prevention and Relationship Enhancement programs, in which the method and type of coach contact were randomly varied across 1248 couples (2496 individuals), with the hope of advancing theory in this area and generating accurate predictions. This study was funded by the US Department of Health and Human Services, Administration for Children and Families (grant number 90PD0309). Results: Data collection from the Sequential Multiple Assignment Randomized Trial of the OurRelationship and web-based Prevention and Relationship Enhancement Program was completed in October of 2020. 
Conclusions: Some of the direct benefits of this study include benefits to social services program administrators, tailoring of more effective relationship education, and effective delivery of evidence- and web-based relationship health interventions. International Registered Report Identifier (IRRID): DERR1-10.2196/33047 ", doi="10.2196/33047", url="https://www.researchprotocols.org/2021/11/e33047", url="http://www.ncbi.nlm.nih.gov/pubmed/34734838" } @Article{info:doi/10.2196/28329, author="Skovlund, E. Soren and Troelsen, Havb{\ae}k Lise and Noergaard, Mellergaard Lise and Pietraszek, Anna and Jakobsen, Erik Poul and Ejskjaer, Niels", title="Feasibility and Acceptability of a Digital Patient-Reported Outcome Tool in Routine Outpatient Diabetes Care: Mixed Methods Formative Pilot Study", journal="JMIR Form Res", year="2021", month="Nov", day="3", volume="5", number="11", pages="e28329", keywords="patient-reported outcomes", keywords="diabetes", keywords="person-centered care", keywords="person-centered communication", keywords="dialog", keywords="mental health", keywords="self-management", keywords="collaborative care", keywords="value-based health care", keywords="mixed-methods", keywords="mobile phone", keywords="mHealth", abstract="Background: Improvements in the digital capabilities of health systems provide new opportunities for the integration of patient-reported outcome (PRO) solutions in routine care, which can facilitate the delivery of person-centered diabetes care. We undertook this study as part of our development of a new digital PRO diabetes questionnaire and clinical dialog support tool for use by people with diabetes and their health care professionals (HCPs) to improve person-centered diabetes care quality and outcomes. Objective: This study evaluates the feasibility, acceptability, and perceived benefits and impacts of using a digital PRO diabetes tool, DiaProfil, in routine outpatient diabetes care. Methods: Overall, 12 people with diabetes scheduled for routine medical diabetes visits at the outpatient clinic were recruited. Purposive sampling was used to optimize heterogeneity regarding age, gender, duration, type of diabetes, treatment modality, and disease severity. Participants filled out a PRO diabetes questionnaire 2 to 5 days before their visit. During the visit, HCPs used a digital PRO tool to review PRO data with the person with diabetes for collaborative care planning. Participants completed evaluation forms before and after the visit and were interviewed for 30 to 45 minutes after the visit. HCPs completed the evaluation questionnaires after each visit. All visits were audio-recorded and transcribed for analysis. Data were analyzed using quantitative, qualitative, and mixed methods analyses. Results: People with diabetes found the PRO diabetes questionnaire to be relevant, acceptable, and feasible to complete from home. People with diabetes and HCPs found the digital PRO tool to be feasible and acceptable for use during the diabetes visit and would like to continue using it. HCPs were able to use the tool in a person-centered manner, as intended. For several people with diabetes, completion of the questionnaire facilitated positive reflection and better preparation for the visit. The use of the PRO tool primarily improved the quality of the dialog by improving the identification and focus on the issues most important to the person with diabetes. 
People with diabetes did not report any negative aspects of the PRO tool, whereas HCPs highlighted that it was demanding when the person with diabetes had many PRO issues that required attention within the predefined time allocated for a visit. Conclusions: The Danish PRO diabetes questionnaire and the digital tool, DiaProfil, are feasible and acceptable solutions for routine diabetes visits, and this tool may generate important benefits related to advancement of person-centered care. Further research is now required to corroborate and expand these formative insights on a larger scale and in diverse health care settings. The results of this study are therefore being used to define research hypotheses and finalize real-world PRO evaluation tools for a forthcoming large-scale multisector implementation study in Denmark. ", doi="10.2196/28329", url="https://formative.jmir.org/2021/11/e28329", url="http://www.ncbi.nlm.nih.gov/pubmed/34730545" } @Article{info:doi/10.2196/26524, author="Akbarian, Sina and Ghahjaverestan, Montazeri Nasim and Yadollahi, Azadeh and Taati, Babak", title="Noncontact Sleep Monitoring With Infrared Video Data to Estimate Sleep Apnea Severity and Distinguish Between Positional and Nonpositional Sleep Apnea: Model Development and Experimental Validation", journal="J Med Internet Res", year="2021", month="Nov", day="1", volume="23", number="11", pages="e26524", keywords="sleep apnea", keywords="deep learning", keywords="noncontact monitoring", keywords="computer vision", keywords="positional sleep apnea", keywords="3D convolutional neural network", keywords="3D-CNN", abstract="Background: Sleep apnea is a respiratory disorder characterized by frequent breathing cessation during sleep. Sleep apnea severity is determined by the apnea-hypopnea index (AHI), which is the hourly rate of respiratory events. In positional sleep apnea, the AHI is higher in the supine sleeping position than it is in other sleeping positions. Positional therapy is a behavioral strategy (eg, wearing an item to encourage sleeping toward the lateral position) to treat positional apnea. The gold standard of diagnosing sleep apnea and whether or not it is positional is polysomnography; however, this test is inconvenient, expensive, and has a long waiting list. Objective: The objective of this study was to develop and evaluate a noncontact method to estimate sleep apnea severity and to distinguish positional versus nonpositional sleep apnea. Methods: A noncontact deep-learning algorithm was developed to analyze infrared video of sleep for estimating AHI and to distinguish patients with positional vs nonpositional sleep apnea. Specifically, a 3D convolutional neural network (CNN) architecture was used to process movements extracted by optical flow to detect respiratory events. Positional sleep apnea patients were subsequently identified by combining the AHI information provided by the 3D-CNN model with the sleeping position (supine vs lateral) detected via a previously developed CNN model. Results: The algorithm was validated on data of 41 participants, including 26 men and 15 women with a mean age of 53 (SD 13) years, BMI of 30 (SD 7), AHI of 27 (SD 31) events/hour, and sleep duration of 5 (SD 1) hours; 20 participants had positional sleep apnea, 15 participants had nonpositional sleep apnea, and the positional status could not be discriminated for the remaining 6 participants. 
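The sleep apnea study above (doi 10.2196/26524) feeds optical-flow movement stacks to a 3D convolutional neural network to detect respiratory events. A minimal tf.keras sketch of such a network; the clip shape and layer sizes are illustrative only:

```python
# Minimal 3D-CNN over optical-flow clips; shapes are assumptions, not the study's.
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(16, 64, 64, 2)),    # 16 frames of 2-channel optical flow
    tf.keras.layers.Conv3D(8, kernel_size=3, activation="relu"),
    tf.keras.layers.MaxPooling3D(pool_size=2),
    tf.keras.layers.Conv3D(16, kernel_size=3, activation="relu"),
    tf.keras.layers.GlobalAveragePooling3D(),
    tf.keras.layers.Dense(1, activation="sigmoid"),  # P(respiratory event in clip)
])
model.compile(optimizer="adam", loss="binary_crossentropy")
model.summary()
```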
AHI values estimated by the 3D-CNN model correlated strongly and significantly with the gold standard (Spearman correlation coefficient 0.79, P<.001). Individuals with positional sleep apnea (based on an AHI threshold of 15) were identified with 83\% accuracy and an F1-score of 86\%. Conclusions: This study demonstrates the possibility of using a camera-based method for developing an accessible and easy-to-use device for screening sleep apnea at home, which can be provided in the form of a tablet or smartphone app. ", doi="10.2196/26524", url="https://www.jmir.org/2021/11/e26524", url="http://www.ncbi.nlm.nih.gov/pubmed/34723817" } @Article{info:doi/10.2196/25378, author="Tsuji, Shintaro and Wen, Andrew and Takahashi, Naoki and Zhang, Hongjian and Ogasawara, Katsuhiko and Jiang, Guoqian", title="Developing a RadLex-Based Named Entity Recognition Tool for Mining Textual Radiology Reports: Development and Performance Evaluation Study", journal="J Med Internet Res", year="2021", month="Oct", day="29", volume="23", number="10", pages="e25378", keywords="named entity recognition (NER)", keywords="natural language processing (NLP)", keywords="RadLex", keywords="ontology", keywords="stem term", abstract="Background: Named entity recognition (NER) plays an important role in extracting the features of descriptions such as the name and location of a disease for mining free-text radiology reports. However, the performance of existing NER tools is limited because the number of entities that can be extracted depends on the dictionary lookup. In particular, the recognition of compound terms is very complicated because of the variety of patterns. Objective: The aim of this study is to develop and evaluate an NER tool concerned with compound terms using RadLex for mining free-text radiology reports. Methods: We leveraged the clinical Text Analysis and Knowledge Extraction System (cTAKES) to develop customized pipelines using both RadLex and SentiWordNet (a general purpose dictionary). We manually annotated 400 radiology reports for compound terms in noun phrases and used them as the gold standard for performance evaluation (precision, recall, and F-measure). In addition, we created a compound terms--enhanced dictionary (CtED) by analyzing false negatives and false positives and applied it to another 100 radiology reports for validation. We also evaluated the stem terms of compound terms by defining two measures: occurrence ratio (OR) and matching ratio (MR). Results: The F-measure of cTAKES+RadLex+general purpose dictionary was 30.9\% (precision 73.3\% and recall 19.6\%) and that of the combined CtED was 63.1\% (precision 82.8\% and recall 51\%). The OR indicated that the stem terms of effusion, node, tube, and disease were used frequently, but compound terms containing these stems were still not fully captured. The MR showed that 71.85\% (9411/13,098) of the stem terms matched with those of the ontologies, and RadLex improved the MR by approximately 22\% over the cTAKES default dictionary. The OR and MR revealed that the characteristics of stem terms would have the potential to help generate synonymous phrases using the ontologies. Conclusions: We developed a RadLex-based customized pipeline for parsing radiology reports and demonstrated that CtED and stem term analysis have the potential to improve dictionary-based NER performance with regard to expanding vocabularies. 
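The RadLex pipeline above (doi 10.2196/25378) rests on dictionary lookup, where compound terms must win over their stem terms. A toy greedy longest-match sketch of that mechanism, with invented terms rather than RadLex entries:

```python
# Toy dictionary-lookup NER: greedy longest match of (compound) terms.
DICTIONARY = {"pleural effusion", "effusion", "lymph node", "node"}

def dictionary_ner(text, max_len=3):
    tokens = text.lower().split()
    entities, i = [], 0
    while i < len(tokens):
        for n in range(min(max_len, len(tokens) - i), 0, -1):  # longest match first
            span = " ".join(tokens[i:i + n])
            if span in DICTIONARY:
                entities.append(span)
                i += n
                break
        else:
            i += 1
    return entities

print(dictionary_ner("small pleural effusion near enlarged lymph node"))
# -> ['pleural effusion', 'lymph node']  (compound terms win over their stems)
```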
", doi="10.2196/25378", url="https://www.jmir.org/2021/10/e25378", url="http://www.ncbi.nlm.nih.gov/pubmed/34714247" } @Article{info:doi/10.2196/28752, author="Wang, Jie-Teng and Lin, Wen-Yang", title="Privacy-Preserving Anonymity for Periodical Releases of Spontaneous Adverse Drug Event Reporting Data: Algorithm Development and Validation", journal="JMIR Med Inform", year="2021", month="Oct", day="28", volume="9", number="10", pages="e28752", keywords="adverse drug reaction", keywords="data anonymization", keywords="incremental data publishing", keywords="privacy preserving data publishing", keywords="spontaneous reporting system", keywords="drug", keywords="data set", keywords="anonymous", keywords="privacy", keywords="security", keywords="algorithm", keywords="development", keywords="validation", keywords="data", abstract="Background: Spontaneous reporting systems (SRSs) have been increasingly established to collect adverse drug events for fostering adverse drug reaction (ADR) detection and analysis research. SRS data contain personal information, and so their publication requires data anonymization to prevent the disclosure of individuals' privacy. We have previously proposed a privacy model called MS(k, $\theta$*)-bounding and the associated MS-Anonymization algorithm to fulfill the anonymization of SRS data. In the real world, the SRS data usually are released periodically (eg, FDA Adverse Event Reporting System [FAERS]) to accommodate newly collected adverse drug events. Different anonymized releases of SRS data available to the attacker may thwart our single-release-focus method, that is, MS(k, $\theta$*)-bounding. Objective: We investigate the privacy threat caused by periodical releases of SRS data and propose anonymization methods to prevent the disclosure of personal privacy information while maintaining the utility of published data. Methods: We identify potential attacks on periodical releases of SRS data, namely, BFL-attacks, mainly caused by follow-up cases. We present a new privacy model called PPMS(k, $\theta$*)-bounding, and propose the associated PPMS-Anonymization algorithm and 2 improvements: PPMS+-Anonymization and PPMS++-Anonymization. Empirical evaluations were performed using 32 selected FAERS quarter data sets from 2004Q1 to 2011Q4. The performance of the proposed versions of PPMS-Anonymization was inspected against MS-Anonymization from some aspects, including data distortion, measured by normalized information loss; privacy risk of anonymized data, measured by dangerous identity ratio and dangerous sensitivity ratio; and data utility, measured by the bias of signal counting and strength (proportional reporting ratio). Results: The best version of PPMS-Anonymization, PPMS++-Anonymization, achieves nearly the same quality as MS-Anonymization in both privacy protection and data utility. Overall, PPMS++-Anonymization ensures zero privacy risk on record and attribute linkage, and exhibits 51\%-78\% and 59\%-82\% improvements on information loss over PPMS+-Anonymization and PPMS-Anonymization, respectively, and significantly reduces the bias of ADR signal. Conclusions: The proposed PPMS(k, $\theta$*)-bounding model and PPMS-Anonymization algorithm are effective in anonymizing SRS data sets in the periodical data publishing scenario, preventing the series of releases from disclosing personal sensitive information caused by BFL-attacks while maintaining the data utility for ADR signal detection. 
", doi="10.2196/28752", url="https://medinform.jmir.org/2021/10/e28752", url="http://www.ncbi.nlm.nih.gov/pubmed/34709197" } @Article{info:doi/10.2196/30093, author="Veludhandi, Anirudh and Ross, Diana and Sinha, B. Cynthia and McCracken, Courtney and Bakshi, Nitya and Krishnamurti, Lakshmanan", title="A Decision Support Tool for Allogeneic Hematopoietic Stem Cell Transplantation for Children With Sickle Cell Disease: Acceptability and Usability Study", journal="JMIR Form Res", year="2021", month="Oct", day="28", volume="5", number="10", pages="e30093", keywords="decision support tool", keywords="sickle cell disease", keywords="mobile application", keywords="mHealth", keywords="pediatrics", keywords="transplant", keywords="mobile phone", abstract="Background: Individuals living with sickle cell disease (SCD) may benefit from a variety of disease-modifying therapies, including hydroxyurea, voxelotor, crizanlizumab, L-glutamine, and chronic blood transfusions. However, allogeneic hematopoietic stem cell transplantation (HCT) remains the only nonexperimental treatment with curative intent. As HCT outcomes can be influenced by the complex interaction of several risk factors, HCT can be a difficult decision for health care providers to make for their patients with SCD. Objective: The aim of this study is to determine the acceptability and usability of a prototype decision support tool for health care providers in decision-making about HCT for SCD, together with patients and their families. Methods: On the basis of published transplant registry data, we developed the Sickle Options Decision Support Tool for Children, which provides health care providers with personalized transplant survival and risk estimates for their patients to help them make informed decisions regarding their patients' management of SCD. To evaluate the tool for its acceptability and usability, we conducted beta tests of the tool and surveys with physicians using the Ottawa Decision Support Framework and mobile health app usability questionnaire, respectively. Results: According to the mobile health app usability questionnaire survey findings, the overall usability of the tool was high (mean 6.15, SD 0.79; range 4.2-7). According to the Ottawa Decision Support Framework survey findings, acceptability of the presentation of information on the decision support tool was also high (mean 2.94, SD 0.63; range 2-4), but the acceptability regarding the amount of information was mixed (mean 2.59, SD 0.5; range 2-3). Most participants expressed that they would use the tool in their own patient consults (13/15, 87\%) and suggested that the tool would ease the decision-making process regarding HCT (8/9, 89\%). The 4 major emergent themes from the qualitative analysis of participant beta tests include user interface, data content, usefulness during a patient consult, and potential for a patient-focused decision aid. Most participants supported the idea of a patient-focused decision aid but recommended that it should include more background on HCT and a simplification of medical terminology. Conclusions: We report the development, acceptability, and usability of a prototype decision support tool app to provide individualized risk and survival estimates to patients interested in HCT in a patient consultation setting. We propose to finalize the tool by validating predictive analytics using a large data set of patients with SCD who have undergone HCT. 
Such a tool may be useful in promoting physician-patient collaboration in making shared decisions regarding HCT for SCD. Further incorporation of patient-specific measures, including the HCT comorbidity index and the quality of life after transplant, may improve the applicability of the decision support tool in a health care setting. ", doi="10.2196/30093", url="https://formative.jmir.org/2021/10/e30093", url="http://www.ncbi.nlm.nih.gov/pubmed/34709190" } @Article{info:doi/10.2196/22651, author="Hudon, Alexandre and Beaudoin, M{\'e}lissa and Phraxayavong, Kingsada and Dellazizzo, Laura and Potvin, St{\'e}phane and Dumais, Alexandre", title="Use of Automated Thematic Annotations for Small Data Sets in a Psychotherapeutic Context: Systematic Review of Machine Learning Algorithms", journal="JMIR Ment Health", year="2021", month="Oct", day="22", volume="8", number="10", pages="e22651", keywords="psychotherapy", keywords="artificial intelligence", keywords="automated text classification", keywords="machine learning", keywords="systematic review", abstract="Background: A growing body of literature has detailed the use of qualitative analyses to measure the therapeutic processes and intrinsic effectiveness of psychotherapies, which yield small databases. Nonetheless, these approaches have several limitations and machine learning algorithms are needed. Objective: The objective of this study is to conduct a systematic review of the use of machine learning for automated text classification for small data sets in the fields of psychiatry, psychology, and social sciences. This review will identify available algorithms and assess if automated classification of textual entities is comparable to the classification done by human evaluators. Methods: A systematic search was performed in the electronic databases of Medline, Web of Science, PsycNet (PsycINFO), and Google Scholar from their inception dates to 2021. The fields of psychiatry, psychology, and social sciences were selected as they include a vast array of textual entities in the domain of mental health that can be reviewed. Additional records identified through cross-referencing were used to find other studies. Results: This literature search identified 5442 articles that were eligible for our study after the removal of duplicates. Following abstract screening, 114 full articles were assessed in their entirety, of which 107 were excluded. The remaining 7 studies were analyzed. Classification algorithms such as naive Bayes, decision tree, and support vector machine classifiers were identified. Support vector machine is the most used algorithm and best performing as per the identified articles. Prediction classification scores for the identified algorithms ranged from 53\%-91\% for the classification of textual entities in 4-7 categories. In addition, 3 of the 7 studies reported an interjudge agreement statistic; these were consistent with agreement statistics for text classification done by human evaluators. Conclusions: A systematic review of available machine learning algorithms for automated text classification for small data sets in several fields (psychiatry, psychology, and social sciences) was conducted. We compared automated classification with classification done by human evaluators. Our results show that it is possible to automatically classify textual entities of a transcript based solely on small databases. Future studies are nevertheless needed to assess whether such algorithms can be implemented in the context of psychotherapies. 
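The review above (doi 10.2196/22651) found linear support vector machines the most used and best performing classifiers for small psychotherapy transcript data sets. A minimal scikit-learn sketch of that setup, with invented utterances and theme labels:

```python
# TF-IDF + linear SVM for small-data thematic classification; data are invented.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

utterances = ["I feel hopeless about this", "The voices were quieter this week",
              "We set a goal for next session", "I slept much better"]
themes = ["mood", "symptoms", "goals", "sleep"]

clf = make_pipeline(TfidfVectorizer(ngram_range=(1, 2)), LinearSVC())
clf.fit(utterances, themes)
print(clf.predict(["I was hopeless again"]))  # -> ['mood']
```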
", doi="10.2196/22651", url="https://mental.jmir.org/2021/10/e22651", url="http://www.ncbi.nlm.nih.gov/pubmed/34677133" } @Article{info:doi/10.2196/20458, author="Abujarad, Fuad and Peduzzi, Peter and Mun, Sophia and Carlson, Kristina and Edwards, Chelsea and Dziura, James and Brandt, Cynthia and Alfano, Sandra and Chupp, Geoffrey", title="Comparing a Multimedia Digital Informed Consent Tool With Traditional Paper-Based Methods: Randomized Controlled Trial", journal="JMIR Form Res", year="2021", month="Oct", day="19", volume="5", number="10", pages="e20458", keywords="digital consent", keywords="digital health", keywords="e-consent", keywords="informed consent", keywords="mobile phone", abstract="Background: The traditional informed consent (IC) process rarely emphasizes research participants' comprehension of medical information, leaving them vulnerable to unknown risks and consequences associated with procedures or studies. Objective: This paper explores how we evaluated the feasibility of a digital health tool called Virtual Multimedia Interactive Informed Consent (VIC) for advancing the IC process and compared the results with traditional paper-based methods of IC. Methods: Using digital health and web-based coaching, we developed the VIC tool that uses multimedia and other digital features to improve the current IC process. The tool was developed on the basis of the user-centered design process and Mayer's cognitive theory of multimedia learning. This study is a randomized controlled trial that compares the feasibility of VIC with standard paper consent to understand the impact of interactive digital consent. Participants were recruited from the Winchester Chest Clinic at Yale New Haven Hospital in New Haven, Connecticut, and healthy individuals were recruited from the community using fliers. In this coordinator-assisted trial, participants were randomized to complete the IC process using VIC on the iPad or with traditional paper consent. The study was conducted at the Winchester Chest Clinic, and the outcomes were self-assessed through coordinator-administered questionnaires. Results: A total of 50 participants were recruited in the study (VIC, n=25; paper, n=25). The participants in both groups had high comprehension. VIC participants reported higher satisfaction, higher perceived ease of use, higher ability to complete the consent independently, and shorter perceived time to complete the consent process. Conclusions: The use of dynamic, interactive audiovisual elements in VIC may improve participants' satisfaction and facilitate the IC process. We believe that using VIC in an ongoing, real-world study rather than a hypothetical study improved the reliability of our findings, which demonstrates VIC's potential to improve research participants' comprehension and the overall process of IC. Trial Registration: ClinicalTrials.gov NCT02537886; https://clinicaltrials.gov/ct2/show/NCT02537886 ", doi="10.2196/20458", url="https://formative.jmir.org/2021/10/e20458", url="http://www.ncbi.nlm.nih.gov/pubmed/34665142" } @Article{info:doi/10.2196/29017, author="Meng, Weilin and Mosesso, M. Kelly and Lane, A. Kathleen and Roberts, R. Anna and Griffith, Ashley and Ou, Wanmei and Dexter, R. 
Paul", title="An Automated Line-of-Therapy Algorithm for Adults With Metastatic Non--Small Cell Lung Cancer: Validation Study Using Blinded Manual Chart Review", journal="JMIR Med Inform", year="2021", month="Oct", day="12", volume="9", number="10", pages="e29017", keywords="automated algorithm", keywords="line of therapy", keywords="longitudinal changes", keywords="manual chart review", keywords="non--small cell lung cancer", keywords="systemic anticancer therapy", abstract="Background: Extraction of line-of-therapy (LOT) information from electronic health record and claims data is essential for determining longitudinal changes in systemic anticancer therapy in real-world clinical settings. Objective: The aim of this retrospective cohort analysis is to validate and refine our previously described open-source LOT algorithm by comparing the output of the algorithm with results obtained through blinded manual chart review. Methods: We used structured electronic health record data and clinical documents to identify 500 adult patients treated for metastatic non--small cell lung cancer with systemic anticancer therapy from 2011 to mid-2018; we assigned patients to training (n=350) and test (n=150) cohorts, randomly divided proportional to the overall ratio of simple:complex cases (n=254:246). Simple cases were patients who received one LOT and no maintenance therapy; complex cases were patients who received more than one LOT and/or maintenance therapy. Algorithmic changes were performed using the training cohort data, after which the refined algorithm was evaluated against the test cohort. Results: For simple cases, 16 instances of discordance between the LOT algorithm and chart review prerefinement were reduced to 8 instances postrefinement; in the test cohort, there was no discordance between algorithm and chart review. For complex cases, algorithm refinement reduced the discordance from 68 to 62 instances, with 37 instances in the test cohort. The percentage agreement between LOT algorithm output and chart review for patients who received one LOT was 89\% prerefinement, 93\% postrefinement, and 93\% for the test cohort, whereas the likelihood of precise matching between algorithm output and chart review decreased with an increasing number of unique regimens. Several areas of discordance that arose from differing definitions of LOTs and maintenance therapy could not be objectively resolved because of a lack of precise definitions in the medical literature. Conclusions: Our findings identify common sources of discordance between the LOT algorithm and clinician documentation, providing the possibility of targeted algorithm refinement. ", doi="10.2196/29017", url="https://medinform.jmir.org/2021/10/e29017", url="http://www.ncbi.nlm.nih.gov/pubmed/34636730" } @Article{info:doi/10.2196/32444, author="Chen, Hung-Chang and Tzeng, Shin-Shi and Hsiao, Yen-Chang and Chen, Ruei-Feng and Hung, Erh-Chien and Lee, K. 
Oscar", title="Smartphone-Based Artificial Intelligence--Assisted Prediction for Eyelid Measurements: Algorithm Development and Observational Validation Study", journal="JMIR Mhealth Uhealth", year="2021", month="Oct", day="8", volume="9", number="10", pages="e32444", keywords="artificial intelligence", keywords="AI", keywords="deep learning", keywords="margin reflex distance 1", keywords="margin reflex distance 2", keywords="levator muscle function", keywords="smartphone", keywords="measurement", keywords="eye", keywords="prediction", keywords="processing", keywords="limit", keywords="image", keywords="algorithm", keywords="observational", abstract="Background: Margin reflex distance 1 (MRD1), margin reflex distance 2 (MRD2), and levator muscle function (LF) are crucial metrics for ptosis evaluation and management. However, manual measurements of MRD1, MRD2, and LF are time-consuming, subjective, and prone to human error. Smartphone-based artificial intelligence (AI) image processing is a potential solution to overcome these limitations. Objective: We propose the first smartphone-based AI-assisted image processing algorithm for MRD1, MRD2, and LF measurements. Methods: This observational study included 822 eyes of 411 volunteers aged over 18 years from August 1, 2020, to April 30, 2021. Six orbital photographs (bilateral primary gaze, up-gaze, and down-gaze) were taken using a smartphone (iPhone 11 Pro Max). The gold-standard measurements and normalized eye photographs were obtained from these orbital photographs and compiled using AI-assisted software to create MRD1, MRD2, and LF models. Results: The Pearson correlation coefficients between the gold-standard measurements and the predicted values obtained with the MRD1 and MRD2 models were excellent (r=0.91 and 0.88, respectively) and that obtained with the LF model was good (r=0.73). The intraclass correlation coefficient demonstrated excellent agreement between the gold-standard measurements and the values predicted by the MRD1 and MRD2 models (0.90 and 0.84, respectively), and substantial agreement with the LF model (0.69). The mean absolute errors were 0.35 mm, 0.37 mm, and 1.06 mm for the MRD1, MRD2, and LF models, respectively. The 95\% limits of agreement were --0.94 to 0.94 mm for the MRD1 model, --0.92 to 1.03 mm for the MRD2 model, and --0.63 to 2.53 mm for the LF model. Conclusions: We developed the first smartphone-based AI-assisted image processing algorithm for eyelid measurements. MRD1, MRD2, and LF measures can be taken in a quick, objective, and convenient manner. Furthermore, by using a smartphone, the examiner can check these measurements anywhere and at any time, which facilitates data collection. ", doi="10.2196/32444", url="https://mhealth.jmir.org/2021/10/e32444", url="http://www.ncbi.nlm.nih.gov/pubmed/34538776" } @Article{info:doi/10.2196/29849, author="Rahimi-Eichi, Habiballah and Coombs III, Garth and Vidal Bustamante, M. Constanza and Onnela, Jukka-Pekka and Baker, T. Justin and Buckner, L. 
Randy", title="Open-source Longitudinal Sleep Analysis From Accelerometer Data (DPSleep): Algorithm Development and Validation", journal="JMIR Mhealth Uhealth", year="2021", month="Oct", day="6", volume="9", number="10", pages="e29849", keywords="actigraphy", keywords="accelerometer", keywords="sleep", keywords="deep-phenotyping", keywords="smartphone", keywords="mobile phone", abstract="Background: Wearable devices are now widely available to collect continuous objective behavioral data from individuals and to measure sleep. Objective: This study aims to introduce a pipeline to infer sleep onset, duration, and quality from raw accelerometer data and then quantify the relationships between derived sleep metrics and other variables of interest. Methods: The pipeline released here for the deep phenotyping of sleep, as the DPSleep software package, uses a stepwise algorithm to detect missing data; within-individual, minute-based, spectral power percentiles of activity; and iterative, forward-and-backward--sliding windows to estimate the major Sleep Episode onset and offset. Software modules allow for manual quality control adjustment of the derived sleep features and correction for time zone changes. In this paper, we have illustrated the pipeline with data from participants studied for more than 200 days each. Results: Actigraphy-based measures of sleep duration were associated with self-reported sleep quality ratings. Simultaneous measures of smartphone use and GPS location data support the validity of the sleep timing inferences and reveal how phone measures of sleep timing can differ from actigraphy data. Conclusions: We discuss the use of DPSleep in relation to other available sleep estimation approaches and provide example use cases that include multi-dimensional, deep longitudinal phenotyping, extended measurement of dynamics associated with mental illness, and the possibility of combining wearable actigraphy and personal electronic device data (eg, smartphones and tablets) to measure individual differences across a wide range of behavioral variations in health and disease. A new open-source pipeline for deep phenotyping of sleep, DPSleep, analyzes raw accelerometer data from wearable devices and estimates sleep onset and offset while allowing for manual quality control adjustments. ", doi="10.2196/29849", url="https://mhealth.jmir.org/2021/10/e29849", url="http://www.ncbi.nlm.nih.gov/pubmed/34612831" } @Article{info:doi/10.2196/29200, author="Conway, Aaron and Jungquist, R. Carla and Chang, Kristina and Kamboj, Navpreet and Sutherland, Joanna and Mafeld, Sebastian and Parotto, Matteo", title="Predicting Prolonged Apnea During Nurse-Administered Procedural Sedation: Machine Learning Study", journal="JMIR Perioper Med", year="2021", month="Oct", day="5", volume="4", number="2", pages="e29200", keywords="procedural sedation and analgesia", keywords="conscious sedation", keywords="nursing", keywords="informatics", keywords="patient safety", keywords="machine learning", keywords="capnography", keywords="anesthesia", keywords="anaesthesia", keywords="medical informatics", keywords="sleep apnea", keywords="apnea", keywords="apnoea", keywords="sedation", abstract="Background: Capnography is commonly used for nurse-administered procedural sedation. 
Distinguishing between capnography waveform abnormalities that signal the need for clinical intervention for an event and those that do not indicate the need for intervention is essential for the successful implementation of this technology into practice. It is possible that capnography alarm management may be improved by using machine learning to create a ``smart alarm'' that can alert clinicians to apneic events that are predicted to be prolonged. Objective: To determine the accuracy of machine learning models for predicting at the 15-second time point if apnea will be prolonged (ie, apnea that persists for >30 seconds). Methods: A secondary analysis of an observational study was conducted. We selected several candidate models to evaluate, including a random forest model, generalized linear model (logistic regression), least absolute shrinkage and selection operator regression, ridge regression, and the XGBoost model. Out-of-sample accuracy of the models was calculated using 10-fold cross-validation. The net benefit decision analytic measure was used to assist with deciding whether using the models in practice would lead to better outcomes on average than using the current default capnography alarm management strategies. The default strategies are the aggressive approach, in which an alarm is triggered after brief periods of apnea (typically 15 seconds), and the conservative approach, in which an alarm is triggered only for prolonged periods of apnea (typically >30 seconds). Results: A total of 384 apneic events longer than 15 seconds were observed in 61 of the 102 patients (59.8\%) who participated in the observational study. Nearly half of the apneic events (180/384, 46.9\%) were prolonged. The random forest model performed the best in terms of discrimination (area under the receiver operating characteristic curve 0.66) and calibration. The net benefit associated with the random forest model exceeded that associated with the aggressive strategy but was lower than that associated with the conservative strategy. Conclusions: Decision curve analysis indicated that using a random forest model would lead to a better outcome for capnography alarm management than using an aggressive strategy in which alarms are triggered after 15 seconds of apnea. The model would not be superior to the conservative strategy in which alarms are only triggered after 30 seconds. ", doi="10.2196/29200", url="https://periop.jmir.org/2021/2/e29200", url="http://www.ncbi.nlm.nih.gov/pubmed/34609322" } @Article{info:doi/10.2196/27177, author="Yun, Donghwan and Cho, Semin and Kim, Chul Yong and Kim, Ki Dong and Oh, Kook-Hwan and Joo, Wook Kwon and Kim, Su Yon and Han, Seok Seung", title="Use of Deep Learning to Predict Acute Kidney Injury After Intravenous Contrast Media Administration: Prediction Model Development Study", journal="JMIR Med Inform", year="2021", month="Oct", day="1", volume="9", number="10", pages="e27177", keywords="acute kidney injury", keywords="artificial intelligence", keywords="contrast media", keywords="deep learning", keywords="machine learning", keywords="kidney injury", keywords="computed tomography", abstract="Background: Precise prediction of contrast media--induced acute kidney injury (CIAKI) is an important issue because of its relationship with poor outcomes. Objective: Herein, we examined whether a deep learning algorithm could predict the risk of intravenous CIAKI better than other machine learning and logistic regression models in patients undergoing computed tomography (CT). 
Methods: A total of 14,185 patients who were administered intravenous contrast media for CT at the preventive and monitoring facility in Seoul National University Hospital were reviewed. CIAKI was defined as an increase in serum creatinine of $\geq$0.3 mg/dL within 2 days or $\geq$50\% within 7 days. Using both time-varying and time-invariant features, machine learning models, such as the recurrent neural network (RNN), light gradient boosting machine (LGM), extreme gradient boosting machine (XGB), random forest (RF), decision tree (DT), support vector machine (SVM), k-nearest neighbors, and logistic regression, were developed using a training set, and their performance was compared using the area under the receiver operating characteristic curve (AUROC) in a test set. Results: CIAKI developed in 261 cases (1.8\%). The RNN model had the highest AUROC of 0.755 (0.708-0.802) for predicting CIAKI, which was superior to that obtained from other machine learning models. When CIAKI was instead defined as an increase in serum creatinine of $\geq$0.5 mg/dL or $\geq$25\% within 3 days, the highest performance was again achieved by the RNN model, with an AUROC of 0.716 (95\% confidence interval [CI] 0.664-0.768). In feature ranking analysis, the albumin level was the most highly contributing factor to RNN performance, followed by time-varying kidney function. Conclusions: Application of a deep learning algorithm improves the predictability of intravenous CIAKI after CT, representing a basis for future clinical alarming and preventive systems. ", doi="10.2196/27177", url="https://medinform.jmir.org/2021/10/e27177", url="http://www.ncbi.nlm.nih.gov/pubmed/34596574" } @Article{info:doi/10.2196/28000, author="Persson, Inger and {\"O}stling, Andreas and Arlbrandt, Martin and S{\"o}derberg, Joakim and Becedas, David", title="A Machine Learning Sepsis Prediction Algorithm for Intended Intensive Care Unit Use (NAVOY Sepsis): Proof-of-Concept Study", journal="JMIR Form Res", year="2021", month="Sep", day="30", volume="5", number="9", pages="e28000", keywords="sepsis", keywords="prediction", keywords="early detection", keywords="machine learning", keywords="electronic health record", keywords="EHR", keywords="software as a medical device", keywords="algorithm", keywords="detection", keywords="intensive care unit", keywords="ICU", keywords="proof of concept", abstract="Background: Despite decades of research, sepsis remains a leading cause of mortality and morbidity in intensive care units worldwide. The key to effective management and patient outcome is early detection, for which no prospectively validated machine learning prediction algorithm is currently available for clinical use in Europe. Objective: We aimed to develop a high-performance machine learning sepsis prediction algorithm based on routinely collected intensive care unit data, designed to be implemented in European intensive care units. Methods: The machine learning algorithm was developed using convolutional neural networks, based on Massachusetts Institute of Technology Lab for Computational Physiology MIMIC-III clinical data from intensive care unit patients aged 18 years or older. The model uses 20 variables to produce hourly predictions of onset of sepsis, defined by international Sepsis-3 criteria. Predictive performance was externally validated using hold-out test data. 
Results: The algorithm---NAVOY Sepsis---uses 4 hours of input and can identify patients with high risk of developing sepsis, with high performance (area under the receiver operating characteristic curve 0.90; area under the precision-recall curve 0.62) for predictions up to 3 hours before sepsis onset. Conclusions: The prediction performance of NAVOY Sepsis was superior to that of existing sepsis early warning scoring systems and comparable with those of other prediction algorithms designed to predict sepsis onset. The algorithm has excellent predictive properties and uses variables that are routinely collected in intensive care units. ", doi="10.2196/28000", url="https://formative.jmir.org/2021/9/e28000", url="http://www.ncbi.nlm.nih.gov/pubmed/34591016" } @Article{info:doi/10.2196/28209, author="Mann, D. Kay and Good, M. Norm and Fatehi, Farhad and Khanna, Sankalp and Campbell, Victoria and Conway, Roger and Sullivan, Clair and Staib, Andrew and Joyce, Christopher and Cook, David", title="Predicting Patient Deterioration: A Review of Tools in the Digital Hospital Setting", journal="J Med Internet Res", year="2021", month="Sep", day="30", volume="23", number="9", pages="e28209", keywords="patient deterioration", keywords="early warning scores", keywords="digital tools", keywords="vital signs", keywords="electronic medical record", abstract="Background: Early warning tools identify patients at risk of deterioration in hospitals. Electronic medical records in hospitals offer real-time data and the opportunity to automate early warning tools and provide real-time, dynamic risk estimates. Objective: This review describes published studies on the development, validation, and implementation of tools for predicting patient deterioration in general wards in hospitals. Methods: An electronic database search of peer-reviewed journal papers from 2008-2020 identified studies reporting the use of tools and algorithms for predicting patient deterioration, defined by unplanned transfer to the intensive care unit, cardiac arrest, or death. Studies conducted solely in intensive care units, emergency departments, or single diagnosis patient groups were excluded. Results: A total of 46 publications were eligible for inclusion. These publications were heterogeneous in design, setting, and outcome measures. Most studies were retrospective studies using cohort data to develop, validate, or statistically evaluate prediction tools. The tools consisted of early warning, screening, or scoring systems based on physiologic data, as well as more complex algorithms developed to better represent real-time data, deal with complexities of longitudinal data, and warn of deterioration risk earlier. Only a few studies detailed the results of the implementation of deterioration warning tools. Conclusions: Despite relative progress in the development of algorithms to predict patient deterioration, the literature has not shown that the deployment or implementation of such algorithms is reproducibly associated with improvements in patient outcomes. Further work is needed to realize the potential of automated predictions and update dynamic risk estimates as part of an operational early warning system for inpatient deterioration. 
", doi="10.2196/28209", url="https://www.jmir.org/2021/9/e28209", url="http://www.ncbi.nlm.nih.gov/pubmed/34591017" } @Article{info:doi/10.2196/21810, author="Alaqra, Sarah Ala and Kane, Bridget and Fischer-H{\"u}bner, Simone", title="Machine Learning--Based Analysis of Encrypted Medical Data in the Cloud: Qualitative Study of Expert Stakeholders' Perspectives", journal="JMIR Hum Factors", year="2021", month="Sep", day="16", volume="8", number="3", pages="e21810", keywords="medical data analysis", keywords="encryption", keywords="privacy-enhancing technologies", keywords="machine learning", keywords="stakeholders", keywords="tradeoffs", keywords="perspectives", keywords="eHealth", keywords="interviews", abstract="Background: Third-party cloud-based data analysis applications are proliferating in electronic health (eHealth) because of the expertise offered and their monetary advantage. However, privacy and security are critical concerns when handling sensitive medical data in the cloud. Technical advances based on ``crypto magic'' in privacy-preserving machine learning (ML) enable data analysis in encrypted form for maintaining confidentiality. Such privacy-enhancing technologies (PETs) could be counterintuitive to relevant stakeholders in eHealth, which could in turn hinder adoption; thus, more attention is needed on human factors for establishing trust and transparency. Objective: The aim of this study was to analyze eHealth expert stakeholders' perspectives and the perceived tradeoffs in regard to data analysis on encrypted medical data in the cloud, and to derive user requirements for development of a privacy-preserving data analysis tool. Methods: We used semistructured interviews and report on 14 interviews with individuals having medical, technical, or research expertise in eHealth. We used thematic analysis for analyzing interview data. In addition, we conducted a workshop for eliciting requirements. Results: Our results show differences in the understanding of and in trusting the technology; caution is advised by technical experts, whereas patient safety assurances are required by medical experts. Themes were identified with general perspectives on data privacy and practices (eg, acceptance of using external services), as well as themes highlighting specific perspectives (eg, data protection drawbacks and concerns of the data analysis on encrypted data). The latter themes result in requiring assurances and conformance testing for trusting tools such as the proposed ML-based tool. Communicating privacy, and utility benefits and tradeoffs with stakeholders is essential for trust. Furthermore, stakeholders and their organizations share accountability of patient data. Finally, stakeholders stressed the importance of informing patients about the privacy of their data. Conclusions: Understanding the benefits and risks of using eHealth PETs is crucial, and collaboration among diverse stakeholders is essential. Assurances of the tool's privacy, accuracy, and patient safety should be in place for establishing trust of ML-based PETs, especially if used in the cloud. 
", doi="10.2196/21810", url="https://humanfactors.jmir.org/2021/3/e21810", url="http://www.ncbi.nlm.nih.gov/pubmed/34528892" } @Article{info:doi/10.2196/28028, author="Lam, Carson and Tso, Foon Chak and Green-Saxena, Abigail and Pellegrini, Emily and Iqbal, Zohora and Evans, Daniel and Hoffman, Jana and Calvert, Jacob and Mao, Qingqing and Das, Ritankar", title="Semisupervised Deep Learning Techniques for Predicting Acute Respiratory Distress Syndrome From Time-Series Clinical Data: Model Development and Validation Study", journal="JMIR Form Res", year="2021", month="Sep", day="14", volume="5", number="9", pages="e28028", keywords="acute respiratory distress syndrome", keywords="COVID-19", keywords="semisupervised learning", keywords="deep learning", keywords="machine learning", keywords="algorithm", keywords="prediction", keywords="decision support", abstract="Background: A high number of patients who are hospitalized with COVID-19 develop acute respiratory distress syndrome (ARDS). Objective: In response to the need for clinical decision support tools to help manage the next pandemic during the early stages (ie, when limited labeled data are present), we developed machine learning algorithms that use semisupervised learning (SSL) techniques to predict ARDS development in general and COVID-19 populations based on limited labeled data. Methods: SSL techniques were applied to 29,127 encounters with patients who were admitted to 7 US hospitals from May 1, 2019, to May 1, 2021. A recurrent neural network that used a time series of electronic health record data was applied to data that were collected when a patient's peripheral oxygen saturation level fell below the normal range (<97\%) to predict the subsequent development of ARDS during the remaining duration of patients' hospital stay. Model performance was assessed with the area under the receiver operating characteristic curve and area under the precision recall curve of an external hold-out test set. Results: For the whole data set, the median time between the first peripheral oxygen saturation measurement of <97\% and subsequent respiratory failure was 21 hours. The area under the receiver operating characteristic curve for predicting subsequent ARDS development was 0.73 when the model was trained on a labeled data set of 6930 patients, 0.78 when the model was trained on the labeled data set that had been augmented with the unlabeled data set of 16,173 patients by using SSL techniques, and 0.84 when the model was trained on the entire training set of 23,103 labeled patients. Conclusions: In the context of using time-series inpatient data and a careful model training design, unlabeled data can be used to improve the performance of machine learning models when labeled data for predicting ARDS development are scarce or expensive. 
", doi="10.2196/28028", url="https://formative.jmir.org/2021/9/e28028", url="http://www.ncbi.nlm.nih.gov/pubmed/34398784" } @Article{info:doi/10.2196/27098, author="Liu, Yi-Shiuan and Yang, Chih-Yu and Chiu, Ping-Fang and Lin, Hui-Chu and Lo, Chung-Chuan and Lai, Szu-Han Alan and Chang, Chia-Chu and Lee, Kuang-Sheng Oscar", title="Machine Learning Analysis of Time-Dependent Features for Predicting Adverse Events During Hemodialysis Therapy: Model Development and Validation Study", journal="J Med Internet Res", year="2021", month="Sep", day="7", volume="23", number="9", pages="e27098", keywords="hemodialysis", keywords="intradialytic adverse events", keywords="prediction algorithm", keywords="machine learning", abstract="Background: Hemodialysis (HD) therapy is an indispensable tool used in critical care management. Patients undergoing HD are at risk for intradialytic adverse events, ranging from muscle cramps to cardiac arrest. So far, there is no effective HD device--integrated algorithm to assist medical staff in response to these adverse events a step earlier during HD. Objective: We aimed to develop machine learning algorithms to predict intradialytic adverse events in an unbiased manner. Methods: Three-month dialysis and physiological time-series data were collected from all patients who underwent maintenance HD therapy at a tertiary care referral center. Dialysis data were collected automatically by HD devices, and physiological data were recorded by medical staff. Intradialytic adverse events were documented by medical staff according to patient complaints. Features extracted from the time series data sets by linear and differential analyses were used for machine learning to predict adverse events during HD. Results: Time series dialysis data were collected during the 4-hour HD session in 108 patients who underwent maintenance HD therapy. There were a total of 4221 HD sessions, 406 of which involved at least one intradialytic adverse event. Models were built by classification algorithms and evaluated by four-fold cross-validation. The developed algorithm predicted overall intradialytic adverse events, with an area under the curve (AUC) of 0.83, sensitivity of 0.53, and specificity of 0.96. The algorithm also predicted muscle cramps, with an AUC of 0.85, and blood pressure elevation, with an AUC of 0.93. In addition, the model built based on ultrafiltration-unrelated features predicted all types of adverse events, with an AUC of 0.81, indicating that ultrafiltration-unrelated factors also contribute to the onset of adverse events. Conclusions: Our results demonstrated that algorithms combining linear and differential analyses with two-class classification machine learning can predict intradialytic adverse events in quasi-real time with high AUCs. Such a methodology implemented with local cloud computation and real-time optimization by personalized HD data could warn clinicians to take timely actions in advance. 
", doi="10.2196/27098", url="https://www.jmir.org/2021/9/e27098", url="http://www.ncbi.nlm.nih.gov/pubmed/34491204" } @Article{info:doi/10.2196/28245, author="Hettiachchi, Danula and Hayes, Lachie and Goncalves, Jorge and Kostakos, Vassilis", title="Team Dynamics in Hospital Workflows: An Exploratory Study of a Smartphone Task Manager", journal="JMIR Med Inform", year="2021", month="Aug", day="16", volume="9", number="8", pages="e28245", keywords="task assignment", keywords="smartphones", keywords="hospital communication", keywords="clinical workflows", keywords="mobile app", keywords="clinical platform", keywords="mHealth", abstract="Background: Although convenient and reliable modern messaging apps like WhatsApp enable efficient communication among hospital staff, hospitals are now pivoting toward purpose-built structured communication apps for various reasons, including security and privacy concerns. However, there is limited understanding of how we can examine and improve hospital workflows using the data collected through such apps as an alternative to costly and challenging research methods like ethnography and patient record analysis. Objective: We seek to identify whether the structure of the collected communication data provides insights into hospitals' workflows. Our analysis also aims to identify ways in which task management platforms can be improved and designed to better support clinical workflows. Methods: We present an exploratory analysis of clinical task records collected over 22 months through a smartphone app that enables structured communication between staff to manage and execute clinical workflows. We collected over 300,000 task records between July 2018 and May 2020 completed by staff members including doctors, nurses, and pharmacists across all wards in an Australian hospital. Results: We show that important insights into how teams function in a clinical setting can be readily drawn from task assignment data. Our analysis indicates that predefined labels such as urgency and task type are important and impact how tasks are accepted and completed. Our results show that both task sent-to-accepted (P<.001) and sent-to-completed (P<.001) times are significantly higher for routine tasks when compared to urgent tasks. We also show how task acceptance varies across teams and roles and that internal tasks are more efficiently managed than external tasks, possibly due to increased trust among team members. For example, task sent-to-accepted time (minutes) is significantly higher (P<.001) for external assignments (mean 22.10, SD 91.45) when compared to internal assignments (mean 19.03, SD 82.66). Conclusions: Smartphone-based task assignment apps can provide unique insights into team dynamics in clinical settings. These insights can be used to further improve how well these systems support clinical work and staff. 
", doi="10.2196/28245", url="https://medinform.jmir.org/2021/8/e28245", url="http://www.ncbi.nlm.nih.gov/pubmed/34398797" } @Article{info:doi/10.2196/28287, author="Zhang, Xiaoyi and Luo, Gang", title="Ranking Rule-Based Automatic Explanations for Machine Learning Predictions on Asthma Hospital Encounters in Patients With Asthma: Retrospective Cohort Study", journal="JMIR Med Inform", year="2021", month="Aug", day="11", volume="9", number="8", pages="e28287", keywords="asthma", keywords="clinical decision support", keywords="machine learning", keywords="patient care management", keywords="forecasting", abstract="Background: Asthma hospital encounters impose a heavy burden on the health care system. To improve preventive care and outcomes for patients with asthma, we recently developed a black-box machine learning model to predict whether a patient with asthma will have one or more asthma hospital encounters in the succeeding 12 months. Our model is more accurate than previous models. However, black-box machine learning models do not explain their predictions, which forms a barrier to widespread clinical adoption. To solve this issue, we previously developed a method to automatically provide rule-based explanations for the model's predictions and to suggest tailored interventions without sacrificing model performance. For an average patient correctly predicted by our model to have future asthma hospital encounters, our explanation method generated over 5000 rule-based explanations, if any. However, the user of the automated explanation function, often a busy clinician, will want to quickly obtain the most useful information for a patient by viewing only the top few explanations. Therefore, a methodology is required to appropriately rank the explanations generated for a patient. However, this is currently an open problem. Objective: The aim of this study is to develop a method to appropriately rank the rule-based explanations that our automated explanation method generates for a patient. Methods: We developed a ranking method that struck a balance among multiple factors. Through a secondary analysis of 82,888 data instances of adults with asthma from the University of Washington Medicine between 2011 and 2018, we demonstrated our ranking method on the test case of predicting asthma hospital encounters in patients with asthma. Results: For each patient predicted to have asthma hospital encounters in the succeeding 12 months, the top few explanations returned by our ranking method typically have high quality and low redundancy. Many top-ranked explanations provide useful insights on the various aspects of the patient's situation, which cannot be easily obtained by viewing the patient's data in the current electronic health record system. Conclusions: The explanation ranking module is an essential component of the automated explanation function, and it addresses the interpretability issue that deters the widespread adoption of machine learning predictive models in clinical practice. In the next few years, we plan to test our explanation ranking method on predictive modeling problems addressing other diseases as well as on data from other health care systems. 
International Registered Report Identifier (IRRID): RR2-10.2196/5039 ", doi="10.2196/28287", url="https://medinform.jmir.org/2021/8/e28287", url="http://www.ncbi.nlm.nih.gov/pubmed/34383673" } @Article{info:doi/10.2196/17971, author="Oxholm, Christina and Christensen, Soendergaard Anne-Marie and Christiansen, Regina and Wiil, Kock Uffe and Nielsen, S{\o}gaard Anette", title="Attitudes of Patients and Health Professionals Regarding Screening Algorithms: Qualitative Study", journal="JMIR Form Res", year="2021", month="Aug", day="9", volume="5", number="8", pages="e17971", keywords="screening", keywords="algorithms", keywords="alcohol", keywords="qualitative study", keywords="attitudes", keywords="opinions", keywords="patients", keywords="health professionals", abstract="Background: As a preamble to an attempt to develop a tool that can aid health professionals at hospitals in identifying whether the patient may have an alcohol abuse problem, this study investigates opinions and attitudes among both health professionals and patients about using patient data from electronic health records (EHRs) in an algorithm screening for alcohol problems. Objective: The aim of this study was to investigate the attitudes and opinions of patients and health professionals at hospitals regarding the use of previously collected data in developing and implementing an algorithmic helping tool in EHR for screening inexpedient alcohol habits; in addition, the study aims to analyze how patients would feel about asking and being asked about alcohol by staff, based on a notification in the EHR from such a tool. Methods: Using semistructured interviews, we interviewed 9 health professionals and 5 patients to explore their opinions and attitudes about an algorithm-based helping tool and about asking and being asked about alcohol usage when being given a reminder from this type of tool. The data were analyzed using an ad hoc method consistent with a close reading and meaning condensing. Results: The health professionals were both positive and negative about a helping tool grounded in algorithms. They were optimistic about the potential of such a tool to save some time by providing a quick overview if it was easy to use but, on the negative side, noted that this type of helping tool might take away the professionals' instinct. The patients were overall positive about the helping tool, stating that they would find this tool beneficial for preventive care. Some of the patients expressed concerns that the information provided by the tool could be misused. Conclusions: When developing and implementing an algorithmic helping tool, the following aspects should be considered: (1) making the helping tool as transparent in its recommendations as possible, avoiding black boxing, and ensuring room for professional discretion in clinical decision making; and (2) including and taking into account the attitudes and opinions of patients and health professionals in the design and development process of such an algorithmic helping tool. 
", doi="10.2196/17971", url="https://formative.jmir.org/2021/8/e17971", url="http://www.ncbi.nlm.nih.gov/pubmed/34383666" } @Article{info:doi/10.2196/25531, author="Sch{\"u}ttler, Christina and Prokosch, Hans-Ulrich and Sedlmayr, Martin and Sedlmayr, Brita", title="Evaluation of Three Feasibility Tools for Identifying Patient Data and Biospecimen Availability: Comparative Usability Study", journal="JMIR Med Inform", year="2021", month="Jul", day="21", volume="9", number="7", pages="e25531", keywords="software tools", keywords="user interface", keywords="feasibility", keywords="evaluation", keywords="research", abstract="Background: To meet the growing importance of real-word data analysis, clinical data and biosamples must be timely made available. Feasibility platforms are often the first contact point for determining the availability of such data for specific research questions. Therefore, a user-friendly interface should be provided to enable access to this information easily. The German Medical Informatics Initiative also aims to establish such a platform for its infrastructure. Although some of these platforms are actively used, their tools still have limitations. Consequently, the Medical Informatics Initiative consortium MIRACUM (Medical Informatics in Research and Care in University Medicine) committed itself to analyzing the pros and cons of existing solutions and to designing an optimized graphical feasibility user interface. Objective: The aim of this study is to identify the system that is most user-friendly and thus forms the best basis for developing a harmonized tool. To achieve this goal, we carried out a comparative usability evaluation of existing tools used by researchers acting as end users. Methods: The evaluation included three preselected search tools and was conducted as a qualitative exploratory study with a randomized design over a period of 6 weeks. The tools in question were the MIRACUM i2b2 (Informatics for Integrating Biology and the Bedside) feasibility platform, OHDSI's (Observational Health Data Sciences and Informatics) ATLAS, and the Sample Locator of the German Biobank Alliance. The evaluation was conducted in the form of a web-based usability test (usability walkthrough combined with a web-based questionnaire) with participants aged between 26 and 63 years who work as medical doctors. Results: In total, 17 study participants evaluated the three tools. The overall evaluation of usability, which was based on the System Usability Scale, showed that the Sample Locator, with a mean System Usability Scale score of 77.03 (SD 20.62), was significantly superior to the other two tools (Wilcoxon test; Sample Locator vs i2b2: P=.047; Sample Locator vs ATLAS: P=.001). i2b2, with a score of 59.83 (SD 25.36), performed significantly better than ATLAS, which had a score of 27.81 (SD 21.79; Wilcoxon test; i2b2 vs ATLAS: P=.005). The analysis of the material generated by the usability walkthrough method confirmed these findings. ATLAS caused the most usability problems (n=66), followed by i2b2 (n=48) and the Sample Locator (n=22). Moreover, the Sample Locator achieved the highest ratings with respect to additional questions regarding satisfaction with the tools. Conclusions: This study provides data to develop a suitable basis for the selection of a harmonized tool for feasibility studies via concrete evaluation and a comparison of the usability of three different types of query builders. 
The feedback obtained from the participants during the usability test made it possible to identify user problems and positive design aspects of the individual tools and compare them qualitatively. ", doi="10.2196/25531", url="https://medinform.jmir.org/2021/7/e25531", url="http://www.ncbi.nlm.nih.gov/pubmed/34287211" } @Article{info:doi/10.2196/25913, author="Verdonck, Micha{\"e}l and Carvalho, Hugo and Berghmans, Johan and Forget, Patrice and Poelaert, Jan", title="Exploratory Outlier Detection for Acceleromyographic Neuromuscular Monitoring: Machine Learning Approach", journal="J Med Internet Res", year="2021", month="Jun", day="21", volume="23", number="6", pages="e25913", keywords="neuromuscular monitoring", keywords="outlier analysis", keywords="acceleromyography", keywords="postoperative residual curarization", keywords="train-of-four", keywords="monitoring devices", keywords="neuromuscular", keywords="machine learning", keywords="monitors", keywords="anesthesiology", abstract="Background: Perioperative quantitative monitoring of neuromuscular function in patients receiving neuromuscular blockers has become internationally recognized as an absolute and core necessity in modern anesthesia care. Because of their kinetic nature, artifactual recordings of acceleromyography-based neuromuscular monitoring devices are not unusual. These generate a great deal of cynicism among anesthesiologists, constituting an obstacle toward their widespread adoption. Through outlier analysis techniques, monitoring devices can learn to detect and flag signal abnormalities. Outlier analysis (or anomaly detection) refers to the problem of finding patterns in data that do not conform to expected behavior. Objective: This study was motivated by the development of a smartphone app intended for neuromuscular monitoring based on combined accelerometric and angular hand movement data. During the paired comparison stage of this app against existing acceleromyography monitoring devices, it was noted that the results from both devices did not always concur. This study aims to engineer a set of features that enable the detection of outliers in the form of erroneous train-of-four (TOF) measurements from an acceleromyographic-based device. These features are tested for their potential in the detection of erroneous TOF measurements by developing an outlier detection algorithm. Methods: A data set encompassing 533 high-sensitivity TOF measurements from 35 patients was created based on a multicentric open-label trial of a purpose-built accelero- and gyroscopic-based neuromuscular monitoring app. A basic set of features was extracted based on raw data, while a second set of features was purpose-engineered based on TOF pattern characteristics. Two cost-sensitive logistic regression (CSLR) models were deployed to evaluate the performance of these features. The final output of the developed models was a binary classification, indicating if a TOF measurement was an outlier or not. Results: A total of 7 basic features were extracted based on raw data, while another 8 features were engineered based on TOF pattern characteristics. The model training and testing were based on separate data sets: one with 319 measurements (18 outliers) and a second with 214 measurements (12 outliers). The F1 score (95\% CI) was 0.86 (0.48-0.97) for the CSLR model with engineered features, significantly larger than that of the CSLR model with the basic features (0.29 [0.17-0.53]; P<.001). 
Conclusions: The set of engineered features and their corresponding incorporation in an outlier detection algorithm have the potential to increase overall neuromuscular monitoring data consistency. Integrating outlier flagging algorithms within neuromuscular monitors could potentially reduce overall acceleromyography-based reliability issues. Trial Registration: ClinicalTrials.gov NCT03605225; https://clinicaltrials.gov/ct2/show/NCT03605225 ", doi="10.2196/25913", url="https://www.jmir.org/2021/6/e25913/", url="http://www.ncbi.nlm.nih.gov/pubmed/34152273" } @Article{info:doi/10.2196/26448, author="Ferreira, Ferraz Gabriel and Quiles, Gon{\c{c}}alves Marcos and Nazar{\'e}, Santana Tiago and Rezende, Oliveira Solange and Demarzo, Marcelo", title="Automation of Article Selection Process in Systematic Reviews Through Artificial Neural Network Modeling and Machine Learning: Protocol for an Article Selection Model", journal="JMIR Res Protoc", year="2021", month="Jun", day="15", volume="10", number="6", pages="e26448", keywords="deep learning", keywords="machine learning", keywords="systematic review", keywords="mindfulness", abstract="Background: A systematic review can be defined as a summary of the evidence found in the literature via a systematic search in the available scientific databases. One of the steps involved is article selection, which is typically a laborious task. Machine learning and artificial intelligence can be important tools in automating this step, thus aiding researchers. Objective: The aim of this study is to create models based on an artificial neural network system to automate the article selection process in systematic reviews related to ``Mindfulness and Health Promotion.'' Methods: The study will be performed using Python programming software. The system will consist of six main steps: (1) data import, (2) exclusion of duplicates, (3) exclusion of non-articles, (4) article reading and model creation using artificial neural network, (5) comparison of the models, and (6) system sharing. We will choose the 10 most relevant systematic reviews published in the fields of ``Mindfulness and Health Promotion'' and ``Orthopedics'' (control group) to serve as a test of the effectiveness of the article selection. Results: Data collection will begin in July 2021, with completion scheduled for December 2021, and final publication available in March 2022. Conclusions: An automated system with a modifiable sensitivity will be created to select scientific articles in systematic review that can be expanded to various fields. We will disseminate our results and models through the ``Observatory of Evidence'' in public health, an open and online platform that will assist researchers in systematic reviews. 
International Registered Report Identifier (IRRID): PRR1-10.2196/26448 ", doi="10.2196/26448", url="https://www.researchprotocols.org/2021/6/e26448", url="http://www.ncbi.nlm.nih.gov/pubmed/34128820" } @Article{info:doi/10.2196/25560, author="Surodina, Svitlana and Lam, Ching and Grbich, Svetislav and Milne-Ives, Madison and van Velthoven, Michelle and Meinert, Edward", title="Machine Learning for Risk Group Identification and User Data Collection in a Herpes Simplex Virus Patient Registry: Algorithm Development and Validation Study", journal="JMIRx Med", year="2021", month="Jun", day="11", volume="2", number="2", pages="e25560", keywords="data collection", keywords="herpes simplex virus", keywords="registries", keywords="machine learning", keywords="risk assessment", keywords="artificial intelligence", keywords="medical information system", keywords="user-centered design", keywords="predictor", keywords="risk", abstract="Background: Researching people with herpes simplex virus (HSV) is challenging because of poor data quality, low user engagement, and concerns around stigma and anonymity. Objective: This project aimed to improve data collection for a real-world HSV registry by identifying predictors of HSV infection and selecting a limited number of relevant questions to ask new registry users to determine their level of HSV infection risk. Methods: The US National Health and Nutrition Examination Survey (NHANES, 2015-2016) database includes the confirmed HSV type 1 and type 2 (HSV-1 and HSV-2, respectively) status of American participants (14-49 years) and a wealth of demographic and health-related data. The questionnaires and data sets from this survey were used to form two data sets: one for HSV-1 and one for HSV-2. These data sets were used to train and test a model that used a random forest algorithm (devised using Python) to minimize the number of anonymous lifestyle-based questions needed to identify risk groups for HSV. Results: The model selected a reduced number of questions from the NHANES questionnaire that predicted HSV infection risk with high accuracy scores of 0.91 and 0.96 and high recall scores of 0.88 and 0.98 for the HSV-1 and HSV-2 data sets, respectively. The number of questions was reduced from 150 to an average of 40, depending on age and gender. The model, therefore, provided high predictability of risk of infection with minimal required input. Conclusions: This machine learning algorithm can be used in a real-world evidence registry to collect relevant lifestyle data and identify individuals' levels of risk of HSV infection. A limitation is the absence of real user data and integration with electronic medical records, which would enable model learning and improvement. Future work will explore model adjustments, anonymization options, explicit permissions, and a standardized data schema that meet the General Data Protection Regulation, Health Insurance Portability and Accountability Act, and third-party interface connectivity requirements. 
", doi="10.2196/25560", url="https://xmed.jmir.org/2021/2/e25560", url="http://www.ncbi.nlm.nih.gov/pubmed/37725536" } @Article{info:doi/10.2196/26598, author="Cha, Dongchul and Sung, MinDong and Park, Yu-Rang", title="Implementing Vertical Federated Learning Using Autoencoders: Practical Application, Generalizability, and Utility Study", journal="JMIR Med Inform", year="2021", month="Jun", day="9", volume="9", number="6", pages="e26598", keywords="federated learning", keywords="vertically incomplete data", keywords="privacy", keywords="machine learning", keywords="coding", keywords="data", keywords="performance", keywords="model", keywords="security", keywords="training", keywords="dataset", keywords="unsupervised learning", keywords="data sharing", keywords="protection", abstract="Background: Machine learning (ML) is now widely deployed in our everyday lives. Building robust ML models requires a massive amount of data for training. Traditional ML algorithms require training data centralization, which raises privacy and data governance issues. Federated learning (FL) is an approach to overcome this issue. We focused on applying FL on vertically partitioned data, in which an individual's record is scattered among different sites. Objective: The aim of this study was to perform FL on vertically partitioned data to achieve performance comparable to that of centralized models without exposing the raw data. Methods: We used three different datasets (Adult income, Schwannoma, and eICU datasets) and vertically divided each dataset into different pieces. Following the vertical division of data, overcomplete autoencoder-based model training was performed for each site. Following training, each site's data were transformed into latent data, which were aggregated for training. A tabular neural network model with categorical embedding was used for training. A centrally based model was used as a baseline model, which was compared to that of FL in terms of accuracy and area under the receiver operating characteristic curve (AUROC). Results: The autoencoder-based network successfully transformed the original data into latent representations with no domain knowledge applied. These altered data were different from the original data in terms of the feature space and data distributions, indicating appropriate data security. The loss of performance was minimal when using an overcomplete autoencoder; accuracy loss was 1.2\%, 8.89\%, and 1.23\%, and AUROC loss was 1.1\%, 0\%, and 1.12\% in the Adult income, Schwannoma, and eICU dataset, respectively. Conclusions: We proposed an autoencoder-based ML model for vertically incomplete data. Since our model is based on unsupervised learning, no domain-specific knowledge is required in individual sites. Under the circumstances where direct data sharing is not available, our approach may be a practical solution enabling both data protection and building a robust model. 
", doi="10.2196/26598", url="https://medinform.jmir.org/2021/6/e26598", url="http://www.ncbi.nlm.nih.gov/pubmed/34106083" } @Article{info:doi/10.2196/28902, author="Sasaki, Keisuke and Fujishige, Yuki and Kikuchi, Yutaka and Odagaki, Masato", title="A Transcranial Magnetic Stimulation Trigger System for Suppressing Motor-Evoked Potential Fluctuation Using Electroencephalogram Coherence Analysis: Algorithm Development and Validation Study", journal="JMIR Biomed Eng", year="2021", month="Jun", day="7", volume="6", number="2", pages="e28902", keywords="motor-evoked potential", keywords="transcranial magnetic stimulation", keywords="electroencephalogram", keywords="coherence", keywords="variability", keywords="fluctuation", keywords="trigger", keywords="threshold", keywords="coefficient of variation", keywords="primary motor cortex", abstract="Background: Transcranial magnetic stimulation (TMS), when applied over the primary motor cortex, elicits a motor-evoked potential (MEP) in electromyograms measured from peripheral muscles. MEP amplitude has often been observed to fluctuate trial to trial, even with a constant stimulus. Many factors cause MEP fluctuations in TMS. One of the primary factors is the weak stationarity and instability of cortical activity in the brain, from which we assumed MEP fluctuations originate. We hypothesized that MEP fluctuations are suppressed when TMS is delivered to the primary motor cortex at a time when several electroencephalogram (EEG) channels measured on the scalp are highly similar in the frequency domain. Objective: We developed a TMS triggering system to suppress MEP fluctuations using EEG coherence analysis, which was performed to detect the EEG signal similarity between the 2 channels in the frequency domain. Methods: Seven healthy adults participated in the experiment to confirm whether the TMS trigger system works adequately, and the mean amplitude and coefficient of the MEP variation were recorded and compared with the values obtained during the control task. We also determined the experimental time under each condition and verified whether it was within the predicted time. Results: The coefficient of variation of MEP amplitude decreased in 5 of the 7 participants, and significant differences (P=.02) were confirmed in 2 of the participants according to an F test. The coefficient of variation of the experimental time required for each stimulus after threshold modification was less than that without threshold modification, and a significant difference (P<.001) was confirmed by performing an F test. Conclusions: We found that MEP could be suppressed using the system developed in this study and that the TMS trigger system could also stabilize the experimental time by changing the triggering threshold automatically. ", doi="10.2196/28902", url="https://biomedeng.jmir.org/2021/2/e28902" } @Article{info:doi/10.2196/17056, author="Balikuddembe, S. Michael and Wakholi, K. Peter and Tumwesigye, M. 
Nazarius and Tylleskar, Thorkild", title="An Algorithm (LaD) for Monitoring Childbirth in Settings Where Tracking All Parameters in the World Health Organization Partograph Is Not Feasible: Design and Expert Validation", journal="JMIR Med Inform", year="2021", month="May", day="27", volume="9", number="5", pages="e17056", keywords="algorithm", keywords="software validation", keywords="childbirth monitoring", keywords="WHO partograph", abstract="Background: After determining the key childbirth monitoring items from experts, we designed an algorithm (LaD) to represent the experts' suggestions and validated it. In this paper we describe an abridged algorithm for labor and delivery management and use a theoretical case to compare its performance with human childbirth experts. Objective: The objective of this study was to describe the LaD algorithm, its development, and its validation. In addition, in the validation phase we wanted to assess if the algorithm was inferior, equivalent, or superior to human experts in recommending the necessary clinical actions during childbirth decision making. Methods: The LaD algorithm encompasses the tracking of 6 of the 12 childbirth parameters monitored using the World Health Organization (WHO) partograph. It has recommendations on how to manage a patient when parameters are outside the normal ranges. We validated the algorithm with purposively selected experts selecting actions for a stratified sample of patient case scenarios. The experts' selections were compared to obtain pairwise sensitivity and false-positive rates (FPRs) between them and the algorithm. Results: The mean weighted pairwise sensitivity among experts was 68.2\% (SD 6.95; 95\% CI 59.6-76.8), whereas that between experts and the LaD algorithm was 69.4\% (SD 17.95; 95\% CI 47.1-91.7). The pairwise FPR among the experts ranged from 12\% to 33\% with a mean of 23.9\% (SD 9.14; 95\% CI 12.6-35.2), whereas that between experts and the algorithm ranged from 18\% to 43\% (mean 26.3\%; SD 10.4; 95\% CI 13.3-39.3). There was a correlation (mean 0.67 [SD 0.06]) in the actions selected by the expert pairs for the different patient cases with a reliability coefficient ($\alpha$) of .91. Conclusions: The LaD algorithm was more sensitive but had a higher FPR than the childbirth experts, although the differences were not statistically significant. An electronic tool for childbirth monitoring with fewer WHO-recommended parameters may not be inferior to human experts in labor and delivery clinical decision support. ", doi="10.2196/17056", url="https://medinform.jmir.org/2021/5/e17056", url="http://www.ncbi.nlm.nih.gov/pubmed/34042599" } @Article{info:doi/10.2196/25520, author="McMurray, Josephine and Levy, AnneMarie and Holyoke, Paul", title="Psychometric Evaluation and Workflow Integration Study of a Tablet-Based Tool to Detect Mild Cognitive Impairment in Older Adults: Protocol for a Mixed Methods Study", journal="JMIR Res Protoc", year="2021", month="May", day="21", volume="10", number="5", pages="e25520", keywords="cognitive dysfunction", keywords="dementia", keywords="neuropsychological tests", keywords="evaluation study", keywords="technology", keywords="aged", keywords="primary health care", abstract="Background: With the rapid aging of the global population, experts anticipate a surge in the prevalence of mild cognitive impairment (MCI) and dementia worldwide. 
It is argued that developing more sensitive, easy to administer, and valid MCI screening tools for use in primary care settings may initiate timely clinical and personal care planning and treatment, enabling early access to programs and services. Including functional competence measures in screening tests makes them more ecologically valid and may help to identify cognitive deficits at an earlier stage. Objective: We aim to conduct a preliminary evaluative study comparing the sensitivity, specificity, and reliability of the BrainFx Screen (referred to as SCREEN hereafter), a novel digital tool designed to assess functional competence and detect early signs of cognitive impairment, with the Quick Mild Cognitive Impairment, a validated and highly sensitive tool that detects MCI in the older adult population. We will also investigate the perceived usefulness and integration of the SCREEN into primary care practice to identify demonstrable impacts on clinical workflow and health care providers' (HCP) perceptions of its success as a screening tool. Patients' perceptions of completing the SCREEN and its impact on their quality of life will also be explored. Methods: This study has a concurrent, mixed methods, prospective, and quasi-experimental design. Participants will be recruited from 5 primary care family health teams (FHTs; defined by multidisciplinary practice and capitated funding) across southwestern Ontario, Canada. Participants will include HCPs, patients, care partners, and FHT administrative executives. Patients 55 years and older with no history of diagnoses for MCI, dementia, or Alzheimer disease rostered in one of the FHTs participating in the study will be eligible to participate. Their care partners will help triangulate the qualitative data collected from patients. Participating FHTs will identify an occupational therapist from their site to participate in the study; this HCP will both administer the research protocol and participate in semistructured in-depth interviews and questionnaires. Principal component analysis will be conducted on the SCREEN data to understand the test components better. Tests comparing sensitivity, specificity, and test-retest reliability will assess the validity of SCREEN as a screening tool for MCI. Results: This paper describes the study protocol and its activities to date. Data collection was halted early because of COVID-19 restrictions on research activity, and data analysis is currently in progress. Conclusions: At the end of the project, we anticipate having an initial comparative evaluation of the SCREEN as a tool for early detection of MCI in primary care older adult patient populations. Resource constraints on this research study limit our ability to conduct a randomized controlled trial; however, the results will assist developers of the SCREEN in determining whether rigorous controlled testing is warranted. International Registered Report Identifier (IRRID): DERR1-10.2196/25520 ", doi="10.2196/25520", url="https://www.researchprotocols.org/2021/5/e25520", url="http://www.ncbi.nlm.nih.gov/pubmed/34018966" } @Article{info:doi/10.2196/22664, author="Li, Lei and Zhu, Haogang and Zhang, Zhenyu and Zhao, Liang and Xu, Liang and Jonas, A. Rahul and Garway-Heath, F. David and Jonas, B. 
Jost and Wang, Xing Ya", title="Neural Network--Based Retinal Nerve Fiber Layer Profile Compensation for Glaucoma Diagnosis in Myopia: Model Development and Validation", journal="JMIR Med Inform", year="2021", month="May", day="18", volume="9", number="5", pages="e22664", keywords="retinal nerve fiber layer thickness", keywords="radial basis neural network", keywords="neural network", keywords="glaucoma", keywords="optic nerve head", keywords="optical coherence tomography", keywords="myopia", keywords="optic nerve", abstract="Background: Due to the axial elongation--associated changes in the optic nerve and retina in high myopia, traditional methods like optic disc evaluation and visual field are not able to correctly differentiate glaucomatous lesions. It has been clinically challenging to detect glaucoma in highly myopic eyes. Objective: This study aimed to develop a neural network to adjust for the dependence of the peripapillary retinal nerve fiber layer (RNFL) thickness (RNFLT) profile on age, gender, and ocular biometric parameters and to evaluate the network's performance for glaucoma diagnosis, especially in high myopia. Methods: RNFLT with 768 points on the circumferential 3.4-mm scan was measured using spectral-domain optical coherence tomography. A fully connected network and a radial basis function network were trained for vertical (scaling) and horizontal (shift) transformation of the RNFLT profile with adjustment for age, axial length (AL), disc-fovea angle, and distance in a test group of 2223 nonglaucomatous eyes. The performance of RNFLT compensation was evaluated in an independent group of 254 glaucoma patients and 254 nonglaucomatous participants. Results: By applying the RNFL compensation algorithm, the area under the receiver operating characteristic curve for detecting glaucoma increased from 0.70 to 0.84, from 0.75 to 0.89, from 0.77 to 0.89, and from 0.78 to 0.87 for eyes in the highest 10\% percentile subgroup of the AL distribution (mean 26.0, SD 0.9 mm), highest 20\% percentile subgroup of the AL distribution (mean 25.3, SD 1.0 mm), highest 30\% percentile subgroup of the AL distribution (mean 24.9, SD 1.0 mm), and any AL (mean 23.5, SD 1.2 mm), respectively, in comparison with unadjusted RNFLT. The difference between uncompensated and compensated RNFLT values increased with longer axial length, with enlargement of 19.8\%, 18.9\%, 16.2\%, and 11.3\% in the highest 10\% percentile subgroup, highest 20\% percentile subgroup, highest 30\% percentile subgroup, and all eyes, respectively. Conclusions: In a population-based study sample, an algorithm-based adjustment for age, gender, and ocular biometric parameters improved the diagnostic precision of the RNFLT profile for glaucoma detection particularly in myopic and highly myopic eyes. 
", doi="10.2196/22664", url="https://medinform.jmir.org/2021/5/e22664", url="http://www.ncbi.nlm.nih.gov/pubmed/34003137" } @Article{info:doi/10.2196/27172, author="Churov{\'a}, Vendula and Vy{\vs}kovsk{\'y}, Roman and Mar{\vs}{\'a}lov{\'a}, Kate?ina and Kudl{\'a}{\v c}ek, David and Schwarz, Daniel", title="Anomaly Detection Algorithm for Real-World Data and Evidence in Clinical Research: Implementation, Evaluation, and Validation Study", journal="JMIR Med Inform", year="2021", month="May", day="7", volume="9", number="5", pages="e27172", keywords="clinical research data", keywords="real-world evidence", keywords="registry database", keywords="data quality", keywords="EDC system", keywords="anomaly detection", abstract="Background: Statistical analysis, which has become an integral part of evidence-based medicine, relies heavily on data quality that is of critical importance in modern clinical research. Input data are not only at risk of being falsified or fabricated, but also at risk of being mishandled by investigators. Objective: The urgent need to assure the highest data quality possible has led to the implementation of various auditing strategies designed to monitor clinical trials and detect errors of different origin that frequently occur in the field. The objective of this study was to describe a machine learning--based algorithm to detect anomalous patterns in data created as a consequence of carelessness, systematic error, or intentionally by entering fabricated values. Methods: A particular electronic data capture (EDC) system, which is used for data management in clinical registries, is presented including its architecture and data structure. This EDC system features an algorithm based on machine learning designed to detect anomalous patterns in quantitative data. The detection algorithm combines clustering with a series of 7 distance metrics that serve to determine the strength of an anomaly. For the detection process, the thresholds and combinations of the metrics were used and the detection performance was evaluated and validated in the experiments involving simulated anomalous data and real-world data. Results: Five different clinical registries related to neuroscience were presented---all of them running in the given EDC system. Two of the registries were selected for the evaluation experiments and served also to validate the detection performance on an independent data set. The best performing combination of the distance metrics was that of Canberra, Manhattan, and Mahalanobis, whereas Cosine and Chebyshev metrics had been excluded from further analysis due to the lowest performance when used as single distance metric--based classifiers. Conclusions: The experimental results demonstrate that the algorithm is universal in nature, and as such may be implemented in other EDC systems, and is capable of anomalous data detection with a sensitivity exceeding 85\%. ", doi="10.2196/27172", url="https://medinform.jmir.org/2021/5/e27172", url="http://www.ncbi.nlm.nih.gov/pubmed/33851576" } @Article{info:doi/10.2196/25714, author="Vaghela, Uddhav and Rabinowicz, Simon and Bratsos, Paris and Martin, Guy and Fritzilas, Epameinondas and Markar, Sheraz and Purkayastha, Sanjay and Stringer, Karl and Singh, Harshdeep and Llewellyn, Charlie and Dutta, Debabrata and Clarke, M. 
Jonathan and Howard, Matthew and Serban, Ovidiu and Kinross, James", title="Using a Secure, Continually Updating, Web Source Processing Pipeline to Support the Real-Time Data Synthesis and Analysis of Scientific Literature: Development and Validation Study", journal="J Med Internet Res", year="2021", month="May", day="6", volume="23", number="5", pages="e25714", keywords="structured data synthesis", keywords="data science", keywords="critical analysis", keywords="web crawl data", keywords="pipeline", keywords="database", keywords="literature", keywords="research", keywords="COVID-19", keywords="infodemic", keywords="decision making", keywords="data", keywords="data synthesis", keywords="misinformation", keywords="infrastructure", keywords="methodology", abstract="Background: The scale and quality of the global scientific response to the COVID-19 pandemic have unquestionably saved lives. However, the COVID-19 pandemic has also triggered an unprecedented ``infodemic''; the velocity and volume of data production have overwhelmed many key stakeholders such as clinicians and policy makers, as they have been unable to process structured and unstructured data for evidence-based decision making. Solutions that aim to alleviate this data synthesis--related challenge are unable to capture heterogeneous web data in real time for the production of concomitant answers and are not based on the high-quality information in responses to a free-text query. Objective: The main objective of this project is to build a generic, real-time, continuously updating curation platform that can support the data synthesis and analysis of a scientific literature framework. Our secondary objective is to validate this platform and the curation methodology for COVID-19--related medical literature by expanding the COVID-19 Open Research Dataset via the addition of new, unstructured data. Methods: To create an infrastructure that addresses our objectives, the PanSurg Collaborative at Imperial College London has developed a unique data pipeline based on a web crawler extraction methodology. This data pipeline uses a novel curation methodology that adopts a human-in-the-loop approach for the characterization of quality, relevance, and key evidence across a range of scientific literature sources. Results: REDASA (Realtime Data Synthesis and Analysis) is now one of the world's largest and most up-to-date sources of COVID-19--related evidence; it consists of 104,000 documents. By capturing curators' critical appraisal methodologies through the discrete labeling and rating of information, REDASA rapidly developed a foundational, pooled, data science data set of over 1400 articles in under 2 weeks. These articles provide COVID-19--related information and represent around 10\% of all papers about COVID-19. Conclusions: This data set can act as ground truth for the future implementation of a live, automated systematic review. The three benefits of REDASA's design are as follows: (1) it adopts a user-friendly, human-in-the-loop methodology by embedding an efficient, user-friendly curation platform into a natural language processing search engine; (2) it provides a curated data set in the JavaScript Object Notation format for experienced academic reviewers' critical appraisal choices and decision-making methodologies; and (3) due to the wide scope and depth of its web crawling method, REDASA has already captured one of the world's largest COVID-19--related data corpora for searches and curation.
", doi="10.2196/25714", url="https://www.jmir.org/2021/5/e25714", url="http://www.ncbi.nlm.nih.gov/pubmed/33835932" } @Article{info:doi/10.2196/21459, author="Her, Qoua and Kent, Thomas and Samizo, Yuji and Slavkovic, Aleksandra and Vilk, Yury and Toh, Sengwee", title="Automatable Distributed Regression Analysis of Vertically Partitioned Data Facilitated by PopMedNet: Feasibility and Enhancement Study", journal="JMIR Med Inform", year="2021", month="Apr", day="23", volume="9", number="4", pages="e21459", keywords="distributed regression analysis", keywords="distributed data networks", keywords="privacy-protecting analytics", keywords="vertically partitioned data", keywords="informatics", keywords="data networks", keywords="data", abstract="Background: In clinical research, important variables may be collected from multiple data sources. Physical pooling of patient-level data from multiple sources often raises several challenges, including proper protection of patient privacy and proprietary interests. We previously developed an SAS-based package to perform distributed regression---a suite of privacy-protecting methods that perform multivariable-adjusted regression analysis using only summary-level information---with horizontally partitioned data, a setting where distinct cohorts of patients are available from different data sources. We integrated the package with PopMedNet, an open-source file transfer software, to facilitate secure file transfer between the analysis center and the data-contributing sites. The feasibility of using PopMedNet to facilitate distributed regression analysis (DRA) with vertically partitioned data, a setting where the data attributes from a cohort of patients are available from different data sources, was unknown. Objective: The objective of the study was to describe the feasibility of using PopMedNet and enhancements to PopMedNet to facilitate automatable vertical DRA (vDRA) in real-world settings. Methods: We gathered the statistical and informatic requirements of using PopMedNet to facilitate automatable vDRA. We enhanced PopMedNet based on these requirements to improve its technical capability to support vDRA. Results: PopMedNet can enable automatable vDRA. We identified and implemented two enhancements to PopMedNet that improved its technical capability to perform automatable vDRA in real-world settings. The first was the ability to simultaneously upload and download multiple files, and the second was the ability to directly transfer summary-level information between the data-contributing sites without a third-party analysis center. Conclusions: PopMedNet can be used to facilitate automatable vDRA to protect patient privacy and support clinical research in real-world settings. ", doi="10.2196/21459", url="https://medinform.jmir.org/2021/4/e21459", url="http://www.ncbi.nlm.nih.gov/pubmed/33890866" } @Article{info:doi/10.2196/25035, author="Jeon, Hokyun and You, Chan Seng and Kang, Yun Seok and Seo, In Seung and Warner, L. 
Jeremy and Belenkaya, Rimma and Park, Woong Rae", title="Characterizing the Anticancer Treatment Trajectory and Pattern in Patients Receiving Chemotherapy for Cancer Using Harmonized Observational Databases: Retrospective Study", journal="JMIR Med Inform", year="2021", month="Apr", day="6", volume="9", number="4", pages="e25035", keywords="antineoplastic combined chemotherapy protocols", keywords="electronic health record", keywords="cancer", keywords="pattern", keywords="chemotherapy", keywords="database", keywords="retrospective", keywords="algorithm", keywords="scalability", keywords="interoperability", abstract="Background: Accurate and rapid clinical decisions based on real-world evidence are essential for patients with cancer. However, the complexity of chemotherapy regimens for cancer impedes retrospective research that uses observational health databases. Objective: The aim of this study is to compare the anticancer treatment trajectories and patterns of clinical events according to regimen type using the chemotherapy episodes determined by an algorithm. Methods: We developed an algorithm to extract the regimen-level abstracted chemotherapy episodes from medication records in a conventional Observational Medical Outcomes Partnership (OMOP) common data model (CDM) database. The algorithm was validated on the Ajou University School of Medicine (AUSOM) database by manual review of clinical notes. Using the algorithm, we extracted episodes of chemotherapy from patients in the EHR database and the claims database. We also developed a software application for visualizing the chemotherapy treatment patterns based on the treatment episodes in the OMOP-CDM database. Using this software, we generated the trends in the types of regimen used in the institutions, the patterns of the iterative chemotherapy use, and the trajectories of cancer treatment in two EHR-based OMOP-CDM databases. As a pilot study, the time of onset of chemotherapy-induced neutropenia according to regimen was measured using the AUSOM database. The anticancer treatment trajectories for patients with COVID-19 were also visualized based on the nationwide claims database. Results: We generated 178,360 treatment episodes for patients with colorectal, breast, and lung cancer for 85 different regimens. The algorithm precisely identified the type of chemotherapy regimen in 400 patients (average positive predictive value >98\%). The trends in the use of routine clinical chemotherapy regimens from 2008-2018 were identified for 8236 patients. For a total of 12 regimens (those administered to the largest proportion of patients), the number of repeated treatments was concordant with the protocols for standard chemotherapy regimens for certain cases. In addition, the anticancer treatment trajectories for 8315 patients were shown, including 62 patients with COVID-19. A comparative analysis of neutropenia showed that its onset in colorectal cancer regimens tended to cluster between days 9-15, whereas it tended to cluster between days 2-8 for certain regimens for breast cancer or lung cancer. Conclusions: We propose a method for generating chemotherapy episodes for introduction into the oncology extension module of the OMOP-CDM databases. These proof-of-concept studies demonstrated the usability, scalability, and interoperability of the proposed framework through a distributed research network.
", doi="10.2196/25035", url="https://medinform.jmir.org/2021/4/e25035", url="http://www.ncbi.nlm.nih.gov/pubmed/33720842" } @Article{info:doi/10.2196/20986, author="Marlin, Nadine and Rivas, Carol and Allotey, John and Dodds, Julie and Horne, Andrew and Ball, Elizabeth", title="Development and Validation of Clinical Prediction Models for Surgical Success in Patients With Endometriosis: Protocol for a Mixed Methods Study", journal="JMIR Res Protoc", year="2021", month="Apr", day="5", volume="10", number="4", pages="e20986", keywords="endometriosis", keywords="algorithm", keywords="laparoscopy", keywords="pain", keywords="therapeutic", abstract="Background: Endometriosis is a chronic inflammatory condition affecting 6\%-10\% of women of reproductive age and is defined by the presence of endometrial-like tissue outside the uterus (lesions), commonly affecting the pelvis and ovaries. It is associated with debilitating pelvic pain, infertility, and fatigue and often has devastating effects on the quality of life (QoL). Although it is as common as back pain, it is poorly understood, and treatment and diagnosis are often delayed, leading to unnecessary suffering. Endometriosis has no cure. Surgery is one of several management options. Quantifying the probability of successful surgery is important for guiding clinical decisions and treatment strategies. Factors predicting success through pain reduction after endometriosis surgery have not yet been adequately identified. Objective: This study aims to determine which women with confirmed endometriosis benefit from surgical improvement in pain and QoL and whether these women could be identified from clinical symptoms measured before laparoscopy. Methods: First, we will carry out a systematic search and review and, if appropriate, meta-analysis of observational cohort and case-control studies reporting one or more risk factors for endometriosis and postsurgical treatment success. We will search PubMed, Embase, and Cochrane databases from inception without language restrictions and supplement the reference lists by manual searches. Second, we will develop separate clinical prediction models for women with confirmed and suspected diagnoses of endometriosis. A total of three suitable databases have been identified for development and external validation (the MEDAL [ISRCTN13028601] and LUNA [ISRCTN41196151] studies, and the BSGE database), and access has been guaranteed. The models will be developed using a linear regression approach that links candidate factors to outcomes. Third, we will hold 2 stakeholder co-design workshops involving eight clinicians and eight women with endometriosis separately and then bring all 16 participants together. Participants will discuss the implementation, delivery, usefulness, and sustainability of the prediction models. Clinicians will also focus on the ease of use and access to clinical prediction tools. Results: This project was funded in March 2018 and approved by the Institutional Research Ethics Board in December 2019. At the time of writing, this study was in the data analysis phase, and the results are expected to be available in April 2021. Conclusions: This study is the first to aim to predict who will benefit most from laparoscopic surgery through the reduction of pain or increased QoL. The models will provide clinicians with robustly developed and externally validated support tools, improving decision making in the diagnosis and treatment of women. 
International Registered Report Identifier (IRRID): DERR1-10.2196/20986 ", doi="10.2196/20986", url="https://www.researchprotocols.org/2021/4/e20986", url="http://www.ncbi.nlm.nih.gov/pubmed/33818394" } @Article{info:doi/10.2196/22603, author="van Noort, J. Esther M. and Claessens, Danny and Moor, C. Catharina and Berg, Den Carlijn A. L. Van and Kasteleyn, J. Marise and in 't Veen, M. Johannes C. C. and Van Schayck, P. Onno C. and Chavannes, H. Niels", title="Online Tool for the Assessment of the Burden of COVID-19 in Patients: Development Study", journal="JMIR Form Res", year="2021", month="Mar", day="31", volume="5", number="3", pages="e22603", keywords="COVID-19", keywords="patient-reported outcomes", keywords="ABCoV tool", keywords="monitoring", keywords="patient outcome", keywords="long-term impact", keywords="tool", keywords="assessment", keywords="online patient platform", abstract="Background: The impact of COVID-19 has been felt worldwide, yet we are still unsure about its full impact. One of the gaps in our current knowledge relates to the long-term mental and physical impact of the infection on affected individuals. The COVID-19 pandemic hit the Netherlands at the end of February 2020, resulting in over 900,000 people testing positive for the virus, over 24,000 hospitalizations, and over 13,000 deaths by the end of January 2021. Although many patients recover from the acute phase of the disease, experience with other virus outbreaks has raised concerns regarding possible late sequelae of the infection. Objective: This study aims to develop an online tool to assess the long-term burden of COVID-19 in patients. Methods: In this paper, we describe the process of development, assessment, programming, implementation, and use of this new tool: the assessment of burden of COVID-19 (ABCoV) tool. This new tool is based on the well-validated assessment of burden of chronic obstructive pulmonary disease tool. Results: As of January 2021, the new ABCoV tool has been used in an online patient platform by more than 2100 self-registered patients and another 400 patients in a hospital setting, resulting in over 2500 patients. These patients have submitted the ABCoV questionnaire 3926 times. Among the self-registered patients who agreed to have their data analyzed (n=1898), the number of females was high (n=1153, 60.7\%), many were medically diagnosed with COVID-19 (n=892, 47.0\%), and many were relatively young with only 7.4\% (n=141) being older than 60 years. Of all patients that actually used the tool (n=1517), almost one-quarter (n=356, 23.5\%) used the tool twice, and only a small group (n=76, 5.0\%) used the tool 6 times. Conclusions: This new ABCoV tool has been broadly and repeatedly used, and may provide insight into the perceived burden of disease, provide direction for personalized aftercare for people post COVID-19, and help us to be prepared for possible future recurrences. 
", doi="10.2196/22603", url="https://formative.jmir.org/2021/3/e22603", url="http://www.ncbi.nlm.nih.gov/pubmed/33729982" } @Article{info:doi/10.2196/23983, author="Park, Jimyung and You, Chan Seng and Jeong, Eugene and Weng, Chunhua and Park, Dongsu and Roh, Jin and Lee, Yun Dong and Cheong, Youn Jae and Choi, Wook Jin and Kang, Mira and Park, Woong Rae", title="A Framework (SOCRATex) for Hierarchical Annotation of Unstructured Electronic Health Records and Integration Into a Standardized Medical Database: Development and Usability Study", journal="JMIR Med Inform", year="2021", month="Mar", day="30", volume="9", number="3", pages="e23983", keywords="natural language processing", keywords="search engine", keywords="data curation", keywords="data management", keywords="common data model", abstract="Background: Although electronic health records (EHRs) have been widely used in secondary assessments, clinical documents are relatively less utilized owing to the lack of standardized clinical text frameworks across different institutions. Objective: This study aimed to develop a framework for processing unstructured clinical documents of EHRs and integration with standardized structured data. Methods: We developed a framework known as Staged Optimization of Curation, Regularization, and Annotation of clinical text (SOCRATex). SOCRATex has the following four aspects: (1) extracting clinical notes for the target population and preprocessing the data, (2) defining the annotation schema with a hierarchical structure, (3) performing document-level hierarchical annotation using the annotation schema, and (4) indexing annotations for a search engine system. To test the usability of the proposed framework, proof-of-concept studies were performed on EHRs. We defined three distinctive patient groups and extracted their clinical documents (ie, pathology reports, radiology reports, and admission notes). The documents were annotated and integrated into the Observational Medical Outcomes Partnership (OMOP)-common data model (CDM) database. The annotations were used for creating Cox proportional hazard models with different settings of clinical analyses to measure (1) all-cause mortality, (2) thyroid cancer recurrence, and (3) 30-day hospital readmission. Results: Overall, 1055 clinical documents of 953 patients were extracted and annotated using the defined annotation schemas. The generated annotations were indexed into an unstructured textual data repository. Using the annotations of pathology reports, we identified that node metastasis and lymphovascular tumor invasion were associated with all-cause mortality among colon and rectum cancer patients (both P=.02). The other analyses involving measuring thyroid cancer recurrence using radiology reports and 30-day hospital readmission using admission notes in depressive disorder patients also showed results consistent with previous findings. Conclusions: We propose a framework for hierarchical annotation of textual data and integration into a standardized OMOP-CDM medical database. The proof-of-concept studies demonstrated that our framework can effectively process and integrate diverse clinical documents with standardized structured data for clinical research. ", doi="10.2196/23983", url="https://medinform.jmir.org/2021/3/e23983", url="http://www.ncbi.nlm.nih.gov/pubmed/33783361" } @Article{info:doi/10.2196/19408, author="Smit, A. Marloes and van Pelt, W. Gabi and Dequeker, MC Elisabeth and Al Dieri, Raed and Tollenaar, AEM Rob and van Krieken, JM J. 
Han and Mesker, E. Wilma", title="e-Learning for Instruction and to Improve Reproducibility of Scoring Tumor-Stroma Ratio in Colon Carcinoma: Performance and Reproducibility Assessment in the UNITED Study", journal="JMIR Form Res", year="2021", month="Mar", day="19", volume="5", number="3", pages="e19408", keywords="colon cancer", keywords="tumor-stroma ratio", keywords="validation", keywords="e-Learning", keywords="reproducibility study", keywords="cancer", keywords="tumor", keywords="colon", keywords="reproducibility", keywords="carcinoma", keywords="prognosis", keywords="diagnostic", keywords="implementation", keywords="online learning", abstract="Background: The amount of stroma in the primary tumor is an important prognostic parameter. The tumor-stroma ratio (TSR) was previously validated by international research groups as a robust parameter with good interobserver agreement. Objective: The Uniform Noting for International Application of the Tumor-Stroma Ratio as an Easy Diagnostic Tool (UNITED) study was developed to bring the TSR to clinical implementation. As part of the study, an e-Learning module was constructed to confirm the reproducibility of scoring the TSR after proper instruction. Methods: The e-Learning module consists of an autoinstruction for TSR determination (instruction video or written protocol) and three sets of 40 cases (training, test, and repetition sets). Scoring the TSR is performed on hematoxylin and eosin--stained sections and takes only 1-2 minutes. Cases are considered stroma-low if the amount of stroma is $\leq$50\%, whereas a stroma-high case is defined as >50\% stroma. Inter- and intraobserver agreements were determined based on the Cohen $\kappa$ score after each set to evaluate the reproducibility. Results: Pathologists and pathology residents (N=63) with special interest in colorectal cancer participated in the e-Learning. Forty-nine participants started the e-Learning and 31 (63\%) finished the whole cycle (3 sets). A significant improvement was observed from the training set to the test set; the median $\kappa$ score improved from 0.72 to 0.77 (P=.002). Conclusions: e-Learning is an effective method to instruct pathologists and pathology residents for scoring the TSR. The reliability of scoring improved from the training to the test set and did not fall back with the repetition set, confirming the reproducibility of the TSR scoring method. Trial Registration: The Netherlands Trial Registry NTR7270; https://www.trialregister.nl/trial/7072 International Registered Report Identifier (IRRID): RR2-10.2196/13464 ", doi="10.2196/19408", url="https://formative.jmir.org/2021/3/e19408", url="http://www.ncbi.nlm.nih.gov/pubmed/33739293" } @Article{info:doi/10.2196/17993, author="Heidary, Zahra and Cochran, M. Jeffrey and Peters-Strickland, Timothy and Knights, Jonathan", title="A Rest Quality Metric Using a Cluster-Based Analysis of Accelerometer Data and Correlation With Digital Medicine Ingestion Data: Algorithm Development", journal="JMIR Form Res", year="2021", month="Mar", day="2", volume="5", number="3", pages="e17993", keywords="serious mental illness", keywords="rest quality", keywords="actimetry", keywords="behavioral health", keywords="digital medicine", keywords="accelerometer", keywords="medication adherence", abstract="Background: Adherence to medication regimens and patient rest are two important factors in the well-being of patients with serious mental illness.
Both of these behaviors are traditionally difficult to record objectively in unsupervised populations. Objective: A digital medicine system that provides objective time-stamped medication ingestion records was used by patients with serious mental illness. Accelerometer data from the digital medicine system was used to assess rest quality and thus allow for investigation into correlations between rest and medication ingestion. Methods: Longest daily rest periods were identified and then evaluated using a k-means clustering algorithm and distance metric to quantify the relative quality of patient rest during these periods. This accelerometer-derived quality-of-rest metric, along with other accepted metrics of rest quality, such as duration and start time of the longest rest periods, was compared to the objective medication ingestion records. Overall medication adherence classification based on rest features was not performed due to a lack of patients with poor adherence in the sample population. Results: Explorations of the relationship between these rest metrics and ingestion did seem to indicate that patients with poor adherence experienced relatively low quality of rest; however, patients with better adherence did not necessarily exhibit consistent rest quality. This sample did not contain sufficient patients with poor adherence to draw more robust correlations between rest quality and ingestion behavior. The correlation of temporal outliers in these rest metrics with daily outliers in ingestion time was also explored. Conclusions: This result demonstrates the ability of digital medicine systems to quantify patient rest quality, providing a framework for further work to expand the participant population, compare these rest metrics to gold-standard sleep measurements, and correlate these digital medicine biomarkers with objective medication ingestion data. ", doi="10.2196/17993", url="https://formative.jmir.org/2021/3/e17993", url="http://www.ncbi.nlm.nih.gov/pubmed/33650981" } @Article{info:doi/10.2196/25530, author="Kim, Taehyeong and Han, Won Sung and Kang, Minji and Lee, Ha Se and Kim, Jong-Ho and Joo, Joon Hyung and Sohn, Wook Jang", title="Similarity-Based Unsupervised Spelling Correction Using BioWordVec: Development and Usability Study of Bacterial Culture and Antimicrobial Susceptibility Reports", journal="JMIR Med Inform", year="2021", month="Feb", day="22", volume="9", number="2", pages="e25530", keywords="spelling correction", keywords="natural language processing", keywords="bacteria", keywords="electronic health record", abstract="Background: Existing bacterial culture test results for infectious diseases are written in unrefined text, resulting in many problems, including typographical errors and stop words. Effective spelling correction processes are needed to ensure the accuracy and reliability of data for the study of infectious diseases, including medical terminology extraction. If a dictionary is established, spelling algorithms using edit distance are efficient. However, in the absence of a dictionary, traditional spelling correction algorithms that utilize only edit distances have limitations. Objective: In this research, we proposed a similarity-based spelling correction algorithm using pretrained word embedding with the BioWordVec technique. This method uses a character-level N-grams--based distributed representation through unsupervised learning rather than the existing rule-based method. 
In other words, we propose a framework that detects and corrects typographical errors when a dictionary is not in place. Methods: For detected typographical errors not mapped to Systematized Nomenclature of Medicine (SNOMED) clinical terms, a correction candidate group with high similarity considering the edit distance was generated using pretrained word embedding from the clinical database. From the embedding matrix in which the vocabulary is arranged in descending order according to frequency, a grid search was used to search for candidate groups of similar words. Thereafter, the correction candidate words were ranked in consideration of the frequency of the words, and the typographical errors were finally corrected according to the ranking. Results: Bacterial identification words were extracted from 27,544 bacterial culture and antimicrobial susceptibility reports, and 16 types of spelling errors and 914 misspelled words were found. The similarity-based spelling correction algorithm using BioWordVec proposed in this research corrected 12 types of typographical errors and showed very high performance in correcting 97.48\% (based on F1 score) of all spelling errors. Conclusions: This tool corrected spelling errors effectively in the absence of a dictionary based on bacterial identification words in bacterial culture and antimicrobial susceptibility reports. This method will help build a high-quality refined database of vast text data for electronic health records. ", doi="10.2196/25530", url="https://medinform.jmir.org/2021/2/e25530", url="http://www.ncbi.nlm.nih.gov/pubmed/33616536" } @Article{info:doi/10.2196/26552, author="Lam, Kyle and Iqbal, M. Fahad and Purkayastha, Sanjay and Kinross, M. James", title="Investigating the Ethical and Data Governance Issues of Artificial Intelligence in Surgery: Protocol for a Delphi Study", journal="JMIR Res Protoc", year="2021", month="Feb", day="22", volume="10", number="2", pages="e26552", keywords="artificial intelligence", keywords="digital surgery", keywords="Delphi", keywords="ethics", keywords="data governance", keywords="digital technology", keywords="operating room", keywords="surgery", abstract="Background: The rapid uptake of digital technology into the operating room has the potential to improve patient outcomes, increase efficiency of the use of operating rooms, and allow surgeons to progress quickly up learning curves. These technologies are, however, dependent on huge amounts of data, and the consequences of their mismanagement are significant. While the field of artificial intelligence ethics is able to provide a broad framework for those designing and implementing these technologies into the operating room, there is a need to determine and address the ethical and data governance challenges of using digital technology in this unique environment. Objective: The objectives of this study are to define the term digital surgery and gain expert consensus on the key ethical and data governance issues, barriers, and future research goals of the use of artificial intelligence in surgery. Methods: Experts from the fields of surgery, ethics and law, policy, artificial intelligence, and industry will be invited to participate in a 4-round consensus Delphi exercise. In the first round, participants will supply free-text responses across 4 key domains: ethics, data governance, barriers, and future research goals. They will also be asked to provide their understanding of the term digital surgery. 
In subsequent rounds, statements will be grouped, and participants will be asked to rate the importance of each issue on a 9-point Likert scale ranging from 1 (not at all important) to 9 (critically important). Consensus is defined a priori as a score of 7 to 9 by 70\% of respondents and 1 to 3 by less than 30\% of respondents. A final online meeting round will be held to discuss inclusion of statements and draft a consensus document. Results: Full ethical approval has been obtained for the study by the local research ethics committee at Imperial College, London (20IC6136). We anticipate round 1 to commence in January 2021. Conclusions: The results of this study will define the term digital surgery, identify the key issues and barriers, and shape future research in this area. International Registered Report Identifier (IRRID): PRR1-10.2196/26552 ", doi="10.2196/26552", url="https://www.researchprotocols.org/2021/2/e26552", url="http://www.ncbi.nlm.nih.gov/pubmed/33616543" } @Article{info:doi/10.2196/18840, author="Walkey, J. Allan and Bashar, K. Syed and Hossain, Billal Md and Ding, Eric and Albuquerque, Daniella and Winter, Michael and Chon, H. Ki and McManus, D. David", title="Development and Validation of an Automated Algorithm to Detect Atrial Fibrillation Within Stored Intensive Care Unit Continuous Electrocardiographic Data: Observational Study", journal="JMIR Cardio", year="2021", month="Feb", day="15", volume="5", number="1", pages="e18840", keywords="atrial fibrillation", keywords="sepsis", keywords="intensive care unit", keywords="big data", keywords="data science", abstract="Background: Atrial fibrillation (AF) is the most common arrhythmia during critical illness, representing a sepsis-defining cardiac dysfunction associated with adverse outcomes. Large burdens of premature beats and noisy signal during sepsis may pose unique challenges to automated AF detection. Objective: The objective of this study is to develop and validate an automated algorithm to accurately identify AF within electronic health care data among critically ill patients with sepsis. Methods: This is a retrospective cohort study of patients hospitalized with sepsis identified from Medical Information Mart for Intensive Care (MIMIC III) electronic health data with linked electrocardiographic (ECG) telemetry waveforms. Within 3 separate cohorts of 50 patients, we iteratively developed and validated an automated algorithm that identifies ECG signals, removes noise, and identifies irregular rhythm and premature beats in order to identify AF. We compared the automated algorithm to current methods of AF identification in large databases, including ICD-9 (International Classification of Diseases, 9th edition) codes and hourly nurse annotation of heart rhythm. Methods of AF identification were tested against gold-standard manual ECG review. Results: AF detection algorithms that did not differentiate AF from premature atrial and ventricular beats performed modestly, with 76\% (95\% CI 61\%-87\%) accuracy. Performance improved (P=.02) with the addition of premature beat detection (validation set accuracy: 94\% [95\% CI 83\%-99\%]). Median time between automated and manual detection of AF onset was 30 minutes (25th-75th percentile 0-208 minutes). The accuracy of ICD-9 codes (68\%; P=.002 vs automated algorithm) and nurse charting (80\%; P=.02 vs algorithm) was lower than that of the automated algorithm. 
Conclusions: An automated algorithm using telemetry ECG data can feasibly and accurately detect AF among critically ill patients with sepsis, and represents an improvement in AF detection within large databases. ", doi="10.2196/18840", url="http://cardio.jmir.org/2021/1/e18840/", url="http://www.ncbi.nlm.nih.gov/pubmed/33587041" } @Article{info:doi/10.2196/22164, author="Bhalodiya, Maganbhai Jayendra and Palit, Arnab and Giblin, Gerard and Tiwari, Kumar Manoj and Prasad, K. Sanjay and Bhudia, K. Sunil and Arvanitis, N. Theodoros and Williams, A. Mark", title="Identifying Myocardial Infarction Using Hierarchical Template Matching--Based Myocardial Strain: Algorithm Development and Usability Study", journal="JMIR Med Inform", year="2021", month="Feb", day="10", volume="9", number="2", pages="e22164", keywords="left ventricle", keywords="myocardial infarction", keywords="myocardium", keywords="strain", abstract="Background: Myocardial infarction (MI; location and extent of infarction) can be determined by late enhancement cardiac magnetic resonance (CMR) imaging, which requires the injection of a potentially harmful gadolinium-based contrast agent (GBCA). Alternatively, emerging research in the area of myocardial strain has shown potential to identify MI using strain values. Objective: This study aims to identify the location of MI by developing an applied algorithmic method of circumferential strain (CS) values, which are derived through a novel hierarchical template matching (HTM) method. Methods: HTM-based CS H-spread from end-diastole to end-systole was used to develop an applied method. Grid-tagging magnetic resonance imaging was used to calculate strain values in the left ventricular (LV) myocardium, followed by the 16-segment American Heart Association model. The data set was used with k-fold cross-validation to estimate the percentage reduction of H-spread among infarcted and noninfarcted LV segments. A total of 43 participants (38 MI and 5 healthy) who underwent CMR imaging were retrospectively selected. Infarcted segments detected by using this method were validated by comparison with late enhancement CMR, and the diagnostic performance of the applied algorithmic method was evaluated with a receiver operating characteristic curve test. Results: The H-spread of the CS was reduced in infarcted segments compared with noninfarcted segments of the LV. The reductions were 30\% in basal segments, 30\% in midventricular segments, and 20\% in apical LV segments. The diagnostic accuracy of detection, using the reported method, was represented by area under the curve values, which were 0.85, 0.82, and 0.87 for basal, midventricular, and apical slices, respectively, demonstrating good agreement with the late-gadolinium enhancement--based detections. Conclusions: The proposed applied algorithmic method has the potential to accurately identify the location of infarcted LV segments without the administration of late-gadolinium enhancement. Such an approach adds the potential to safely identify MI, potentially reduce patient scanning time, and extend the utility of CMR in patients who are contraindicated for the use of GBCA. 
", doi="10.2196/22164", url="https://medinform.jmir.org/2021/2/e22164", url="http://www.ncbi.nlm.nih.gov/pubmed/33565992" } @Article{info:doi/10.2196/18298, author="Elghafari, Anas and Finkelstein, Joseph", title="Automated Identification of Common Disease-Specific Outcomes for Comparative Effectiveness Research Using ClinicalTrials.gov: Algorithm Development and Validation Study", journal="JMIR Med Inform", year="2021", month="Feb", day="8", volume="9", number="2", pages="e18298", keywords="clinical trials", keywords="clinical outcomes", keywords="common data elements", keywords="data processing", keywords="ClinicalTrials.gov", abstract="Background: Common disease-specific outcomes are vital for ensuring comparability of clinical trial data and enabling meta analyses and interstudy comparisons. Traditionally, the process of deciding which outcomes should be recommended as common for a particular disease relied on assembling and surveying panels of subject-matter experts. This is usually a time-consuming and laborious process. Objective: The objectives of this work were to develop and evaluate a generalized pipeline that can automatically identify common outcomes specific to any given disease by finding, downloading, and analyzing data of previous clinical trials relevant to that disease. Methods: An automated pipeline to interface with ClinicalTrials.gov's application programming interface and download the relevant trials for the input condition was designed. The primary and secondary outcomes of those trials were parsed and grouped based on text similarity and ranked based on frequency. The quality and usefulness of the pipeline's output were assessed by comparing the top outcomes identified by it for chronic obstructive pulmonary disease (COPD) to a list of 80 outcomes manually abstracted from the most frequently cited and comprehensive reviews delineating clinical outcomes for COPD. Results: The common disease-specific outcome pipeline successfully downloaded and processed 3876 studies related to COPD. Manual verification indicated that the pipeline was downloading and processing the same number of trials as were obtained from the self-service ClinicalTrials.gov portal. Evaluating the automatically identified outcomes against the manually abstracted ones showed that the pipeline achieved a recall of 92\% and precision of 79\%. The precision number indicated that the pipeline was identifying many outcomes that were not covered in the literature reviews. Assessment of those outcomes indicated that they are relevant to COPD and could be considered in future research. Conclusions: An automated evidence-based pipeline can identify common clinical trial outcomes of comparable breadth and quality as the outcomes identified in comprehensive literature reviews. Moreover, such an approach can highlight relevant outcomes for further consideration. 
", doi="10.2196/18298", url="http://medinform.jmir.org/2021/2/e18298/", url="http://www.ncbi.nlm.nih.gov/pubmed/33460388" } @Article{info:doi/10.2196/18837, author="Aleknaite, Ausra and Simutis, Gintaras and Stanaitis, Juozas and Jucaitis, Tomas and Drungilas, Mantas and Valantinas, Jonas and Strupas, Kestutis", title="Comparison of Endoscopy First and Laparoscopic Cholecystectomy First Strategies for Patients With Gallstone Disease and Intermediate Risk of Choledocholithiasis: Protocol for a Clinical Randomized Controlled Trial", journal="JMIR Res Protoc", year="2021", month="Feb", day="4", volume="10", number="2", pages="e18837", keywords="choledocholithiasis", keywords="endoscopic ultrasound", keywords="intraoperative cholangiography", keywords="common bile duct stone", keywords="endoscopic retrograde cholangiopancreatography", keywords="laparoscopic cholecystectomy", abstract="Background: The optimal approach for patients with gallbladder stones and intermediate risk of choledocholithiasis remains undetermined. The use of endoscopic retrograde cholangiopancreatography for diagnosis should be minimized as it carries considerable risk of postprocedural complications, and nowadays, less invasive and safer techniques are available. Objective: This study compares the two management strategies of endoscopic ultrasound before laparoscopic cholecystectomy and intraoperative cholangiography for patients with symptomatic cholecystolithiasis and intermediate risk of choledocholithiasis. Methods: This is a randomized, active-controlled, single-center clinical trial enrolling adult patients undergoing laparoscopic cholecystectomy for symptomatic gallbladder stones with intermediate risk of choledocholithiasis. The risk of choledocholithiasis is calculated using an original prognostic score (the Vilnius University Hospital Index). This index in a retrospective evaluation showed better prognostic performance than the score proposed by the American Society for Gastrointestinal Endoscopy in 2010. A total of 106 participants will be included and randomized into two groups. Evaluation of bile ducts using endoscopic ultrasound and endoscopic retrograde cholangiography on demand will be performed before laparoscopic cholecystectomy for one arm (``endoscopy first''). Intraoperative cholangiography during laparoscopic cholecystectomy and postoperative endoscopic retrograde cholangiopancreatography on demand will be performed in another arm (``cholecystectomy first''). Postoperative follow-up is 6 months. The primary endpoint is the length of hospital stay. The secondary endpoints are accuracy of the different management strategies, adverse events of the interventions, duct clearance and technical success of the interventions (intraoperative cholangiography, endoscopic ultrasound, and endoscopic retrograde cholangiography), and cost of treatment. Results: The trial protocol was approved by the Vilnius Regional Biomedical Research Ethics Committee in December 2017. Enrollment of patients was started in January 2018. As of June 2020, 66 patients have been enrolled. Conclusions: This trial is planned to determine the superior strategy for patients with intermediate risk of common bile duct stones and to define a simple and safe algorithm for managing choledocholithiasis. Trial Registration: ClinicalTrials.gov NCT03658863; https://clinicaltrials.gov/ct2/show/NCT03658863. 
International Registered Report Identifier (IRRID): DERR1-10.2196/18837 ", doi="10.2196/18837", url="https://www.researchprotocols.org/2021/2/e18837", url="http://www.ncbi.nlm.nih.gov/pubmed/33538700" } @Article{info:doi/10.2196/24382, author="Kosowan, Leanne and Katz, Alan and Halas, Gayle and LaBine, Lisa and Singer, Alexander", title="Using Information Technology to Assess Patient Risk Factors in Primary Care Clinics: Pragmatic Evaluation", journal="JMIR Form Res", year="2021", month="Feb", day="2", volume="5", number="2", pages="e24382", keywords="risk factors", keywords="information technology", keywords="primary health care", keywords="primary prevention", abstract="Background: Tobacco use, physical inactivity, and poor diet are associated with morbidity and premature death. Health promotion and primary prevention counseling, advice, and support by a primary care provider lead to behavior change attempts among patients. However, although physicians consider preventative health important, there is often a larger focus on symptom presentation, acute care, and medication review. Objective: This study evaluated the feasibility, adoption, and integration of the tablet-based Risk Factor Identification Tool (RFIT) that uses algorithmic information technology to support obtainment of patient risk factor information in primary care clinics. Methods: This is a pragmatic developmental evaluation. Each clinic developed a site-specific implementation plan adapted to their workflow. The RFIT was implemented in 2 primary care clinics located in Manitoba. Perceptions of 10 clinic staff and 8 primary care clinicians informed this evaluation. Results: Clinicians reported a smooth and fast transfer of RFIT responses to an electronic medical record encounter note. The RFIT was used by 207 patients, with a completion rate of 86\%. Clinic staff reported that approximately 3\%-5\% of patients declined the use of the RFIT or required assistance to use the tablet. Among the 207 patients that used the RFIT, 22 (12.1\%) smoked, 39 (21.2\%) felt their diet could be improved, 20 (12.0\%) reported high alcohol consumption, 103 (56.9\%) reported less than 150 minutes of physical activity a week, and 6 (8.2\%) patients lived in poverty. Clinicians suggested that although a wide variety of patients were able to use the tablet-based RFIT, implemented surveys should be tailored to patient subgroups. Conclusions: Clinicians and clinic staff positively reviewed the use of information technology in primary care. Algorithmic information technology can collect, organize, and synthesize individual health information to inform and tailor primary care counseling to the patients' context and readiness to change. The RFIT is a user-friendly tool that provides an effective method for obtaining risk factor information from patients. It is particularly useful for subsets of patients lacking continuity in the care they receive. When implemented within a context that can support practical interventions to address identified risk factors, the RFIT can inform brief interventions within primary care. ", doi="10.2196/24382", url="https://formative.jmir.org/2021/2/e24382", url="http://www.ncbi.nlm.nih.gov/pubmed/33528376" } @Article{info:doi/10.2196/14326, author="Jones, L. Thomas and Heiden, Emily and Mitchell, Felicity and Fogg, Carole and McCready, Sharon and Pearce, Laurence and Kapoor, Melissa and Bassett, Paul and Chauhan, J. 
Anoop", title="Developing the Accuracy of Vital Sign Measurements Using the Lifelight Software Application in Comparison to Standard of Care Methods: Observational Study Protocol", journal="JMIR Res Protoc", year="2021", month="Jan", day="28", volume="10", number="1", pages="e14326", keywords="health technology", keywords="remote monitoring", keywords="vital signs", keywords="patient deterioration", abstract="Background: Vital sign measurements are an integral component of clinical care, but current challenges with the accuracy and timeliness of patient observations can impact appropriate clinical decision making. Advanced technologies using techniques such as photoplethysmography have the potential to automate noncontact physiological monitoring and recording, improving the quality and accessibility of this essential clinical information. Objective: In this study, we aim to develop the algorithm used in the Lifelight software application and improve the accuracy of its estimated heart rate, respiratory rate, oxygen saturation, and blood pressure measurements. Methods: This preliminary study will compare measurements predicted by the Lifelight software with standard of care measurements for an estimated population sample of 2000 inpatients, outpatients, and healthy people attending a large acute hospital. Both training datasets and validation datasets will be analyzed to assess the degree of correspondence between the vital sign measurements predicted by the Lifelight software and the direct physiological measurements taken using standard of care methods. Subgroup analyses will explore how the performance of the algorithm varies with particular patient characteristics, including age, sex, health condition, and medication. Results: Recruitment of participants to this study began in July 2018, and data collection will continue for a planned study period of 12 months. Conclusions: Digital health technology is a rapidly evolving area for health and social care. Following this initial exploratory study to develop and refine the Lifelight software application, subsequent work will evaluate its performance across a range of health characteristics, and extended validation trials will support its pathway to registration as a medical device. Innovations in health technology such as this may provide valuable opportunities for increasing the efficiency and accessibility of vital sign measurements and improve health care services on a large scale across multiple health and care settings. International Registered Report Identifier (IRRID): DERR1-10.2196/14326 ", doi="10.2196/14326", url="http://www.researchprotocols.org/2021/1/e14326/", url="http://www.ncbi.nlm.nih.gov/pubmed/33507157" } @Article{info:doi/10.2196/20184, author="Zolnoori, Maryam and McDonald, V. Margaret and Barr{\'o}n, Yolanda and Cato, Kenrick and Sockolow, Paulina and Sridharan, Sridevi and Onorato, Nicole and Bowles, Kathryn and Topaz, Maxim", title="Improving Patient Prioritization During Hospital-Homecare Transition: Protocol for a Mixed Methods Study of a Clinical Decision Support Tool Implementation", journal="JMIR Res Protoc", year="2021", month="Jan", day="22", volume="10", number="1", pages="e20184", keywords="clinical decision support system", keywords="homecare agencies", keywords="rehospitalization", keywords="RE-AIM framework", keywords="PREVENT", keywords="effective implementation", abstract="Background: Homecare settings across the United States provide care to more than 5 million patients every year. 
About one in five homecare patients are rehospitalized during the homecare episode, with up to two-thirds of these rehospitalizations occurring within the first 2 weeks of services. Timely allocation of homecare services might prevent a significant portion of these rehospitalizations. The first homecare nursing visit is one of the most critical steps of the homecare episode. This visit includes an assessment of the patient's capacity for self-care, medication reconciliation, an examination of the home environment, and a discussion regarding whether a caregiver is present. Hence, appropriate timing of the first visit is crucial, especially for patients with urgent health care needs. However, nurses often have limited and inaccurate information about incoming patients, and patient priority decisions vary significantly between nurses. We developed an innovative decision support tool called Priority for the First Nursing Visit Tool (PREVENT) to assist nurses in prioritizing patients in need of immediate first homecare nursing visits. Objective: This study aims to evaluate the effectiveness of the PREVENT tool on process and patient outcomes and to examine the reach, adoption, and implementation of PREVENT. Methods: Employing a pre-post design, survival analysis, and logistic regression with propensity score matching analysis, we will test the following hypotheses: compared with not using the tool in the preintervention phase, when homecare clinicians use the PREVENT tool, high-risk patients in the intervention phase will (1) receive more timely first homecare visits and (2) have decreased incidence of rehospitalization and have decreased emergency department use within 60 days. Reach, adoption, and implementation will be assessed using mixed methods including homecare admission staff interviews, think-aloud observations, and analysis of staffing and other relevant data. Results: The study research protocol was approved by the institutional review board in October 2019. PREVENT is currently being integrated into the electronic health records at the participating study sites. Data collection is planned to start in early 2021. Conclusions: Mixed methods will enable us to gain an in-depth understanding of the complex socio-technological aspects of the hospital to homecare transition. The results have the potential to (1) influence the standardization and individualization of nurse decision making through the use of cutting-edge technology and (2) improve patient outcomes in the understudied homecare setting. Trial Registration: ClinicalTrials.gov NCT04136951; https://clinicaltrials.gov/ct2/show/NCT04136951 International Registered Report Identifier (IRRID): PRR1-10.2196/20184 ", doi="10.2196/20184", url="https://www.researchprotocols.org/2021/1/e20184", url="http://www.ncbi.nlm.nih.gov/pubmed/33480855" } @Article{info:doi/10.2196/21804, author="Hill, Adele and Joyner, H. 
Christopher and Keith-Jopp, Chloe and Yet, Barbaros and Tuncer Sakar, Ceren and Marsh, William and Morrissey, Dylan", title="A Bayesian Network Decision Support Tool for Low Back Pain Using a RAND Appropriateness Procedure: Proposal and Internal Pilot Study", journal="JMIR Res Protoc", year="2021", month="Jan", day="15", volume="10", number="1", pages="e21804", keywords="back pain", keywords="decision making", keywords="Bayesian methods", keywords="consensus", abstract="Background: Low back pain (LBP) is an increasingly burdensome condition for patients and health professionals alike, with studies consistently demonstrating increasing persistent pain and disability. Previous decision support tools for LBP management have focused on a subset of factors owing to time constraints and ease of use for the clinician. With the explosion of interest in machine learning tools and the commitment from Western governments to introduce this technology, there are opportunities to develop intelligent decision support tools. We will do this for LBP using a Bayesian network, which will entail constructing a clinical reasoning model elicited from experts. Objective: This paper proposes a method for conducting a modified RAND appropriateness procedure to elicit the knowledge required to construct a Bayesian network from a group of domain experts in LBP, and reports the lessons learned from the internal pilot of the procedure. Methods: We propose to recruit expert clinicians with a special interest in LBP from across a range of medical specialties, such as orthopedics, rheumatology, and sports medicine. The procedure will consist of four stages. Stage 1 is an online elicitation of variables to be considered by the model, followed by a face-to-face workshop. Stage 2 is an online elicitation of the structure of the model, followed by a face-to-face workshop. Stage 3 consists of an online phase to elicit probabilities to populate the Bayesian network. Stage 4 is a rudimentary validation of the Bayesian network. Results: Ethical approval has been obtained from the Research Ethics Committee at Queen Mary University of London. An internal pilot of the procedure has been run with clinical colleagues from the research team. This showed that an alternating process of three remote activities and two in-person meetings was required to complete the elicitation without overburdening participants. Lessons learned have included the need for a bespoke online elicitation tool to run between face-to-face meetings and for careful operational definition of descriptive terms, even if widely clinically used. Further, tools are required to remotely deliver training about self-identification of various forms of cognitive bias and explain the underlying principles of a Bayesian network. The use of the internal pilot was recognized as being a methodological necessity. Conclusions: We have proposed a method to construct Bayesian networks that are representative of expert clinical reasoning, in this case for a musculoskeletal condition. We have tested the method with an internal pilot to refine the process prior to deployment, which indicates the process can be successful. The internal pilot has also revealed the software support requirements for the elicitation process to model clinical reasoning for a range of conditions. 
International Registered Report Identifier (IRRID): DERR1-10.2196/21804 ", doi="10.2196/21804", url="http://www.researchprotocols.org/2021/1/e21804/", url="http://www.ncbi.nlm.nih.gov/pubmed/33448937" } @Article{info:doi/10.2196/21447, author="Conca, Antoinette and Koch, Daniel and Regez, Katharina and Kutz, Alexander and B{\"a}chli, Ciril and Haubitz, Sebastian and Schuetz, Philipp and Mueller, Beat and Spirig, Rebecca and Petry, Heidi", title="Self-Care Index and Post-Acute Care Discharge Score to Predict Discharge Destination of Adult Medical Inpatients: Protocol for a Multicenter Validation Study", journal="JMIR Res Protoc", year="2021", month="Jan", day="14", volume="10", number="1", pages="e21447", keywords="discharge planning", keywords="forecasting", keywords="logistic models", keywords="patient transfer", keywords="post-acute care discharge score", keywords="protocol", keywords="self-care index", keywords="sensitivity", keywords="specificity", keywords="validation study", abstract="Background: Delays in patient discharge can not only lead to deterioration, especially among geriatric patients, but also consume unnecessary resources at the hospital level. Many of these delays and their negative impact may be preventable by early focused screening to identify patients at risk for transfer to a post-acute care facility. Early interprofessional discharge planning is crucial in order to identify the appropriate individual discharge destination. While prediction of discharge to a post-acute care facility using the post-acute care discharge score, the self-care index, and a combination of both has been shown in a single-center pilot study, an external validation is still missing. Objective: This paper outlines the study protocol and methodology currently being used to replicate the previous pilot findings and determine whether the post-acute care discharge score, the self-care index, or the combination of both can reliably identify patients requiring transfer to post-acute care facilities. Methods: This study will use prospective data involving all phases of the quasi-experimental study ``In-HospiTOOL'' conducted at 7 Swiss hospitals in urban and rural areas. During an 18-month period, consecutive adult medical patients admitted to the hospitals through the emergency department will be included. We aim to include 6000 patients based on a sample size calculation. These data will enable a prospective external validation of the prediction instruments. Results: We expect to gain more insight into the predictive capability of the above-mentioned prediction instruments. This approach will allow us to obtain important information about the generalizability of the three different models. The study was approved by the institutional review board on November 21, 2016, and funded in May 2020. Expected results are planned to be published in spring 2021. Conclusions: This study will provide evidence on prognostic properties, comparative performance, reliability of scoring, and suitability of the instruments for screening purposes in order to recommend their application in clinical practice. 
International Registered Report Identifier (IRRID): DERR1-10.2196/21447 ", doi="10.2196/21447", url="http://www.researchprotocols.org/2021/1/e21447/", url="http://www.ncbi.nlm.nih.gov/pubmed/33263553" } @Article{info:doi/10.2196/18675, author="Parsons Leigh, Jeanna and Brundin-Mather, Rebecca and Whalen-Browne, Liam and Kashyap, Devika and Sauro, Khara and Soo, Andrea and Petersen, Jennie and Taljaard, Monica and Stelfox, T. Henry", title="Effectiveness of an Electronic Communication Tool on Transitions in Care From the Intensive Care Unit: Protocol for a Cluster-Specific Pre-Post Trial", journal="JMIR Res Protoc", year="2021", month="Jan", day="8", volume="10", number="1", pages="e18675", keywords="patient transfers", keywords="interprovider communication", keywords="transitions in care", keywords="electronic charting", keywords="clinical documentation", keywords="discharge tools", keywords="patient discharge summaries", keywords="electronic transfer summaries", keywords="intensive care unit", keywords="electronic tool", keywords="ICU", keywords="protocol", keywords="effective", keywords="communication", keywords="transfer", keywords="patient", keywords="transition", abstract="Background: Transitions in care are vulnerable periods in health care that can expose patients to preventable errors due to incomplete or delayed communication between health care providers. Transitioning critically ill patients from intensive care units (ICUs) to other patient care units (PCUs) is particularly risky, due to the high acuity of the patients and the diversity of health care providers involved in their care. Instituting structured documentation to standardize written communication between health care providers during transitions has been identified as a promising means to reduce communication breakdowns. We developed an evidence-informed, computer-enabled, ICU-specific structured tool---an electronic transfer (e-transfer) tool---to facilitate and standardize the composition of written transfer summaries in the ICUs of one Canadian city. The tool consisted of 10 primary sections with a user interface combination of structured, automated, and free-text fields. Objective: Our overarching goal is to evaluate whether implementation of our e-transfer tool will improve the completeness and timeliness of transfer summaries and streamline communications between health care providers during high-risk transitions. Methods: This study is a cluster-specific pre-post trial, with randomized and staggered implementation of the e-transfer tool in four hospitals in Calgary, Alberta. Hospitals (ie, clusters) were allocated randomly to cross over every 2 months from control (ie, dictation only) to intervention (ie, e-transfer tool). Implementation at each site was facilitated with user education, point-of-care support, and audit and feedback. We will compare transfer summaries randomly sampled over 6 months postimplementation to summaries randomly sampled over 6 months preimplementation. The primary outcome will be a binary composite measure of the timeliness and completeness of transfer summaries. Secondary measures will include overall completeness, timeliness, and provider ratings of transfer summaries; hospital and ICU lengths of stay; and post-ICU patient outcomes, including ICU readmission, adverse events, cardiac arrest, rapid response team activation, and mortality. We will use descriptive statistics (ie, medians and means) to describe demographic characteristics. 
The primary outcome will be compared within each hospital pre- and postimplementation using separate logistic regression models for each hospital, with adjustment for patient characteristics. Results: Participating hospitals were cluster randomized to the intervention between July 2018 and January 2019. Preliminary extraction of ICU patient admission lists was completed in September 2019. We anticipate that evaluation data collection will be completed by early 2021, with first results ready for publication in spring or summer 2021. Conclusions: This study will report the impact of implementing an evidence-informed, computer-enabled, ICU-specific structured transfer tool on communication and preventable medical errors among patients transferred from the ICU to other hospital care units. Trial Registration: ClinicalTrials.gov NCT03590002; https://www.clinicaltrials.gov/ct2/show/NCT03590002 International Registered Report Identifier (IRRID): DERR1-10.2196/18675 ", doi="10.2196/18675", url="https://www.researchprotocols.org/2021/1/e18675", url="http://www.ncbi.nlm.nih.gov/pubmed/33416509" } @Article{info:doi/10.2196/18001, author="Wheless, Lee and Baker, Laura and Edwards, LaVar and Anand, Nimay and Birdwell, Kelly and Hanlon, Allison and Chren, Mary-Margaret", title="Development of Phenotyping Algorithms for the Identification of Organ Transplant Recipients: Cohort Study", journal="JMIR Med Inform", year="2020", month="Dec", day="10", volume="8", number="12", pages="e18001", keywords="phenotyping", keywords="electronic health record", keywords="organ transplant recipients", abstract="Background: Studies involving organ transplant recipients (OTRs) are often limited to the variables collected in the national Scientific Registry of Transplant Recipients database. Electronic health records contain additional variables that can augment this data source if OTRs can be identified accurately. Objective: The aim of this study was to develop phenotyping algorithms to identify OTRs from electronic health records. Methods: We used Vanderbilt's deidentified version of its electronic health record database, which contains nearly 3 million subjects, to develop algorithms to identify OTRs. We identified all 19,817 individuals with at least one International Classification of Diseases (ICD) or Current Procedural Terminology (CPT) code for organ transplantation. We performed a chart review on 1350 randomly selected individuals to determine the transplant status. We constructed machine learning models to calculate positive predictive values and sensitivity for combinations of codes by using classification and regression trees, random forest, and extreme gradient boosting algorithms. Results: Of the 1350 reviewed patient charts, 827 were organ transplant recipients while 511 had no record of a transplant, and 12 were equivocal. Most patients with only 1 or 2 transplant codes did not have a transplant. The most common reasons for being labeled a nontransplant patient were the lack of data (229/511, 44.8\%) or the patient being evaluated for an organ transplant (174/511, 34.1\%). All 3 machine learning algorithms identified OTRs with overall >90\% positive predictive value and >88\% sensitivity. Conclusions: Electronic health records linked to biobanks are increasingly used to conduct large-scale studies but have not been well-utilized in organ transplantation research. 
We present rigorously evaluated methods for phenotyping OTRs from electronic health records that will enable the use of the full spectrum of clinical data in transplant research. Using several different machine learning algorithms, we were able to identify transplant cases with high accuracy by using only ICD and CPT codes. ", doi="10.2196/18001", url="http://medinform.jmir.org/2020/12/e18001/", url="http://www.ncbi.nlm.nih.gov/pubmed/33156808" } @Article{info:doi/10.2196/20840, author="Shehzad, Aaqib and Rockwood, Kenneth and Stanley, Justin and Dunn, Taylor and Howlett, E. Susan", title="Use of Patient-Reported Symptoms from an Online Symptom Tracking Tool for Dementia Severity Staging: Development and Validation of a Machine Learning Approach", journal="J Med Internet Res", year="2020", month="Nov", day="11", volume="22", number="11", pages="e20840", keywords="dementia stage", keywords="Alzheimer disease", keywords="mild cognitive impairment", keywords="machine learning", abstract="Background: SymptomGuide Dementia (DGI Clinical Inc) is a publicly available online symptom tracking tool to support caregivers of persons living with dementia. The value of such data is enhanced when the specific dementia stage is identified. Objective: We aimed to develop a supervised machine learning algorithm to classify dementia stages based on tracked symptoms. Methods: We employed clinical data from 717 people from 3 sources: (1) a memory clinic; (2) long-term care; and (3) an open-label trial of donepezil in vascular and mixed dementia (VASPECT). Symptoms were captured with SymptomGuide Dementia. A clinician classified participants into 4 groups using either the Functional Assessment Staging Test or the Global Deterioration Scale as mild cognitive impairment, mild dementia, moderate dementia, or severe dementia. Individualized symptom profiles from the pooled data were used to train machine learning models to predict dementia severity. Models trained with 6 different machine learning algorithms were compared using nested cross-validation to identify the best performing model. Model performance was assessed using measures of balanced accuracy, precision, recall, Cohen $\kappa$, area under the receiver operating characteristic curve (AUROC), and area under the precision-recall curve (AUPRC). The best performing algorithm was used to train a model optimized for balanced accuracy. Results: The study population was mostly female (424/717, 59.1\%), older adults (mean 77.3 years, SD 10.6, range 40-100) with mild to moderate dementia (332/717, 46.3\%). Age, duration of symptoms, 37 unique dementia symptoms, and 10 symptom-derived variables were used to distinguish dementia stages. A model trained with a support vector machine learning algorithm using a one-versus-rest approach showed the best performance. The correct dementia stage was identified with 83\% balanced accuracy (Cohen $\kappa$=0.81, AUPRC 0.91, AUROC 0.96). The best performance was seen when classifying severe dementia (AUROC 0.99). Conclusions: A supervised machine learning algorithm exhibited excellent performance in identifying dementia stages based on dementia symptoms reported in an online environment. This novel dementia staging algorithm can be used to describe dementia stage based on user-reported symptoms. This type of symptom recording offers real-world data that reflect important symptoms in people with dementia. 
", doi="10.2196/20840", url="http://www.jmir.org/2020/11/e20840/", url="http://www.ncbi.nlm.nih.gov/pubmed/33174853" } @Article{info:doi/10.2196/18507, author="Sudo, Kyoko and Murasaki, Kazuhiko and Kinebuchi, Tetsuya and Kimura, Shigeko and Waki, Kayo", title="Machine Learning--Based Screening of Healthy Meals From Image Analysis: System Development and Pilot Study", journal="JMIR Form Res", year="2020", month="Oct", day="26", volume="4", number="10", pages="e18507", keywords="meal images", keywords="healthiness", keywords="deep neural network", keywords="nutrition", keywords="medical informatics", keywords="diet", keywords="neural network", abstract="Background: Recent research has led to the development of many information technology--supported systems for health care control, including systems estimating nutrition from images of meals. Systems that capture data about eating and exercise are useful for people with diabetes as well as for people who are simply on a diet. Continuous monitoring is key to effective dietary control, requiring systems that are simple to use and motivate users to pay attention to their meals. Unfortunately, most current systems are complex or fail to motivate. Such systems require some manual inputs such as selection of an icon or image, or by inputting the category of the user's food. The nutrition information fed back to users is not especially helpful, as only the estimated detailed nutritional values contained in the meal are typically provided. Objective: In this paper, we introduce healthiness of meals as a more useful and meaningful general standard, and present a novel algorithm that can estimate healthiness from meal images without requiring manual inputs. Methods: We propose a system that estimates meal healthiness using a deep neural network that extracts features and a ranking network that learns the relationship between the degrees of healthiness of a meal using a dataset prepared by a human dietary expert. First, we examined whether a registered dietitian can judge the healthiness of meals solely by viewing meal images using a small dataset (100 meals). We then generated ranking data based on comparisons of sets of meal images (850 meals) by a registered dietitian's viewing meal images and trained a ranking network. Finally, we estimated each meal's healthiness score to detect unhealthy meals. Results: The ranking estimated by the proposed network and the ranking of healthiness based on the dietitian's judgment were correlated (correlation coefficient 0.72). In addition, extracting network features through pretraining with a publicly available large meal dataset enabled overcoming the limited availability of specific healthiness data. Conclusions: We have presented an image-based system that can rank meals in terms of the overall healthiness of the dishes constituting the meal. The ranking obtained by the proposed method showed a good correlation to nutritional value--based ranking by a dietitian. We then proposed a network that allows conditions that are important for judging the meal image, extracting features that eliminate background information and are independent of location. Under these conditions, the experimental results showed that our network achieves higher accuracy of healthiness ranking estimation than the conventional image ranking method. 
The results of this experiment in detecting unhealthy meals suggest that our system can be used to assist health care workers in establishing meal plans for patients with diabetes who need advice in choosing healthy meals. ", doi="10.2196/18507", url="http://formative.jmir.org/2020/10/e18507/", url="http://www.ncbi.nlm.nih.gov/pubmed/33104010" } @Article{info:doi/10.2196/16901, author="Fan, Yunzhou and Wu, Yanyan and Cao, Xiongjing and Zou, Junning and Zhu, Ming and Dai, Di and Lu, Lin and Yin, Xiaoxv and Xiong, Lijuan", title="Automated Cluster Detection of Health Care--Associated Infection Based on the Multisource Surveillance of Process Data in the Area Network: Retrospective Study of Algorithm Development and Validation", journal="JMIR Med Inform", year="2020", month="Oct", day="23", volume="8", number="10", pages="e16901", keywords="health care--associated infection", keywords="cluster detection", keywords="early warning", keywords="multi sources surveillance", keywords="process data", abstract="Background: The cluster detection of health care--associated infections (HAIs) is crucial for identifying HAI outbreaks in the early stages. Objective: We aimed to verify whether multisource surveillance based on the process data in an area network can be effective in detecting HAI clusters. Methods: We retrospectively analyzed the incidence of HAIs and 3 indicators of process data relative to infection, namely, antibiotic utilization rate in combination, inspection rate of bacterial specimens, and positive rate of bacterial specimens, from 4 independent high-risk units in a tertiary hospital in China. We utilized the Shewhart warning model to detect the peaks of the time-series data. Subsequently, we designed 5 surveillance strategies based on the process data for the HAI cluster detection: (1) antibiotic utilization rate in combination only, (2) inspection rate of bacterial specimens only, (3) positive rate of bacterial specimens only, (4) antibiotic utilization rate in combination + inspection rate of bacterial specimens + positive rate of bacterial specimens in parallel, and (5) antibiotic utilization rate in combination + inspection rate of bacterial specimens + positive rate of bacterial specimens in series. We used the receiver operating characteristic (ROC) curve and Youden index to evaluate the warning performance of these surveillance strategies for the detection of HAI clusters. Results: The ROC curves of the 5 surveillance strategies were located above the standard line, and the area under the curve of the ROC was larger in the parallel strategy than in the series strategy and the single-indicator strategies. The optimal Youden indexes were 0.48 (95\% CI 0.29-0.67) at a threshold of 1.5 in the antibiotic utilization rate in combination--only strategy, 0.49 (95\% CI 0.45-0.53) at a threshold of 0.5 in the inspection rate of bacterial specimens--only strategy, 0.50 (95\% CI 0.28-0.71) at a threshold of 1.1 in the positive rate of bacterial specimens--only strategy, 0.63 (95\% CI 0.49-0.77) at a threshold of 2.6 in the parallel strategy, and 0.32 (95\% CI 0.00-0.65) at a threshold of 0.0 in the series strategy. The warning performance of the parallel strategy was greater than that of the single-indicator strategies when the threshold exceeded 1.5. Conclusions: The multisource surveillance of process data in the area network is an effective method for the early detection of HAI clusters. 
The combination of multisource data and the threshold of the warning model are 2 important factors that influence the performance of the model. ", doi="10.2196/16901", url="http://medinform.jmir.org/2020/10/e16901/", url="http://www.ncbi.nlm.nih.gov/pubmed/32965228" } @Article{info:doi/10.2196/19516, author="Dolci, Elisa and Sch{\"a}rer, Barbara and Grossmann, Nicole and Musy, Naima Sarah and Z{\'u}{\~n}iga, Franziska and Bachnick, Stefanie and Simon, Michael", title="Automated Fall Detection Algorithm With Global Trigger Tool, Incident Reports, Manual Chart Review, and Patient-Reported Falls: Algorithm Development and Validation With a Retrospective Diagnostic Accuracy Study", journal="J Med Internet Res", year="2020", month="Sep", day="21", volume="22", number="9", pages="e19516", keywords="falls", keywords="adverse event", keywords="harm", keywords="algorithm", keywords="natural language processing", abstract="Background: Falls are common adverse events in hospitals, frequently leading to additional health costs due to prolonged stays and extra care. Therefore, reliable fall detection is vital to develop and test fall prevention strategies. However, conventional methods---voluntary incident reports and manual chart reviews---are error-prone and time consuming, respectively. Using a search algorithm to examine patients' electronic health record data and flag fall indicators offers an inexpensive, sensitive, cost-effective alternative. Objective: This study's purpose was to develop a fall detection algorithm for use with electronic health record data, then to evaluate it alongside the Global Trigger Tool, incident reports, a manual chart review, and patient-reported falls. Methods: Conducted on 2 campuses of a large hospital system in Switzerland, this retrospective diagnostic accuracy study consisted of 2 substudies: the first, targeting 240 patients, for algorithm development and the second, targeting 298 patients, for validation. In the development study, we compared the new algorithm's in-hospital fall rates with those indicated by the Global Trigger Tool and incident reports; in the validation study, we compared the algorithm's in-hospital fall rates with those from patient-reported falls and manual chart review. We compared the various methods by calculating sensitivity, specificity, and predictive values. Results: Twenty in-hospital falls were discovered in the development study sample. Of these, the algorithm detected 19 (sensitivity 95\%), the Global Trigger Tool detected 18 (90\%), and incident reports detected 14 (70\%). Of the 15 falls found in the validation sample, the algorithm identified all 15 (100\%), the manual chart review identified 14 (93\%), and the patient-reported fall measure identified 5 (33\%). Owing to relatively high numbers of false positives based on falls present on admission, the algorithm's positive predictive values were 50\% (development sample) and 47\% (validation sample). Instead of requiring 10 minutes per case for a full manual review or 20 minutes to apply the Global Trigger Tool, the algorithm requires only a few seconds, after which only the positive results (roughly 11\% of the full case number) require review. Conclusions: The newly developed electronic health record algorithm demonstrated very high sensitivity for fall detection. Applied in near real time, the algorithm can record in-hospital falls events effectively and help to develop and test fall prevention measures. 
", doi="10.2196/19516", url="http://www.jmir.org/2020/9/e19516/", url="http://www.ncbi.nlm.nih.gov/pubmed/32955445" } @Article{info:doi/10.2196/18542, author="Weissler, Hope Elizabeth and Lippmann, J. Steven and Smerek, M. Michelle and Ward, A. Rachael and Kansal, Aman and Brock, Adam and Sullivan, C. Robert and Long, Chandler and Patel, R. Manesh and Greiner, A. Melissa and Hardy, Chantelle N. and Curtis, H. Lesley and Jones, Schuyler W.", title="Model-Based Algorithms for Detecting Peripheral Artery Disease Using Administrative Data From an Electronic Health Record Data System: Algorithm Development Study", journal="JMIR Med Inform", year="2020", month="Aug", day="19", volume="8", number="8", pages="e18542", keywords="peripheral artery disease", keywords="patient selection", keywords="electronic health records", keywords="cardiology", keywords="health data", abstract="Background: Peripheral artery disease (PAD) affects 8 to 10 million Americans, who face significantly elevated risks of both mortality and major limb events such as amputation. Unfortunately, PAD is relatively underdiagnosed, undertreated, and underresearched, leading to wide variations in treatment patterns and outcomes. Efforts to improve PAD care and outcomes have been hampered by persistent difficulties identifying patients with PAD for clinical and investigatory purposes. Objective: The aim of this study is to develop and validate a model-based algorithm to detect patients with peripheral artery disease (PAD) using data from an electronic health record (EHR) system. Methods: An initial query of the EHR in a large health system identified all patients with PAD-related diagnosis codes for any encounter during the study period. Clinical adjudication of PAD diagnosis was performed by chart review on a random subgroup. A binary logistic regression to predict PAD was built and validated using a least absolute shrinkage and selection operator (LASSO) approach in the adjudicated patients. The algorithm was then applied to the nonsampled records to further evaluate its performance. Results: The initial EHR data query using 406 diagnostic codes yielded 15,406 patients. Overall, 2500 patients were randomly selected for ground truth PAD status adjudication. In the end, 108 code flags remained after removing rarely- and never-used codes. We entered these code flags plus administrative encounter, imaging, procedure, and specialist flags into a LASSO model. The area under the curve for this model was 0.862. Conclusions: The algorithm we constructed has two main advantages over other approaches to the identification of patients with PAD. First, it was derived from a broad population of patients with many different PAD manifestations and treatment pathways across a large health system. Second, our model does not rely on clinical notes and can be applied in situations in which only administrative billing data (eg, large administrative data sets) are available. A combination of diagnosis codes and administrative flags can accurately identify patients with PAD in large cohorts. 
", doi="10.2196/18542", url="http://medinform.jmir.org/2020/8/e18542/", url="http://www.ncbi.nlm.nih.gov/pubmed/32663152" } @Article{info:doi/10.2196/20974, author="Li, Yong", title="Diagnostic Model for In-Hospital Bleeding in Patients with Acute ST-Segment Elevation Myocardial Infarction: Algorithm Development and Validation", journal="JMIR Med Inform", year="2020", month="Aug", day="14", volume="8", number="8", pages="e20974", keywords="coronary disease", keywords="ST-segment elevation myocardial infarction", keywords="hemorrhage", keywords="nomogram", abstract="Background: Bleeding complications in patients with acute ST-segment elevation myocardial infarction (STEMI) have been associated with increased risk of subsequent adverse consequences. Objective: The objective of our study was to develop and externally validate a diagnostic model of in-hospital bleeding. Methods: We performed multivariate logistic regression of a cohort for hospitalized patients with acute STEMI in the emergency department of a university hospital. Participants: The model development data set was obtained from 4262 hospitalized patients with acute STEMI from January 2002 to December 2013. A set of 6015 hospitalized patients with acute STEMI from January 2014 to August 2019 were used for external validation. We used logistic regression analysis to analyze the risk factors of in-hospital bleeding in the development data set. We developed a diagnostic model of in-hospital bleeding and constructed a nomogram. We assessed the predictive performance of the diagnostic model in the validation data sets by examining measures of discrimination, calibration, and decision curve analysis (DCA). Results: In-hospital bleeding occurred in 112 of 4262 participants (2.6\%) in the development data set. The strongest predictors of in-hospital bleeding were advanced age and high Killip classification. Logistic regression analysis showed differences between the groups with and without in-hospital bleeding in age (odds ratio [OR] 1.047, 95\% CI 1.029-1.066; P<.001), Killip III (OR 3.265, 95\% CI 2.008-5.31; P<.001), and Killip IV (OR 5.133, 95\% CI 3.196-8.242; P<.001). We developed a diagnostic model of in-hospital bleeding. The area under the receiver operating characteristic curve (AUC) was 0.777 (SD 0.021, 95\% CI 0.73576-0.81823). We constructed a nomogram based on age and Killip classification. In-hospital bleeding occurred in 117 of 6015 participants (1.9\%) in the validation data set. The AUC was 0.7234 (SD 0.0252, 95\% CI 0.67392-0.77289). Conclusions: We developed and externally validated a diagnostic model of in-hospital bleeding in patients with acute STEMI. The discrimination, calibration, and DCA of the model were found to be satisfactory. 
Trial Registration: ChiCTR.org ChiCTR1900027578; http://www.chictr.org.cn/showprojen.aspx?proj=45926 ", doi="10.2196/20974", url="http://medinform.jmir.org/2020/8/e20974/", url="http://www.ncbi.nlm.nih.gov/pubmed/32795995" } @Article{info:doi/10.2196/18370, author="Liu, Jiaxing and Zhao, Yang and Lai, Boya and Wang, Hailiang and Tsui, Leung Kwok", title="Wearable Device Heart Rate and Activity Data in an Unsupervised Approach to Personalized Sleep Monitoring: Algorithm Validation", journal="JMIR Mhealth Uhealth", year="2020", month="Aug", day="5", volume="8", number="8", pages="e18370", keywords="sleep/wake identification", keywords="hidden Markov model", keywords="personalized health", keywords="unsupervised learning", keywords="sleep", keywords="physical activity", keywords="wearables", keywords="heart rate", abstract="Background: The proliferation of wearable devices that collect activity and heart rate data has facilitated new ways to measure sleeping and waking durations unobtrusively and longitudinally. Most existing sleep/wake identification algorithms are based on activity only and are trained on expensive and laboriously annotated polysomnography (PSG). Heart rate can also be reflective of sleep/wake transitions, which has motivated its investigation herein in an unsupervised algorithm. Moreover, it is necessary to develop a personalized approach to deal with interindividual variance in sleep/wake patterns. Objective: We aimed to develop an unsupervised personalized sleep/wake identification algorithm using multifaceted data to explore the benefits of incorporating both heart rate and activity level in these types of algorithms and to compare this approach's output with that of an existing commercial wearable device's algorithms. Methods: In this study, a total of 14 community-dwelling older adults wore wearable devices (Fitbit Alta; Fitbit Inc) 24 hours a day and 7 days a week over a period of 3 months, during which their heart rate and activity data were collected. After preprocessing the data, a model was developed to distinguish sleep/wake states based on each individual's data. We proposed the use of hidden Markov models and compared different modeling schemes. With the best model selected, sleep/wake patterns were characterized by estimated parameters in hidden Markov models, and sleep/wake states were identified. Results: When applying our proposed algorithm on a daily basis, we found there were significant differences in estimated parameters between weekday models and weekend models for some participants. Conclusions: Our unsupervised approach can be effectively implemented based on an individual's multifaceted sleep-related data from a commercial wearable device. A personalized model is shown to be necessary given the interindividual variability in estimated parameters. ", doi="10.2196/18370", url="https://mhealth.jmir.org/2020/8/e18370", url="http://www.ncbi.nlm.nih.gov/pubmed/32755887" } @Article{info:doi/10.2196/16422, author="Bao, Hongchang and Baker, O. Christopher J. 
and Adisesh, Anil", title="Occupation Coding of Job Titles: Iterative Development of an Automated Coding Algorithm for the Canadian National Occupation Classification (ACA-NOC)", journal="JMIR Form Res", year="2020", month="Aug", day="5", volume="4", number="8", pages="e16422", keywords="occupation coding", keywords="automated coding", keywords="occupational health", keywords="job title", abstract="Background: In many research studies, the identification of social determinants is an important activity; in particular, information about occupations is frequently added to existing patient data. Such information is usually solicited during interviews with open-ended questions such as ``What is your job?'' and ``What industry sector do you work in?'' Before being able to use this information for further analysis, the responses need to be categorized using a coding system, such as the Canadian National Occupational Classification (NOC). Manual coding is the usual method; it is a time-consuming and error-prone activity that is well suited to automation. Objective: This study aims to facilitate automated coding by introducing a rigorous algorithm that will be able to identify the NOC (2016) codes using only job title and industry information as input. Using manually coded data sets, we sought to benchmark and iteratively improve the performance of the algorithm. Methods: We developed the ACA-NOC algorithm based on the NOC (2016), which allowed users to match NOC codes with job and industry titles. We employed several different search strategies in the ACA-NOC algorithm to find the best match, including exact search, minor exact search, like search, near (same order) search, near (different order) search, any search, and weak match search. In addition, a filtering step based on the hierarchical structure of the NOC data was applied to the algorithm to select the best matching codes. Results: The ACA-NOC was applied to over 500 manually coded job and industry titles. The accuracy rate at the four-digit NOC code level was 58.7\% (332/566) and improved when broader job categories were considered (65.0\% at the three-digit NOC code level, 72.3\% at the two-digit NOC code level, and 81.6\% at the one-digit NOC code level). Conclusions: The ACA-NOC is a rigorous algorithm for automatically coding the Canadian NOC system and has been evaluated using real-world data. It allows researchers to code occupations in moderate-sized data sets in a timely and cost-efficient manner such that further analytics are possible. Initial assessments indicate that it has state-of-the-art performance and is readily extensible upon further benchmarking on larger data sets. ", doi="10.2196/16422", url="https://formative.jmir.org/2020/8/e16422", url="http://www.ncbi.nlm.nih.gov/pubmed/32755893" } @Article{info:doi/10.2196/16850, author="Zhang, Lei and Shang, Xianwen and Sreedharan, Subhashaan and Yan, Xixi and Liu, Jianbin and Keel, Stuart and Wu, Jinrong and Peng, Wei and He, Mingguang", title="Predicting the Development of Type 2 Diabetes in a Large Australian Cohort Using Machine-Learning Techniques: Longitudinal Survey Study", journal="JMIR Med Inform", year="2020", month="Jul", day="28", volume="8", number="7", pages="e16850", keywords="diabetes", keywords="machine learning", keywords="risk prediction", keywords="cohort study", abstract="Background: Previous conventional models for the prediction of diabetes could be updated by incorporating the increasing amount of health data available and new risk prediction methodology. 
Objective: We aimed to develop a substantially improved diabetes risk prediction model using sophisticated machine-learning algorithms based on a large retrospective population cohort of over 230,000 people who were enrolled in the study during 2006-2017. Methods: We collected demographic, medical, behavioral, and incidence data for type 2 diabetes mellitus (T2DM) in 236,684 diabetes-free participants recruited from the 45 and Up Study. We predicted and compared the risk of diabetes onset in these participants at 3, 5, 7, and 10 years based on three machine-learning approaches and the conventional regression model. Results: Overall, 6.05\% (14,313/236,684) of the participants developed T2DM during an average 8.8-year follow-up period. The 10-year diabetes incidence in men was 8.30\% (8.08\%-8.49\%), which was significantly higher (odds ratio 1.37, 95\% CI 1.32-1.41) than that in women at 6.20\% (6.00\%-6.40\%). The incidence of T2DM was doubled in individuals with obesity (men: 17.78\% [17.05\%-18.43\%]; women: 14.59\% [13.99\%-15.17\%]) compared with that of nonobese individuals. The gradient boosting machine model showed the best performance among the four models (area under the curve of 79\% in 3-year prediction and 75\% in 10-year prediction). All machine-learning models predicted BMI as the most significant factor contributing to diabetes onset, which explained 12\%-50\% of the variance in the prediction of diabetes. The model predicted that if BMI in obese and overweight participants could be hypothetically reduced to a healthy range, the 10-year probability of diabetes onset would be significantly reduced from 8.3\% to 2.8\% (P<.001). Conclusions: A one-time self-reported survey can accurately predict the risk of diabetes using a machine-learning approach. Achieving a healthy BMI can significantly reduce the risk of developing T2DM. ", doi="10.2196/16850", url="https://medinform.jmir.org/2020/7/e16850", url="http://www.ncbi.nlm.nih.gov/pubmed/32720912" } @Article{info:doi/10.2196/15918, author="Spengler, Helmut and Lang, Claudia and Mahapatra, Tanmaya and Gatz, Ingrid and Kuhn, A. Klaus and Prasser, Fabian", title="Enabling Agile Clinical and Translational Data Warehousing: Platform Development and Evaluation", journal="JMIR Med Inform", year="2020", month="Jul", day="21", volume="8", number="7", pages="e15918", keywords="cohort selection", keywords="hypothesis generation", keywords="data warehouse", keywords="translational research", keywords="hosting", keywords="Docker", keywords="extract-transform-load", keywords="i2b2", keywords="tranSMART", abstract="Background: Modern data-driven medical research provides new insights into the development and course of diseases and enables novel methods of clinical decision support. Clinical and translational data warehouses, such as Informatics for Integrating Biology and the Bedside (i2b2) and tranSMART, are important infrastructure components that provide users with unified access to the large heterogeneous data sets needed to realize this and support use cases such as cohort selection, hypothesis generation, and ad hoc data analysis. Objective: Often, different warehousing platforms are needed to support different use cases and different types of data. Moreover, to achieve an optimal data representation within the target systems, specific domain knowledge is needed when designing data-loading processes. Consequently, informaticians need to work closely with clinicians and researchers in short iterations. 
This is a challenging task as installing and maintaining warehousing platforms can be complex and time consuming. Furthermore, data loading typically requires significant effort in terms of data preprocessing, cleansing, and restructuring. The platform described in this study aims to address these challenges. Methods: We formulated system requirements to achieve agility in terms of platform management and data loading. The derived system architecture includes a cloud infrastructure with unified management interfaces for multiple warehouse platforms and a data-loading pipeline with a declarative configuration paradigm and meta-loading approach. The latter compiles data and configuration files into forms required by existing loading tools, thereby automating a wide range of data restructuring and cleansing tasks. We demonstrated the fulfillment of the requirements and the originality of our approach by an experimental evaluation and a comparison with previous work. Results: The platform supports both i2b2 and tranSMART with built-in security. Our experiments showed that the loading pipeline accepts input data that cannot be loaded with existing tools without preprocessing. Moreover, it lowered efforts significantly, reducing the size of configuration files required by factors of up to 22 for tranSMART and 1135 for i2b2. The time required to perform the compilation process was roughly equivalent to the time required for actual data loading. Comparison with other tools showed that our solution was the only tool fulfilling all requirements. Conclusions: Our platform significantly reduces the efforts required for managing clinical and translational warehouses and for loading data in various formats and structures, such as complex entity-attribute-value structures often found in laboratory data. Moreover, it facilitates the iterative refinement of data representations in the target platforms, as the required configuration files are very compact. The quantitative measurements presented are consistent with our experiences of significantly reduced efforts for building warehousing platforms in close cooperation with medical researchers. Both the cloud-based hosting infrastructure and the data-loading pipeline are available to the community as open source software with comprehensive documentation. ", doi="10.2196/15918", url="https://medinform.jmir.org/2020/7/e15918", url="http://www.ncbi.nlm.nih.gov/pubmed/32706673" } @Article{info:doi/10.2196/18055, author="Abdalla, Mohamed and Abdalla, Moustafa and Hirst, Graeme and Rudzicz, Frank", title="Exploring the Privacy-Preserving Properties of Word Embeddings: Algorithmic Validation Study", journal="J Med Internet Res", year="2020", month="Jul", day="15", volume="22", number="7", pages="e18055", keywords="privacy", keywords="data anonymization", keywords="natural language processing", keywords="personal health records", abstract="Background: Word embeddings are dense numeric vectors used to represent language in neural networks. Until recently, there had been no publicly released embeddings trained on clinical data. Our work is the first to study the privacy implications of releasing these models. Objective: This paper aims to demonstrate that traditional word embeddings created on clinical corpora that have been deidentified by removing personal health information (PHI) can nonetheless be exploited to reveal sensitive patient information. 
Methods: We used embeddings created from 400,000 doctor-written consultation notes and experimented with 3 common word embedding methods to explore the privacy-preserving properties of each. Results: We found that if publicly released embeddings are trained from a corpus anonymized by PHI removal, it is possible to reconstruct up to 68.5\% (n=411/600) of the full names that remain in the deidentified corpus and to link associated sensitive information to specific patients in the corpus from which the embeddings were created. We also found that the distance between the word vector representation of a patient's name and a diagnostic billing code is informative and differs significantly from the distance between the name and a code not billed for that patient. Conclusions: Special care must be taken when sharing word embeddings created from clinical texts, as current approaches may compromise patient privacy. If PHI removal is used for anonymization before traditional word embeddings are trained, it is possible to attribute sensitive information to patients who have not been fully deidentified by the (necessarily imperfect) removal algorithms. A promising alternative (ie, anonymization by PHI replacement) may avoid these flaws. Our results are timely and critical, as an increasing number of researchers are pushing for publicly available health data. ", doi="10.2196/18055", url="https://www.jmir.org/2020/7/e18055", url="http://www.ncbi.nlm.nih.gov/pubmed/32673230" } @Article{info:doi/10.2196/16849, author="Di Tosto, Gennaro and McAlearney, Scheck Ann and Fareed, Naleef and Huerta, R. Timothy", title="Metrics for Outpatient Portal Use Based on Log File Analysis: Algorithm Development", journal="J Med Internet Res", year="2020", month="Jun", day="12", volume="22", number="6", pages="e16849", keywords="patient portals", keywords="health records, personal", keywords="health information technology", keywords="electronic health record", abstract="Background: Web-based outpatient portals help patients engage in the management of their health by allowing them to access their medical information, schedule appointments, track their medications, and communicate with their physicians and care team members. Initial studies have shown that portal adoption positively affects health outcomes; however, early studies typically relied on survey data. Using data from health portal applications, we conducted systematic assessments of patients' use of an outpatient portal to examine how patients engage with the tool. Objective: This study aimed to document the functionality of an outpatient portal in the context of outpatient care by mining portal usage data and to provide insights into how patients use this tool. Methods: Using audit log files from the outpatient portal associated with the electronic health record system implemented at a large multihospital academic medical center, we investigated the behavioral traces of a study population of 2607 patients who used the portal between July 2015 and February 2019. Patient portal use was defined as having an active account and having accessed any portal function more than once during the study time frame. Results: Through our analysis of audit log file data of the number and type of user interactions, we developed a taxonomy of functions and actions and computed analytic metrics, including frequency and comprehensiveness of use. We additionally documented the computational steps required to diagnose artifactual data and arrive at valid usage metrics. 
Of the 2607 patients in our sample, 2511 were active users of the patient portal; the median number of sessions was 94 (IQR 207). Function use was comprehensive at the patient level, while each session was instead limited to the use of one specific function. Only 17.45\% (78,787/451,762) of the sessions were linked to activities involving more than one portal function. Conclusions: In discussing the full methodological choices made in our analysis, we hope to promote the replicability of our study at other institutions and contribute to the establishment of best practices that can facilitate the adoption of behavioral metrics that enable the measurement of patient engagement based on outpatient portal use. ", doi="10.2196/16849", url="https://www.jmir.org/2020/6/e16849", url="http://www.ncbi.nlm.nih.gov/pubmed/32530435" } @Article{info:doi/10.2196/11512, author="Yeng, Kandabongee Prosper and Woldaregay, Zebene Ashenafi and Solvoll, Terje and Hartvigsen, Gunnar", title="Cluster Detection Mechanisms for Syndromic Surveillance Systems: Systematic Review and Framework Development", journal="JMIR Public Health Surveill", year="2020", month="May", day="26", volume="6", number="2", pages="e11512", keywords="sentinel surveillance", keywords="space-time clustering", keywords="aberration detection", abstract="Background: The time lag in detecting disease outbreaks remains a threat to global health security. The advancement of technology has made health-related data and other indicator activities easily accessible for syndromic surveillance of various datasets. At the heart of disease surveillance lies the clustering algorithm, which groups data with similar characteristics (spatial, temporal, or both) to uncover significant disease outbreaks. Despite these developments, there is a lack of updated reviews of trends and modelling options in cluster detection algorithms. Objective: Our purpose was to systematically review practically implemented disease surveillance clustering algorithms relating to temporal, spatial, and spatiotemporal clustering mechanisms for their usage and performance efficacies, and to develop an efficient cluster detection mechanism framework. Methods: We conducted a systematic review exploring Google Scholar, ScienceDirect, PubMed, IEEE Xplore, ACM Digital Library, and Scopus. Between January and March 2018, we conducted the literature search for articles published to date in English in peer-reviewed journals. The main eligibility criteria were studies that (1) examined a practically implemented syndromic surveillance system with cluster detection mechanisms, including over-the-counter medication, school and work absenteeism, and disease surveillance relating to the presymptomatic stage; and (2) focused on surveillance of infectious diseases. We identified relevant articles using the title, keywords, and abstracts as a preliminary filter with the inclusion criteria, and then conducted a full-text review of the relevant articles. We then developed a framework for cluster detection mechanisms for various syndromic surveillance systems based on the review. Results: The search identified a total of 5936 articles. Removal of duplicates resulted in 5839 articles. After an initial review of the titles, we excluded 4165 articles, with 1674 remaining. Reading of abstracts and keywords eliminated 1549 further records. An in-depth assessment of the remaining 125 articles resulted in a total of 27 articles for inclusion in the review. 
The results indicated that various clustering and aberration detection algorithms have been implemented and empirically tested with real data. Based on the findings of the review, we subsequently developed a framework to include data processing, clustering and aberration detection, visualization, and alerts and alarms. Conclusions: The review identified various algorithms that have been practically implemented and tested. These results might foster the development of effective and efficient cluster detection mechanisms in empirical syndromic surveillance systems relating to a broad spectrum of space, time, or space-time. ", doi="10.2196/11512", url="http://publichealth.jmir.org/2020/2/e11512/", url="http://www.ncbi.nlm.nih.gov/pubmed/32357126" } @Article{info:doi/10.2196/15407, author="Fernandes, Chrystinne and Miles, Simon and Lucena, Pereira Carlos Jos{\'e}", title="Detecting False Alarms by Analyzing Alarm-Context Information: Algorithm Development and Validation", journal="JMIR Med Inform", year="2020", month="May", day="20", volume="8", number="5", pages="e15407", keywords="alarm fatigue", keywords="alarm safety", keywords="false alarms", keywords="eHealth systems", keywords="remote patient monitoring", keywords="notification", keywords="reasoning", keywords="sensors", abstract="Background: Although alarm safety is a critical issue that needs to be addressed to improve patient care, hospitals have not given serious consideration to how their staff should be using, setting, and responding to clinical alarms. Studies have indicated that 80\%-99\% of alarms in hospital units are false or clinically insignificant and do not represent real danger for patients, leading caregivers to miss relevant alarms that might indicate significant harmful events. The lack of use of any intelligent filter to detect recurrent, irrelevant, and/or false alarms before alerting health providers can culminate in a complex and overwhelming scenario of sensory overload for the medical team, known as alarm fatigue. Objective: This paper's main goal is to propose a solution to mitigate alarm fatigue by using an automatic reasoning mechanism to decide how to calculate false alarm probability (FAP) for alarms and whether to include an indication of the FAP (ie, FAP\_LABEL) with a notification shown to health care team members, designed to help them prioritize which alerts to respond to next. Methods: We present a new approach to cope with the alarm fatigue problem that uses an automatic reasoner to decide how to notify caregivers with an indication of FAP. Our reasoning algorithm calculates FAP for alerts triggered by sensors and multiparametric monitors based on statistical analysis of false alarm indicators (FAIs) in a simulated environment of an intensive care unit (ICU), where a large number of warnings can lead to alarm fatigue. Results: The main contributions described are as follows: (1) a list of FAIs we defined that can be utilized and possibly extended by other researchers, (2) a novel approach to assess the probability of a false alarm using statistical analysis of multiple inputs representing alarm-context information, and (3) a reasoning algorithm that uses alarm-context information to detect false alarms in order to decide whether to notify caregivers with an indication of FAP (ie, FAP\_LABEL) to avoid alarm fatigue. 
Conclusions: Experiments demonstrated that an intelligent notification system can identify false alarms by analyzing alarm-context information. The reasoner entity we described in this paper was able to attribute FAP values to alarms based on FAIs and to notify caregivers with a FAP\_LABEL indication without compromising patient safety. ", doi="10.2196/15407", url="http://medinform.jmir.org/2020/5/e15407/", url="http://www.ncbi.nlm.nih.gov/pubmed/32432551" } @Article{info:doi/10.2196/18402, author="Essay, Patrick and Mosier, Jarrod and Subbian, Vignesh", title="Rule-Based Cohort Definitions for Acute Respiratory Failure: Electronic Phenotyping Algorithm", journal="JMIR Med Inform", year="2020", month="Apr", day="15", volume="8", number="4", pages="e18402", keywords="computable phenotype", keywords="electronic health record", keywords="intensive care units", keywords="critical care informatics", keywords="telemedicine", keywords="respiratory", abstract="Background: Acute respiratory failure is generally treated with invasive mechanical ventilation or noninvasive respiratory support strategies. The efficacies of the various strategies are not fully understood. There is a need for accurate therapy-based phenotyping for secondary analyses of electronic health record data to answer research questions regarding respiratory management and outcomes with each strategy. Objective: The objective of this study was to address knowledge gaps related to ventilation therapy strategies across diverse patient populations by developing an algorithm for accurate identification of patients with acute respiratory failure. To accomplish this objective, our goal was to develop rule-based computable phenotypes for patients with acute respiratory failure using remotely monitored intensive care unit (tele-ICU) data. This approach permits analyses by ventilation strategy across broad patient populations of interest with the ability to sub-phenotype as research questions require. Methods: Tele-ICU data from ≥200 hospitals were used to create a rule-based algorithm for phenotyping patients with acute respiratory failure, defined as an adult patient requiring invasive mechanical ventilation or a noninvasive strategy. The dataset spans a wide range of hospitals and ICU types across all US regions. Structured clinical data, including ventilation therapy start and stop times, medication records, and nurse and respiratory therapy charts, were used to define clinical phenotypes. All adult patients of any diagnosis with a record of ventilation therapy were included. Patients were categorized by ventilation type, and analysis of event sequences using record timestamps defined each phenotype. Manual validation was performed on 5\% of patients in each phenotype. Results: We developed 7 phenotypes: (0) invasive mechanical ventilation, (1) noninvasive positive-pressure ventilation, (2) high-flow nasal insufflation, (3) noninvasive positive-pressure ventilation subsequently requiring intubation, (4) high-flow nasal insufflation subsequently requiring intubation, (5) invasive mechanical ventilation with extubation to noninvasive positive-pressure ventilation, and (6) invasive mechanical ventilation with extubation to high-flow nasal insufflation. A total of 27,734 patients met our phenotype criteria and were categorized into these ventilation subgroups.
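The phenotype definitions above reduce to the temporal order of therapy intervals, which makes the rule set easy to sketch. The following Python fragment is a simplified reconstruction for illustration only (the therapy codes and the fall-through case are our assumptions), not the authors' published algorithm, which uses richer EHR data.

# Simplified reconstruction of rule-based ventilation phenotyping from
# ordered therapy events (IMV = invasive mechanical ventilation,
# NIPPV = noninvasive positive-pressure ventilation, HFNI = high-flow
# nasal insufflation).
def assign_phenotype(events):
    """events: list of (start_time, therapy) tuples for one patient."""
    therapies = [t for _, t in sorted(events)]
    if therapies == ["IMV"]:
        return 0
    if therapies == ["NIPPV"]:
        return 1
    if therapies == ["HFNI"]:
        return 2
    if therapies == ["NIPPV", "IMV"]:
        return 3  # noninvasive support subsequently requiring intubation
    if therapies == ["HFNI", "IMV"]:
        return 4
    if therapies == ["IMV", "NIPPV"]:
        return 5  # extubation to noninvasive support
    if therapies == ["IMV", "HFNI"]:
        return 6
    return None  # needs manual review, eg, 3 or more strategies

print(assign_phenotype([(3, "IMV"), (1, "NIPPV")]))  # 3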
Manual validation of a random selection of 5\% of records from each phenotype resulted in a total accuracy of 88\% and a precision and recall of 0.8789 and 0.8785, respectively, across all phenotypes. Individual phenotype validation showed that the algorithm categorizes patients particularly well but has challenges with patients who require ≥2 management strategies. Conclusions: Our proposed computable phenotyping algorithm for patients with acute respiratory failure effectively identifies patients for therapy-focused research regardless of admission diagnosis or comorbidities and allows for management strategy comparisons across populations of interest. ", doi="10.2196/18402", url="http://medinform.jmir.org/2020/4/e18402/", url="http://www.ncbi.nlm.nih.gov/pubmed/32293579" } @Article{info:doi/10.2196/15963, author="Wu, Yi-Ying and Huang, Tzu-Chuan and Ye, Ren-Hua and Fang, Wen-Hui and Lai, Shiue-Wei and Chang, Ping-Ying and Liu, Wei-Nung and Kuo, Tai-Yu and Lee, Cho-Hao and Tsai, Wen-Chiuan and Lin, Chin", title="A Hematologist-Level Deep Learning Algorithm (BMSNet) for Assessing the Morphologies of Single Nuclear Balls in Bone Marrow Smears: Algorithm Development", journal="JMIR Med Inform", year="2020", month="Apr", day="8", volume="8", number="4", pages="e15963", keywords="artificial intelligence", keywords="bone marrow examination", keywords="leukemia", keywords="myelodysplastic syndrome", keywords="deep learning", abstract="Background: Bone marrow aspiration and biopsy remain the gold standard for the diagnosis of hematological diseases despite the development of flow cytometry (FCM) and molecular and gene analyses. However, the interpretation of the results is laborious and operator dependent. Furthermore, the obtained results exhibit interobserver and intraobserver variation among specialists. Therefore, it is important to develop a more objective and automated analysis system. Several deep learning models have been developed and applied in medical image analysis but not in the field of hematological histology, especially for bone marrow smear applications. Objective: The aim of this study was to develop a deep learning model (BMSNet) for assisting hematologists in the interpretation of bone marrow smears for faster diagnosis and disease monitoring. Methods: From January 1, 2016, to December 31, 2018, 122 bone marrow smears were photographed and divided into a development cohort (N=42), a validation cohort (N=70), and a competition cohort (N=10). The development cohort included 17,319 annotated cells from 291 high-resolution photos. In total, 20 photos were taken for each patient in the validation cohort and the competition cohort. This study included eight annotation categories: erythroid, blasts, myeloid, lymphoid, plasma cells, monocyte, megakaryocyte, and unable to identify. BMSNet is a convolutional neural network with the YOLO v3 architecture, which detects and classifies single cells in a single model. Six visiting staff members participated in a human-machine competition, and the results from the FCM were regarded as the ground truth. Results: In the development cohort, according to 6-fold cross-validation, the average precision of the bounding box prediction without consideration of the classification was 67.4\%. After removing the bounding box prediction error, the precision and recall of BMSNet were similar to those of the hematologists in most categories.
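Bounding box precision and recall for a cell detector such as BMSNet are conventionally computed by matching predicted boxes to annotated ones at an intersection-over-union (IoU) threshold. The sketch below shows this standard evaluation step; the greedy matching and the 0.5 threshold are common conventions, not details taken from the paper.

# Standard detection evaluation: greedily match predicted boxes to ground
# truth at IoU >= 0.5, then compute precision and recall. Boxes are
# (x1, y1, x2, y2). Conventions are generic, not specific to BMSNet.
def iou(a, b):
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area = lambda r: (r[2] - r[0]) * (r[3] - r[1])
    return inter / (area(a) + area(b) - inter)

def precision_recall(predicted, truth, thr=0.5):
    unmatched = list(truth)
    tp = 0
    for p in predicted:
        best = max(unmatched, key=lambda t: iou(p, t), default=None)
        if best is not None and iou(p, best) >= thr:
            tp += 1
            unmatched.remove(best)
    fp, fn = len(predicted) - tp, len(unmatched)
    return tp / (tp + fp), tp / (tp + fn)

print(precision_recall([(0, 0, 10, 10), (50, 50, 60, 60)],
                       [(1, 1, 11, 11), (80, 80, 90, 90)]))  # (0.5, 0.5)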
In detecting more than 5\% of blasts in the validation cohort, the area under the curve (AUC) of BMSNet (0.948) was higher than the AUC of the hematologists (0.929) but lower than the AUC of the pathologists (0.985). In detecting more than 20\% of blasts, the AUCs of the hematologists (0.981) and pathologists (0.980) were similar and were higher than the AUC of BMSNet (0.942). Further analysis showed that the performance difference could be attributed to the myelodysplastic syndrome cases. In the competition cohort, the mean value of the correlations between BMSNet and FCM was 0.960, and the mean values of the correlations between the visiting staff and FCM ranged between 0.952 and 0.990. Conclusions: Our deep learning model can assist hematologists in interpreting bone marrow smears by facilitating and accelerating the detection of hematopoietic cells. However, a detailed morphological interpretation still requires trained hematologists. ", doi="10.2196/15963", url="http://medinform.jmir.org/2020/4/e15963/", url="http://www.ncbi.nlm.nih.gov/pubmed/32267237" } @Article{info:doi/10.2196/16042, author="Pfaff, R. Emily and Crosskey, Miles and Morton, Kenneth and Krishnamurthy, Ashok", title="Clinical Annotation Research Kit (CLARK): Computable Phenotyping Using Machine Learning", journal="JMIR Med Inform", year="2020", month="Jan", day="24", volume="8", number="1", pages="e16042", keywords="natural language processing", keywords="machine learning", keywords="electronic health records", doi="10.2196/16042", url="http://medinform.jmir.org/2020/1/e16042/", url="http://www.ncbi.nlm.nih.gov/pubmed/32012059" } @Article{info:doi/10.2196/16487, author="Oliveira, R. Carlos and Avni-Singer, Lital and Badaro, Geovanna and Sullivan, L. Erin and Sheth, S. Sangini and Shapiro, D. Eugene and Niccolai, M. Linda", title="Feasibility and Accuracy of a Computer-Assisted Self-Interviewing Instrument to Ascertain Prior Immunization With Human Papillomavirus Vaccine by Self-Report: Cross-Sectional Analysis", journal="JMIR Med Inform", year="2020", month="Jan", day="22", volume="8", number="1", pages="e16487", keywords="human papillomavirus vaccine", keywords="self-report", keywords="accuracy", keywords="computer-assisted self-interviewing", abstract="Background: Ascertaining history of prior immunization with human papillomavirus (HPV) vaccine can be challenging and resource-intensive. Computer-assisted self-interviewing instruments have the potential to address some of the challenges of self-reporting, and may also reduce the time, costs, and efforts associated with ascertaining immunization status. Objective: This study assesses both the feasibility and the accuracy of a computer-assisted self-interviewing instrument to ascertain a patient's history of immunization with the HPV vaccine. Methods: We developed both a survey and a Web-based data collection system using computer-assisted self-interviewing to ascertain self-reported HPV vaccine immunization history. We implemented the instrument in a sample of adult women enrolled in an ongoing study of the HPV vaccine. Vaccine records from prior sources of care were reviewed to verify reported immunization history. Results: Among the 312 participants who provided HPV vaccine immunization history by self-report, almost all (99\%) were able to do so using the computer-assisted self-interviewing instrument. The median survey completion time was 10 minutes (IQR 7-17). 
The accuracy of self-report was 84\%, sensitivity was 89\%, specificity was 80\%, and the negative predictive value was 92\%. Conclusions: We found that it is feasible to collect a history of immunization with the HPV vaccine using a computer-assisted self-interviewing instrument. This approach is likely to be acceptable to adult women and is reasonably accurate in a clinical research setting. ", doi="10.2196/16487", url="http://medinform.jmir.org/2020/1/e16487/", url="http://www.ncbi.nlm.nih.gov/pubmed/32012073" } @Article{info:doi/10.2196/15980, author="Spasic, Irena and Krzeminski, Dominik and Corcoran, Padraig and Balinsky, Alexander", title="Cohort Selection for Clinical Trials From Longitudinal Patient Records: Text Mining Approach", journal="JMIR Med Inform", year="2019", month="Oct", day="31", volume="7", number="4", pages="e15980", keywords="natural language processing", keywords="machine learning", keywords="electronic medical records", keywords="clinical trial", keywords="eligibility determination", abstract="Background: Clinical trials are an important step in introducing new interventions into clinical practice by generating data on their safety and efficacy. Clinical trials need to ensure that participants are similar so that the findings can be attributed to the interventions studied and not to some other factors. Therefore, each clinical trial defines eligibility criteria, which describe characteristics that must be shared by the participants. Unfortunately, the complexities of eligibility criteria may not allow them to be translated directly into readily executable database queries. Instead, they may require careful analysis of the narrative sections of medical records. Manual screening of medical records is time-consuming, thus negatively affecting the timeliness of the recruitment process. Objective: Track 1 of the 2018 National Natural Language Processing Clinical Challenge focused on the task of cohort selection for clinical trials, aiming to answer the following question: Can natural language processing be applied to narrative medical records to identify patients who meet eligibility criteria for clinical trials? The task required the participating systems to analyze longitudinal patient records to determine if the corresponding patients met the given eligibility criteria. We aimed to describe a system developed to address this task. Methods: Our system consisted of 13 classifiers, one for each eligibility criterion. All classifiers used a bag-of-words document representation model. To prevent the loss of relevant contextual information associated with such representation, a pattern-matching approach was used to extract context-sensitive features. They were embedded back into the text as lexically distinguishable tokens, which were consequently featured in the bag-of-words representation. Supervised machine learning was chosen wherever a sufficient number of both positive and negative instances was available to learn from. A rule-based approach focusing on a small set of relevant features was chosen for the remaining criteria. Results: The system was evaluated using microaveraged F measure. Overall, 4 machine learning algorithms, including support vector machine, logistic regression, na{\"i}ve Bayesian classifier, and gradient tree boosting (GTB), were evaluated on the training data using 10-fold cross-validation. Of these, GTB demonstrated the most consistent performance. Its performance peaked when oversampling was used to balance the training data.
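The central trick described above, re-embedding pattern-matched context-sensitive features into the text as lexically distinguishable tokens so that a plain bag-of-words model can exploit them, can be sketched as follows. The regular expression, the injected token, and the scikit-learn usage are illustrative assumptions, not the authors' actual rules.

# Hedged sketch of context-sensitive feature injection: a regex rewrites a
# negated mention into a distinguishable token so that a plain bag-of-words
# model can "see" the negation. Pattern and token names are illustrative.
import re
from sklearn.feature_extraction.text import CountVectorizer

NEGATED_MI = re.compile(r"no (history|evidence) of myocardial infarction",
                        re.IGNORECASE)

def inject_context_tokens(text):
    return NEGATED_MI.sub("FEAT_NEG_MI", text)

docs = [
    "Patient has a history of myocardial infarction in 2010.",
    "No evidence of myocardial infarction on this admission.",
]
processed = [inject_context_tokens(d) for d in docs]
vec = CountVectorizer()
X = vec.fit_transform(processed)
print(sorted(vec.vocabulary_))  # includes 'feat_neg_mi' as its own feature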
The final evaluation was performed on previously unseen test data. On average, our F measure of 89.04\% was comparable to 3 of the top-ranked performances in the shared task (91.11\%, 90.28\%, and 90.21\%). With an F measure of 88.14\%, we significantly outperformed these systems (81.03\%, 78.50\%, and 70.81\%) in identifying patients with advanced coronary artery disease. Conclusions: The holdout evaluation provides evidence that our system was able to identify eligible patients for the given clinical trial with high accuracy. Our approach demonstrates how rule-based knowledge infusion can improve the performance of machine learning algorithms even when trained on a relatively small dataset. ", doi="10.2196/15980", url="http://medinform.jmir.org/2019/4/e15980/", url="http://www.ncbi.nlm.nih.gov/pubmed/31674914" } @Article{info:doi/10.2196/15794, author="Chartash, David and Paek, Hyung and Dziura, D. James and Ross, K. Bill and Nogee, P. Daniel and Boccio, Eric and Hines, Cory and Schott, M. Aaron and Jeffery, M. Molly and Patel, D. Mehul and Platts-Mills, F. Timothy and Ahmed, Osama and Brandt, Cynthia and Couturier, Katherine and Melnick, Edward", title="Identifying Opioid Use Disorder in the Emergency Department: Multi-System Electronic Health Record--Based Computable Phenotype Derivation and Validation Study", journal="JMIR Med Inform", year="2019", month="Oct", day="31", volume="7", number="4", pages="e15794", keywords="electronic health records", keywords="emergency medicine", keywords="algorithms", keywords="phenotype", keywords="opioid-related disorders", abstract="Background: Deploying accurate computable phenotypes in pragmatic trials requires a trade-off between precise and clinically sensical variable selection. In particular, evaluating the medical encounter to assess a pattern leading to clinically significant impairment or distress indicative of disease is a difficult modeling challenge for the emergency department. Objective: This study aimed to derive and validate an electronic health record--based computable phenotype to identify emergency department patients with opioid use disorder using physician chart review as a reference standard. Methods: A two-algorithm computable phenotype was developed and evaluated using structured clinical data across 13 emergency departments in two large health care systems. Algorithm 1 combined clinician and billing codes. Algorithm 2 used chief complaint structured data suggestive of opioid use disorder. To evaluate the algorithms in both internal and external validation phases, two emergency medicine physicians, with a third acting as adjudicator, reviewed a pragmatic sample of 231 charts: 125 for internal validation (75 positive and 50 negative) and 106 for external validation (56 positive and 50 negative). Results: Cohen kappa, measuring agreement between reviewers, for the internal and external validation cohorts was 0.95 and 0.93, respectively. In the internal validation phase, Algorithm 1 had a positive predictive value (PPV) of 0.96 (95\% CI 0.863-0.995) and a negative predictive value (NPV) of 0.98 (95\% CI 0.893-0.999), and Algorithm 2 had a PPV of 0.8 (95\% CI 0.593-0.932) and an NPV of 1.0 (one-sided 97.5\% CI 0.863-1). In the external validation phase, the phenotype had a PPV of 0.95 (95\% CI 0.851-0.989) and an NPV of 0.92 (95\% CI 0.807-0.978). Conclusions: This phenotype detected emergency department patients with opioid use disorder with high predictive values and reliability.
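Validating a computable phenotype against chart review reduces to a 2x2 confusion matrix; the short sketch below computes PPV and NPV the way such studies report them. The counts are invented for illustration, not the study's data.

# PPV/NPV computation for phenotype validation against a chart-review
# reference standard. The counts below are invented.
def ppv_npv(tp, fp, tn, fn):
    ppv = tp / (tp + fp)  # of phenotype-positive charts, fraction truly positive
    npv = tn / (tn + fn)  # of phenotype-negative charts, fraction truly negative
    return ppv, npv

tp, fp, tn, fn = 72, 3, 49, 1
ppv, npv = ppv_npv(tp, fp, tn, fn)
print(f"PPV={ppv:.2f}, NPV={npv:.2f}")  # PPV=0.96, NPV=0.98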
Its algorithms were transportable across health care systems and have potential value for both clinical and research purposes. ", doi="10.2196/15794", url="http://medinform.jmir.org/2019/4/e15794/", url="http://www.ncbi.nlm.nih.gov/pubmed/31674913" } @Article{info:doi/10.2196/14083, author="Kim, Mina and Shin, Soo-Yong and Kang, Mira and Yi, Byoung-Kee and Chang, Kyung Dong", title="Developing a Standardization Algorithm for Categorical Laboratory Tests for Clinical Big Data Research: Retrospective Study", journal="JMIR Med Inform", year="2019", month="Aug", day="29", volume="7", number="3", pages="e14083", keywords="standardization", keywords="electronic health records", keywords="data quality", keywords="data science", abstract="Background: Data standardization is essential in electronic health records (EHRs) for both clinical practice and retrospective research. However, it is still not easy to standardize EHR data because of nonidentical duplicates, typographical errors, or inconsistencies. To overcome this drawback, standardization efforts have been undertaken for collecting data in a standardized format as well as for curating the stored data in EHRs. To perform clinical big data research, the stored data in EHRs should be standardized, starting from laboratory results, given their importance. However, most of the previous efforts have been based on labor-intensive manual methods. Objective: We aimed to develop an automatic standardization method for eliminating the noise in categorical laboratory data, grouping, and mapping of cleaned data using standard terminology. Methods: We developed a method called standardization algorithm for laboratory test--categorical result (SALT-C) that can process categorical laboratory data, such as pos +, 250 4+ (urinalysis results), and reddish (urinalysis color results). SALT-C consists of five steps. First, it applies data cleaning rules to categorical laboratory data. Second, it categorizes the cleaned data into 5 predefined groups (urine color, urine dipstick, blood type, presence-finding, and pathogenesis tests). Third, all data in each group are vectorized. Fourth, similarity is calculated between the vectors of data and those of each value in the predefined value sets. Finally, the value closest to the data is assigned. Results: The performance of SALT-C was validated using 59,213,696 data points (167,938 unique values) generated over 23 years from a tertiary hospital. Apart from the data whose original meaning could not be interpreted correctly (eg, ** and \_^), SALT-C mapped unique raw data to the correct reference value for each group with accuracy of 97.6\% (123/126; urine color tests), 97.5\% (198/203; urine dipstick tests), 95\% (53/56; blood type tests), 99.68\% (162,291/162,805; presence-finding tests), and 99.61\% (4643/4661; pathogenesis tests). Conclusions: The proposed SALT-C successfully standardized the categorical laboratory test results with high reliability. SALT-C can be beneficial for clinical big data research by reducing laborious manual standardization efforts.
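Steps 3 to 5 of SALT-C (vectorize the cleaned data, compute similarity to predefined value sets, assign the closest value) can be approximated with character n-gram vectors and cosine similarity, as sketched below; the vectorization choices and reference sets are generic assumptions, not the published implementation.

# Hedged sketch of SALT-C steps 3-5: vectorize cleaned categorical lab
# strings as character n-grams and map each to the most similar reference
# value by cosine similarity. Reference sets and inputs are illustrative.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

REFERENCE = ["positive", "negative", "trace", "reddish", "yellow"]

vec = TfidfVectorizer(analyzer="char_wb", ngram_range=(2, 3))
ref_matrix = vec.fit_transform(REFERENCE)

def standardize(raw_value):
    sims = cosine_similarity(vec.transform([raw_value.lower()]), ref_matrix)
    return REFERENCE[sims.argmax()]

for raw in ["pos +", "negativ", "redish"]:
    print(raw, "->", standardize(raw))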
", doi="10.2196/14083", url="http://medinform.jmir.org/2019/3/e14083/", url="http://www.ncbi.nlm.nih.gov/pubmed/31469075" } @Article{info:doi/10.2196/12770, author="Kwon, Soonil and Hong, Joonki and Choi, Eue-Keun and Lee, Euijae and Hostallero, Earl David and Kang, Ju Wan and Lee, Byunghwan and Jeong, Eui-Rim and Koo, Bon-Kwon and Oh, Seil and Yi, Yung", title="Deep Learning Approaches to Detect Atrial Fibrillation Using Photoplethysmographic Signals: Algorithms Development Study", journal="JMIR Mhealth Uhealth", year="2019", month="Jun", day="6", volume="7", number="6", pages="e12770", keywords="atrial fibrillation", keywords="deep learning", keywords="photoplethysmography", keywords="pulse oximetry", keywords="diagnosis", abstract="Background: Wearable devices have evolved as screening tools for atrial fibrillation (AF). A photoplethysmographic (PPG) AF detection algorithm was developed and applied to a convenient smartphone-based device with good accuracy. However, patients with paroxysmal AF frequently exhibit premature atrial complexes (PACs), which result in poor unmanned AF detection, mainly because of rule-based or handcrafted machine learning techniques that are limited in terms of diagnostic accuracy and reliability. Objective: This study aimed to develop deep learning (DL) classifiers using PPG data to detect AF from the sinus rhythm (SR) in the presence of PACs after successful cardioversion. Methods: We examined 75 patients with AF who underwent successful elective direct-current cardioversion (DCC). Electrocardiogram and pulse oximetry data over a 15-min period were obtained before and after DCC and labeled as AF or SR. A 1-dimensional convolutional neural network (1D-CNN) and recurrent neural network (RNN) were chosen as the 2 DL architectures. The PAC indicator estimated the burden of PACs on the PPG dataset. We defined a metric called the confidence level (CL) of AF or SR diagnosis and compared the CLs of true and false diagnoses. We also compared the diagnostic performance of 1D-CNN and RNN with previously developed AF detectors (support vector machine with root-mean-square of successive difference of RR intervals and Shannon entropy, autocorrelation, and ensemble by combining 2 previous methods) using 10 5-fold cross-validation processes. Results: Among the 14,298 training samples containing PPG data, 7157 samples were obtained during the post-DCC period. The PAC indicator estimated 29.79\% (2132/7157) of post-DCC samples had PACs. The diagnostic accuracy of AF versus SR was 99.32\% (70,925/71,410) versus 95.85\% (68,602/71,570) in 1D-CNN and 98.27\% (70,176/71,410) versus 96.04\% (68,736/71,570) in RNN methods. The area under receiver operating characteristic curves of the 2 DL classifiers was 0.998 (95\% CI 0.995-1.000) for 1D-CNN and 0.996 (95\% CI 0.993-0.998) for RNN, which were significantly higher than other AF detectors (P<.001). If we assumed that the dataset could emulate a sufficient number of patients in training, both DL classifiers improved their diagnostic performances even further especially for the samples with a high burden of PACs. The average CLs for true versus false classification were 98.56\% versus 78.75\% for 1D-CNN and 98.37\% versus 82.57\% for RNN (P<.001 for all cases). Conclusions: New DL classifiers could detect AF using PPG monitoring signals with high diagnostic accuracy even with frequent PACs and could outperform previously developed AF detectors. 
Although diagnostic performance decreased as the burden of PACs increased, performance improved when samples from more patients were used for training. Moreover, the reliability of the diagnosis could be indicated by the CL. Wearable devices sensing PPG signals with DL classifiers should be validated as tools to screen for AF. ", doi="10.2196/12770", url="http://mhealth.jmir.org/2019/6/e12770/", url="http://www.ncbi.nlm.nih.gov/pubmed/31199302" } @Article{info:doi/10.2196/12843, author="Luz, Friedemann Christian and Berends, S. Matthijs and Dik, H. Jan-Willem and Lokate, Mari{\"e}tte and Pulcini, C{\'e}line and Glasner, Corinna and Sinha, Bhanu", title="Rapid Analysis of Diagnostic and Antimicrobial Patterns in R (RadaR): Interactive Open-Source Software App for Infection Management and Antimicrobial Stewardship", journal="J Med Internet Res", year="2019", month="May", day="24", volume="21", number="6", pages="e12843", keywords="antimicrobial stewardship", keywords="software", keywords="hospital records", keywords="data visualization", keywords="infection", keywords="medical informatics applications", abstract="Background: Analyzing process and outcome measures for all patients diagnosed with an infection in a hospital, including those suspected of having an infection, requires not only processing of large datasets but also accounting for numerous patient parameters and guidelines. Substantial technical expertise is required to conduct such rapid, reproducible, and adaptable analyses; however, such analyses can yield valuable insights for infection management and antimicrobial stewardship (AMS) teams. Objective: The aim of this study was to present the design, development, and testing of RadaR (Rapid analysis of diagnostic and antimicrobial patterns in R), a software app for infection management, and to ascertain whether RadaR can facilitate user-friendly, intuitive, and interactive analyses of large datasets in the absence of prior in-depth software or programming knowledge. Methods: RadaR was built in the open-source programming language R, using Shiny, an additional package to implement Web-app frameworks in R. It was developed in the context of a 1339-bed academic tertiary referral hospital to handle data of more than 180,000 admissions. Results: RadaR enabled visualization of analytical graphs and statistical summaries in a rapid and interactive manner. It allowed users to filter patient groups by 17 different criteria and investigate antimicrobial use, microbiological diagnostic use and results including antimicrobial resistance, and outcome in length of stay. Furthermore, with RadaR, results can be stratified and grouped to compare defined patient groups on the basis of individual patient features. Conclusions: AMS teams can use RadaR to identify areas within their institutions that might benefit from increased support and targeted interventions. It can be used for the assessment of diagnostic and therapeutic procedures and for visualizing and communicating analyses. RadaR demonstrated the feasibility of developing software tools for use in infection management and for AMS teams in an open-source approach, thus making it free to use and adaptable to different settings. ", doi="10.2196/12843", url="https://www.jmir.org/2019/6/e12843/", url="http://www.ncbi.nlm.nih.gov/pubmed/31199325" } @Article{info:doi/10.2196/12013, author="Thorpe, Rosemary Julia and Forchhammer, Hysse Birgitte and Maier, M.
Anja", title="Development of a Sensor-Based Behavioral Monitoring Solution to Support Dementia Care", journal="JMIR Mhealth Uhealth", year="2019", month="May", day="30", volume="7", number="6", pages="e12013", keywords="ambulatory monitoring", keywords="patient-centered care", keywords="physical activity", keywords="dementia", keywords="wearable electronics devices", keywords="activity trackers", keywords="mHealth", keywords="human behavior", keywords="system design", abstract="Background: Mobile and wearable technology presents exciting opportunities for monitoring behavior using widely available sensor data. This could support clinical research and practice aimed at improving quality of life among the growing number of people with dementia. However, it requires suitable tools for measuring behavior in a natural real-life setting that can be easily implemented by others. Objective: The objectives of this study were to develop and test a set of algorithms for measuring mobility and activity and to describe a technical setup for collecting the sensor data that these algorithms require using off-the-shelf devices. Methods: A mobility measurement module was developed to extract travel trajectories and home location from raw GPS (global positioning system) data and to use this information to calculate a set of spatial, temporal, and count-based mobility metrics. Activity measurement comprises activity bout extraction from recognized activity data and daily step counts. Location, activity, and step count data were collected using smartwatches and mobile phones, relying on open-source resources as far as possible for accessing data from device sensors. The behavioral monitoring solution was evaluated among 5 healthy subjects who simultaneously logged their movements for 1 week. Results: The evaluation showed that the behavioral monitoring solution successfully measures travel trajectories and mobility metrics from location data and extracts multimodal activity bouts during travel between locations. While step count could be used to indicate overall daily activity level, a concern was raised regarding device validity for step count measurement, which was substantially higher from the smartwatches than the mobile phones. Conclusions: This study contributes to clinical research and practice by providing a comprehensive behavioral monitoring solution for use in a real-life setting that can be replicated for a range of applications where knowledge about individual mobility and activity is relevant. ", doi="10.2196/12013", url="https://mhealth.jmir.org/2019/6/e12013/", url="http://www.ncbi.nlm.nih.gov/pubmed/31199304" } @Article{info:doi/10.2196/12577, author="Tang, Chunlei and Sun, Huajun and Xiong, Yun and Yang, Jiahong and Vitale, Christopher and Ruan, Lu and Ai, Angela and Yu, Guangjun and Ma, Jing and Bates, David", title="Medication Use for Childhood Pneumonia at a Children's Hospital in Shanghai, China: Analysis of Pattern Mining Algorithms", journal="JMIR Med Inform", year="2019", month="Mar", day="22", volume="7", number="1", pages="e12577", keywords="drug therapy", keywords="combination", keywords="computer-assisted", keywords="pattern recognition", keywords="data mining", keywords="precision medicine", keywords="childhood pneumonia", keywords="hospital", abstract="Background: Pattern mining utilizes multiple algorithms to explore objective and sometimes unexpected patterns in real-world data. 
This technique could be applied to electronic medical record data mining; however, it first requires a careful clinical assessment and validation. Objective: The aim of this study was to examine the use of pattern mining techniques on a large clinical dataset to detect treatment and medication use patterns for childhood pneumonia. Methods: We applied 3 pattern mining algorithms to 680,138 medication administration records from 30,512 childhood inpatients with a diagnosis of pneumonia during a 6-year period at a children's hospital in China. Patients' ages ranged from 0 to 17 years: 37.53\% (11,453/30,512) were 0 to 3 months old, 86.55\% (26,408/30,512) were under 5 years, 60.37\% (18,419/30,512) were male, and 60.10\% (18,338/30,512) had a hospital stay of 9 to 15 days. We used the FP-Growth, PrefixSpan, and USpan pattern mining algorithms. The first 2 are more traditional methods of pattern mining and mine a complete set of frequent medication use patterns. PrefixSpan also incorporates an administration sequence. The newer USpan method considers medication utility, defined by the dose, frequency, and timing of use of the 652 individual medications in the dataset. Together, these 3 methods identified the top 10 patterns from 6 age groups, forming a total of 180 distinct medication combinations. These medications encompassed the top 40 (73.66\%, 500,982/680,138) most frequently used medications. These patterns were then evaluated by subject matter experts to summarize 5 medication use and 2 treatment patterns. Results: We identified 5 medication use patterns: (1) antiasthmatics and expectorants and corticosteroids, (2) antibiotics and (antiasthmatics or expectorants or corticosteroids), (3) third-generation cephalosporin antibiotics with (or followed by) traditional antibiotics, (4) antibiotics and (medications for enteritis or skin diseases), and (5) (antiasthmatics or expectorants or corticosteroids) and (medications for enteritis or skin diseases). We also identified 2 frequent treatment patterns: (1) 42.89\% (291,701/680,138) of specific medication administration records were of intravenous therapy with antibiotics, diluents, and nutritional supplements and (2) 11.53\% (78,390/680,138) were of various combinations of inhalation of antiasthmatics, expectorants, or corticosteroids. Fleiss kappa for the subject experts' evaluation was 0.693, indicating moderate agreement. Conclusions: Utilizing a pattern mining approach, we summarized 5 medication use patterns and 2 treatment patterns. These warrant further investigation. ", doi="10.2196/12577", url="http://medinform.jmir.org/2019/1/e12577/", url="http://www.ncbi.nlm.nih.gov/pubmed/30900998" } @Article{info:doi/10.2196/11728, author="Pan, Liyan and Liu, Guangjian and Mao, Xiaojian and Li, Huixian and Zhang, Jiexin and Liang, Huiying and Li, Xiuzhen", title="Development of Prediction Models Using Machine Learning Algorithms for Girls with Suspected Central Precocious Puberty: Retrospective Study", journal="JMIR Med Inform", year="2019", month="Feb", day="12", volume="7", number="1", pages="e11728", keywords="central precocious puberty", keywords="GnRHa-stimulation test", keywords="machine learning", keywords="prediction model", abstract="Background: Central precocious puberty (CPP) in girls seriously affects their physical and mental development in childhood.
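Frequent-pattern mining of the kind applied in the pneumonia study above finds medication combinations that co-occur in many admissions. The sketch below uses brute-force itemset counting for clarity; FP-Growth computes the same frequent itemsets without enumerating candidate combinations, and the toy transactions are invented.

# Frequent itemset mining over medication "transactions" (one set per
# admission), shown with brute-force counting for clarity.
from itertools import combinations
from collections import Counter

transactions = [
    {"antibiotic", "expectorant", "corticosteroid"},
    {"antibiotic", "expectorant"},
    {"antibiotic", "corticosteroid"},
    {"expectorant", "corticosteroid", "antiasthmatic"},
    {"antibiotic", "expectorant", "corticosteroid"},
]

def frequent_itemsets(transactions, min_support=0.4, max_size=3):
    n = len(transactions)
    counts = Counter()
    for t in transactions:
        for size in range(1, max_size + 1):
            for combo in combinations(sorted(t), size):
                counts[combo] += 1
    return {c: k / n for c, k in counts.items() if k / n >= min_support}

for itemset, support in sorted(frequent_itemsets(transactions).items()):
    print(itemset, f"support={support:.1f}")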
The method of diagnosis---gonadotropin-releasing hormone (GnRH)--stimulation test or GnRH analogue (GnRHa)--stimulation test---is expensive and makes patients uncomfortable due to the need for repeated blood sampling. Objective: We aimed to combine multiple CPP--related features and construct machine learning models to predict response to the GnRHa-stimulation test. Methods: In this retrospective study, we analyzed clinical and laboratory data of 1757 girls who underwent a GnRHa test in order to develop XGBoost and random forest classifiers for prediction of response to the GnRHa test. The local interpretable model-agnostic explanations (LIME) algorithm was used with the black-box classifiers to increase their interpretability. We measured sensitivity, specificity, and area under the receiver operating characteristic curve (AUC) of the models. Results: Both the XGBoost and random forest models achieved good performance in distinguishing between positive and negative responses, with the AUC ranging from 0.88 to 0.90, sensitivity ranging from 77.91\% to 77.94\%, and specificity ranging from 84.32\% to 87.66\%. Basal serum luteinizing hormone, follicle-stimulating hormone, and insulin-like growth factor-I levels were found to be the three most important factors. In the interpretable models of LIME, the abovementioned variables made high contributions to the prediction probability. Conclusions: The prediction models we developed can help diagnose CPP and may be used as a prescreening tool before the GnRHa-stimulation test. ", doi="10.2196/11728", url="http://medinform.jmir.org/2019/1/e11728/", url="http://www.ncbi.nlm.nih.gov/pubmed/30747712" } @Article{info:doi/10.2196/12539, author="Vidal-Alaball, Josep and Royo Fibla, D{\'i}dac and Zapata, A. Miguel and Marin-Gomez, X. Francesc and Solans Fernandez, Oscar", title="Artificial Intelligence for the Detection of Diabetic Retinopathy in Primary Care: Protocol for Algorithm Development", journal="JMIR Res Protoc", year="2019", month="Feb", day="01", volume="8", number="2", pages="e12539", keywords="diabetes mellitus", keywords="diabetic retinopathy", keywords="fundus oculi", keywords="artificial intelligence", keywords="computer assisted diagnosis", keywords="neural network computer", abstract="Background: Diabetic retinopathy (DR) is one of the most important causes of blindness worldwide, especially in developed countries. In diabetic patients, periodic examination of the back of the eye using a nonmydriatic camera has been widely demonstrated to be an effective system to control and prevent the onset of DR. Convolutional neural networks have been used to detect DR, achieving very high sensitivities and specificities. Objective: The objective of this paper was to develop an artificial intelligence (AI) algorithm for the detection of signs of DR in diabetic patients and to scientifically validate the algorithm to be used as a screening tool in primary care. Methods: Under this project, 2 studies will be conducted in a concomitant way: (1) Development of an algorithm with AI to detect signs of DR in patients with diabetes and (2) A prospective study comparing the diagnostic capacity of the AI algorithm with respect to the current system of family physicians evaluating the images. The standard reference to compare with will be a blinded double reading conducted by retina specialists. For the development of the AI algorithm, different iterations and training runs will be performed on the same set of data.
Before starting each new training run, the dataset will be randomly divided into 2 groups. A group with 80\% of the images will be used during the training (training dataset), and the remaining 20\% of images will be used to validate the results (validation dataset) of each cycle (epoch). During the prospective study, true-positive, true-negative, false-positive, and false-negative values will be calculated again. From here, we will obtain the resulting confusion matrix and other indicators to measure the performance of the algorithm. Results: Cession of the images began at the end of 2018. The development of the AI algorithm is expected to last about 3 to 4 months. Inclusion of patients in the cohort will start in early 2019 and is expected to last 3 to 4 months. Preliminary results are expected to be published by the end of 2019. Conclusions: The study will allow the development of an AI-based algorithm that can demonstrate equal or superior performance to the current screening of DR in diabetic patients and that constitutes a complement or an alternative to it. International Registered Report Identifier (IRRID): PRR1-10.2196/12539 ", doi="10.2196/12539", url="http://www.researchprotocols.org/2019/2/e12539/", url="http://www.ncbi.nlm.nih.gov/pubmed/30707105" } @Article{info:doi/10.2196/10295, author="Newe, Axel and Becker, Linda", title="Three-Dimensional Portable Document Format (3D PDF) in Clinical Communication and Biomedical Sciences: Systematic Review of Applications, Tools, and Protocols", journal="JMIR Med Inform", year="2018", month="Aug", day="07", volume="6", number="3", pages="e10295", keywords="3D PDF", keywords="3D visualization", keywords="interactive", keywords="clinical communication", keywords="biomedical science", keywords="tools", keywords="protocols", keywords="apps", keywords="online data sharing", keywords="scholarly publishing", keywords="electronic publishing", abstract="Background: The Portable Document Format (PDF) is the standard file format for the communication of biomedical information via the internet and for electronic scholarly publishing. Although PDF allows for the embedding of three-dimensional (3D) objects and although this technology has great potential for the communication of such data, it is not broadly used by the scientific community or by clinicians. Objective: The objective of this review was to provide an overview of existing publications that apply 3D PDF technology and the protocols and tools for the creation of model files and 3D PDFs for scholarly purposes to demonstrate the possibilities and the ways to use this technology. Methods: A systematic literature review was performed using PubMed and Google Scholar. Articles searched for were in English, peer-reviewed with biomedical reference, published since 2005 in a journal or presented at a conference or scientific meeting. Ineligible articles were removed after screening. The retrieved literature was categorized into articles that (1) applied 3D PDF for visualization, (2) showed ways to use 3D PDF, and (3) provided tools or protocols for the creation of 3D PDFs or necessary models. Finally, the latter category was analyzed in detail to provide an overview of the state of the art. Results: The search retrieved a total of 902 items. Screening identified 200 in-scope publications, 13 of which covered the use of 3D PDF for medical purposes. Only one article described a clinical routine use case; all others were pure research articles.
Many disciplines besides medicine were covered. In most cases, either animal or human anatomies were visualized. A method, protocol, software, library, or other tool for the creation of 3D PDFs or model files was described in 19 articles. Most of these tools required advanced programming skills and/or the installation of further software packages. Only one software application presented an all-in-one solution with a graphical user interface. Conclusions: 3D PDF visualization in clinical communication and in biomedical publications is still not in common use, although both the necessary technique and suitable tools are available, and there are many arguments in favor of this technique. The potential of 3D PDF usage should be disseminated in the clinical and biomedical community. Furthermore, easy-to-use, standalone, and free-of-charge software tools for the creation of 3D PDFs should be developed. ", doi="10.2196/10295", url="http://medinform.jmir.org/2018/3/e10295/", url="http://www.ncbi.nlm.nih.gov/pubmed/30087092" } @Article{info:doi/10.2196/medinform.7992, author="Shachar, Netta and Mitelpunkt, Alexis and Kozlovski, Tal and Galili, Tal and Frostig, Tzviel and Brill, Barak and Marcus-Kalish, Mira and Benjamini, Yoav", title="The Importance of Nonlinear Transformations Use in Medical Data Analysis", journal="JMIR Med Inform", year="2018", month="May", day="11", volume="6", number="2", pages="e27", keywords="data mining", keywords="statistics", keywords="preprocessing", keywords="medical informatics", keywords="health informatics", keywords="big data", keywords="transformations", abstract="Background: The accumulation of data and its accessibility through easier-to-use platforms will allow data scientists and practitioners who are less sophisticated data analysts to get answers by using big data for many purposes in multiple ways. Data scientists working with medical data are aware of the importance of preprocessing, yet in many cases, the potential benefits of using nonlinear transformations are overlooked. Objective: Our aim is to present a semi-automated approach to symmetry-aiming transformations tailored for medical data analysis and its advantages. Methods: We describe 10 commonly encountered data types used in the medical field and the relevant transformations for each data type. Data from the Alzheimer's Disease Neuroimaging Initiative study, Parkinson's disease hospital cohort, and disease-simulating data were used to demonstrate the approach and its benefits. Results: Symmetry-targeted monotone transformations were applied, and the advantages gained in variance, stability, linearity, and clustering are demonstrated. An open source application implementing the described methods was developed. Both linearity of relationships and stability of variability improved after applying proper nonlinear transformation. Clustering simulated nonsymmetric data gave low agreement with the generating clusters (Rand value=0.681), whereas clustering after applying a symmetry-targeted nonlinear transformation captured the original structure (Rand value=0.986). Conclusions: This work presents the use of nonlinear transformations for medical data and the importance of their semi-automated choice. Using the described approach, the data analyst increases the ability to create simpler, more robust and translational models, thereby facilitating the interpretation and implementation of the analysis by medical practitioners.
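The benefit of symmetry-targeted monotone transformations is easy to demonstrate: a log transform of right-skewed data sharply reduces skewness, which in turn stabilizes variance and helps linear models and clustering. A minimal sketch with simulated lognormal data (not the paper's cohorts):

# Demonstration of a symmetry-targeted monotone transformation: a log
# transform makes right-skewed (lognormal) data nearly symmetric.
# Example data are simulated, not from the paper's cohorts.
import numpy as np
from scipy.stats import skew

rng = np.random.default_rng(0)
raw = rng.lognormal(mean=2.0, sigma=0.8, size=5000)  # eg, a skewed lab value

print(f"skewness before: {skew(raw):.2f}")              # strongly right-skewed
print(f"skewness after log: {skew(np.log(raw)):.2f}")   # near 0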
Applying nonlinear transformations as part of the preprocessing is essential to the quality and interpretability of results. ", doi="10.2196/medinform.7992", url="http://medinform.jmir.org/2018/2/e27/", url="http://www.ncbi.nlm.nih.gov/pubmed/29752251" } @Article{info:doi/10.2196/mhealth.8516, author="Magistro, Daniele and Sessa, Salvatore and Kingsnorth, P. Andrew and Loveday, Adam and Simeone, Alessandro and Zecca, Massimiliano and Esliger, W. Dale", title="A Novel Algorithm for Determining the Contextual Characteristics of Movement Behaviors by Combining Accelerometer Features and Wireless Beacons: Development and Implementation", journal="JMIR Mhealth Uhealth", year="2018", month="Apr", day="20", volume="6", number="4", pages="e100", keywords="context", keywords="indoor location", keywords="activity monitor", keywords="behavior", keywords="wearable sensor", keywords="beacons/proximity", keywords="algorithm", keywords="physical activity", keywords="sedentary behavior", abstract="Background: Unfortunately, global efforts to promote ``how much'' physical activity people should be undertaking have been largely unsuccessful. Given the difficulty of achieving a sustained lifestyle behavior change, many scientists are reexamining their approaches. One such approach is to focus on understanding the context of the lifestyle behavior (ie, where, when, and with whom) with a view to identifying promising intervention targets. Objective: The aim of this study was to develop and implement an innovative algorithm to determine ``where'' physical activity occurs using proximity sensors coupled with a widely used physical activity monitor. Methods: A total of 19 Bluetooth beacons were placed in fixed locations within a multilevel, mixed-use building. In addition, 4 receiver-mode sensors were fitted to the wrists of a roving technician who moved throughout the building. The experiment was divided into 4 trials with different walking speeds and dwelling times. The data were analyzed using an original and innovative algorithm based on graph generation and Bayesian filters. Results: Linear regression models revealed significant correlations between beacon-derived location and ground-truth tracking time, with intraclass correlations suggesting a high goodness of fit (R2=.9780). The algorithm reliably predicted indoor location, and the robustness of the algorithm improved with a longer dwelling time (>100 s; error <10\%, R2=.9775). Increased error was observed for transitions between areas due to the device sampling rate, currently limited to 0.1 Hz by the manufacturer. Conclusions: This study shows that our algorithm can accurately predict the location of an individual within an indoor environment. This novel implementation of ``context sensing'' will facilitate a wealth of new research questions on promoting healthy behavior change, the optimization of patient care, and efficient health care planning (eg, patient-clinician flow, patient-clinician interaction). ", doi="10.2196/mhealth.8516", url="http://mhealth.jmir.org/2018/4/e100/", url="http://www.ncbi.nlm.nih.gov/pubmed/29678806" } @Article{info:doi/10.2196/medinform.9679, author="Basit, A. Mujeeb and Baldwin, L. Krystal and Kannan, Vaishnavi and Flahaven, L. Emily and Parks, J. Cassandra and Ott, M. Jason and Willett, L. 
Duwayne", title="Agile Acceptance Test--Driven Development of Clinical Decision Support Advisories: Feasibility of Using Open Source Software", journal="JMIR Med Inform", year="2018", month="Apr", day="13", volume="6", number="2", pages="e23", keywords="clinical decision support systems", keywords="electronic health records", keywords="software validation", keywords="software verification", keywords="agile methods", keywords="test driven development", abstract="Background: Moving to electronic health records (EHRs) confers substantial benefits but risks unintended consequences. Modern EHRs consist of complex software code with extensive local configurability options, which can introduce defects. Defects in clinical decision support (CDS) tools are surprisingly common. Feasible approaches to prevent and detect defects in EHR configuration, including CDS tools, are needed. In complex software systems, use of test--driven development and automated regression testing promotes reliability. Test--driven development encourages modular, testable design and expanding regression test coverage. Automated regression test suites improve software quality, providing a ``safety net'' for future software modifications. Each automated acceptance test serves multiple purposes, as requirements (prior to build), acceptance testing (on completion of build), regression testing (once live), and ``living'' design documentation. Rapid-cycle development or ``agile'' methods are being successfully applied to CDS development. The agile practice of automated test--driven development is not widely adopted, perhaps because most EHR software code is vendor-developed. However, key CDS advisory configuration design decisions and rules stored in the EHR may prove amenable to automated testing as ``executable requirements.'' Objective: We aimed to establish feasibility of acceptance test--driven development of clinical decision support advisories in a commonly used EHR, using an open source automated acceptance testing framework (FitNesse). Methods: Acceptance tests were initially constructed as spreadsheet tables to facilitate clinical review. Each table specified one aspect of the CDS advisory's expected behavior. Table contents were then imported into a test suite in FitNesse, which queried the EHR database to automate testing. Tests and corresponding CDS configuration were migrated together from the development environment to production, with tests becoming part of the production regression test suite. Results: We used test--driven development to construct a new CDS tool advising Emergency Department nurses to perform a swallowing assessment prior to administering oral medication to a patient with suspected stroke. Test tables specified desired behavior for (1) applicable clinical settings, (2) triggering action, (3) rule logic, (4) user interface, and (5) system actions in response to user input. Automated test suite results for the ``executable requirements'' are shown prior to building the CDS alert, during build, and after successful build. Conclusions: Automated acceptance test--driven development and continuous regression testing of CDS configuration in a commercial EHR proves feasible with open source software. Automated test--driven development offers one potential contribution to achieving high-reliability EHR configuration. Vetting acceptance tests with clinicians elicits their input on crucial configuration details early during initial CDS design and iteratively during rapid-cycle optimization. 
", doi="10.2196/medinform.9679", url="http://medinform.jmir.org/2018/2/e23/", url="http://www.ncbi.nlm.nih.gov/pubmed/29653922" } @Article{info:doi/10.2196/medinform.7744, author="Lee, Junghye and Sun, Jimeng and Wang, Fei and Wang, Shuang and Jun, Chi-Hyuck and Jiang, Xiaoqian", title="Privacy-Preserving Patient Similarity Learning in a Federated Environment: Development and Analysis", journal="JMIR Med Inform", year="2018", month="Apr", day="13", volume="6", number="2", pages="e20", keywords="privacy", keywords="federated environment", keywords="similarity learning", keywords="hashing", keywords="homomorphic encryption", abstract="Background: There is an urgent need for the development of global analytic frameworks that can perform analyses in a privacy-preserving federated environment across multiple institutions without privacy leakage. A few studies on the topic of federated medical analysis have been conducted recently with the focus on several algorithms. However, none of them have solved similar patient matching, which is useful for applications such as cohort construction for cross-institution observational studies, disease surveillance, and clinical trials recruitment. Objective: The aim of this study was to present a privacy-preserving platform in a federated setting for patient similarity learning across institutions. Without sharing patient-level information, our model can find similar patients from one hospital to another. Methods: We proposed a federated patient hashing framework and developed a novel algorithm to learn context-specific hash codes to represent patients across institutions. The similarities between patients can be efficiently computed using the resulting hash codes of corresponding patients. To avoid security attack from reverse engineering on the model, we applied homomorphic encryption to patient similarity search in a federated setting. Results: We used sequential medical events extracted from the Multiparameter Intelligent Monitoring in Intensive Care-III database to evaluate the proposed algorithm in predicting the incidence of five diseases independently. Our algorithm achieved averaged area under the curves of 0.9154 and 0.8012 with balanced and imbalanced data, respectively, in $\kappa$-nearest neighbor with $\kappa$=3. We also confirmed privacy preservation in similarity search by using homomorphic encryption. Conclusions: The proposed algorithm can help search similar patients across institutions effectively to support federated data analysis in a privacy-preserving manner. ", doi="10.2196/medinform.7744", url="http://medinform.jmir.org/2018/2/e20/", url="http://www.ncbi.nlm.nih.gov/pubmed/29653917" } @Article{info:doi/10.2196/medinform.8240, author="Oreskovic, Michel Nicolas and Maniates, Jennifer and Weilburg, Jeffrey and Choy, Garry", title="Optimizing the Use of Electronic Health Records to Identify High-Risk Psychosocial Determinants of Health", journal="JMIR Med Inform", year="2017", month="Aug", day="14", volume="5", number="3", pages="e25", keywords="word recognition", keywords="Medicaid", keywords="psychosocial determinants of health", keywords="social determinants of health", keywords="care coordination", abstract="Background: Care coordination programs have traditionally focused on medically complex patients, identifying patients that qualify by analyzing formatted clinical data and claims data. However, not all clinically relevant data reside in claims and formatted data. 
Recently, there has been increasing interest in including patients with complex psychosocial determinants of health in care coordination programs. Psychosocial risk factors, including social determinants of health, mental health disorders, and substance abuse disorders, are less amenable to rapid and systematic data analyses, as these data are often not collected or stored as formatted data and, owing to US Health Insurance Portability and Accountability Act (HIPAA) regulations, are often not available as claims data. Objective: The objective of our study was to develop a systematic approach using word recognition software to identify psychosocial risk factors within any part of a patient's electronic health record (EHR). Methods: We used QPID (Queriable Patient Inference Dossier), ontology-driven word recognition software, to scan adult patients' EHRs to identify terms predicting a high-risk patient suitable to be followed in a care coordination program in Massachusetts, USA. Search terms identified high-risk conditions in patients known to be enrolled in a care coordination program, and were then tested against control patients. We calculated precision, recall, and balanced F-measure for the search terms. Results: We identified 22 EHR-available search terms to define psychosocial high-risk status; the presence of 9 or more of these terms predicted that a patient would meet inclusion criteria for a care coordination program. Precision was .80, recall .98, and balanced F-measure .88 for the identified terms. For adult patients insured by Medicaid and enrolled in the program, a mean of 14 terms (interquartile range [IQR] 11-18) were present as identified by the search tool, ranging from 2 to 22 terms. For patients enrolled in the program but not insured by Medicaid, a mean of 6 terms (IQR 3-8) were present as identified by the search tool, ranging from 1 to 21. Conclusions: Selected informatics tools such as word recognition software can be leveraged to improve health care delivery, such as an EHR-based protocol that identifies psychosocially complex patients eligible for enrollment in a care coordination program. ", doi="10.2196/medinform.8240", url="http://medinform.jmir.org/2017/3/e25/", url="http://www.ncbi.nlm.nih.gov/pubmed/28807893" } @Article{info:doi/10.2196/medinform.6808, author="Kaiser, Tim and Laireiter, Rupert Anton", title="DynAMo: A Modular Platform for Monitoring Process, Outcome, and Algorithm-Based Treatment Planning in Psychotherapy", journal="JMIR Med Inform", year="2017", month="Jul", day="20", volume="5", number="3", pages="e20", keywords="health information management", keywords="mental health", keywords="mental disorders", keywords="psychotherapeutic processes", keywords="algorithms", abstract="Background: In recent years, the assessment of mental disorders has become more and more personalized. Modern advancements such as Internet-enabled mobile phones and increased computing capacity make it possible to tap sources of information that have long been unavailable to mental health practitioners. Objective: Software packages that combine algorithm-based treatment planning, process monitoring, and outcome monitoring are scarce. The objective of this study was to assess whether the DynAMo Web application can fill this gap by providing a software solution that can be used by both researchers to conduct state-of-the-art psychotherapy process research and clinicians to plan treatments and monitor psychotherapeutic processes.
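The screening rule above (flag a patient when at least 9 of 22 psychosocial terms appear anywhere in the EHR) maps directly onto a term-count scan; in the sketch below, the term list and the lower threshold are hypothetical stand-ins for the study's 22 terms.

# Hedged sketch of term-count screening over EHR text: a patient is
# flagged as psychosocially high risk when at least `threshold` distinct
# terms are present. The term list is a hypothetical stand-in.
HIGH_RISK_TERMS = {"homeless", "substance abuse", "eviction",
                   "food insecurity", "unemployed", "domestic violence"}

def high_risk(ehr_text, terms=HIGH_RISK_TERMS, threshold=3):
    text = ehr_text.lower()
    hits = {term for term in terms if term in text}
    return len(hits) >= threshold, sorted(hits)

note = ("Patient is currently homeless, reports long-standing substance "
        "abuse, and is unemployed after a recent eviction.")
print(high_risk(note))  # (True, ['eviction', 'homeless', ...])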
Methods: In this paper, we report on the current state of a Web application that can be used for assessing the temporal structure of mental disorders using information on their temporal and synchronous associations. A treatment planning algorithm automatically interprets the data and delivers priority scores of symptoms to practitioners. The application is also capable of monitoring psychotherapeutic processes during therapy and of monitoring treatment outcomes. This application was developed using the R programming language (R Core Team, Vienna) and the Shiny Web application framework (RStudio, Inc, Boston). It is made entirely from open-source software packages and thus is easily extensible. Results: The capabilities of the proposed application are demonstrated. Case illustrations are provided to exemplify its usefulness in clinical practice. Conclusions: With the broad availability of Internet-enabled mobile phones and similar devices, collecting data on psychopathology and psychotherapeutic processes has become easier than ever. The proposed application is a valuable tool for capturing, processing, and visualizing these data. The combination of dynamic assessment and process- and outcome monitoring has the potential to improve the efficacy and effectiveness of psychotherapy. ", doi="10.2196/medinform.6808", url="http://medinform.jmir.org/2017/3/e20/", url="http://www.ncbi.nlm.nih.gov/pubmed/28729233" } @Article{info:doi/10.2196/medinform.6693, author="Fernandes, Oliveira Chrystinne and Lucena, De Carlos Jos{\'e} Pereira", title="A Software Framework for Remote Patient Monitoring by Using Multi-Agent Systems Support", journal="JMIR Med Inform", year="2017", month="Mar", day="27", volume="5", number="1", pages="e9", keywords="eHealth systems", keywords="remote patient monitoring", keywords="biometric sensors", abstract="Background: Although there have been significant advances in network, hardware, and software technologies, the health care environment has not taken advantage of these developments to solve many of its inherent problems. Research activities in these 3 areas make it possible to apply advanced technologies to address many of these issues such as real-time monitoring of a large number of patients, particularly where a timely response is critical. Objective: The objective of this research was to design and develop innovative technological solutions to offer a more proactive and reliable medical care environment. The short-term and primary goal was to construct IoT4Health, a flexible software framework to generate a range of Internet of things (IoT) applications, containing components such as multi-agent systems that are designed to perform Remote Patient Monitoring (RPM) activities autonomously. An investigation into its full potential to conduct such patient monitoring activities in a more proactive way is an expected future step. Methods: A framework methodology was selected to evaluate whether the RPM domain had the potential to generate customized applications that could achieve the stated goal of being responsive and flexible within the RPM domain. As a proof of concept of the software framework's flexibility, 3 applications were developed with different implementations for each framework hot spot to demonstrate potential. Agents4Health was selected to illustrate the instantiation process and IoT4Health's operation. 
To develop more concrete indicators of the responsiveness of the simulated care environment, an experiment was conducted while Agents4Health was operating, to measure the number of delays incurred in the monitoring tasks performed by agents. Results: IoT4Health's construction can be highlighted as our contribution to the development of eHealth solutions. As a software framework, IoT4Health offers extensibility points for the generation of applications. Applications can extend the framework in the following ways: identification, collection, storage, recovery, visualization, monitoring, anomaly detection, resource notification, and dynamic reconfiguration. Observation of the resulting applications further indicated that the framework's design contributed toward more proactive patient monitoring. Through these experimental systems, anomalies were detected in real time, with agents sending notifications instantly to the health providers. Conclusions: We conclude that the cost-benefit balance of constructing a more generic and complex system, instead of a custom-made software system, demonstrated the worth of the approach, making it possible to generate applications in this domain in a more timely fashion. ", doi="10.2196/medinform.6693", url="http://medinform.jmir.org/2017/1/e9/", url="http://www.ncbi.nlm.nih.gov/pubmed/28347973" } @Article{info:doi/10.2196/medinform.5977, author="Farhan, Wael and Wang, Zhimu and Huang, Yingxiang and Wang, Shuang and Wang, Fei and Jiang, Xiaoqian", title="A Predictive Model for Medical Events Based on Contextual Embedding of Temporal Sequences", journal="JMIR Med Inform", year="2016", month="Nov", day="25", volume="4", number="4", pages="e39", keywords="clinical decision support", keywords="early classification", keywords="temporal phenotyping", keywords="clinical event context embedding", abstract="Background: Medical concepts are inherently ambiguous and error-prone due to human fallibility, which makes it hard for them to be fully used by classical machine learning methods (eg, for tasks like early stage disease prediction). Objective: Our aim was to create a new machine-friendly representation that resembles the semantics of medical concepts. We then developed a sequential predictive model for medical events based on this new representation. Methods: We developed novel contextual embedding techniques to combine different medical events (eg, diagnoses, prescriptions, and lab tests). Each medical event is converted into a numerical vector that resembles its ``semantics,'' via which the similarity between medical events can be easily measured. We developed simple and effective predictive models based on these vectors to predict novel diagnoses. Results: We evaluated our sequential prediction model (and standard learning methods) in estimating the risk of potential diseases based on our contextual embedding representation. Our model achieved an area under the receiver operating characteristic (ROC) curve (AUC) of 0.79 on chronic systolic heart failure and an average AUC of 0.67 (over the 80 most common diagnoses) using the Medical Information Mart for Intensive Care III (MIMIC-III) dataset. Conclusions: We propose a general early prognosis predictor for 80 different diagnoses. Our method computes numeric representation for each medical event to uncover the potential meaning of those events. Our results demonstrate the efficiency of the proposed method, which will benefit patients and physicians by offering more accurate diagnoses.
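The contextual embedding idea in the entry above admits a compact illustration: events that occur in similar contexts receive similar vectors, so similarity between medical events reduces to vector comparison. The sketch below is a deliberately simplified co-occurrence variant with invented event names, offered only as an analogy to the authors' learned embeddings:

```python
# Simplified sketch (not the authors' model): represent each medical
# event by its visit-level co-occurrence counts, then compare events
# with cosine similarity. Event names and visits are synthetic.
from collections import defaultdict
from math import sqrt

visits = [
    ["dx:hypertension", "rx:lisinopril", "lab:creatinine"],
    ["dx:heart_failure", "rx:furosemide", "lab:bnp"],
    ["dx:hypertension", "dx:heart_failure", "rx:lisinopril"],
]

# Count how often two events co-occur within the same visit.
cooc = defaultdict(lambda: defaultdict(int))
for visit in visits:
    for a in visit:
        for b in visit:
            if a != b:
                cooc[a][b] += 1

def cosine(u, v):
    keys = set(u) | set(v)
    dot = sum(u.get(k, 0) * v.get(k, 0) for k in keys)
    nu = sqrt(sum(x * x for x in u.values()))
    nv = sqrt(sum(x * x for x in v.values()))
    return dot / (nu * nv) if nu and nv else 0.0

print(cosine(cooc["dx:hypertension"], cooc["dx:heart_failure"]))
```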
", doi="10.2196/medinform.5977", url="http://medinform.jmir.org/2016/4/e39/", url="http://www.ncbi.nlm.nih.gov/pubmed/27888170" } @Article{info:doi/10.2196/jmir.6560, author="Skonnord, Trygve and Steen, Finn and Skjeie, Holgeir and Fetveit, Arne and Brekke, Mette and Klovning, Atle", title="Survey Email Scheduling and Monitoring in eRCTs (SESAMe): A Digital Tool to Improve Data Collection in Randomized Controlled Clinical Trials", journal="J Med Internet Res", year="2016", month="Nov", day="22", volume="18", number="11", pages="e311", keywords="randomized controlled trials", keywords="data collection", keywords="surveys and questionnaires", keywords="quality improvement", keywords="sample size", keywords="Internet", keywords="email", keywords="text messaging", abstract="Background: Electronic questionnaires can ease data collection in randomized controlled trials (RCTs) in clinical practice. We found no existing software that could automate the sending of emails to participants enrolled into an RCT at different study participant inclusion time points. Objective: Our aim was to develop suitable software to facilitate data collection in an ongoing multicenter RCT of low back pain (the Acuback study). For the Acuback study, we determined that we would need to send a total of 5130 emails to 270 patients recruited at different centers and at 19 different time points. Methods: The first version of the software was tested in a pilot study in November 2013 but was unable to deliver multiuser or Web-based access. We resolved these shortcomings in the next version, which we tested on the Web in February 2014. Our new version was able to schedule and send the required emails in the full-scale Acuback trial that started in March 2014. The system architecture evolved through an iterative, inductive process between the project study leader and the software programmer. The program was tested and updated when errors occurred. To evaluate the development of the software, we used a logbook, a research assistant dialogue, and Acuback trial participant queries. Results: We have developed a Web-based app, Survey Email Scheduling and Monitoring in eRCTs (SESAMe), that monitors responses in electronic surveys and sends reminders by emails or text messages (short message service, SMS) to participants. The overall response rate for the 19 surveys in the Acuback study increased from 76.4\% (655/857) before we introduced reminders to 93.11\% (1149/1234) after the new function (P<.001). Further development will aim at securing encryption and data storage. Conclusions: The SESAMe software facilitates consecutive patient data collection in RCTs and can be used to increase response rates and quality of research, both in general practice and in other clinical trial settings. ", doi="10.2196/jmir.6560", url="http://www.jmir.org/2016/11/e311/", url="http://www.ncbi.nlm.nih.gov/pubmed/27876689" } @Article{info:doi/10.2196/medinform.6328, author="Zheng, Le and Wang, Yue and Hao, Shiying and Shin, Y. Andrew and Jin, Bo and Ngo, D. Anh and Jackson-Browne, S. Medina and Feller, J. Daniel and Fu, Tianyun and Zhang, Karena and Zhou, Xin and Zhu, Chunqing and Dai, Dorothy and Yu, Yunxian and Zheng, Gang and Li, Yu-Ming and McElhinney, B. Doff and Culver, S. Devore and Alfreds, T. Shaun and Stearns, Frank and Sylvester, G. 
Karl and Widen, Eric and Ling, Bruce Xuefeng", title="Web-based Real-Time Case Finding for the Population Health Management of Patients With Diabetes Mellitus: A Prospective Validation of the Natural Language Processing--Based Algorithm With Statewide Electronic Medical Records", journal="JMIR Med Inform", year="2016", month="Nov", day="11", volume="4", number="4", pages="e37", keywords="electronic medical record", keywords="natural language processing", keywords="diabetes mellitus", keywords="data mining", abstract="Background: Diabetes case finding based on structured medical records does not fully identify diabetic patients whose medical histories related to diabetes are available in the form of free text. Manual chart reviews have been used but involve high labor costs and long latency. Objective: This study developed and tested a Web-based diabetes case finding algorithm using both structured and unstructured electronic medical records (EMRs). Methods: This study was based on the health information exchange (HIE) EMR database that covers almost all health facilities in the state of Maine, United States. Using narrative clinical notes, a Web-based natural language processing (NLP) case finding algorithm was retrospectively (July 1, 2012, to June 30, 2013) developed with a random subset of HIE-associated facilities, which was then blind tested with the remaining facilities. The NLP-based algorithm was subsequently integrated into the HIE database and validated prospectively (July 1, 2013, to June 30, 2014). Results: Of the 935,891 patients in the prospective cohort, 64,168 diabetes cases were identified using diagnosis codes alone. Our NLP-based case finding algorithm prospectively found an additional 5756 uncodified cases (5756/64,168, 8.97\% increase) with a positive predictive value of .90. Of the 21,720 diabetic patients identified by both methods, 6616 patients (6616/21,720, 30.46\%) were identified by the NLP-based algorithm before a diabetes diagnosis was noted in the structured EMR (mean time difference = 48 days). Conclusions: The online NLP algorithm was effective in identifying uncodified diabetes cases in real time, leading to a significant improvement in diabetes case finding. The successful integration of the NLP-based case finding algorithm into the Maine HIE database indicates a strong potential for application of this novel method to achieve a more complete ascertainment of diagnoses of diabetes mellitus. ", doi="10.2196/medinform.6328", url="http://medinform.jmir.org/2016/4/e37/", url="http://www.ncbi.nlm.nih.gov/pubmed/27836816" } @Article{info:doi/10.2196/medinform.6530, author="Rajkomar, Alvin and Yim, Lan Joanne Wing and Grumbach, Kevin and Parekh, Ami", title="Weighting Primary Care Patient Panel Size: A Novel Electronic Health Record-Derived Measure Using Machine Learning", journal="JMIR Med Inform", year="2016", month="Oct", day="14", volume="4", number="4", pages="e29", keywords="primary health care", keywords="risk adjustment", keywords="patient acceptance of health care", keywords="ambulatory care", keywords="health care economics and organizations", keywords="medical informatics", keywords="machine learning", abstract="Background: Characterizing patient complexity using granular electronic health record (EHR) data regularly available to health systems is necessary to optimize primary care processes at scale. 
Objective: To characterize the utilization patterns of primary care patients and create weighted panel sizes for providers based on work required to care for patients with different patterns. Methods: We used EHR data over a 2-year period from patients empaneled to primary care clinicians in a single academic health system, including their in-person encounter history and virtual encounters such as telephonic visits, electronic messaging, and care coordination with specialists. Using a combination of decision rules and k-means clustering, we identified clusters of patients with similar health care system activity. Phenotypes with basic demographic information were used to predict future health care utilization using log-linear models. Phenotypes were also used to calculate weighted panel sizes. Results: We identified 7 primary care utilization phenotypes, which were characterized by various combinations of primary care and specialty usage and were deemed clinically distinct by primary care physicians. These phenotypes, combined with age-sex and primary payer variables, predicted future primary care utilization with an $R^2$ of .394 and were used to create weighted panel sizes. Conclusions: Individual patients' health care utilization may be useful for classifying patients by primary care work effort and for predicting future primary care usage. ", doi="10.2196/medinform.6530", url="http://medinform.jmir.org/2016/4/e29/", url="http://www.ncbi.nlm.nih.gov/pubmed/27742603" } @Article{info:doi/10.2196/medinform.5650, author="Gopakumar, Shivapratap and Tran, Truyen and Luo, Wei and Phung, Dinh and Venkatesh, Svetha", title="Forecasting Daily Patient Outflow From a Ward Having No Real-Time Clinical Data", journal="JMIR Med Inform", year="2016", month="Jul", day="21", volume="4", number="3", pages="e25", keywords="patient flow", keywords="discharge planning", keywords="predictive models", abstract="Objective: Our study investigates different models to forecast the total number of next-day discharges from an open ward having no real-time clinical data. Methods: We compared 5 popular regression algorithms to model total next-day discharges: (1) autoregressive integrated moving average (ARIMA), (2) autoregressive moving average with exogenous variables (ARMAX), (3) k-nearest neighbor regression, (4) random forest regression, and (5) support vector regression. Whereas the autoregressive integrated moving average model relied on the past 3 months of discharges, nearest neighbor forecasting used the median of similar past discharges to estimate next-day discharges. In addition, the ARMAX model used the day of the week and number of patients currently in ward as exogenous variables. For the random forest and support vector regression models, we designed a predictor set of 20 patient features and 88 ward-level features. Results: Our data consisted of 12,141 patient visits over 1826 days. Forecasting quality was measured using mean forecast error, mean absolute error, symmetric mean absolute percentage error, and root mean square error. When compared with a moving average prediction model, all 5 models demonstrated superior performance, with random forests achieving a 22.7\% improvement in mean absolute error for all days in the year 2014. Conclusions: In the absence of clinical information, our study recommends using patient-level and ward-level data in predicting next-day discharges.
Random forest and support vector regression models are able to use all available features from such data, resulting in superior performance over traditional autoregressive methods. An intelligent estimate of available beds in wards plays a crucial role in relieving access block in emergency departments. ", doi="10.2196/medinform.5650", url="http://medinform.jmir.org/2016/3/e25/", url="http://www.ncbi.nlm.nih.gov/pubmed/27444059" } @Article{info:doi/10.2196/medinform.4732, author="Mbagwu, Michael and French, D. Dustin and Gill, Manjot and Mitchell, Christopher and Jackson, Kathryn and Kho, Abel and Bryar, J. Paul", title="Creation of an Accurate Algorithm to Detect Snellen Best Documented Visual Acuity from Ophthalmology Electronic Health Record Notes", journal="JMIR Med Inform", year="2016", month="May", day="04", volume="4", number="2", pages="e14", keywords="visual acuity", keywords="best documented visual acuity", keywords="best corrected visual acuity", keywords="electronic health record", keywords="electronic medical record", keywords="phenotyping", keywords="data mining", keywords="ophthalmology", abstract="Background: Visual acuity is the primary measure used in ophthalmology to determine how well a patient can see. Visual acuity for a single eye may be recorded in multiple ways for a single patient visit (eg, Snellen vs. J{\"a}ger units vs. font print size), and be recorded for either distance or near vision. Capturing the best documented visual acuity (BDVA) of each eye in an individual patient visit is an important step for making electronic ophthalmology clinical notes useful in research. Objective: Currently, there is limited methodology for capturing BDVA in an efficient and accurate manner from electronic health record (EHR) notes. We developed an algorithm to detect BDVA for right and left eyes from defined fields within electronic ophthalmology clinical notes. Methods: We designed an algorithm to detect the BDVA from defined fields within 295,218 ophthalmology clinical notes with visual acuity data present. About 5668 unique responses were identified and an algorithm was developed to map all of the unique responses to a structured list of Snellen visual acuities. Results: Visual acuity was captured from a total of 295,218 ophthalmology clinical notes during the study dates. The algorithm identified all visual acuities in the defined visual acuity section for each eye and returned a single BDVA for each eye. A clinician chart review of 100 random patient notes showed a 99\% accuracy detecting BDVA from these records and 1\% observed error. Conclusions: Our algorithm successfully captures best documented Snellen distance visual acuity from ophthalmology clinical notes and transforms a variety of inputs into a structured Snellen equivalent list. Our work, to the best of our knowledge, represents the first attempt at capturing visual acuity accurately from large numbers of electronic ophthalmology notes. Use of this algorithm can benefit research groups interested in assessing visual acuity for patient centered outcome. All codes used for this study are currently available, and will be made available online at https://phekb.org. ", doi="10.2196/medinform.4732", url="http://medinform.jmir.org/2016/2/e14/", url="http://www.ncbi.nlm.nih.gov/pubmed/27146002" } @Article{info:doi/10.2196/jmir.4448, author="Sadasivam, Shankar Rajani and Cutrona, L. Sarah and Kinney, L. Rebecca and Marlin, M. Benjamin and Mazor, M. Kathleen and Lemon, C. Stephenie and Houston, K. 
Thomas", title="Collective-Intelligence Recommender Systems: Advancing Computer Tailoring for Health Behavior Change Into the 21st Century", journal="J Med Internet Res", year="2016", month="Mar", day="07", volume="18", number="3", pages="e42", keywords="computer-tailored health communication", keywords="machine learning", keywords="recommender systems", abstract="Background: What is the next frontier for computer-tailored health communication (CTHC) research? In current CTHC systems, study designers who have expertise in behavioral theory and mapping theory into CTHC systems select the variables and develop the rules that specify how the content should be tailored, based on their knowledge of the targeted population, the literature, and health behavior theories. In collective-intelligence recommender systems (hereafter recommender systems) used by Web 2.0 companies (eg, Netflix and Amazon), machine learning algorithms combine user profiles and continuous feedback ratings of content (from themselves and other users) to empirically tailor content. Augmenting current theory-based CTHC with empirical recommender systems could be evaluated as the next frontier for CTHC. Objective: The objective of our study was to uncover barriers and challenges to using recommender systems in health promotion. Methods: We conducted a focused literature review, interviewed subject experts (n=8), and synthesized the results. Results: We describe (1) limitations of current CTHC systems, (2) advantages of incorporating recommender systems to move CTHC forward, and (3) challenges to incorporating recommender systems into CTHC. Based on the evidence presented, we propose a future research agenda for CTHC systems. Conclusions: We promote discussion of ways to move CTHC into the 21st century by incorporation of recommender systems. ", doi="10.2196/jmir.4448", url="http://www.jmir.org/2016/3/e42/", url="http://www.ncbi.nlm.nih.gov/pubmed/26952574" } @Article{info:doi/10.2196/medinform.4842, author="Cuba Gyllensten, Illapha and Bonomi, G. Alberto and Goode, M. Kevin and Reiter, Harald and Habetha, Joerg and Amft, Oliver and Cleland, GF John", title="Early Indication of Decompensated Heart Failure in Patients on Home-Telemonitoring: A Comparison of Prediction Algorithms Based on Daily Weight and Noninvasive Transthoracic Bio-impedance", journal="JMIR Med Inform", year="2016", month="Feb", day="18", volume="4", number="1", pages="e3", keywords="Heart failure", keywords="telemonitoring", keywords="deterioration detection", keywords="alert algorithms", keywords="ambulatory monitoring", keywords="impedance", abstract="Background: Heart Failure (HF) is a common reason for hospitalization. Admissions might be prevented by early detection of and intervention for decompensation. Conventionally, changes in weight, a possible measure of fluid accumulation, have been used to detect deterioration. Transthoracic impedance may be a more sensitive and accurate measure of fluid accumulation. Objective: In this study, we review previously proposed predictive algorithms using body weight and noninvasive transthoracic bio-impedance (NITTI) to predict HF decompensations. Methods: We monitored 91 patients with chronic HF for an average of 10 months using a weight scale and a wearable bio-impedance vest. Three algorithms were tested using either simple rule-of-thumb differences (RoT), moving averages (MACD), or cumulative sums (CUSUM). 
Results: Algorithms using NITTI in the 2 weeks preceding decompensation predicted events (P<.001); however, using weight alone did not. Cross-validation showed that NITTI improved sensitivity of all algorithms tested and that trend algorithms provided the best performance for either measurement (Weight-MACD: 33\%, NITTI-CUSUM: 60\%) in contrast to the simpler rules-of-thumb (Weight-RoT: 20\%, NITTI-RoT: 33\%) as proposed in HF guidelines. Conclusions: NITTI measurements decrease before decompensations, and combined with trend algorithms, improve the detection of HF decompensation over current guideline rules; however, many alerts are not associated with clinically overt decompensation. ", doi="10.2196/medinform.4842", url="http://medinform.jmir.org/2016/1/e3/", url="http://www.ncbi.nlm.nih.gov/pubmed/26892844" } @Article{info:doi/10.2196/medinform.4923, author="Kim, Jae Young and Park, Won Ji and Kim, Wan Jong and Park, Chan-Soo and Gonzalez, S. John Paul and Lee, Hyun Seung and Kim, Gi Kwang and Oh, Hwan Jae", title="Computerized Automated Quantification of Subcutaneous and Visceral Adipose Tissue From Computed Tomography Scans: Development and Validation Study", journal="JMIR Med Inform", year="2016", month="Feb", day="04", volume="4", number="1", pages="e2", keywords="obesity", keywords="visceral adipose tissue", keywords="subcutaneous adipose tissue", keywords="computed tomography", keywords="computer-assisted image analysis", abstract="Background: Computed tomography (CT) is often viewed as one of the most accurate methods for measuring visceral adipose tissue (VAT). However, measuring VAT and subcutaneous adipose tissue (SAT) from CT is a time-consuming and tedious process. Thus, evaluating patients' obesity levels during clinical trials using CT scans is both cumbersome and limiting. Objective: To describe an image-processing-based and automated method for measuring adipose tissue in the entire abdominal region. Methods: The method detects SAT and VAT levels using a separation mask based on muscles of the human body. The separation mask is the region that minimizes the unnecessary space between a closed path and muscle area. In addition, a correction mask, based on bones, corrects the error in VAT. Results: To validate the method, the volume of total adipose tissue (TAT), SAT, and VAT were measured for a total of 100 CTs using the automated method, and the results compared with those from manual measurements obtained by 2 experts. Dice's similarity coefficients (DSCs) between the first manual measurement and the automated result for TAT, SAT, and VAT are 0.99, 0.98, and 0.97, respectively. The DSCs between the second manual measurement and the automated result for TAT, SAT, and VAT are 0.98, 0.98, and 0.97, respectively. Moreover, intraclass correlation coefficients (ICCs) between the automated method and the results of the manual measurements indicate high reliability as the ICCs for the items are all .99 (P<.001). Conclusions: The results described in this paper confirm the accuracy and reliability of the proposed method. The method is expected to be both convenient and useful in the clinical evaluation and study of obesity in patients who require SAT and VAT measurements. ", doi="10.2196/medinform.4923", url="http://medinform.jmir.org/2016/1/e2/", url="http://www.ncbi.nlm.nih.gov/pubmed/26846251" } @Article{info:doi/10.2196/medinform.4959, author="Zhang, Guo-Qiang and Tao, Shiqiang and Xing, Guangming and Mozes, Jeno and Zonjy, Bilal and Lhatoo, D. 
Samden and Cui, Licong", title="NHash: Randomized N-Gram Hashing for Distributed Generation of Validatable Unique Study Identifiers in Multicenter Research", journal="JMIR Med Inform", year="2015", month="Nov", day="10", volume="3", number="4", pages="e35", keywords="cryptographic hash function", keywords="multi-center study", keywords="study identifiers", keywords="health information management", keywords="data integration", keywords="patient cohort identification", keywords="search interface", abstract="Background: A unique study identifier serves as a key for linking research data about a study subject without revealing protected health information in the identifier. While sufficient for single-site and limited-scale studies, the use of common unique study identifiers has several drawbacks for large multicenter studies, where thousands of research participants may be recruited from multiple sites. An important property of study identifiers is error tolerance (or validatable), in that inadvertent editing mistakes during their transmission and use will most likely result in invalid study identifiers. Objective: This paper introduces a novel method called ``Randomized N-gram Hashing (NHash),'' for generating unique study identifiers in a distributed and validatable fashion, in multicenter research. NHash has a unique set of properties: (1) it is a pseudonym serving the purpose of linking research data about a study participant for research purposes; (2) it can be generated automatically in a completely distributed fashion with virtually no risk for identifier collision; (3) it incorporates a set of cryptographic hash functions based on N-grams, with a combination of additional encryption techniques such as a shift cipher; (d) it is validatable (error tolerant) in the sense that inadvertent edit errors will mostly result in invalid identifiers. Methods: NHash consists of 2 phases. First, an intermediate string using randomized N-gram hashing is generated. This string consists of a collection of N-gram hashes f1, f2, ..., fk. The input for each function fi has 3 components: a random number r, an integer n, and input data m. The result, fi(r, n, m), is an n-gram of m with a starting position s, which is computed as (r mod |m|), where |m| represents the length of m. The output for Step 1 is the concatenation of the sequence f1(r1, n1, m1), f2(r2, n2, m2), ..., fk(rk, nk, mk). In the second phase, the intermediate string generated in Phase 1 is encrypted using techniques such as shift cipher. The result of the encryption, concatenated with the random number r, is the final NHash study identifier. Results: We performed experiments using a large synthesized dataset comparing NHash with random strings, and demonstrated neglegible probability for collision. We implemented NHash for the Center for SUDEP Research (CSR), a National Institute for Neurological Disorders and Stroke-funded Center Without Walls for Collaborative Research in the Epilepsies. This multicenter collaboration involves 14 institutions across the United States and Europe, bringing together extensive and diverse expertise to understand sudden unexpected death in epilepsy patients (SUDEP). Conclusions: The CSR Data Repository has successfully used NHash to link deidentified multimodal clinical data collected in participating CSR institutions, meeting all desired objectives of NHash. 
", doi="10.2196/medinform.4959", url="http://medinform.jmir.org/2015/4/e35/", url="http://www.ncbi.nlm.nih.gov/pubmed/26554419" } @Article{info:doi/10.2196/medinform.4211, author="Zhou, Xiaofang and Zheng, An and Yin, Jiaheng and Chen, Rudan and Zhao, Xianyang and Xu, Wei and Cheng, Wenqing and Xia, Tian and Lin, Simon", title="Context-Sensitive Spelling Correction of Consumer-Generated Content on Health Care", journal="JMIR Med Inform", year="2015", month="Jul", day="31", volume="3", number="3", pages="e27", keywords="spelling correction system", keywords="context sensitive", keywords="consumer-generated content", keywords="biomedical ontology", abstract="Background: Consumer-generated content, such as postings on social media websites, can serve as an ideal source of information for studying health care from a consumer's perspective. However, consumer-generated content on health care topics often contains spelling errors, which, if not corrected, will be obstacles for downstream computer-based text analysis. Objective: In this study, we proposed a framework with a spelling correction system designed for consumer-generated content and a novel ontology-based evaluation system which was used to efficiently assess the correction quality. Additionally, we emphasized the importance of context sensitivity in the correction process, and demonstrated why correction methods designed for electronic medical records (EMRs) failed to perform well with consumer-generated content. Methods: First, we developed our spelling correction system based on Google Spell Checker. The system processed postings acquired from MedHelp, a biomedical bulletin board system (BBS), and saved misspelled words (eg, sertaline) and corresponding corrected words (eg, sertraline) into two separate sets. Second, to reduce the number of words needing manual examination in the evaluation process, we respectively matched the words in the two sets with terms in two biomedical ontologies: RxNorm and Systematized Nomenclature of Medicine -- Clinical Terms (SNOMED CT). The ratio of words which could be matched and appropriately corrected was used to evaluate the correction system's overall performance. Third, we categorized the misspelled words according to the types of spelling errors. Finally, we calculated the ratio of abbreviations in the postings, which remarkably differed between EMRs and consumer-generated content and could largely influence the overall performance of spelling checkers. Results: An uncorrected word and the corresponding corrected word was called a spelling pair, and the two words in the spelling pair were its members. In our study, there were 271 spelling pairs detected, among which 58 (21.4\%) pairs had one or two members matched in the selected ontologies. The ratio of appropriate correction in the 271 overall spelling errors was 85.2\% (231/271). The ratio of that in the 58 spelling pairs was 86\% (50/58), close to the overall ratio. We also found that linguistic errors took up 31.4\% (85/271) of all errors detected, and only 0.98\% (210/21,358) of words in the postings were abbreviations, which was much lower than the ratio in the EMRs (33.6\%). Conclusions: We conclude that our system can accurately correct spelling errors in consumer-generated content. Context sensitivity is indispensable in the correction process. Additionally, it can be confirmed that consumer-generated content differs from EMRs in that consumers seldom use abbreviations. 
Also, the evaluation method, taking advantage of biomedical ontology, can effectively estimate the accuracy of the correction system and reduce manual examination time. ", doi="10.2196/medinform.4211", url="http://medinform.jmir.org/2015/3/e27/", url="http://www.ncbi.nlm.nih.gov/pubmed/26232246" } @Article{info:doi/10.2196/medinform.3738, author="English, M. Thomas and Kinney, L. Rebecca and Davis, J. Michael and Kamberi, Ariana and Chan, Wayne and Sadasivam, S. Rajani and Houston, K. Thomas", title="Identification of Relationships Between Patients Through Elements in a Data Warehouse Using the Familial, Associational, and Incidental Relationship (FAIR) Initiative: A Pilot Study", journal="JMIR Med Inform", year="2015", month="Feb", day="13", volume="3", number="1", pages="e9", keywords="Informatics for Integrating Biology and the Bedside (i2b2)", keywords="data warehouse", keywords="familial relationship", abstract="Background: Over the last several years there has been widespread development of medical data warehouses. Current data warehouses focus on individual cases, but lack the ability to identify family members that could be used for dyadic or familial research. Currently, the patient's family history in the medical record is the only documentation we have to understand the health status and social habits of their family members. Identifying familial linkages in a phenotypic data warehouse can be valuable in cohort identification and in beginning to understand the interactions of diseases among families. Objective: The goal of the Familial, Associational, \& Incidental Relationships (FAIR) initiative is to identify an index set of patients' relationships through elements in a data warehouse. Methods: Using a test set of 500 children, we measured the sensitivity and specificity of available linkage algorithm identifiers (eg, insurance identification numbers and phone numbers) and validated this tool/algorithm through a manual chart audit. Results: Of all the children, 52.4\% (262/500) were male, and the mean age of the cohort was 8 years old (SD 5). Of the children, 51.6\% (258/500) were identified as white in race. The identifiers used for FAIR were available for the majority of patients: insurance number (483/500, 96.6\%), phone number (500/500, 100\%), and address (497/500, 99.4\%). When utilizing the FAIR tool and various combinations of identifiers, sensitivity ranged from 15.5\% (62/401) to 83.8\% (336/401), and specificity from 72\% (71/99) to 100\% (99/99). The preferred method was matching patients using insurance or phone number, which had a sensitivity of 72.1\% (289/401) and a specificity of 94\% (93/99). Using the Informatics for Integrating Biology and the Bedside (i2b2) warehouse infrastructure, we have now developed a Web app that facilitates FAIR for any index population. Conclusions: FAIR is a valuable research and clinical resource that extends the capabilities of existing data warehouses and lays the groundwork for family-based research. FAIR will expedite studies that would otherwise require registry or manual chart abstraction data sources. ", doi="10.2196/medinform.3738", url="http://medinform.jmir.org/2015/1/e9/", url="http://www.ncbi.nlm.nih.gov/pubmed/25803561" } @Article{info:doi/10.2196/medinform.3339, author="Dixit, Abhishek and Dobson, B. 
Richard J.", title="CohortExplorer: A Generic Application Programming Interface for Entity Attribute Value Database Schemas", journal="JMIR Med Inform", year="2014", month="Dec", day="01", volume="2", number="2", pages="e32", keywords="entity-attribute-value schema", keywords="biobank database", keywords="clinical information systems", keywords="CDISC ODM", keywords="SQL", abstract="Background: Most electronic data capture (EDC) and electronic data management (EDM) systems developed to collect and store clinical data from participants recruited into studies are based on generic entity-attribute-value (EAV) database schemas which enable rapid and flexible deployment in a range of study designs. The drawback to such schemas is that they are cumbersome to query with structured query language (SQL). The problem increases when researchers involved in multiple studies use multiple electronic data capture and management systems each with variation on the EAV schema. Objective: The aim of this study is to develop a generic application which allows easy and rapid exploration of data and metadata stored under EAV schemas that are organized into a survey format (questionnaires/events, questions, values), in other words, the Clinical Data Interchange Standards Consortium (CDISC) Observational Data Model (ODM). Methods: CohortExplorer is written in Perl programming language and uses the concept of SQL abstract which allows the SQL query to be treated like a hash (key-value pairs). Results: We have developed a tool, CohortExplorer, which once configured for a EAV system will ``plug-n-play'' with EAV schemas, enabling the easy construction of complex queries through an abstracted interface. To demonstrate the utility of the CohortExplorer system, we show how it can be used with the popular EAV based frameworks; Opal (OBiBa) and REDCap. Conclusions: The application is available under a GPL-3+ license at the CPAN website. Currently the application only provides datasource application programming interfaces (APIs) for Opal and REDCap. In the future the application will be available with datasource APIs for all major electronic data capture and management systems such as OpenClinica and LabKey. At present the application is only compatible with EAV systems where the metadata is organized into surveys, questionnaires and events. Further work is needed to make the application compatible with EAV schemas where the metadata is organized into hierarchies such as Informatics for Integrating Biology \& the Bedside (i2b2). A video tutorial demonstrating the application setup, datasource configuration, and search features is available on YouTube. The application source code is available at the GitHub website and the users are encouraged to suggest new features and contribute to the development of APIs for new EAV systems. 
", doi="10.2196/medinform.3339", url="http://medinform.jmir.org/2014/2/e32/", url="http://www.ncbi.nlm.nih.gov/pubmed/25601296" } @Article{info:doi/10.2196/medinform.3463, author="Rastegar-Mojarad, Majid and Kadolph, Christopher and Ye, Zhan and Wall, Daniel and Murali, Narayana and Lin, Simon", title="A Fuzzy-Match Search Engine for Physician Directories", journal="JMIR Med Inform", year="2014", month="Nov", day="04", volume="2", number="2", pages="e30", keywords="Fuzzy-Match", keywords="Levenshtein Distance", keywords="Physician Name", keywords="Physician Directory", abstract="Background: A search engine to find physicians' information is a basic but crucial function of a health care provider's website. Inefficient search engines, which return no results or incorrect results, can lead to patient frustration and potential customer loss. A search engine that can handle misspellings and spelling variations of names is needed, as the United States (US) has culturally, racially, and ethnically diverse names. Objective: The Marshfield Clinic website provides a search engine for users to search for physicians' names. The current search engine provides an auto-completion function, but it requires an exact match. We observed that 26\% of all searches yielded no results. The goal was to design a fuzzy-match algorithm to aid users in finding physicians easier and faster. Methods: Instead of an exact match search, we used a fuzzy algorithm to find similar matches for searched terms. In the algorithm, we solved three types of search engine failures: ``Typographic'', ``Phonetic spelling variation'', and ``Nickname''. To solve these mismatches, we used a customized Levenshtein distance calculation that incorporated Soundex coding and a lookup table of nicknames derived from US census data. Results: Using the ``Challenge Data Set of Marshfield Physician Names,'' we evaluated the accuracy of fuzzy-match engine--top ten (90\%) and compared it with exact match (0\%), Soundex (24\%), Levenshtein distance (59\%), and fuzzy-match engine--top one (71\%). Conclusions: We designed, created a reference implementation, and evaluated a fuzzy-match search engine for physician directories. The open-source code is available at the codeplex website and a reference implementation is available for demonstration at the datamarsh website. ", doi="10.2196/medinform.3463", url="http://medinform.jmir.org/2014/2/e30/", url="http://www.ncbi.nlm.nih.gov/pubmed/25601050" } @Article{info:doi/10.2196/medinform.3251, author="Install{\'e}, JF Arnaud and Van den Bosch, Thierry and De Moor, Bart and Timmerman, Dirk", title="Clinical Data Miner: An Electronic Case Report Form System With Integrated Data Preprocessing and Machine-Learning Libraries Supporting Clinical Diagnostic Model Research", journal="JMIR Med Inform", year="2014", month="Oct", day="20", volume="2", number="2", pages="e28", keywords="data collection", keywords="machine-learning", keywords="clinical decision support systems", keywords="data analysis", abstract="Background: Using machine-learning techniques, clinical diagnostic model research extracts diagnostic models from patient data. Traditionally, patient data are often collected using electronic Case Report Form (eCRF) systems, while mathematical software is used for analyzing these data using machine-learning techniques. Due to the lack of integration between eCRF systems and mathematical software, extracting diagnostic models is a complex, error-prone process. 
Moreover, due to the complexity of this process, it is usually only performed once, after a predetermined number of data points have been collected, without insight into the predictive performance of the resulting models. Objective: The objective of this study of the Clinical Data Miner (CDM) software framework is to offer an eCRF system with integrated data preprocessing and machine-learning libraries, improving efficiency of the clinical diagnostic model research workflow, and to enable optimization of patient inclusion numbers through study performance monitoring. Methods: The CDM software framework was developed using a test-driven development (TDD) approach, to ensure high software quality. Architecturally, CDM's design is split over a number of modules, to ensure future extendability. Results: The TDD approach has enabled us to deliver high software quality. CDM's eCRF Web interface is in active use by the studies of the International Endometrial Tumor Analysis consortium, with over 4000 enrolled patients, and more studies planned. Additionally, a derived user interface has been used in six separate interrater agreement studies. CDM's integrated data preprocessing and machine-learning libraries simplify some otherwise manual and error-prone steps in the clinical diagnostic model research workflow. Furthermore, CDM's libraries provide study coordinators with a method to monitor a study's predictive performance as patient inclusions increase. Conclusions: To our knowledge, CDM is the only eCRF system integrating data preprocessing and machine-learning libraries. This integration improves the efficiency of the clinical diagnostic model research workflow. Moreover, by simplifying the generation of learning curves, CDM enables study coordinators to assess more accurately when data collection can be terminated, resulting in better models or lower patient recruitment costs. ", doi="10.2196/medinform.3251", url="http://medinform.jmir.org/2014/2/e28/", url="http://www.ncbi.nlm.nih.gov/pubmed/25600863" } @Article{info:doi/10.2196/medinform.2671, author="De Silva, Daswin and Burstein, Frada", title="An Intelligent Content Discovery Technique for Health Portal Content Management", journal="JMIR Med Inform", year="2014", month="Apr", day="23", volume="2", number="1", pages="e7", keywords="health information retrieval, personalised content management, health information portal, fuzzy multi-criteria ranking, automated content discovery, data analytics, text mining", abstract="Background: Continuous content management of health information portals is a feature vital for their sustainability and widespread acceptance. Knowledge and experience of a domain expert are essential for content management in the health domain. The rate of generation of online health resources is exponential, and thus manual examination for relevance to a specific topic and audience is a formidable challenge for domain experts. Intelligent content discovery for effective content management is a less researched topic. An existing expert-endorsed content repository can provide the necessary leverage to automatically identify relevant resources and evaluate qualitative metrics. Objective: This paper reports on the design research towards an intelligent technique for automated content discovery and ranking for health information portals.
The proposed technique aims to improve efficiency of the current mostly manual process of portal content management by utilising an existing expert-endorsed content repository as a supporting base and a benchmark to evaluate the suitability of new content. Methods: A model for content management was established based on a field study of potential users. The proposed technique is integral to this content management model and executes in several phases (ie, query construction, content search, text analytics and fuzzy multi-criteria ranking). The construction of multi-dimensional search queries with input from WordNet, the use of multi-word and single-word terms as representative semantics for text analytics and the use of fuzzy multi-criteria ranking for subjective evaluation of quality metrics are original contributions reported in this paper. Results: The feasibility of the proposed technique was examined with experiments conducted on an actual health information portal, the BCKOnline portal. Both intermediary and final results generated by the technique are presented in the paper and these help to establish benefits of the technique and its contribution towards effective content management. Conclusions: The prevalence of large numbers of online health resources is a key obstacle for domain experts involved in content management of health information portals and websites. The proposed technique has proven successful at searching for and identifying resources and measuring their relevance. It can be used to support the domain expert in content management and thereby ensure the health portal remains up-to-date. ", doi="10.2196/medinform.2671", url="http://medinform.jmir.org/2014/1/e7/", url="http://www.ncbi.nlm.nih.gov/pubmed/25654440" } @Article{info:doi/10.2196/medinform.3028, author="Singh, Rajendra and Mathiassen, Lars and Switzer, A. Jeffrey and Adams, J. Robert", title="Assimilation of Web-Based Urgent Stroke Evaluation: A Qualitative Study of Two Networks", journal="JMIR Med Inform", year="2014", month="Apr", day="15", volume="2", number="1", pages="e6", keywords="telemedicine", keywords="stroke", keywords="telestroke", keywords="information technology assimilation", keywords="case study", abstract="Background: Stroke is a leading cause of death and serious, long-term disability across the world. Urgent stroke treatment is time-sensitive and requires a stroke-trained neurologist for clinical diagnosis. Rural areas, where neurologists and stroke specialists are lacking, have a high incidence of stroke-related death and disability. By virtually connecting emergency department physicians in rural hospitals to regional medical centers for consultations, specialized Web-based stroke evaluation systems (telestroke) have helped address the challenge of urgent stroke care in underserved communities. However, many rural hospitals that have deployed telestroke have not fully assimilated this technology. Objective: The objective of this study was to explore potential sources of variations in the utilization of a Web-based telestroke system for urgent stroke evaluation and propose a telestroke assimilation model to improve stroke care performance. Methods: An exploratory, qualitative case study of two telestroke networks, each comprising an academic stroke center (hub) and connected rural hospitals (spokes), was conducted. Data were collected from 50 semistructured interviews with 40 stakeholders, telestroke usage logs from 32 spokes, site visits, published papers, and reports.
Results: The two networks used identical technology (called Remote Evaluation of Acute isCHemic stroke, REACH) and were of similar size and complexity, but showed large variations in telestroke assimilation across spokes. Several observed hub- and spoke-related characteristics can explain these variations. The hub-related characteristics included telestroke institutionalization into stroke care, resources for the telestroke program, ongoing support for stroke readiness of spokes, telestroke performance monitoring, and continuous telestroke process improvement. The spoke-related characteristics included managerial telestroke championship, stroke center certification, dedicated telestroke coordinator, stroke committee of key stakeholders, local neurological expertise, and continuous telestroke process improvement. Conclusions: Rural hospitals can improve their stroke readiness with use of telestroke systems. However, they need to integrate the technology into their stroke delivery processes. A telestroke assimilation model may improve stroke care performance. ", doi="10.2196/medinform.3028", url="http://medinform.jmir.org/2014/1/e6/", url="http://www.ncbi.nlm.nih.gov/pubmed/25601232" } @Article{info:doi/10.2196/medinform.3172, author="Adamusiak, Tomasz and Shimoyama, Naoki and Shimoyama, Mary", title="Next Generation Phenotyping Using the Unified Medical Language System", journal="JMIR Med Inform", year="2014", month="Mar", day="18", volume="2", number="1", pages="e5", keywords="meaningful use", keywords="semantic interoperability", keywords="UMLS", keywords="SNOMED CT", keywords="LOINC", keywords="RxNorm", keywords="CPT", keywords="HCPCS", keywords="ICD-9", keywords="ICD-10", abstract="Background: Structured information within patient medical records represents a largely untapped treasure trove of research data. In the United States, privacy issues notwithstanding, this has recently become more accessible thanks to the increasing adoption of electronic health records (EHR) and health care data standards fueled by the Meaningful Use legislation. The other side of the coin is that it is now becoming increasingly more difficult to navigate the profusion of many disparate clinical terminology standards, which often span millions of concepts. Objective: The objective of our study was to develop a methodology for integrating large amounts of structured clinical information that is both terminology agnostic and able to capture heterogeneous clinical phenotypes including problems, procedures, medications, and clinical results (such as laboratory tests and clinical observations). In this context, we define phenotyping as the extraction of all clinically relevant features contained in the EHR. Methods: The scope of the project was framed by the Common Meaningful Use (MU) Dataset terminology standards; the Systematized Nomenclature of Medicine Clinical Terms (SNOMED CT), RxNorm, the Logical Observation Identifiers Names and Codes (LOINC), the Current Procedural Terminology (CPT), the Health care Common Procedure Coding System (HCPCS), the International Classification of Diseases Ninth Revision Clinical Modification (ICD-9-CM), and the International Classification of Diseases Tenth Revision Clinical Modification (ICD-10-CM). The Unified Medical Language System (UMLS) was used as a mapping layer among the MU ontologies. An extract, load, and transform approach separated original annotations in the EHR from the mapping process and allowed for continuous updates as the terminologies were updated. 
Additionally, we integrated all terminologies into a single UMLS derived ontology and further optimized it to make the relatively large concept graph manageable. Results: The initial evaluation was performed with simulated data from the Clinical Avatars project using 100,000 virtual patients undergoing a 90 day, genotype guided, warfarin dosing protocol. This dataset was annotated with standard MU terminologies, loaded, and transformed using the UMLS. We have deployed this methodology to scale in our in-house analytics platform using structured EHR data for 7931 patients (12 million clinical observations) treated at the Froedtert Hospital. A demonstration limited to Clinical Avatars data is available on the Internet using the credentials user ``jmirdemo'' and password ``jmirdemo''. Conclusions: Despite its inherent complexity, the UMLS can serve as an effective interface terminology for many of the clinical data standards currently used in the health care domain. ", doi="10.2196/medinform.3172", url="http://medinform.jmir.org/2014/1/e5/", url="http://www.ncbi.nlm.nih.gov/pubmed/25601137" } @Article{info:doi/10.2196/medinform.3205, author="Harvey, Harlan and Krishnaraj, Arun and Alkasab, K. Tarik", title="Use of Expert Relevancy Ratings to Validate Task-Specific Search Strategies for Electronic Medical Records", journal="JMIR Med Inform", year="2014", month="Mar", day="11", volume="2", number="1", pages="e4", keywords="medical informatics", keywords="medical records systems", keywords="computerized", keywords="health information management", doi="10.2196/medinform.3205", url="http://medinform.jmir.org/2014/1/e4/", url="http://www.ncbi.nlm.nih.gov/pubmed/25601018" } @Article{info:doi/10.2196/medinform.3204, author="Harvey, Benjamin H. and Krishnaraj, Arun and Alkasab, K. Tarik", title="A Software System to Collect Expert Relevance Ratings of Medical Record Items for Specific Clinical Tasks", journal="JMIR Med Inform", year="2014", month="Feb", day="28", volume="2", number="1", pages="e3", keywords="medical informatics", keywords="health information management", keywords="computerized medical records system", doi="10.2196/medinform.3204", url="http://medinform.jmir.org/2014/1/e3/", url="http://www.ncbi.nlm.nih.gov/pubmed/25600925" } @Article{info:doi/10.2196/medinform.3090, author="Pal, Doyel and Chen, Tingting and Zhong, Sheng and Khethavath, Praveen", title="Designing an Algorithm to Preserve Privacy for Medical Record Linkage With Error-Prone Data", journal="JMIR Med Inform", year="2014", month="Jan", day="20", volume="2", number="1", pages="e2", keywords="privacy", keywords="medical record linkage", keywords="error-prone data", abstract="Background: Linking medical records across different medical service providers is important to the enhancement of health care quality and public health surveillance. In records linkage, protecting the patients' privacy is a primary requirement. In real-world health care databases, records may well contain errors due to various reasons such as typos. Linking the error-prone data and preserving data privacy at the same time are very difficult. Existing privacy preserving solutions for this problem are only restricted to textual data. Objective: To enable different medical service providers to link their error-prone data in a private way, our aim was to provide a holistic solution by designing and developing a medical record linkage system for medical service providers. 
Methods: To initiate a record linkage, one provider selects one of its collaborators in the Connection Management Module, chooses the attributes of the database to be matched, and establishes the connection with the collaborator after negotiation. In the Data Matching Module, for error-free data, our solution offered two choices of cryptographic schemes. For error-prone numerical data, we proposed a newly designed privacy-preserving linking algorithm, the Error-Tolerant Linking Algorithm, which allows error-prone data to be correctly matched if the distance between two records is below a threshold. Results: We designed and developed a comprehensive and user-friendly software system that provides privacy-preserving record linkage functions for medical service providers and meets the regulations of the Health Insurance Portability and Accountability Act. It does not require a third party, and it is secure in that neither entity can learn the records in the other's database. Moreover, our novel Error-Tolerant Linking Algorithm implemented in this software works well with error-prone numerical data. We theoretically proved the correctness and security of our Error-Tolerant Linking Algorithm. We have also fully implemented the software. The experimental results showed that it is reliable and efficient. The design of our software is open, so existing textual matching methods can be easily integrated into the system. Conclusions: Designing algorithms that enable medical record linkage for error-prone numerical data while protecting data privacy is difficult. Our proposed solution does not need a trusted third party and is secure in that, during the linking process, neither entity can learn the records in the other's database. ", doi="10.2196/medinform.3090", url="http://medinform.jmir.org/2014/1/e2/", url="http://www.ncbi.nlm.nih.gov/pubmed/25600786" } @Article{info:doi/10.2196/medinform.2519, author="Ping, Xiao-Ou and Chung, Yufang and Tseng, Yi-Ju and Liang, Ja-Der and Yang, Pei-Ming and Huang, Guan-Tarn and Lai, Feipei", title="A Web-Based Data-Querying Tool Based on Ontology-Driven Methodology and Flowchart-Based Model", journal="JMIR Med Inform", year="2013", month="Oct", day="08", volume="1", number="1", pages="e2", keywords="electronic medical records", keywords="query languages", keywords="information retrieval query processing", keywords="ontology engineering", keywords="clinical practice guideline", abstract="Background: Because of the increasing adoption of electronic medical record (EMR) systems, health care records have been accumulating rapidly in clinical data repositories. Therefore, querying the data stored in these repositories is crucial for retrieving knowledge from such large volumes of clinical data. Objective: The aim of this study is to develop a Web-based approach for enriching the capabilities of the data-querying system along the following three considerations: (1) the interface design used for query formulation, (2) the representation of query results, and (3) the models used for formulating query criteria. Methods: The Guideline Interchange Format version 3.5 (GLIF3.5), an ontology-driven clinical guideline representation language, was used for formulating the query tasks based on the GLIF3.5 flowchart in the Prot{\'e}g{\'e} environment.
The flowchart-based data-querying model (FBDQM) query execution engine was developed and implemented for executing queries and presenting the results through a visual, graphical interface. To examine a broad variety of patient data, a clinical data generator was implemented to automatically populate the repository with clinical data, and the generated data were then used to evaluate the system. The accuracy and time performance of the system for three medical query tasks relevant to liver cancer were evaluated using the clinical data generator in experiments with varying numbers of patients. Results: In this study, a prototype system was developed to test the feasibility of building a query execution engine based on FBDQMs, with query tasks formulated using the existing GLIF. The FBDQM-based query execution engine successfully retrieved the clinical data for query tasks formatted using GLIF3.5 in the experiments with varying numbers of patients. The accuracy of the three queries (ie, ``degree of liver damage,'' ``degree of liver damage when applying a mutually exclusive setting,'' and ``treatments for liver cancer'') was 100\% for all four experiments (10 patients, 100 patients, 1000 patients, and 10,000 patients). Among the three measured query phases, (1) structured query language operations, (2) criteria verification, and (3) other, the first two had the longest execution times. Conclusions: The ontology-driven FBDQM-based approach enriched the capabilities of the data-querying system. The adoption of GLIF3.5 increased the potential for interoperability, shareability, and reusability of the query tasks. ", doi="10.2196/medinform.2519", url="http://medinform.jmir.org/2013/1/e2/", url="http://www.ncbi.nlm.nih.gov/pubmed/25600078" } @Article{info:doi/10.2196/jmir.2392, author="Xiao, Lan and Huang, Qiwen and Yank, Veronica and Ma, Jun", title="An Easily Accessible Web-Based Minimization Random Allocation System for Clinical Trials", journal="J Med Internet Res", year="2013", month="Jul", day="19", volume="15", number="7", pages="e139", keywords="randomized controlled trials", keywords="randomization", keywords="minimization", keywords="adaptive randomization", keywords="Kullback--Leibler divergence", keywords="Web-based", abstract="Background: Minimization as an adaptive allocation technique has been recommended in the literature for use in randomized clinical trials. However, it remains uncommonly used, due in part to a lack of easily accessible implementation tools. Objective: To provide clinical trialists with a robust, flexible, and readily accessible tool for implementing covariate-adaptive biased-coin randomization. Methods: We developed a Web-based random allocation system, MinimRan, for covariate-adaptive biased-coin randomization. For trials using only categorical prognostic factors, it applies the Pocock--Simon method (for trials with 2 or more arms) or 2-way minimization (currently limited to 2-arm trials); for trials using continuous prognostic factors, with or without categorical factors, it applies the symmetric Kullback--Leibler divergence minimization method (currently limited to 2-arm trials). Results: In this paper, we describe the system's essential statistical and computer programming features and, as an example, present the randomization results the system generated in a recently completed trial. The system can be used in single- and double-blind trials as well as single-center and multicenter trials.
Conclusions: We expect the system to facilitate the translation of the 3 validated random allocation methods into broad, efficient clinical research practice. ", doi="10.2196/jmir.2392", url="http://www.jmir.org/2013/7/e139/", url="http://www.ncbi.nlm.nih.gov/pubmed/23872035" }
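
As a companion to the MinimRan entry above (Xiao et al, jmir.2392), the sketch below illustrates the Pocock--Simon minimization rule that the system applies for categorical prognostic factors. It is a minimal illustration, not MinimRan's actual code: the function name, the use of the range across arms as the imbalance metric, equal factor weights, and the biased-coin probability of 0.8 are all assumptions made here for concreteness.

import random
from collections import defaultdict

def pocock_simon_assign(history, factors, new_patient, arms=("A", "B"),
                        p_best=0.8, rng=random):
    # Tally, for each (factor, level) pair, how many prior patients with
    # that level sit in each arm.
    counts = defaultdict(lambda: defaultdict(int))
    for patient, arm in history:
        for f in factors:
            counts[(f, patient[f])][arm] += 1

    # Hypothetically assign the new patient to each arm and measure the
    # resulting imbalance, summed over factors (range across arms).
    def imbalance_if(candidate):
        total = 0
        for f in factors:
            level_counts = counts[(f, new_patient[f])]
            hypo = {a: level_counts[a] + (a == candidate) for a in arms}
            total += max(hypo.values()) - min(hypo.values())
        return total

    scores = {a: imbalance_if(a) for a in arms}
    best = min(scores, key=scores.get)
    if len(set(scores.values())) == 1:
        return rng.choice(arms)  # perfect tie: pure randomization
    # Biased coin: favor the imbalance-minimizing arm with probability p_best.
    if rng.random() < p_best:
        return best
    return rng.choice([a for a in arms if a != best])

# Example (illustrative data): assign a third patient given two prior allocations.
history = [({"sex": "F", "age_group": "65+"}, "A"),
           ({"sex": "M", "age_group": "<65"}, "B")]
print(pocock_simon_assign(history, ["sex", "age_group"],
                          {"sex": "F", "age_group": "<65"}))

For trials with more than 2 arms, a common refinement is to rank the arms by imbalance and assign with a decreasing probability vector rather than a single biased coin; the sketch keeps the simplest 2-arm-friendly form.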
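
Similarly, the Error-Tolerant Linking Algorithm entry above (Pal et al, medinform.3090) rests on a distance-threshold matching rule for numerical records. The fragment below shows only that plaintext rule; the article's cryptographic protocol, which lets two providers evaluate the rule without revealing their databases to each other, is deliberately omitted, and the Euclidean metric and function name are assumptions of this sketch.

import math  # math.dist requires Python 3.8+

def error_tolerant_links(db_a, db_b, threshold):
    """Pair up records whose numerical distance falls below `threshold`.

    Plaintext stand-in for the matching criterion; in the published system
    this comparison is carried out under a privacy-preserving protocol.
    """
    matches = []
    for i, rec_a in enumerate(db_a):
        for j, rec_b in enumerate(db_b):
            if math.dist(rec_a, rec_b) <= threshold:  # Euclidean distance
                matches.append((i, j))
    return matches

# Example: a typo of 73.0 vs 73.2 in one attribute still links at tolerance 0.5.
print(error_tolerant_links([(1980.0, 73.2)], [(1980.0, 73.0)], threshold=0.5))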