@Article{info:doi/10.2196/24008, author="Shen, Feichen and Liu, Sijia and Fu, Sunyang and Wang, Yanshan and Henry, Sam and Uzuner, Ozlem and Liu, Hongfang", title="Family History Extraction From Synthetic Clinical Narratives Using Natural Language Processing: Overview and Evaluation of a Challenge Data Set and Solutions for the 2019 National NLP Clinical Challenges (n2c2)/Open Health Natural Language Processing (OHNLP) Competition", journal="JMIR Med Inform", year="2021", month="Jan", day="27", volume="9", number="1", pages="e24008", keywords="family history extraction", keywords="information extraction", keywords="natural language processing", keywords="named entity recognition", keywords="relation extraction", abstract="Background: As a risk factor for many diseases, family history (FH) captures both shared genetic variations and living environments among family members. Though there are several systems focusing on FH extraction using natural language processing (NLP) techniques, the evaluation protocol of such systems has not been standardized. Objective: The n2c2/OHNLP (National NLP Clinical Challenges/Open Health Natural Language Processing) 2019 FH extraction task aims to encourage the community efforts on a standard evaluation and system development on FH extraction from synthetic clinical narratives. Methods: We organized the first BioCreative/OHNLP FH extraction shared task in 2018. We continued the shared task in 2019 in collaboration with the n2c2 and OHNLP consortium, and organized the 2019 n2c2/OHNLP FH extraction track. The shared task comprises 2 subtasks. Subtask 1 focuses on identifying family member entities and clinical observations (diseases), and subtask 2 expects the association of the living status, side of the family, and clinical observations with family members to be extracted. Subtask 2 is an end-to-end task which is based on the result of subtask 1. 
We manually curated the first deidentified clinical narrative from FH sections of clinical notes at Mayo Clinic Rochester, the content of which is highly relevant to patients' FH. Results: A total of 17 teams from all over the world participated in the n2c2/OHNLP FH extraction shared task, where 38 runs were submitted for subtask 1 and 21 runs were submitted for subtask 2. For subtask 1, the top 3 runs were generated by Harbin Institute of Technology, ezDI, Inc., and The Medical University of South Carolina with F1 scores of 0.8745, 0.8225, and 0.8130, respectively. For subtask 2, the top 3 runs were from Harbin Institute of Technology, ezDI, Inc., and University of Florida with F1 scores of 0.681, 0.6586, and 0.6544, respectively. The workshop was held in conjunction with the AMIA 2019 Fall Symposium. Conclusions: A wide variety of methods were used by different teams in both tasks, such as Bidirectional Encoder Representations from Transformers, convolutional neural network, bidirectional long short-term memory, conditional random field, support vector machine, and rule-based strategies. System performances show that relation extraction from FH is a more challenging task when compared to entity identification task. 
", doi="10.2196/24008", url="http://medinform.jmir.org/2021/1/e24008/", pmid="33502329" } @Article{info:doi/10.2196/25113, author="Chen, Yen-Pin and Lo, Yuan-Hsun and Lai, Feipei and Huang, Chien-Hua", title="Disease Concept-Embedding Based on the Self-Supervised Method for Medical Information Extraction from Electronic Health Records and Disease Retrieval: Algorithm Development and Validation Study", journal="J Med Internet Res", year="2021", month="Jan", day="27", volume="23", number="1", pages="e25113", keywords="electronic health record", keywords="EHR", keywords="disease embedding", keywords="disease retrieval", keywords="emergency department", keywords="concept", keywords="extraction", keywords="deep learning", keywords="machine learning", keywords="natural language processing", keywords="NLP", abstract="Background: The electronic health record (EHR) contains a wealth of medical information. An organized EHR can greatly help doctors treat patients. In some cases, only limited patient information is collected to help doctors make treatment decisions. Because EHRs can serve as a reference for this limited information, doctors' treatment capabilities can be enhanced. Natural language processing and deep learning methods can help organize and translate EHR information into medical knowledge and experience. Objective: In this study, we aimed to create a model to extract concept embeddings from EHRs for disease pattern retrieval and further classification tasks. Methods: We collected 1,040,989 emergency department visits from the National Taiwan University Hospital Integrated Medical Database and 305,897 samples from the National Hospital and Ambulatory Medical Care Survey Emergency Department data. After data cleansing and preprocessing, the data sets were divided into training, validation, and test sets. 
We proposed a Transformer-based model to embed EHRs and used Bidirectional Encoder Representations from Transformers (BERT) to extract features from free text and concatenate features with structural data as input to our proposed model. Then, Deep InfoMax (DIM) and Simple Contrastive Learning of Visual Representations (SimCLR) were used for the unsupervised embedding of the disease concept. The pretrained disease concept-embedding model, named EDisease, was further finetuned to adapt to the critical care outcome prediction task. We evaluated the performance of embedding using t-distributed stochastic neighbor embedding (t-SNE) to perform dimension reduction for visualization. The performance of the finetuned predictive model was evaluated against published models using the area under the receiver operating characteristic (AUROC). Results: The performance of our model on the outcome prediction had the highest AUROC of 0.876. In the ablation study, the use of a smaller data set or fewer unsupervised methods for pretraining deteriorated the prediction performance. The AUROCs were 0.857, 0.870, and 0.868 for the model without pretraining, the model pretrained by only SimCLR, and the model pretrained by only DIM, respectively. On the smaller finetuning set, the AUROC was 0.815 for the proposed model. Conclusions: Through contrastive learning methods, disease concepts can be embedded meaningfully. Moreover, these methods can be used for disease retrieval tasks to enhance clinical practice capabilities. The disease concept model is also suitable as a pretrained model for subsequent prediction tasks. 
", doi="10.2196/25113", url="http://www.jmir.org/2021/1/e25113/", pmid="33502324" } @Article{info:doi/10.2196/24594, author="Gaudet-Blavignac, Christophe and Foufi, Vasiliki and Bjelogrlic, Mina and Lovis, Christian", title="Use of the Systematized Nomenclature of Medicine Clinical Terms (SNOMED CT) for Processing Free Text in Health Care: Systematic Scoping Review", journal="J Med Internet Res", year="2021", month="Jan", day="26", volume="23", number="1", pages="e24594", keywords="SNOMED CT", keywords="natural language processing", keywords="scoping review", keywords="terminology", abstract="Background: Interoperability and secondary use of data is a challenge in health care. Specifically, the reuse of clinical free text remains an unresolved problem. The Systematized Nomenclature of Medicine Clinical Terms (SNOMED CT) has become the universal language of health care and presents characteristics of a natural language. Its use to represent clinical free text could constitute a solution to improve interoperability. Objective: Although the use of SNOMED and SNOMED CT has already been reviewed, its specific use in processing and representing unstructured data such as clinical free text has not. This review aims to better understand SNOMED CT's use for representing free text in medicine. Methods: A scoping review was performed on the topic by searching MEDLINE, Embase, and Web of Science for publications featuring free-text processing and SNOMED CT. A recursive reference review was conducted to broaden the scope of research. The review covered the type of processed data, the targeted language, the goal of the terminology binding, the method used and, when appropriate, the specific software used. Results: In total, 76 publications were selected for an extensive study. The language targeted by publications was 91\% (n=69) English. 
The most frequent types of documents for which the terminology was used are complementary exam reports (n=18, 24\%) and narrative notes (n=16, 21\%). Mapping to SNOMED CT was the final goal of the research in 21\% (n=16) of publications and a part of the final goal in 33\% (n=25). The main objectives of mapping are information extraction (n=44, 39\%), feature in a classification task (n=26, 23\%), and data normalization (n=23, 20\%). The method used was rule-based in 70\% (n=53) of publications, hybrid in 11\% (n=8), and machine learning in 5\% (n=4). In total, 12 different software packages were used to map text to SNOMED CT concepts, the most frequent being Medtex, Mayo Clinic Vocabulary Server, and Medical Text Extraction Reasoning and Mapping System. Full terminology was used in 64\% (n=49) of publications, whereas only a subset was used in 30\% (n=23) of publications. Postcoordination was proposed in 17\% (n=13) of publications, and only 5\% (n=4) of publications specifically mentioned the use of the compositional grammar. Conclusions: SNOMED CT has been largely used to represent free-text data, most frequently with rule-based approaches, in English. However, currently, there is no easy solution for mapping free text to this terminology and to perform automatic postcoordination. Most solutions conceive SNOMED CT as a simple terminology rather than as a compositional bag of ontologies. Since 2012, the number of publications on this subject per year has decreased. However, the need for formal semantic representation of free text in health care is high, and automatic encoding into a compositional ontology could be a solution. ", doi="10.2196/24594", url="http://www.jmir.org/2021/1/e24594/", url="http://www.ncbi.nlm.nih.gov/pubmed/33496673" } @Article{info:doi/10.2196/25314, author="Klein, Z. 
Ari and Magge, Arjun and O'Connor, Karen and Flores Amaro, Ivan Jesus and Weissenbacher, Davy and Gonzalez Hernandez, Graciela", title="Toward Using Twitter for Tracking COVID-19: A Natural Language Processing Pipeline and Exploratory Data Set", journal="J Med Internet Res", year="2021", month="Jan", day="22", volume="23", number="1", pages="e25314", keywords="natural language processing", keywords="social media", keywords="data mining", keywords="COVID-19", keywords="coronavirus", keywords="pandemics", keywords="epidemiology", keywords="infodemiology", abstract="Background: In the United States, the rapidly evolving COVID-19 outbreak, the shortage of available testing, and the delay of test results present challenges for actively monitoring its spread based on testing alone. Objective: The objective of this study was to develop, evaluate, and deploy an automatic natural language processing pipeline to collect user-generated Twitter data as a complementary resource for identifying potential cases of COVID-19 in the United States that are not based on testing and, thus, may not have been reported to the Centers for Disease Control and Prevention. Methods: Beginning January 23, 2020, we collected English tweets from the Twitter Streaming application programming interface that mention keywords related to COVID-19. We applied handwritten regular expressions to identify tweets indicating that the user potentially has been exposed to COVID-19. We automatically filtered out ``reported speech'' (eg, quotations, news headlines) from the tweets that matched the regular expressions, and two annotators annotated a random sample of 8976 tweets that are geo-tagged or have profile location metadata, distinguishing tweets that self-report potential cases of COVID-19 from those that do not. We used the annotated tweets to train and evaluate deep neural network classifiers based on bidirectional encoder representations from transformers (BERT). 
Finally, we deployed the automatic pipeline on more than 85 million unlabeled tweets that were continuously collected between March 1 and August 21, 2020. Results: Interannotator agreement, based on dual annotations for 3644 (41\%) of the 8976 tweets, was 0.77 (Cohen $\kappa$). A deep neural network classifier, based on a BERT model that was pretrained on tweets related to COVID-19, achieved an F1-score of 0.76 (precision=0.76, recall=0.76) for detecting tweets that self-report potential cases of COVID-19. Upon deploying our automatic pipeline, we identified 13,714 tweets that self-report potential cases of COVID-19 and have US state--level geolocations. Conclusions: We have made the 13,714 tweets identified in this study, along with each tweet's time stamp and US state--level geolocation, publicly available to download. This data set presents the opportunity for future work to assess the utility of Twitter data as a complementary resource for tracking the spread of COVID-19. ", doi="10.2196/25314", url="http://www.jmir.org/2021/1/e25314/", url="http://www.ncbi.nlm.nih.gov/pubmed/33449904" } @Article{info:doi/10.2196/23086, author="Li, Junyi and Zhang, Xuejie and Zhou, Xiaobing", title="ALBERT-Based Self-Ensemble Model With Semisupervised Learning and Data Augmentation for Clinical Semantic Textual Similarity Calculation: Algorithm Validation Study", journal="JMIR Med Inform", year="2021", month="Jan", day="22", volume="9", number="1", pages="e23086", keywords="data augmentation", keywords="semisupervised", keywords="self-ensemble", keywords="ALBERT", keywords="clinical semantic textual similarity", keywords="algorithm", keywords="semantic", keywords="model", keywords="data sets", abstract="Background: In recent years, with increases in the amount of information available and the importance of information screening, increased attention has been paid to the calculation of textual semantic similarity. 
In the field of medicine, electronic medical records and medical research documents have become important data resources for clinical research. Medical textual semantic similarity calculation has become an urgent problem to be solved. Objective: This research aims to solve 2 problems---(1) when the size of medical data sets is small, leading to insufficient learning with understanding of the models and (2) when information is lost in the process of long-distance propagation, causing the models to be unable to grasp key information. Methods: This paper combines a text data augmentation method and a self-ensemble ALBERT model under semisupervised learning to perform clinical textual semantic similarity calculations. Results: Compared with the methods in the 2019 National Natural Language Processing Clinical Challenges Open Health Natural Language Processing shared task Track on Clinical Semantic Textual Similarity, our method surpasses the best result by 2 percentage points and achieves a Pearson correlation coefficient of 0.92. Conclusions: When the size of medical data set is small, data augmentation can increase the size of the data set and improved semisupervised learning can boost the learning efficiency of the model. Additionally, self-ensemble methods improve the model performance. Our method had excellent performance and has great potential to improve related medical problems. ", doi="10.2196/23086", url="http://medinform.jmir.org/2021/1/e23086/", url="http://www.ncbi.nlm.nih.gov/pubmed/33480858" } @Article{info:doi/10.2196/23104, author="Kate, J. 
Rohit", title="Clinical Term Normalization Using Learned Edit Patterns and Subconcept Matching: System Development and Evaluation", journal="JMIR Med Inform", year="2021", month="Jan", day="14", volume="9", number="1", pages="e23104", keywords="clinical term normalization", keywords="edit distance", keywords="machine learning", keywords="natural language processing", abstract="Background: Clinical terms mentioned in clinical text are often not in their standardized forms as listed in clinical terminologies because of linguistic and stylistic variations. However, many automated downstream applications require clinical terms mapped to their corresponding concepts in clinical terminologies, thus necessitating the task of clinical term normalization. Objective: In this paper, a system for clinical term normalization is presented that utilizes edit patterns to convert clinical terms into their normalized forms. Methods: The edit patterns are automatically learned from the Unified Medical Language System (UMLS) Metathesaurus as well as from the given training data. The edit patterns are generalized sequences of edits that are derived from edit distance computations. The edit patterns are both character based as well as word based and are learned separately for different semantic types. In addition to these edit patterns, the system also normalizes clinical terms through the subconcepts mentioned within them. Results: The system was evaluated as part of the 2019 n2c2 Track 3 shared task of clinical term normalization. It obtained 80.79\% accuracy on the standard test data. This paper includes ablation studies to evaluate the contributions of different components of the system. A challenging part of the task was disambiguation when a clinical term could be normalized to multiple concepts. Conclusions: The learned edit patterns led the system to perform well on the normalization task. 
Given that the system is based on patterns, it is human interpretable and is also capable of giving insights about common variations of clinical terms mentioned in clinical text that are different from their standardized forms. ", doi="10.2196/23104", url="https://medinform.jmir.org/2021/1/e23104", url="http://www.ncbi.nlm.nih.gov/pubmed/33443483" } @Article{info:doi/10.2196/19689, author="Liu, Honglei and Zhang, Zhiqiang and Xu, Yan and Wang, Ni and Huang, Yanqun and Yang, Zhenghan and Jiang, Rui and Chen, Hui", title="Use of BERT (Bidirectional Encoder Representations from Transformers)-Based Deep Learning Method for Extracting Evidences in Chinese Radiology Reports: Development of a Computer-Aided Liver Cancer Diagnosis Framework", journal="J Med Internet Res", year="2021", month="Jan", day="12", volume="23", number="1", pages="e19689", keywords="BiLSTM-CRF", keywords="natural language processing", keywords="radiology reports", keywords="information extraction", keywords="computer-aided diagnosis", keywords="BERT", abstract="Background: Liver cancer is a substantial disease burden in China. As one of the primary diagnostic tools for detecting liver cancer, dynamic contrast-enhanced computed tomography provides detailed evidences for diagnosis that are recorded in free-text radiology reports. Objective: The aim of our study was to apply a deep learning model and rule-based natural language processing (NLP) method to identify evidences for liver cancer diagnosis automatically. Methods: We proposed a pretrained, fine-tuned BERT (Bidirectional Encoder Representations from Transformers)-based BiLSTM-CRF (Bidirectional Long Short-Term Memory-Conditional Random Field) model to recognize the phrases of APHE (hyperintense enhancement in the arterial phase) and PDPH (hypointense in the portal and delayed phases). To identify more essential diagnostic evidences, we used the traditional rule-based NLP methods for the extraction of radiological features. 
APHE, PDPH, and other extracted radiological features were used to design a computer-aided liver cancer diagnosis framework by random forest. Results: The BERT-BiLSTM-CRF predicted the phrases of APHE and PDPH with an F1 score of 98.40\% and 90.67\%, respectively. The prediction model using combined features had a higher performance (F1 score, 88.55\%) than those using APHE and PDPH (84.88\%) or other extracted radiological features (83.52\%). APHE and PDPH were the top 2 essential features for liver cancer diagnosis. Conclusions: This work was a comprehensive NLP study, wherein we identified evidences for the diagnosis of liver cancer from Chinese radiology reports, considering both clinical knowledge and radiology findings. The BERT-based deep learning method for the extraction of diagnostic evidence achieved state-of-the-art performance. The high performance proves the feasibility of the BERT-BiLSTM-CRF model in information extraction from Chinese radiology reports. The findings of our study suggest that the deep learning--based method for automatically identifying evidences for diagnosis can be extended to other types of Chinese clinical texts. ", doi="10.2196/19689", url="http://www.jmir.org/2021/1/e19689/", url="http://www.ncbi.nlm.nih.gov/pubmed/33433395" } @Article{info:doi/10.2196/21453, author="Leung, W. Yvonne and Wouterloot, Elise and Adikari, Achini and Hirst, Graeme and de Silva, Daswin and Wong, Jiahui and Bender, L. 
Jacqueline and Gancarz, Mathew and Gratzer, David and Alahakoon, Damminda and Esplen, Jane Mary", title="Natural Language Processing--Based Virtual Cofacilitator for Online Cancer Support Groups: Protocol for an Algorithm Development and Validation Study", journal="JMIR Res Protoc", year="2021", month="Jan", day="7", volume="10", number="1", pages="e21453", keywords="artificial intelligence", keywords="cancer", keywords="online support groups", keywords="emotional distress", keywords="natural language processing", keywords="participant engagement", abstract="Background: Cancer and its treatment can significantly impact the short- and long-term psychological well-being of patients and families. Emotional distress and depressive symptomatology are often associated with poor treatment adherence, reduced quality of life, and higher mortality. Cancer support groups, especially those led by health care professionals, provide a safe place for participants to discuss fear, normalize stress reactions, share solidarity, and learn about effective strategies to build resilience and enhance coping. However, in-person support groups may not always be accessible to individuals; geographic distance is one of the barriers for access, and compromised physical condition (eg, fatigue, pain) is another. Emerging evidence supports the effectiveness of online support groups in reducing access barriers. Text-based and professional-led online support groups have been offered by Cancer Chat Canada. Participants join the group discussion using text in real time. However, therapist leaders report some challenges leading text-based online support groups in the absence of visual cues, particularly in tracking participant distress. With multiple participants typing at the same time, the nuances of the text messages or red flags for distress can sometimes be missed. Recent advances in artificial intelligence such as deep learning--based natural language processing offer potential solutions. 
This technology can be used to analyze online support group text data to track participants' expressed emotional distress, including fear, sadness, and hopelessness. Artificial intelligence allows session activities to be monitored in real time and alerts the therapist to participant disengagement. Objective: We aim to develop and evaluate an artificial intelligence--based cofacilitator prototype to track and monitor online support group participants' distress through real-time analysis of text-based messages posted during synchronous sessions. Methods: An artificial intelligence--based cofacilitator will be developed to identify participants who are at-risk for increased emotional distress and track participant engagement and in-session group cohesion levels, providing real-time alerts for therapist to follow-up; generate postsession participant profiles that contain discussion content keywords and emotion profiles for each session; and automatically suggest tailored resources to participants according to their needs. The study is designed to be conducted in 4 phases consisting of (1) development based on a subset of data and an existing natural language processing framework, (2) performance evaluation using human scoring, (3) beta testing, and (4) user experience evaluation. Results: This study received ethics approval in August 2019. Phase 1, development of an artificial intelligence--based cofacilitator, was completed in January 2020. As of December 2020, phase 2 is underway. The study is expected to be completed by September 2021. Conclusions: An artificial intelligence--based cofacilitator offers a promising new mode of delivery of person-centered online support groups tailored to individual needs. 
International Registered Report Identifier (IRRID): DERR1-10.2196/21453 ", doi="10.2196/21453", url="https://www.researchprotocols.org/2021/1/e21453", url="http://www.ncbi.nlm.nih.gov/pubmed/33410754" } @Article{info:doi/10.2196/23357, author="Xiong, Ying and Chen, Shuai and Chen, Qingcai and Yan, Jun and Tang, Buzhou", title="Using Character-Level and Entity-Level Representations to Enhance Bidirectional Encoder Representation From Transformers-Based Clinical Semantic Textual Similarity Model: ClinicalSTS Modeling Study", journal="JMIR Med Inform", year="2020", month="Dec", day="29", volume="8", number="12", pages="e23357", keywords="natural language processing", keywords="deep learning", keywords="clinical semantic textual similarity", keywords="knowledge graph", abstract="Background: With the popularity of electronic health records (EHRs), the quality of health care has been improved. However, there are also some problems caused by EHRs, such as the growing use of copy-and-paste and templates, resulting in EHRs of low quality in content. In order to minimize data redundancy in different documents, Harvard Medical School and Mayo Clinic organized a national natural language processing (NLP) clinical challenge (n2c2) on clinical semantic textual similarity (ClinicalSTS) in 2019. The task of this challenge is to compute the semantic similarity among clinical text snippets. Objective: In this study, we aim to investigate novel methods to model ClinicalSTS and analyze the results. Methods: We propose a semantically enhanced text matching model for the 2019 n2c2/Open Health NLP (OHNLP) challenge on ClinicalSTS. 
The model includes 3 representation modules to encode clinical text snippet pairs at different levels: (1) character-level representation module based on convolutional neural network (CNN) to tackle the out-of-vocabulary problem in NLP; (2) sentence-level representation module that adopts a pretrained language model bidirectional encoder representation from transformers (BERT) to encode clinical text snippet pairs; and (3) entity-level representation module to model clinical entity information in clinical text snippets. In the case of entity-level representation, we compare 2 methods. One encodes entities by the entity-type label sequence corresponding to text snippet (called entity I), whereas the other encodes entities by their representation in MeSH, a knowledge graph in the medical domain (called entity II). Results: We conduct experiments on the ClinicalSTS corpus of the 2019 n2c2/OHNLP challenge for model performance evaluation. The model only using BERT for text snippet pair encoding achieved a Pearson correlation coefficient (PCC) of 0.848. When character-level representation and entity-level representation are individually added into our model, the PCC increased to 0.857 and 0.854 (entity I)/0.859 (entity II), respectively. When both character-level representation and entity-level representation are added into our model, the PCC further increased to 0.861 (entity I) and 0.868 (entity II). Conclusions: Experimental results show that both character-level information and entity-level information can effectively enhance the BERT-based STS model. 
", doi="10.2196/23357", url="http://medinform.jmir.org/2020/12/e23357/", url="http://www.ncbi.nlm.nih.gov/pubmed/33372664" } @Article{info:doi/10.2196/23082, author="Geng, Wenye and Qin, Xuanfeng and Yang, Tao and Cong, Zhilei and Wang, Zhuo and Kong, Qing and Tang, Zihui and Jiang, Lin", title="Model-Based Reasoning of Clinical Diagnosis in Integrative Medicine: Real-World Methodological Study of Electronic Medical Records and Natural Language Processing Methods", journal="JMIR Med Inform", year="2020", month="Dec", day="21", volume="8", number="12", pages="e23082", keywords="model-based reasoning", keywords="integrative medicine", keywords="electronic medical records", keywords="natural language processing", abstract="Background: Integrative medicine is a form of medicine that combines practices and treatments from alternative medicine with conventional medicine. The diagnosis in integrative medicine involves the clinical diagnosis based on modern medicine and syndrome pattern diagnosis. Electronic medical records (EMRs) are the systematized collection of patients health information stored in a digital format that can be shared across different health care settings. Although syndrome and sign information or relative information can be extracted from the EMR and content texts can be mapped to computability vectors using natural language processing techniques, application of artificial intelligence techniques to support physicians in medical practices remains a major challenge. Objective: The purpose of this study was to investigate model-based reasoning (MBR) algorithms for the clinical diagnosis in integrative medicine based on EMRs and natural language processing. We also estimated the associations among the factors of sample size, number of syndrome pattern type, and diagnosis in modern medicine using the MBR algorithms. 
Methods: A total of 14,075 medical records of clinical cases were extracted from the EMRs as the development data set, and an external test data set consisting of 1000 medical records of clinical cases was extracted from independent EMRs. MBR methods based on word embedding, machine learning, and deep learning algorithms were developed for the automatic diagnosis of syndrome pattern in integrative medicine. MBR algorithms combining rule-based reasoning (RBR) were also developed. A standard evaluation metrics consisting of accuracy, precision, recall, and F1 score was used for the performance estimation of the methods. The association analyses were conducted on the sample size, number of syndrome pattern type, and diagnosis of lung diseases with the best algorithms. Results: The Word2Vec convolutional neural network (CNN) MBR algorithms showed high performance (accuracy of 0.9586 in the test data set) in the syndrome pattern diagnosis of lung diseases. The Word2Vec CNN MBR combined with RBR also showed high performance (accuracy of 0.9229 in the test data set). The diagnosis of lung diseases could enhance the performance of the Word2Vec CNN MBR algorithms. Each group sample size and syndrome pattern type affected the performance of these algorithms. Conclusions: The MBR methods based on Word2Vec and CNN showed high performance in the syndrome pattern diagnosis of lung diseases in integrative medicine. The parameters of each group's sample size, syndrome pattern type, and diagnosis of lung diseases were associated with the performance of the methods. 
Trial Registration: ClinicalTrials.gov NCT03274908; https://clinicaltrials.gov/ct2/show/NCT03274908 ", doi="10.2196/23082", url="http://medinform.jmir.org/2020/12/e23082/", pmid="33346740" } @Article{info:doi/10.2196/24490, author="Sai Prashanthi, Gumpili and Deva, Ayush and Vadapalli, Ranganath and Das, Vipin Anthony", title="Automated Categorization of Systemic Disease and Duration From Electronic Medical Record System Data Using Finite-State Machine Modeling: Prospective Validation Study", journal="JMIR Form Res", year="2020", month="Dec", day="17", volume="4", number="12", pages="e24490", keywords="electronic health records", keywords="data analysis", keywords="machine learning", keywords="algorithms", keywords="ophthalmology", abstract="Background: One of the major challenges in the health care sector is that approximately 80\% of generated data remains unstructured and unused. Since it is difficult to handle unstructured data from electronic medical record systems, it tends to be neglected for analyses in most hospitals and medical centers. Therefore, there is a need to analyze unstructured big data in health care systems so that we can optimally utilize and unearth all unexploited information from it. Objective: In this study, we aimed to extract a list of diseases and associated keywords along with the corresponding time durations from an indigenously developed electronic medical record system and describe the possibility of analytics from the acquired datasets. Methods: We propose a novel, finite-state machine to sequentially detect and cluster disease names from patients' medical history. We defined 3 states in the finite-state machine and transition matrix, which depend on the identified keyword. In addition, we also defined a state-change action matrix, which is essentially an action associated with each transition. 
The dataset used in this study was obtained from an indigenously developed electronic medical record system called eyeSmart that was implemented across a large, multitier ophthalmology network in India. The dataset included patients' past medical history and contained records of 10,000 distinct patients. Results: We extracted disease names and associated keywords by using the finite-state machine with an accuracy of 95\%, sensitivity of 94.9\%, and positive predictive value of 100\%. For the extraction of the duration of disease, the machine's accuracy was 93\%, sensitivity was 92.9\%, and the positive predictive value was 100\%. Conclusions: We demonstrated that the finite-state machine we developed in this study can be used to accurately identify disease names, associated keywords, and time durations from a large cohort of patient records obtained using an electronic medical record system. ", doi="10.2196/24490", url="http://formative.jmir.org/2020/12/e24490/", url="http://www.ncbi.nlm.nih.gov/pubmed/33331823" } @Article{info:doi/10.2196/20756, author="Abd-Alrazaq, Alaa and Alajlani, Mohannad and Alhuwail, Dari and Schneider, Jens and Al-Kuwari, Saif and Shah, Zubair and Hamdi, Mounir and Househ, Mowafa", title="Artificial Intelligence in the Fight Against COVID-19: Scoping Review", journal="J Med Internet Res", year="2020", month="Dec", day="15", volume="22", number="12", pages="e20756", keywords="artificial intelligence", keywords="machine learning", keywords="deep learning", keywords="natural language processing", keywords="coronavirus", keywords="COVID-19", keywords="2019-nCoV", keywords="SARS-CoV-2", abstract="Background: In December 2019, COVID-19 broke out in Wuhan, China, leading to national and international disruptions in health care, business, education, transportation, and nearly every aspect of our daily lives. 
Artificial intelligence (AI) has been leveraged amid the COVID-19 pandemic; however, little is known about its use for supporting public health efforts. Objective: This scoping review aims to explore how AI technology is being used during the COVID-19 pandemic, as reported in the literature. Thus, it is the first review that describes and summarizes features of the identified AI techniques and data sets used for their development and validation. Methods: A scoping review was conducted following the guidelines of PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews). We searched the most commonly used electronic databases (eg, MEDLINE, EMBASE, and PsycInfo) between April 10 and 12, 2020. These terms were selected based on the target intervention (ie, AI) and the target disease (ie, COVID-19). Two reviewers independently conducted study selection and data extraction. A narrative approach was used to synthesize the extracted data. Results: We considered 82 studies out of the 435 retrieved studies. The most common use of AI was diagnosing COVID-19 cases based on various indicators. AI was also employed in drug and vaccine discovery or repurposing and for assessing their safety. Further, the included studies used AI for forecasting the epidemic development of COVID-19 and predicting its potential hosts and reservoirs. Researchers used AI for patient outcome--related tasks such as assessing the severity of COVID-19, predicting mortality risk, its associated factors, and the length of hospital stay. AI was used for infodemiology to raise awareness to use water, sanitation, and hygiene. The most prominent AI technique used was convolutional neural network, followed by support vector machine. Conclusions: The included studies showed that AI has the potential to fight against COVID-19. However, many of the proposed methods are not yet clinically accepted. 
Thus, the most rewarding research will be on methods promising value beyond COVID-19. More efforts are needed for developing standardized reporting protocols or guidelines for studies on AI. ", doi="10.2196/20756", url="http://www.jmir.org/2020/12/e20756/", url="http://www.ncbi.nlm.nih.gov/pubmed/33284779" } @Article{info:doi/10.2196/22982, author="Yang, Xi and Zhang, Hansi and He, Xing and Bian, Jiang and Wu, Yonghui", title="Extracting Family History of Patients From Clinical Narratives: Exploring an End-to-End Solution With Deep Learning Models", journal="JMIR Med Inform", year="2020", month="Dec", day="15", volume="8", number="12", pages="e22982", keywords="family history", keywords="information extraction", keywords="natural language processing", keywords="deep learning", abstract="Background: Patients' family history (FH) is a critical risk factor associated with numerous diseases. However, FH information is not well captured in the structured database but often documented in clinical narratives. Natural language processing (NLP) is the key technology to extract patients' FH from clinical narratives. In 2019, the National NLP Clinical Challenge (n2c2) organized shared tasks to solicit NLP methods for FH information extraction. Objective: This study presents our end-to-end FH extraction system developed during the 2019 n2c2 open shared task as well as the new transformer-based models that we developed after the challenge. We seek to develop a machine learning--based solution for FH information extraction without task-specific rules created by hand. Methods: We developed deep learning--based systems for FH concept extraction and relation identification. We explored deep learning models including long short-term memory-conditional random fields and bidirectional encoder representations from transformers (BERT) as well as developed ensemble models using a majority voting strategy. 
To further optimize performance, we systematically compared 3 different strategies to use BERT output representations for relation identification. Results: Our system was among the top-ranked systems (3 out of 21) in the challenge. Our best system achieved micro-averaged F1 scores of 0.7944 and 0.6544 for concept extraction and relation identification, respectively. After challenge, we further explored new transformer-based models and improved the performances of both subtasks to 0.8249 and 0.6775, respectively. For relation identification, our system achieved a performance comparable to the best system (0.6810) reported in the challenge. Conclusions: This study demonstrated the feasibility of utilizing deep learning methods to extract FH information from clinical narratives. ", doi="10.2196/22982", url="http://medinform.jmir.org/2020/12/e22982/", url="http://www.ncbi.nlm.nih.gov/pubmed/33320104" } @Article{info:doi/10.2196/18953, author="Rivera Zavala, Renzo and Martinez, Paloma", title="The Impact of Pretrained Language Models on Negation and Speculation Detection in Cross-Lingual Medical Text: Comparative Study", journal="JMIR Med Inform", year="2020", month="Dec", day="3", volume="8", number="12", pages="e18953", keywords="natural language processing", keywords="clinical text", keywords="deep learning", keywords="long short-term memory", keywords="contextual information", abstract="Background: Negation and speculation are critical elements in natural language processing (NLP)-related tasks, such as information extraction, as these phenomena change the truth value of a proposition. In the clinical narrative that is informal, these linguistic facts are used extensively with the objective of indicating hypotheses, impressions, or negative findings. 
Previous state-of-the-art approaches addressed negation and speculation detection tasks using rule-based methods, but in the last few years, models based on machine learning and deep learning exploiting morphological, syntactic, and semantic features represented as spare and dense vectors have emerged. However, although such methods of named entity recognition (NER) employ a broad set of features, they are limited to existing pretrained models for a specific domain or language. Objective: As a fundamental subsystem of any information extraction pipeline, a system for cross-lingual and domain-independent negation and speculation detection was introduced with special focus on the biomedical scientific literature and clinical narrative. In this work, detection of negation and speculation was considered as a sequence-labeling task where cues and the scopes of both phenomena are recognized as a sequence of nested labels recognized in a single step. Methods: We proposed the following two approaches for negation and speculation detection: (1) bidirectional long short-term memory (Bi-LSTM) and conditional random field using character, word, and sense embeddings to deal with the extraction of semantic, syntactic, and contextual patterns and (2) bidirectional encoder representations for transformers (BERT) with fine tuning for NER. Results: The approach was evaluated for English and Spanish languages on biomedical and review text, particularly with the BioScope corpus, IULA corpus, and SFU Spanish Review corpus, with F-measures of 86.6\%, 85.0\%, and 88.1\%, respectively, for NeuroNER and 86.4\%, 80.8\%, and 91.7\%, respectively, for BERT. Conclusions: These results show that these architectures perform considerably better than the previous rule-based and conventional machine learning--based systems. 
Moreover, our analysis results show that pretrained word embedding and particularly contextualized embedding for biomedical corpora help to understand complexities inherent to biomedical text. ", doi="10.2196/18953", url="https://medinform.jmir.org/2020/12/e18953", url="http://www.ncbi.nlm.nih.gov/pubmed/33270027" } @Article{info:doi/10.2196/21750, author="Dai, Hong-Jie and Lee, You-Qian and Nekkantti, Chandini and Jonnagaddala, Jitendra", title="Family History Information Extraction With Neural Attention and an Enhanced Relation-Side Scheme: Algorithm Development and Validation", journal="JMIR Med Inform", year="2020", month="Dec", day="1", volume="8", number="12", pages="e21750", keywords="family history information", keywords="natural language processing", keywords="deep learning", keywords="electronic health record", abstract="Background: Identifying and extracting family history information (FHI) from clinical reports are significant for recognizing disease susceptibility. However, FHI is usually described in a narrative manner within patients' electronic health records, which requires the application of natural language processing technologies to automatically extract such information to provide more comprehensive patient-centered information to physicians. Objective: This study aimed to overcome the 2 main challenges observed in previous research focusing on FHI extraction. One is the requirement to develop postprocessing rules to infer the member and side information of family mentions. The other is to efficiently utilize intrasentence and intersentence information to assist FHI extraction. Methods: We formulated the task as a sequential labeling problem and propose an enhanced relation-side scheme that encodes the required family member properties to not only eliminate the need for postprocessing rules but also relieve the insufficient training instance issues. 
Moreover, an attention-based neural network structure was proposed to exploit cross-sentence information to identify FHI and its attributes requiring cross-sentence inference. Results: The dataset released by the 2019 n2c2/OHNLP family history extraction task was used to evaluate the performance of the proposed methods. We started by comparing the performance of the traditional neural sequence models with the ordinary scheme and enhanced scheme. Next, we studied the effectiveness of the proposed attention-enhanced neural networks by comparing their performance with that of the traditional networks. It was observed that, with the enhanced scheme, the recall of the neural network can be improved, leading to an increase in the F score of 0.024. The proposed neural attention mechanism enhanced both the recall and precision and resulted in an improved F score of 0.807, which was ranked fourth in the shared task. Conclusions: We presented an attention-based neural network along with an enhanced tag scheme that enables the neural network model to learn and interpret the implicit relationship and side information of the recognized family members across sentences without relying on heuristic rules. ", doi="10.2196/21750", url="https://medinform.jmir.org/2020/12/e21750", url="http://www.ncbi.nlm.nih.gov/pubmed/33258777" } @Article{info:doi/10.2196/23930, author="Maarseveen, D. Tjardo and Meinderink, Timo and Reinders, T. Marcel J. and Knitza, Johannes and Huizinga, J. Tom W. and Kleyer, Arnd and Simon, David and van den Akker, B. 
Erik and Knevel, Rachel", title="Machine Learning Electronic Health Record Identification of Patients with Rheumatoid Arthritis: Algorithm Pipeline Development and Validation Study", journal="JMIR Med Inform", year="2020", month="Nov", day="30", volume="8", number="11", pages="e23930", keywords="supervised machine learning", keywords="electronic health records", keywords="natural language processing", keywords="support vector machine", keywords="gradient boosting", keywords="rheumatoid arthritis", abstract="Background: Financial codes are often used to extract diagnoses from electronic health records. This approach is prone to false positives. Alternatively, queries are constructed, but these are highly center and language specific. A tantalizing alternative is the automatic identification of patients by employing machine learning on format-free text entries. Objective: The aim of this study was to develop an easily implementable workflow that builds a machine learning algorithm capable of accurately identifying patients with rheumatoid arthritis from format-free text fields in electronic health records. Methods: Two electronic health record data sets were employed: Leiden (n=3000) and Erlangen (n=4771). Using a portion of the Leiden data (n=2000), we compared 6 different machine learning methods and a na{\"i}ve word-matching algorithm using 10-fold cross-validation. Performances were compared using the area under the receiver operating characteristic curve (AUROC) and the area under the precision recall curve (AUPRC), and F1 score was used as the primary criterion for selecting the best method to build a classifying algorithm. We selected the optimal threshold of positive predictive value for case identification based on the output of the best method in the training data. This validation workflow was subsequently applied to a portion of the Erlangen data (n=4293). 
For testing, the best performing methods were applied to remaining data (Leiden n=1000; Erlangen n=478) for an unbiased evaluation. Results: For the Leiden data set, the word-matching algorithm demonstrated mixed performance (AUROC 0.90; AUPRC 0.33; F1 score 0.55), and 4 methods significantly outperformed word-matching, with support vector machines performing best (AUROC 0.98; AUPRC 0.88; F1 score 0.83). Applying this support vector machine classifier to the test data resulted in a similarly high performance (F1 score 0.81; positive predictive value [PPV] 0.94), and with this method, we could identify 2873 patients with rheumatoid arthritis in less than 7 seconds out of the complete collection of 23,300 patients in the Leiden electronic health record system. For the Erlangen data set, gradient boosting performed best (AUROC 0.94; AUPRC 0.85; F1 score 0.82) in the training set, and applied to the test data, resulted once again in good results (F1 score 0.67; PPV 0.97). Conclusions: We demonstrate that machine learning methods can extract the records of patients with rheumatoid arthritis from electronic health record data with high precision, allowing research on very large populations for limited costs. Our approach is language and center independent and could be applied to any type of diagnosis. We have developed our pipeline into a universally applicable and easy-to-implement workflow to equip centers with their own high-performing algorithm. This allows the creation of observational studies of unprecedented size covering different countries for low cost from already available data in electronic health record systems. 
", doi="10.2196/23930", url="http://medinform.jmir.org/2020/11/e23930/", url="http://www.ncbi.nlm.nih.gov/pubmed/33252349" } @Article{info:doi/10.2196/21660, author="Singh, Tavleen and Roberts, Kirk and Cohen, Trevor and Cobb, Nathan and Wang, Jing and Fujimoto, Kayo and Myneni, Sahiti", title="Social Media as a Research Tool (SMaaRT) for Risky Behavior Analytics: Methodological Review", journal="JMIR Public Health Surveill", year="2020", month="Nov", day="30", volume="6", number="4", pages="e21660", keywords="social media", keywords="infodemiology", keywords="infoveillance", keywords="online health communities", keywords="risky health behaviors", keywords="data mining", keywords="machine learning", keywords="natural language processing", keywords="text mining", abstract="Background: Modifiable risky health behaviors, such as tobacco use, excessive alcohol use, being overweight, lack of physical activity, and unhealthy eating habits, are some of the major factors for developing chronic health conditions. Social media platforms have become indispensable means of communication in the digital era. They provide an opportunity for individuals to express themselves, as well as share their health-related concerns with peers and health care providers, with respect to risky behaviors. Such peer interactions can be utilized as valuable data sources to better understand inter-and intrapersonal psychosocial mediators and the mechanisms of social influence that drive behavior change. Objective: The objective of this review is to summarize computational and quantitative techniques facilitating the analysis of data generated through peer interactions pertaining to risky health behaviors on social media platforms. Methods: We performed a systematic review of the literature in September 2020 by searching three databases---PubMed, Web of Science, and Scopus---using relevant keywords, such as ``social media,'' ``online health communities,'' ``machine learning,'' ``data mining,'' etc. 
The reporting of the studies was directed by the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines. Two reviewers independently assessed the eligibility of studies based on the inclusion and exclusion criteria. We extracted the required information from the selected studies. Results: The initial search returned a total of 1554 studies, and after careful analysis of titles, abstracts, and full texts, a total of 64 studies were included in this review. We extracted the following key characteristics from all of the studies: social media platform used for conducting the study, risky health behavior studied, the number of posts analyzed, study focus, key methodological functions and tools used for data analysis, evaluation metrics used, and summary of the key findings. The most commonly used social media platform was Twitter, followed by Facebook, QuitNet, and Reddit. The most commonly studied risky health behavior was nicotine use, followed by drug or substance abuse and alcohol use. Various supervised and unsupervised machine learning approaches were used for analyzing textual data generated from online peer interactions. Few studies utilized deep learning methods for analyzing textual data as well as image or video data. Social network analysis was also performed, as reported in some studies. Conclusions: Our review consolidates the methodological underpinnings for analyzing risky health behaviors and has enhanced our understanding of how social media can be leveraged for nuanced behavioral modeling and representation. The knowledge gained from our review can serve as a foundational component for the development of persuasive health communication and effective behavior modification technologies aimed at the individual and population levels. 
", doi="10.2196/21660", url="http://publichealth.jmir.org/2020/4/e21660/", url="http://www.ncbi.nlm.nih.gov/pubmed/33252345" } @Article{info:doi/10.2196/23375, author="Wang, Yanshan and Fu, Sunyang and Shen, Feichen and Henry, Sam and Uzuner, Ozlem and Liu, Hongfang", title="The 2019 n2c2/OHNLP Track on Clinical Semantic Textual Similarity: Overview", journal="JMIR Med Inform", year="2020", month="Nov", day="27", volume="8", number="11", pages="e23375", keywords="natural language processing", keywords="clinical natural language processing", keywords="medical natural language processing", keywords="semantic textual similarity", keywords="ClinicalSTS", keywords="n2c2", keywords="electronic health records", keywords="challenge", keywords="shared task", abstract="Background: Semantic textual similarity is a common task in the general English domain to assess the degree to which the underlying semantics of 2 text segments are equivalent to each other. Clinical Semantic Textual Similarity (ClinicalSTS) is the semantic textual similarity task in the clinical domain that attempts to measure the degree of semantic equivalence between 2 snippets of clinical text. Due to the frequent use of templates in the Electronic Health Record system, a large amount of redundant text exists in clinical notes, making ClinicalSTS crucial for the secondary use of clinical text in downstream clinical natural language processing applications, such as clinical text summarization, clinical semantics extraction, and clinical information retrieval. Objective: Our objective was to release ClinicalSTS data sets and to motivate natural language processing and biomedical informatics communities to tackle semantic text similarity tasks in the clinical domain. Methods: We organized the first BioCreative/OHNLP ClinicalSTS shared task in 2018 by making available a real-world ClinicalSTS data set. 
We continued the shared task in 2019 in collaboration with National NLP Clinical Challenges (n2c2) and the Open Health Natural Language Processing (OHNLP) consortium and organized the 2019 n2c2/OHNLP ClinicalSTS track. We released a larger ClinicalSTS data set comprising 1642 clinical sentence pairs, including 1068 pairs from the 2018 shared task and 1006 new pairs from 2 electronic health record systems, GE and Epic. We released 80\% (1642/2054) of the data to participating teams to develop and fine-tune the semantic textual similarity systems and used the remaining 20\% (412/2054) as blind testing to evaluate their systems. The workshop was held in conjunction with the American Medical Informatics Association 2019 Annual Symposium. Results: Of the 78 international teams that signed on to the n2c2/OHNLP ClinicalSTS shared task, 33 produced a total of 87 valid system submissions. The top 3 systems were generated by IBM Research, the National Center for Biotechnology Information, and the University of Florida, with Pearson correlations of r=.9010, r=.8967, and r=.8864, respectively. Most top-performing systems used state-of-the-art neural language models, such as BERT and XLNet, and state-of-the-art training schemas in deep learning, such as pretraining and fine-tuning schema, and multitask learning. Overall, the participating systems performed better on the Epic sentence pairs than on the GE sentence pairs, despite a much larger portion of the training data being GE sentence pairs. Conclusions: The 2019 n2c2/OHNLP ClinicalSTS shared task focused on computing semantic similarity for clinical text sentences generated from clinical notes in the real world. It attracted a large number of international teams. The ClinicalSTS shared task could continue to serve as a venue for researchers in natural language processing and medical informatics communities to develop and improve semantic textual similarity techniques for clinical text. 
", doi="10.2196/23375", url="http://medinform.jmir.org/2020/11/e23375/", url="http://www.ncbi.nlm.nih.gov/pubmed/33245291" } @Article{info:doi/10.2196/22661, author="Ujiie, Shogo and Yada, Shuntaro and Wakamiya, Shoko and Aramaki, Eiji", title="Identification of Adverse Drug Event--Related Japanese Articles: Natural Language Processing Analysis", journal="JMIR Med Inform", year="2020", month="Nov", day="27", volume="8", number="11", pages="e22661", keywords="adverse drug events", keywords="medical informatics", keywords="natural language processing", keywords="pharmacovigilance", abstract="Background: Medical articles covering adverse drug events (ADEs) are systematically reported by pharmaceutical companies for drug safety information purposes. Although policies governing reporting to regulatory bodies vary among countries and regions, all medical article reporting may be categorized as precision or recall based. Recall-based reporting, which is implemented in Japan, requires the reporting of any possible ADE. Therefore, recall-based reporting can introduce numerous false negatives or substantial amounts of noise, a problem that is difficult to address using limited manual labor. Objective: Our aim was to develop an automated system that could identify ADE-related medical articles, support recall-based reporting, and alleviate manual labor in Japanese pharmaceutical companies. Methods: Using medical articles as input, our system based on natural language processing applies document-level classification to extract articles containing ADEs (replacing manual labor in the first screening) and sentence-level classification to extract sentences within those articles that imply ADEs (thus supporting experts in the second screening). We used 509 Japanese medical articles annotated by a medical engineer to evaluate the performance of the proposed system. Results: Document-level classification yielded an F1 of 0.903. Sentence-level classification yielded an F1 of 0.413. 
These were averages of fivefold cross-validations. Conclusions: A simple automated system may alleviate the manual labor involved in screening drug safety--related medical articles in pharmaceutical companies. After improving the accuracy of the sentence-level classification by considering a wider context, we intend to apply this system toward real-world postmarketing surveillance. ", doi="10.2196/22661", url="http://medinform.jmir.org/2020/11/e22661/", url="http://www.ncbi.nlm.nih.gov/pubmed/33245290" } @Article{info:doi/10.2196/22508, author="Mahajan, Diwakar and Poddar, Ananya and Liang, J. Jennifer and Lin, Yen-Ting and Prager, M. John and Suryanarayanan, Parthasarathy and Raghavan, Preethi and Tsou, Ching-Huei", title="Identification of Semantically Similar Sentences in Clinical Notes: Iterative Intermediate Training Using Multi-Task Learning", journal="JMIR Med Inform", year="2020", month="Nov", day="27", volume="8", number="11", pages="e22508", keywords="electronic health records", keywords="semantic textual similarity", keywords="natural language processing", keywords="multi-task learning", keywords="transfer learning", keywords="deep learning", abstract="Background: Although electronic health records (EHRs) have been widely adopted in health care, effective use of EHR data is often limited because of redundant information in clinical notes introduced by the use of templates and copy-paste during note generation. Thus, it is imperative to develop solutions that can condense information while retaining its value. A step in this direction is measuring the semantic similarity between clinical text snippets. To address this problem, we participated in the 2019 National NLP Clinical Challenges (n2c2)/Open Health Natural Language Processing Consortium (OHNLP) clinical semantic textual similarity (ClinicalSTS) shared task. 
Objective: This study aims to improve the performance and robustness of semantic textual similarity in the clinical domain by leveraging manually labeled data from related tasks and contextualized embeddings from pretrained transformer-based language models. Methods: The ClinicalSTS data set consists of 1642 pairs of deidentified clinical text snippets annotated in a continuous scale of 0-5, indicating degrees of semantic similarity. We developed an iterative intermediate training approach using multi-task learning (IIT-MTL), a multi-task training approach that employs iterative data set selection. We applied this process to bidirectional encoder representations from transformers on clinical text mining (ClinicalBERT), a pretrained domain-specific transformer-based language model, and fine-tuned the resulting model on the target ClinicalSTS task. We incrementally ensembled the output from applying IIT-MTL on ClinicalBERT with the output of other language models (bidirectional encoder representations from transformers for biomedical text mining [BioBERT], multi-task deep neural networks [MT-DNN], and robustly optimized BERT approach [RoBERTa]) and handcrafted features using regression-based learning algorithms. On the basis of these experiments, we adopted the top-performing configurations as our official submissions. Results: Our system ranked first out of 87 submitted systems in the 2019 n2c2/OHNLP ClinicalSTS challenge, achieving state-of-the-art results with a Pearson correlation coefficient of 0.9010. This winning system was an ensembled model leveraging the output of IIT-MTL on ClinicalBERT with BioBERT, MT-DNN, and handcrafted medication features. Conclusions: This study demonstrates that IIT-MTL is an effective way to leverage annotated data from related tasks to improve performance on a target task with a limited data set. 
This contribution opens new avenues of exploration for optimized data set selection to generate more robust and universal contextual representations of text in the clinical domain. ", doi="10.2196/22508", url="http://medinform.jmir.org/2020/11/e22508/", url="http://www.ncbi.nlm.nih.gov/pubmed/33245284" } @Article{info:doi/10.2196/19761, author="Mohammadi, Ramin and Jain, Sarthak and Namin, T. Amir and Scholem Heller, Melissa and Palacholla, Ramya and Kamarthi, Sagar and Wallace, Byron", title="Predicting Unplanned Readmissions Following a Hip or Knee Arthroplasty: Retrospective Observational Study", journal="JMIR Med Inform", year="2020", month="Nov", day="27", volume="8", number="11", pages="e19761", keywords="deep learning", keywords="natural language processing", keywords="electronic health records", keywords="auto ML", keywords="30-days readmission", keywords="hip arthroplasty", keywords="knee arthroplasty", abstract="Background: Total joint replacements are high-volume and high-cost procedures that should be monitored for cost and quality control. Models that can identify patients at high risk of readmission might help reduce costs by suggesting who should be enrolled in preventive care programs. Previous models for risk prediction have relied on structured data of patients rather than clinical notes in electronic health records (EHRs). The former approach requires manual feature extraction by domain experts, which may limit the applicability of these models. Objective: This study aims to develop and evaluate a machine learning model for predicting the risk of 30-day readmission following knee and hip arthroplasty procedures. The input data for these models come from raw EHRs. We empirically demonstrate that unstructured free-text notes contain a reasonably predictive signal for this task. Methods: We performed a retrospective analysis of data from 7174 patients at Partners Healthcare collected between 2006 and 2016. 
These data were split into train, validation, and test sets. These data sets were used to build, validate, and test models to predict unplanned readmission within 30 days of hospital discharge. The proposed models made predictions on the basis of clinical notes, obviating the need for performing manual feature extraction by domain and machine learning experts. The notes that served as model inputs were written by physicians, nurses, pathologists, and others who diagnose and treat patients and may have their own predictions, even if these are not recorded. Results: The proposed models output readmission risk scores (propensities) for each patient. The best models (as selected on a development set) yielded an area under the receiver operating characteristic curve of 0.846 (95\% CI 82.75-87.11) for hip and 0.822 (95\% CI 80.94-86.22) for knee surgery, indicating reasonable discriminative ability. Conclusions: Machine learning models can predict which patients are at a high risk of readmission within 30 days following hip and knee arthroplasty procedures on the basis of notes in EHRs with reasonable discriminative power. Following further validation and empirical demonstration that the models realize predictive performance above that which clinical judgment may provide, such models may be used to build an automated decision support tool to help caretakers identify at-risk patients. ", doi="10.2196/19761", url="https://medinform.jmir.org/2020/11/e19761", url="http://www.ncbi.nlm.nih.gov/pubmed/33245283" } @Article{info:doi/10.2196/15293, author="Yao, Hannah and Rashidian, Sina and Dong, Xinyu and Duanmu, Hongyi and Rosenthal, N. 
Richard and Wang, Fusheng", title="Detection of Suicidality Among Opioid Users on Reddit: Machine Learning--Based Approach", journal="J Med Internet Res", year="2020", month="Nov", day="27", volume="22", number="11", pages="e15293", keywords="opioid epidemic", keywords="opioid-related disorders", keywords="suicide", keywords="social media", keywords="machine learning", keywords="deep learning", keywords="natural language processing", abstract="Background: In recent years, both suicide and overdose rates have been increasing. Many individuals who struggle with opioid use disorder are prone to suicidal ideation; this may often result in overdose. However, these fatal overdoses are difficult to classify as intentional or unintentional. Intentional overdose is difficult to detect, partially due to the lack of predictors and social stigmas that push individuals away from seeking help. These individuals may instead use web-based means to articulate their concerns. Objective: This study aimed to extract posts of suicidality among opioid users on Reddit using machine learning methods. The performance of the models is derivative of the data purity, and the results will help us to better understand the rationale of these users, providing new insights into individuals who are part of the opioid epidemic. Methods: Reddit posts between June 2017 and June 2018 were collected from r/suicidewatch, r/depression, a set of opioid-related subreddits, and a control subreddit set. We first classified suicidal versus nonsuicidal languages and then classified users with opioid usage versus those without opioid usage. Several traditional baselines and neural network (NN) text classifiers were trained using subreddit names as the labels and combinations of semantic inputs. We then attempted to extract out-of-sample data belonging to the intersection of suicide ideation and opioid abuse. Amazon Mechanical Turk was used to provide labels for the out-of-sample data. 
Results: Classification results were at least 90\% across all models for at least one combination of input; the best classifier was convolutional neural network, which obtained an F1 score of 96.6\%. When predicting out-of-sample data for posts containing both suicidal ideation and signs of opioid addiction, NN classifiers produced more false positives and traditional methods produced more false negatives, which is less desirable for predicting suicidal sentiments. Conclusions: Opioid abuse is linked to the risk of unintentional overdose and suicide risk. Social media platforms such as Reddit contain metadata that can aid machine learning and provide information at a personal level that cannot be obtained elsewhere. We demonstrate that it is possible to use NNs as a tool to predict an out-of-sample target with a model built from data sets labeled by characteristics we wish to distinguish in the out-of-sample target. ", doi="10.2196/15293", url="http://www.jmir.org/2020/11/e15293/", url="http://www.ncbi.nlm.nih.gov/pubmed/33245287" } @Article{info:doi/10.2196/20646, author="Sadeh-Sharvit, Shiri and Hollon, D. Steven", title="Leveraging the Power of Nondisruptive Technologies to Optimize Mental Health Treatment: Case Study", journal="JMIR Ment Health", year="2020", month="Nov", day="26", volume="7", number="11", pages="e20646", keywords="anxiety", keywords="behavioral health", keywords="depression", keywords="digital health", keywords="Eleos Health", keywords="mental health", keywords="natural language processing", doi="10.2196/20646", url="http://mental.jmir.org/2020/11/e20646/", url="http://www.ncbi.nlm.nih.gov/pubmed/33242025" } @Article{info:doi/10.2196/22407, author="Shen, S. Tony and Chen, Z. Aaron and Bovonratwet, Patawut and Shen, L. Carol and Su, P. 
Edwin", title="COVID-19--Related Internet Search Patterns Among People in the United States: Exploratory Analysis", journal="J Med Internet Res", year="2020", month="Nov", day="23", volume="22", number="11", pages="e22407", keywords="COVID-19", keywords="pandemic", keywords="internet", keywords="infodemic", keywords="infodemiology", keywords="infoveillance", keywords="natural language processing", keywords="NLP", keywords="health information", keywords="information seeking", abstract="Background: The internet is a well-known source of information that patients use to better inform their opinions and to guide their conversations with physicians during clinic visits. The novelty of the recent COVID-19 outbreak has led patients to turn more frequently to the internet to gather more information and to alleviate their concerns about the virus. Objective: The aims of the study were to (1) determine the most commonly searched phrases related to COVID-19 in the United States and (2) identify the sources of information for these web searches. Methods: Search terms related to COVID-19 were entered into Google. Questions and websites from Google web search were extracted to a database using customized software. Each question was categorized into one of 6 topics: clinical signs and symptoms, treatment, transmission, cleaning methods, activity modification, and policy. Additionally, the websites were categorized according to source: World Health Organization (WHO), Centers for Disease Control and Prevention (CDC), non-CDC government, academic, news, and other media. Results: In total, 200 questions and websites were extracted. The most common question topic was transmission (n=63, 31.5\%), followed by clinical signs and symptoms (n=54, 27.0\%) and activity modification (n=31, 15.5\%). Notably, the clinical signs and symptoms category captured questions about myths associated with the disease, such as whether consuming alcohol stops the coronavirus. 
The most common websites provided were maintained by the CDC, the WHO, and academic medical organizations. Collectively, these three sources accounted for 84.0\% (n=168) of the websites in our sample. Conclusions: In the United States, the most commonly searched topics related to COVID-19 were transmission, clinical signs and symptoms, and activity modification. Reassuringly, a sizable majority of internet sources provided were from major health organizations or from academic medical institutions. ", doi="10.2196/22407", url="http://www.jmir.org/2020/11/e22407/", url="http://www.ncbi.nlm.nih.gov/pubmed/33147163" } @Article{info:doi/10.2196/17903, author="Garcia-Rudolph, Alejandro and Saur{\'i}, Joan and Cegarra, Blanca and Bernabeu Guitart, Montserrat", title="Discovering the Context of People With Disabilities: Semantic Categorization Test and Environmental Factors Mapping of Word Embeddings from Reddit", journal="JMIR Med Inform", year="2020", month="Nov", day="20", volume="8", number="11", pages="e17903", keywords="disability", keywords="Reddit", keywords="social media", keywords="word2vec", keywords="semantic categorization", keywords="silhouette", keywords="activities of daily life", keywords="aspects of daily life", keywords="context", keywords="embeddings", abstract="Background: The World Health Organization's International Classification of Functioning Disability and Health (ICF) conceptualizes disability not solely as a problem that resides in the individual, but as a health experience that occurs in a context. Word embeddings build on the idea that words that occur in similar contexts tend to have similar meanings. In spite of both sharing ``context'' as a key component, word embeddings have been scarcely applied in disability. In this work, we propose social media (particularly, Reddit) to link them. 
Objective: The objective of our study is to train a model for generating word associations using a small dataset (a subreddit on disability) able to retrieve meaningful content. This content will be formally validated and applied to the discovery of related terms in the corpus of the disability subreddit that represent the physical, social, and attitudinal environment (as defined by a formal framework like the ICF) of people with disabilities. Methods: Reddit data were collected from pushshift.io with the pushshiftr R package as a wrapper. A word2vec model was trained with the wordVectors R package using the disability subreddit comments, and a preliminary validation was performed using a subset of Mikolov analogies. We used Van Overschelde's updated and expanded version of the Battig and Montague norms to perform a semantic categories test. Silhouette coefficients were calculated using cosine distance from the wordVectors R package. For each of the 5 ICF environmental factors (EF), we selected representative subcategories addressing different aspects of daily living (ADLs); then, for each subcategory, we identified specific terms extracted from their formal ICF definition and ran the word2vec model to generate their nearest semantic terms, validating the obtained nearest semantic terms using public evidence. Finally, we applied the model to a specific subcategory of an EF involved in a relevant use case in the field of rehabilitation. Results: We analyzed 96,314 comments posted between February 2009 and December 2019, by 10,411 Redditors. We trained word2vec and identified more than 30 analogies (eg, breakfast -- 8 am + 8 pm = dinner). The semantic categorization test showed promising results over 60 categories; for example, s(A relative)=0.562, s(A sport)=0.475 provided remarkable explanations for low s values. We mapped the representative subcategories of all EF chapters and obtained the closest terms for each, which we confirmed with publications. 
This allowed immediate access ({$\leq$}2 seconds) to the terms related to ADLs, ranging from apps ``to know accessibility before you go'' to adapted sports (boccia). For example, for the support and relationships EF subcategory, the closest term discovered by our model was ``resilience,'' recently regarded as a key feature of rehabilitation, not yet having one unified definition. Our model discovered 10 closest terms, which we validated with publications, contributing to the ``resilience'' definition. Conclusions: This study opens up interesting opportunities for the exploration and discovery of the use of a word2vec model that has been trained with a small disability dataset, leading to immediate, accurate, and often unknown (for authors, in many cases) terms related to ADLs within the ICF framework. ", doi="10.2196/17903", url="http://medinform.jmir.org/2020/11/e17903/", url="http://www.ncbi.nlm.nih.gov/pubmed/33216006" } @Article{info:doi/10.2196/15347, author="Homan, Michael Christopher and Schrading, Nicolas J. and Ptucha, W. Raymond and Cerulli, Catherine and Ovesdotter Alm, Cecilia", title="Quantitative Methods for Analyzing Intimate Partner Violence in Microblogs: Observational Study", journal="J Med Internet Res", year="2020", month="Nov", day="19", volume="22", number="11", pages="e15347", keywords="intimate partner violence", keywords="social media", keywords="natural language processing", abstract="Background: Social media is a rich, virtually untapped source of data on the dynamics of intimate partner violence, one that is both global in scale and intimate in detail. Objective: The aim of this study is to use machine learning and other computational methods to analyze social media data for the reasons victims give for staying in or leaving abusive relationships. 
Methods: Human annotation, part-of-speech tagging, and machine learning predictive models, including support vector machines, were used on a Twitter data set of 8767 \#WhyIStayed and \#WhyILeft tweets each. Results: Our methods explored whether we can analyze micronarratives that include details about victims, abusers, and other stakeholders, the actions that constitute abuse, and how the stakeholders respond. Conclusions: Our findings are consistent across various machine learning methods, which correspond to observations in the clinical literature, and affirm the relevance of natural language processing and machine learning for exploring issues of societal importance in social media. ", doi="10.2196/15347", url="http://www.jmir.org/2020/11/e15347/", url="http://www.ncbi.nlm.nih.gov/pubmed/33211021" } @Article{info:doi/10.2196/18659, author="Carlson, A. Luke and Jeffery, M. Molly and Fu, Sunyang and He, Huan and McCoy, G. Rozalina and Wang, Yanshan and Hooten, Michael William and St Sauver, Jennifer and Liu, Hongfang and Fan, Jungwei", title="Characterizing Chronic Pain Episodes in Clinical Text at Two Health Care Systems: Comprehensive Annotation and Corpus Analysis", journal="JMIR Med Inform", year="2020", month="Nov", day="16", volume="8", number="11", pages="e18659", keywords="chronic pain", keywords="guideline development", keywords="knowledge representation", keywords="corpus annotation", keywords="content analysis", abstract="Background: Chronic pain affects more than 20\% of adults in the United States and is associated with substantial physical, mental, and social burden. Clinical text contains rich information about chronic pain, but no systematic appraisal has been performed to assess the electronic health record (EHR) narratives for these patients. A formal content analysis of the unstructured EHR data can inform clinical practice and research in chronic pain. 
Objective: We characterized individual episodes of chronic pain by annotating and analyzing EHR notes for a stratified cohort of adults with known chronic pain. Methods: We used the Rochester Epidemiology Project infrastructure to screen all residents of Olmsted County, Minnesota, for evidence of chronic pain, between January 1, 2005, and September 30, 2015. Diagnosis codes were used to assemble a cohort of 6586 chronic pain patients; people with cancer were excluded. The records of an age- and sex-stratified random sample of 62 patients from the cohort were annotated using an iteratively developed guideline. The annotated concepts included date, location, severity, causes, effects on quality of life, diagnostic procedures, medications, and other treatment modalities. Results: A total of 94 chronic pain episodes from 62 distinct patients were identified by reviewing 3272 clinical notes. Documentation was written by clinicians across a wide spectrum of specialties. Most patients (40/62, 65\%) had 1 pain episode during the study period. Interannotator agreement ranged from 0.78 to 1.00 across the annotated concepts. Some pain-related concepts (eg, body location) had 100\% (94/94) coverage among all the episodes, while others had moderate coverage (eg, effects on quality of life) (55/94, 59\%). Back pain and leg pain were the most common types of chronic pain in the annotated cohort. Musculoskeletal issues like arthritis were annotated as the most common causes. Opioids were the most commonly captured medication, while physical and occupational therapies were the most common nonpharmacological treatments. Conclusions: We systematically annotated chronic pain episodes in clinical text. The rich content analysis results revealed complexity of the chronic pain episodes and of their management, as well as the challenges in extracting pertinent information, even for humans. 
Despite the pilot study nature of the work, the annotation guideline and corpus should be able to serve as informative references for other institutions with shared interest in chronic pain research using EHRs. ", doi="10.2196/18659", url="http://medinform.jmir.org/2020/11/e18659/", url="http://www.ncbi.nlm.nih.gov/pubmed/33108311" } @Article{info:doi/10.2196/21252, author="Spasic, Irena and Button, Kate", title="Patient Triage by Topic Modeling of Referral Letters: Feasibility Study", journal="JMIR Med Inform", year="2020", month="Nov", day="6", volume="8", number="11", pages="e21252", keywords="natural language processing", keywords="machine learning", keywords="data science", keywords="medical informatics", keywords="computer-assisted decision making", abstract="Background: Musculoskeletal conditions are managed within primary care, but patients can be referred to secondary care if a specialist opinion is required. The ever-increasing demand for health care resources emphasizes the need to streamline care pathways with the ultimate aim of ensuring that patients receive timely and optimal care. Information contained in referral letters underpins the referral decision-making process but is yet to be explored systematically for the purposes of treatment prioritization for musculoskeletal conditions. Objective: This study aims to explore the feasibility of using natural language processing and machine learning to automate the triage of patients with musculoskeletal conditions by analyzing information from referral letters. Specifically, we aim to determine whether referral letters can be automatically assorted into latent topics that are clinically relevant, that is, considered relevant when prescribing treatments. Here, clinical relevance is assessed by posing 2 research questions. Can latent topics be used to automatically predict treatment? 
Can clinicians interpret latent topics as cohorts of patients who share common characteristics or experiences such as medical history, demographics, and possible treatments? Methods: We used latent Dirichlet allocation to model each referral letter as a finite mixture over an underlying set of topics and model each topic as an infinite mixture over an underlying set of topic probabilities. The topic model was evaluated in the context of automating patient triage. Given a set of treatment outcomes, a binary classifier was trained for each outcome using previously extracted topics as the input features of the machine learning algorithm. In addition, a qualitative evaluation was performed to assess the human interpretability of topics. Results: The prediction accuracy of binary classifiers outperformed the stratified random classifier by a large margin, indicating that topic modeling could be used to predict the treatment, thus effectively supporting patient triage. The qualitative evaluation confirmed the high clinical interpretability of the topic model. Conclusions: The results established the feasibility of using natural language processing and machine learning to automate triage of patients with knee or hip pain by analyzing information from their referral letters. ", doi="10.2196/21252", url="https://medinform.jmir.org/2020/11/e21252", url="http://www.ncbi.nlm.nih.gov/pubmed/33155985" } @Article{info:doi/10.2196/20826, author="Oliveira, R. Carlos and Niccolai, Patrick and Ortiz, Michelle Anette and Sheth, S. Sangini and Shapiro, D. Eugene and Niccolai, M. Linda and Brandt, A. 
Cynthia", title="Natural Language Processing for Surveillance of Cervical and Anal Cancer and Precancer: Algorithm Development and Split-Validation Study", journal="JMIR Med Inform", year="2020", month="Nov", day="3", volume="8", number="11", pages="e20826", keywords="natural language processing", keywords="automated data extraction", keywords="human papillomavirus", keywords="surveillance", keywords="pathology reporting", keywords="cervical cancer", keywords="anal cancer", keywords="precancer", keywords="cancer", keywords="HPV", keywords="accuracy", abstract="Background: Accurate identification of new diagnoses of human papillomavirus--associated cancers and precancers is an important step toward the development of strategies that optimize the use of human papillomavirus vaccines. The diagnosis of human papillomavirus cancers hinges on a histopathologic report, which is typically stored in electronic medical records as free-form, or unstructured, narrative text. Previous efforts to perform surveillance for human papillomavirus cancers have relied on the manual review of pathology reports to extract diagnostic information, a process that is both labor- and resource-intensive. Natural language processing can be used to automate the structuring and extraction of clinical data from unstructured narrative text in medical records and may provide a practical and effective method for identifying patients with vaccine-preventable human papillomavirus disease for surveillance and research. Objective: This study's objective was to develop and assess the accuracy of a natural language processing algorithm for the identification of individuals with cancer or precancer of the cervix and anus. Methods: A pipeline-based natural language processing algorithm was developed, which incorporated machine learning and rule-based methods to extract diagnostic elements from the narrative pathology reports. 
To test the algorithm's classification accuracy, we used a split-validation study design. Full-length cervical and anal pathology reports were randomly selected from 4 clinical pathology laboratories. Two study team members, blinded to the classifications produced by the natural language processing algorithm, manually and independently reviewed all reports and classified them at the document level according to 2 domains (diagnosis and human papillomavirus testing results). Using the manual review as the gold standard, the algorithm's performance was evaluated using standard measurements of accuracy, recall, precision, and F-measure. Results: The natural language processing algorithm's performance was validated on 949 pathology reports. The algorithm demonstrated accurate identification of abnormal cytology, histology, and positive human papillomavirus tests with accuracies greater than 0.91. Precision was lowest for anal histology reports (0.87, 95\% CI 0.59-0.98) and highest for cervical cytology (0.98, 95\% CI 0.95-0.99). The natural language processing algorithm missed 2 out of the 15 abnormal anal histology reports, which led to a relatively low recall (0.68, 95\% CI 0.43-0.87). Conclusions: This study outlines the development and validation of a freely available and easily implementable natural language processing algorithm that can automate the extraction and classification of clinical data from cervical and anal cytology and histology. ", doi="10.2196/20826", url="https://medinform.jmir.org/2020/11/e20826", url="http://www.ncbi.nlm.nih.gov/pubmed/32469840" } @Article{info:doi/10.2196/18273, author="Zhou, Sicheng and Zhao, Yunpeng and Bian, Jiang and Haynos, F. 
Ann and Zhang, Rui", title="Exploring Eating Disorder Topics on Twitter: Machine Learning Approach", journal="JMIR Med Inform", year="2020", month="Oct", day="30", volume="8", number="10", pages="e18273", keywords="eating disorders", keywords="topic modeling", keywords="text classification", keywords="social media", keywords="public health", abstract="Background: Eating disorders (EDs) are a group of mental illnesses that have an adverse effect on both mental and physical health. As social media platforms (eg, Twitter) have become an important data source for public health research, some studies have qualitatively explored the ways in which EDs are discussed on these platforms. Initial results suggest that such research offers a promising method for further understanding this group of diseases. Nevertheless, an efficient computational method is needed to further identify and analyze tweets relevant to EDs on a larger scale. Objective: This study aims to develop and validate a machine learning--based classifier to identify tweets related to EDs and to explore factors (ie, topics) related to EDs using a topic modeling method. Methods: We collected potential ED-relevant tweets using keywords from previous studies and annotated these tweets into different groups (ie, ED relevant vs irrelevant and then promotional information vs laypeople discussion). Several supervised machine learning methods, such as convolutional neural network (CNN), long short-term memory (LSTM), support vector machine, and na{\"i}ve Bayes, were developed and evaluated using annotated data. We used the classifier with the best performance to identify ED-relevant tweets and applied a topic modeling method---Correlation Explanation (CorEx)---to analyze the content of the identified tweets. To validate these machine learning results, we also collected a cohort of ED-relevant tweets on the basis of manually curated rules. Results: A total of 123,977 tweets were collected during the set period. 
We randomly annotated 2219 tweets for developing the machine learning classifiers. We developed a CNN-LSTM classifier to identify ED-relevant tweets published by laypeople in 2 steps: first relevant versus irrelevant (F1 score=0.89) and then promotional versus published by laypeople (F1 score=0.90). A total of 40,790 ED-relevant tweets were identified using the CNN-LSTM classifier. We also identified another set of tweets (ie, 17,632 ED-relevant and 83,557 ED-irrelevant tweets) posted by laypeople using manually specified rules. Using CorEx on all ED-relevant tweets, the topic model identified 162 topics. Overall, the coherence rate for topic modeling was 77.07\% (1264/1640), indicating a high quality of the produced topics. The topics were further reviewed and analyzed by a domain expert. Conclusions: A developed CNN-LSTM classifier could improve the efficiency of identifying ED-relevant tweets compared with the traditional manual-based method. The CorEx topic model was applied on the tweets identified by the machine learning--based classifier and the traditional manual approach separately. Highly overlapping topics were observed between the 2 cohorts of tweets. The produced topics were further reviewed by a domain expert. Some of the topics identified by the potential ED tweets may provide new avenues for understanding this serious set of disorders. 
", doi="10.2196/18273", url="http://medinform.jmir.org/2020/10/e18273/", url="http://www.ncbi.nlm.nih.gov/pubmed/33124997" } @Article{info:doi/10.2196/18246, author="McDonnell, Michelle and Owen, Edward Jason and Bantum, O'Carroll Erin", title="Identification of Emotional Expression With Cancer Survivors: Validation of Linguistic Inquiry and Word Count", journal="JMIR Form Res", year="2020", month="Oct", day="30", volume="4", number="10", pages="e18246", keywords="linguistic analysis", keywords="emotion", keywords="validation", abstract="Background: Given the high volume of text-based communication such as email, Facebook, Twitter, and additional web-based and mobile apps, there are unique opportunities to use text to better understand underlying psychological constructs such as emotion. Emotion recognition in text is critical to commercial enterprises (eg, understanding the valence of customer reviews) and to current and emerging clinical applications (eg, as markers of clinical progress and risk of suicide), and the Linguistic Inquiry and Word Count (LIWC) is a commonly used program. Objective: Given the wide use of this program, the purpose of this study is to update previous validation results with two newer versions of LIWC. Methods: Tests of proportions were conducted using the total number of emotion words identified by human coders for each emotional category as the reference group. In addition to tests of proportions, we calculated F scores to evaluate the accuracy of LIWC 2001, LIWC 2007, and LIWC 2015. Results: Results indicate that LIWC 2001, LIWC 2007, and LIWC 2015 each demonstrate good sensitivity for identifying emotional expression, whereas LIWC 2007 and LIWC 2015 were significantly more sensitive than LIWC 2001 for identifying emotional expression and positive emotion; however, more recent versions of LIWC were also significantly more likely to overidentify emotional content than LIWC 2001. 
LIWC 2001 demonstrated significantly better precision (F score) for identifying overall emotion, negative emotion, and anxiety compared with LIWC 2007 and LIWC 2015. Conclusions: Taken together, these results suggest that LIWC 2001 most accurately reflects the emotional identification of human coders. ", doi="10.2196/18246", url="https://formative.jmir.org/2020/10/e18246", url="http://www.ncbi.nlm.nih.gov/pubmed/33124986" } @Article{info:doi/10.2196/21801, author="Izquierdo, Luis Jose and Ancochea, Julio and Soriano, B. Joan", title="Clinical Characteristics and Prognostic Factors for Intensive Care Unit Admission of Patients With COVID-19: Retrospective Study Using Machine Learning and Natural Language Processing", journal="J Med Internet Res", year="2020", month="Oct", day="28", volume="22", number="10", pages="e21801", keywords="artificial intelligence", keywords="big data", keywords="COVID-19", keywords="electronic health records", keywords="tachypnea", keywords="SARS-CoV-2", keywords="predictive model", abstract="Background: Many factors involved in the onset and clinical course of the ongoing COVID-19 pandemic are still unknown. Although big data analytics and artificial intelligence are widely used in the realms of health and medicine, researchers are only beginning to use these tools to explore the clinical characteristics and predictive factors of patients with COVID-19. Objective: Our primary objectives are to describe the clinical characteristics and determine the factors that predict intensive care unit (ICU) admission of patients with COVID-19. Determining these factors using a well-defined population can increase our understanding of the real-world epidemiology of the disease. Methods: We used a combination of classic epidemiological methods, natural language processing (NLP), and machine learning (for predictive modeling) to analyze the electronic health records (EHRs) of patients with COVID-19. 
We explored the unstructured free text in the EHRs within the Servicio de Salud de Castilla-La Mancha (SESCAM) Health Care Network (Castilla-La Mancha, Spain) from the entire population with available EHRs (1,364,924 patients) from January 1 to March 29, 2020. We extracted related clinical information regarding diagnosis, progression, and outcome for all COVID-19 cases. Results: A total of 10,504 patients with a clinical or polymerase chain reaction--confirmed diagnosis of COVID-19 were identified; 5519 (52.5\%) were male, with a mean age of 58.2 years (SD 19.7). Upon admission, the most common symptoms were cough, fever, and dyspnea; however, all three symptoms occurred in fewer than half of the cases. Overall, 6.1\% (83/1353) of hospitalized patients required ICU admission. Using a machine-learning, data-driven algorithm, we identified that a combination of age, fever, and tachypnea was the most parsimonious predictor of ICU admission; patients younger than 56 years, without tachypnea, and temperature <39 degrees Celsius (or >39 {\textordmasculine}C without respiratory crackles) were not admitted to the ICU. In contrast, patients with COVID-19 aged 40 to 79 years were likely to be admitted to the ICU if they had tachypnea and delayed their visit to the emergency department after being seen in primary care. Conclusions: Our results show that a combination of easily obtainable clinical variables (age, fever, and tachypnea with or without respiratory crackles) predicts whether patients with COVID-19 will require ICU admission. ", doi="10.2196/21801", url="http://www.jmir.org/2020/10/e21801/", url="http://www.ncbi.nlm.nih.gov/pubmed/33090964" } @Article{info:doi/10.2196/19810, author="Afzal, Muhammad and Alam, Fakhare and Malik, Mahmood Khalid and Malik, M. 
Ghaus", title="Clinical Context--Aware Biomedical Text Summarization Using Deep Neural Network: Model Development and Validation", journal="J Med Internet Res", year="2020", month="Oct", day="23", volume="22", number="10", pages="e19810", keywords="biomedical informatics", keywords="automatic text summarization", keywords="deep neural network", keywords="word embedding", keywords="semantic similarity", keywords="brain aneurysm", abstract="Background: Automatic text summarization (ATS) enables users to retrieve meaningful evidence from big data of biomedical repositories to make complex clinical decisions. Deep neural and recurrent networks outperform traditional machine-learning techniques in areas of natural language processing and computer vision; however, they are yet to be explored in the ATS domain, particularly for medical text summarization. Objective: Traditional approaches in ATS for biomedical text suffer from fundamental issues such as an inability to capture clinical context, quality of evidence, and purpose-driven selection of passages for the summary. We aimed to circumvent these limitations through achieving precise, succinct, and coherent information extraction from credible published biomedical resources, and to construct a simplified summary containing the most informative content that can offer a review particular to clinical needs. Methods: In our proposed approach, we introduce a novel framework, termed Biomed-Summarizer, that provides quality-aware Patient/Problem, Intervention, Comparison, and Outcome (PICO)-based intelligent and context-enabled summarization of biomedical text. Biomed-Summarizer integrates the prognosis quality recognition model with a clinical context--aware model to locate text sequences in the body of a biomedical article for use in the final summary. First, we developed a deep neural network binary classifier for quality recognition to acquire scientifically sound studies and filter out others. 
Second, we developed a bidirectional long-short term memory recurrent neural network as a clinical context--aware classifier, which was trained on semantically enriched features generated using a word-embedding tokenizer for identification of meaningful sentences representing PICO text sequences. Third, we calculated the similarity between query and PICO text sequences using Jaccard similarity with semantic enrichments, where the semantic enrichments are obtained using medical ontologies. Last, we generated a representative summary from the high-scoring PICO sequences aggregated by study type, publication credibility, and freshness score. Results: Evaluation of the prognosis quality recognition model using a large dataset of biomedical literature related to intracranial aneurysm showed an accuracy of 95.41\% (2562/2686) in terms of recognizing quality articles. The clinical context--aware multiclass classifier outperformed the traditional machine-learning algorithms, including support vector machine, gradient boosted tree, linear regression, K-nearest neighbor, and na{\"i}ve Bayes, by achieving 93\% (16127/17341) accuracy for classifying five categories: aim, population, intervention, results, and outcome. The semantic similarity algorithm achieved a significant Pearson correlation coefficient of 0.61 (0-1 scale) on a well-known BIOSSES dataset (with 100 pair sentences) after semantic enrichment, representing an improvement of 8.9\% over baseline Jaccard similarity. Finally, we found a highly positive correlation among the evaluations performed by three domain experts concerning different metrics, suggesting that the automated summarization is satisfactory. Conclusions: By employing the proposed method Biomed-Summarizer, high accuracy in ATS was achieved, enabling seamless curation of research evidence from the biomedical literature to use for clinical decision-making. 
", doi="10.2196/19810", url="http://www.jmir.org/2020/10/e19810/", url="http://www.ncbi.nlm.nih.gov/pubmed/33095174" } @Article{info:doi/10.2196/20291, author="Kang, Hongyu and Li, Jiao and Wu, Meng and Shen, Liu and Hou, Li", title="Building a Pharmacogenomics Knowledge Model Toward Precision Medicine: Case Study in Melanoma", journal="JMIR Med Inform", year="2020", month="Oct", day="21", volume="8", number="10", pages="e20291", keywords="pharmacogenomics", keywords="knowledge model", keywords="BERT--CRF model", keywords="named entity recognition", keywords="melanoma", abstract="Background: Many drugs do not work the same way for everyone owing to distinctions in their genes. Pharmacogenomics (PGx) aims to understand how genetic variants influence drug efficacy and toxicity. It is often considered one of the most actionable areas of the personalized medicine paradigm. However, little prior work has included in-depth explorations and descriptions of drug usage, dosage adjustment, and so on. Objective: We present a pharmacogenomics knowledge model to discover the hidden relationships between PGx entities such as drugs, genes, and diseases, especially details in precise medication. Methods: PGx open data such as DrugBank and RxNorm were integrated in this study, as well as drug labels published by the US Food and Drug Administration. We annotated 190 drug labels manually for entities and relationships. Based on the annotation results, we trained 3 different natural language processing models to complete entity recognition. Finally, the pharmacogenomics knowledge model was described in detail. Results: In entity recognition tasks, the Bidirectional Encoder Representations from Transformers--conditional random field model achieved better performance with micro-F1 score of 85.12\%. The pharmacogenomics knowledge model in our study included 5 semantic types: drug, gene, disease, precise medication (population, daily dose, dose form, frequency, etc), and adverse reaction. 
Meanwhile, 26 semantic relationships were defined in detail. Taking melanoma caused by a BRAF gene mutation into consideration, the pharmacogenomics knowledge model covered 7 related drugs and 4846 triples were established in this case. All the corpora, relationship definitions, and triples were made publicly available. Conclusions: We highlighted the pharmacogenomics knowledge model as a scalable framework for clinicians and clinical pharmacists to adjust drug dosage according to patient-specific genetic variation, and for pharmaceutical researchers to develop new drugs. In the future, a series of other antitumor drugs and automatic relation extractions will be taken into consideration to further enhance our framework with more PGx linked data. ", doi="10.2196/20291", url="http://medinform.jmir.org/2020/10/e20291/", url="http://www.ncbi.nlm.nih.gov/pubmed/33084582" } @Article{info:doi/10.2196/22550, author="Almog, Adar Yasmeen and Rai, Angshu and Zhang, Patrick and Moulaison, Amanda and Powell, Ross and Mishra, Anirban and Weinberg, Kerry and Hamilton, Celeste and Oates, Mary and McCloskey, Eugene and Cummings, R. Steven", title="Deep Learning With Electronic Health Records for Short-Term Fracture Risk Identification: Crystal Bone Algorithm Development and Validation", journal="J Med Internet Res", year="2020", month="Oct", day="16", volume="22", number="10", pages="e22550", keywords="fracture", keywords="bone", keywords="osteoporosis", keywords="low bone mass", keywords="prediction", keywords="natural language processing", keywords="NLP", keywords="machine learning", keywords="deep learning", keywords="artificial intelligence", keywords="AI", keywords="electronic health record", keywords="EHR", abstract="Background: Fractures as a result of osteoporosis and low bone mass are common and give rise to significant clinical, personal, and economic burden. Even after a fracture occurs, high fracture risk remains widely underdiagnosed and undertreated. 
Common fracture risk assessment tools utilize a subset of clinical risk factors for prediction, and often require manual data entry. Furthermore, these tools predict risk over the long term and do not explicitly provide short-term risk estimates necessary to identify patients likely to experience a fracture in the next 1-2 years. Objective: The goal of this study was to develop and evaluate an algorithm for the identification of patients at risk of fracture in a subsequent 1- to 2-year period. In order to address the aforementioned limitations of current prediction tools, this approach focused on a short-term timeframe, automated data entry, and the use of longitudinal data to inform the predictions. Methods: Using retrospective electronic health record data from over 1,000,000 patients, we developed Crystal Bone, an algorithm that applies machine learning techniques from natural language processing to the temporal nature of patient histories to generate short-term fracture risk predictions. Similar to how language models predict the next word in a given sentence or the topic of a document, Crystal Bone predicts whether a patient's future trajectory might contain a fracture event, or whether the signature of the patient's journey is similar to that of a typical future fracture patient. A holdout set with 192,590 patients was used to validate accuracy. Experimental baseline models and human-level performance were used for comparison. Results: The model accurately predicted 1- to 2-year fracture risk for patients aged over 50 years (area under the receiver operating characteristics curve [AUROC] 0.81). These algorithms outperformed the experimental baselines (AUROC 0.67) and showed meaningful improvements when compared to retrospective approximation of human-level performance by correctly identifying 9649 of 13,765 (70\%) at-risk patients who did not receive any preventative bone-health-related medical interventions from their physicians. 
Conclusions: These findings indicate that it is possible to use a patient's unique medical history as it changes over time to predict the risk of short-term fracture. Validating and applying such a tool within the health care system could enable automated and widespread prediction of this risk and may help with identification of patients at very high risk of fracture. ", doi="10.2196/22550", url="http://www.jmir.org/2020/10/e22550/", url="http://www.ncbi.nlm.nih.gov/pubmed/32956069" } @Article{info:doi/10.2196/21383, author="Osadchiy, Vadim and Jiang, Tommy and Mills, Nelson Jesse and Eleswarapu, Venkata Sriram", title="Low Testosterone on Social Media: Application of Natural Language Processing to Understand Patients' Perceptions of Hypogonadism and Its Treatment", journal="J Med Internet Res", year="2020", month="Oct", day="7", volume="22", number="10", pages="e21383", keywords="hypogonadism", keywords="natural language processing", keywords="Reddit", keywords="social media", keywords="testosterone replacement therapy", keywords="Twitter", abstract="Background: Despite the results of the Testosterone Trials, physicians remain uncomfortable treating men with hypogonadism. Discouraged, men increasingly turn to social media to discuss medical concerns. Objective: The goal of the research was to apply natural language processing (NLP) techniques to social media posts for identification of themes of discussion regarding low testosterone and testosterone replacement therapy (TRT) in order to inform how physicians may better evaluate and counsel patients. Methods: We retrospectively extracted posts from the Reddit community r/Testosterone from December 2015 through May 2019. We applied an NLP technique called the meaning extraction method with principal component analysis (MEM/PCA) to computationally derive discussion themes. 
We then performed a prospective analysis of Twitter data (tweets) that contained the terms low testosterone, low T, and testosterone replacement from June through September 2019. Results: A total of 199,335 Reddit posts and 6659 tweets were analyzed. MEM/PCA revealed dominant themes of discussion: symptoms of hypogonadism, seeing a doctor, results of laboratory tests, derogatory comments and insults, TRT medications, and cardiovascular risk. More than 25\% of Reddit posts contained the term doctor, and more than 5\% urologist. Conclusions: This study represents the first NLP evaluation of the social media landscape surrounding hypogonadism and TRT. Although physicians traditionally limit their practices to within their clinic walls, the ubiquity of social media demands that physicians understand what patients discuss online. Physicians may do well to bring up online discussions during clinic consultations for low testosterone to pull back the curtain and dispel myths. ", doi="10.2196/21383", url="https://www.jmir.org/2020/10/e21383", url="http://www.ncbi.nlm.nih.gov/pubmed/33026354" } @Article{info:doi/10.2196/19618, author="Teng, Shasha and Khong, Wei Kok and Pahlevan Sharif, Saeed and Ahmed, Amr", title="YouTube Video Comments on Healthy Eating: Descriptive and Predictive Analysis", journal="JMIR Public Health Surveill", year="2020", month="Oct", day="1", volume="6", number="4", pages="e19618", keywords="YouTube comments", keywords="text mining", keywords="healthy eating", keywords="clustering", keywords="structural equation modeling", abstract="Background: Poor nutrition and food selection lead to health issues such as obesity, cardiovascular disease, diabetes, and cancer. This study of YouTube comments aims to uncover patterns of food choices and the factors driving them, in addition to exploring the sentiments of healthy eating in networked communities. 
Objective: The objectives of the study are to explore the determinants, motives, and barriers to healthy eating behaviors in online communities and provide insight into YouTube video commenters' perceptions and sentiments of healthy eating through text mining techniques. Methods: This paper applied text mining techniques to identify and categorize meaningful healthy eating determinants. These determinants were then incorporated into hypothetically defined constructs that reflect their thematic and sentimental nature in order to test our proposed model using a variance-based structural equation modeling procedure. Results: With a dataset of 4654 comments extracted from YouTube videos in the context of Malaysia, we apply a text mining method to analyze the perceptions and behavior of healthy eating. There were 10 clusters identified with regard to food ingredients, food price, food choice, food portion, well-being, cooking, and culture in the concept of healthy eating. The structural equation modeling results show that clusters are positively associated with healthy eating with all P values less than .001, indicating a statistical significance of the study results. People hold complex and multifaceted beliefs about healthy eating in the context of YouTube videos. Fruits and vegetables are the epitome of healthy foods. Despite having a favorable perception of healthy eating, people may not purchase commonly recognized healthy food if it has a premium price. People associate healthy eating with weight concerns. Food taste, variety, and availability are identified as reasons why Malaysians cannot act on eating healthily. Conclusions: This study offers significant value to the existing literature of health-related studies by investigating the rich and diverse social media data gleaned from YouTube. This research integrated text mining analytics with predictive modeling techniques to identify thematic constructs and analyze the sentiments of healthy eating. 
", doi="10.2196/19618", url="https://publichealth.jmir.org/2020/4/e19618", url="http://www.ncbi.nlm.nih.gov/pubmed/33001036" } @Article{info:doi/10.2196/22845, author="Zhang, Jingwen and Oh, Jung Yoo and Lange, Patrick and Yu, Zhou and Fukuoka, Yoshimi", title="Artificial Intelligence Chatbot Behavior Change Model for Designing Artificial Intelligence Chatbots to Promote Physical Activity and a Healthy Diet: Viewpoint", journal="J Med Internet Res", year="2020", month="Sep", day="30", volume="22", number="9", pages="e22845", keywords="chatbot", keywords="conversational agent", keywords="artificial intelligence", keywords="physical activity", keywords="diet", keywords="intervention", keywords="behavior change", keywords="natural language processing", keywords="communication", abstract="Background: Chatbots empowered by artificial intelligence (AI) can increasingly engage in natural conversations and build relationships with users. Applying AI chatbots to lifestyle modification programs is one of the promising areas to develop cost-effective and feasible behavior interventions to promote physical activity and a healthy diet. Objective: The purposes of this perspective paper are to present a brief literature review of chatbot use in promoting physical activity and a healthy diet, describe the AI chatbot behavior change model our research team developed based on extensive interdisciplinary research, and discuss ethical principles and considerations. Methods: We conducted a preliminary search of studies reporting chatbots for improving physical activity and/or diet in four databases in July 2020. We summarized the characteristics of the chatbot studies and reviewed recent developments in human-AI communication research and innovations in natural language processing. Based on the identified gaps and opportunities, as well as our own clinical and research experience and findings, we propose an AI chatbot behavior change model. 
Results: Our review found a lack of understanding around theoretical guidance and practical recommendations on designing AI chatbots for lifestyle modification programs. The proposed AI chatbot behavior change model consists of the following four components to provide such guidance: (1) designing chatbot characteristics and understanding user background; (2) building relational capacity; (3) building persuasive conversational capacity; and (4) evaluating mechanisms and outcomes. The rationale and evidence supporting the design and evaluation choices for this model are presented in this paper. Conclusions: As AI chatbots become increasingly integrated into various digital communications, our proposed theoretical framework is the first step to conceptualize the scope of utilization in health behavior change domains and to synthesize all possible dimensions of chatbot features to inform intervention design and evaluation. There is a need for more interdisciplinary work to continue developing AI techniques to improve a chatbot's relational and persuasive capacities to change physical activity and diet behaviors with strong ethical principles. ", doi="10.2196/22845", url="https://www.jmir.org/2020/9/e22845", url="http://www.ncbi.nlm.nih.gov/pubmed/32996892" } @Article{info:doi/10.2196/17424, author="Norman, P. Kim and Govindjee, Anita and Norman, R. Seth and Godoy, Michael and Cerrone, L. Kimberlie and Kieschnick, W. 
Dustin and Kassler, William", title="Natural Language Processing Tools for Assessing Progress and Outcome of Two Veteran Populations: Cohort Study From a Novel Online Intervention for Posttraumatic Growth", journal="JMIR Form Res", year="2020", month="Sep", day="23", volume="4", number="9", pages="e17424", keywords="natural language analysis", keywords="emotional tone", keywords="personality", keywords="values", keywords="PTSD", keywords="military sexual trauma", keywords="online interventions", keywords="internet-based cognitive behavioral therapy", keywords="narrative therapy", keywords="mindfulness", abstract="Background: Over 100 million Americans lack affordable access to behavioral health care. Among these, military veterans are an especially vulnerable population. Military veterans require unique behavioral health services that can address military experiences and challenges transitioning to the civilian sector. Real-world programs to help veterans successfully transition to civilian life must build a sense of community, have the ability to scale, and be able to reach the many veterans who cannot or will not access care. Digitally based behavioral health initiatives have emerged within the past few years to improve this access to care. Our novel behavioral health intervention teaches mindfulness-based cognitive behavioral therapy and narrative therapy using peer support groups as guides, with human-facilitated asynchronous online discussions. Our study applies natural language processing (NLP) analytics to assess effectiveness of our online intervention in order to test whether NLP may provide insights and detect nuances of personal change and growth that are not currently captured by subjective symptom measures. Objective: This paper aims to study the value of NLP analytics in assessing progress and outcomes among combat veterans and military sexual assault survivors participating in novel online interventions for posttraumatic growth. 
Methods: IBM Watson and Linguistic Inquiry and Word Count tools were applied to the narrative writings of combat veterans and survivors of military sexual trauma who participated in novel online peer-supported group therapies for posttraumatic growth. Participants watched videos, practiced skills such as mindfulness meditation, told their stories through narrative writing, and participated in asynchronous, facilitated online discussions with peers. The writings, including online postings, by the 16 participants who completed the program were analyzed after completion of the program. Results: Our results suggest that NLP can provide valuable insights on shifts in personality traits, personal values, needs, and emotional tone in an evaluation of our novel online behavioral health interventions. Emotional tone analysis demonstrated significant decreases in fear and anxiety, sadness, and disgust, as well as increases in joy. Significant effects were found for personal values and needs, such as needing or desiring closeness and helping others, and for personality traits of openness, conscientiousness, extroversion, agreeableness, and neuroticism (ie, emotional range). Participants also demonstrated increases in authenticity and clout (confidence) of expression. NLP results were generally supported by qualitative observations and analysis, structured data, and course feedback. Conclusions: The aggregate of results in our study suggest that our behavioral health intervention was effective and that NLP can provide valuable insights on shifts in personality traits, personal values, and needs, as well as measure changes in emotional tone. NLP's sensitivity to changes in emotional tone, values, and personality strengths suggests the efficacy of NLP as a leading indicator of treatment progress. 
", doi="10.2196/17424", url="https://formative.jmir.org/2020/9/e17424", url="http://www.ncbi.nlm.nih.gov/pubmed/32769074" } @Article{info:doi/10.2196/19516, author="Dolci, Elisa and Sch{\"a}rer, Barbara and Grossmann, Nicole and Musy, Naima Sarah and Z{\'u}{\~n}iga, Franziska and Bachnick, Stefanie and Simon, Michael", title="Automated Fall Detection Algorithm With Global Trigger Tool, Incident Reports, Manual Chart Review, and Patient-Reported Falls: Algorithm Development and Validation With a Retrospective Diagnostic Accuracy Study", journal="J Med Internet Res", year="2020", month="Sep", day="21", volume="22", number="9", pages="e19516", keywords="falls", keywords="adverse event", keywords="harm", keywords="algorithm", keywords="natural language processing", abstract="Background: Falls are common adverse events in hospitals, frequently leading to additional health costs due to prolonged stays and extra care. Therefore, reliable fall detection is vital to develop and test fall prevention strategies. However, conventional methods---voluntary incident reports and manual chart reviews---are error-prone and time consuming, respectively. Using a search algorithm to examine patients' electronic health record data and flag fall indicators offers an inexpensive, sensitive, cost-effective alternative. Objective: This study's purpose was to develop a fall detection algorithm for use with electronic health record data, then to evaluate it alongside the Global Trigger Tool, incident reports, a manual chart review, and patient-reported falls. Methods: Conducted on 2 campuses of a large hospital system in Switzerland, this retrospective diagnostic accuracy study consisted of 2 substudies: the first, targeting 240 patients, for algorithm development and the second, targeting 298 patients, for validation. 
In the development study, we compared the new algorithm's in-hospital fall rates with those indicated by the Global Trigger Tool and incident reports; in the validation study, we compared the algorithm's in-hospital fall rates with those from patient-reported falls and manual chart review. We compared the various methods by calculating sensitivity, specificity, and predictive values. Results: Twenty in-hospital falls were discovered in the development study sample. Of these, the algorithm detected 19 (sensitivity 95\%), the Global Trigger Tool detected 18 (90\%), and incident reports detected 14 (67\%). Of the 15 falls found in the validation sample, the algorithm identified all 15 (100\%), the manual chart review identified 14 (93\%), and the patient-reported fall measure identified 5 (33\%). Owing to relatively high numbers of false positives based on falls present on admission, the algorithm's positive predictive values were 50\% (development sample) and 47\% (validation sample). Instead of requiring 10 minutes per case for a full manual review or 20 minutes to apply the Global Trigger Tool, the algorithm requires only a few seconds, after which only the positive results (roughly 11\% of the full case number) require review. Conclusions: The newly developed electronic health record algorithm demonstrated very high sensitivity for fall detection. Applied in near real time, the algorithm can record in-hospital falls events effectively and help to develop and test fall prevention measures. 
", doi="10.2196/19516", url="http://www.jmir.org/2020/9/e19516/", url="http://www.ncbi.nlm.nih.gov/pubmed/32955445" } @Article{info:doi/10.2196/19133, author="Ferrario, Andrea and Demiray, Burcu and Yordanova, Kristina and Luo, Minxia and Martin, Mike", title="Social Reminiscence in Older Adults' Everyday Conversations: Automated Detection Using Natural Language Processing and Machine Learning", journal="J Med Internet Res", year="2020", month="Sep", day="15", volume="22", number="9", pages="e19133", keywords="aging", keywords="dementia", keywords="reminiscence", keywords="real-life conversations", keywords="electronically activated recorder (EAR)", keywords="natural language processing", keywords="machine learning", keywords="imbalanced learning", abstract="Background: Reminiscence is the act of thinking or talking about personal experiences that occurred in the past. It is a central task of old age that is essential for healthy aging, and it serves multiple functions, such as decision-making and introspection, transmitting life lessons, and bonding with others. The study of social reminiscence behavior in everyday life can be used to generate data and detect reminiscence from general conversations. Objective: The aims of this original paper are to (1) preprocess coded transcripts of conversations in German of older adults with natural language processing (NLP), and (2) implement and evaluate learning strategies using different NLP features and machine learning algorithms to detect reminiscence in a corpus of transcripts. Methods: The methods in this study comprise (1) collecting and coding of transcripts of older adults' conversations in German, (2) preprocessing transcripts to generate NLP features (bag-of-words models, part-of-speech tags, pretrained German word embeddings), and (3) training machine learning models to detect reminiscence using random forests, support vector machines, and adaptive and extreme gradient boosting algorithms. 
The data set comprises 2214 transcripts, including 109 transcripts with reminiscence. Due to class imbalance in the data, we introduced three learning strategies: (1) class-weighted learning, (2) a meta-classifier consisting of a voting ensemble, and (3) data augmentation with the Synthetic Minority Oversampling Technique (SMOTE) algorithm. For each learning strategy, we performed cross-validation on a random sample of the training data set of transcripts. We computed the area under the curve (AUC), the average precision (AP), precision, recall, as well as F1 score and specificity measures on the test data, for all combinations of NLP features, algorithms, and learning strategies. Results: Class-weighted support vector machines on bag-of-words features outperformed all other classifiers (AUC=0.91, AP=0.56, precision=0.5, recall=0.45, F1=0.48, specificity=0.98), followed by support vector machines on SMOTE-augmented data and word embeddings features (AUC=0.89, AP=0.54, precision=0.35, recall=0.59, F1=0.44, specificity=0.94). For the meta-classifier strategy, adaptive and extreme gradient boosting algorithms trained on word embeddings and bag-of-words outperformed all other classifiers and NLP features; however, the performance of the meta-classifier learning strategy was lower compared to other strategies, with highly imbalanced precision-recall trade-offs. Conclusions: This study provides evidence of the applicability of NLP and machine learning pipelines for the automated detection of reminiscence in older adults' everyday conversations in German. The methods and findings of this study could be relevant for designing unobtrusive computer systems for the real-time detection of social reminiscence in the everyday life of older adults and classifying their functions. 
With further improvements, these systems could be deployed in health interventions aimed at improving older adults' well-being by promoting self-reflection and suggesting coping strategies to be used in the case of dysfunctional reminiscence cases, which can undermine physical and mental health. ", doi="10.2196/19133", url="http://www.jmir.org/2020/9/e19133/", url="http://www.ncbi.nlm.nih.gov/pubmed/32866108" } @Article{info:doi/10.2196/20701, author="Schachner, Theresa and Keller, Roman and v Wangenheim, Florian", title="Artificial Intelligence-Based Conversational Agents for Chronic Conditions: Systematic Literature Review", journal="J Med Internet Res", year="2020", month="Sep", day="14", volume="22", number="9", pages="e20701", keywords="artificial intelligence", keywords="conversational agents", keywords="chatbots", keywords="healthcare", keywords="chronic diseases", keywords="systematic literature review", abstract="Background: A rising number of conversational agents or chatbots are equipped with artificial intelligence (AI) architecture. They are increasingly prevalent in health care applications such as those providing education and support to patients with chronic diseases, one of the leading causes of death in the 21st century. AI-based chatbots enable more effective and frequent interactions with such patients. Objective: The goal of this systematic literature review is to review the characteristics, health care conditions, and AI architectures of AI-based conversational agents designed specifically for chronic diseases. Methods: We conducted a systematic literature review using PubMed MEDLINE, EMBASE, PyscInfo, CINAHL, ACM Digital Library, ScienceDirect, and Web of Science. We applied a predefined search strategy using the terms ``conversational agent,'' ``healthcare,'' ``artificial intelligence,'' and their synonyms. We updated the search results using Google alerts, and screened reference lists for other relevant articles. 
We included primary research studies that involved the prevention, treatment, or rehabilitation of chronic diseases, involved a conversational agent, and included any kind of AI architecture. Two independent reviewers conducted screening and data extraction, and Cohen kappa was used to measure interrater agreement. A narrative approach was applied for data synthesis. Results: The literature search found 2052 articles, out of which 10 papers met the inclusion criteria. The small number of identified studies together with the prevalence of quasi-experimental studies (n=7) and prevailing prototype nature of the chatbots (n=7) revealed the immaturity of the field. The reported chatbots addressed a broad variety of chronic diseases (n=6), showcasing a tendency to develop specialized conversational agents for individual chronic conditions. However, there lacks comparison of these chatbots within and between chronic diseases. In addition, the reported evaluation measures were not standardized, and the addressed health goals showed a large range. Together, these study characteristics complicated comparability and open room for future research. While natural language processing represented the most used AI technique (n=7) and the majority of conversational agents allowed for multimodal interaction (n=6), the identified studies demonstrated broad heterogeneity, lack of depth of reported AI techniques and systems, and inconsistent usage of taxonomy of the underlying AI software, further aggravating comparability and generalizability of study results. Conclusions: The literature on AI-based conversational agents for chronic conditions is scarce and mostly consists of quasi-experimental studies with chatbots in prototype stage that use natural language processing and allow for multimodal user interaction. Future research could profit from evidence-based evaluation of the AI-based conversational agents and comparison thereof within and between different chronic health conditions. 
Besides increased comparability, the quality of chatbots developed for specific chronic conditions and their subsequent impact on the target patients could be enhanced by more structured development and standardized evaluation processes. ", doi="10.2196/20701", url="http://www.jmir.org/2020/9/e20701/", url="http://www.ncbi.nlm.nih.gov/pubmed/32924957" } @Article{info:doi/10.2196/19975, author="Benson, Ryzen and Hu, Mengke and Chen, T. Annie and Nag, Subhadeep and Zhu, Shu-Hong and Conway, Mike", title="Investigating the Attitudes of Adolescents and Young Adults Towards JUUL: Computational Study Using Twitter Data", journal="JMIR Public Health Surveill", year="2020", month="Sep", day="2", volume="6", number="3", pages="e19975", keywords="JUUL", keywords="electronic cigarettes", keywords="smoking cessation", keywords="natural language processing", keywords="NLP", keywords="Twitter", keywords="underage tobacco use", keywords="tobacco", keywords="e-cig", keywords="ENDS", keywords="electronic nicotine delivery system", keywords="machine learning", keywords="infodemiology", keywords="infoveillance", keywords="social media", keywords="public health", abstract="Background: Increases in electronic nicotine delivery system (ENDS) use among high school students from 2017 to 2019 appear to be associated with the increasing popularity of the ENDS device JUUL. Objective: We employed a content analysis approach in conjunction with natural language processing methods using Twitter data to understand salient themes regarding JUUL use on Twitter, sentiment towards JUUL, and underage JUUL use. Methods: Between July 2018 and August 2019, 11,556 unique tweets containing a JUUL-related keyword were collected. We manually annotated 4000 tweets for JUUL-related themes of use and sentiment. We used 3 machine learning algorithms to classify positive and negative JUUL sentiments as well as underage JUUL mentions. 
Results: Of the annotated tweets, 78.80\% (3152/4000) contained a specific mention of JUUL. Only 1.43\% (45/3152) of tweets mentioned using JUUL as a method of smoking cessation, and only 6.85\% (216/3152) of tweets mentioned the potential health effects of JUUL use. Of the machine learning methods used, the random forest classifier was the best performing algorithm among all 3 classification tasks (ie, positive sentiment, negative sentiment, and underage JUUL mentions). Conclusions: Our findings suggest that a vast majority of Twitter users are not using JUUL to aid in smoking cessation nor do they mention the potential health benefits or detriments of JUUL use. Using machine learning algorithms to identify tweets containing underage JUUL mentions can support the timely surveillance of JUUL habits and opinions, further assisting youth-targeted public health intervention strategies. ", doi="10.2196/19975", url="https://publichealth.jmir.org/2020/3/e19975", url="http://www.ncbi.nlm.nih.gov/pubmed/32876579" } @Article{info:doi/10.2196/17830, author="M{\"u}ller, Martin and Schneider, Manuel and Salath{\'e}, Marcel and Vayena, Effy", title="Assessing Public Opinion on CRISPR-Cas9: Combining Crowdsourcing and Deep Learning", journal="J Med Internet Res", year="2020", month="Aug", day="31", volume="22", number="8", pages="e17830", keywords="CRISPR", keywords="natural language processing", keywords="sentiment analysis", keywords="digital methods", keywords="infodemiology", keywords="infoveillace", keywords="empirical bioethics", keywords="social media", abstract="Background: The discovery of the CRISPR-Cas9--based gene editing method has opened unprecedented new potential for biological and medical engineering, sparking a growing public debate on both the potential and dangers of CRISPR applications. 
Given the speed of technology development and the almost instantaneous global spread of news, it is important to follow evolving debates without much delay and in sufficient detail, as certain events may have a major long-term impact on public opinion and later influence policy decisions. Objective: Social media networks such as Twitter have shown to be major drivers of news dissemination and public discourse. They provide a vast amount of semistructured data in almost real-time and give direct access to the content of the conversations. We can now mine and analyze such data quickly because of recent developments in machine learning and natural language processing. Methods: Here, we used Bidirectional Encoder Representations from Transformers (BERT), an attention-based transformer model, in combination with statistical methods to analyze the entirety of all tweets ever published on CRISPR since the publication of the first gene editing application in 2013. Results: We show that the mean sentiment of tweets was initially very positive, but began to decrease over time, and that this decline was driven by rare peaks of strong negative sentiments. Due to the high temporal resolution of the data, we were able to associate these peaks with specific events and to observe how trending topics changed over time. Conclusions: Overall, this type of analysis can provide valuable and complementary insights into ongoing public debates, extending the traditional empirical bioethics toolset. 
", doi="10.2196/17830", url="http://www.jmir.org/2020/8/e17830/", url="http://www.ncbi.nlm.nih.gov/pubmed/32865499" } @Article{info:doi/10.2196/20794, author="Mackey, Ken Tim and Li, Jiawei and Purushothaman, Vidya and Nali, Matthew and Shah, Neal and Bardier, Cortni and Cai, Mingxiang and Liang, Bryan", title="Big Data, Natural Language Processing, and Deep Learning to Detect and Characterize Illicit COVID-19 Product Sales: Infoveillance Study on Twitter and Instagram", journal="JMIR Public Health Surveill", year="2020", month="Aug", day="25", volume="6", number="3", pages="e20794", keywords="COVID-19", keywords="coronavirus", keywords="infectious disease", keywords="social media", keywords="surveillance", keywords="infoveillance", keywords="infodemiology", keywords="infodemic", keywords="fraud", keywords="cybercrime", abstract="Background: The coronavirus disease (COVID-19) pandemic is perhaps the greatest global health challenge of the last century. Accompanying this pandemic is a parallel ``infodemic,'' including the online marketing and sale of unapproved, illegal, and counterfeit COVID-19 health products including testing kits, treatments, and other questionable ``cures.'' Enabling the proliferation of this content is the growing ubiquity of internet-based technologies, including popular social media platforms that now have billions of global users. Objective: This study aims to collect, analyze, identify, and enable reporting of suspected fake, counterfeit, and unapproved COVID-19--related health care products from Twitter and Instagram. Methods: This study is conducted in two phases beginning with the collection of COVID-19--related Twitter and Instagram posts using a combination of web scraping on Instagram and filtering the public streaming Twitter application programming interface for keywords associated with suspect marketing and sale of COVID-19 products. 
The second phase involved data analysis using natural language processing (NLP) and deep learning to identify potential sellers that were then manually annotated for characteristics of interest. We also visualized illegal selling posts on a customized data dashboard to enable public health intelligence. Results: We collected a total of 6,029,323 tweets and 204,597 Instagram posts filtered for terms associated with suspect marketing and sale of COVID-19 health products from March to April for Twitter and February to May for Instagram. After applying our NLP and deep learning approaches, we identified 1271 tweets and 596 Instagram posts associated with questionable sales of COVID-19--related products. Generally, product introduction came in two waves, with the first consisting of questionable immunity-boosting treatments and a second involving suspect testing kits. We also detected a low volume of pharmaceuticals that have not been approved for COVID-19 treatment. Other major themes detected included products offered in different languages, various claims of product credibility, completely unsubstantiated products, unapproved testing modalities, and different payment and seller contact methods. Conclusions: Results from this study provide initial insight into one front of the ``infodemic'' fight against COVID-19 by characterizing what types of health products, selling claims, and types of sellers were active on two popular social media platforms at earlier stages of the pandemic. This cybercrime challenge is likely to continue as the pandemic progresses and more people seek access to COVID-19 testing and treatment. This data intelligence can help public health agencies, regulatory authorities, legitimate manufacturers, and technology platforms better remove and prevent this content from harming the public. 
", doi="10.2196/20794", url="http://publichealth.jmir.org/2020/3/e20794/", url="http://www.ncbi.nlm.nih.gov/pubmed/32750006" } @Article{info:doi/10.2196/22590, author="Hung, Man and Lauren, Evelyn and Hon, S. Eric and Birmingham, C. Wendy and Xu, Julie and Su, Sharon and Hon, D. Shirley and Park, Jungweon and Dang, Peter and Lipsky, S. Martin", title="Social Network Analysis of COVID-19 Sentiments: Application of Artificial Intelligence", journal="J Med Internet Res", year="2020", month="Aug", day="18", volume="22", number="8", pages="e22590", keywords="COVID-19", keywords="coronavirus", keywords="sentiment", keywords="social network", keywords="Twitter", keywords="infodemiology", keywords="infodemic", keywords="pandemic", keywords="crisis", keywords="public health", keywords="business economy", keywords="artificial intelligence", abstract="Background: The coronavirus disease (COVID-19) pandemic led to substantial public discussion. Understanding these discussions can help institutions, governments, and individuals navigate the pandemic. Objective: The aim of this study is to analyze discussions on Twitter related to COVID-19 and to investigate the sentiments toward COVID-19. Methods: This study applied machine learning methods in the field of artificial intelligence to analyze data collected from Twitter. Using tweets originating exclusively in the United States and written in English during the 1-month period from March 20 to April 19, 2020, the study examined COVID-19--related discussions. Social network and sentiment analyses were also conducted to determine the social network of dominant topics and whether the tweets expressed positive, neutral, or negative sentiments. Geographic analysis of the tweets was also conducted. Results: There were a total of 14,180,603 likes, 863,411 replies, 3,087,812 retweets, and 641,381 mentions in tweets during the study timeframe. 
Out of 902,138 tweets analyzed, sentiment analysis classified 434,254 (48.2\%) tweets as having a positive sentiment, 187,042 (20.7\%) as neutral, and 280,842 (31.1\%) as negative. The study identified 5 dominant themes among COVID-19--related tweets: health care environment, emotional support, business economy, social change, and psychological stress. Alaska, Wyoming, New Mexico, Pennsylvania, and Florida were the states expressing the most negative sentiment while Vermont, North Dakota, Utah, Colorado, Tennessee, and North Carolina conveyed the most positive sentiment. Conclusions: This study identified 5 prevalent themes of COVID-19 discussion with sentiments ranging from positive to negative. These themes and sentiments can clarify the public's response to COVID-19 and help officials navigate the pandemic. ", doi="10.2196/22590", url="http://www.jmir.org/2020/8/e22590/", url="http://www.ncbi.nlm.nih.gov/pubmed/32750001" } @Article{info:doi/10.2196/20773, author="Neuraz, Antoine and Lerner, Ivan and Digan, William and Paris, Nicolas and Tsopra, Rosy and Rogier, Alice and Baudoin, David and Cohen, Bretonnel Kevin and Burgun, Anita and Garcelon, Nicolas and Rance, Bastien and ", title="Natural Language Processing for Rapid Response to Emergent Diseases: Case Study of Calcium Channel Blockers and Hypertension in the COVID-19 Pandemic", journal="J Med Internet Res", year="2020", month="Aug", day="14", volume="22", number="8", pages="e20773", keywords="medication information", keywords="natural language processing", keywords="electronic health records", keywords="COVID-19", keywords="public health", keywords="response", keywords="emergent disease", keywords="informatics", abstract="Background: A novel disease poses special challenges for informatics solutions. Biomedical informatics relies for the most part on structured data, which require a preexisting data or knowledge model; however, novel diseases do not have preexisting knowledge models. 
In an emergent epidemic, language processing can enable rapid conversion of unstructured text to a novel knowledge model. However, although this idea has often been suggested, no opportunity has arisen to actually test it in real time. The current coronavirus disease (COVID-19) pandemic presents such an opportunity. Objective: The aim of this study was to evaluate the added value of information from clinical text in response to emergent diseases using natural language processing (NLP). Methods: We explored the effects of long-term treatment by calcium channel blockers on the outcomes of COVID-19 infection in patients with high blood pressure during in-patient hospital stays using two sources of information: data available strictly from structured electronic health records (EHRs) and data available through structured EHRs and text mining. Results: In this multicenter study involving 39 hospitals, text mining increased the statistical power sufficiently to change a negative result for an adjusted hazard ratio to a positive one. Compared to the baseline structured data, the number of patients available for inclusion in the study increased by 2.95 times, the amount of available information on medications increased by 7.2 times, and the amount of additional phenotypic information increased by 11.9 times. Conclusions: In our study, use of calcium channel blockers was associated with decreased in-hospital mortality in patients with COVID-19 infection. This finding was obtained by quickly adapting an NLP pipeline to the domain of the novel disease; the adapted pipeline still performed sufficiently to extract useful information. When that information was used to supplement existing structured data, the sample size could be increased sufficiently to see treatment effects that were not previously statistically detectable. 
", doi="10.2196/20773", url="http://www.jmir.org/2020/8/e20773/", url="http://www.ncbi.nlm.nih.gov/pubmed/32759101" } @Article{info:doi/10.2196/18855, author="Baxter, L. Sally and Klie, R. Adam and Radha Saseendrakumar, Bharanidharan and Ye, Y. Gordon and Hogarth, Michael", title="Text Processing for Detection of Fungal Ocular Involvement in Critical Care Patients: Cross-Sectional Study", journal="J Med Internet Res", year="2020", month="Aug", day="14", volume="22", number="8", pages="e18855", keywords="fungemia", keywords="fungal endophthalmitis", keywords="fungal ocular involvement", keywords="electronic health records", keywords="diagnosis codes", keywords="regular expressions", keywords="natural language processing", keywords="unstructured data", abstract="Background: Fungal ocular involvement can develop in patients with fungal bloodstream infections and can be vision-threatening. Ocular involvement has become less common in the current era of improved antifungal therapies. Retrospectively determining the prevalence of fungal ocular involvement is important for informing clinical guidelines, such as the need for routine ophthalmologic consultations. However, manual retrospective record review to detect cases is time-consuming. Objective: This study aimed to determine the prevalence of fungal ocular involvement in a critical care database using both structured and unstructured electronic health record (EHR) data. Methods: We queried microbiology data from 46,467 critical care patients over 12 years (2000-2012) from the Medical Information Mart for Intensive Care III (MIMIC-III) to identify 265 patients with culture-proven fungemia. For each fungemic patient, demographic data, fungal species present in blood culture, and risk factors for fungemia (eg, presence of indwelling catheters, recent major surgery, diabetes, immunosuppressed status) were ascertained. 
All structured diagnosis codes and free-text narrative notes associated with each patient's hospitalization were also extracted. Screening for fungal endophthalmitis was performed using two approaches: (1) by querying a wide array of eye- and vision-related diagnosis codes, and (2) by utilizing a custom regular expression pipeline to identify and collate relevant text matches pertaining to fungal ocular involvement. Both approaches were validated using manual record review. The main outcome measure was the documentation of any fungal ocular involvement. Results: In total, 265 patients had culture-proven fungemia, with Candida albicans (n=114, 43\%) and Candida glabrata (n=74, 28\%) being the most common fungal species in blood culture. The in-hospital mortality rate was 121 (46\%). In total, 7 patients were identified as having eye- or vision-related diagnosis codes, none of whom had fungal endophthalmitis based on record review. There were 26,830 free-text narrative notes associated with these 265 patients. A regular expression pipeline based on relevant terms yielded possible matches in 683 notes from 108 patients. Subsequent manual record review again demonstrated that no patients had fungal ocular involvement. Therefore, the prevalence of fungal ocular involvement in this cohort was 0\%. Conclusions: MIMIC-III contained no cases of ocular involvement among fungemic patients, consistent with prior studies reporting low rates of ocular involvement in fungemia. This study demonstrates an application of natural language processing to expedite the review of narrative notes. This approach is highly relevant for ophthalmology, where diagnoses are often based on physical examination findings that are documented within clinical notes. 
", doi="10.2196/18855", url="https://www.jmir.org/2020/8/e18855", url="http://www.ncbi.nlm.nih.gov/pubmed/32795984" } @Article{info:doi/10.2196/18350, author="Nasralah, Tareq and El-Gayar, Omar and Wang, Yong", title="Social Media Text Mining Framework for Drug Abuse: Development and Validation Study With an Opioid Crisis Case Analysis", journal="J Med Internet Res", year="2020", month="Aug", day="13", volume="22", number="8", pages="e18350", keywords="drug abuse", keywords="social media", keywords="infodemiology", keywords="infoveillance", keywords="text mining", keywords="opioid crisis", abstract="Background: Social media are considered promising and viable sources of data for gaining insights into various disease conditions and patients' attitudes, behaviors, and medications. They can be used to recognize communication and behavioral themes of problematic use of prescription drugs. However, mining and analyzing social media data have challenges and limitations related to topic deduction and data quality. As a result, we need a structured approach to analyze social media content related to drug abuse in a manner that can mitigate the challenges and limitations surrounding the use of such data. Objective: This study aimed to develop and evaluate a framework for mining and analyzing social media content related to drug abuse. The framework is designed to mitigate challenges and limitations related to topic deduction and data quality in social media data analytics for drug abuse. Methods: The proposed framework started with defining different terms related to the keywords, categories, and characteristics of the topic of interest. We then used the Crimson Hexagon platform to collect data based on a search query informed by a drug abuse ontology developed using the identified terms. We subsequently preprocessed the data and examined the quality using an evaluation matrix. Finally, a suitable data analysis approach could be used to analyze the collected data. 
Results: The framework was evaluated using the opioid epidemic as a drug abuse case analysis. We demonstrated the applicability of the proposed framework to identify public concerns toward the opioid epidemic and the most discussed topics on social media related to opioids. The results from the case analysis showed that the framework could improve the discovery and identification of topics in social media domains characterized by a plethora of highly diverse terms and lack of a commonly available dictionary or language by the community, such as in the case of opioid and drug abuse. Conclusions: The proposed framework addressed the challenges related to topic detection and data quality. We demonstrated the applicability of the proposed framework to identify the common concerns toward the opioid epidemic and the most discussed topics on social media related to opioids. ", doi="10.2196/18350", url="https://www.jmir.org/2020/8/e18350", url="http://www.ncbi.nlm.nih.gov/pubmed/32788147" } @Article{info:doi/10.2196/19222, author="Min, Kyoung-Bok and Song, Sung-Hee and Min, Jin-Young", title="Topic Modeling of Social Networking Service Data on Occupational Accidents in Korea: Latent Dirichlet Allocation Analysis", journal="J Med Internet Res", year="2020", month="Aug", day="13", volume="22", number="8", pages="e19222", keywords="topic modeling", keywords="occupational accident", keywords="social media", keywords="knowledge", keywords="workplace", keywords="accident", keywords="model", keywords="analysis", keywords="safety", abstract="Background: In most industrialized societies, regulations, inspections, insurance, and legal options are established to support workers who suffer injury, disease, or death in relation to their work; in practice, these resources are imperfect or even unavailable due to workplace or employer obstruction. Thus, limitations exist to identify unmet needs in occupational safety and health information. 
Objective: The aim of this study was to explore hidden issues related to occupational accidents by examining social network services (SNS) data using topic modeling. Methods: Based on the results of a Google search for the phrases occupational accident, industrial accident and occupational diseases, a total of 145 websites were selected. From among these websites, we collected 15,244 documents on queries related to occupational accidents between 2002 and 2018. To transform unstructured text into structure data, natural language processing of the Korean language was conducted. We performed the latent Dirichlet allocation (LDA) as a topic model using a Python library. A time-series linear regression analysis was also conducted to identify yearly trends for the given documents. Results: The results of the LDA model showed 14 topics with 3 themes: workers' compensation benefits (Theme 1), illicit agreements with the employer (Theme 2), and fatal and non-fatal injuries and vulnerable workers (Theme 3). Theme 1 represented the largest cluster (52.2\%) of the collected documents and included keywords related to workers' compensation (ie, company, occupational injury, insurance, accident, approval, and compensation) and keywords describing specific compensation benefits such as medical expense benefits, temporary incapacity benefits, and disability benefits. In the yearly trend, Theme 1 gradually decreased; however, other themes showed an overall increasing pattern. Certain queries (ie, musculoskeletal system, critical care, and foreign workers) showed no significant variation in the number of queries. Conclusions: We conducted LDA analysis of SNS data of occupational accident--related queries and discovered that the primary concerns of workers posting about occupational injuries and diseases were workers' compensation benefits, fatal and non-fatal injuries, vulnerable workers, and illicit agreements with employers. 
While traditional systems focus mainly on quantitative monitoring of occupational accidents, qualitative aspects formulated by topic modeling from unstructured SNS queries may be valuable to address inequalities and improve occupational health and safety. ", doi="10.2196/19222", url="http://www.jmir.org/2020/8/e19222/", url="http://www.ncbi.nlm.nih.gov/pubmed/32663156" } @Article{info:doi/10.2196/16862, author="Petersen, Lee Curtis and Halter, Ryan and Kotz, David and Loeb, Lorie and Cook, Summer and Pidgeon, Dawna and Christensen, C. Brock and Batsis, A. John", title="Using Natural Language Processing and Sentiment Analysis to Augment Traditional User-Centered Design: Development and Usability Study", journal="JMIR Mhealth Uhealth", year="2020", month="Aug", day="7", volume="8", number="8", pages="e16862", keywords="aged adults", keywords="sarcopenia", keywords="remote sensing technology", keywords="telemedicine", keywords="mobile phone", abstract="Background: Sarcopenia, defined as the age-associated loss of muscle mass and strength, can be effectively mitigated through resistance-based physical activity. With compliance at approximately 40\% for home-based exercise prescriptions, implementing a remote sensing system would help patients and clinicians to better understand treatment progress and increase compliance. The inclusion of end users in the development of mobile apps for remote-sensing systems can ensure that they are both user friendly and facilitate compliance. With advancements in natural language processing (NLP), there is potential for these methods to be used with data collected through the user-centered design process. 
Objective: This study aims to develop a mobile app for a novel device through a user-centered design process with both older adults and clinicians while exploring whether data collected through this process can be used in NLP and sentiment analysis. Methods: Through a user-centered design process, we conducted semistructured interviews during the development of a geriatric-friendly Bluetooth-connected resistance exercise band app. We interviewed patients and clinicians at weeks 0, 5, and 10 of the app development. Each semistructured interview consisted of heuristic evaluations, cognitive walkthroughs, and observations. We used the Bing sentiment library for a sentiment analysis of interview transcripts and then applied NLP-based latent Dirichlet allocation (LDA) topic modeling to identify differences and similarities in patient and clinician participant interviews. Sentiment was defined as the sum of positive and negative words (each word with a +1 or -1 value). To assess utility, we used quantitative assessment questionnaires---System Usability Scale (SUS) and Usefulness, Satisfaction, and Ease of use (USE). Finally, we used multivariate linear models---adjusting for age, sex, subject group (clinician vs patient), and development---to explore the association between sentiment analysis and SUS and USE outcomes. Results: The mean age of the 22 participants was 68 (SD 14) years, and 17 (77\%) were female. The overall mean SUS and USE scores were 66.4 (SD 13.6) and 41.3 (SD 15.2), respectively. Both patients and clinicians provided valuable insights into the needs of older adults when designing and building an app. The mean positive-negative sentiment per sentence was 0.19 (SD 0.21) and 0.47 (SD 0.21) for patient and clinician interviews, respectively. We found a positive association with positive sentiment in an interview and SUS score ({$\beta$}=1.38; 95\% CI 0.37 to 2.39; P=.01). There was no significant association between sentiment and the USE score. 
The LDA analysis found no overlap between patients and clinicians in the 8 identified topics. Conclusions: Involving patients and clinicians allowed us to design and build an app that is user friendly for older adults while supporting compliance. This is the first analysis using NLP and usability questionnaires in the quantification of user-centered design of technology for older adults. ", doi="10.2196/16862", url="https://mhealth.jmir.org/2020/8/e16862", url="http://www.ncbi.nlm.nih.gov/pubmed/32540843" } @Article{info:doi/10.2196/17638, author="Wang, Jian and Chen, Xiaoyu and Zhang, Yu and Zhang, Yijia and Wen, Jiabin and Lin, Hongfei and Yang, Zhihao and Wang, Xin", title="Document-Level Biomedical Relation Extraction Using Graph Convolutional Network and Multihead Attention: Algorithm Development and Validation", journal="JMIR Med Inform", year="2020", month="Jul", day="31", volume="8", number="7", pages="e17638", keywords="biomedical relation extraction", keywords="dependency graph", keywords="multihead attention", keywords="graph convolutional network", abstract="Background: Automatically extracting relations between chemicals and diseases plays an important role in biomedical text mining. Chemical-disease relation (CDR) extraction aims at extracting complex semantic relationships between entities in documents, which contain intrasentence and intersentence relations. Most previous methods did not consider dependency syntactic information across the sentences, which are very valuable for the relations extraction task, in particular, for extracting the intersentence relations accurately. Objective: In this paper, we propose a novel end-to-end neural network based on the graph convolutional network (GCN) and multihead attention, which makes use of the dependency syntactic information across the sentences to improve CDR extraction task. 
Methods: To improve the performance of intersentence relation extraction, we constructed a document-level dependency graph to capture the dependency syntactic information across sentences. GCN is applied to capture the feature representation of the document-level dependency graph. The multihead attention mechanism is employed to learn the relatively important context features from different semantic subspaces. To enhance the input representation, the deep context representation is used in our model instead of traditional word embedding. Results: We evaluate our method on CDR corpus. The experimental results show that our method achieves an F-measure of 63.5\%, which is superior to other state-of-the-art methods. In the intrasentence level, our method achieves a precision, recall, and F-measure of 59.1\%, 81.5\%, and 68.5\%, respectively. In the intersentence level, our method achieves a precision, recall, and F-measure of 47.8\%, 52.2\%, and 49.9\%, respectively. Conclusions: The GCN model can effectively exploit the across sentence dependency information to improve the performance of intersentence CDR extraction. Both the deep context representation and multihead attention are helpful in the CDR extraction task. ", doi="10.2196/17638", url="https://medinform.jmir.org/2020/7/e17638", url="http://www.ncbi.nlm.nih.gov/pubmed/32459636" } @Article{info:doi/10.2196/17784, author="Obeid, S. Jihad and Dahne, Jennifer and Christensen, Sean and Howard, Samuel and Crawford, Tami and Frey, J. Lewis and Stecker, Tracy and Bunnell, E. 
Brian", title="Identifying and Predicting Intentional Self-Harm in Electronic Health Record Clinical Notes: Deep Learning Approach", journal="JMIR Med Inform", year="2020", month="Jul", day="30", volume="8", number="7", pages="e17784", keywords="machine learning", keywords="deep learning", keywords="suicide", keywords="suicide, attempted", keywords="electronic health records", keywords="natural language processing", abstract="Background: Suicide is an important public health concern in the United States and around the world. There has been significant work examining machine learning approaches to identify and predict intentional self-harm and suicide using existing data sets. With recent advances in computing, deep learning applications in health care are gaining momentum. Objective: This study aimed to leverage the information in clinical notes using deep neural networks (DNNs) to (1) improve the identification of patients treated for intentional self-harm and (2) predict future self-harm events. Methods: We extracted clinical text notes from electronic health records (EHRs) of 835 patients with International Classification of Diseases (ICD) codes for intentional self-harm and 1670 matched controls who never had any intentional self-harm ICD codes. The data were divided into training and holdout test sets. We tested a number of algorithms on clinical notes associated with the intentional self-harm codes using the training set, including several traditional bag-of-words--based models and 2 DNN models: a convolutional neural network (CNN) and a long short-term memory model. We also evaluated the predictive performance of the DNNs on a subset of patients who had clinical notes 1 to 6 months before the first intentional self-harm event. Finally, we evaluated the impact of a pretrained model using Word2vec (W2V) on performance. 
Results: The area under the receiver operating characteristic curve (AUC) for the CNN on the phenotyping task, that is, the detection of intentional self-harm in clinical notes concurrent with the events was 0.999, with an F1 score of 0.985. In the predictive task, the CNN achieved the highest performance with an AUC of 0.882 and an F1 score of 0.769. Although pretraining with W2V shortened the DNN training time, it did not improve performance. Conclusions: The strong performance on the first task, namely, phenotyping based on clinical notes, suggests that such models could be used effectively for surveillance of intentional self-harm in clinical text in an EHR. The modest performance on the predictive task notwithstanding, the results using DNN models on clinical text alone are competitive with other reports in the literature using risk factors from structured EHR data. ", doi="10.2196/17784", url="https://medinform.jmir.org/2020/7/e17784", url="http://www.ncbi.nlm.nih.gov/pubmed/32729840" } @Article{info:doi/10.2196/17652, author="Pan, Xiaoyi and Chen, Boyu and Weng, Heng and Gong, Yongyi and Qu, Yingying", title="Temporal Expression Classification and Normalization From Chinese Narrative Clinical Texts: Pattern Learning Approach", journal="JMIR Med Inform", year="2020", month="Jul", day="27", volume="8", number="7", pages="e17652", keywords="Temporal expression extraction", keywords="Temporal expression normalization", keywords="Machine learning", keywords="Heuristic rule", keywords="Pattern learning", keywords="Clinical text", abstract="Background: Temporal information frequently exists in the representation of the disease progress, prescription, medication, surgery progress, or discharge summary in narrative clinical text. The accurate extraction and normalization of temporal expressions can positively boost the analysis and understanding of narrative clinical texts to promote clinical research and practice. 
Objective: The goal of the study was to propose a novel approach for extracting and normalizing temporal expressions from Chinese narrative clinical text. Methods: TNorm, a rule-based and pattern learning-based approach, has been developed for automatic temporal expression extraction and normalization from unstructured Chinese clinical text data. TNorm consists of three stages: extraction, classification, and normalization. It applies a set of heuristic rules and automatically generated patterns for temporal expression identification and extraction of clinical texts. Then, it collects the features of extracted temporal expressions for temporal type prediction and classification by using machine learning algorithms. Finally, the features are combined with the rule-based and a pattern learning-based approach to normalize the extracted temporal expressions. Results: The evaluation dataset is a set of narrative clinical texts in Chinese containing 1459 discharge summaries of a domestic Grade A Class 3 hospital. The results show that TNorm, combined with temporal expressions extraction and temporal types prediction, achieves a precision of 0.8491, a recall of 0.8328, and an F1 score of 0.8409 in temporal expressions normalization. Conclusions: This study illustrates an automatic approach, TNorm, that extracts and normalizes temporal expression from Chinese narrative clinical texts. TNorm was evaluated on the basis of discharge summary data, and results demonstrate its effectiveness on temporal expression normalization. 
", doi="10.2196/17652", url="https://medinform.jmir.org/2020/7/e17652", url="http://www.ncbi.nlm.nih.gov/pubmed/32716307" } @Article{info:doi/10.2196/18599, author="Choudhury, Avishek and Asan, Onur", title="Role of Artificial Intelligence in Patient Safety Outcomes: Systematic Literature Review", journal="JMIR Med Inform", year="2020", month="Jul", day="24", volume="8", number="7", pages="e18599", keywords="artificial intelligence", keywords="patient safety", keywords="drug safety", keywords="clinical error", keywords="report analysis", keywords="natural language processing", keywords="drug", keywords="review", abstract="Background: Artificial intelligence (AI) provides opportunities to identify the health risks of patients and thus influence patient safety outcomes. Objective: The purpose of this systematic literature review was to identify and analyze quantitative studies utilizing or integrating AI to address and report clinical-level patient safety outcomes. Methods: We restricted our search to the PubMed, PubMed Central, and Web of Science databases to retrieve research articles published in English between January 2009 and August 2019. We focused on quantitative studies that reported positive, negative, or intermediate changes in patient safety outcomes using AI apps, specifically those based on machine-learning algorithms and natural language processing. Quantitative studies reporting only AI performance but not its influence on patient safety outcomes were excluded from further review. Results: We identified 53 eligible studies, which were summarized concerning their patient safety subcategories, the most frequently used AI, and reported performance metrics. Recognized safety subcategories were clinical alarms (n=9; mainly based on decision tree models), clinical reports (n=21; based on support vector machine models), and drug safety (n=23; mainly based on decision tree models). 
Analysis of these 53 studies also identified two essential findings: (1) the lack of a standardized benchmark and (2) heterogeneity in AI reporting. Conclusions: This systematic review indicates that AI-enabled decision support systems, when implemented correctly, can aid in enhancing patient safety by improving error detection, patient stratification, and drug management. Future work is still needed for robust validation of these systems in prospective and real-world clinical environments to understand how well AI can predict safety outcomes in health care settings. ", doi="10.2196/18599", url="http://medinform.jmir.org/2020/7/e18599/", url="http://www.ncbi.nlm.nih.gov/pubmed/32706688" } @Article{info:doi/10.2196/17653, author="Sun, Haixia and Xiao, Jin and Zhu, Wei and He, Yilong and Zhang, Sheng and Xu, Xiaowei and Hou, Li and Li, Jiao and Ni, Yuan and Xie, Guotong", title="Medical Knowledge Graph to Enhance Fraud, Waste, and Abuse Detection on Claim Data: Model Development and Performance Evaluation", journal="JMIR Med Inform", year="2020", month="Jul", day="23", volume="8", number="7", pages="e17653", keywords="medical knowledge graph", keywords="FWA detection", abstract="Background: Fraud, Waste, and Abuse (FWA) detection is a significant yet challenging problem in the health insurance industry. An essential step in FWA detection is to check whether the medication is clinically reasonable with respect to the diagnosis. Currently, human experts with sufficient medical knowledge are required to perform this task. To reduce the cost, insurance inspectors tend to build an intelligent system to detect suspicious claims with inappropriate diagnoses/medications automatically. Objective: The aim of this study was to develop an automated method for making use of a medical knowledge graph to identify clinically suspected claims for FWA detection. Methods: First, we identified the medical knowledge that is required to assess the clinical rationality of the claims. 
We then searched for data sources that contain information to build such knowledge. In this study, we focused on Chinese medical knowledge. Second, we constructed a medical knowledge graph using unstructured knowledge. We used a deep learning--based method to extract the entities and relationships from the knowledge sources and developed a multilevel similarity matching approach to conduct the entity linking. To guarantee the quality of the medical knowledge graph, we involved human experts to review the entity and relationships with lower confidence. These reviewed results could be used to further improve the machine-learning models. Finally, we developed the rules to identify the suspected claims by reasoning according to the medical knowledge graph. Results: We collected 185,796 drug labels from the China Food and Drug Administration, 3390 types of disease information from medical textbooks (eg, symptoms, diagnosis, treatment, and prognosis), and information from 5272 examinations as the knowledge sources. The final medical knowledge graph includes 1,616,549 nodes and 5,963,444 edges. We designed three knowledge graph reasoning rules to identify three kinds of inappropriate diagnosis/medications. The experimental results showed that the medical knowledge graph helps to detect 70\% of the suspected claims. Conclusions: The medical knowledge graph--based method successfully identified suspected cases of FWA (such as fraud diagnosis, excess prescription, and irrational prescription) from the claim documents, which helped to improve the efficiency of claim processing. 
", doi="10.2196/17653", url="http://medinform.jmir.org/2020/7/e17653/", url="http://www.ncbi.nlm.nih.gov/pubmed/32706714" } @Article{info:doi/10.2196/20443, author="Li, Xiaoying and Lin, Xin and Ren, Huiling and Guo, Jinjing", title="Ontological Organization and Bioinformatic Analysis of Adverse Drug Reactions From Package Inserts: Development and Usability Study", journal="J Med Internet Res", year="2020", month="Jul", day="20", volume="22", number="7", pages="e20443", keywords="ontology", keywords="adverse drug reactions", keywords="package inserts", keywords="information retrieval", keywords="natural language processing", keywords="bioinformatics", keywords="drug", keywords="adverse events", keywords="machine-understandable knowledge", keywords="clinical applications", abstract="Background: Licensed drugs may cause unexpected adverse reactions in patients, resulting in morbidity, risk of mortality, therapy disruptions, and prolonged hospital stays. Officially approved drug package inserts list the adverse reactions identified from randomized controlled clinical trials with high evidence levels and worldwide postmarketing surveillance. Formal representation of the adverse drug reaction (ADR) enclosed in semistructured package inserts will enable deep recognition of side effects and rational drug use, substantially reduce morbidity, and decrease societal costs. Objective: This paper aims to present an ontological organization of traceable ADR information extracted from licensed package inserts. In addition, it will provide machine-understandable knowledge for bioinformatics analysis, semantic retrieval, and intelligent clinical applications. Methods: Based on the essential content of package inserts, a generic ADR ontology model is proposed from two dimensions (and nine subdimensions), covering the ADR information and medication instructions. 
This is followed by a customized natural language processing method programmed with Python to retrieve the relevant information enclosed in package inserts. After the biocuration and identification of retrieved data from the package insert, an ADR ontology is automatically built for further bioinformatic analysis. Results: We collected 165 package inserts of quinolone drugs from the National Medical Products Administration and other drug databases in China, and built a specialized ADR ontology containing 2879 classes and 15,711 semantic relations. For each quinolone drug, the reported ADR information and medication instructions have been logically represented and formally organized in an ADR ontology. To demonstrate its usage, the source data were further bioinformatically analyzed. For example, the number of drug-ADR triples and major ADRs associated with each active ingredient were recorded. The 10 ADRs most frequently observed among quinolones were identified and categorized based on the 18 categories defined in the proposal. The occurrence frequency, severity, and ADR mitigation method explicitly stated in package inserts were also analyzed, as well as the top 5 specific populations with contraindications for quinolone drugs. Conclusions: Ontological representation and organization using officially approved information from drug package inserts enables the identification and bioinformatic analysis of adverse reactions caused by a specific drug with regard to predefined ADR ontology classes and semantic relations. The resulting ontology-based ADR knowledge source classifies drug-specific adverse reactions, and supports a better understanding of ADRs and safer prescription of medications. 
", doi="10.2196/20443", url="https://www.jmir.org/2020/7/e20443", url="http://www.ncbi.nlm.nih.gov/pubmed/32706718" } @Article{info:doi/10.2196/18055, author="Abdalla, Mohamed and Abdalla, Moustafa and Hirst, Graeme and Rudzicz, Frank", title="Exploring the Privacy-Preserving Properties of Word Embeddings: Algorithmic Validation Study", journal="J Med Internet Res", year="2020", month="Jul", day="15", volume="22", number="7", pages="e18055", keywords="privacy", keywords="data anonymization", keywords="natural language processing", keywords="personal health records", abstract="Background: Word embeddings are dense numeric vectors used to represent language in neural networks. Until recently, there had been no publicly released embeddings trained on clinical data. Our work is the first to study the privacy implications of releasing these models. Objective: This paper aims to demonstrate that traditional word embeddings created on clinical corpora that have been deidentified by removing personal health information (PHI) can nonetheless be exploited to reveal sensitive patient information. Methods: We used embeddings created from 400,000 doctor-written consultation notes and experimented with 3 common word embedding methods to explore the privacy-preserving properties of each. Results: We found that if publicly released embeddings are trained from a corpus anonymized by PHI removal, it is possible to reconstruct up to 68.5\% (n=411/600) of the full names that remain in the deidentified corpus and associated sensitive information to specific patients in the corpus from which the embeddings were created. We also found that the distance between the word vector representation of a patient's name and a diagnostic billing code is informative and differs significantly from the distance between the name and a code not billed for that patient. 
Conclusions: Special care must be taken when sharing word embeddings created from clinical texts, as current approaches may compromise patient privacy. If PHI removal is used for anonymization before traditional word embeddings are trained, it is possible to attribute sensitive information to patients who have not been fully deidentified by the (necessarily imperfect) removal algorithms. A promising alternative (ie, anonymization by PHI replacement) may avoid these flaws. Our results are timely and critical, as an increasing number of researchers are pushing for publicly available health data. ", doi="10.2196/18055", url="https://www.jmir.org/2020/7/e18055", url="http://www.ncbi.nlm.nih.gov/pubmed/32673230" } @Article{info:doi/10.2196/18417, author="Dandala, Bharath and Joopudi, Venkata and Tsou, Ching-Huei and Liang, J. Jennifer and Suryanarayanan, Parthasarathy", title="Extraction of Information Related to Drug Safety Surveillance From Electronic Health Record Notes: Joint Modeling of Entities and Relations Using Knowledge-Aware Neural Attentive Models", journal="JMIR Med Inform", year="2020", month="Jul", day="10", volume="8", number="7", pages="e18417", keywords="electronic health records", keywords="adverse drug events", keywords="natural language processing", keywords="deep learning", keywords="information extraction", keywords="adverse drug reaction reporting systems", keywords="named entity recognition", keywords="relation extraction", abstract="Background: An adverse drug event (ADE) is commonly defined as ``an injury resulting from medical intervention related to a drug.'' Providing information related to ADEs and alerting caregivers at the point of care can reduce the risk of prescription and diagnostic errors and improve health outcomes. ADEs captured in structured data in electronic health records (EHRs) as either coded problems or allergies are often incomplete, leading to underreporting. 
Therefore, it is important to develop capabilities to process unstructured EHR data in the form of clinical notes, which contain a richer documentation of a patient's ADE. Several natural language processing (NLP) systems have been proposed to automatically extract information related to ADEs. However, the results from these systems showed that significant improvement is still required for the automatic extraction of ADEs from clinical notes. Objective: This study aims to improve the automatic extraction of ADEs and related information such as drugs, their attributes, and reason for administration from the clinical notes of patients. Methods: This research was conducted using discharge summaries from the Medical Information Mart for Intensive Care III (MIMIC-III) database obtained through the 2018 National NLP Clinical Challenges (n2c2) annotated with drugs, drug attributes (ie, strength, form, frequency, route, dosage, duration), ADEs, reasons, and relations between drugs and other entities. We developed a deep learning--based system for extracting these drug-centric concepts and relations simultaneously using a joint method enhanced with contextualized embeddings, a position-attention mechanism, and knowledge representations. The joint method generated different sentence representations for each drug, which were then used to extract related concepts and relations simultaneously. Contextualized representations trained on the MIMIC-III database were used to capture context-sensitive meanings of words. The position-attention mechanism amplified the benefits of the joint method by generating sentence representations that capture long-distance relations. Knowledge representations were obtained from graph embeddings created using the US Food and Drug Administration Adverse Event Reporting System database to improve relation extraction, especially when contextual clues were insufficient. 
Results: Our system achieved new state-of-the-art results on the n2c2 data set, with significant improvements in recognizing crucial drug--reason (F1=0.650 versus F1=0.579) and drug--ADE (F1=0.490 versus F1=0.476) relations. Conclusions: This study presents a system for extracting drug-centric concepts and relations that outperformed current state-of-the-art results and shows that contextualized embeddings, position-attention mechanisms, and knowledge graph embeddings effectively improve deep learning--based concepts and relation extraction. This study demonstrates the potential for deep learning--based methods to help extract real-world evidence from unstructured patient data for drug safety surveillance. ", doi="10.2196/18417", url="https://medinform.jmir.org/2020/7/e18417", url="http://www.ncbi.nlm.nih.gov/pubmed/32459650" } @Article{info:doi/10.2196/18652, author="Lin, Ye and Hong, Alicia Y. and Henson, S. Bradley and Stevenson, D. Robert and Hong, Simon and Lyu, Tianchu and Liang, Chen", title="Assessing Patient Experience and Healthcare Quality of Dental Care Using Patient Online Reviews in the United States: Mixed Methods Study", journal="J Med Internet Res", year="2020", month="Jul", day="7", volume="22", number="7", pages="e18652", keywords="dental care", keywords="healthcare quality", keywords="consumer health informatics", keywords="patient online reviews", keywords="patient review websites", keywords="natural language processing", abstract="Background: Over the last two decades, patient review websites have emerged as an essential online platform for doctor ratings and reviews. Recent studies suggested the significance of such websites as a data source for patients to choose doctors, for healthcare providers to learn and improve from patient feedback, and to foster a culture of trust and transparency between patients and healthcare providers. 
However, as compared to other medical specialties, studies of online patient reviews that focus on dentists in the United States remain absent. Objective: This study sought to understand to what extent online patient reviews can provide performance feedback that reflects dental care quality and patient experience. Methods: Using mixed informatics methods incorporating statistics, natural language processing, and domain expert evaluation, we analyzed the online patient reviews of 204,751 dentists extracted from HealthGrades with two specific aims. First, we examined the associations between patient ratings and a variety of dentist characteristics. Second, we identified topics from patient reviews that can be mapped to the national assessment of dental patient experience measured by the Patient Experience Measures from the Consumer Assessment of Healthcare Providers and Systems (CAHPS) Dental Plan Survey. Results: Higher ratings were associated with female dentists (t71881=2.45, P<.01, g=0.01), dentists at a younger age (F7, 107128=246.97, P<.001, g=0.11), and those whose patients experienced a short wait time (F4, 150055=10417.77, P<.001, g=0.18). We also identified several topics that corresponded to CAHPS measures, including discomfort (eg, painful/painless root canal or deep cleaning), and ethics (eg, high-pressure sales, and unnecessary dental work). Conclusions: These findings suggest that online patient reviews could be used as a data source for understanding the patient experience and healthcare quality in dentistry. ", doi="10.2196/18652", url="https://www.jmir.org/2020/7/e18652", url="http://www.ncbi.nlm.nih.gov/pubmed/32673240" } @Article{info:doi/10.2196/16760, author="Jones, H. Kerina and Ford, M. Elizabeth and Lea, Nathan and Griffiths, J. 
Lucy and Hassan, Lamiece and Heys, Sharon and Squires, Emma and Nenadic, Goran", title="Toward the Development of Data Governance Standards for Using Clinical Free-Text Data in Health Research: Position Paper", journal="J Med Internet Res", year="2020", month="Jun", day="29", volume="22", number="6", pages="e16760", keywords="ethical", keywords="legal", keywords="social implications", keywords="public engagement", keywords="free-text data", keywords="information governance", abstract="Background: Clinical free-text data (eg, outpatient letters or nursing notes) represent a vast, untapped source of rich information that, if more accessible for research, would clarify and supplement information coded in structured data fields. Data usually need to be deidentified or anonymized before they can be reused for research, but there is a lack of established guidelines to govern effective deidentification and use of free-text information and avoid damaging data utility as a by-product. Objective: This study aimed to develop recommendations for the creation of data governance standards to integrate with existing frameworks for personal data use, to enable free-text data to be used safely for research for patient and public benefit. Methods: We outlined data protection legislation and regulations relating to the United Kingdom for context and conducted a rapid literature review and UK-based case studies to explore data governance models used in working with free-text data. We also engaged with stakeholders, including text-mining researchers and the general public, to explore perceived barriers and solutions in working with clinical free-text. 
Results: We proposed a set of recommendations, including the need for authoritative guidance on data governance for the reuse of free-text data, to ensure public transparency in data flows and uses, to treat deidentified free-text data as potentially identifiable with use limited to accredited data safe havens, and to commit to a culture of continuous improvement to understand the relationships between the efficacy of deidentification and reidentification risks, so this can be communicated to all stakeholders. Conclusions: By drawing together the findings of a combination of activities, we present a position paper to contribute to the development of data governance standards for the reuse of clinical free-text data for secondary purposes. While working in accordance with existing data governance frameworks, there is a need for further work to take forward the recommendations we have proposed, with commitment and investment, to assure and expand the safe reuse of clinical free-text data for public benefit. ", doi="10.2196/16760", url="http://www.jmir.org/2020/6/e16760/", url="http://www.ncbi.nlm.nih.gov/pubmed/32597785" } @Article{info:doi/10.2196/17650, author="Li, Genghao and Li, Bing and Huang, Langlin and Hou, Sibing", title="Automatic Construction of a Depression-Domain Lexicon Based on Microblogs: Text Mining Study", journal="JMIR Med Inform", year="2020", month="Jun", day="23", volume="8", number="6", pages="e17650", keywords="depression detection", keywords="depression diagnosis", keywords="social media", keywords="automatic construction", keywords="domain-specific lexicon", keywords="depression lexicon", keywords="label propagation", abstract="Background: According to a World Health Organization report in 2017, there was almost one patient with depression among every 20 people in China. However, the diagnosis of depression is usually difficult in terms of clinical detection owing to slow observation, high cost, and patient resistance. 
Meanwhile, with the rapid emergence of social networking sites, people tend to share their daily life and disclose inner feelings online frequently, making it possible to effectively identify mental conditions using the rich text information. There are many achievements regarding an English web-based corpus, but for research in China so far, the extraction of language features from web-related depression signals is still in a relatively primary stage. Objective: The purpose of this study was to propose an effective approach for constructing a depression-domain lexicon. This lexicon will contain language features that could help identify social media users who potentially have depression. Our study also compared the performance of detection with and without our lexicon. Methods: We autoconstructed a depression-domain lexicon using Word2Vec, a semantic relationship graph, and the label propagation algorithm. These two methods combined performed well in a specific corpus during construction. The lexicon was obtained based on 111,052 Weibo microblogs from 1868 users who were depressed or nondepressed. During depression detection, we considered six features, and we used five classification methods to test the detection performance. Results: The experiment results showed that in terms of the F1 value, our autoconstruction method performed 1\% to 6\% better than baseline approaches and was more effective and steadier. When applied to detection models like logistic regression and support vector machine, our lexicon helped the models outperform by 2\% to 9\% and was able to improve the final accuracy of potential depression detection. Conclusions: Our depression-domain lexicon was proven to be a meaningful input for classification algorithms, providing linguistic insights on the depressive status of test subjects. We believe that this lexicon will enhance early depression detection in people on social media. 
Future work will need to be carried out on a larger corpus and with more complex methods. ", doi="10.2196/17650", url="http://medinform.jmir.org/2020/6/e17650/", url="http://www.ncbi.nlm.nih.gov/pubmed/32574151" } @Article{info:doi/10.2196/17821, author="Liu, Ziqing and He, Haiyang and Yan, Shixing and Wang, Yong and Yang, Tao and Li, Guo-Zheng", title="End-to-End Models to Imitate Traditional Chinese Medicine Syndrome Differentiation in Lung Cancer Diagnosis: Model Development and Validation", journal="JMIR Med Inform", year="2020", month="Jun", day="16", volume="8", number="6", pages="e17821", keywords="traditional Chinese medicine", keywords="syndrome differentiation", keywords="lung cancer", keywords="medical record", keywords="deep learning", keywords="model fusion", abstract="Background: Traditional Chinese medicine (TCM) has been shown to be an efficient mode to manage advanced lung cancer, and accurate syndrome differentiation is crucial to treatment. Documented evidence of TCM treatment cases and the progress of artificial intelligence technology are enabling the development of intelligent TCM syndrome differentiation models. This is expected to expand the benefits of TCM to lung cancer patients. Objective: The objective of this work was to establish end-to-end TCM diagnostic models to imitate lung cancer syndrome differentiation. The proposed models used unstructured medical records as inputs to capitalize on data collected for practical TCM treatment cases by lung cancer experts. The resulting models were expected to be more efficient than approaches that leverage structured TCM datasets. Methods: We approached lung cancer TCM syndrome differentiation as a multilabel text classification problem. First, entity representation was conducted with Bidirectional Encoder Representations from Transformers and conditional random fields models. 
Then, five deep learning--based text classification models were applied to the construction of a medical record multilabel classifier, during which two data augmentation strategies were adopted to address overfitting issues. Finally, a fusion model approach was used to elevate the performance of the models. Results: The F1 score of the recurrent convolutional neural network (RCNN) model with augmentation was 0.8650, a 2.41\% improvement over the unaugmented model. The Hamming loss for RCNN with augmentation was 0.0987, which is 1.8\% lower than that of the same model without augmentation. Among the models, the text-hierarchical attention network (Text-HAN) model achieved the highest F1 scores of 0.8676 and 0.8751. The mean average precision for the word encoding--based RCNN was 10\% higher than that of the character encoding--based representation. A fusion model of the text-convolutional neural network, text-recurrent neural network, and Text-HAN models achieved an F1 score of 0.8884, which showed the best performance among the models. Conclusions: Medical records could be used more productively by constructing end-to-end models to facilitate TCM diagnosis. With the aid of entity-level representation, data augmentation, and model fusion, deep learning--based multilabel classification approaches can better imitate TCM syndrome differentiation in complex cases such as advanced lung cancer. 
", doi="10.2196/17821", url="https://medinform.jmir.org/2020/6/e17821", url="http://www.ncbi.nlm.nih.gov/pubmed/32543445" } @Article{info:doi/10.2196/18186, author="Chen, Weijia and Lu, Zhijun and You, Lijue and Zhou, Lingling and Xu, Jie and Chen, Ken", title="Artificial Intelligence--Based Multimodal Risk Assessment Model for Surgical Site Infection (AMRAMS): Development and Validation Study", journal="JMIR Med Inform", year="2020", month="Jun", day="15", volume="8", number="6", pages="e18186", keywords="surgical site infection", keywords="machine learning", keywords="deep learning", keywords="natural language processing", keywords="artificial intelligence", keywords="risk assessment model", keywords="routinely collected data", keywords="electronic medical record", keywords="neural network", keywords="word embedding", abstract="Background: Surgical site infection (SSI) is one of the most common types of health care--associated infections. It increases mortality, prolongs hospital length of stay, and raises health care costs. Many institutions developed risk assessment models for SSI to help surgeons preoperatively identify high-risk patients and guide clinical intervention. However, most of these models had low accuracies. Objective: We aimed to provide a solution in the form of an Artificial intelligence--based Multimodal Risk Assessment Model for Surgical site infection (AMRAMS) for inpatients undergoing operations, using routinely collected clinical data. We internally and externally validated the discriminations of the models, which combined various machine learning and natural language processing techniques, and compared them with the National Nosocomial Infections Surveillance (NNIS) risk index. Methods: We retrieved inpatient records between January 1, 2014, and June 30, 2019, from the electronic medical record (EMR) system of Rui Jin Hospital, Luwan Branch, Shanghai, China. 
We used data from before July 1, 2018, as the development set for internal validation and the remaining data as the test set for external validation. We included patient demographics, preoperative lab results, and free-text preoperative notes as our features. We used word-embedding techniques to encode text information, and we trained the LASSO (least absolute shrinkage and selection operator) model, random forest model, gradient boosting decision tree (GBDT) model, convolutional neural network (CNN) model, and self-attention network model using the combined data. Surgeons manually scored the NNIS risk index values. Results: For internal bootstrapping validation, CNN yielded the highest mean area under the receiver operating characteristic curve (AUROC) of 0.889 (95\% CI 0.886-0.892), and the paired-sample t test revealed statistically significant advantages as compared with other models (P<.001). The self-attention network yielded the second-highest mean AUROC of 0.882 (95\% CI 0.878-0.886), but the AUROC was only numerically higher than the AUROC of the third-best model, GBDT with text embeddings (mean AUROC 0.881, 95\% CI 0.878-0.884, P=.47). The AUROCs of LASSO, random forest, and GBDT models using text embeddings were statistically higher than the AUROCs of models not using text embeddings (P<.001). For external validation, the self-attention network yielded the highest AUROC of 0.879. CNN was the second-best model (AUROC 0.878), and GBDT with text embeddings was the third-best model (AUROC 0.872). The NNIS risk index scored by surgeons had an AUROC of 0.651. Conclusions: Our AMRAMS based on EMR data and deep learning methods---CNN and self-attention network---had significant advantages in terms of accuracy compared with other conventional machine learning methods and the NNIS risk index. Moreover, the semantic embeddings of preoperative notes improved the model performance further. 
Our models could replace the NNIS risk index to provide personalized guidance for the preoperative intervention of SSIs. Through this case, we offered an easy-to-implement solution for building multimodal RAMs for other similar scenarios. ", doi="10.2196/18186", url="http://medinform.jmir.org/2020/6/e18186/", url="http://www.ncbi.nlm.nih.gov/pubmed/32538798" } @Article{info:doi/10.2196/17608, author="Zhang, Hong and Ni, Wandong and Li, Jing and Zhang, Jiajun", title="Artificial Intelligence--Based Traditional Chinese Medicine Assistive Diagnostic System: Validation Study", journal="JMIR Med Inform", year="2020", month="Jun", day="15", volume="8", number="6", pages="e17608", keywords="traditional Chinese medicine", keywords="TCM", keywords="disease diagnosis", keywords="syndrome prediction", keywords="syndrome differentiation", keywords="natural language processing", keywords="NLP", keywords="artificial intelligence", keywords="AI", keywords="assistive diagnostic system", keywords="convolutional neural network", keywords="CNN", keywords="machine learning", keywords="ML", keywords="BiLSTM-CRF", abstract="Background: Artificial intelligence--based assistive diagnostic systems imitate the deductive reasoning process of a human physician in biomedical disease diagnosis and treatment decision making. While impressive progress in this area has been reported, most of the reported successes are applications of artificial intelligence in Western medicine. The application of artificial intelligence in traditional Chinese medicine has lagged mainly because traditional Chinese medicine practitioners need to perform syndrome differentiation as well as biomedical disease diagnosis before a treatment decision can be made. Syndrome, a concept unique to traditional Chinese medicine, is an abstraction of a variety of signs and symptoms. 
The fact that the relationship between diseases and syndromes is not one-to-one but rather many-to-many makes it very challenging for a machine to perform syndrome predictions. So far, only a handful of artificial intelligence--based assistive traditional Chinese medicine diagnostic models have been reported, and they are limited in application to a single disease-type. Objective: The objective was to develop an artificial intelligence--based assistive diagnostic system capable of diagnosing multiple types of diseases that are common in traditional Chinese medicine, given a patient's electronic health record notes. The system was designed to simultaneously diagnose the disease and produce a list of corresponding syndromes. Methods: Unstructured freestyle electronic health record notes were processed by natural language processing techniques to extract clinical information such as signs and symptoms which were represented by named entities. Natural language processing used a recurrent neural network model called bidirectional long short-term memory network--conditional random field. A convolutional neural network was then used to predict the disease-type out of 187 diseases in traditional Chinese medicine. A novel traditional Chinese medicine syndrome prediction method---an integrated learning model---was used to produce a corresponding list of probable syndromes. By following a majority-rule voting method, the integrated learning model for syndrome prediction can take advantage of four existing prediction methods (back propagation, random forest, extreme gradient boosting, and support vector classifier) while avoiding their respective weaknesses which resulted in a consistently high prediction accuracy. Results: A data set consisting of 22,984 electronic health records from Guanganmen Hospital of the China Academy of Chinese Medical Sciences that were collected between January 1, 2017 and September 7, 2018 was used. 
The data set contained a total of 187 diseases that are commonly diagnosed in traditional Chinese medicine. The diagnostic system was designed to be able to detect any one of the 187 disease-types. The data set was partitioned into a training set, a validation set, and a testing set in a ratio of 8:1:1. Test results suggested that the proposed system had a good diagnostic accuracy and a strong capability for generalization. The disease-type prediction accuracies of the top one, top three, and top five were 80.5\%, 91.6\%, and 94.2\%, respectively. Conclusions: The main contributions of the artificial intelligence--based traditional Chinese medicine assistive diagnostic system proposed in this paper are that 187 commonly known traditional Chinese medicine diseases can be diagnosed and a novel prediction method called an integrated learning model is demonstrated. This new prediction method outperformed all four existing methods in our preliminary experimental results. With further improvement of the algorithms and the availability of additional electronic health record data, it is expected that a wider range of traditional Chinese medicine disease-types could be diagnosed and that better diagnostic accuracies could be achieved. 
", doi="10.2196/17608", url="http://medinform.jmir.org/2020/6/e17608/", url="http://www.ncbi.nlm.nih.gov/pubmed/32538797" } @Article{info:doi/10.2196/18501, author="Torres, Gomes Fernanda Broering and Gomes, Carvalho Denilsen and Hino, Ferreira Adriano Akira and Moro, Claudia and Cubas, Regina Marcia", title="Comparison of the Results of Manual and Automated Processes of Cross-Mapping Between Nursing Terms: Quantitative Study", journal="JMIR Nursing", year="2020", month="Jun", day="9", volume="3", number="1", pages="e18501", keywords="health information interoperability", keywords="nursing informatics", keywords="controlled vocabulary", keywords="standardized nursing terminology", keywords="ehealth", abstract="Background: Cross-mapping establishes equivalence between terms from different terminology systems, which is useful for interoperability, updated terminological versions, and reuse of terms. Due to the number of terms to be mapped, this work can be extensive, tedious, and thorough, and it is susceptible to errors; this can be minimized by automated processes, which use computational tools. Objective: The aim of this study was to compare the results of manual and automated term mapping processes. Methods: In this descriptive, quantitative study, we used the results of two mapping processes as an empirical basis: manual, which used 2638 terms of nurses' records from a university hospital in southern Brazil and the International Classification for Nursing Practice (ICNP); and automated, which used the same university hospital terms and the primitive terms of the ICNP through MappICNP, an algorithm based on rules of natural language processing. The two processes were compared via equality and exclusivity assessments of new terms of the automated process and of candidate terms. Results: The automated process mapped 569/2638 (21.56\%) of the source bank's terms as identical, and the manual process mapped 650/2638 (24.63\%) as identical. 
Regarding new terms, the automated process mapped 1031/2638 (39.08\%) of the source bank's terms as new, while the manual process mapped 1251 (47.42\%). In particular, manual mapping identified 101/2638 (3.82\%) terms as identical and 429 (16.26\%) as new, whereas the automated process identified 20 (0.75\%) terms as identical and 209 (7.92\%) as new. Of the 209 terms mapped as new by the automated process, it was possible to establish an equivalence with ICNP terms in 48 (23.0\%) cases. An analysis of the candidate terms offered by the automated process to the 429 new terms mapped exclusively by the manual process resulted in 100 (23.3\%) candidates that had a semantic relationship with the source term. Conclusions: The automated and manual processes map identical and new terms in similar ways and can be considered complementary. Direct identification of identical terms and the offering of candidate terms through the automated process facilitate and enhance the results of the mapping; confirmation of the precision of the automated mapping requires further analysis by researchers. ", doi="10.2196/18501", url="https://nursing.jmir.org/2020/1/e18501/" } @Article{info:doi/10.2196/13745, author="Liu, Yang and Yin, Zhijun", title="Understanding Weight Loss via Online Discussions: Content Analysis of Reddit Posts Using Topic Modeling and Word Clustering Techniques", journal="J Med Internet Res", year="2020", month="Jun", day="8", volume="22", number="6", pages="e13745", keywords="weight loss", keywords="online health community", keywords="machine learning", keywords="topic modeling", keywords="word2vec", keywords="hierarchical clustering", keywords="consumer health", abstract="Background: Maintaining a healthy weight can reduce the risk of developing many diseases, including type 2 diabetes, hypertension, and certain types of cancers. 
Online social media platforms are popular among people seeking social support regarding weight loss and sharing their weight loss experiences, which provides opportunities for learning about weight loss behaviors. Objective: This study aimed to investigate the extent to which the content posted by users in the r/loseit subreddit, an online community for discussing weight loss, and online interactions were associated with their weight loss in terms of the number of replies and votes that these users received. Methods: All posts that were published before January 2018 in r/loseit were collected. We focused on users who revealed their start weight, current weight, and goal weight and were active in this online community for at least 30 days. A topic modeling technique and a hierarchical clustering algorithm were used to obtain both global topics and local word semantic clusters. Finally, we used a regression model to learn the association between weight loss and topics, word semantic clusters, and online interactions. Results: Our data comprised 477,904 posts that were published by 7660 users within a span of 7 years. We identified 25 topics, including food and drinks, calories, exercises, family members and friends, and communication. Our results showed that the start weight ($\beta$=.823; P<.001), active days ($\beta$=.017; P=.009), and median number of votes ($\beta$=.263; P=.02), mentions of exercises ($\beta$=.145; P<.001), and nutrition ($\beta$=.120; P<.001) were associated with higher weight loss. Users who lost more weight might be motivated by the negative emotions ($\beta$=-.098; P<.001) that they experienced before starting the journey of weight loss. In contrast, users who mentioned vacations ($\beta$=-.108; P=.005) and payments ($\beta$=-.112; P=.001) tended to experience relatively less weight loss. Mentions of family members ($\beta$=-.031; P=.03) and employment status ($\beta$=-.041; P=.03) were associated with less weight loss as well. 
Conclusions: Our study showed that both online interactions and offline activities were associated with weight loss, suggesting that future interventions based on existing online platforms should focus on both aspects. Our findings suggest that online personal health data can be used to learn about health-related behaviors effectively. ", doi="10.2196/13745", url="https://www.jmir.org/2020/6/e13745", url="http://www.ncbi.nlm.nih.gov/pubmed/32510460" } @Article{info:doi/10.2196/18677, author="Fu, Weifeng", title="Application of an Isolated Word Speech Recognition System in the Field of Mental Health Consultation: Development and Usability Study", journal="JMIR Med Inform", year="2020", month="Jun", day="3", volume="8", number="6", pages="e18677", keywords="speech recognition", keywords="isolated words", keywords="mental health", keywords="small vocabulary", keywords="HMM", keywords="hidden Markov model", keywords="programming", abstract="Background: Speech recognition is a technology that enables machines to understand human language. Objective: In this study, speech recognition of isolated words from a small vocabulary was applied to the field of mental health counseling. Methods: A software platform was used to establish a human-machine chat for psychological counselling. The software uses voice recognition technology to decode the user's voice information. The software system analyzes and processes the user's voice information according to many internal related databases, and then gives the user accurate feedback. For users who need psychological treatment, the system provides them with psychological education. Results: The speech recognition system included features such as speech extraction, endpoint detection, feature value extraction, training data, and speech recognition. 
Conclusions: The Hidden Markov Model was adopted, based on multithread programming under a VC2005 compilation environment, to realize the parallel operation of the algorithm and improve the efficiency of speech recognition. After the design was completed, simulation debugging was performed in the laboratory. The experimental results showed that the designed program met the basic requirements of a speech recognition system. ", doi="10.2196/18677", url="https://medinform.jmir.org/2020/6/e18677", url="http://www.ncbi.nlm.nih.gov/pubmed/32384054" } @Article{info:doi/10.2196/17819, author="Hane, A. Christopher and Nori, S. Vijay and Crown, H. William and Sanghavi, M. Darshak and Bleicher, Paul", title="Predicting Onset of Dementia Using Clinical Notes and Machine Learning: Case-Control Study", journal="JMIR Med Inform", year="2020", month="Jun", day="3", volume="8", number="6", pages="e17819", keywords="Alzheimer disease", keywords="dementia", keywords="health information systems", keywords="machine learning", keywords="natural language processing", keywords="health information interoperability", abstract="Background: Clinical trials need efficient tools to assist in recruiting patients at risk of Alzheimer disease and related dementias (ADRD). Early detection can also assist patients with financial planning for long-term care. Clinical notes are an important, underutilized source of information in machine learning models because of the cost of collection and complexity of analysis. Objective: This study aimed to investigate the use of deidentified clinical notes from multiple hospital systems collected over 10 years to augment retrospective machine learning models of the risk of developing ADRD. Methods: We used 2 years of data to predict the future outcome of ADRD onset. Clinical notes are provided in a deidentified format with specific terms and sentiments. 
Terms in clinical notes are embedded into a 100-dimensional vector space to identify clusters of related terms and abbreviations that differ across hospital systems and individual clinicians. Results: When using clinical notes, the area under the curve (AUC) improved from 0.85 to 0.94, and positive predictive value (PPV) increased from 45.07\% (25,245/56,018) to 68.32\% (14,153/20,717) in the model at disease onset. Models with clinical notes improved in both AUC and PPV in years 3-6 when notes' volume was largest; results are mixed in years 7 and 8 with the smallest cohorts. Conclusions: Although clinical notes helped in the short term, the presence of ADRD symptomatic terms years earlier than onset adds evidence to other studies that clinicians undercode diagnoses of ADRD. De-identified clinical notes increase the accuracy of risk models. Clinical notes collected across multiple hospital systems via natural language processing can be merged using postprocessing techniques to aid model accuracy. 
", doi="10.2196/17819", url="https://medinform.jmir.org/2020/6/e17819", url="http://www.ncbi.nlm.nih.gov/pubmed/32490841" } @Article{info:doi/10.2196/17349, author="Luo, Aijing and Xin, Zirui and Yuan, Yifeng and Wen, Tingxiao and Xie, Wenzhao and Zhong, Zhuqing and Peng, Xiaoqing and Ouyang, Wei and Hu, Chao and Liu, Fei and Chen, Yang and He, Haiyan", title="Multidimensional Feature Classification of the Health Information Needs of Patients With Hypertension in an Online Health Community Through Analysis of 1000 Patient Question Records: Observational Study", journal="J Med Internet Res", year="2020", month="May", day="29", volume="22", number="5", pages="e17349", keywords="online health community", keywords="health information needs", keywords="patients with hypertension", keywords="physician-patient communication", abstract="Background: With the rapid development of online health communities, increasing numbers of patients and families are seeking health information on the internet. Objective: This study aimed to discuss how to fully reveal the health information needs expressed by patients with hypertension in their questions in a web-based environment and how to use the internet to help patients with hypertension receive personalized health education. Methods: This study randomly selected 1000 text records from the question data of patients with hypertension from 2008 to 2018 collected from Good Doctor Online and constructed a classification system through literature research and content analysis. This paper identified the background characteristics and questioning intention of each patient with hypertension based on the patient's question and used co-occurrence network analysis and the k-means clustering method to explore the features of the health information needs of patients with hypertension. 
Results: The classification system for the health information needs of patients with hypertension included the following nine dimensions: drugs (355 names), symptoms and signs (395 names), tests and examinations (545 names), demographic data (526 kinds), diseases (80 names), risk factors (37 names), emotions (43 kinds), lifestyles (6 kinds), and questions (49 kinds). There were several characteristics of the explored web-based health information needs of patients with hypertension. First, more than 49\% of patients described features, such as drugs, symptoms and signs, tests and examinations, demographic data, and diseases. Second, patients with hypertension were most concerned about treatment (778/1000, 77.80\%), followed by diagnosis (323/1000, 32.30\%). Third, 65.80\% (658/1000) of patients asked physicians several questions at the same time. Moreover, 28.30\% (283/1000) of patients were very concerned about how to adjust the medication, and they asked other treatment-related questions at the same time, including drug side effects, whether to take the drugs, how to treat the disease, etc. Furthermore, 17.60\% (176/1000) of patients consulted physicians about the causes of clinical findings, including the relationship between the clinical findings and a disease, the treatment of a disease, and medications and examinations. Fourth, by k-means clustering, the questioning intentions of patients with hypertension were classified into the following seven categories: ``how to adjust medication,'' ``what to do,'' ``how to treat,'' ``phenomenon explanation,'' ``test and examination,'' ``disease diagnosis,'' and ``disease prognosis.'' Conclusions: In a web-based environment, the health information needs expressed by Chinese patients with hypertension to physicians are common and distinct, that is, patients with different background features ask relatively common questions to physicians. 
The classification system constructed in this study can provide guidance to health information service providers for the construction of web-based health resources, as well as guidance for patient education, which could help solve the problem of information asymmetry in communication between physicians and patients. ", doi="10.2196/17349", url="http://www.jmir.org/2020/5/e17349/", url="http://www.ncbi.nlm.nih.gov/pubmed/32469318" } @Article{info:doi/10.2196/17224, author="Rivas, Ryan and Shahbazi, Moloud and Garett, Renee and Hristidis, Vagelis and Young, Sean", title="Mental Health--Related Behaviors and Discussions Among Young Adults: Analysis and Classification", journal="J Med Internet Res", year="2020", month="May", day="29", volume="22", number="5", pages="e17224", keywords="social media", keywords="data analysis", keywords="supervised machine learning", keywords="universities", keywords="students", abstract="Background: There have been recurring reports of web-based harassment and abuse among adolescents and young adults through anonymous social networks. Objective: This study aimed to explore discussions on the popular anonymous social network Yik Yak related to social and mental health messaging behaviors among college students, including cyberbullying, to provide insights into mental health behaviors on college campuses. Methods: From April 6, 2016, to May 7, 2016, we collected anonymous conversations posted on Yik Yak at 19 universities in 4 different states and performed statistical analyses and text classification experiments on a subset of these messages. Results: We found that prosocial messages were 5.23 times more prevalent than bullying messages. The frequency of cyberbullying messages was positively associated with messages seeking emotional help. We found significant geographic variation in the frequency of messages offering supportive vs bullying messages. Across campuses, bullying and political discussions were positively associated. 
We also achieved a balanced accuracy of over 0.75 for most messaging behaviors and topics with a support vector machine classifier. Conclusions: Our results show that messages containing data about students' mental health--related attitudes and behaviors are prevalent on anonymous social networks, suggesting that these data can be mined for real-time analysis. This information can be used in education and health care services to better engage with students, provide insight into conversations that lead to cyberbullying, and reach out to students who need support. ", doi="10.2196/17224", url="http://www.jmir.org/2020/5/e17224/", url="http://www.ncbi.nlm.nih.gov/pubmed/32469317" } @Article{info:doi/10.2196/17813, author="Bi, Qiqing and Shen, Lining and Evans, Richard and Zhang, Zhiguo and Wang, Shimin and Dai, Wei and Liu, Cui", title="Determining the Topic Evolution and Sentiment Polarity for Albinism in a Chinese Online Health Community: Machine Learning and Social Network Analysis", journal="JMIR Med Inform", year="2020", month="May", day="29", volume="8", number="5", pages="e17813", keywords="albinism", keywords="rare diseases", keywords="topic mining", keywords="social network analysis", keywords="sentiment polarity", keywords="online health community", keywords="machine learning", abstract="Background: There are more than 6000 rare diseases in existence today, with the number of patients with these conditions rapidly increasing. Most research to date has focused on the diagnosis, treatment, and development of orphan drugs, while few studies have examined the topics and emotions expressed by patients living with rare diseases on social media platforms, especially in online health communities (OHCs). Objective: This study aimed to determine the topic categorizations and sentiment polarity for albinism in a Chinese OHC, Baidu Tieba, using multiple methods. The OHC was deeply mined using topic mining, social network analysis, and sentiment polarity analysis. 
Through these methods, we determined the current situation of community construction, identifying the ongoing needs and problems experienced by people with albinism in their daily lives. Methods: We used the albinism community on the Baidu Tieba platform as the data source in this study. Term frequency--inverse document frequency, latent Dirichlet allocation models, and naive Bayes were employed to mine the various topic categories. Social network analysis, which was completed using the Gephi tool, was employed to analyze the evolution of the albinism community. Sentiment polarity analysis was performed using a long short-term memory algorithm. Results: We identified 8 main topics discussed in the community: daily sharing, family, interpersonal communication, social life and security, medical care, occupation and education, beauty, and self-care. Among these topics, daily sharing represented the largest proportion of the discussions. From 2012 to 2019, the average degree and clustering coefficient of the albinism community continued to decline, while the network center transferred from core communities to core users. A total of 68.43\% of the corpus was emotional, with 35.88\% being positive and 32.55\% negative. There were statistically significant differences in the distribution of sentiment polarity between topics (P<.001). Negative emotions were twice as high as positive emotions in the social life and security topic. Conclusions: The study reveals insights into the emotions expressed by people with albinism in the Chinese OHC, Baidu Tieba, providing health care practitioners with greater appreciation of the current emotional support needed by patients and the patient experience. Current OHCs do not exert enough influence due to limited effective organization and development. Health care sectors should take greater advantage of OHCs to support vulnerable patients with rare diseases to meet their evidence-based needs. 
", doi="10.2196/17813", url="http://medinform.jmir.org/2020/5/e17813/", url="http://www.ncbi.nlm.nih.gov/pubmed/32469320" } @Article{info:doi/10.2196/17644, author="Liu, Xiaofeng and Fan, Jianye and Dong, Shoubin", title="Document-Level Biomedical Relation Extraction Leveraging Pretrained Self-Attention Structure and Entity Replacement: Algorithm and Pretreatment Method Validation Study", journal="JMIR Med Inform", year="2020", month="May", day="29", volume="8", number="5", pages="e17644", keywords="self-attention", keywords="document-level", keywords="relation extraction", keywords="biomedical entity pretreatment", abstract="Background: The most current methods applied for intrasentence relation extraction in the biomedical literature are inadequate for document-level relation extraction, in which the relationship may cross sentence boundaries. Hence, some approaches have been proposed to extract relations by splitting the document-level datasets through heuristic rules and learning methods. However, these approaches may introduce additional noise and do not really solve the problem of intersentence relation extraction. It is challenging to avoid noise and extract cross-sentence relations. Objective: This study aimed to avoid errors by dividing the document-level dataset, verify that a self-attention structure can extract biomedical relations in a document with long-distance dependencies and complex semantics, and discuss the relative benefits of different entity pretreatment methods for biomedical relation extraction. Methods: This paper proposes a new data preprocessing method and attempts to apply a pretrained self-attention structure for document biomedical relation extraction with an entity replacement method to capture very long-distance dependencies and complex semantics. Results: Compared with state-of-the-art approaches, our method greatly improved the precision. 
The results show that our approach increases the F1 value, compared with state-of-the-art methods. Through experiments of biomedical entity pretreatments, we found that a model using an entity replacement method can improve performance. Conclusions: When considering all target entity pairs as a whole in the document-level dataset, a pretrained self-attention structure is suitable to capture very long-distance dependencies and learn the textual context and complicated semantics. A replacement method for biomedical entities is conducive to biomedical relation extraction, especially to document-level relation extraction. ", doi="10.2196/17644", url="http://medinform.jmir.org/2020/5/e17644/", url="http://www.ncbi.nlm.nih.gov/pubmed/32469325" } @Article{info:doi/10.2196/15852, author="Leightley, Daniel and Pernet, David and Velupillai, Sumithra and Stewart, J. Robert and Mark, M. Katharine and Opie, Elena and Murphy, Dominic and Fear, T. Nicola and Stevelink, M. Sharon A.", title="The Development of the Military Service Identification Tool: Identifying Military Veterans in a Clinical Research Database Using Natural Language Processing and Machine Learning", journal="JMIR Med Inform", year="2020", month="May", day="25", volume="8", number="5", pages="e15852", keywords="natural language processing", keywords="machine learning", keywords="military personnel", keywords="electronic health care records", keywords="mental health", keywords="veteran", abstract="Background: Electronic health care records (EHRs) are a rich source of health-related information, with potential for secondary research use. In the United Kingdom, there is no national marker for identifying those who have previously served in the Armed Forces, making analysis of the health and well-being of veterans using EHRs difficult. Objective: This study aimed to develop a tool to identify veterans from free-text clinical documents recorded in a psychiatric EHR database. 
Methods: Veterans were manually identified using the South London and Maudsley (SLaM) Biomedical Research Centre Clinical Record Interactive Search---a database holding secondary mental health care electronic records for the SLaM National Health Service Foundation Trust. An iterative approach was taken; first, a structured query language (SQL) method was developed, which was then refined using natural language processing and machine learning to create the Military Service Identification Tool (MSIT) to identify if a patient was a civilian or veteran. Performance, defined as correct classification of veterans compared with incorrect classification, was measured using positive predictive value, negative predictive value, sensitivity, F1 score, and accuracy (otherwise termed Youden Index). Results: A gold standard dataset of 6672 free-text clinical documents was manually annotated by human coders. Of these documents, 66.00\% (4470/6672) were then used to train the SQL and MSIT approaches and 34.00\% (2202/6672) were used for testing the approaches. To develop the MSIT, an iterative 2-stage approach was undertaken. In the first stage, an SQL method was developed to identify veterans using a keyword rule--based approach. This approach obtained an accuracy of 0.93 in correctly predicting civilians and veterans, a positive predictive value of 0.81, a sensitivity of 0.75, and a negative predictive value of 0.95. This method informed the second stage, which was the development of the MSIT using machine learning, which, when tested, obtained an accuracy of 0.97, a positive predictive value of 0.90, a sensitivity of 0.91, and a negative predictive value of 0.98. Conclusions: The MSIT has the potential to be used in identifying veterans in the United Kingdom from free-text clinical documents, providing new and unique insights into the health and well-being of this population and their use of mental health care services. 
", doi="10.2196/15852", url="http://medinform.jmir.org/2020/5/e15852/", url="http://www.ncbi.nlm.nih.gov/pubmed/32348287" } @Article{info:doi/10.2196/17645, author="Li, Linfeng and Wang, Peng and Wang, Yao and Wang, Shenghui and Yan, Jun and Jiang, Jinpeng and Tang, Buzhou and Wang, Chengliang and Liu, Yuting", title="A Method to Learn Embedding of a Probabilistic Medical Knowledge Graph: Algorithm Development", journal="JMIR Med Inform", year="2020", month="May", day="21", volume="8", number="5", pages="e17645", keywords="probabilistic medical knowledge graph", keywords="representation learning", keywords="graph embedding", keywords="PrTransX", keywords="decision support systems, clinical", keywords="knowledge graph", keywords="medical informatics", keywords="electronic health records", keywords="natural language processing", abstract="Background: Knowledge graph embedding is an effective semantic representation method for entities and relations in knowledge graphs. Several translation-based algorithms, including TransE, TransH, TransR, TransD, and TranSparse, have been proposed to learn effective embedding vectors from typical knowledge graphs in which the relations between head and tail entities are deterministic. However, in medical knowledge graphs, the relations between head and tail entities are inherently probabilistic. This difference introduces a challenge in embedding medical knowledge graphs. Objective: We aimed to address the challenge of how to learn the probability values of triplets into representation vectors by making enhancements to existing TransX (where X is E, H, R, D, or Sparse) algorithms, including the following: (1) constructing a mapping function between the score value and the probability, and (2) introducing probability-based loss of triplets into the original margin-based loss function. 
Methods: We performed the proposed PrTransX algorithm on a medical knowledge graph that we built from large-scale real-world electronic medical records data. We evaluated the embeddings using link prediction task. Results: Compared with the corresponding TransX algorithms, the proposed PrTransX performed better than the TransX model in all evaluation indicators, achieving a higher proportion of corrected entities ranked in the top 10 and normalized discounted cumulative gain of the top 10 predicted tail entities, and lower mean rank. Conclusions: The proposed PrTransX successfully incorporated the uncertainty of the knowledge triplets into the embedding vectors. ", doi="10.2196/17645", url="https://medinform.jmir.org/2020/5/e17645", pmid="32436854" } @Article{info:doi/10.2196/15371, author="Howard, Derek and Maslej, Marta M. and Lee, Justin and Ritchie, Jacob and Woollard, Geoffrey and French, Leon", title="Transfer Learning for Risk Classification of Social Media Posts: Model Evaluation Study", journal="J Med Internet Res", year="2020", month="May", day="13", volume="22", number="5", pages="e15371", keywords="triage", keywords="classification", keywords="natural language processing", keywords="transfer learning", keywords="machine learning", keywords="data interpretation, statistical", keywords="mental health", keywords="social support", abstract="Background: Mental illness affects a significant portion of the worldwide population. Online mental health forums can provide a supportive environment for those afflicted and also generate a large amount of data that can be mined to predict mental health states using machine learning methods. Objective: This study aimed to benchmark multiple methods of text feature representation for social media posts and compare their downstream use with automated machine learning (AutoML) tools. 
We tested on datasets that contain posts labeled for perceived suicide risk or moderator attention in the context of self-harm. Specifically, we assessed the ability of the methods to prioritize posts that a moderator would identify for immediate response. Methods: We used 1588 labeled posts from the Computational Linguistics and Clinical Psychology (CLPsych) 2017 shared task collected from the Reachout.com forum. Posts were represented using lexicon-based tools, including Valence Aware Dictionary and sEntiment Reasoner, Empath, and Linguistic Inquiry and Word Count, and also using pretrained artificial neural network models, including DeepMoji, Universal Sentence Encoder, and Generative Pretrained Transformer-1 (GPT-1). We used Tree-based Optimization Tool and Auto-Sklearn as AutoML tools to generate classifiers to triage the posts. Results: The top-performing system used features derived from the GPT-1 model, which was fine-tuned on over 150,000 unlabeled posts from Reachout.com. Our top system had a macroaveraged F1 score of 0.572, providing a new state-of-the-art result on the CLPsych 2017 task. This was achieved without additional information from metadata or preceding posts. Error analyses revealed that this top system often misses expressions of hopelessness. In addition, we have presented visualizations that aid in the understanding of the learned classifiers. Conclusions: In this study, we found that transfer learning is an effective strategy for predicting risk with relatively little labeled data and noted that fine-tuning of pretrained language models provides further gains when large amounts of unlabeled text are available. 
", doi="10.2196/15371", url="https://www.jmir.org/2020/5/e15371", url="http://www.ncbi.nlm.nih.gov/pubmed/32401222" } @Article{info:doi/10.2196/14330, author="Lanera, Corrado and Berchialla, Paola and Baldi, Ileana and Lorenzoni, Giulia and Tramontan, Lara and Scamarcia, Antonio and Cantarutti, Luigi and Giaquinto, Carlo and Gregori, Dario", title="Use of Machine Learning Techniques for Case-Detection of Varicella Zoster Using Routinely Collected Textual Ambulatory Records: Pilot Observational Study", journal="JMIR Med Inform", year="2020", month="May", day="5", volume="8", number="5", pages="e14330", keywords="machine learning technique", keywords="text mining", keywords="electronic health report", keywords="varicella zoster", keywords="pediatric infectious disease", abstract="Background: The detection of infectious diseases through the analysis of free text on electronic health reports (EHRs) can provide prompt and accurate background information for the implementation of preventative measures, such as advertising and monitoring the effectiveness of vaccination campaigns. Objective: The purpose of this paper is to compare machine learning techniques in their application to EHR analysis for disease detection. Methods: The Pedianet database was used as a data source for a real-world scenario on the identification of cases of varicella. The models' training and test sets were based on two different Italian regions' (Veneto and Sicilia) data sets of 7631 patients and 1,230,355 records, and 2347 patients and 569,926 records, respectively, for whom a gold standard of varicella diagnosis was available. Elastic-net regularized generalized linear model (GLMNet), maximum entropy (MAXENT), and LogitBoost (boosting) algorithms were implemented in a supervised environment and 5-fold cross-validated. The document-term matrix generated by the training set involves a dictionary of 1,871,532 tokens. 
The analysis was conducted on a subset of 29,096 tokens, corresponding to a matrix with no more than a 99\% sparsity ratio. Results: The highest predictive values were achieved through boosting (positive predicative value [PPV] 63.1, 95\% CI 42.7-83.5 and negative predicative value [NPV] 98.8, 95\% CI 98.3-99.3). GLMNet delivered superior predictive capability compared to MAXENT (PPV 24.5\% and NPV 98.3\% vs PPV 11.0\% and NPV 98.0\%). MAXENT and GLMNet predictions weakly agree with each other (agreement coefficient 1 [AC1]=0.60, 95\% CI 0.58-0.62), as well as with LogitBoost (MAXENT: AC1=0.64, 95\% CI 0.63-0.66 and GLMNet: AC1=0.53, 95\% CI 0.51-0.55). Conclusions: Boosting has demonstrated promising performance in large-scale EHR-based infectious disease identification. ", doi="10.2196/14330", url="https://medinform.jmir.org/2020/5/e14330", url="http://www.ncbi.nlm.nih.gov/pubmed/32369038" } @Article{info:doi/10.2196/17787, author="Chen, Yen-Pin and Chen, Yi-Ying and Lin, Jr-Jiun and Huang, Chien-Hua and Lai, Feipei", title="Modified Bidirectional Encoder Representations From Transformers Extractive Summarization Model for Hospital Information Systems Based on Character-Level Tokens (AlphaBERT): Development and Performance Evaluation", journal="JMIR Med Inform", year="2020", month="Apr", day="29", volume="8", number="4", pages="e17787", keywords="transformer", keywords="BERT", keywords="deep learning", keywords="emergency medicine", keywords="automatic summarization", abstract="Background: Doctors must care for many patients simultaneously, and it is time-consuming to find and examine all patients' medical histories. Discharge diagnoses provide hospital staff with sufficient information to enable handling multiple patients; however, the excessive amount of words in the diagnostic sentences poses problems. 
Deep learning may be an effective solution to overcome this problem, but the use of such a heavy model may also add another obstacle to systems with limited computing resources. Objective: We aimed to build a diagnoses-extractive summarization model for hospital information systems and provide a service that can be operated even with limited computing resources. Methods: We used a Bidirectional Encoder Representations from Transformers (BERT)-based structure with a two-stage training method based on 258,050 discharge diagnoses obtained from the National Taiwan University Hospital Integrated Medical Database, and the highlighted extractive summaries written by experienced doctors were labeled. The model size was reduced using a character-level token, the number of parameters was decreased from 108,523,714 to 963,496, and the model was pretrained using random mask characters in the discharge diagnoses and International Statistical Classification of Diseases and Related Health Problems sets. We then fine-tuned the model using summary labels and cleaned up the prediction results by averaging all probabilities for entire words to prevent character level--induced fragment words. Model performance was evaluated against existing models BERT, BioBERT, and Long Short-Term Memory (LSTM) using the Recall-Oriented Understudy for Gisting Evaluation (ROUGE) L score, and a questionnaire website was built to collect feedback from more doctors for each summary proposal. Results: The area under the receiver operating characteristic curve values of the summary proposals were 0.928, 0.941, 0.899, and 0.947 for BERT, BioBERT, LSTM, and the proposed model (AlphaBERT), respectively. The ROUGE-L scores were 0.697, 0.711, 0.648, and 0.693 for BERT, BioBERT, LSTM, and AlphaBERT, respectively. 
The mean (SD) critique scores from doctors were 2.232 (0.832), 2.134 (0.877), 2.207 (0.844), 1.927 (0.910), and 2.126 (0.874) for reference-by-doctor labels, BERT, BioBERT, LSTM, and AlphaBERT, respectively. Based on the paired t test, there was a statistically significant difference in LSTM compared to the reference (P<.001), BERT (P=.001), BioBERT (P<.001), and AlphaBERT (P=.002), but not in the other models. Conclusions: Use of character-level tokens in a BERT model can greatly decrease the model size without significantly reducing performance for diagnoses summarization. A well-developed deep-learning model will enhance doctors' abilities to manage patients and promote medical studies by providing the capability to use extensive unstructured free-text notes. ", doi="10.2196/17787", url="http://medinform.jmir.org/2020/4/e17787/", url="http://www.ncbi.nlm.nih.gov/pubmed/32347806" } @Article{info:doi/10.2196/17642, author="Wang, Zheyu and Huang, Haoce and Cui, Liping and Chen, Juan and An, Jiye and Duan, Huilong and Ge, Huiqing and Deng, Ning", title="Using Natural Language Processing Techniques to Provide Personalized Educational Materials for Chronic Disease Patients in China: Development and Assessment of a Knowledge-Based Health Recommender System", journal="JMIR Med Inform", year="2020", month="Apr", day="23", volume="8", number="4", pages="e17642", keywords="health education", keywords="ontology", keywords="natural language processing", keywords="chronic disease", keywords="recommender system", abstract="Background: Health education emerged as an important intervention for improving the awareness and self-management abilities of chronic disease patients. The development of information technologies has changed the form of patient educational materials from traditional paper materials to electronic materials. 
To date, the amount of patient educational materials on the internet is tremendous, with variable quality, which makes it hard to identify the most valuable materials by individuals lacking medical backgrounds. Objective: The aim of this study was to develop a health recommender system to provide appropriate educational materials for chronic disease patients in China and evaluate the effect of this system. Methods: A knowledge-based recommender system was implemented using ontology and several natural language processing (NLP) techniques. The development process was divided into 3 stages. In stage 1, an ontology was constructed to describe patient characteristics contained in the data. In stage 2, an algorithm was designed and implemented to generate recommendations based on the ontology. Patient data and educational materials were mapped to the ontology and converted into vectors of the same length, and then recommendations were generated according to similarity between these vectors. In stage 3, the ontology and algorithm were incorporated into an mHealth system for practical use. Keyword extraction algorithms and pretrained word embeddings were used to preprocess educational materials. Three strategies were proposed to improve the performance of keyword extraction. System evaluation was based on a manually assembled test collection for 50 patients and 100 educational documents. Recommendation performance was assessed using the macro precision of top-ranked documents and the overall mean average precision (MAP). Results: The constructed ontology contained 40 classes, 31 object properties, 67 data properties, and 32 individuals. A total of 80 SWRL rules were defined to implement the semantic logic of mapping patient original data to the ontology vector space. The recommender system was implemented as a separate Web service connected with patients' smartphones. 
According to the evaluation results, our system can achieve a macro precision up to 0.970 for the top 1 recommendation and an overall MAP score up to 0.628. Conclusions: This study demonstrated that a knowledge-based health recommender system has the potential to accurately recommend educational materials to chronic disease patients. Traditional NLP techniques combined with improvement strategies for specific language and domain proved to be effective for improving system performance. One direction for future work is to explore the effect of such systems from the perspective of patients in a practical setting. ", doi="10.2196/17642", url="http://medinform.jmir.org/2020/4/e17642/", url="http://www.ncbi.nlm.nih.gov/pubmed/32324148" } @Article{info:doi/10.2196/16970, author="Nakatani, Hayao and Nakao, Masatoshi and Uchiyama, Hidefumi and Toyoshiba, Hiroyoshi and Ochiai, Chikayuki", title="Predicting Inpatient Falls Using Natural Language Processing of Nursing Records Obtained From Japanese Electronic Medical Records: Case-Control Study", journal="JMIR Med Inform", year="2020", month="Apr", day="22", volume="8", number="4", pages="e16970", keywords="fall", keywords="risk factor", keywords="prediction", keywords="nursing record", keywords="natural language processing", keywords="machine learning", abstract="Background: Falls in hospitals are the most common risk factor that affects the safety of inpatients and can result in severe harm. Therefore, preventing falls is one of the most important areas of risk management for health care organizations. However, existing methods for predicting falls are laborious and costly. Objective: The objective of this study is to verify whether hospital inpatient falls can be predicted through the analysis of a single input---unstructured nursing records obtained from Japanese electronic medical records (EMRs)---using a natural language processing (NLP) algorithm and machine learning. 
Methods: The nursing records of 335 fallers and 408 nonfallers for a 12-month period were extracted from the EMRs of an acute care hospital and randomly divided into a learning data set and test data set. The former data set was subjected to NLP and machine learning to extract morphemes that contributed to separating fallers from nonfallers to construct a model for predicting falls. Then, the latter data set was used to determine the predictive value of the model using receiver operating characteristic (ROC) analysis. Results: The prediction of falls using the test data set showed high accuracy, with an area under the ROC curve, sensitivity, specificity, and odds ratio of mean 0.834 (SD 0.005), mean 0.769 (SD 0.013), mean 0.785 (SD 0.020), and mean 12.27 (SD 1.11) for five independent experiments, respectively. The morphemes incorporated into the final model included many words closely related to known risk factors for falls, such as the use of psychotropic drugs, state of consciousness, and mobility, thereby demonstrating that an NLP algorithm combined with machine learning can effectively extract risk factors for falls from nursing records. Conclusions: We successfully established that falls among hospital inpatients can be predicted by analyzing nursing records using an NLP algorithm and machine learning. Therefore, it may be possible to develop a fall risk monitoring system that analyzes nursing records daily and alerts health care professionals when the fall risk of an inpatient is increased. 
", doi="10.2196/16970", url="http://medinform.jmir.org/2020/4/e16970/", url="http://www.ncbi.nlm.nih.gov/pubmed/32319959" } @Article{info:doi/10.2196/17984, author="Spasic, Irena and Nenadic, Goran", title="Clinical Text Data in Machine Learning: Systematic Review", journal="JMIR Med Inform", year="2020", month="Mar", day="31", volume="8", number="3", pages="e17984", keywords="natural language processing", keywords="machine learning", keywords="medical informatics", keywords="medical informatics applications", abstract="Background: Clinical narratives represent the main form of communication within health care, providing a personalized account of patient history and assessments, and offering rich information for clinical decision making. Natural language processing (NLP) has repeatedly demonstrated its feasibility to unlock evidence buried in clinical narratives. Machine learning can facilitate rapid development of NLP tools by leveraging large amounts of text data. Objective: The main aim of this study was to provide systematic evidence on the properties of text data used to train machine learning approaches to clinical NLP. We also investigated the types of NLP tasks that have been supported by machine learning and how they can be applied in clinical practice. Methods: Our methodology was based on the guidelines for performing systematic reviews. In August 2018, we used PubMed, a multifaceted interface, to perform a literature search against MEDLINE. We identified 110 relevant studies and extracted information about text data used to support machine learning, NLP tasks supported, and their clinical applications. The data properties considered included their size, provenance, collection methods, annotation, and any relevant statistics. Results: The majority of datasets used to train machine learning models included only hundreds or thousands of documents. Only 10 studies used tens of thousands of documents, with a handful of studies utilizing more. 
Relatively small datasets were utilized for training even when much larger datasets were available. The main reason for such poor data utilization is the annotation bottleneck faced by supervised machine learning algorithms. Active learning was explored to iteratively sample a subset of data for manual annotation as a strategy for minimizing the annotation effort while maximizing the predictive performance of the model. Supervised learning was successfully used where clinical codes integrated with free-text notes into electronic health records were utilized as class labels. Similarly, distant supervision was used to utilize an existing knowledge base to automatically annotate raw text. Where manual annotation was unavoidable, crowdsourcing was explored, but it remains unsuitable because of the sensitive nature of data considered. Besides the small volume, training data were typically sourced from a small number of institutions, thus offering no hard evidence about the transferability of machine learning models. The majority of studies focused on text classification. Most commonly, the classification results were used to support phenotyping, prognosis, care improvement, resource management, and surveillance. Conclusions: We identified the data annotation bottleneck as one of the key obstacles to machine learning approaches in clinical NLP. Active learning and distant supervision were explored as a way of saving the annotation efforts. Future research in this field would benefit from alternatives such as data augmentation and transfer learning, or unsupervised learning, which do not require data annotation. 
", doi="10.2196/17984", url="http://medinform.jmir.org/2020/3/e17984/", url="http://www.ncbi.nlm.nih.gov/pubmed/32229465" } @Article{info:doi/10.2196/16728, author="Osadchiy, Vadim and Mills, Nelson Jesse and Eleswarapu, Venkata Sriram", title="Understanding Patient Anxieties in the Social Media Era: Qualitative Analysis and Natural Language Processing of an Online Male Infertility Community", journal="J Med Internet Res", year="2020", month="Mar", day="10", volume="22", number="3", pages="e16728", keywords="social media", keywords="infertility, male", keywords="online social networking", keywords="Reddit", keywords="discussion board", abstract="Background: Couples struggling with infertility are increasingly turning to the internet for infertility-related content and to connect with others. Most of the published data on infertility and the internet only address the experiences of women, with limited studies focusing exclusively on internet discussions on male factor infertility. Objective: The aim of this study was to understand the concerns and experiences of discussants on an online male infertility community and to provide insight into their perceptions of interactions with health care professionals. Methods: Using the large-scale data analytics tool BigQuery, we extracted all posts in the r/MaleInfertility community (877 members) of the social media website and discussion board Reddit from November 2017 to October 2018. We performed a qualitative thematic analysis and quantitative semantic analysis using Language Inquiry and Word Count 2015 of the extracted posts to identify dominant themes and subthemes of discussions. Descriptive statistics and semantic analytic Z-scores were computed. 
Results: From the analysis of 97 posts, notable themes and subthemes emerged: 70 (72\%) posts shared personal experiences, including feeling emasculated or isolated or describing a negative (28/97, 29\%), positive (13/97, 13\%), or neutral (56/97, 58\%) experience with a health care professional; 19\% (18/97) of the posts posed questions about personal semen analysis results. On the basis of semantic analysis, posts by men had higher authenticity scores (Z=3.44; P<.001), suggesting more honest or personal texts, but lower clout scores (Z=4.57; P<.001), suggesting a more tentative or anxious style of writing, compared with posts by women. Conclusions: To our knowledge, this study represents the first evaluation of a social media community focused exclusively on male infertility using mixed methodology. These results suggest a role for physicians on social media to engage with patients and connect them to accurate resources, in addition to opportunities to improve in-office patient education. ", doi="10.2196/16728", url="http://www.jmir.org/2020/3/e16728/", pmid="32154785" } @Article{info:doi/10.2196/16878, author="Miotto, Riccardo and Percha, Bethany L. and Glicksberg, Benjamin S. and Lee, Hao-Chih and Cruz, Lisanne and Dudley, Joel T. and Nabeel, Ismail", title="Identifying Acute Low Back Pain Episodes in Primary Care Practice From Clinical Notes: Observational Study", journal="JMIR Med Inform", year="2020", month="Feb", day="27", volume="8", number="2", pages="e16878", keywords="electronic health records", keywords="clinical notes", keywords="low back pain", keywords="natural language processing", keywords="machine learning", abstract="Background: Acute and chronic low back pain (LBP) are different conditions with different treatments. 
However, they are coded in electronic health records with the same International Classification of Diseases, 10th revision (ICD-10) code (M54.5) and can be differentiated only by retrospective chart reviews. This prevents an efficient definition of data-driven guidelines for billing and therapy recommendations, such as return-to-work options. Objective: The objective of this study was to evaluate the feasibility of automatically distinguishing acute LBP episodes by analyzing free-text clinical notes. Methods: We used a dataset of 17,409 clinical notes from different primary care practices; of these, 891 documents were manually annotated as acute LBP and 2973 were generally associated with LBP via the recorded ICD-10 code. We compared different supervised and unsupervised strategies for automated identification: keyword search, topic modeling, logistic regression with bag of n-grams and manual features, and deep learning (a convolutional neural network-based architecture [ConvNet]). We trained the supervised models using either manual annotations or ICD-10 codes as positive labels. Results: ConvNet trained using manual annotations obtained the best results with an area under the receiver operating characteristic curve of 0.98 and an F score of 0.70. ConvNet's results were also robust to reduction of the number of manually annotated documents. In the absence of manual annotations, topic models performed better than methods trained using ICD-10 codes, which were unsatisfactory for identifying LBP acuity. Conclusions: This study uses clinical notes to delineate a potential path toward systematic learning of therapeutic strategies, billing guidelines, and management options for acute LBP at the point of care. 
", doi="10.2196/16878", url="http://medinform.jmir.org/2020/2/e16878/", url="http://www.ncbi.nlm.nih.gov/pubmed/32130159" } @Article{info:doi/10.2196/15861, author="O'Connor, Karen and Sarker, Abeed and Perrone, Jeanmarie and Gonzalez Hernandez, Graciela", title="Promoting Reproducible Research for Characterizing Nonmedical Use of Medications Through Data Annotation: Description of a Twitter Corpus and Guidelines", journal="J Med Internet Res", year="2020", month="Feb", day="26", volume="22", number="2", pages="e15861", keywords="prescription drug misuse", keywords="social media", keywords="substance abuse detection", keywords="natural language processing", keywords="machine learning", keywords="infodemiology", keywords="infoveillance", abstract="Background: Social media data are being increasingly used for population-level health research because it provides near real-time access to large volumes of consumer-generated data. Recently, a number of studies have explored the possibility of using social media data, such as from Twitter, for monitoring prescription medication abuse. However, there is a paucity of annotated data or guidelines for data characterization that discuss how information related to abuse-prone medications is presented on Twitter. Objective: This study discusses the creation of an annotated corpus suitable for training supervised classification algorithms for the automatic classification of medication abuse--related chatter. The annotation strategies used for improving interannotator agreement (IAA), a detailed annotation guideline, and machine learning experiments that illustrate the utility of the annotated corpus are also described. Methods: We employed an iterative annotation strategy, with interannotator discussions held and updates made to the annotation guidelines at each iteration to improve IAA for the manual annotation task. 
Using the grounded theory approach, we first characterized tweets into fine-grained categories and then grouped them into 4 broad classes---abuse or misuse, personal consumption, mention, and unrelated. After the completion of manual annotations, we experimented with several machine learning algorithms to illustrate the utility of the corpus and generate baseline performance metrics for automatic classification on these data. Results: Our final annotated set consisted of 16,443 tweets mentioning at least 20 abuse-prone medications including opioids, benzodiazepines, atypical antipsychotics, central nervous system stimulants, and gamma-aminobutyric acid analogs. Our final overall IAA was 0.86 (Cohen kappa), which represents high agreement. The manual annotation process revealed the variety of ways in which prescription medication misuse or abuse is discussed on Twitter, including expressions indicating coingestion, nonmedical use, nonstandard route of intake, and consumption above the prescribed doses. Among machine learning classifiers, support vector machines obtained the highest automatic classification accuracy of 73.00\% (95\% CI 71.4-74.5) over the test set (n=3271). Conclusions: Our manual analysis and annotations of a large number of tweets have revealed types of information posted on Twitter about a set of abuse-prone prescription medications and their distributions. In the interests of reproducible and community-driven research, we have made our detailed annotation guidelines and the training data for the classification experiments publicly available, and the test data will be used in future shared tasks. ", doi="10.2196/15861", url="http://www.jmir.org/2020/2/e15861/", pmid="32130117" } @Article{info:doi/10.2196/13855, author="Funk, Burkhardt and Sadeh-Sharvit, Shiri and Fitzsimmons-Craft, Ellen E. and Trockel, Mickey Todd and Monterubio, Grace E. and Goel, Neha J. and Balantekin, Katherine N. and Eichen, 
Dawn M. and Flatt, Rachael E. and Firebaugh, Marie-Laure and Jacobi, Corinna and Graham, Andrea K. and Hoogendoorn, Mark and Wilfley, Denise E. and Taylor, C. Barr", title="A Framework for Applying Natural Language Processing in Digital Health Interventions", journal="J Med Internet Res", year="2020", month="Feb", day="19", volume="22", number="2", pages="e13855", keywords="Digital Health Interventions Text Analytics (DHITA)", keywords="digital health interventions", keywords="eating disorders", keywords="guided self-help", keywords="natural language processing", keywords="text mining", abstract="Background: Digital health interventions (DHIs) are poised to reduce target symptoms in a scalable, affordable, and empirically supported way. DHIs that involve coaching or clinical support often collect text data from 2 sources: (1) open correspondence between users and the trained practitioners supporting them through a messaging system and (2) text data recorded during the intervention by users, such as diary entries. Natural language processing (NLP) offers methods for analyzing text, augmenting the understanding of intervention effects, and informing therapeutic decision making. Objective: This study aimed to present a technical framework that supports the automated analysis of both types of text data often present in DHIs. This framework generates text features and helps to build statistical models to predict target variables, including user engagement, symptom change, and therapeutic outcomes. Methods: We first discussed various NLP techniques and demonstrated how they are implemented in the presented framework. We then applied the framework in a case study of the Healthy Body Image Program, a Web-based intervention trial for eating disorders (EDs). A total of 372 participants who screened positive for an ED received a DHI aimed at reducing ED psychopathology (including binge eating and purging behaviors) and improving body image. 
These users generated 37,228 intervention text snippets and exchanged 4285 user-coach messages, which were analyzed using the proposed model. Results: We applied the framework to predict binge eating behavior, resulting in an area under the curve between 0.57 (when applied to new users) and 0.72 (when applied to new symptom reports of known users). In addition, initial evidence indicated that specific text features predicted the therapeutic outcome of reducing ED symptoms. Conclusions: The case study demonstrates the usefulness of a structured approach to text data analytics. NLP techniques improve the prediction of symptom changes in DHIs. We present a technical framework that can be easily applied in other clinical trials and clinical presentations and encourage other groups to apply the framework in similar contexts. ", doi="10.2196/13855", url="https://www.jmir.org/2020/2/e13855", url="http://www.ncbi.nlm.nih.gov/pubmed/32130118" } @Article{info:doi/10.2196/11287, author="Lyu, Xiaoguang and Hu, Jiming and Dong, Weiguo and Xu, Xin", title="Intellectual Structure and Evolutionary Trends of Precision Medicine Research: Coword Analysis", journal="JMIR Med Inform", year="2020", month="Feb", day="4", volume="8", number="2", pages="e11287", keywords="precision medicine", keywords="topics distribution", keywords="correlation structure", keywords="evolution patterns", keywords="coword analysis", abstract="Background: Precision medicine (PM) is playing a more and more important role in clinical practice. In recent years, the scale of PM research has been growing rapidly. Many reviews have been published to facilitate a better understanding of the status of PM research. However, there is still a lack of research on the intellectual structure in terms of topics. Objective: This study aimed to identify the intellectual structure and evolutionary trends of PM research through the application of various social network analysis and visualization methods. 
Methods: The bibliographies of papers published between 2009 and 2018 were extracted from the Web of Science database. Based on the statistics of keywords in the papers, a coword network was generated and used to calculate network indicators of both the entire network and local networks. Communities were then detected to identify subdirections of PM research. Topological maps of networks, including networks between communities and within each community, were drawn to reveal the correlation structure. An evolutionary graph and a strategic graph were finally produced to reveal research venation and trends in discipline communities. Results: The results showed that PM research involves extensive themes and, overall, is not balanced. A minority of themes with a high frequency and network indicators, such as Biomarkers, Genomics, Cancer, Therapy, Genetics, Drug, Target Therapy, Pharmacogenomics, Pharmacogenetics, and Molecular, can be considered the core areas of PM research. However, there were five balanced theme directions with distinguished status and tendencies: Cancer, Biomarkers, Genomics, Drug, and Therapy. These were shown to be the main branches that were both focused and well developed. Therapy, though, was shown to be isolated and undeveloped. Conclusions: The hotspots, structures, evolutions, and development trends of PM research in the past ten years were revealed using social network analysis and visualization. In general, PM research is unbalanced, but its subdirections are balanced. The clear evolutionary and developmental trend indicates that PM research has matured in recent years. The implications of this study involving PM research will provide reasonable and effective support for researchers, funders, policymakers, and clinicians. 
", doi="10.2196/11287", url="https://medinform.jmir.org/2020/2/e11287", url="http://www.ncbi.nlm.nih.gov/pubmed/32014844" } @Article{info:doi/10.2196/16023, author="Zunic, Anastazia and Corcoran, Padraig and Spasic, Irena", title="Sentiment Analysis in Health and Well-Being: Systematic Review", journal="JMIR Med Inform", year="2020", month="Jan", day="28", volume="8", number="1", pages="e16023", keywords="sentiment analysis", keywords="natural language processing", keywords="text mining", keywords="machine learning", abstract="Background: Sentiment analysis (SA) is a subfield of natural language processing whose aim is to automatically classify the sentiment expressed in a free text. It has found practical applications across a wide range of societal contexts including marketing, economy, and politics. This review focuses specifically on applications related to health, which is defined as ``a state of complete physical, mental, and social well-being and not merely the absence of disease or infirmity.'' Objective: This study aimed to establish the state of the art in SA related to health and well-being by conducting a systematic review of the recent literature. To capture the perspective of those individuals whose health and well-being are affected, we focused specifically on spontaneously generated content and not necessarily that of health care professionals. Methods: Our methodology is based on the guidelines for performing systematic reviews. In January 2019, we used PubMed, a multifaceted interface, to perform a literature search against MEDLINE. We identified a total of 86 relevant studies and extracted data about the datasets analyzed, discourse topics, data creators, downstream applications, algorithms used, and their evaluation. Results: The majority of data were collected from social networking and Web-based retailing platforms. The primary purpose of online conversations is to exchange information and provide social support online. 
These communities tend to form around health conditions with high severity and chronicity rates. Different treatments and services discussed include medications, vaccination, surgery, orthodontic services, individual physicians, and health care services in general. We identified 5 roles with respect to health and well-being among the authors of the types of spontaneously generated narratives considered in this review: a sufferer, an addict, a patient, a carer, and a suicide victim. Out of 86 studies considered, only 4 reported the demographic characteristics. A wide range of methods were used to perform SA. Most common choices included support vector machines, na{\"i}ve Bayesian learning, decision trees, logistic regression, and adaptive boosting. In contrast with general trends in SA research, only 1 study used deep learning. The performance lags behind the state of the art achieved in other domains when measured by F-score, which was found to be below 60\% on average. In the context of SA, the domain of health and well-being was found to be resource poor: few domain-specific corpora and lexica are shared publicly for research purposes. Conclusions: SA results in the area of health and well-being lag behind those in other domains. It is yet unclear if this is because of the intrinsic differences between the domains and their respective sublanguages, the size of training datasets, the lack of domain-specific sentiment lexica, or the choice of algorithms. ", doi="10.2196/16023", url="https://medinform.jmir.org/2020/1/e16023", url="http://www.ncbi.nlm.nih.gov/pubmed/32012057" } @Article{info:doi/10.2196/16042, author="Pfaff, R. 
Emily and Crosskey, Miles and Morton, Kenneth and Krishnamurthy, Ashok", title="Clinical Annotation Research Kit (CLARK): Computable Phenotyping Using Machine Learning", journal="JMIR Med Inform", year="2020", month="Jan", day="24", volume="8", number="1", pages="e16042", keywords="natural language processing", keywords="machine learning", keywords="electronic health records", doi="10.2196/16042", url="http://medinform.jmir.org/2020/1/e16042/", url="http://www.ncbi.nlm.nih.gov/pubmed/32012059" } @Article{info:doi/10.2196/16816, author="Wang, Jing and Deng, Huan and Liu, Bangtao and Hu, Anbin and Liang, Jun and Fan, Lingye and Zheng, Xu and Wang, Tong and Lei, Jianbo", title="Systematic Evaluation of Research Progress on Natural Language Processing in Medicine Over the Past 20 Years: Bibliometric Study on PubMed", journal="J Med Internet Res", year="2020", month="Jan", day="23", volume="22", number="1", pages="e16816", keywords="natural language processing", keywords="clinical", keywords="medicine", keywords="information extraction", keywords="electronic medical record", abstract="Background: Natural language processing (NLP) is an important traditional field in computer science, but its application in medical research has faced many challenges. With the extensive digitalization of medical information globally and increasing importance of understanding and mining big data in the medical field, NLP is becoming more crucial. Objective: The goal of the research was to perform a systematic review on the use of NLP in medical research with the aim of understanding the global progress on NLP research outcomes, content, methods, and study groups involved. Methods: A systematic review was conducted using the PubMed database as a search platform. All published studies on the application of NLP in medicine (except biomedicine) during the 20 years between 1999 and 2018 were retrieved. The data obtained from these published studies were cleaned and structured. 
Excel (Microsoft Corp) and VOSviewer (Nees Jan van Eck and Ludo Waltman) were used to perform bibliometric analysis of publication trends, author orders, countries, institutions, collaboration relationships, research hot spots, diseases studied, and research methods. Results: A total of 3498 articles were obtained during initial screening, and 2336 articles were found to meet the study criteria after manual screening. The number of publications increased every year, with a significant growth after 2012 (number of publications ranged from 148 to a maximum of 302 annually). The United States has occupied the leading position since the inception of the field, with the largest number of articles published. The United States contributed to 63.01\% (1472/2336) of all publications, followed by France (5.44\%, 127/2336) and the United Kingdom (3.51\%, 82/2336). The author with the largest number of articles published was Hongfang Liu (70), while St{\'e}phane Meystre (17) and Hua Xu (33) published the largest number of articles as the first and corresponding authors. Among the first author's affiliation institution, Columbia University published the largest number of articles, accounting for 4.54\% (106/2336) of the total. Specifically, approximately one-fifth (17.68\%, 413/2336) of the articles involved research on specific diseases, and the subject areas primarily focused on mental illness (16.46\%, 68/413), breast cancer (5.81\%, 24/413), and pneumonia (4.12\%, 17/413). Conclusions: NLP is in a period of robust development in the medical field, with an average of approximately 100 publications annually. Electronic medical records were the most used research materials, but social media such as Twitter have become important research materials since 2015. 
Cancer (24.94\%, 103/413) was the most common subject area in NLP-assisted medical research on diseases, with breast cancers (23.30\%, 24/103) and lung cancers (14.56\%, 15/103) accounting for the highest proportions of studies. Columbia University and the talents trained therein were the most active and prolific research forces on NLP in the medical field. ", doi="10.2196/16816", url="http://www.jmir.org/2020/1/e16816/", url="http://www.ncbi.nlm.nih.gov/pubmed/32012074" } @Article{info:doi/10.2196/14971, author="Hu, Baotian and Bajracharya, Adarsha and Yu, Hong", title="Generating Medical Assessments Using a Neural Network Model: Algorithm Development and Validation", journal="JMIR Med Inform", year="2020", month="Jan", day="15", volume="8", number="1", pages="e14971", keywords="electronic health record note", keywords="medical assessment generation", keywords="deep neural network model", keywords="artificial intelligence", keywords="natural language processing", abstract="Background: Since its inception, artificial intelligence has aimed to use computers to help make clinical diagnoses. Evidence-based medical reasoning is important for patient care. Inferring clinical diagnoses is a crucial step during the patient encounter. Previous works mainly used expert systems or machine learning--based methods to predict the International Classification of Diseases - Clinical Modification codes based on electronic health records. We report an alternative approach: inference of clinical diagnoses from patients' reported symptoms and physicians' clinical observations. Objective: We aimed to report a natural language processing system for generating medical assessments based on patient information described in the electronic health record (EHR) notes. Methods: We processed EHR notes into the Subjective, Objective, Assessment, and Plan sections. We trained a neural network model for medical assessment generation (N2MAG). 
Our N2MAG is an innovative deep neural model that uses the Subjective and Objective sections of an EHR note to automatically generate an ``expert-like'' assessment of the patient. N2MAG can be trained in an end-to-end fashion and does not require feature engineering and external knowledge resources. Results: We evaluated N2MAG and the baseline models both quantitatively and qualitatively. Evaluated by both the Recall-Oriented Understudy for Gisting Evaluation metrics and domain experts, our results show that N2MAG outperformed the existing state-of-the-art baseline models. Conclusions: N2MAG could generate a medical assessment from the Subjective and Objective section descriptions in EHR notes. Future work will assess its potential for providing clinical decision support. ", doi="10.2196/14971", url="http://medinform.jmir.org/2020/1/e14971/", url="http://www.ncbi.nlm.nih.gov/pubmed/31939742" } @Article{info:doi/10.2196/13296, author="Mohammadhassanzadeh, Hossein and Sketris, Ingrid and Traynor, Robyn and Alexander, Susan and Winquist, Brandace and Stewart, Alan Samuel", title="Using Natural Language Processing to Examine the Uptake, Content, and Readability of Media Coverage of a Pan-Canadian Drug Safety Research Project: Cross-Sectional Observational Study", journal="JMIR Form Res", year="2020", month="Jan", day="14", volume="4", number="1", pages="e13296", keywords="natural language processing", keywords="mass media", keywords="readability", keywords="pharmacoepidemiology", keywords="knowledge translation", abstract="Background: Isotretinoin, for treating cystic acne, increases the risk of miscarriage and fetal abnormalities when taken during pregnancy. The Health Canada--approved product monograph for isotretinoin includes pregnancy prevention guidelines. A recent study by the Canadian Network for Observational Drug Effect Studies (CNODES) on the occurrence of pregnancy and pregnancy outcomes during isotretinoin therapy estimated poor adherence to these guidelines. 
Media uptake of this study was unknown; awareness of this uptake could help improve drug safety communication. Objective: The aim of this study was to understand how the media present pharmacoepidemiological research using the CNODES isotretinoin study as a case study. Methods: Google News was searched (April 25-May 6, 2016), using a predefined set of terms, for mention of the CNODES study. In total, 26 articles and 3 CNODES publications (original article, press release, and podcast) were identified. The article texts were cleaned (eg, advertisements and links removed), and the podcast was transcribed. A dictionary of 1295 unique words was created using natural language processing (NLP) techniques (term frequency-inverse document frequency, Porter stemming, and stop-word filtering) to identify common words and phrases. Similarity between the articles and reference publications was calculated using Euclidian distance; articles were grouped using hierarchical agglomerative clustering. Nine readability scales were applied to measure text readability based on factors such as number of words, difficult words, syllables, sentence counts, and other textual metrics. Results: The top 5 dictionary words were pregnancy (250 appearances), isotretinoin (220), study (209), drug (201), and women (185). Three distinct clusters were identified: Clusters 2 (5 articles) and 3 (4 articles) were from health-related websites and media, respectively; Cluster 1 (18 articles) contained largely media sources; 2 articles fell outside these clusters. Use of the term isotretinoin versus Accutane (a brand name of isotretinoin), discussion of pregnancy complications, and assignment of responsibility for guideline adherence varied between clusters. For example, the term pregnanc appeared most often in Clusters 1 (14.6 average times per article) and 2 (11.4) and relatively infrequently in Cluster 3 (1.8). 
Average readability for all articles was high (eg, Flesch-Kincaid, 13; Gunning Fog, 15; SMOG Index, 10; Coleman Liau Index, 15; Linsear Write Index, 13; and Text Standard, 13). Readability increased from Cluster 2 (Gunning Fog of 16.9) to 3 (12.2). It varied between clusters (average 13th-15th grade) but exceeded the recommended health information reading level (grade 6th to 8th), overall. Conclusions: Media interpretation of the CNODES study varied, with differences in synonym usage and areas of focus. All articles were written above the recommended health information reading level. Analyzing media using NLP techniques can help determine drug safety communication effectiveness. This project is important for understanding how drug safety studies are taken up and redistributed in the media. ", doi="10.2196/13296", url="https://formative.jmir.org/2020/1/e13296", url="http://www.ncbi.nlm.nih.gov/pubmed/31934872" } @Article{info:doi/10.2196/15645, author="Prieto, Tom{\'a}s Jos{\'e} and Scott, Kenneth and McEwen, Dean and Podewils, J. Laura and Al-Tayyib, Alia and Robinson, James and Edwards, David and Foldy, Seth and Shlay, C. Judith and Davidson, J. Arthur", title="The Detection of Opioid Misuse and Heroin Use From Paramedic Response Documentation: Machine Learning for Improved Surveillance", journal="J Med Internet Res", year="2020", month="Jan", day="3", volume="22", number="1", pages="e15645", keywords="naloxone", keywords="emergency medical services", keywords="natural language processing", keywords="heroin", keywords="substance-related disorders", keywords="opioid crisis", keywords="artificial intelligence", abstract="Background: Timely, precise, and localized surveillance of nonfatal events is needed to improve response and prevention of opioid-related problems in an evolving opioid crisis in the United States. 
Records of naloxone administration found in prehospital emergency medical services (EMS) data have helped estimate opioid overdose incidence, including nonhospital, field-treated cases. However, as naloxone is often used by EMS personnel in unconsciousness of unknown cause, attributing naloxone administration to opioid misuse and heroin use (OM) may misclassify events. Better methods are needed to identify OM. Objective: This study aimed to develop and test a natural language processing method that would improve identification of potential OM from paramedic documentation. Methods: First, we searched Denver Health paramedic trip reports from August 2017 to April 2018 for keywords naloxone, heroin, and both combined, and we reviewed narratives of identified reports to determine whether they constituted true cases of OM. Then, we used this human classification as reference standard and trained 4 machine learning models (random forest, k-nearest neighbors, support vector machines, and L1-regularized logistic regression). We selected the algorithm that produced the highest area under the receiver operating curve (AUC) for model assessment. Finally, we compared positive predictive value (PPV) of the highest performing machine learning algorithm with PPV of searches of keywords naloxone, heroin, and combination of both in the binary classification of OM in unseen September 2018 data. Results: In total, 54,359 trip reports were filed from August 2017 to April 2018. Approximately 1.09\% (594/54,359) indicated naloxone administration. Among trip reports with reviewer agreement regarding OM in the narrative, 57.6\% (292/516) were considered to include information revealing OM. Approximately 1.63\% (884/54,359) of all trip reports mentioned heroin in the narrative. Among trip reports with reviewer agreement, 95.5\% (784/821) were considered to include information revealing OM. Combined results accounted for 2.39\% (1298/54,359) of trip reports. 
Among trip reports with reviewer agreement, 77.79\% (907/1166) were considered to include information consistent with OM. The reference standard used to train and test machine learning models included details of 1166 trip reports. L1-regularized logistic regression was the highest performing algorithm (AUC=0.94; 95\% CI 0.91-0.97) in identifying OM. Tested on 5983 unseen reports from September 2018, the keyword naloxone inaccurately identified and underestimated probable OM trip report cases (63 cases; PPV=0.68). The keyword heroin yielded more cases with improved performance (129 cases; PPV=0.99). Combined keyword and L1-regularized logistic regression classifier further improved performance (146 cases; PPV=0.99). Conclusions: A machine learning application enhanced the effectiveness of finding OM among documented paramedic field responses. This approach to refining OM surveillance may lead to improved first-responder and public health responses toward prevention of overdoses and other opioid-related problems in US communities. 
", doi="10.2196/15645", url="https://www.jmir.org/2020/1/e15645", url="http://www.ncbi.nlm.nih.gov/pubmed/31899451" } @Article{info:doi/10.2196/15684, author="Hua, My and Sadah, Shouq and Hristidis, Vagelis and Talbot, Prue", title="Health Effects Associated With Electronic Cigarette Use: Automated Mining of Online Forums", journal="J Med Internet Res", year="2020", month="Jan", day="3", volume="22", number="1", pages="e15684", keywords="electronic cigarettes", keywords="vaping epidemic", keywords="vaping-associated pulmonary illness", keywords="e-cigarettes", keywords="electronic nicotine delivery devices", keywords="health effects", keywords="nicotine", keywords="symptoms", keywords="disorders", keywords="pulmonary disease", keywords="pneumonia", keywords="headaches", keywords="content analysis", keywords="text classification", keywords="e-cigarette, or vaping, product use associated lung injury", abstract="Background: Our previous infodemiological study was performed by manually mining health-effect data associated with electronic cigarettes (ECs) from online forums. Manual mining is time consuming and limits the number of posts that can be retrieved. Objective: Our goal in this study was to automatically extract and analyze a large number (>41,000) of online forum posts related to the health effects associated with EC use between 2008 and 2015. Methods: Data were annotated with medical concepts from the Unified Medical Language System using a modified version of the MetaMap tool. Of over 1.4 million posts, 41,216 were used to analyze symptoms (undiagnosed conditions) and disorders (physician-diagnosed terminology) associated with EC use. For each post, sentiment (positive, negative, and neutral) was also assigned. Results: Symptom and disorder data were categorized into 12 organ systems or anatomical regions. Most posts on symptoms and disorders contained negative sentiment, and affected systems were similar across all years. 
Health effects were reported most often in the neurological, mouth and throat, and respiratory systems. The most frequently reported symptoms and disorders were headache (n=939), coughing (n=852), malaise (n=468), asthma (n=916), dehydration (n=803), and pharyngitis (n=565). In addition, users often reported linked symptoms (eg, coughing and headache). Conclusions: Online forums are a valuable repository of data that can be used to identify positive and negative health effects associated with EC use. By automating extraction of online information, we obtained more data than in our prior study, identified new symptoms and disorders associated with EC use, determined which systems are most frequently adversely affected, identified specific symptoms and disorders most commonly reported, and tracked health effects over 7 years. ", doi="10.2196/15684", url="https://www.jmir.org/2020/1/e15684", url="http://www.ncbi.nlm.nih.gov/pubmed/31899452" } @Article{info:doi/10.2196/14782, author="Wu, Honghan and Hodgson, Karen and Dyson, Sue and Morley, I. Katherine and Ibrahim, M. Zina and Iqbal, Ehtesham and Stewart, Robert and Dobson, JB Richard and Sudlow, Cathie", title="Efficient Reuse of Natural Language Processing Models for Phenotype-Mention Identification in Free-text Electronic Medical Records: A Phenotype Embedding Approach", journal="JMIR Med Inform", year="2019", month="Dec", day="17", volume="7", number="4", pages="e14782", keywords="natural language processing", keywords="text mining", keywords="phenotype", keywords="word embedding", keywords="phenotype embedding", keywords="model adaptation", keywords="electronic health records", keywords="machine learning", keywords="clustering", abstract="Background: Much effort has been put into the use of automated approaches, such as natural language processing (NLP), to mine or extract data from free-text medical records in order to construct comprehensive patient profiles for delivering better health care. 
Reusing NLP models in new settings, however, remains cumbersome, as it requires validation and retraining on new data iteratively to achieve convergent results. Objective: The aim of this work is to minimize the effort involved in reusing NLP models on free-text medical records. Methods: We formally define and analyze the model adaptation problem in phenotype-mention identification tasks. We identify ``duplicate waste'' and ``imbalance waste,'' which collectively impede efficient model reuse. We propose a phenotype embedding--based approach to minimize these sources of waste without the need for labelled data from new settings. Results: We conduct experiments on data from a large mental health registry to reuse NLP models in four phenotype-mention identification tasks. The proposed approach can choose the best model for a new task, identifying up to 76\% waste (duplicate waste), that is, phenotype mentions without the need for validation and model retraining and with very good performance (93\%-97\% accuracy). It can also provide guidance for validating and retraining the selected model for novel language patterns in new tasks, saving around 80\% waste (imbalance waste), that is, the effort required in ``blind'' model-adaptation approaches. Conclusions: Adapting pretrained NLP models for new tasks can be more efficient and effective if the language pattern landscapes of old settings and new settings can be made explicit and comparable. Our experiments show that the phenotype-mention embedding approach is an effective way to model language patterns for phenotype-mention identification tasks and that its use can guide efficient NLP model reuse. 
", doi="10.2196/14782", url="http://medinform.jmir.org/2019/4/e14782/", url="http://www.ncbi.nlm.nih.gov/pubmed/31845899" } @Article{info:doi/10.2196/13430, author="Afzal, Muhammad and Hussain, Maqbool and Malik, Mahmood Khalid and Lee, Sungyoung", title="Impact of Automatic Query Generation and Quality Recognition Using Deep Learning to Curate Evidence From Biomedical Literature: Empirical Study", journal="JMIR Med Inform", year="2019", month="Dec", day="9", volume="7", number="4", pages="e13430", keywords="data curation", keywords="evidence-based medicine", keywords="clinical decision support systems", keywords="precision medicine", keywords="biomedical research", keywords="machine learning", keywords="deep learning", abstract="Background: The quality of health care is continuously improving and is expected to improve further because of the advancement of machine learning and knowledge-based techniques along with innovation and availability of wearable sensors. With these advancements, health care professionals are now becoming more interested and involved in seeking scientific research evidence from external sources for decision making relevant to medical diagnosis, treatments, and prognosis. Not much work has been done to develop methods for unobtrusive and seamless curation of data from the biomedical literature. Objective: This study aimed to design a framework that can enable bringing quality publications intelligently to the users' desk to assist medical practitioners in answering clinical questions and fulfilling their informational needs. Methods: The proposed framework consists of methods for efficient biomedical literature curation, including the automatic construction of a well-built question, the recognition of evidence quality by proposing extended quality recognition model (E-QRM), and the ranking and summarization of the extracted evidence. 
Results: Unlike previous works, the proposed framework systematically integrates the echelons of biomedical literature curation by including methods for searching queries, content quality assessments, and ranking and summarization. Using an ensemble approach, our high-impact classifier E-QRM obtained significantly improved accuracy than the existing quality recognition model (1723/1894, 90.97\% vs 1462/1894, 77.21\%). Conclusions: Our proposed methods and evaluation demonstrate the validity and rigorousness of the results, which can be used in different applications, including evidence-based medicine, precision medicine, and medical education. ", doi="10.2196/13430", url="http://medinform.jmir.org/2019/4/e13430/", url="http://www.ncbi.nlm.nih.gov/pubmed/31815673" } @Article{info:doi/10.2196/14809, author="Timimi, Farris and Ray, Sara and Jones, Erik and Aase, Lee and Hoffman, Kathleen", title="Patient-Reported Outcomes in Online Communications on Statins, Memory, and Cognition: Qualitative Analysis Using Online Communities", journal="J Med Internet Res", year="2019", month="Nov", day="28", volume="21", number="11", pages="e14809", keywords="social media", keywords="hydroxymethylglutaryl-CoA reductase inhibitors", keywords="drug-related side effects and adverse reactions", keywords="memory loss", keywords="PROMs", keywords="pharmacovigilance", keywords="infodemiology", keywords="infoveillance", keywords="peer-support groups", abstract="Background: In drug development clinical trials, there is a need for balance between restricting variables by setting eligibility criteria and representing the broader patient population that may use a product once it is approved. Similarly, although recent policy initiatives focusing on the inclusion of historically underrepresented groups are being implemented, barriers still remain. These limitations of clinical trials may mask potential product benefits and side effects. 
To bridge these gaps, online communication in health communities may serve as an additional population signal for drug side effects. Objective: The aim of this study was to employ a nontraditional dataset to identify drug side-effect signals. The study was designed to apply both natural language processing (NLP) technology and hands-on linguistic analysis to a set of online posts from known statin users to (1) identify any underlying crossover between the use of statins and impairment of memory or cognition and (2) obtain patient lexicon in their descriptions of experiences with statin medications and memory changes. Methods: Researchers utilized user-generated content on Inspire, looking at over 11 million posts across Inspire. Posts were written by patients and caregivers belonging to a variety of communities on Inspire. After identifying these posts, researchers used NLP and hands-on linguistic analysis to draw and expand upon correlations among statin use, memory, and cognition. Results: NLP analysis of posts identified statistical correlations between statin users and the discussion of memory impairment, which were not observed in control groups. NLP found that, out of all members on Inspire, 3.1\% had posted about memory or cognition. In a control group of those who had posted about TNF inhibitors, 6.2\% had also posted about memory and cognition. In comparison, of all those who had posted about a statin medication, 22.6\% (P<.001) also posted about memory and cognition. Furthermore, linguistic analysis of a sample of posts provided themes and context to these statistical findings. By looking at posts from statin users about memory, four key themes were found and described in detail in the data: memory loss, aphasia, cognitive impairment, and emotional change. Conclusions: Correlations from this study point to a need for further research on the impact of statins on memory and cognition. 
Furthermore, when using nontraditional datasets, such as online communities, NLP and linguistic methodologies broaden the population for identifying side-effect signals. For side effects such as those on memory and cognition, where self-reporting may be unreliable, these methods can provide another avenue to inform patients, providers, and the Food and Drug Administration. ", doi="10.2196/14809", url="http://www.jmir.org/2019/11/e14809/", url="http://www.ncbi.nlm.nih.gov/pubmed/31778117" } @Article{info:doi/10.2196/14502, author="Lai, Po-Ting and Lu, Wei-Liang and Kuo, Ting-Rung and Chung, Chia-Ru and Han, Jen-Chieh and Tsai, Tzong-Han Richard and Horng, Jorng-Tzong", title="Using a Large Margin Context-Aware Convolutional Neural Network to Automatically Extract Disease-Disease Association from Literature: Comparative Analytic Study", journal="JMIR Med Inform", year="2019", month="Nov", day="26", volume="7", number="4", pages="e14502", keywords="deep learning", keywords="disease-disease association", keywords="biological relation extraction", keywords="convolutional neural networks", keywords="biomedical natural language processing", abstract="Background: Research on disease-disease association (DDA), like comorbidity and complication, provides important insights into disease treatment and drug discovery, and a large body of the literature has been published in the field. However, using current search tools, it is not easy for researchers to retrieve information on the latest DDA findings. First, comorbidity and complication keywords pull up large numbers of PubMed studies. Second, disease is not highlighted in search results. Finally, DDA is not identified, as currently no disease-disease association extraction (DDAE) dataset or tools are available. Objective: As there are no available DDAE datasets or tools, this study aimed to develop (1) a DDAE dataset and (2) a neural network model for extracting DDA from the literature. 
Methods: In this study, we formulated DDAE as a supervised machine learning classification problem. To develop the system, we first built a DDAE dataset. We then employed two machine learning models, support vector machine and convolutional neural network, to extract DDA. Furthermore, we evaluated the effect of using the output layer as features of the support vector machine-based model. Finally, we implemented large margin context-aware convolutional neural network architecture to integrate context features and convolutional neural networks through the large margin function. Results: Our DDAE dataset consisted of 521 PubMed abstracts. Experiment results showed that the support vector machine-based approach achieved an F1 measure of 80.32\%, which is higher than the convolutional neural network-based approach (73.32\%). Using the output layer of convolutional neural network as a feature for the support vector machine does not further improve the performance of support vector machine. However, our large margin context-aware-convolutional neural network achieved the highest F1 measure of 84.18\% and demonstrated that combining the hinge loss function of support vector machine with a convolutional neural network into a single neural network architecture outperforms other approaches. Conclusions: To facilitate the development of text-mining research for DDAE, we developed the first publicly available DDAE dataset consisting of disease mentions, Medical Subject Heading IDs, and relation annotations. We developed different conventional machine learning models and neural network architectures and evaluated their effects on our DDAE dataset. To further improve DDAE performance, we propose an large margin context-aware-convolutional neural network model for DDAE that outperforms other approaches. 
", doi="10.2196/14502", url="http://medinform.jmir.org/2019/4/e14502/", url="http://www.ncbi.nlm.nih.gov/pubmed/31769759" } @Article{info:doi/10.2196/14285, author="Booth, Alison and Bell, Timothy and Halhol, Sonia and Pan, Shiyu and Welch, Verna and Merinopoulou, Evie and Lambrelli, Dimitra and Cox, Andrew", title="Using Social Media to Uncover Treatment Experiences and Decisions in Patients With Acute Myeloid Leukemia or Myelodysplastic Syndrome Who Are Ineligible for Intensive Chemotherapy: Patient-Centric Qualitative Data Analysis", journal="J Med Internet Res", year="2019", month="Nov", day="22", volume="21", number="11", pages="e14285", keywords="social media", keywords="health-related quality of life", keywords="patient-centric", keywords="leukemia", keywords="myeloid", keywords="acute", keywords="myelodysplastic syndromes", keywords="natural language processing", keywords="patient preference", keywords="qualitative research", abstract="Background: Until recently, treatment options were limited for patients with acute myeloid leukemia and myelodysplastic syndrome (AML and MDS) who are ineligible for intensive chemotherapy. Owing to the condition's rapid progression, it is difficult to identify what is most important to patients when making treatment decisions. Patients' needs can be better addressed by gaining a deeper understanding of their perspectives, which is valuable in the decision-making process. The Food and Drug Administration recently encouraged the use of social media as a tool to gain insight on patients' perspectives regarding symptoms experienced and the impacts of their disease. Objective: This study aimed to use disease-specific social media posts by patients with AML or MDS who are ineligible for intensive chemotherapy and their caregivers to capture factors they feel are most important, and to provide current evidence to inform and characterize these perspectives. 
Methods: Posts by patients with AML or MDS and their caregivers were extracted from publicly available discussions on 3 large AML- or MDS--specific sites. These posts were manually reviewed to only include patients who are ineligible for intensive chemotherapy. A total of 1443 posts from 220 AML patients/caregivers and 2733 posts from 127 MDS patients/caregivers met the study inclusion criteria. A qualitative data analysis (QDA) of a sample of 85 patients'/caregivers' posts was conducted to identify themes, and a targeted QDA of posts from 79 users focused on treatment decision discussions. Posts were manually reviewed, and relevant text segments were coded and grouped into categories and overall themes. Results: Eighty-six percent (73/85) of users in the overall QDA had relevant information about the key objectives. The most commonly discussed treatment experience theme was the humanistic burden of AML or MDS in terms of emotional/physical impact and impact on family (86\%, 63/73 of users), followed by treatment decisions (56\%, 41/73) and unmet needs (50\%, 37/73). In the QDA of treatment decisions, 60 posts from 45 users contained relevant information. Patients commonly reported the desire to reach specific milestones, including birthdays and weddings. They wished for a better quality of life over quantity of life, did not want the risk of suffering from side effects, and expressed a clear preference to be at home rather than in a hospital or care home. Conclusions: This study was a novel application of disease-specific social media. It highlighted experiences in the current treatment of AML and MDS, including information gaps, patient/caregiver uncertainty, and the importance of understanding patients'/caregivers' goals and opinions. A clear finding from this research was the importance of reaching certain personal life goals and being at home with family and friends. 
The analysis showed that patients/caregivers face additional challenges, including humanistic impacts and a lack of information regarding treatment options. ", doi={10.2196/14285}, url={http://www.jmir.org/2019/11/e14285/}, eprint={31755871}, eprinttype={pubmed} } @Article{info:doi/10.2196/14850, author={Jiang, Min and Sanger, Todd and Liu, Xiong}, title={Combining Contextualized Embeddings and Prior Knowledge for Clinical Named Entity Recognition: Evaluation Study}, journal={JMIR Med Inform}, year={2019}, month=nov, day={13}, volume={7}, number={4}, pages={e14850}, keywords={natural language processing; named entity recognition; deep learning; contextualized word embedding; semantic embedding; prior knowledge}, abstract="Background: Named entity recognition (NER) is a key step in clinical natural language processing (NLP). Traditionally, rule-based systems leverage prior knowledge to define rules to identify named entities. Recently, deep learning--based NER systems have become more and more popular. Contextualized word embedding, as a new type of representation of the word, has been proposed to dynamically capture word sense using context information and has proven successful in many deep learning--based systems in either general domain or medical domain. However, there are very few studies that investigate the effects of combining multiple contextualized embeddings and prior knowledge on the clinical NER task. Objective: This study aims to improve the performance of NER in clinical text by combining multiple contextual embeddings and prior knowledge. Methods: In this study, we investigate the effects of combining multiple contextualized word embeddings with classic word embedding in deep neural networks to predict named entities in clinical text. We also investigate whether using a semantic lexicon could further improve the performance of the clinical NER system. 
Results: By combining contextualized embeddings such as ELMo and Flair, our system achieves the F-1 score of 87.30\% when only training based on a portion of the 2010 Informatics for Integrating Biology and the Bedside NER task dataset. After incorporating the medical lexicon into the word embedding, the F-1 score was further increased to 87.44\%. Another finding was that our system still could achieve an F-1 score of 85.36\% when the size of the training data was reduced to 40\%. Conclusions: Combined contextualized embedding could be beneficial for the clinical NER task. Moreover, the semantic lexicon could be used to further improve the performance of the clinical NER system. ", doi={10.2196/14850}, url={http://medinform.jmir.org/2019/4/e14850/}, eprint={31719024}, eprinttype={pubmed} } @Article{info:doi/10.2196/14340, author={Jin, Yonghao and Li, Fei and Vimalananda, G. Varsha and Yu, Hong}, title={Automatic Detection of Hypoglycemic Events From the Electronic Health Record Notes of Diabetes Patients: Empirical Study}, journal={JMIR Med Inform}, year={2019}, month=nov, day={8}, volume={7}, number={4}, pages={e14340}, keywords={natural language processing; convolutional neural networks; hypoglycemia; adverse events}, abstract="Background: Hypoglycemic events are common and potentially dangerous conditions among patients being treated for diabetes. Automatic detection of such events could improve patient care and is valuable in population studies. Electronic health records (EHRs) are valuable resources for the detection of such events. Objective: In this study, we aim to develop a deep-learning--based natural language processing (NLP) system to automatically detect hypoglycemic events from EHR notes. Our model is called the High-Performing System for Automatically Detecting Hypoglycemic Events (HYPE). 
Methods: Domain experts reviewed 500 EHR notes of diabetes patients to determine whether each sentence contained a hypoglycemic event or not. We used this annotated corpus to train and evaluate HYPE, the high-performance NLP system for hypoglycemia detection. We built and evaluated both a classical machine learning model (ie, support vector machines [SVMs]) and state-of-the-art neural network models. Results: We found that neural network models outperformed the SVM model. The convolutional neural network (CNN) model yielded the highest performance in a 10-fold cross-validation setting: mean precision=0.96 (SD 0.03), mean recall=0.86 (SD 0.03), and mean F1=0.91 (SD 0.03). Conclusions: Despite the challenges posed by small and highly imbalanced data, our CNN-based HYPE system still achieved a high performance for hypoglycemia detection. HYPE can be used for EHR-based hypoglycemia surveillance and population studies in diabetes patients. ", doi={10.2196/14340}, url={http://medinform.jmir.org/2019/4/e14340/}, eprint={31702562}, eprinttype={pubmed} } @Article{info:doi/10.2196/12575, author={Petch, Jeremy and Batt, Jane and Murray, Joshua and Mamdani, Muhammad}, title={Extracting Clinical Features From Dictated Ambulatory Consult Notes Using a Commercially Available Natural Language Processing Tool: Pilot, Retrospective, Cross-Sectional Validation Study}, journal={JMIR Med Inform}, year={2019}, month=nov, day={1}, volume={7}, number={4}, pages={e12575}, keywords={natural language processing; electronic health record; tuberculosis}, abstract="Background: The increasing adoption of electronic health records (EHRs) in clinical practice holds the promise of improving care and advancing research by serving as a rich source of data, but most EHRs allow clinicians to enter data in a text format without much structure. 
Natural language processing (NLP) may reduce reliance on manual abstraction of these text data by extracting clinical features directly from unstructured clinical digital text data and converting them into structured data. Objective: This study aimed to assess the performance of a commercially available NLP tool for extracting clinical features from free-text consult notes. Methods: We conducted a pilot, retrospective, cross-sectional study of the accuracy of NLP from dictated consult notes from our tuberculosis clinic with manual chart abstraction as the reference standard. Consult notes for 130 patients were extracted and processed using NLP. We extracted 15 clinical features from these consult notes and grouped them a priori into categories of simple, moderate, and complex for analysis. Results: For the primary outcome of overall accuracy, NLP performed best for features classified as simple, achieving an overall accuracy of 96\% (95\% CI 94.3-97.6). Performance was slightly lower for features of moderate clinical and linguistic complexity at 93\% (95\% CI 91.1-94.4), and lowest for complex features at 91\% (95\% CI 87.3-93.1). Conclusions: The findings of this study support the use of NLP for extracting clinical features from dictated consult notes in the setting of a tuberculosis clinic. Further research is needed to fully establish the validity of NLP for this and other purposes. 
", doi="10.2196/12575", url="http://medinform.jmir.org/2019/4/e12575/", url="http://www.ncbi.nlm.nih.gov/pubmed/31682579" } @Article{info:doi/10.2196/15980, author="Spasic, Irena and Krzeminski, Dominik and Corcoran, Padraig and Balinsky, Alexander", title="Cohort Selection for Clinical Trials From Longitudinal Patient Records: Text Mining Approach", journal="JMIR Med Inform", year="2019", month="Oct", day="31", volume="7", number="4", pages="e15980", keywords="natural language processing", keywords="machine learning", keywords="electronic medical records", keywords="clinical trial", keywords="eligibility determination", abstract="Background: Clinical trials are an important step in introducing new interventions into clinical practice by generating data on their safety and efficacy. Clinical trials need to ensure that participants are similar so that the findings can be attributed to the interventions studied and not to some other factors. Therefore, each clinical trial defines eligibility criteria, which describe characteristics that must be shared by the participants. Unfortunately, the complexities of eligibility criteria may not allow them to be translated directly into readily executable database queries. Instead, they may require careful analysis of the narrative sections of medical records. Manual screening of medical records is time consuming, thus negatively affecting the timeliness of the recruitment process. Objective: Track 1 of the 2018 National Natural Language Processing Clinical Challenge focused on the task of cohort selection for clinical trials, aiming to answer the following question: Can natural language processing be applied to narrative medical records to identify patients who meet eligibility criteria for clinical trials? The task required the participating systems to analyze longitudinal patient records to determine if the corresponding patients met the given eligibility criteria. 
We aimed to describe a system developed to address this task. Methods: Our system consisted of 13 classifiers, one for each eligibility criterion. All classifiers used a bag-of-words document representation model. To prevent the loss of relevant contextual information associated with such representation, a pattern-matching approach was used to extract context-sensitive features. They were embedded back into the text as lexically distinguishable tokens, which were consequently featured in the bag-of-words representation. Supervised machine learning was chosen wherever a sufficient number of both positive and negative instances was available to learn from. A rule-based approach focusing on a small set of relevant features was chosen for the remaining criteria. Results: The system was evaluated using microaveraged F measure. Overall, 4 machine algorithms, including support vector machine, logistic regression, na{\"i}ve Bayesian classifier, and gradient tree boosting (GTB), were evaluated on the training data using 10--fold cross-validation. Overall, GTB demonstrated the most consistent performance. Its performance peaked when oversampling was used to balance the training data. The final evaluation was performed on previously unseen test data. On average, the F measure of 89.04\% was comparable to 3 of the top ranked performances in the shared task (91.11\%, 90.28\%, and 90.21\%). With an F measure of 88.14\%, we significantly outperformed these systems (81.03\%, 78.50\%, and 70.81\%) in identifying patients with advanced coronary artery disease. Conclusions: The holdout evaluation provides evidence that our system was able to identify eligible patients for the given clinical trial with high accuracy. Our approach demonstrates how rule-based knowledge infusion can improve the performance of machine learning algorithms even when trained on a relatively small dataset. 
", doi="10.2196/15980", url="http://medinform.jmir.org/2019/4/e15980/", url="http://www.ncbi.nlm.nih.gov/pubmed/31674914" } @Article{info:doi/10.2196/14830, author="Li, Fei and Jin, Yonghao and Liu, Weisong and Rawat, Singh Bhanu Pratap and Cai, Pengshan and Yu, Hong", title="Fine-Tuning Bidirectional Encoder Representations From Transformers (BERT)--Based Models on Large-Scale Electronic Health Record Notes: An Empirical Study", journal="JMIR Med Inform", year="2019", month="Sep", day="12", volume="7", number="3", pages="e14830", keywords="natural language processing", keywords="entity normalization", keywords="deep learning", keywords="electronic health record note", keywords="BERT", abstract="Background: The bidirectional encoder representations from transformers (BERT) model has achieved great success in many natural language processing (NLP) tasks, such as named entity recognition and question answering. However, little prior work has explored this model to be used for an important task in the biomedical and clinical domains, namely entity normalization. Objective: We aim to investigate the effectiveness of BERT-based models for biomedical or clinical entity normalization. In addition, our second objective is to investigate whether the domains of training data influence the performances of BERT-based models as well as the degree of influence. Methods: Our data was comprised of 1.5 million unlabeled electronic health record (EHR) notes. We first fine-tuned BioBERT on this large collection of unlabeled EHR notes. This generated our BERT-based model trained using 1.5 million electronic health record notes (EhrBERT). We then further fine-tuned EhrBERT, BioBERT, and BERT on three annotated corpora for biomedical and clinical entity normalization: the Medication, Indication, and Adverse Drug Events (MADE) 1.0 corpus, the National Center for Biotechnology Information (NCBI) disease corpus, and the Chemical-Disease Relations (CDR) corpus. 
We compared our models with two state-of-the-art normalization systems, namely MetaMap and disease name normalization (DNorm). Results: EhrBERT achieved 40.95\% F1 in the MADE 1.0 corpus for mapping named entities to the Medical Dictionary for Regulatory Activities and the Systematized Nomenclature of Medicine---Clinical Terms (SNOMED-CT), which have about 380,000 terms. In this corpus, EhrBERT outperformed MetaMap by 2.36\% in F1. For the NCBI disease corpus and CDR corpus, EhrBERT also outperformed DNorm by improving the F1 scores from 88.37\% and 89.92\% to 90.35\% and 93.82\%, respectively. Compared with BioBERT and BERT, EhrBERT outperformed them on the MADE 1.0 corpus and the CDR corpus. Conclusions: Our work shows that BERT-based models have achieved state-of-the-art performance for biomedical and clinical entity normalization. BERT-based models can be readily fine-tuned to normalize any kind of named entities. ", doi={10.2196/14830}, url={http://medinform.jmir.org/2019/3/e14830/}, eprint={31516126}, eprinttype={pubmed} } @Article{info:doi/10.2196/13802, author={Hatef, Elham and Rouhizadeh, Masoud and Tia, Iddrisu and Lasser, Elyse and Hill-Briggs, Felicia and Marsteller, Jill and Kharrazi, Hadi}, title={Assessing the Availability of Data on Social and Behavioral Determinants in Structured and Unstructured Electronic Health Records: A Retrospective Analysis of a Multilevel Health Care System}, journal={JMIR Med Inform}, year={2019}, month=aug, day={02}, volume={7}, number={3}, pages={e13802}, keywords={social and behavioral determinants of health; electronic health record; structured data; unstructured data; natural language processing; multi-level health care system}, abstract="Background: Most US health care providers have adopted electronic health records (EHRs) that facilitate the uniform collection of clinical information. 
However, standardized data formats to capture social and behavioral determinants of health (SBDH) in structured EHR fields are still evolving and not adopted widely. Consequently, at the point of care, SBDH data are often documented within unstructured EHR fields that require time-consuming and subjective methods to retrieve. Meanwhile, collecting SBDH data using traditional surveys on a large sample of patients is infeasible for health care providers attempting to rapidly incorporate SBDH data in their population health management efforts. A potential approach to facilitate targeted SBDH data collection is applying information extraction methods to EHR data to prescreen the population for identification of immediate social needs. Objective: Our aim was to examine the availability and characteristics of SBDH data captured in the EHR of a multilevel academic health care system that provides both inpatient and outpatient care to patients with varying SBDH across Maryland. Methods: We measured the availability of selected patient-level SBDH in both structured and unstructured EHR data. We assessed various SBDH including demographics, preferred language, alcohol use, smoking status, social connection and/or isolation, housing issues, financial resource strains, and availability of a home address. EHR's structured data were represented by information collected between January 2003 and June 2018 from 5,401,324 patients. EHR's unstructured data represented information captured for 1,188,202 patients between July 2016 and May 2018 (a shorter time frame because of limited availability of consistent unstructured data). We used text-mining techniques to extract a subset of SBDH factors from EHR's unstructured data. Results: We identified a valid address or zip code for 5.2 million (95.00\%) of approximately 5.4 million patients. 
Ethnicity was captured for 2.7 million (50.00\%), whereas race was documented for 4.9 million (90.00\%) and a preferred language for 2.7 million (49.00\%) patients. Information regarding alcohol use and smoking status was coded for 490,348 (9.08\%) and 1,728,749 (32.01\%) patients, respectively. Using the International Classification of Diseases--10th Revision diagnoses codes, we identified 35,171 (0.65\%) patients with information related to social connection/isolation, 10,433 (0.19\%) patients with housing issues, and 3543 (0.07\%) patients with income/financial resource strain. Of approximately 1.2 million unique patients with unstructured data, 30,893 (2.60\%) had at least one clinical note containing phrases referring to social connection/isolation, 35,646 (3.00\%) included housing issues, and 11,882 (1.00\%) had mentions of financial resource strain. Conclusions: Apart from demographics, SBDH data are not regularly collected for patients. Health care providers should assess the availability and characteristics of SBDH data in EHRs. Evaluating the quality of SBDH data can potentially enable health care providers to modify underlying workflows to improve the documentation, collection, and extraction of SBDH data from EHRs. ", doi={10.2196/13802}, url={http://medinform.jmir.org/2019/3/e13802/}, eprint={31376277}, eprinttype={pubmed} } @Article{info:doi/10.2196/12310, author={Dynomant, Emeric and Lelong, Romain and Dahamna, Badisse and Massonnaud, Cl{\'e}ment and Kerdelhu{\'e}, Ga{\'e}tan and Grosjean, Julien and Canu, St{\'e}phane and Darmoni, J. Stefan}, title={Word Embedding for the {French} Natural Language in Health Care: Comparative Study}, journal={JMIR Med Inform}, year={2019}, month=jul, day={29}, volume={7}, number={3}, pages={e12310}, keywords={natural language processing; data mining; data curation}, abstract="Background: Word embedding technologies, a set of language modeling and feature learning techniques in natural language processing (NLP), are now used in a wide range of applications. However, no formal evaluation and comparison have been made on the ability of each of the 3 current most famous unsupervised implementations (Word2Vec, GloVe, and FastText) to keep track of the semantic similarities existing between words, when trained on the same dataset. Objective: The aim of this study was to compare embedding methods trained on a corpus of French health-related documents produced in a professional context. The best method will then help us develop a new semantic annotator. Methods: Unsupervised embedding models have been trained on 641,279 documents originating from the Rouen University Hospital. These data are not structured and cover a wide range of documents produced in a clinical setting (discharge summary, procedure reports, and prescriptions). In total, 4 rated evaluation tasks were defined (cosine similarity, odd one, analogy-based operations, and human formal evaluation) and applied on each model, as well as embedding visualization. Results: Word2Vec had the highest score on 3 out of 4 rated tasks (analogy-based operations, odd one similarity, and human validation), particularly regarding the skip-gram architecture. Conclusions: Although this implementation had the best rate for semantic properties conservation, each model has its own qualities and defects, such as the training time, which is very short for GloVe, or morphological similarity conservation observed with FastText. 
Models and test sets produced by this study will be the first to be publicly available through a graphical interface to help advance the French biomedical research. ", doi={10.2196/12310}, url={https://medinform.jmir.org/2019/3/e12310/}, eprint={31359873}, eprinttype={pubmed} } @Article{info:doi/10.2196/14499, author={Lin, Chin and Lou, Yu-Sheng and Tsai, Dung-Jang and Lee, Chia-Cheng and Hsu, Chia-Jung and Wu, Ding-Chung and Wang, Mei-Chuen and Fang, Wen-Hui}, title={Projection Word Embedding Model With Hybrid Sampling Training for Classifying {ICD-10-CM} Codes: Longitudinal Observational Study}, journal={JMIR Med Inform}, year={2019}, month=jul, day={23}, volume={7}, number={3}, pages={e14499}, keywords={word embedding; convolutional neural network; artificial intelligence; natural language processing; electronic health records}, abstract="Background: Most current state-of-the-art models for searching the International Classification of Diseases, Tenth Revision Clinical Modification (ICD-10-CM) codes use word embedding technology to capture useful semantic properties. However, they are limited by the quality of initial word embeddings. Word embedding trained by electronic health records (EHRs) is considered the best, but the vocabulary diversity is limited by previous medical records. Thus, we require a word embedding model that maintains the vocabulary diversity of open internet databases and the medical terminology understanding of EHRs. Moreover, we need to consider the particularity of the disease classification, wherein discharge notes present only positive disease descriptions. Objective: We aimed to propose a projection word2vec model and a hybrid sampling method. In addition, we aimed to conduct a series of experiments to validate the effectiveness of these methods. 
Methods: We compared the projection word2vec model and traditional word2vec model using two corpora sources: English Wikipedia and PubMed journal abstracts. We used seven published datasets to measure the medical semantic understanding of the word2vec models and used these embeddings to identify the three--character-level ICD-10-CM diagnostic codes in a set of discharge notes. On the basis of embedding technology improvement, we also tried to apply the hybrid sampling method to improve accuracy. The 94,483 labeled discharge notes from the Tri-Service General Hospital of Taipei, Taiwan, from June 1, 2015, to June 30, 2017, were used. To evaluate the model performance, 24,762 discharge notes from July 1, 2017, to December 31, 2017, from the same hospital were used. Moreover, 74,324 additional discharge notes collected from seven other hospitals were tested. The F-measure, which is the major global measure of effectiveness, was adopted. Results: In medical semantic understanding, the original EHR embeddings and PubMed embeddings exhibited superior performance to the original Wikipedia embeddings. After projection training technology was applied, the projection Wikipedia embeddings exhibited an obvious improvement but did not reach the level of original EHR embeddings or PubMed embeddings. In the subsequent ICD-10-CM coding experiment, the model that used both projection PubMed and Wikipedia embeddings had the highest testing mean F-measure (0.7362 and 0.6693 in Tri-Service General Hospital and the seven other hospitals, respectively). Moreover, the hybrid sampling method was found to improve the model performance (F-measure=0.7371/0.6698). Conclusions: The word embeddings trained using EHR and PubMed could understand medical semantics better, and the proposed projection word2vec model improved the ability of medical semantics extraction in Wikipedia embeddings. 
Although the improvement from the projection word2vec model in the real ICD-10-CM coding task was not substantial, the models could effectively handle emerging diseases. The proposed hybrid sampling method enables the model to behave like a human expert. ", doi={10.2196/14499}, url={http://medinform.jmir.org/2019/3/e14499/} } @Article{info:doi/10.2196/13331, author={Han, Jiang and Chen, Ken and Fang, Lei and Zhang, Shaodian and Wang, Fei and Ma, Handong and Zhao, Liebin and Liu, Shijian}, title={Improving the Efficacy of the Data Entry Process for Clinical Research With a Natural Language Processing--Driven Medical Information Extraction System: Quantitative Field Research}, journal={JMIR Med Inform}, year={2019}, month=jul, day={16}, volume={7}, number={3}, pages={e13331}, keywords={electronic data capture; electric medical records; case report form; natural language processing; field research}, abstract="Background: The growing interest in observational trials using patient data from electronic medical records poses challenges to both efficiency and quality of clinical data collection and management. Even with the help of electronic data capture systems and electronic case report forms (eCRFs), the manual data entry process followed by chart review is still time consuming. Objective: To facilitate the data entry process, we developed a natural language processing--driven medical information extraction system (NLP-MIES) based on the i2b2 reference standard. We aimed to evaluate whether the NLP-MIES--based eCRF application could improve the accuracy and efficiency of the data entry process. Methods: We conducted a randomized and controlled field experiment, and 24 eligible participants were recruited (12 for the manual group and 12 for NLP-MIES--supported group). 
We simulated the real-world eCRF completion process using our system and compared the performance of data entry on two research topics, pediatric congenital heart disease and pneumonia. Results: For the congenital heart disease condition, the NLP-MIES--supported group increased accuracy by 15\% (95\% CI 4\%-120\%, P=.03) and reduced elapsed time by 33\% (95\% CI 22\%-42\%, P<.001) compared with the manual group. For the pneumonia condition, the NLP-MIES--supported group increased accuracy by 18\% (95\% CI 6\%-32\%, P=.008) and reduced elapsed time by 31\% (95\% CI 19\%-41\%, P<.001). Conclusions: Our system could improve both the accuracy and efficiency of the data entry process. ", doi={10.2196/13331}, url={http://medinform.jmir.org/2019/3/e13331/}, eprint={31313661}, eprinttype={pubmed} } @Article{info:doi/10.2196/12876, author={Foufi, Vasiliki and Timakum, Tatsawan and Gaudet-Blavignac, Christophe and Lovis, Christian and Song, Min}, title={Mining of Textual Health Information from {Reddit}: Analysis of Chronic Diseases With Extracted Entities and Their Relations}, journal={J Med Internet Res}, year={2019}, month=jun, day={13}, volume={21}, number={6}, pages={e12876}, keywords={social media; chronic disease; data mining}, abstract="Background: Social media platforms constitute a rich data source for natural language processing tasks such as named entity recognition, relation extraction, and sentiment analysis. In particular, social media platforms about health provide a different insight into patient's experiences with diseases and treatment than those found in the scientific literature. Objective: This paper aimed to report a study of entities related to chronic diseases and their relation in user-generated text posts. The major focus of our research is the study of biomedical entities found in health social media platforms and their relations and the way people suffering from chronic diseases express themselves. 
Methods: We collected a corpus of 17,624 text posts from disease-specific subreddits of the social news and discussion website Reddit. For entity and relation extraction from this corpus, we employed the PKDE4J tool developed by Song et al (2015). PKDE4J is a text mining system that integrates dictionary-based entity extraction and rule-based relation extraction in a highly flexible and extensible framework. Results: Using PKDE4J, we extracted 2 types of entities and relations: biomedical entities and relations and subject-predicate-object entity relations. In total, 82,138 entities and 30,341 relation pairs were extracted from the Reddit dataset. The most highly mentioned entities were those related to oncological disease (2884 occurrences of cancer) and asthma (2180 occurrences). The relation pair anatomy-disease was the most frequent (5550 occurrences), the highest frequent entities in this pair being cancer and lymph. The manual validation of the extracted entities showed a very good performance of the system at the entity extraction task (3682/5151, 71.48\% extracted entities were correctly labeled). Conclusions: This study showed that people are eager to share their personal experience with chronic diseases on social media platforms despite possible privacy and security issues. The results reported in this paper are promising and demonstrate the need for more in-depth studies on the way patients with chronic diseases express themselves on social media platforms. 
", doi="10.2196/12876", url="http://www.jmir.org/2019/6/e12876/", url="http://www.ncbi.nlm.nih.gov/pubmed/31199327" } @Article{info:doi/10.2196/11036, author="Mamidi, Ravali and Miller, Michele and Banerjee, Tanvi and Romine, William and Sheth, Amit", title="Identifying Key Topics Bearing Negative Sentiment on Twitter: Insights Concerning the 2015-2016 Zika Epidemic", journal="JMIR Public Health Surveill", year="2019", month="Jun", day="04", volume="5", number="2", pages="e11036", keywords="social media", keywords="machine learning", keywords="natural language processing", keywords="epidemiology", keywords="Zika", keywords="infodemiology", keywords="infoveillance", keywords="twitter", keywords="sentiment analysis", abstract="Background: To understand the public sentiment regarding the Zika virus, social media can be leveraged to understand how positive, negative, and neutral sentiments are expressed in society. Specifically, understanding the characteristics of negative sentiment could help inform federal disease control agencies' efforts to disseminate relevant information to the public about Zika-related issues. Objective: The purpose of this study was to analyze the public sentiment concerning Zika using posts on Twitter and determine the qualitative characteristics of positive, negative, and neutral sentiments expressed. Methods: Machine learning techniques and algorithms were used to analyze the sentiment of tweets concerning Zika. A supervised machine learning classifier was built to classify tweets into 3 sentiment categories: positive, neutral, and negative. Tweets in each category were then examined using a topic-modeling approach to determine the main topics for each category, with focus on the negative category. Results: A total of 5303 tweets were manually annotated and used to train multiple classifiers. These performed moderately well (F1 score=0.48-0.68) with text-based feature extraction. 
All 48,734 tweets were then categorized into the sentiment categories. Overall, 10 topics for each sentiment category were identified using topic modeling, with a focus on the negative sentiment category. Conclusions: Our study demonstrates how sentiment expressed within discussions of epidemics on Twitter can be discovered. This allows public health officials to understand public sentiment regarding an epidemic and enables them to address specific elements of negative sentiment in real time. Our negative sentiment classifier was able to identify tweets concerning Zika with 3 broad themes: neural defects, Zika abnormalities, and reports and findings. These broad themes were based on domain expertise and from topics discussed in journals such as Morbidity and Mortality Weekly Report and Vaccine. As the majority of topics in the negative sentiment category concerned symptoms, officials should focus on spreading information about prevention and treatment research. ", doi="10.2196/11036", url="http://publichealth.jmir.org/2019/2/e11036/", url="http://www.ncbi.nlm.nih.gov/pubmed/31165711" } @Article{info:doi/10.2196/11264, author="Nikfarjam, Azadeh and Ransohoff, D. Julia and Callahan, Alison and Jones, Erik and Loew, Brian and Kwong, Y. Bernice and Sarin, Y. Kavita and Shah, H. Nigam", title="Early Detection of Adverse Drug Reactions in Social Health Networks: A Natural Language Processing Pipeline for Signal Detection", journal="JMIR Public Health Surveill", year="2019", month="Jun", day="03", volume="5", number="2", pages="e11264", keywords="natural language processing", keywords="signal detection", keywords="adverse drug reactions", keywords="social media", keywords="drug-related side effects", keywords="medical oncology", keywords="antineoplastic agents", keywords="machine learning", abstract="Background: Adverse drug reactions (ADRs) occur in nearly all patients on chemotherapy, causing morbidity and therapy disruptions. 
Detection of such ADRs is limited in clinical trials, which are underpowered to detect rare events. Early recognition of ADRs in the postmarketing phase could substantially reduce morbidity and decrease societal costs. Internet community health forums provide a mechanism for individuals to discuss real-time health concerns and can enable computational detection of ADRs. Objective: The goal of this study is to identify cutaneous ADR signals in social health networks and compare the frequency and timing of these ADRs to clinical reports in the literature. Methods: We present a natural language processing-based, ADR signal-generation pipeline based on patient posts on Internet social health networks. We identified user posts from the Inspire health forums related to two chemotherapy classes: erlotinib, an epidermal growth factor receptor inhibitor, and nivolumab and pembrolizumab, immune checkpoint inhibitors. We extracted mentions of ADRs from unstructured content of patient posts. We then performed population-level association analyses and time-to-detection analyses. Results: Our system detected cutaneous ADRs from patient reports with high precision (0.90) and at frequencies comparable to those documented in the literature but an average of 7 months ahead of their literature reporting. Known ADRs were associated with higher proportional reporting ratios compared to negative controls, demonstrating the robustness of our analyses. Our named entity recognition system achieved a 0.738 microaveraged F-measure in detecting ADR entities, not limited to cutaneous ADRs, in health forum posts. Additionally, we discovered the novel ADR of hypohidrosis reported by 23 patients in erlotinib-related posts; this ADR was absent from 15 years of literature on this medication and we recently reported the finding in a clinical oncology journal. 
Conclusions: Several hundred million patients report health concerns in social health networks, yet this information is markedly underutilized for pharmacosurveillance. We demonstrated the ability of a natural language processing-based signal-generation pipeline to accurately detect patient reports of ADRs months in advance of literature reporting and the robustness of statistical analyses to validate system detections. Our findings suggest the important contributions that social health network data can play in contributing to more comprehensive and timely pharmacovigilance. ", doi="10.2196/11264", url="http://publichealth.jmir.org/2019/2/e11264/", url="http://www.ncbi.nlm.nih.gov/pubmed/31162134" } @Article{info:doi/10.2196/12239, author="Sheikhalishahi, Seyedmostafa and Miotto, Riccardo and Dudley, T. Joel and Lavelli, Alberto and Rinaldi, Fabio and Osmani, Venet", title="Natural Language Processing of Clinical Notes on Chronic Diseases: Systematic Review", journal="JMIR Med Inform", year="2019", month="Apr", day="27", volume="7", number="2", pages="e12239", keywords="electronic health records", keywords="clinical notes", keywords="chronic diseases", keywords="natural language processing", keywords="machine learning", keywords="deep learning", keywords="heart disease", keywords="stroke", keywords="cancer", keywords="diabetes", keywords="lung disease", abstract="Background: Novel approaches that complement and go beyond evidence-based medicine are required in the domain of chronic diseases, given the growing incidence of such conditions on the worldwide population. A promising avenue is the secondary use of electronic health records (EHRs), where patient data are analyzed to conduct clinical and translational research. Methods based on machine learning to process EHRs are resulting in improved understanding of patient clinical trajectories and chronic disease risk prediction, creating a unique opportunity to derive previously unknown clinical insights. 
However, a wealth of clinical histories remains locked behind clinical narratives in free-form text. Consequently, unlocking the full potential of EHR data is contingent on the development of natural language processing (NLP) methods to automatically transform clinical text into structured clinical data that can guide clinical decisions and potentially delay or prevent disease onset. Objective: The goal of the research was to provide a comprehensive overview of the development and uptake of NLP methods applied to free-text clinical notes related to chronic diseases, including the investigation of challenges faced by NLP methodologies in understanding clinical narratives. Methods: Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) guidelines were followed and searches were conducted in 5 databases using ``clinical notes,'' ``natural language processing,'' and ``chronic disease'' and their variations as keywords to maximize coverage of the articles. Results: Of the 2652 articles considered, 106 met the inclusion criteria. Review of the included papers resulted in identification of 43 chronic diseases, which were then further classified into 10 disease categories using the International Classification of Diseases, 10th Revision. The majority of studies focused on diseases of the circulatory system (n=38) while endocrine and metabolic diseases were fewest (n=14). This was due to the structure of clinical records related to metabolic diseases, which typically contain much more structured data, compared with medical records for diseases of the circulatory system, which focus more on unstructured data and consequently have seen a stronger focus of NLP. The review has shown that there is a significant increase in the use of machine learning methods compared to rule-based approaches; however, deep learning methods remain emergent (n=3). 
Consequently, the majority of works focus on classification of disease phenotype with only a handful of papers addressing extraction of comorbidities from the free text or integration of clinical notes with structured data. There is a notable use of relatively simple methods, such as shallow classifiers (or combination with rule-based methods), due to the interpretability of predictions, which still represents a significant issue for more complex methods. Finally, scarcity of publicly available data may also have contributed to insufficient development of more advanced methods, such as extraction of word embeddings from clinical notes. Conclusions: Efforts are still required to improve (1) progression of clinical NLP methods from extraction toward understanding; (2) recognition of relations among entities rather than entities in isolation; (3) temporal extraction to understand past, current, and future clinical events; (4) exploitation of alternative sources of clinical knowledge; and (5) availability of large-scale, de-identified clinical corpora. ", doi="10.2196/12239", url="http://medinform.jmir.org/2019/2/e12239/", url="http://www.ncbi.nlm.nih.gov/pubmed/31066697" } @Article{info:doi/10.2196/12109, author="Fu, Sunyang and Leung, Y. Lester and Wang, Yanshan and Raulli, Anne-Olivia and Kallmes, F. David and Kinsman, A. Kristin and Nelson, B. Kristoff and Clark, S. Michael and Luetmer, H. Patrick and Kingsbury, R. Paul and Kent, M. 
David and Liu, Hongfang", title="Natural Language Processing for the Identification of Silent Brain Infarcts From Neuroimaging Reports", journal="JMIR Med Inform", year="2019", month="Apr", day="21", volume="7", number="2", pages="e12109", keywords="natural language processing", keywords="neuroimaging", keywords="electronic health records", abstract="Background: Silent brain infarction (SBI) is defined as the presence of 1 or more brain lesions, presumed to be because of vascular occlusion, found by neuroimaging (magnetic resonance imaging or computed tomography) in patients without clinical manifestations of stroke. It is more common than stroke and can be detected in 20\% of healthy elderly people. Early detection of SBI may mitigate the risk of stroke by offering preventative treatment plans. Natural language processing (NLP) techniques offer an opportunity to systematically identify SBI cases from electronic health records (EHRs) by extracting, normalizing, and classifying SBI-related incidental findings interpreted by radiologists from neuroimaging reports. Objective: This study aimed to develop NLP systems to determine individuals with incidentally discovered SBIs from neuroimaging reports at 2 sites: Mayo Clinic and Tufts Medical Center. Methods: Both rule-based and machine learning approaches were adopted in developing the NLP system. The rule-based system was implemented using the open source NLP pipeline MedTagger, developed by Mayo Clinic. Features for rule-based systems, including significant words and patterns related to SBI, were generated using pointwise mutual information. The machine learning models adopted convolutional neural network (CNN), random forest, support vector machine, and logistic regression. The performance of the NLP algorithm was compared with a manually created gold standard. 
The gold standard dataset includes 1000 radiology reports randomly retrieved from the 2 study sites (Mayo and Tufts) corresponding to patients with no prior or current diagnosis of stroke or dementia. 400 out of the 1000 reports were randomly sampled and double read to determine interannotator agreements. The gold standard dataset was equally split into 3 subsets for training, developing, and testing. Results: Among the 400 reports selected to determine interannotator agreement, 5 reports were removed due to invalid scan types. The interannotator agreements across Mayo and Tufts neuroimaging reports were 0.87 and 0.91, respectively. The rule-based system yielded the best performance of predicting SBI with an accuracy, sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) of 0.991, 0.925, 1.000, 1.000, and 0.990, respectively. The CNN achieved the best score on predicting white matter disease (WMD) with an accuracy, sensitivity, specificity, PPV, and NPV of 0.994, 0.994, 0.994, 0.994, and 0.994, respectively. Conclusions: We adopted a standardized data abstraction and modeling process to develop NLP techniques (rule-based and machine learning) to detect incidental SBIs and WMDs from annotated neuroimaging reports. Validation statistics suggested a high feasibility of detecting SBIs and WMDs from EHRs using NLP. ", doi="10.2196/12109", url="http://medinform.jmir.org/2019/2/e12109/", url="http://www.ncbi.nlm.nih.gov/pubmed/31066686" } @Article{info:doi/10.2196/11410, author="Milne, N. David and McCabe, L. Kathryn and Calvo, A. 
Rafael", title="Improving Moderator Responsiveness in Online Peer Support Through Automated Triage", journal="J Med Internet Res", year="2019", month="Apr", day="26", volume="21", number="4", pages="e11410", keywords="social support", keywords="triage", keywords="classification", keywords="natural language processing", abstract="Background: Online peer support forums require oversight to ensure they remain safe and therapeutic. As online communities grow, they place a greater burden on their human moderators, which increases the likelihood that people at risk may be overlooked. This study evaluated the potential for machine learning to assist online peer support by directing moderators' attention where it is most needed. Objective: This study aimed to evaluate the accuracy of an automated triage system and the extent to which it influences moderator behavior. Methods: A machine learning classifier was trained to prioritize forum messages as green, amber, red, or crisis depending on how urgently they require attention from a moderator. This was then launched as a set of widgets injected into a popular online peer support forum hosted by ReachOut.com, an Australian Web-based youth mental health service that aims to intervene early in the onset of mental health problems in young people. The accuracy of the system was evaluated using a holdout test set of manually prioritized messages. The impact on moderator behavior was measured as response ratio and response latency, that is, the proportion of messages that receive at least one reply from a moderator and how long it took for these replies to be made. These measures were compared across 3 periods: before launch, after an informal launch, and after a formal launch accompanied by training. Results: The algorithm achieved 84\% f-measure in identifying content that required a moderator response. 
Between prelaunch and post-training periods, response ratios increased by 0.9, 4.4, and 10.5 percentage points for messages labelled as crisis, red, and green, respectively, but decreased by 5.0 percentage points for amber messages. Logistic regression indicated that the triage system was a significant contributor to response ratios for green, amber, and red messages, but not for crisis messages. Response latency was significantly reduced (P<.001), between the same periods, by factors of 80\%, 80\%, 77\%, and 12\% for crisis, red, amber, and green messages, respectively. Regression analysis indicated that the triage system made a significant and unique contribution to reducing the time taken to respond to green, amber, and red messages, but not to crisis messages, after accounting for moderator and community activity. Conclusions: The triage system was generally accurate, and moderators were largely in agreement with how messages were prioritized. It had a modest effect on response ratios, primarily because moderators were already more likely to respond to high priority content before the introduction of triage. However, it significantly and substantially reduced the time taken for moderators to respond to prioritized content. Further evaluations are needed to assess the impact of mistakes made by the triage algorithm and how changes to moderator responsiveness impact the well-being of forum members. 
", doi="10.2196/11410", url="https://www.jmir.org/2019/4/e11410/", url="http://www.ncbi.nlm.nih.gov/pubmed/31025945" } @Article{info:doi/10.2196/11499, author="Zhou, Liyuan and Suominen, Hanna and Gedeon, Tom", title="Adapting State-of-the-Art Deep Language Models to Clinical Information Extraction Systems: Potentials, Challenges, and Solutions", journal="JMIR Med Inform", year="2019", month="Apr", day="25", volume="7", number="2", pages="e11499", keywords="computer systems", keywords="artificial intelligence", keywords="deep learning", keywords="information storage and retrieval", keywords="medical informatics", keywords="nursing records", keywords="patient handoff", abstract="Background: Deep learning (DL) has been widely used to solve problems with success in speech recognition, visual object recognition, and object detection for drug discovery and genomics. Natural language processing has achieved noticeable progress in artificial intelligence. This gives an opportunity to improve on the accuracy and human-computer interaction of clinical informatics. However, due to difference of vocabularies and context between a clinical environment and generic English, transplanting language models directly from up-to-date methods to real-world health care settings is not always satisfactory. Moreover, the legal restriction on using privacy-sensitive patient records hinders the progress in applying machine learning (ML) to clinical language processing. Objective: The aim of this study was to investigate 2 ways to adapt state-of-the-art language models to extracting patient information from free-form clinical narratives to populate a handover form at a nursing shift change automatically for proofing and revising by hand: first, by using domain-specific word representations and second, by using transfer learning models to adapt knowledge from general to clinical English. 
We have described the practical problem, composed it as an ML task known as information extraction, proposed methods for solving the task, and evaluated their performance. Methods: First, word representations trained from different domains served as the input of a DL system for information extraction. Second, the transfer learning model was applied as a way to adapt the knowledge learned from general text sources to the task domain. The goal was to gain improvements in the extraction performance, especially for the classes that were topically related but did not have a sufficient amount of model solutions available for ML directly from the target domain. A total of 3 independent datasets were generated for this task, and they were used as the training (101 patient reports), validation (100 patient reports), and test (100 patient reports) sets in our experiments. Results: Our system is now the state-of-the-art in this task. Domain-specific word representations improved the macroaveraged F1 by 3.4\%. Transferring the knowledge from general English corpora to the task-specific domain contributed a further 7.1\% improvement. The best performance in populating the handover form with 37 headings was the macroaveraged F1 of 41.6\% and F1 of 81.1\% for filtering out irrelevant information. Performance differences between this system and its baseline were statistically significant (P<.001; Wilcoxon test). Conclusions: To our knowledge, our study is the first attempt to transfer models from general deep models to specific tasks in health care and gain a significant improvement. As transfer learning shows its advantage over other methods, especially on classes with a limited amount of training data, less experts' time is needed to annotate data for ML, which may enable good results even in resource-poor domains. 
", doi="10.2196/11499", url="http://medinform.jmir.org/2019/2/e11499/", url="http://www.ncbi.nlm.nih.gov/pubmed/31021325" } @Article{info:doi/10.2196/11756, author="Chen, T. Annie and Swaminathan, Aarti and Kearns, R. William and Alberts, M. Nicole and Law, F. Emily and Palermo, M. Tonya", title="Understanding User Experience: Exploring Participants' Messages With a Web-Based Behavioral Health Intervention for Adolescents With Chronic Pain", journal="J Med Internet Res", year="2019", month="Apr", day="15", volume="21", number="4", pages="e11756", keywords="data visualization", keywords="natural language processing", keywords="chronic pain", keywords="cluster analysis", keywords="technology", abstract="Background: Delivery of behavioral health interventions on the internet offers many benefits, including accessibility, cost-effectiveness, convenience, and anonymity. In recent years, an increased number of internet interventions have been developed, targeting a range of conditions and behaviors, including depression, pain, anxiety, sleep disturbance, and eating disorders. Human support (coaching) is a common component of internet interventions that is intended to boost engagement; however, little is known about how participants interact with coaches and how this may relate to their experience with the intervention. By examining the data that participants produce during an intervention, we can characterize their interaction patterns and refine treatments to address different needs. Objective: In this study, we employed text mining and visual analytics techniques to analyze messages exchanged between coaches and participants in an internet-delivered pain management intervention for adolescents with chronic pain and their parents. Methods: We explored the main themes in coaches' and participants' messages using an automated textual analysis method, topic modeling. 
We then clustered participants' messages to identify subgroups of participants with similar engagement patterns. Results: First, we performed topic modeling on coaches' messages. The themes in coaches' messages fell into 3 categories: Treatment Content, Administrative and Technical, and Rapport Building. Next, we employed topic modeling to identify topics from participants' message histories. Similar to the coaches' topics, these were subsumed under 3 high-level categories: Health Management and Treatment Content, Questions and Concerns, and Activities and Interests. Finally, the cluster analysis identified 4 clusters, each with a distinguishing characteristic: Assignment-Focused, Short Message Histories, Pain-Focused, and Activity-Focused. The name of each cluster exemplifies the main engagement patterns of that cluster. Conclusions: In this secondary data analysis, we demonstrated how automated text analysis techniques could be used to identify messages of interest, such as questions and concerns from users. In addition, we demonstrated how cluster analysis could be used to identify subgroups of individuals who share communication and engagement patterns, and in turn facilitate personalization of interventions for different subgroups of patients. This work makes 2 key methodological contributions. First, this study is innovative in its use of topic modeling to provide a rich characterization of the textual content produced by coaches and participants in an internet-delivered behavioral health intervention. Second, to our knowledge, this is the first example of the use of a visual analysis method to cluster participants and identify similar patterns of behavior based on intervention message content. ", doi="10.2196/11756", url="http://www.jmir.org/2019/4/e11756/", url="http://www.ncbi.nlm.nih.gov/pubmed/30985288" } @Article{info:doi/10.2196/11397, author="DeJonckheere, Melissa and Nichols, P. Lauren and Vydiswaran, Vinod V. G. 
and Zhao, Xinyan and Collins-Thompson, Kevyn and Resnicow, Kenneth and Chang, Tammy", title="Using Text Messaging, Social Media, and Interviews to Understand What Pregnant Youth Think About Weight Gain During Pregnancy", journal="JMIR Form Res", year="2019", month="Apr", day="01", volume="3", number="2", pages="e11397", keywords="methods", keywords="adolescents", keywords="weight gain", keywords="pregnancy", keywords="text messaging", keywords="social media", keywords="natural language processing", abstract="Background: The majority of pregnant youth gain more weight than recommended by the National Academy of Medicine guidelines. Excess weight gain during pregnancy increases the risk of dangerous complications during delivery, including operative delivery and stillbirth, and contributes to the risk of long-term obesity in both mother and child. Little is known regarding youth's perceptions of and knowledge about weight gain during pregnancy. Objective: The aim of this study was to describe the feasibility and acceptability of 3 novel data collection and analysis strategies for use with youth (social media posts, text message surveys, and semistructured interviews) to explore their experiences during pregnancy. The mixed-methods analysis included natural language processing and thematic analysis. Methods: To demonstrate the feasibility and acceptability of this novel approach, we used descriptive statistics and thematic qualitative analysis to characterize participation and engagement in the study. Results: Recruitment of 54 pregnant women aged between 16 and 24 years occurred from April 2016 to September 2016. All participants completed at least 1 phase of the study. Semistructured interviews had the highest rate of completion, yet all 3 strategies were feasible and acceptable to pregnant youth. 
Conclusions: This study has described a novel youth-centered strategy of triangulating 3 sources of mixed-methods data to gain a deeper understanding of a health behavior phenomenon among an at-risk population of youth. ", doi="10.2196/11397", url="https://formative.jmir.org/2019/2/e11397/", url="http://www.ncbi.nlm.nih.gov/pubmed/30932869" } @Article{info:doi/10.2196/13039, author="Chen, Tao and Dredze, Mark and Weiner, P. Jonathan and Hernandez, Leilani and Kimura, Joe and Kharrazi, Hadi", title="Extraction of Geriatric Syndromes From Electronic Health Record Clinical Notes: Assessment of Statistical Natural Language Processing Methods", journal="JMIR Med Inform", year="2019", month="Mar", day="26", volume="7", number="1", pages="e13039", keywords="geriatrics", keywords="clinical notes", keywords="natural language processing", keywords="information extraction", keywords="conditional random fields", abstract="Background: Geriatric syndromes in older adults are associated with adverse outcomes. However, despite being reported in clinical notes, these syndromes are often poorly captured by diagnostic codes in the structured fields of electronic health records (EHRs) or administrative records. Objective: We aim to automatically determine if a patient has any geriatric syndromes by mining the free text of associated EHR clinical notes. We assessed which statistical natural language processing (NLP) techniques are most effective. Methods: We applied conditional random fields (CRFs), a widely used machine learning algorithm, to identify each of 10 geriatric syndrome constructs in a clinical note. We assessed three sets of features and attributes for CRF operations: a base set, enhanced token, and contextual features. We trained the CRF on 3901 manually annotated notes from 85 patients, tuned the CRF on a validation set of 50 patients, and evaluated it on 50 held-out test patients. 
These notes were from a group of US Medicare patients over 65 years of age enrolled in a Medicare Advantage Health Maintenance Organization and cared for by a large group practice in Massachusetts. Results: A final feature set was formed through comprehensive feature ablation experiments. The final CRF model performed well at patient-level determination (macroaverage F1=0.834, microaverage F1=0.851); however, performance varied by construct. For example, at phrase-partial evaluation, the CRF model worked well on constructs such as absence of fecal control (F1=0.857) and vision impairment (F1=0.798) but poorly on malnutrition (F1=0.155), weight loss (F1=0.394), and severe urinary control issues (F1=0.532). Errors were primarily due to previously unobserved words (ie, out-of-vocabulary) and a lack of context. Conclusions: This study shows that statistical NLP can be used to identify geriatric syndromes from EHR-extracted clinical notes. This creates new opportunities to identify patients with geriatric syndromes and study their health outcomes. ", doi="10.2196/13039", url="http://medinform.jmir.org/2019/1/e13039/", url="http://www.ncbi.nlm.nih.gov/pubmed/30862607" } @Article{info:doi/10.2196/13067, author="Karystianis, George and Adily, Armita and Schofield, W. Peter and Greenberg, David and Jorm, Louisa and Nenadic, Goran and Butler, Tony", title="Automated Analysis of Domestic Violence Police Reports to Explore Abuse Types and Victim Injuries: Text Mining Study", journal="J Med Internet Res", year="2019", month="Mar", day="12", volume="21", number="3", pages="e13067", keywords="domestic violence", keywords="injuries", keywords="abuse types", keywords="text mining", keywords="rule-based approach", keywords="police narratives", abstract="Background: The police attend numerous domestic violence events each year, recording details of these events as both structured (coded) data and unstructured free-text narratives. 
Abuse types (including physical, psychological, emotional, and financial) conducted by persons of interest (POIs) along with any injuries sustained by victims are typically recorded in long descriptive narratives. Objective: We aimed to determine if an automated text mining method could identify abuse types and any injuries sustained by domestic violence victims in narratives contained in a large police dataset from the New South Wales Police Force. Methods: We used a training set of 200 recorded domestic violence events to design a knowledge-driven approach based on syntactical patterns in the text and then applied this approach to a large set of police reports. Results: Testing our approach on an evaluation set of 100 domestic violence events provided precision values of 90.2\% and 85.0\% for abuse type and victim injuries, respectively. In a set of 492,393 domestic violence reports, we found 71.32\% (351,178) of events with mentions of the abuse type(s) and more than one-third (177,117 events; 35.97\%) contained victim injuries. ``Emotional/verbal abuse'' (33.46\%; 117,488) was the most common abuse type, followed by ``punching'' (86,322 events; 24.58\%) and ``property damage'' (22.27\%; 78,203 events). ``Bruising'' was the most common form of injury sustained (51,455 events; 29.03\%), with ``cut/abrasion'' (28.93\%; 51,284 events) and ``red marks/signs'' (23.71\%; 42,038 events) ranking second and third, respectively. Conclusions: The results suggest that text mining can automatically extract information from police-recorded domestic violence events that can support further public health research into domestic violence, such as examining the relationship of abuse types with victim injuries and of gender and abuse types with risk escalation for victims of domestic violence. Potential also exists for this extracted information to be linked to information on the mental health status. 
", doi="10.2196/13067", url="http://www.jmir.org/2019/3/e13067/", url="http://www.ncbi.nlm.nih.gov/pubmed/30860490" } @Article{info:doi/10.2196/11990, author="Chen, Jinying and Lalor, John and Liu, Weisong and Druhl, Emily and Granillo, Edgard and Vimalananda, G. Varsha and Yu, Hong", title="Detecting Hypoglycemia Incidents Reported in Patients' Secure Messages: Using Cost-Sensitive Learning and Oversampling to Reduce Data Imbalance", journal="J Med Internet Res", year="2019", month="Mar", day="11", volume="21", number="3", pages="e11990", keywords="secure messaging", keywords="natural language processing", keywords="hypoglycemia", keywords="supervised machine learning", keywords="imbalanced data", keywords="adverse event detection", keywords="drug-related side effects and adverse reactions", abstract="Background: Improper dosing of medications such as insulin can cause hypoglycemic episodes, which may lead to severe morbidity or even death. Although secure messaging was designed for exchanging nonurgent messages, patients sometimes report hypoglycemia events through secure messaging. Detecting these patient-reported adverse events may help alert clinical teams and enable early corrective actions to improve patient safety. Objective: We aimed to develop a natural language processing system, called HypoDetect (Hypoglycemia Detector), to automatically identify hypoglycemia incidents reported in patients' secure messages. Methods: An expert in public health annotated 3000 secure message threads between patients with diabetes and US Department of Veterans Affairs clinical teams as containing patient-reported hypoglycemia incidents or not. A physician independently annotated 100 threads randomly selected from this dataset to determine interannotator agreement. We used this dataset to develop and evaluate HypoDetect. 
HypoDetect incorporates 3 machine learning algorithms widely used for text classification: linear support vector machines, random forest, and logistic regression. We explored different learning features, including new knowledge-driven features. Because only 114 (3.80\%) messages were annotated as positive, we investigated cost-sensitive learning and oversampling methods to mitigate the challenge of imbalanced data. Results: The interannotator agreement was Cohen kappa=.976. Using cross-validation, logistic regression with cost-sensitive learning achieved the best performance (area under the receiver operating characteristic curve=0.954, sensitivity=0.693, specificity=0.974, F1 score=0.590). Cost-sensitive learning and the ensembled synthetic minority oversampling technique improved the sensitivity of the baseline systems substantially (by 0.123 to 0.728 absolute gains). Our results show that a variety of features contributed to the best performance of HypoDetect. Conclusions: Despite the challenge of data imbalance, HypoDetect achieved promising results for the task of detecting hypoglycemia incidents from secure messages. The system has a great potential to facilitate early detection and treatment of hypoglycemia. 
", doi="10.2196/11990", url="http://www.jmir.org/2019/3/e11990/", url="http://www.ncbi.nlm.nih.gov/pubmed/30855231" } @Article{info:doi/10.2196/12783, author="Wakamiya, Shoko and Morita, Mizuki and Kano, Yoshinobu and Ohkuma, Tomoko and Aramaki, Eiji", title="Tweet Classification Toward Twitter-Based Disease Surveillance: New Data, Methods, and Evaluations", journal="J Med Internet Res", year="2019", month="Feb", day="20", volume="21", number="2", pages="e12783", keywords="text mining", keywords="social media", keywords="machine learning", keywords="natural language processing", keywords="artificial intelligence", keywords="surveillance", keywords="infodemiology", keywords="infoveillance", abstract="Background: The amount of medical and clinical-related information on the Web is increasing. Among the different types of information available, social media--based data obtained directly from people are particularly valuable and are attracting significant attention. To encourage medical natural language processing (NLP) research exploiting social media data, the 13th NII Testbeds and Community for Information access Research (NTCIR-13) Medical natural language processing for Web document (MedWeb) provides pseudo-Twitter messages in a cross-language and multi-label corpus, covering 3 languages (Japanese, English, and Chinese) and annotated with 8 symptom labels (such as cold, fever, and flu). Then, participants classify each tweet into 1 of the 2 categories: those containing a patient's symptom and those that do not. Objective: This study aimed to present the results of groups participating in a Japanese subtask, English subtask, and Chinese subtask along with discussions, to clarify the issues that need to be resolved in the field of medical NLP. Methods: In summary, 8 groups (19 systems) participated in the Japanese subtask, 4 groups (12 systems) participated in the English subtask, and 2 groups (6 systems) participated in the Chinese subtask. 
In total, 2 baseline systems were constructed for each subtask. The performance of the participant and baseline systems was assessed using the exact match accuracy, F-measure based on precision and recall, and Hamming loss. Results: The best system achieved an exact match accuracy of 0.880, 0.920 F-measure, and 0.019 Hamming loss. The averages of match accuracy, F-measure, and Hamming loss for the Japanese subtask were 0.720, 0.820, and 0.051; those for the English subtask were 0.770, 0.850, and 0.037; and those for the Chinese subtask were 0.810, 0.880, and 0.032, respectively. Conclusions: This paper presented and discussed the performance of systems participating in the NTCIR-13 MedWeb task. As the MedWeb task settings can be formalized as the factualization of text, the achievement of this task could be directly applied to practical clinical applications. ", doi="10.2196/12783", url="http://www.jmir.org/2019/2/e12783/", url="http://www.ncbi.nlm.nih.gov/pubmed/30785407" } @Article{info:doi/10.2196/10788, author="Li, Rumeng and Hu, Baotian and Liu, Feifan and Liu, Weisong and Cunningham, Francesca and McManus, D. David and Yu, Hong", title="Detection of Bleeding Events in Electronic Health Record Notes Using Convolutional Neural Network Models Enhanced With Recurrent Neural Network Autoencoders: Deep Learning Approach", journal="JMIR Med Inform", year="2019", month="Feb", day="08", volume="7", number="1", pages="e10788", keywords="autoencoder", keywords="BiLSTM", keywords="bleeding", keywords="convolutional neural networks", keywords="electronic health record", abstract="Background: Bleeding events are common and critical and may cause significant morbidity and mortality. High incidences of bleeding events are associated with cardiovascular disease in patients on anticoagulant therapy. Prompt and accurate detection of bleeding events is essential to prevent serious consequences. 
As bleeding events are often described in clinical notes, automatic detection of bleeding events from electronic health record (EHR) notes may improve drug-safety surveillance and pharmacovigilance. Objective: We aimed to develop a natural language processing (NLP) system to automatically classify whether an EHR note sentence contains a bleeding event. Methods: We expert annotated 878 EHR notes (76,577 sentences and 562,630 word-tokens) to identify bleeding events at the sentence level. This annotated corpus was used to train and validate our NLP systems. We developed an innovative hybrid convolutional neural network (CNN) and long short-term memory (LSTM) autoencoder (HCLA) model that integrates a CNN architecture with a bidirectional LSTM (BiLSTM) autoencoder model to leverage large unlabeled EHR data. Results: HCLA achieved the best area under the receiver operating characteristic curve (0.957) and F1 score (0.938) to identify whether a sentence contains a bleeding event, thereby surpassing the strong baseline support vector machines and other CNN and autoencoder models. Conclusions: By incorporating a supervised CNN model and a pretrained unsupervised BiLSTM autoencoder, the HCLA achieved high performance in detecting bleeding events. ", doi="10.2196/10788", url="http://medinform.jmir.org/2019/1/e10788/", url="http://www.ncbi.nlm.nih.gov/pubmed/30735140" } @Article{info:doi/10.2196/12414, author="Liu, Qian and Chen, Qiuyi and Shen, Jiayi and Wu, Huailiang and Sun, Yimeng and Ming, Wai-Kit", title="Data Analysis and Visualization of Newspaper Articles on Thirdhand Smoke: A Topic Modeling Approach", journal="JMIR Med Inform", year="2019", month="Jan", day="29", volume="7", number="1", pages="e12414", keywords="media concerns", keywords="topic modeling", keywords="third-hand smoke", keywords="tobacco", keywords="indoor air quality", abstract="Background: Thirdhand smoke has been a growing topic for years in China. 
Thirdhand smoke (THS) consists of residual tobacco smoke pollutants that remain on surfaces and in dust. These pollutants are re-emitted as a gas or react with oxidants and other compounds in the environment to yield secondary pollutants. Objective: Collecting media reports on THS from major media outlets and analyzing this subject using topic modeling can facilitate a better understanding of the role that the media plays in communicating this health issue to the public. Methods: The data were retrieved from the Wiser and Factiva news databases. A preliminary investigation focused on articles dated between January 1, 2013, and December 31, 2017. Use of Latent Dirichlet Allocation yielded the top 10 topics about THS. The use of the modified LDAvis tool enabled an overall view of the topic model, which visualizes different topics as circles. Multidimensional scaling was used to represent the intertopic distances on a two-dimensional plane. Results: We found 745 articles dated between January 1, 2013, and December 31, 2017. The United States ranked first in terms of publications (152 articles on THS from 2013-2017). We found 279 news reports about THS from the Chinese media over the same period and 363 news reports from the United States. Given our analysis of the percentage of news related to THS in China, Topic 1 (Cancer) was the most popular among the topics and was mentioned in 31.9\% of all news stories. Topic 2 (Control of quitting smoking) was related to roughly 15\% of news items on THS. Conclusions: Data analysis and the visualization of news articles can generate useful information. Our study shows that topic modeling can offer insights into understanding news reports related to THS. This analysis of media trends indicated that related diseases, air and particulate matter (PM2.5), and control and restrictions are the major concerns of the Chinese media reporting on THS. 
The Chinese press still needs to consider fuller reports on THS based on scientific evidence and with less focus on sensational headlines. We recommend that additional studies be conducted related to sentiment analysis of news data to verify and measure the influence of THS-related topics. ", doi="10.2196/12414", url="http://medinform.jmir.org/2019/1/e12414/", url="http://www.ncbi.nlm.nih.gov/pubmed/30694199" } @Article{info:doi/10.2196/10013, author="Woo, Hyunki and Kim, Kyunga and Cha, KyeongMin and Lee, Jin-Young and Mun, Hansong and Cho, Jin Soo and Chung, In Ji and Pyo, Hui Jeung and Lee, Kun-Chul and Kang, Mira", title="Application of Efficient Data Cleaning Using Text Clustering for Semistructured Medical Reports to Large-Scale Stool Examination Reports: Methodology Study", journal="J Med Internet Res", year="2019", month="Jan", day="08", volume="21", number="1", pages="e10013", keywords="data cleaning", keywords="text clustering", keywords="key collision", keywords="nearest neighbor methods", keywords="OpenRefine", abstract="Background: Since medical research based on big data has become more common, the community's interest and effort to analyze a large amount of semistructured or unstructured text data, such as examination reports, have rapidly increased. However, these large-scale text data are often not readily applicable to analysis owing to typographical errors, inconsistencies, or data entry problems. Therefore, an efficient data cleaning process is required to ensure the veracity of such data. Objective: In this paper, we proposed an efficient data cleaning process for large-scale medical text data, which employs text clustering methods and value-converting technique, and evaluated its performance with medical examination text data. Methods: The proposed data cleaning process consists of text clustering and value-merging. In the text clustering step, we suggested the use of key collision and nearest neighbor methods in a complementary manner. 
Words (called values) in the same cluster would be expected as a correct value and its wrong representations. In the value-converting step, wrong values for each identified cluster would be converted into their correct value. We applied this data cleaning process to 574,266 stool examination reports produced for parasite analysis at Samsung Medical Center from 1995 to 2015. The performance of the proposed process was examined and compared with data cleaning processes based on a single clustering method. We used OpenRefine 2.7, an open source application that provides various text clustering methods and an efficient user interface for value-converting with common-value suggestion. Results: A total of 1,167,104 words in stool examination reports were surveyed. In the data cleaning process, we discovered 30 correct words and 45 patterns of typographical errors and duplicates. We observed high correction rates for words with typographical errors (98.61\%) and typographical error patterns (97.78\%). The resulting data accuracy was nearly 100\% based on the number of total words. Conclusions: Our data cleaning process based on the combinatorial use of key collision and nearest neighbor methods provides an efficient cleaning of large-scale text data and hence improves data accuracy. ", doi="10.2196/10013", url="https://www.jmir.org/2019/1/e10013/", url="http://www.ncbi.nlm.nih.gov/pubmed/30622098" } @Article{info:doi/10.2196/11817, author="Ricard, J. Benjamin and Marsch, A. 
Lisa and Crosier, Benjamin and Hassanpour, Saeed", title="Exploring the Utility of Community-Generated Social Media Content for Detecting Depression: An Analytical Study on Instagram", journal="J Med Internet Res", year="2018", month="Dec", day="06", volume="20", number="12", pages="e11817", keywords="machine learning", keywords="depression", keywords="social media", keywords="mental health", abstract="Background: The content produced by individuals on various social media platforms has been successfully used to identify mental illness, including depression. However, most of the previous work in this area has focused on user-generated content, that is, content created by the individual, such as an individual's posts and pictures. In this study, we explored the predictive capability of community-generated content, that is, the data generated by a community of friends or followers, rather than by a sole individual, to identify depression among social media users. Objective: The objective of this research was to evaluate the utility of community-generated content on social media, such as comments on an individual's posts, to predict depression as defined by the clinically validated Patient Health Questionnaire-8 (PHQ-8) assessment questionnaire. We hypothesized that the results of this research may provide new insights into next generation of population-level mental illness risk assessment and intervention delivery. Methods: We created a Web-based survey on a crowdsourcing platform through which participants granted access to their Instagram profiles as well as provided their responses to PHQ-8 as a reference standard for depression status. After data quality assurance and postprocessing, the study analyzed the data of 749 participants. 
To build our predictive model, linguistic features were extracted from Instagram post captions and comments, including multiple sentiment scores, emoji sentiment analysis results, and meta-variables such as the number of likes and average comment length. In this study, 10.4\% (78/749) of the data were held out as a test set. The remaining 89.6\% (671/749) of the data were used to train an elastic-net regularized linear regression model to predict PHQ-8 scores. We compared different versions of this model (ie, a model trained on only user-generated data, a model trained on only community-generated data, and a model trained on the combination of both types of data) on a test set to explore the utility of community-generated data in our predictive analysis. Results: The 2 models, the first trained on only community-generated data (area under curve [AUC]=0.71) and the second trained on a combination of user-generated and community-generated data (AUC=0.72), had statistically significant performances for predicting depression based on the Mann-Whitney U test (P=.03 and P=.02, respectively). The model trained on only user-generated data (AUC=0.63; P=.11) did not achieve statistically significant results. The coefficients of the models revealed that our combined data classifier effectively amalgamated both user-generated and community-generated data and that the 2 feature sets were complementary and contained nonoverlapping information in our predictive analysis. Conclusions: The results presented in this study indicate that leveraging community-generated data from social media, in addition to user-generated data, can be informative for predicting depression among social media users. 
", doi="10.2196/11817", url="http://www.jmir.org/2018/12/e11817/", url="http://www.ncbi.nlm.nih.gov/pubmed/30522991" } @Article{info:doi/10.2196/medinform.9162, author="Jones, Josette and Pradhan, Meeta and Hosseini, Masoud and Kulanthaivel, Anand and Hosseini, Mahmood", title="Novel Approach to Cluster Patient-Generated Data Into Actionable Topics: Case Study of a Web-Based Breast Cancer Forum", journal="JMIR Med Inform", year="2018", month="Nov", day="29", volume="6", number="4", pages="e45", keywords="data interpretation", keywords="natural language processing", keywords="patient-generated information", keywords="social media", keywords="statistical analysis", keywords="infodemiology", abstract="Background: The increasing use of social media and mHealth apps has generated new opportunities for health care consumers to share information about their health and well-being. Information shared through social media contains not only medical information but also valuable information about how the survivors manage disease and recovery in the context of daily life. Objective: The objective of this study was to determine the feasibility of acquiring and modeling the topics of a major online breast cancer support forum. Breast cancer patient support forums were selected to discover the hidden, less obvious aspects of disease management and recovery. Methods: First, manual topic categorization was performed using qualitative content analysis (QCA) of each individual forum board. Second, we requested permission from the Breastcancer.org Community for a more in-depth analysis of the postings. Topic modeling was then performed using open source software Machine Learning Language Toolkit, followed by multiple linear regression (MLR) analysis to detect highly correlated topics among the different website forums. Results: QCA of the forums resulted in 20 categories of user discussion. The final topic model organized >4 million postings into 30 manageable topics. 
Using qualitative analysis of the topic models and statistical analysis, we grouped these 30 topics into 4 distinct clusters with similarity scores of {$\geq$}0.80; these clusters were labeled Symptoms \& Diagnosis, Treatment, Financial, and Family \& Friends. A clinician review confirmed the clinical significance of the topic clusters, allowing for future detection of actionable items within social media postings. To identify the most significant topics across individual forums, MLR demonstrated that 6 topics---based on the Akaike information criterion values ranging from {$-$}642.75 to {$-$}412.32---were statistically significant. Conclusions: The developed method provides an insight into the areas of interest and concern, including those not ascertainable in the clinic. Such topics included support from lay and professional caregivers and late side effects of therapy that consumers discuss in social media and may be of interest to clinicians. The developed methods and results indicate the potential of social media to inform the clinical workflow with regards to the impact of recovery on daily life. 
", doi="10.2196/medinform.9162", url="http://medinform.jmir.org/2018/4/e45/", url="http://www.ncbi.nlm.nih.gov/pubmed/30497991" } @Article{info:doi/10.2196/12159, author="Li, Fei and Liu, Weisong and Yu, Hong", title="Extraction of Information Related to Adverse Drug Events from Electronic Health Record Notes: Design of an End-to-End Model Based on Deep Learning", journal="JMIR Med Inform", year="2018", month="Nov", day="26", volume="6", number="4", pages="e12159", keywords="adverse drug event", keywords="deep learning", keywords="multi-task learning", keywords="named entity recognition", keywords="natural language processing", keywords="relation extraction", abstract="Background: Pharmacovigilance and drug-safety surveillance are crucial for monitoring adverse drug events (ADEs), but the main ADE-reporting systems such as Food and Drug Administration Adverse Event Reporting System face challenges such as underreporting. Therefore, as complementary surveillance, data on ADEs are extracted from electronic health record (EHR) notes via natural language processing (NLP). As NLP develops, many up-to-date machine-learning techniques are introduced in this field, such as deep learning and multi-task learning (MTL). However, only a few studies have focused on employing such techniques to extract ADEs. Objective: We aimed to design a deep learning model for extracting ADEs and related information such as medications and indications. Since extraction of ADE-related information includes two steps---named entity recognition and relation extraction---our second objective was to improve the deep learning model using multi-task learning between the two steps. Methods: We employed the dataset from the Medication, Indication and Adverse Drug Events (MADE) 1.0 challenge to train and test our models. This dataset consists of 1089 EHR notes of cancer patients and includes 9 entity types such as Medication, Indication, and ADE and 7 types of relations between these entities. 
To extract information from the dataset, we proposed a deep-learning model that uses a bidirectional long short-term memory (BiLSTM) conditional random field network to recognize entities and a BiLSTM-Attention network to extract relations. To further improve the deep-learning model, we employed three typical MTL methods, namely, hard parameter sharing, parameter regularization, and task relation learning, to build three MTL models, called HardMTL, RegMTL, and LearnMTL, respectively. Results: Since extraction of ADE-related information is a two-step task, the result of the second step (ie, relation extraction) was used to compare all models. We used microaveraged precision, recall, and F1 as evaluation metrics. Our deep learning model achieved state-of-the-art results (F1=65.9\%), which is significantly higher than that (F1=61.7\%) of the best system in the MADE1.0 challenge. HardMTL further improved the F1 by 0.8\%, boosting the F1 to 66.7\%, whereas RegMTL and LearnMTL failed to boost the performance. Conclusions: Deep learning models can significantly improve the performance of ADE-related information extraction. MTL may be effective for named entity recognition and relation extraction, but it depends on the methods, data, and other factors. Our results can facilitate research on ADE detection, NLP, and machine learning. 
", doi="10.2196/12159", url="http://medinform.jmir.org/2018/4/e12159/", url="http://www.ncbi.nlm.nih.gov/pubmed/30478023" } @Article{info:doi/10.2196/11141, author="Rivas, Ryan and Montazeri, Niloofar and Le, XT Nhat and Hristidis, Vagelis", title="Automatic Classification of Online Doctor Reviews: Evaluation of Text Classifier Algorithms", journal="J Med Internet Res", year="2018", month="Nov", day="12", volume="20", number="11", pages="e11141", keywords="patient satisfaction", keywords="patient reported outcome measures", keywords="quality indicators, health care", keywords="supervised machine learning", abstract="Background: An increasing number of doctor reviews are being generated by patients on the internet. These reviews address a diverse set of topics (features), including wait time, office staff, doctor's skills, and bedside manners. Most previous work on automatic analysis of Web-based customer reviews assumes that (1) product features are described unambiguously by a small number of keywords, for example, battery for phones and (2) the opinion for each feature has a positive or negative sentiment. However, in the domain of doctor reviews, this setting is too restrictive: a feature such as visit duration for doctor reviews may be expressed in many ways and does not necessarily have a positive or negative sentiment. Objective: This study aimed to adapt existing and propose novel text classification methods on the domain of doctor reviews. These methods are evaluated on their accuracy to classify a diverse set of doctor review features. Methods: We first manually examined a large number of reviews to extract a set of features that are frequently mentioned in the reviews. Then we proposed a new algorithm that goes beyond bag-of-words or deep learning classification techniques by leveraging natural language processing (NLP) tools. Specifically, our algorithm automatically extracts dependency tree patterns and uses them to classify review sentences. 
Results: We evaluated several state-of-the-art text classification algorithms as well as our dependency tree--based classifier algorithm on a real-world doctor review dataset. We showed that methods using deep learning or NLP techniques tend to outperform traditional bag-of-words methods. In our experiments, the 2 best methods used NLP techniques; on average, our proposed classifier performed 2.19\% better than an existing NLP-based method, but many of its predictions of specific opinions were incorrect. Conclusions: We conclude that it is feasible to classify doctor reviews. Automatically classifying these reviews would allow patients to easily search for doctors based on their personal preference criteria. ", doi="10.2196/11141", url="http://www.jmir.org/2018/11/e11141/", url="http://www.ncbi.nlm.nih.gov/pubmed/30425030" } @Article{info:doi/10.2196/10497, author="Leroy, Gondy and Gu, Yang and Pettygrove, Sydney and Galindo, K. Maureen and Arora, Ananyaa and Kurzius-Spencer, Margaret", title="Automated Extraction of Diagnostic Criteria From Electronic Health Records for Autism Spectrum Disorders: Development, Evaluation, and Application", journal="J Med Internet Res", year="2018", month="Nov", day="07", volume="20", number="11", pages="e10497", keywords="parser", keywords="natural language processing", keywords="complex entity extraction", keywords="Autism Spectrum Disorder", keywords="DSM", keywords="electronic health records", keywords="decision tree", keywords="machine learning", abstract="Background: Electronic health records (EHRs) bring many opportunities for information utilization. One such use is the surveillance conducted by the Centers for Disease Control and Prevention to track cases of autism spectrum disorder (ASD). This process currently comprises manual collection and review of EHRs of 4- and 8-year old children in 11 US states for the presence of ASD criteria. The work is time-consuming and expensive. 
Objective: Our objective was to automatically extract from EHRs the description of behaviors noted by the clinicians in evidence of the diagnostic criteria in the Diagnostic and Statistical Manual of Mental Disorders (DSM). Previously, we reported on the classification of entire EHRs as ASD or not. In this work, we focus on the extraction of individual expressions of the different ASD criteria in the text. We intend to facilitate large-scale surveillance efforts for ASD and support analysis of changes over time as well as enable integration with other relevant data. Methods: We developed a natural language processing (NLP) parser to extract expressions of 12 DSM criteria using 104 patterns and 92 lexicons (1787 terms). The parser is rule-based to enable precise extraction of the entities from the text. The entities themselves are encompassed in the EHRs as very diverse expressions of the diagnostic criteria written by different people at different times (clinicians, speech pathologists, among others). Due to the sparsity of the data, a rule-based approach is best suited until larger datasets can be generated for machine learning algorithms. Results: We evaluated our rule-based parser and compared it with a machine learning baseline (decision tree). Using a test set of 6636 sentences (50 EHRs), we found that our parser achieved 76\% precision, 43\% recall (ie, sensitivity), and >99\% specificity for criterion extraction. The performance was better for the rule-based approach than for the machine learning baseline (60\% precision and 30\% recall). For some individual criteria, precision was as high as 97\% and recall 57\%. Since precision was very high, we were assured that criteria were rarely assigned incorrectly, and our numbers presented a lower bound of their presence in EHRs. We then conducted a case study and parsed 4480 new EHRs covering 10 years of surveillance records from the Arizona Developmental Disabilities Surveillance Program. 
The social criteria (A1 criteria) showed the biggest change over the years. The communication criteria (A2 criteria) did not distinguish the ASD from the non-ASD records. Among behaviors and interests criteria (A3 criteria), 1 (A3b) was present with much greater frequency in the ASD than in the non-ASD EHRs. Conclusions: Our results demonstrate that NLP can support large-scale analysis useful for ASD surveillance and research. In the future, we intend to facilitate detailed analysis and integration of national datasets. ", doi="10.2196/10497", url="https://www.jmir.org/2018/11/e10497/", url="http://www.ncbi.nlm.nih.gov/pubmed/30404767" } @Article{info:doi/10.2196/11085, author="Park, Hyun So and Hong, Hee Song", title="Identification of Primary Medication Concerns Regarding Thyroid Hormone Replacement Therapy From Online Patient Medication Reviews: Text Mining of Social Network Data", journal="J Med Internet Res", year="2018", month="Oct", day="24", volume="20", number="10", pages="e11085", keywords="medication counseling", keywords="social network data", keywords="primary medication concerns", keywords="satisfaction with levothyroxine treatment", abstract="Background: Patients with hypothyroidism report poor health-related quality of life despite having undergone thyroid hormone replacement therapy (THRT). Understanding patient concerns regarding levothyroxine can help improve the treatment outcomes of THRT. Objective: This study aimed to (1) identify the distinctive themes in patient concerns regarding THRT, (2) determine whether patients have unique primary medication concerns specific to their demographics, and (3) determine the predictability of primary medication concerns on patient treatment satisfaction. Methods: We collected patient reviews from WebMD in the United States (1037 reviews about generic levothyroxine and 1075 reviews about the brand version) posted between September 1, 2007, and January 30, 2017. 
We used natural language processing to identify the themes of medication concerns. Multiple regression analyses were conducted in order to examine the predictability of the primary medication concerns on patient treatment satisfaction. Results: Natural language processing of the patient reviews of levothyroxine posted on a social networking site produced 6 distinctive themes of patient medication concerns related to levothyroxine treatment: how to take the drug, treatment initiation, dose adjustment, symptoms of pain, generic substitutability, and appearance. Patients had different primary medication concerns unique to their gender, age, and treatment duration. Furthermore, treatment satisfaction on levothyroxine depended on what primary medication concerns the patient had. Conclusions: Natural language processing of text content available on social media could identify different themes of patient medication concerns that can be validated in future studies to inform the design of tailored medication counseling for improved patient treatment satisfaction. ", doi="10.2196/11085", url="http://www.jmir.org/2018/10/e11085/", url="http://www.ncbi.nlm.nih.gov/pubmed/30355555" } @Article{info:doi/10.2196/10032, author="Payton, Cobb Fay and Yarger, Kvasny Lynette and Pinter, Thomas Anthony", title="Text Mining Mental Health Reports for Issues Impacting Today's College Students: Qualitative Study", journal="JMIR Ment Health", year="2018", month="Oct", day="23", volume="5", number="4", pages="e10032", keywords="text mining", keywords="mental health", keywords="college students", keywords="information and communication technologies", abstract="Background: A growing number of college students are experiencing personal circumstances or encountering situations that feel overwhelming and negatively affect their academic studies and other aspects of life on campus. 
To meet this growing demand for counseling services, US colleges and universities are offering a growing variety of mental health services that provide support and services to students in distress. Objective: In this study, we explore mental health issues impacting college students using a corpus of news articles, foundation reports, and media stories. Mental health concerns within this population have been on the rise. Uncovering the most salient themes articulated in current news and literature reports can better enable higher education institutions to provide health services to its students. Methods: We used SAS Text Miner to analyze 165 references that were published from 2010 to 2015 and focused on mental health among college students. Key clusters were identified to reveal the themes that were most significant to the topic. Results: The final cluster analysis yielded six themes in students' mental health experiences in higher education (ie, age, race, crime, student services, aftermath, victim). Two themes, increasing demand for student services provided by campus counseling centers (113/165, 68.5\%) and the increased mental health risks faced by racial and ethnic minorities (30/165, 18.2\%), dominated the discourse. Conclusions: Higher education institutions are actively engaged in extending mental health services and offering targeted outreach to students of color. Cluster analysis identified that institutions are devoting more and innovative resources in response to the growing number students who experience mental health concerns. However, there is a need to focus on proactive approaches to mitigate the causes of mental health and the aftermath of a negative experience, particularly violence and sexual assault. Such strategies can potentially influence how students navigate their health information seeking and how information and communication technologies, including mobile apps, can partially address the needs of college students. 
", doi="10.2196/10032", url="http://mental.jmir.org/2018/4/e10032/", url="http://www.ncbi.nlm.nih.gov/pubmed/30355565" } @Article{info:doi/10.2196/10726, author="Zolnoori, Maryam and Fung, Wah Kin and Fontelo, Paul and Kharrazi, Hadi and Faiola, Anthony and Wu, Shirley Yi Shuan and Stoffel, Virginia and Patrick, Timothy", title="Identifying the Underlying Factors Associated With Patients' Attitudes Toward Antidepressants: Qualitative and Quantitative Analysis of Patient Drug Reviews", journal="JMIR Ment Health", year="2018", month="Sep", day="30", volume="5", number="4", pages="e10726", keywords="medication adherence", keywords="attitude", keywords="perception", keywords="antidepressive agents", keywords="patient-centered care", keywords="chronic disease", keywords="depression", keywords="community networks", keywords="internet", keywords="social media", keywords="data mining", keywords="framework method", abstract="Background: Nonadherence to antidepressants is a major obstacle to deriving antidepressants' therapeutic benefits, resulting in significant burdens on the individuals and the health care system. Several studies have shown that nonadherence is weakly associated with personal and clinical variables but strongly associated with patients' beliefs and attitudes toward medications. Patients' drug review posts in online health care communities might provide a significant insight into patients' attitude toward antidepressants and could be used to address the challenges of self-report methods such as patients' recruitment. Objective: The aim of this study was to use patient-generated data to identify factors affecting the patient's attitude toward 4 antidepressants drugs (sertraline [Zoloft], escitalopram [Lexapro], duloxetine [Cymbalta], and venlafaxine [Effexor XR]), which in turn, is a strong determinant of treatment nonadherence. 
We hypothesized that clinical variables (drug effectiveness; adverse drug reactions, ADRs; perceived distress from ADRs, ADR-PD; and duration of treatment) and personal variables (age, gender, and patients' knowledge about medications) are associated with patients' attitude toward antidepressants, and experience of ADRs and drug ineffectiveness are strongly associated with negative attitude. Methods: We used both qualitative and quantitative methods to analyze the dataset. Patients' drug reviews were randomly selected from a health care forum called askapatient. The Framework method was used to build the analytical framework containing the themes for developing structured data from the qualitative drug reviews. Then, 4 annotators coded the drug reviews at the sentence level using the analytical framework. After managing missing values, we used chi-square and ordinal logistic regression to test and model the association between variables and attitude. Results: A total of 892 reviews posted between February 2001 and September 2016 were analyzed. Most of the patients were females (680/892, 76.2\%) and aged less than 40 years (540/892, 60.5\%). Patient attitude was significantly (P<.001) associated with experience of ADRs, ADR-PD, drug effectiveness, perceived lack of knowledge, experience of withdrawal, and duration of usage, whereas both age ($F_{4,874}$=0.72, P=.58) and gender ($\chi^2_4$=2.7, P=.21) were not found to be associated with patient attitudes. Moreover, modeling the relationship between variables and attitudes showed that drug effectiveness and perceived distress from adverse drug reactions were the 2 most significant factors affecting patients' attitude toward antidepressants. Conclusions: Patients' self-report experiences of medications in online health care communities can provide a direct insight into the underlying factors associated with patients' perceptions and attitudes toward antidepressants. 
However, it cannot be used as a replacement for self-report methods because of the lack of information for some of the variables, colloquial language, and the unstructured format of the reports. ", doi="10.2196/10726", url="http://mental.jmir.org/2018/4/e10726/", url="http://www.ncbi.nlm.nih.gov/pubmed/30287417" } @Article{info:doi/10.2196/11021, author="Usui, Misa and Aramaki, Eiji and Iwao, Tomohide and Wakamiya, Shoko and Sakamoto, Tohru and Mochizuki, Mayumi", title="Extraction and Standardization of Patient Complaints from Electronic Medication Histories for Pharmacovigilance: Natural Language Processing Analysis in Japanese", journal="JMIR Med Inform", year="2018", month="Sep", day="27", volume="6", number="3", pages="e11021", keywords="adverse drug events", keywords="natural language processing", keywords="medical informatics", keywords="medication history", keywords="pharmacovigilance", abstract="Background: Despite the growing number of studies using natural language processing for pharmacovigilance, there are few reports on manipulating free text patient information in Japanese. Objective: This study aimed to establish a method of extracting and standardizing patient complaints from electronic medication histories accumulated in a Japanese community pharmacy for the detection of possible adverse drug event (ADE) signals. Methods: Subjective information included in electronic medication history data provided by a Japanese pharmacy operating in Hiroshima, Japan from September 1, 2015 to August 31, 2016, was used as patients' complaints. We formulated search rules based on morphological analysis and daily (nonmedical) speech and developed a system that automatically executes the search rules and annotates free text data with International Classification of Diseases, Tenth Revision (ICD-10) codes. The performance of the system was evaluated through comparisons with data manually annotated by health care workers for a data set of 5000 complaints. 
Results: Of 5000 complaints, the system annotated 2236 complaints with ICD-10 codes, whereas health care workers annotated 2348 statements. There was a match in the annotation of 1480 complaints between the system and manual work. System performance was .66 regarding precision, .63 in recall, and .65 for the F-measure. Conclusions: Our results suggest that the system may be helpful in extracting and standardizing patients' speech related to symptoms from massive amounts of free text data, replacing manual work. After improving the extraction accuracy, we expect to utilize this system to detect signals of possible ADEs from patients' complaints in the future. ", doi="10.2196/11021", url="http://medinform.jmir.org/2018/3/e11021/", url="http://www.ncbi.nlm.nih.gov/pubmed/30262450" } @Article{info:doi/10.2196/11548, author="Karystianis, George and Adily, Armita and Schofield, Peter and Knight, Lee and Galdon, Clara and Greenberg, David and Jorm, Louisa and Nenadic, Goran and Butler, Tony", title="Automatic Extraction of Mental Health Disorders From Domestic Violence Police Narratives: Text Mining Study", journal="J Med Internet Res", year="2018", month="Sep", day="13", volume="20", number="9", pages="e11548", keywords="text mining", keywords="rule-based approach", keywords="police narratives", keywords="mental health disorders", keywords="domestic violence", abstract="Background: Vast numbers of domestic violence (DV) incidents are attended by the New South Wales Police Force each year in New South Wales and recorded as both structured quantitative data and unstructured free text in the WebCOPS (Web-based interface for the Computerised Operational Policing System) database regarding the details of the incident, the victim, and person of interest (POI). Although the structured data are used for reporting purposes, the free text remains untapped for DV reporting and surveillance purposes. 
Objective: In this paper, we explore whether text mining can automatically identify mental health disorders from this unstructured text. Methods: We used a training set of 200 DV recorded events to design a knowledge-driven approach based on lexical patterns in text suggesting mental health disorders for POIs and victims. Results: The precision returned from an evaluation set of 100 DV events was 97.5\% and 87.1\% for mental health disorders related to POIs and victims, respectively. After applying our approach to a large-scale corpus of almost a half million DV events, we identified 77,995 events (15.83\%) that mentioned mental health disorders, with 76.96\% (60,032/77,995) of those linked to POIs versus 16.47\% (12,852/77,995) for the victims and 6.55\% (5111/77,995) for both. Depression was the most common mental health disorder mentioned in both victims (22.25\%, 3269) and POIs (18.70\%, 8944), followed by alcohol abuse for POIs (12.19\%, 5829) and various anxiety disorders (eg, panic disorder, generalized anxiety disorder) for victims (11.66\%, 1714). Conclusions: The results suggest that text mining can automatically extract targeted information from police-recorded DV events to support further public health research into the nexus between mental health disorders and DV. ", doi="10.2196/11548", url="http://www.jmir.org/2018/9/e11548/", url="http://www.ncbi.nlm.nih.gov/pubmed/30213778" } @Article{info:doi/10.2196/11510, author="Bickmore, W. Timothy and Trinh, Ha and Olafsson, Stefan and O'Leary, K. Teresa and Asadi, Reza and Rickles, M. 
Nathaniel and Cruz, Ricardo", title="Patient and Consumer Safety Risks When Using Conversational Assistants for Medical Information: An Observational Study of Siri, Alexa, and Google Assistant", journal="J Med Internet Res", year="2018", month="Sep", day="04", volume="20", number="9", pages="e11510", keywords="conversational assistant", keywords="conversational interface", keywords="dialogue system", keywords="medical error", keywords="patient safety", abstract="Background: Conversational assistants, such as Siri, Alexa, and Google Assistant, are ubiquitous and are beginning to be used as portals for medical services. However, the potential safety issues of using conversational assistants for medical information by patients and consumers are not understood. Objective: To determine the prevalence and nature of the harm that could result from patients or consumers using conversational assistants for medical information. Methods: Participants were given medical problems to pose to Siri, Alexa, or Google Assistant, and asked to determine an action to take based on information from the system. Assignment of tasks and systems were randomized across participants, and participants queried the conversational assistants in their own words, making as many attempts as needed until they either reported an action to take or gave up. Participant-reported actions for each medical task were rated for patient harm using an Agency for Healthcare Research and Quality harm scale. Results: Fifty-four subjects completed the study with a mean age of 42 years (SD 18). Twenty-nine (54\%) were female, 31 (57\%) Caucasian, and 26 (50\%) were college educated. Only 8 (15\%) reported using a conversational assistant regularly, while 22 (41\%) had never used one, and 24 (44\%) had tried one ``a few times.'' Forty-four (82\%) used computers regularly. Subjects were only able to complete 168 (43\%) of their 394 tasks. 
Of these, 49 (29\%) reported actions that could have resulted in some degree of patient harm, including 27 (16\%) that could have resulted in death. Conclusions: Reliance on conversational assistants for actionable medical information represents a safety risk for patients and consumers. Patients should be cautioned to not use these technologies for answers to medical questions they intend to act on without further consultation from a health care provider. ", doi="10.2196/11510", url="http://www.jmir.org/2018/9/e11510/", url="http://www.ncbi.nlm.nih.gov/pubmed/30181110" } @Article{info:doi/10.2196/10779, author="Kloehn, Nicholas and Leroy, Gondy and Kauchak, David and Gu, Yang and Colina, Sonia and Yuan, P. Nicole and Revere, Debra", title="Improving Consumer Understanding of Medical Text: Development and Validation of a New SubSimplify Algorithm to Automatically Generate Term Explanations in English and Spanish", journal="J Med Internet Res", year="2018", month="Aug", day="02", volume="20", number="8", pages="e10779", keywords="text simplification", keywords="health literacy", keywords="natural language processing", keywords="terminology", abstract="Background: While health literacy is important for people to maintain good health and manage diseases, medical educational texts are often written beyond the reading level of the average individual. To mitigate this disconnect, text simplification research provides methods to increase readability and, therefore, comprehension. One method of text simplification is to isolate particularly difficult terms within a document and replace them with easier synonyms (lexical simplification) or an explanation in plain language (semantic simplification). Unfortunately, existing dictionaries are seldom complete, and consequently, resources for many difficult terms are unavailable. This is the case for English and Spanish resources. 
Objective: Our objective was to automatically generate explanations for difficult terms in both English and Spanish when they are not covered by existing resources. The system we present combines existing resources for explanation generation using a novel algorithm (SubSimplify) to create additional explanations. Methods: SubSimplify uses word-level parsing techniques and specialized medical affix dictionaries to identify the morphological units of a term and then source their definitions. While the underlying resources are different, SubSimplify applies the same principles in both languages. To evaluate our approach, we used term familiarity to identify difficult terms in English and Spanish and then generated explanations for them. For each language, we extracted 400 difficult terms from two different article types (General and Medical topics) balanced for frequency. For English terms, we compared SubSimplify's explanation with the explanations from the Consumer Health Vocabulary, WordNet Synonyms and Summaries, as well as Word Embedding Vector (WEV) synonyms. For Spanish terms, we compared the explanation to WordNet Summaries and WEV Embedding synonyms. We evaluated quality, coverage, and usefulness for the simplification provided for each term. Quality is the average score from two subject experts on a 1-4 Likert scale (two per language) for the synonyms or explanations provided by the source. Coverage is the number of terms for which a source could provide an explanation. Usefulness is the same expert score, however, with a 0 assigned when no explanations or synonyms were available for a term. Results: SubSimplify resulted in quality scores of 1.64 for English (P<.001) and 1.49 for Spanish (P<.001), which were lower than those of existing resources (Consumer Health Vocabulary [CHV]=2.81). 
However, in coverage, SubSimplify outperforms all existing written resources, increasing the coverage from 53.0\% to 80.5\% in English and from 20.8\% to 90.8\% in Spanish (P<.001). This result means that the usefulness score of SubSimplify (1.32; P<.001) is greater than that of most existing resources (eg, CHV=0.169). Conclusions: Our approach is intended as an additional resource to existing, manually created resources. It greatly increases the number of difficult terms for which an easier alternative can be made available, resulting in greater actual usefulness. ", doi="10.2196/10779", url="http://www.jmir.org/2018/8/e10779/", url="http://www.ncbi.nlm.nih.gov/pubmed/30072361" } @Article{info:doi/10.2196/10961, author="Suominen, Hanna and Kelly, Liadh and Goeuriot, Lorraine", title="Scholarly Influence of the Conference and Labs of the Evaluation Forum eHealth Initiative: Review and Bibliometric Study of the 2012 to 2017 Outcomes", journal="JMIR Res Protoc", year="2018", month="Jul", day="27", volume="7", number="7", pages="e10961", keywords="evaluation studies as topic", keywords="health records", keywords="information extraction", keywords="information storage and retrieval", keywords="information visualization", keywords="patient education as topic", keywords="speech recognition", keywords="systematic reviews", keywords="test-set generation", keywords="text classification", abstract="Background: The eHealth initiative of the Conference and Labs of the Evaluation Forum (CLEF) has aimed since 2012 to provide researchers working on health text analytics with annual workshops, shared development challenges and tasks, benchmark datasets, and software for processing and evaluation. In 2012, it ran as a scientific workshop with the aim of establishing an evaluation lab, and since 2013, this annual workshop has been supplemented with 3 or more preceding labs each year. 
An evaluation lab is an activity where the participating individuals or teams' goal is to solve the same problem, typically using the same dataset in a given time frame. The overall purpose of this initiative is to support patients, their next of kin, clinical staff, health scientists, and health care policy makers in accessing, understanding, using, and authoring health information in a multilingual setting. In the CLEF eHealth 2013 to 2017 installations, the aim was to address patient-centric text processing. From 2015, the scope was also extended to aid both patients' understanding and clinicians' authoring of various types of medical content. CLEF eHealth 2017 introduced a new pilot task on technology-assisted reviews (TARs) in empirical medicine in order to support health scientists and health care policymakers' information access. Objectives: This original research paper reports on the outcomes of the first 6 installations of CLEF eHealth from 2012 to 2017. The focus is on measuring and analyzing the scholarly influence by reviewing CLEF eHealth papers and their citations. Methods: A review and bibliometric study of the CLEF eHealth proceedings, working notes, and author-declared paper extensions were conducted. Citation content analysis was used for the publications and their citations collected from Google Scholar. Results: As many as 718 teams registered their interest in the tasks, leading to 130 teams submitting to the 15 tasks. A total of 184 papers using CLEF eHealth data generated 1299 citations, yielding a total scholarly citation influence of almost 963,000 citations for the 741 coauthors, and included authors from 33 countries across the world. Eight tasks produced statistically significant improvements (2, 3, and 3 times with P<.001, P=.009, and P=.04, respectively) in processing quality by at least 1 out of the top 3 methods. 
Conclusions: These substantial participation numbers, large citation counts, and significant performance improvements encourage continuing to develop these technologies to address patient needs. Consequently, data and tools have been opened for future research and development, and the CLEF eHealth initiative continues to run new challenges. ", doi="10.2196/10961", url="http://www.researchprotocols.org/2018/7/e10961/", url="http://www.ncbi.nlm.nih.gov/pubmed/30054262" } @Article{info:doi/10.2196/jmir.9413, author="Du, Jingcheng and Tang, Lu and Xiang, Yang and Zhi, Degui and Xu, Jun and Song, Hsing-Yi and Tao, Cui", title="Public Perception Analysis of Tweets During the 2015 Measles Outbreak: Comparative Study Using Convolutional Neural Network Models", journal="J Med Internet Res", year="2018", month="Jul", day="09", volume="20", number="7", pages="e236", keywords="convolutional neural networks", keywords="social media", keywords="measles", keywords="public perception", abstract="Background: Timely understanding of public perceptions allows public health agencies to provide up-to-date responses to health crises such as infectious diseases outbreaks. Social media such as Twitter provide an unprecedented way for the prompt assessment of the large-scale public response. Objective: The aims of this study were to develop a scheme for a comprehensive public perception analysis of a measles outbreak based on Twitter data and demonstrate the superiority of the convolutional neural network (CNN) models (compared with conventional machine learning methods) on measles outbreak-related tweets classification tasks with a relatively small and highly unbalanced gold standard training set. Methods: We first designed a comprehensive scheme for the analysis of public perception of measles based on tweets, including 3 dimensions: discussion themes, emotions expressed, and attitude toward vaccination. 
All 1,154,156 tweets containing the word ``measles'' posted between December 1, 2014, and April 30, 2015, were purchased and downloaded from DiscoverText.com. Two expert annotators curated a gold standard of 1151 tweets (approximately 0.1\% of all tweets) based on the 3-dimensional scheme. Next, a tweet classification system based on the CNN framework was developed. We compared the performance of the CNN models to those of 4 conventional machine learning models and another neural network model. We also compared the impact of different word embeddings configurations for the CNN models: (1) Stanford GloVe embedding trained on billions of tweets in the general domain, (2) measles-specific embedding trained on our 1 million measles related tweets, and (3) a combination of the 2 embeddings. Results: Cohen kappa intercoder reliability values for the annotation were: 0.78, 0.72, and 0.80 on the 3 dimensions, respectively. Class distributions within the gold standard were highly unbalanced for all dimensions. The CNN models performed better on all classification tasks than k-nearest neighbors, na{\"i}ve Bayes, support vector machines, or random forest. Detailed comparison between support vector machines and the CNN models showed that the major contributor to the overall superiority of the CNN models is the improvement on recall, especially for classes with low occurrence. The CNN model with the 2 embedding combination led to better performance on discussion themes and emotions expressed (microaveraging F1 scores of 0.7811 and 0.8592, respectively), while the CNN model with Stanford embedding achieved best performance on attitude toward vaccination (microaveraging F1 score of 0.8642). Conclusions: The proposed scheme can successfully classify the public's opinions and emotions in multiple dimensions, which would facilitate the timely understanding of public perceptions during the outbreak of an infectious disease. 
Compared with conventional machine learning methods, our CNN models showed superiority on measles-related tweet classification tasks with a relatively small and highly unbalanced gold standard. With the success of these tasks, our proposed scheme and CNN-based tweets classification system is expected to be useful for the analysis of tweets about other infectious diseases such as influenza and Ebola. ", doi="10.2196/jmir.9413", url="http://www.jmir.org/2018/7/e236/", url="http://www.ncbi.nlm.nih.gov/pubmed/29986843" } @Article{info:doi/10.2196/jmir.9702, author="Guetterman, C. Timothy and Chang, Tammy and DeJonckheere, Melissa and Basu, Tanmay and Scruggs, Elizabeth and Vydiswaran, Vinod V. G.", title="Augmenting Qualitative Text Analysis with Natural Language Processing: Methodological Study", journal="J Med Internet Res", year="2018", month="Jun", day="29", volume="20", number="6", pages="e231", keywords="qualitative research", keywords="natural language processing", keywords="text data", keywords="methodology", keywords="coding", abstract="Background: Qualitative research methods are increasingly being used across disciplines because of their ability to help investigators understand the perspectives of participants in their own words. However, qualitative analysis is a laborious and resource-intensive process. To achieve depth, researchers are limited to smaller sample sizes when analyzing text data. One potential method to address this concern is natural language processing (NLP). Qualitative text analysis involves researchers reading data, assigning code labels, and iteratively developing findings; NLP has the potential to automate part of this process. Unfortunately, little methodological research has been done to compare automatic coding using NLP techniques and qualitative coding, which is critical to establish the viability of NLP as a useful, rigorous analysis procedure. 
Objective: The purpose of this study was to compare the utility of a traditional qualitative text analysis, an NLP analysis, and an augmented approach that combines qualitative and NLP methods. Methods: We conducted a 2-arm cross-over experiment to compare qualitative and NLP approaches to analyze data generated through 2 text (short message service) message survey questions, one about prescription drugs and the other about police interactions, sent to youth aged 14-24 years. We randomly assigned a question to each of the 2 experienced qualitative analysis teams for independent coding and analysis before receiving NLP results. A third team separately conducted NLP analysis of the same 2 questions. We examined the results of our analyses to compare (1) the similarity of findings derived, (2) the quality of inferences generated, and (3) the time spent in analysis. Results: The qualitative-only analysis for the drug question (n=58) yielded 4 major findings, whereas the NLP analysis yielded 3 findings that missed contextual elements. The qualitative and NLP-augmented analysis was the most comprehensive. For the police question (n=68), the qualitative-only analysis yielded 4 primary findings and the NLP-only analysis yielded 4 slightly different findings. Again, the augmented qualitative and NLP analysis was the most comprehensive and produced the highest quality inferences, increasing our depth of understanding (ie, details and frequencies). In terms of time, the NLP-only approach was quicker than the qualitative-only approach for the drug (120 vs 270 minutes) and police (40 vs 270 minutes) questions. An approach beginning with qualitative analysis followed by qualitative- or NLP-augmented analysis took longer time than that beginning with NLP for both drug (450 vs 240 minutes) and police (390 vs 220 minutes) questions. Conclusions: NLP provides both a foundation to code qualitatively more quickly and a method to validate qualitative findings. 
NLP methods were able to identify major themes found with traditional qualitative analysis but were not useful in identifying nuances. Traditional qualitative text analysis added important details and context. ", doi="10.2196/jmir.9702", url="http://www.jmir.org/2018/6/e231/", url="http://www.ncbi.nlm.nih.gov/pubmed/29959110" } @Article{info:doi/10.2196/10042, author="Fukuoka, Yoshimi and Lindgren, G. Teri and Mintz, Dov Yonatan and Hooper, Julie and Aswani, Anil", title="Applying Natural Language Processing to Understand Motivational Profiles for Maintaining Physical Activity After a Mobile App and Accelerometer-Based Intervention: The mPED Randomized Controlled Trial", journal="JMIR Mhealth Uhealth", year="2018", month="Jun", day="20", volume="6", number="6", pages="e10042", keywords="mobile apps", keywords="physical activity", keywords="fitness trackers", keywords="women", keywords="maintenance", keywords="accelerometer", keywords="randomized controlled trial", keywords="motivation", keywords="barriers", keywords="behavioral change", abstract="Background: Regular physical activity is associated with reduced risk of chronic illnesses. Despite various types of successful physical activity interventions, maintenance of activity over the long term is extremely challenging. Objective: The aims of this original paper are to 1) describe physical activity engagement post intervention, 2) identify motivational profiles using natural language processing (NLP) and clustering techniques in a sample of women who completed the physical activity intervention, and 3) compare sociodemographic and clinical data among these identified cluster groups. Methods: In this cross-sectional analysis of 203 women completing a 12-month study exit (telephone) interview in the mobile phone-based physical activity education study were examined. 
The mobile phone-based physical activity education study was a randomized, controlled trial to test the efficacy of the app and accelerometer intervention and its sustainability over a 9-month period. All subjects returned the accelerometer and stopped accessing the app at the last 9-month research office visit. Physical engagement and motivational profiles were assessed by both closed and open-ended questions, such as ``Since your 9-month study visit, has your physical activity been more, less, or about the same (compared to the first 9 months of the study)?'' and, ``What motivates you the most to be physically active?'' NLP and cluster analysis were used to classify motivational profiles. Descriptive statistics were used to compare participants' baseline characteristics among identified groups. Results: Approximately half of the 2 intervention groups (Regular and Plus) reported that they were still wearing an accelerometer and engaging in brisk walking as they were directed during the intervention phases. These numbers in the 2 intervention groups were much higher than the control group (overall P=.01 and P=.003, respectively). Three clusters were identified through NLP and named as the Weight Loss group (n=19), the Illness Prevention group (n=138), and the Health Promotion group (n=46). The Weight Loss group was significantly younger than the Illness Prevention and Health Promotion groups (overall P<.001). The Illness Prevention group had a larger number of Caucasians as compared to the Weight Loss group (P=.001), which was composed mostly of those who identified as African American, Hispanic, or mixed race. Additionally, the Health Promotion group tended to have lower BMI scores compared to the Illness Prevention group (overall P=.02). However, no difference was noted in the baseline moderate-to-vigorous intensity activity level among the 3 groups (overall P>.05). 
Conclusions: The findings could be relevant to tailoring a physical activity maintenance intervention. Furthermore, the findings from NLP and cluster analysis are useful methods to analyze short free text to differentiate motivational profiles. As more sophisticated NL tools are developed in the future, the potential of NLP application in behavioral research will broaden. Trial Registration: ClinicalTrials.gov NCT01280812; https://clinicaltrials.gov/ct2/show/NCT01280812 (Archived by WebCite at http://www.webcitation.org/70IkGagAJ) ", doi="10.2196/10042", url="http://mhealth.jmir.org/2018/6/e10042/", url="http://www.ncbi.nlm.nih.gov/pubmed/29925491" } @Article{info:doi/10.2196/medinform.8204, author="Hardjojo, Antony and Gunachandran, Arunan and Pang, Long and Abdullah, Bin Mohammed Ridzwan and Wah, Win and Chong, Chen Joash Wen and Goh, Hui Ee and Teo, Huang Sok and Lim, Gilbert and Lee, Li Mong and Hsu, Wynne and Lee, Vernon and Chen, I-Cheng Mark and Wong, Franco and Phang, King Jonathan Siung", title="Validation of a Natural Language Processing Algorithm for Detecting Infectious Disease Symptoms in Primary Care Electronic Medical Records in Singapore", journal="JMIR Med Inform", year="2018", month="Jun", day="11", volume="6", number="2", pages="e36", keywords="natural language processing", keywords="communicable diseases", keywords="epidemiology", keywords="surveillance", keywords="syndromic surveillance", keywords="electronic health records", abstract="Background: Free-text clinical records provide a source of information that complements traditional disease surveillance. To electronically harness these records, they need to be transformed into codified fields by natural language processing algorithms. Objective: The aim of this study was to develop, train, and validate Clinical History Extractor for Syndromic Surveillance (CHESS), an natural language processing algorithm to extract clinical information from free-text primary care records. 
Methods: CHESS is a keyword-based natural language processing algorithm to extract 48 signs and symptoms suggesting respiratory infections, gastrointestinal infections, constitutional, as well as other signs and symptoms potentially associated with infectious diseases. The algorithm also captured the assertion status (affirmed, negated, or suspected) and symptom duration. Electronic medical records from the National Healthcare Group Polyclinics, a major public sector primary care provider in Singapore, were randomly extracted and manually reviewed by 2 human reviewers, with a third reviewer as the adjudicator. The algorithm was evaluated based on 1680 notes against the human-coded result as the reference standard, with half of the data used for training and the other half for validation. Results: The symptoms most commonly present within the 1680 clinical records at the episode level were those typically present in respiratory infections such as cough (744/7703, 9.66\%), sore throat (591/7703, 7.67\%), rhinorrhea (552/7703, 7.17\%), and fever (928/7703, 12.04\%). At the episode level, CHESS had an overall performance of 96.7\% precision and 97.6\% recall on the training dataset and 96.0\% precision and 93.1\% recall on the validation dataset. Symptoms suggesting respiratory and gastrointestinal infections were all detected with more than 90\% precision and recall. CHESS correctly assigned the assertion status in 97.3\%, 97.9\%, and 89.8\% of affirmed, negated, and suspected signs and symptoms, respectively (97.6\% overall accuracy). Symptom episode duration was correctly identified in 81.2\% of records with known duration status. Conclusions: We have developed an natural language processing algorithm dubbed CHESS that achieves good performance in extracting signs and symptoms from primary care free-text clinical records. 
In addition to the presence of symptoms, our algorithm can also accurately distinguish affirmed, negated, and suspected assertion statuses and extract symptom durations. ", doi="10.2196/medinform.8204", url="http://medinform.jmir.org/2018/2/e36/", url="http://www.ncbi.nlm.nih.gov/pubmed/29907560" } @Article{info:doi/10.2196/10136, author="Kornfield, Rachel and Sarma, K. Prathusha and Shah, V. Dhavan and McTavish, Fiona and Landucci, Gina and Pe-Romashko, Klaren and Gustafson, H. David", title="Detecting Recovery Problems Just in Time: Application of Automated Linguistic Analysis and Supervised Machine Learning to an Online Substance Abuse Forum", journal="J Med Internet Res", year="2018", month="Jun", day="12", volume="20", number="6", pages="e10136", keywords="self-help groups", keywords="substance-related disorders", keywords="supervised machine learning", keywords="social support", keywords="health communication", abstract="Background: Online discussion forums allow those in addiction recovery to seek help through text-based messages, including when facing triggers to drink or use drugs. Trained staff (or ``moderators'') may participate within these forums to offer guidance and support when participants are struggling but must expend considerable effort to continually review new content. Demands on moderators limit the scalability of evidence-based digital health interventions. Objective: Automated identification of recovery problems could allow moderators to engage in more timely and efficient ways with participants who are struggling. This paper aimed to investigate whether computational linguistics and supervised machine learning can be applied to successfully flag, in real time, those discussion forum messages that moderators find most concerning. 
Methods: Training data came from a trial of a mobile phone-based health intervention for individuals in recovery from alcohol use disorder, with human coders labeling discussion forum messages according to whether or not authors mentioned problems in their recovery process. Linguistic features of these messages were extracted via several computational techniques: (1) a Bag-of-Words approach, (2) the dictionary-based Linguistic Inquiry and Word Count program, and (3) a hybrid approach combining the most important features from both Bag-of-Words and Linguistic Inquiry and Word Count. These features were applied within binary classifiers leveraging several methods of supervised machine learning: support vector machines, decision trees, and boosted decision trees. Classifiers were evaluated in data from a later deployment of the recovery support intervention. Results: To distinguish recovery problem disclosures, the Bag-of-Words approach relied on domain-specific language, including words explicitly linked to substance use and mental health (``drink,'' ``relapse,'' ``depression,'' and so on), whereas the Linguistic Inquiry and Word Count approach relied on language characteristics such as tone, affect, insight, and presence of quantifiers and time references, as well as pronouns. A boosted decision tree classifier, utilizing features from both Bag-of-Words and Linguistic Inquiry and Word Count performed best in identifying problems disclosed within the discussion forum, achieving 88\% sensitivity and 82\% specificity in a separate cohort of patients in recovery. Conclusions: Differences in language use can distinguish messages disclosing recovery problems from other message types. Incorporating machine learning models based on language use allows real-time flagging of concerning content such that trained staff may engage more efficiently and focus their attention on time-sensitive issues. 
", doi="10.2196/10136", url="http://www.jmir.org/2018/6/e10136/", url="http://www.ncbi.nlm.nih.gov/pubmed/29895517" } @Article{info:doi/10.2196/publichealth.8214, author="Bollegala, Danushka and Maskell, Simon and Sloane, Richard and Hajne, Joanna and Pirmohamed, Munir", title="Causality Patterns for Detecting Adverse Drug Reactions From Social Media: Text Mining Approach", journal="JMIR Public Health Surveill", year="2018", month="May", day="09", volume="4", number="2", pages="e51", keywords="machine learning", keywords="ADR detection", keywords="causality", keywords="lexical patterns", keywords="causality detection", keywords="support vector machines", abstract="Background: Detecting adverse drug reactions (ADRs) is an important task that has direct implications for the use of that drug. If we can detect previously unknown ADRs as quickly as possible, then this information can be provided to the regulators, pharmaceutical companies, and health care organizations, thereby potentially reducing drug-related morbidity and saving lives of many patients. A promising approach for detecting ADRs is to use social media platforms such as Twitter and Facebook. A high level of correlation between a drug name and an event may be an indication of a potential adverse reaction associated with that drug. Although numerous association measures have been proposed by the signal detection community for identifying ADRs, these measures are limited in that they detect correlations but often ignore causality. Objective: This study aimed to propose a causality measure that can detect an adverse reaction that is caused by a drug rather than merely being a correlated signal. Methods: To the best of our knowledge, this was the first causality-sensitive approach for detecting ADRs from social media. Specifically, the relationship between a drug and an event was represented using a set of automatically extracted lexical patterns. 
We then learned the weights for the extracted lexical patterns that indicate their reliability for expressing an adverse reaction of a given drug. Results: Our proposed method obtains an ADR detection accuracy of 74\% on a large-scale manually annotated dataset of tweets, covering a standard set of drugs and adverse reactions. Conclusions: By using lexical patterns, we can accurately detect the causality between drugs and adverse reaction--related events. ", doi="10.2196/publichealth.8214", url="http://publichealth.jmir.org/2018/2/e51/", url="http://www.ncbi.nlm.nih.gov/pubmed/29743155" } @Article{info:doi/10.2196/jmir.9610, author="Henderson, Jette and Ke, Junyuan and Ho, C. Joyce and Ghosh, Joydeep and Wallace, C. Byron", title="Phenotype Instance Verification and Evaluation Tool (PIVET): A Scaled Phenotype Evidence Generation Framework Using Web-Based Medical Literature", journal="J Med Internet Res", year="2018", month="May", day="04", volume="20", number="5", pages="e164", keywords="medical informatics", keywords="medical subject headings", keywords="algorithms", keywords="clustering analysis", keywords="classification", keywords="databases as topic", keywords="information storage and retrieval", keywords="MEDLINE", abstract="Background: Researchers are developing methods to automatically extract clinically relevant and useful patient characteristics from raw healthcare datasets. These characteristics, often capturing essential properties of patients with common medical conditions, are called computational phenotypes. Being generated by automated or semiautomated, data-driven methods, such potential phenotypes need to be validated as clinically meaningful (or not) before they are acceptable for use in decision making. 
Objective: The objective of this study was to present Phenotype Instance Verification and Evaluation Tool (PIVET), a framework that uses co-occurrence analysis on an online corpus of publically available medical journal articles to build clinical relevance evidence sets for user-supplied phenotypes. PIVET adopts a conceptual framework similar to the pioneering prototype tool PheKnow-Cloud that was developed for the phenotype validation task. PIVET completely refactors each part of the PheKnow-Cloud pipeline to deliver vast improvements in speed without sacrificing the quality of the insights PheKnow-Cloud achieved. Methods: PIVET leverages indexing in NoSQL databases to efficiently generate evidence sets. Specifically, PIVET uses a succinct representation of the phenotypes that corresponds to the index on the corpus database and an optimized co-occurrence algorithm inspired by the Aho-Corasick algorithm. We compare PIVET's phenotype representation with PheKnow-Cloud's by using PheKnow-Cloud's experimental setup. In PIVET's framework, we also introduce a statistical model trained on domain expert--verified phenotypes to automatically classify phenotypes as clinically relevant or not. Additionally, we show how the classification model can be used to examine user-supplied phenotypes in an online, rather than batch, manner. Results: PIVET maintains the discriminative power of PheKnow-Cloud in terms of identifying clinically relevant phenotypes for the same corpus with which PheKnow-Cloud was originally developed, but PIVET's analysis is an order of magnitude faster than that of PheKnow-Cloud. Not only is PIVET much faster, it can be scaled to a larger corpus and still retain speed. We evaluated multiple classification models on top of the PIVET framework and found ridge regression to perform best, realizing an average F1 score of 0.91 when predicting clinically relevant phenotypes. 
Conclusions: Our study shows that PIVET improves on the most notable existing computational tool for phenotype validation in terms of speed and automation and is comparable in terms of accuracy. ", doi="10.2196/jmir.9610", url="http://www.jmir.org/2018/5/e164/", url="http://www.ncbi.nlm.nih.gov/pubmed/29728351" } @Article{info:doi/10.2196/publichealth.8552, author="Dancy-Scott, Nicole and Dutcher, A. Gale and Keselman, Alla and Hochstein, Colette and Copty, Christina and Ben-Senia, Diane and Rajan, Sampada and Asencio, Guadalupe Maria and Choi, Jongwon Jason", title="Trends in HIV Terminology: Text Mining and Data Visualization Assessment of International AIDS Conference Abstracts Over 25 Years", journal="JMIR Public Health Surveill", year="2018", month="May", day="04", volume="4", number="2", pages="e50", keywords="acquired immunodeficiency syndrome", keywords="data mining", keywords="history", keywords="HIV infections", keywords="terminology", abstract="Background: The language encompassing health conditions can also influence behaviors that affect health outcomes. Few published quantitative studies have been conducted that evaluate HIV-related terminology changes over time. To expand this research, this study included an analysis of a dataset of abstracts presented at the International AIDS Conference (IAC) from 1989 to 2014. These abstracts reflect the global response to HIV over 25 years. Two powerful methodologies were used to evaluate the dataset: text mining to convert the unstructured information into structured data for analysis and data visualization to represent the data visually to assess trends. Objective: The purpose of this project was to evaluate the evolving use of HIV-related language in abstracts presented at the IAC from 1989 to 2014. Methods: Over 80,000 abstracts were obtained from the International AIDS Society and imported into a Microsoft SQL Server database for data processing and text mining analyses. 
A text mining module within the KNIME Analytics Platform, an open source software, was then used to mine the partially processed data to create a terminology corpus of key HIV terms. Subject matter experts grouped the terms into categories. Tableau, a data visualization software, was used to visualize the frequency metrics associated with the terms as line graphs and word clouds. The visualized dashboards were reviewed to discern changes in terminology use across IAC years. Results: The major findings identify trends in HIV-related terminology over 25 years. The term ``AIDS epidemic'' was dominantly used from 1989 to 1991 and then declined in use. In contrast, use of the term ``HIV epidemic'' increased through 2014. Beginning in the mid-1990s, the term ``treatment experienced'' appeared with increasing frequency in the abstracts. Use of terms identifying individuals as ``carriers or victims'' of HIV rarely appeared after 2008. Use of the terms ``HIV positive'' and ``HIV infected'' peaked in the early-1990s and then declined in use. The terms ``men who have sex with men'' and ``MSM'' were rarely used until 1994; subsequently, use of these terms increased through 2014. The term ``sex worker'' steadily increased in frequency throughout conference years, whereas the term ``prostitute'' decreased over time. Conclusions: The results of this study highlight changes in HIV terminology use over 25 years, including the addition, disappearance, and changing use of terms that reflect advances in HIV research and medical practice and destigmatization of the disease. Coupled with findings from related quantitative research, HIV-related terminology recommendations based on results of this study are included. Adoption of these recommendations will further efforts to use less stigmatizing language and facilitate effective communication between health professionals and people affected by HIV. 
", doi="10.2196/publichealth.8552", url="http://publichealth.jmir.org/2018/2/e50/", url="http://www.ncbi.nlm.nih.gov/pubmed/29728344" } @Article{info:doi/10.2196/publichealth.9361, author="Munkhdalai, Tsendsuren and Liu, Feifan and Yu, Hong", title="Clinical Relation Extraction Toward Drug Safety Surveillance Using Electronic Health Record Narratives: Classical Learning Versus Deep Learning", journal="JMIR Public Health Surveill", year="2018", month="Apr", day="25", volume="4", number="2", pages="e29", keywords="medical informatics applications", keywords="drug-related side effects and adverse reactions", keywords="neural networks", keywords="natural language processing", keywords="electronic health records", abstract="Background: Medication and adverse drug event (ADE) information extracted from electronic health record (EHR) notes can be a rich resource for drug safety surveillance. Existing observational studies have mainly relied on structured EHR data to obtain ADE information; however, ADEs are often buried in the EHR narratives and not recorded in structured data. Objective: To unlock ADE-related information from EHR narratives, there is a need to extract relevant entities and identify relations among them. In this study, we focus on relation identification. This study aimed to evaluate natural language processing and machine learning approaches using the expert-annotated medical entities and relations in the context of drug safety surveillance, and investigate how different learning approaches perform under different configurations. Methods: We have manually annotated 791 EHR notes with 9 named entities (eg, medication, indication, severity, and ADEs) and 7 different types of relations (eg, medication-dosage, medication-ADE, and severity-ADE). 
Then, we explored 3 supervised machine learning systems for relation identification: (1) a support vector machines (SVM) system, (2) an end-to-end deep neural network system, and (3) a supervised descriptive rule induction baseline system. For the neural network system, we exploited the state-of-the-art recurrent neural network (RNN) and attention models. We report the performance by macro-averaged precision, recall, and F1-score across the relation types. Results: Our results show that the SVM model achieved the best average F1-score of 89.1\% on test data, outperforming the long short-term memory (LSTM) model with attention (F1-score of 65.72\%) as well as the rule induction baseline system (F1-score of 7.47\%) by a large margin. The bidirectional LSTM model with attention achieved the best performance among different RNN models. With the inclusion of additional features in the LSTM model, its performance can be boosted to an average F1-score of 77.35\%. Conclusions: It shows that classical learning models (SVM) remains advantageous over deep learning models (RNN variants) for clinical relation identification, especially for long-distance intersentential relations. However, RNNs demonstrate a great potential of significant improvement if more training data become available. Our work is an important step toward mining EHRs to improve the efficacy of drug safety surveillance. Most importantly, the annotated data used in this study will be made publicly available, which will further promote drug safety research in the community. ", doi="10.2196/publichealth.9361", url="http://publichealth.jmir.org/2018/2/e29/", url="http://www.ncbi.nlm.nih.gov/pubmed/29695376" } @Article{info:doi/10.2196/medinform.8662, author="Zheng, Shuai and Jabbour, K. Salma and O'Reilly, E. Shannon and Lu, J. 
James and Dong, Lihua and Ding, Lijuan and Xiao, Ying and Yue, Ning and Wang, Fusheng and Zou, Wei", title="Automated Information Extraction on Treatment and Prognosis for Non--Small Cell Lung Cancer Radiotherapy Patients: Clinical Study", journal="JMIR Med Inform", year="2018", month="Feb", day="01", volume="6", number="1", pages="e8", keywords="information extraction", keywords="oncology", keywords="chemoradiation treatment", keywords="prognosis", keywords="non--small cell lung", keywords="information storage and retrieval", keywords="natural language processing", abstract="Background: In outcome studies of oncology patients undergoing radiation, researchers extract valuable information from medical records generated before, during, and after radiotherapy visits, such as survival data, toxicities, and complications. Clinical studies rely heavily on these data to correlate the treatment regimen with the prognosis to develop evidence-based radiation therapy paradigms. These data are available mainly in forms of narrative texts or table formats with heterogeneous vocabularies. Manual extraction of the related information from these data can be time consuming and labor intensive, which is not ideal for large studies. Objective: The objective of this study was to adapt the interactive information extraction platform Information and Data Extraction using Adaptive Learning (IDEAL-X) to extract treatment and prognosis data for patients with locally advanced or inoperable non--small cell lung cancer (NSCLC). Methods: We transformed patient treatment and prognosis documents into normalized structured forms using the IDEAL-X system for easy data navigation. The adaptive learning and user-customized controlled toxicity vocabularies were applied to extract categorized treatment and prognosis data, so as to generate structured output. 
Results: In total, we extracted data from 261 treatment and prognosis documents relating to 50 patients, with overall precision and recall more than 93\% and 83\%, respectively. For toxicity information extractions, which are important to study patient posttreatment side effects and quality of life, the precision and recall achieved 95.7\% and 94.5\% respectively. Conclusions: The IDEAL-X system is capable of extracting study data regarding NSCLC chemoradiation patients with significant accuracy and effectiveness, and therefore can be used in large-scale radiotherapy clinical data studies. ", doi="10.2196/medinform.8662", url="http://medinform.jmir.org/2018/1/e8/", url="http://www.ncbi.nlm.nih.gov/pubmed/29391345" } @Article{info:doi/10.2196/jmir.8669, author="Chen, Jinying and Druhl, Emily and Polepalli Ramesh, Balaji and Houston, K. Thomas and Brandt, A. Cynthia and Zulman, M. Donna and Vimalananda, G. Varsha and Malkani, Samir and Yu, Hong", title="A Natural Language Processing System That Links Medical Terms in Electronic Health Record Notes to Lay Definitions: System Development Using Physician Reviews", journal="J Med Internet Res", year="2018", month="Jan", day="22", volume="20", number="1", pages="e26", keywords="electronic health records", keywords="natural language processing", keywords="consumer health informatics", keywords="usability testing", keywords="computer software", abstract="Background: Many health care systems now allow patients to access their electronic health record (EHR) notes online through patient portals. Medical jargon in EHR notes can confuse patients, which may interfere with potential benefits of patient access to EHR notes. Objective: The aim of this study was to develop and evaluate the usability and content quality of NoteAid, a Web-based natural language processing system that links medical terms in EHR notes to lay definitions, that is, definitions easily understood by lay people. 
Methods: NoteAid incorporates two core components: CoDeMed, a lexical resource of lay definitions for medical terms, and MedLink, a computational unit that links medical terms to lay definitions. We developed innovative computational methods, including an adapted distant supervision algorithm to prioritize medical terms important for EHR comprehension to facilitate the effort of building CoDeMed. Ten physician domain experts evaluated the user interface and content quality of NoteAid. The evaluation protocol included a cognitive walkthrough session and a postsession questionnaire. Physician feedback sessions were audio-recorded. We used standard content analysis methods to analyze qualitative data from these sessions. Results: Physician feedback was mixed. Positive feedback on NoteAid included (1) Easy to use, (2) Good visual display, (3) Satisfactory system speed, and (4) Adequate lay definitions. Opportunities for improvement arising from evaluation sessions and feedback included (1) improving the display of definitions for partially matched terms, (2) including more medical terms in CoDeMed, (3) improving the handling of terms whose definitions vary depending on different contexts, and (4) standardizing the scope of definitions for medicines. On the basis of these results, we have improved NoteAid's user interface and a number of definitions, and added 4502 more definitions in CoDeMed. Conclusions: Physician evaluation yielded useful feedback for content validation and refinement of this innovative tool that has the potential to improve patient EHR comprehension and experience using patient portals. Future ongoing work will develop algorithms to handle ambiguous medical terms and test and evaluate NoteAid with patients. 
", doi="10.2196/jmir.8669", url="http://www.jmir.org/2018/1/e26/", url="http://www.ncbi.nlm.nih.gov/pubmed/29358159" } @Article{info:doi/10.2196/medinform.9150, author="Garvin, Hornung Jennifer and Kim, Youngjun and Gobbel, Temple Glenn and Matheny, E. Michael and Redd, Andrew and Bray, E. Bruce and Heidenreich, Paul and Bolton, Dan and Heavirland, Julia and Kelly, Natalie and Reeves, Ruth and Kalsy, Megha and Goldstein, Kane Mary and Meystre, M. Stephane", title="Automating Quality Measures for Heart Failure Using Natural Language Processing: A Descriptive Study in the Department of Veterans Affairs", journal="JMIR Med Inform", year="2018", month="Jan", day="15", volume="6", number="1", pages="e5", keywords="natural language processing (NLP)", keywords="heart failure", keywords="left ventricular ejection fraction (EF)", keywords="informatics", keywords="quality measures", abstract="Background: We developed an accurate, stakeholder-informed, automated, natural language processing (NLP) system to measure the quality of heart failure (HF) inpatient care, and explored the potential for adoption of this system within an integrated health care system. Objective: To accurately automate a United States Department of Veterans Affairs (VA) quality measure for inpatients with HF. Methods: We automated the HF quality measure Congestive Heart Failure Inpatient Measure 19 (CHI19) that identifies whether a given patient has left ventricular ejection fraction (LVEF) <40\%, and if so, whether an angiotensin-converting enzyme inhibitor or angiotensin-receptor blocker was prescribed at discharge if there were no contraindications. We used documents from 1083 unique inpatients from eight VA medical centers to develop a reference standard (RS) to train (n=314) and test (n=769) the Congestive Heart Failure Information Extraction Framework (CHIEF). We also conducted semi-structured interviews (n=15) for stakeholder feedback on implementation of the CHIEF. 
Results: The CHIEF classified each hospitalization in the test set with a sensitivity (SN) of 98.9\% and positive predictive value of 98.7\%, compared with an RS and SN of 98.5\% for available External Peer Review Program assessments. Of the 1083 patients available for the NLP system, the CHIEF evaluated and classified 100\% of cases. Stakeholders identified potential implementation facilitators and clinical uses of the CHIEF. Conclusions: The CHIEF provided complete data for all patients in the cohort and could potentially improve the efficiency, timeliness, and utility of HF quality measurements. ", doi="10.2196/medinform.9150", url="http://medinform.jmir.org/2018/1/e5/", url="http://www.ncbi.nlm.nih.gov/pubmed/29335238" } @Article{info:doi/10.2196/publichealth.7726, author="Simpson, S. Sean and Adams, Nikki and Brugman, M. Claudia and Conners, J. Thomas", title="Detecting Novel and Emerging Drug Terms Using Natural Language Processing: A Social Media Corpus Study", journal="JMIR Public Health Surveill", year="2018", month="Jan", day="08", volume="4", number="1", pages="e2", keywords="natural language processing", keywords="street drugs", keywords="social media", keywords="vocabulary", abstract="Background: With the rapid development of new psychoactive substances (NPS) and changes in the use of more traditional drugs, it is increasingly difficult for researchers and public health practitioners to keep up with emerging drugs and drug terms. Substance use surveys and diagnostic tools need to be able to ask about substances using the terms that drug users themselves are likely to be using. Analyses of social media may offer new ways for researchers to uncover and track changes in drug terms in near real time. 
This study describes the initial results from an innovative collaboration between substance use epidemiologists and linguistic scientists employing techniques from the field of natural language processing to examine drug-related terms in a sample of tweets from the United States. Objective: The objective of this study was to assess the feasibility of using distributed word-vector embeddings trained on social media data to uncover previously unknown (to researchers) drug terms. Methods: In this pilot study, we trained a continuous bag of words (CBOW) model of distributed word-vector embeddings on a Twitter dataset collected during July 2016 (roughly 884.2 million tokens). We queried the trained word embeddings for terms with high cosine similarity (a proxy for semantic relatedness) to well-known slang terms for marijuana to produce a list of candidate terms likely to function as slang terms for this substance. This candidate list was then compared with an expert-generated list of marijuana terms to assess the accuracy and efficacy of using word-vector embeddings to search for novel drug terminology. Results: The method described here produced a list of 200 candidate terms for the target substance (marijuana). Of these 200 candidates, 115 were determined to in fact relate to marijuana (65 terms for the substance itself, 50 terms related to paraphernalia). This included 30 terms which were used to refer to the target substance in the corpus yet did not appear on the expert-generated list and were therefore considered to be successful cases of uncovering novel drug terminology. Several of these novel terms appear to have been introduced as recently as 1 or 2 months before the corpus time slice used to train the word embeddings. 
Conclusions: Though the precision of the method described here is low enough as to still necessitate human review of any candidate term lists generated in such a manner, the fact that this process was able to detect 30 novel terms for the target substance based only on one month's worth of Twitter data is highly promising. We see this pilot study as an important proof of concept and a first step toward producing a fully automated drug term discovery system capable of tracking emerging NPS terms in real time. ", doi="10.2196/publichealth.7726", url="http://publichealth.jmir.org/2018/1/e2/", url="http://www.ncbi.nlm.nih.gov/pubmed/29311050" } @Article{info:doi/10.2196/medinform.8751, author="Kim, Seongsoon and Park, Donghyeon and Choi, Yonghwa and Lee, Kyubum and Kim, Byounggun and Jeon, Minji and Kim, Jihye and Tan, Choon Aik and Kang, Jaewoo", title="A Pilot Study of Biomedical Text Comprehension using an Attention-Based Deep Neural Reader: Design and Experimental Analysis", journal="JMIR Med Inform", year="2018", month="Jan", day="05", volume="6", number="1", pages="e2", keywords="machine comprehension", keywords="biomedical text comprehension", keywords="deep learning", keywords="machine comprehension dataset", abstract="Background: With the development of artificial intelligence (AI) technology centered on deep-learning, the computer has evolved to a point where it can read a given text and answer a question based on the context of the text. Such a specific task is known as the task of machine comprehension. Existing machine comprehension tasks mostly use datasets of general texts, such as news articles or elementary school-level storybooks. However, no attempt has been made to determine whether an up-to-date deep learning-based machine comprehension model can also process scientific literature containing expert-level knowledge, especially in the biomedical domain. 
Objective: This study aims to investigate whether a machine comprehension model can process biomedical articles as well as general texts. Since there is no dataset for the biomedical literature comprehension task, our work includes generating a large-scale question answering dataset using PubMed and manually evaluating the generated dataset. Methods: We present an attention-based deep neural model tailored to the biomedical domain. To further enhance the performance of our model, we used a pretrained word vector and biomedical entity type embedding. We also developed an ensemble method of combining the results of several independent models to reduce the variance of the answers from the models. Results: The experimental results showed that our proposed deep neural network model outperformed the baseline model by more than 7\% on the new dataset. We also evaluated human performance on the new dataset. The human evaluation result showed that our deep neural model outperformed humans in comprehension by 22\% on average. Conclusions: In this work, we introduced a new task of machine comprehension in the biomedical domain using a deep neural model. Since there was no large-scale dataset for training deep neural models in the biomedical domain, we created the new cloze-style datasets Biomedical Knowledge Comprehension Title (BMKC\_T) and Biomedical Knowledge Comprehension Last Sentence (BMKC\_LS) (together referred to as BioMedical Knowledge Comprehension) using the PubMed corpus. The experimental results showed that the performance of our model is much higher than that of humans. We observed that our model performed consistently better regardless of the degree of difficulty of a text, whereas humans have difficulty when performing biomedical literature comprehension tasks that require expert level knowledge. 
", doi="10.2196/medinform.8751", url="http://medinform.jmir.org/2018/1/e2/", url="http://www.ncbi.nlm.nih.gov/pubmed/29305341" } @Article{info:doi/10.2196/jmir.8344, author="Lin, Chin and Hsu, Chia-Jung and Lou, Yu-Sheng and Yeh, Shih-Jen and Lee, Chia-Cheng and Su, Sui-Lung and Chen, Hsiang-Cheng", title="Artificial Intelligence Learning Semantics via External Resources for Classifying Diagnosis Codes in Discharge Notes", journal="J Med Internet Res", year="2017", month="Nov", day="06", volume="19", number="11", pages="e380", keywords="word embedding", keywords="convolutional neural network", keywords="neural networks (computer)", keywords="natural language processing", keywords="text mining", keywords="data mining", keywords="machine learning", keywords="electronic medical records", keywords="electronic health records", abstract="Background: Automated disease code classification using free-text medical information is important for public health surveillance. However, traditional natural language processing (NLP) pipelines are limited, so we propose a method combining word embedding with a convolutional neural network (CNN). Objective: Our objective was to compare the performance of traditional pipelines (NLP plus supervised machine learning models) with that of word embedding combined with a CNN in conducting a classification task identifying International Classification of Diseases, Tenth Revision, Clinical Modification (ICD-10-CM) diagnosis codes in discharge notes. Methods: We used 2 classification methods: (1) extracting from discharge notes some features (terms, n-gram phrases, and SNOMED CT categories) that we used to train a set of supervised machine learning models (support vector machine, random forests, and gradient boosting machine), and (2) building a feature matrix, by a pretrained word embedding model, that we used to train a CNN. We used these methods to identify the chapter-level ICD-10-CM diagnosis codes in a set of discharge notes. 
We conducted the evaluation using 103,390 discharge notes covering patients hospitalized from June 1, 2015 to January 31, 2017 in the Tri-Service General Hospital in Taipei, Taiwan. We used the receiver operating characteristic curve as an evaluation measure, and calculated the area under the curve (AUC) and F-measure as the global measure of effectiveness. Results: In 5-fold cross-validation tests, our method had a higher testing accuracy (mean AUC 0.9696; mean F-measure 0.9086) than traditional NLP-based approaches (mean AUC range 0.8183-0.9571; mean F-measure range 0.5050-0.8739). A real-world simulation that split the training sample and the testing sample by date verified this result (mean AUC 0.9645; mean F-measure 0.9003 using the proposed method). Further analysis showed that the convolutional layers of the CNN effectively identified a large number of keywords and automatically extracted enough concepts to predict the diagnosis codes. Conclusions: Word embedding combined with a CNN showed outstanding performance compared with traditional methods, needing very little data preprocessing. This shows that future studies will not be limited by incomplete dictionaries. A large amount of unstructured information from free-text medical writing will be extracted by automated approaches in the future, and we believe that the health care field is about to enter the age of big data. ", doi="10.2196/jmir.8344", url="http://www.jmir.org/2017/11/e380/", url="http://www.ncbi.nlm.nih.gov/pubmed/29109070" } @Article{info:doi/10.2196/medinform.8531, author="Chen, Jinying and Jagannatha, N. Abhyuday and Fodeh, J. 
Samah and Yu, Hong", title="Ranking Medical Terms to Support Expansion of Lay Language Resources for Patient Comprehension of Electronic Health Record Notes: Adapted Distant Supervision Approach", journal="JMIR Med Inform", year="2017", month="Oct", day="31", volume="5", number="4", pages="e42", keywords="electronic health records", keywords="natural language processing", keywords="lexical entry selection", keywords="transfer learning", keywords="information extraction", abstract="Background: Medical terms are a major obstacle for patients to comprehend their electronic health record (EHR) notes. Clinical natural language processing (NLP) systems that link EHR terms to lay terms or definitions allow patients to easily access helpful information when reading through their EHR notes, and have shown to improve patient EHR comprehension. However, high-quality lay language resources for EHR terms are very limited in the public domain. Because expanding and curating such a resource is a costly process, it is beneficial and even necessary to identify terms important for patient EHR comprehension first. Objective: We aimed to develop an NLP system, called adapted distant supervision (ADS), to rank candidate terms mined from EHR corpora. We will give EHR terms ranked as high by ADS a higher priority for lay language annotation---that is, creating lay definitions for these terms. Methods: Adapted distant supervision uses distant supervision from consumer health vocabulary and transfer learning to adapt itself to solve the problem of ranking EHR terms in the target domain. We investigated 2 state-of-the-art transfer learning algorithms (ie, feature space augmentation and supervised distant supervision) and designed 5 types of learning features, including distributed word representations learned from large EHR data for ADS. For evaluating ADS, we asked domain experts to annotate 6038 candidate terms as important or nonimportant for EHR comprehension. 
We then randomly divided these data into the target-domain training data (1000 examples) and the evaluation data (5038 examples). We compared ADS with 2 strong baselines, including standard supervised learning, on the evaluation data. Results: The ADS system using feature space augmentation achieved the best average precision, 0.850, on the evaluation set when using 1000 target-domain training examples. The ADS system using supervised distant supervision achieved the best average precision, 0.819, on the evaluation set when using only 100 target-domain training examples. The 2 ADS systems both performed significantly better than the baseline systems (P<.001 for all measures and all conditions). Using a rich set of learning features contributed to ADS's performance substantially. Conclusions: ADS can effectively rank terms mined from EHRs. Transfer learning improved ADS's performance even with a small number of target-domain training examples. EHR terms prioritized by ADS were used to expand a lay language resource that supports patient EHR comprehension. The top 10,000 EHR terms ranked by ADS are available upon request. ", doi="10.2196/medinform.8531", url="http://medinform.jmir.org/2017/4/e42/", url="http://www.ncbi.nlm.nih.gov/pubmed/29089288" } @Article{info:doi/10.2196/jmir.7956, author="Birnbaum, L. Michael and Ernala, Kiranmai Sindhu and Rizvi, F. Asra and De Choudhury, Munmun and Kane, M. 
John", title="A Collaborative Approach to Identifying Social Media Markers of Schizophrenia by Employing Machine Learning and Clinical Appraisals", journal="J Med Internet Res", year="2017", month="Aug", day="14", volume="19", number="8", pages="e289", keywords="schizophrenia", keywords="psychotic disorders", keywords="online social networks", keywords="machine learning", keywords="linguistic analysis", keywords="Twitter", abstract="Background: Linguistic analysis of publicly available Twitter feeds have achieved success in differentiating individuals who self-disclose online as having schizophrenia from healthy controls. To date, limited efforts have included expert input to evaluate the authenticity of diagnostic self-disclosures. Objective: This study aims to move from noisy self-reports of schizophrenia on social media to more accurate identification of diagnoses by exploring a human-machine partnered approach, wherein computational linguistic analysis of shared content is combined with clinical appraisals. Methods: Twitter timeline data, extracted from 671 users with self-disclosed diagnoses of schizophrenia, was appraised for authenticity by expert clinicians. Data from disclosures deemed true were used to build a classifier aiming to distinguish users with schizophrenia from healthy controls. Results from the classifier were compared to expert appraisals on new, unseen Twitter users. Results: Significant linguistic differences were identified in the schizophrenia group including greater use of interpersonal pronouns (P<.001), decreased emphasis on friendship (P<.001), and greater emphasis on biological processes (P<.001). The resulting classifier distinguished users with disclosures of schizophrenia deemed genuine from control users with a mean accuracy of 88\% using linguistic data alone. Compared to clinicians on new, unseen users, the classifier's precision, recall, and accuracy measures were 0.27, 0.77, and 0.59, respectively. 
Conclusions: These data reinforce the need for ongoing collaborations integrating expertise from multiple fields to strengthen our ability to accurately identify and effectively engage individuals with mental illness online. These collaborations are crucial to overcome some of mental illnesses' biggest challenges by using digital technology. ", doi="10.2196/jmir.7956", url="http://www.jmir.org/2017/8/e289/", url="http://www.ncbi.nlm.nih.gov/pubmed/28807891" } @Article{info:doi/10.2196/medinform.8240, author="Oreskovic, Michel Nicolas and Maniates, Jennifer and Weilburg, Jeffrey and Choy, Garry", title="Optimizing the Use of Electronic Health Records to Identify High-Risk Psychosocial Determinants of Health", journal="JMIR Med Inform", year="2017", month="Aug", day="14", volume="5", number="3", pages="e25", keywords="word recognition", keywords="Medicaid", keywords="psychosocial determinants of health", keywords="social determinants of health", keywords="care coordination", abstract="Background: Care coordination programs have traditionally focused on medically complex patients, identifying patients that qualify by analyzing formatted clinical data and claims data. However, not all clinically relevant data reside in claims and formatted data. Recently, there has been increasing interest in including patients with complex psychosocial determinants of health in care coordination programs. Psychosocial risk factors, including social determinants of health, mental health disorders, and substance abuse disorders, are less amenable to rapid and systematic data analyses, as these data are often not collected or stored as formatted data, and due to US Health Insurance Portability and Accountability Act (HIPAA) regulations are often not available as claims data. Objective: The objective of our study was to develop a systematic approach using word recognition software to identifying psychosocial risk factors within any part of a patient's electronic health record (EHR). 
Methods: We used QPID (Queriable Patient Inference Dossier), an ontology-driven word recognition software, to scan adult patients' EHRs to identify terms predicting a high-risk patient suitable to be followed in a care coordination program in Massachusetts, USA. Search terms identified high-risk conditions in patients known to be enrolled in a care coordination program, and were then tested against control patients. We calculated precision, recall, and balanced F-measure for the search terms. Results: We identified 22 EHR-available search terms to define psychosocial high-risk status; the presence of 9 or more of these terms predicted that a patient would meet inclusion criteria for a care coordination program. Precision was .80, recall .98, and balanced F-measure .88 for the identified terms. For adult patients insured by Medicaid and enrolled in the program, a mean of 14 terms (interquartile range [IQR] 11-18) were present as identified by the search tool, ranging from 2 to 22 terms. For patients enrolled in the program but not insured by Medicaid, a mean of 6 terms (IQR 3-8) were present as identified by the search tool, ranging from 1 to 21. Conclusions: Selected informatics tools such as word recognition software can be leveraged to improve health care delivery, such as an EHR-based protocol that identifies psychosocially complex patients eligible for enrollment in a care coordination program. 
", doi="10.2196/medinform.8240", url="http://medinform.jmir.org/2017/3/e25/", url="http://www.ncbi.nlm.nih.gov/pubmed/28807893" } @Article{info:doi/10.2196/medinform.7779, author="Tapi Nzali, Donald Mike and Bringay, Sandra and Lavergne, Christian and Mollevi, Caroline and Opitz, Thomas", title="What Patients Can Tell Us: Topic Analysis for Social Media on Breast Cancer", journal="JMIR Med Inform", year="2017", month="Jul", day="31", volume="5", number="3", pages="e23", keywords="breast cancer", keywords="text mining", keywords="social media", keywords="unsupervised learning", abstract="Background: Social media dedicated to health are increasingly used by patients and health professionals. They are rich textual resources with content generated through free exchange between patients. We are proposing a method to tackle the problem of retrieving clinically relevant information from such social media in order to analyze the quality of life of patients with breast cancer. Objective: Our aim was to detect the different topics discussed by patients on social media and to relate them to functional and symptomatic dimensions assessed in the internationally standardized self-administered questionnaires used in cancer clinical trials (European Organization for Research and Treatment of Cancer [EORTC] Quality of Life Questionnaire Core 30 [QLQ-C30] and breast cancer module [QLQ-BR23]). Methods: First, we applied a classic text mining technique, latent Dirichlet allocation (LDA), to detect the different topics discussed on social media dealing with breast cancer. We applied the LDA model to 2 datasets composed of messages extracted from public Facebook groups and from a public health forum (cancerdusein.org, a French breast cancer forum) with relevant preprocessing. 
Second, we applied a customized Jaccard coefficient to automatically compute similarity distance between the topics detected with LDA and the questions in the self-administered questionnaires used to study quality of life. Results: Among the 23 topics present in the self-administered questionnaires, 22 matched with the topics discussed by patients on social media. Interestingly, these topics corresponded to 95\% (22/23) of the forum and 86\% (20/23) of the Facebook group topics. These figures underline that topics related to quality of life are an important concern for patients. However, 5 social media topics had no corresponding topic in the questionnaires, which do not cover all of the patients' concerns. Of these 5 topics, 2 could potentially be used in the questionnaires, and these 2 topics corresponded to a total of 3.10\% (523/16,868) of topics in the cancerdusein.org corpus and 4.30\% (3014/70,092) of the Facebook corpus. Conclusions: We found a good correspondence between detected topics on social media and topics covered by the self-administered questionnaires, which substantiates the sound construction of such questionnaires. We detected new emerging topics from social media that can be used to complete current self-administered questionnaires. Moreover, we confirmed that social media mining is an important source of information for complementary analysis of quality of life. ", doi="10.2196/medinform.7779", url="http://medinform.jmir.org/2017/3/e23/", url="http://www.ncbi.nlm.nih.gov/pubmed/28760725" } @Article{info:doi/10.2196/medinform.7140, author="Elmessiry, Adel and Cooper, O. William and Catron, F. Thomas and Karrass, Jan and Zhang, Zhe and Singh, P. 
Munindar", title="Triaging Patient Complaints: Monte Carlo Cross-Validation of Six Machine Learning Classifiers", journal="JMIR Med Inform", year="2017", month="Jul", day="31", volume="5", number="3", pages="e19", keywords="natural language processing", keywords="NLP", keywords="machine learning", keywords="patient complaints", abstract="Background: Unsolicited patient complaints can be a useful service recovery tool for health care organizations. Some patient complaints contain information that may necessitate further action on the part of the health care organization and/or the health care professional. Current approaches depend on the manual processing of patient complaints, which can be costly, slow, and challenging in terms of scalability. Objective: The aim of this study was to evaluate automatic patient triage, which can potentially improve response time and provide much-needed scale, thereby enhancing opportunities to encourage physicians to self-regulate. Methods: We implemented a comparison of several well-known machine learning classifiers to detect whether a complaint was associated with a physician or his/her medical practice. We compared these classifiers using a real-life dataset containing 14,335 patient complaints associated with 768 physicians that was extracted from patient complaints collected by the Patient Advocacy Reporting System developed at Vanderbilt University and associated institutions. We conducted a 10-splits Monte Carlo cross-validation to validate our results. Results: We achieved an accuracy of 82\% and F-score of 81\% in correctly classifying patient complaints with sensitivity and specificity of 0.76 and 0.87, respectively. Conclusions: We demonstrate that natural language processing methods based on modeling patient complaint text can be effective in identifying those patient complaints requiring physician action. 
", doi="10.2196/medinform.7140", url="http://medinform.jmir.org/2017/3/e19/", url="http://www.ncbi.nlm.nih.gov/pubmed/28760726" } @Article{info:doi/10.2196/jmir.7276, author="Cheng, Qijin and Li, MH Tim and Kwok, Chi-Leung and Zhu, Tingshao and Yip, SF Paul", title="Assessing Suicide Risk and Emotional Distress in Chinese Social Media: A Text Mining and Machine Learning Study", journal="J Med Internet Res", year="2017", month="Jul", day="10", volume="19", number="7", pages="e243", keywords="suicide", keywords="psychological stress", keywords="social media", keywords="Chinese", keywords="natural language", keywords="machine learning", abstract="Background: Early identification and intervention are imperative for suicide prevention. However, at-risk people often neither seek help nor take professional assessment. A tool to automatically assess their risk levels in natural settings can increase the opportunity for early intervention. Objective: The aim of this study was to explore whether computerized language analysis methods can be utilized to assess one's suicide risk and emotional distress in Chinese social media. Methods: A Web-based survey of Chinese social media (ie, Weibo) users was conducted to measure their suicide risk factors including suicide probability, Weibo suicide communication (WSC), depression, anxiety, and stress levels. Participants' Weibo posts published in the public domain were also downloaded with their consent. The Weibo posts were parsed and fitted into Simplified Chinese-Linguistic Inquiry and Word Count (SC-LIWC) categories. The associations between SC-LIWC features and the 5 suicide risk factors were examined by logistic regression. Furthermore, the support vector machine (SVM) model was applied based on the language features to automatically classify whether a Weibo user exhibited any of the 5 risk factors. Results: A total of 974 Weibo users participated in the survey. 
Those with high suicide probability were marked by a higher usage of pronoun (odds ratio, OR=1.18, P=.001), prepend words (OR=1.49, P=.02), multifunction words (OR=1.12, P=.04), a lower usage of verb (OR=0.78, P<.001), and a greater total word count (OR=1.007, P=.008). Second-person plural was positively associated with severe depression (OR=8.36, P=.01) and stress (OR=11, P=.005), whereas work-related words were negatively associated with WSC (OR=0.71, P=.008), severe depression (OR=0.56, P=.005), and anxiety (OR=0.77, P=.02). Inconsistently, third-person plural was found to be negatively associated with WSC (OR=0.02, P=.047) but positively with severe stress (OR=41.3, P=.04). Achievement-related words were positively associated with depression (OR=1.68, P=.003), whereas health- (OR=2.36, P=.004) and death-related (OR=2.60, P=.01) words positively associated with stress. The machine classifiers did not achieve satisfying performance in the full sample set but could classify high suicide probability (area under the curve, AUC=0.61, P=.04) and severe anxiety (AUC=0.75, P<.001) among those who have exhibited WSC. Conclusions: SC-LIWC is useful to examine language markers of suicide risk and emotional distress in Chinese social media and can identify characteristics different from previous findings in the English literature. Some findings are leading to new hypotheses for future verification. Machine classifiers based on SC-LIWC features are promising but still require further optimization for application in real life. ", doi="10.2196/jmir.7276", url="http://www.jmir.org/2017/7/e243/", url="http://www.ncbi.nlm.nih.gov/pubmed/28694239" } @Article{info:doi/10.2196/medinform.7123, author="Duz, Marco and Marshall, F. 
John and Parkin, Tim", title="Validation of an Improved Computer-Assisted Technique for Mining Free-Text Electronic Medical Records", journal="JMIR Med Inform", year="2017", month="Jun", day="29", volume="5", number="2", pages="e17", keywords="text mining", keywords="data mining", keywords="electronic medical record", keywords="validation studies", abstract="Background: The use of electronic medical records (EMRs) offers opportunity for clinical epidemiological research. With large EMR databases, automated analysis processes are necessary but require thorough validation before they can be routinely used. Objective: The aim of this study was to validate a computer-assisted technique using commercially available content analysis software (SimStat-WordStat v.6 (SS/WS), Provalis Research) for mining free-text EMRs. Methods: The dataset used for the validation process included life-long EMRs from 335 patients (17,563 rows of data), selected at random from a larger dataset (141,543 patients, {\textasciitilde}2.6 million rows of data) and obtained from 10 equine veterinary practices in the United Kingdom. The ability of the computer-assisted technique to detect rows of data (cases) of colic, renal failure, right dorsal colitis, and non-steroidal anti-inflammatory drug (NSAID) use in the population was compared with manual classification. The first step of the computer-assisted analysis process was the definition of inclusion dictionaries to identify cases, including terms identifying a condition of interest. Words in inclusion dictionaries were selected from the list of all words in the dataset obtained in SS/WS. The second step consisted of defining an exclusion dictionary, including combinations of words to remove cases erroneously classified by the inclusion dictionary alone. The third step was the definition of a reinclusion dictionary to reinclude cases that had been erroneously classified by the exclusion dictionary. 
Finally, cases obtained by the exclusion dictionary were removed from cases obtained by the inclusion dictionary, and cases from the reinclusion dictionary were subsequently reincluded using Rv3.0.2 (R Foundation for Statistical Computing, Vienna, Austria). Manual analysis was performed as a separate process by a single experienced clinician reading through the dataset once and classifying each row of data based on the interpretation of the free-text notes. Validation was performed by comparison of the computer-assisted method with manual analysis, which was used as the gold standard. Sensitivity, specificity, negative predictive values (NPVs), positive predictive values (PPVs), and F values of the computer-assisted process were calculated by comparing them with the manual classification. Results: Lowest sensitivity, specificity, PPVs, NPVs, and F values were 99.82\% (1128/1130), 99.88\% (16410/16429), 94.6\% (223/239), 100.00\% (16410/16412), and 99.0\% (100{\texttimes}2{\texttimes}0.983{\texttimes}0.998/[0.983+0.998]), respectively. The computer-assisted process required few seconds to run, although an estimated 30 h were required for dictionary creation. Manual classification required approximately 80 man-hours. Conclusions: The critical step in this work is the creation of accurate and inclusive dictionaries to ensure that no potential cases are missed. It is significantly easier to remove false positive terms from a SS/WS selected subset of a large database than search that original database for potential false negatives. The benefits of using this method are proportional to the size of the dataset to be analyzed. 
", doi="10.2196/medinform.7123", url="http://medinform.jmir.org/2017/2/e17/", url="http://www.ncbi.nlm.nih.gov/pubmed/28663163" } @Article{info:doi/10.2196/publichealth.6577, author="Abdellaoui, Redhouane and Sch{\"u}ck, St{\'e}phane and Texier, Nathalie and Burgun, Anita", title="Filtering Entities to Optimize Identification of Adverse Drug Reaction From Social Media: How Can the Number of Words Between Entities in the Messages Help?", journal="JMIR Public Health Surveill", year="2017", month="Jun", day="22", volume="3", number="2", pages="e36", keywords="pharmacovigilance", keywords="social media", keywords="text mining", keywords="Gaussian mixture model", keywords="EM algorithm", keywords="clustering", keywords="density estimation", abstract="Background: With the increasing popularity of Web 2.0 applications, social media has made it possible for individuals to post messages on adverse drug reactions. In such online conversations, patients discuss their symptoms, medical history, and diseases. These disorders may correspond to adverse drug reactions (ADRs) or any other medical condition. Therefore, methods must be developed to distinguish between false positives and true ADR declarations. Objective: The aim of this study was to investigate a method for filtering out disorder terms that did not correspond to adverse events by using the distance (as number of words) between the drug term and the disorder or symptom term in the post. We hypothesized that the shorter the distance between the disorder name and the drug, the higher the probability to be an ADR. Methods: We analyzed a corpus of 648 messages corresponding to a total of 1654 (drug and disorder) pairs from 5 French forums using Gaussian mixture models and an expectation-maximization (EM) algorithm. Results: The distribution of the distances between the drug term and the disorder term enabled the filtering of 50.03\% (733/1465) of the disorders that were not ADRs. 
Our filtering strategy achieved a precision of 95.8\% and a recall of 50.0\%. Conclusions: This study suggests that such distance between terms can be used for identifying false positives, thereby improving ADR detection in social media. ", doi="10.2196/publichealth.6577", url="http://publichealth.jmir.org/2017/2/e36/", url="http://www.ncbi.nlm.nih.gov/pubmed/28642212" } @Article{info:doi/10.2196/jmir.7156, author="Guo, Haihong and Na, Xu and Hou, Li and Li, Jiao", title="Classifying Chinese Questions Related to Health Care Posted by Consumers Via the Internet", journal="J Med Internet Res", year="2017", month="Jun", day="20", volume="19", number="6", pages="e220", keywords="classification", keywords="natural language processing", keywords="hypertension", keywords="consumer health information", abstract="Background: In question answering (QA) system development, question classification is crucial for identifying information needs and improving the accuracy of returned answers. Although the questions are domain-specific, they are asked by non-professionals, making the question classification task more challenging. Objective: This study aimed to classify health care--related questions posted by the general public (Chinese speakers) on the Internet. Methods: A topic-based classification schema for health-related questions was built by manually annotating randomly selected questions. The Kappa statistic was used to measure the interrater reliability of multiple annotation results. Using the above corpus, we developed a machine-learning method to automatically classify these questions into one of the following six classes: Condition Management, Healthy Lifestyle, Diagnosis, Health Provider Choice, Treatment, and Epidemiology. Results: The consumer health question schema was developed with a four-hierarchical-level of specificity, comprising 48 quaternary categories and 35 annotation rules. The 2000 sample questions were coded with 2000 major codes and 607 minor codes. 
Using natural language processing techniques, we expressed the Chinese questions as a set of lexical, grammatical, and semantic features. Furthermore, the effective features were selected to improve the question classification performance. From the 6-category classification results, we achieved an average precision of 91.41\%, recall of 89.62\%, and F1 score of 90.24\%. Conclusions: In this study, we developed an automatic method to classify questions related to Chinese health care posted by the general public. It enables Artificial Intelligence (AI) agents to understand Internet users' information needs on health care. ", doi="10.2196/jmir.7156", url="http://www.jmir.org/2017/6/e220/", url="http://www.ncbi.nlm.nih.gov/pubmed/28634156" } @Article{info:doi/10.2196/jmir.6887, author="Meystre, Stephane and Gouripeddi, Ramkiran and Tieder, Joel and Simmons, Jeffrey and Srivastava, Rajendu and Shah, Samir", title="Enhancing Comparative Effectiveness Research With Automated Pediatric Pneumonia Detection in a Multi-Institutional Clinical Repository: A PHIS+ Pilot Study", journal="J Med Internet Res", year="2017", month="May", day="15", volume="19", number="5", pages="e162", keywords="natural language processing", keywords="pneumonia, bacterial", keywords="medical informatics", keywords="comparative effectiveness research", abstract="Background: Community-acquired pneumonia is a leading cause of pediatric morbidity. Administrative data are often used to conduct comparative effectiveness research (CER) with sufficient sample sizes to enhance detection of important outcomes. However, such studies are prone to misclassification errors because of the variable accuracy of discharge diagnosis codes. Objective: The aim of this study was to develop an automated, scalable, and accurate method to determine the presence or absence of pneumonia in children using chest imaging reports. 
Methods: The multi-institutional PHIS+ clinical repository was developed to support pediatric CER by expanding an administrative database of children's hospitals with detailed clinical data. To develop a scalable approach to find patients with bacterial pneumonia more accurately, we developed a Natural Language Processing (NLP) application to extract relevant information from chest diagnostic imaging reports. Domain experts established a reference standard by manually annotating 282 reports to train and then test the NLP application. Findings of pleural effusion, pulmonary infiltrate, and pneumonia were automatically extracted from the reports and then used to automatically classify whether a report was consistent with bacterial pneumonia. Results: Compared with the annotated diagnostic imaging reports reference standard, the most accurate implementation of machine learning algorithms in our NLP application allowed extracting relevant findings with a sensitivity of .939 and a positive predictive value of .925. It allowed classifying reports with a sensitivity of .71, a positive predictive value of .86, and a specificity of .962. When compared with each of the domain experts manually annotating these reports, the NLP application allowed for significantly higher sensitivity (.71 vs .527) and similar positive predictive value and specificity . Conclusions: NLP-based pneumonia information extraction of pediatric diagnostic imaging reports performed better than domain experts in this pilot study. NLP is an efficient method to extract information from a large collection of imaging reports to facilitate CER. 
", doi="10.2196/jmir.6887", url="http://www.jmir.org/2017/5/e162/", url="http://www.ncbi.nlm.nih.gov/pubmed/28506958" } @Article{info:doi/10.2196/jmir.6533, author="Gibbons, Chris and Richards, Suzanne and Valderas, Maria Jose and Campbell, John", title="Supervised Machine Learning Algorithms Can Classify Open-Text Feedback of Doctor Performance With Human-Level Accuracy", journal="J Med Internet Res", year="2017", month="Mar", day="15", volume="19", number="3", pages="e65", keywords="machine learning", keywords="surveys and questionnaires", keywords="feedback", keywords="data mining", keywords="work performance", abstract="Background: Machine learning techniques may be an effective and efficient way to classify open-text reports on doctor's activity for the purposes of quality assurance, safety, and continuing professional development. Objective: The objective of the study was to evaluate the accuracy of machine learning algorithms trained to classify open-text reports of doctor performance and to assess the potential for classifications to identify significant differences in doctors' professional performance in the United Kingdom. Methods: We used 1636 open-text comments (34,283 words) relating to the performance of 548 doctors collected from a survey of clinicians' colleagues using the General Medical Council Colleague Questionnaire (GMC-CQ). We coded 77.75\% (1272/1636) of the comments into 5 global themes (innovation, interpersonal skills, popularity, professionalism, and respect) using a qualitative framework. We trained 8 machine learning algorithms to classify comments and assessed their performance using several training samples. We evaluated doctor performance using the GMC-CQ and compared scores between doctors with different classifications using t tests. Results: Individual algorithm performance was high (range F score=.68 to .83). 
Interrater agreement between the algorithms and the human coder was highest for codes relating to ``popular'' (recall=.97), ``innovator'' (recall=.98), and ``respected'' (recall=.87) codes and was lower for the ``interpersonal'' (recall=.80) and ``professional'' (recall=.82) codes. A 10-fold cross-validation demonstrated similar performance in each analysis. When combined together into an ensemble of multiple algorithms, mean human-computer interrater agreement was .88. Comments that were classified as ``respected,'' ``professional,'' and ``interpersonal'' related to higher doctor scores on the GMC-CQ compared with comments that were not classified (P<.05). Scores did not vary between doctors who were rated as popular or innovative and those who were not rated at all (P>.05). Conclusions: Machine learning algorithms can classify open-text feedback of doctor performance into multiple themes derived by human raters with high performance. Colleague open-text comments that signal respect, professionalism, and being interpersonal may be key indicators of doctor's performance. ", doi="10.2196/jmir.6533", url="http://www.jmir.org/2017/3/e65/", url="http://www.ncbi.nlm.nih.gov/pubmed/28298265" } @Article{info:doi/10.2196/resprot.5948, author="Luther, L. Stephen and Thomason, S. Susan and Sabharwal, Sunil and Finch, K. Dezon and McCart, James and Toyinbo, Peter and Bouayad, Lina and Matheny, E. Michael and Gobbel, T. 
Glenn and Powell-Cope, Gail", title="Leveraging Electronic Health Care Record Information to Measure Pressure Ulcer Risk in Veterans With Spinal Cord Injury: A Longitudinal Study Protocol", journal="JMIR Res Protoc", year="2017", month="Jan", day="19", volume="6", number="1", pages="e3", keywords="natural language processing", keywords="pressure ulcer", keywords="risk assessment", keywords="spinal cord injury", keywords="text mining", abstract="Background: Pressure ulcers (PrUs) are a frequent, serious, and costly complication for veterans with spinal cord injury (SCI). The health care team should periodically identify PrU risk, although there is no tool in the literature that has been found to be reliable, valid, and sensitive enough to assess risk in this vulnerable population. Objective: The immediate goal is to develop a risk assessment model that validly estimates the probability of developing a PrU. The long-term goal is to assist veterans with SCI and their providers in preventing PrUs through an automated system of risk assessment integrated into the veteran's electronic health record (EHR). Methods: This 5-year longitudinal, retrospective, cohort study targets 12,344 veterans with SCI who were cared for in the Veterans Health Administration (VHA) in fiscal year (FY) 2009 and had no record of a PrU in the prior 12 months. Potential risk factors identified in the literature were reviewed by an expert panel that prioritized factors and determined if these were found in structured data or unstructured form in narrative clinical notes for FY 2009-2013. These data are from the VHA enterprise Corporate Data Warehouse that is derived from the EHR structured (ie, coded in database/table) or narrative (ie, text in clinical notes) data for FY 2009-2013. Results: This study is ongoing and final results are expected in 2017. 
Thus far, the expert panel reviewed the initial list of risk factors extracted from the literature; the panel recommended additions and omissions and provided insights about the format in which the documentation of the risk factors might exist in the EHR. This list was then iteratively refined through review and discussed with individual experts in the field. The cohort for the study was then identified, and all structured, unstructured, and semistructured data were extracted. Annotation schemas were developed, samples of documents were extracted, and annotations are ongoing. Operational definitions of structured data elements have been created and steps to create an analytic dataset are underway. Conclusions: To our knowledge, this is the largest cohort employed to identify PrU risk factors in the United States. It also represents the first time natural language processing and statistical text mining will be used to expand the number of variables available for analysis. A major strength of this quantitative study is that all VHA SCI centers were included in the analysis, reducing potential for selection bias and providing increased power for complex statistical analyses. This longitudinal study will eventually result in a risk prediction tool to assess PrU risk that is reliable and valid, and that is sensitive to this vulnerable population. 
", doi="10.2196/resprot.5948", url="http://www.researchprotocols.org/2017/1/e3/", url="http://www.ncbi.nlm.nih.gov/pubmed/28104580" } @Article{info:doi/10.2196/medinform.6373, author="Chen, Jinying and Zheng, Jiaping and Yu, Hong", title="Finding Important Terms for Patients in Their Electronic Health Records: A Learning-to-Rank Approach Using Expert Annotations", journal="JMIR Med Inform", year="2016", month="Nov", day="30", volume="4", number="4", pages="e40", keywords="electronic health records", keywords="natural language processing", keywords="information extraction", keywords="supervised learning", keywords="learning to rank", abstract="Background: Many health organizations allow patients to access their own electronic health record (EHR) notes through online patient portals as a way to enhance patient-centered care. However, EHR notes are typically long and contain abundant medical jargon that can be difficult for patients to understand. In addition, many medical terms in patients' notes are not directly related to their health care needs. One way to help patients better comprehend their own notes is to reduce information overload and help them focus on medical terms that matter most to them. Interventions can then be developed by giving them targeted education to improve their EHR comprehension and the quality of care. Objective: We aimed to develop a supervised natural language processing (NLP) system called Finding impOrtant medical Concepts most Useful to patientS (FOCUS) that automatically identifies and ranks medical terms in EHR notes based on their importance to the patients. Methods: First, we built an expert-annotated corpus. For each EHR note, 2 physicians independently identified medical terms important to the patient. Using the physicians' agreement as the gold standard, we developed and evaluated FOCUS. 
FOCUS first identifies candidate terms from each EHR note using MetaMap and then ranks the terms using a support vector machine-based learn-to-rank algorithm. We explored rich learning features, including distributed word representation, Unified Medical Language System semantic type, topic features, and features derived from consumer health vocabulary. We compared FOCUS with 2 strong baseline NLP systems. Results: Physicians annotated 90 EHR notes and identified a mean of 9 (SD 5) important terms per note. The Cohen's kappa annotation agreement was .51. The 10-fold cross-validation results show that FOCUS achieved an area under the receiver operating characteristic curve (AUC-ROC) of 0.940 for ranking candidate terms from EHR notes to identify important terms. When including term identification, the performance of FOCUS for identifying important terms from EHR notes was 0.866 AUC-ROC. Both performance scores significantly exceeded the corresponding baseline system scores (P<.001). Rich learning features contributed to FOCUS's performance substantially. Conclusions: FOCUS can automatically rank terms from EHR notes based on their importance to patients. It may help develop future interventions that improve quality of care. ", doi="10.2196/medinform.6373", url="http://medinform.jmir.org/2016/4/e40/", url="http://www.ncbi.nlm.nih.gov/pubmed/27903489" } @Article{info:doi/10.2196/medinform.5748, author="Park, Sook Min and He, Zhe and Chen, Zhiwei and Oh, Sanghee and Bian, Jiang", title="Consumers' Use of UMLS Concepts on Social Media: Diabetes-Related Textual Data Analysis in Blog and Social Q\&A Sites", journal="JMIR Med Inform", year="2016", month="Nov", day="24", volume="4", number="4", pages="e41", keywords="controlled vocabulary", keywords="consumer health vocabulary", keywords="concept coverage", abstract="Background: The widely known terminology gap between health professionals and health consumers hinders effective information seeking for consumers. 
Objective: The aim of this study was to better understand consumers' usage of medical concepts by evaluating the coverage of concepts and semantic types of the Unified Medical Language System (UMLS) on diabetes-related postings in 2 types of social media: blogs and social question and answer (Q\&A). Methods: We collected 2 types of social media data: (1) a total of 3711 blogs tagged with ``diabetes'' on Tumblr posted between February and October 2015; and (2) a total of 58,422 questions and associated answers posted between 2009 and 2014 in the diabetes category of Yahoo! Answers. We analyzed the datasets using a widely adopted biomedical text processing framework Apache cTAKES and its extension YTEX. First, we applied the named entity recognition (NER) method implemented in YTEX to identify UMLS concepts in the datasets. We then analyzed the coverage and the popularity of concepts in the UMLS source vocabularies across the 2 datasets (ie, blogs and social Q\&A). Further, we conducted a concept-level comparative coverage analysis between SNOMED Clinical Terms (SNOMED CT) and Open-Access Collaborative Consumer Health Vocabulary (OAC CHV)---the top 2 UMLS source vocabularies that have the most coverage on our datasets. We also analyzed the UMLS semantic types that were frequently observed in our datasets. Results: We identified 2415 UMLS concepts from blog postings, 6452 UMLS concepts from social Q\&A questions, and 10,378 UMLS concepts from the answers. The medical concepts identified in the blogs can be covered by 56 source vocabularies in the UMLS, while those in questions and answers can be covered by 58 source vocabularies. SNOMED CT was the dominant vocabulary in terms of coverage across all the datasets, ranging from 84.9\% to 95.9\%. It was followed by OAC CHV (between 73.5\% and 80.0\%) and Metathesaurus Names (MTH) (between 55.7\% and 73.5\%). 
All of the social media datasets shared frequent semantic types such as ``Amino Acid, Peptide, or Protein,'' ``Body Part, Organ, or Organ Component,'' and ``Disease or Syndrome.'' Conclusions: Although the 3 social media datasets vary greatly in size, they exhibited similar conceptual coverage among UMLS source vocabularies and the identified concepts showed similar semantic type distributions. As such, concepts that are both frequently used by consumers and also found in professional vocabularies such as SNOMED CT can be suggested to OAC CHV to improve its coverage. ", doi="10.2196/medinform.5748", url="https://medinform.jmir.org/2016/4/e41/", url="http://www.ncbi.nlm.nih.gov/pubmed/27884812" } @Article{info:doi/10.2196/publichealth.6586, author="Tangherlini, R. Timothy and Roychowdhury, Vwani and Glenn, Beth and Crespi, M. Catherine and Bandari, Roja and Wadia, Akshay and Falahi, Misagh and Ebrahimzadeh, Ehsan and Bastani, Roshan", title="``Mommy Blogs'' and the Vaccination Exemption Narrative: Results From A Machine-Learning Approach for Story Aggregation on Parenting Social Media Sites", journal="JMIR Public Health Surveill", year="2016", month="Nov", day="22", volume="2", number="2", pages="e166", keywords="vaccination", keywords="social media", keywords="machine learning", keywords="personal narratives", keywords="Internet", keywords="health knowledge", keywords="attitudes", keywords="practice", abstract="Background: Social media offer an unprecedented opportunity to explore how people talk about health care at a very large scale. Numerous studies have shown the importance of websites with user forums for people seeking information related to health. Parents turn to some of these sites, colloquially referred to as ``mommy blogs,'' to share concerns about children's health care, including vaccination. 
Although substantial work has considered the role of social media, particularly Twitter, in discussions of vaccination and other health care--related issues, there has been little work on describing the underlying structure of these discussions and the role of persuasive storytelling, particularly on sites with no limits on post length. Understanding the role of persuasive storytelling at Internet scale provides useful insight into how people discuss vaccinations, including exemption-seeking behavior, which has been tied to a recent diminution of herd immunity in some communities. Objective: To develop an automated and scalable machine-learning method for story aggregation on social media sites dedicated to discussions of parenting. We wanted to discover the aggregate narrative frameworks to which individuals, through their exchange of experiences and commentary, contribute over time in a particular topic domain. We also wanted to characterize temporal trends in these narrative frameworks on the sites over the study period. Methods: To ensure that our data capture long-term discussions and not short-term reactions to recent events, we developed a dataset of 1.99 million posts contributed by 40,056 users and viewed 20.12 million times indexed from 2 parenting sites over a period of 105 months. Using probabilistic methods, we determined the topics of discussion on these parenting sites. We developed a generative statistical-mechanical narrative model to automatically extract the underlying stories and story fragments from millions of posts. We aggregated the stories into an overarching narrative framework graph. In our model, stories were represented as network graphs with actants as nodes and their various relationships as edges. We estimated the latent stories circulating on these sites by modeling the posts as a sampling of the hidden narrative framework graph. Temporal trends were examined based on monthly user-poststatistics. 
Results: We discovered that discussions of exemption from vaccination requirements are highly represented. We found a strong narrative framework related to exemption seeking and a culture of distrust of government and medical institutions. Various posts reinforced part of the narrative framework graph in which parents, medical professionals, and religious institutions emerged as key nodes, and exemption seeking emerged as an important edge. In the aggregate story, parents used religion or belief to acquire exemptions to protect their children from vaccines that are required by schools or government institutions, but (allegedly) cause adverse reactions such as autism, pain, compromised immunity, and even death. Although parents joined and left the discussion forums over time, discussions and stories about exemptions were persistent and robust to these membership changes. Conclusions: Analyzing parent forums about health care using an automated analytic approach, such as the one presented here, allows the detection of widespread narrative frameworks that structure and inform discussions. In most vaccination stories from the sites we analyzed, it is taken for granted that vaccines and not vaccine preventable diseases (VPDs) pose a threat to children. Because vaccines are seen as a threat, parents focus on sharing successful strategies for avoiding them, with exemption being the foremost among these strategies. When new parents join such sites, they may be exposed to this endemic narrative framework in the threads they read and to which they contribute, which may influence their health care decision making. 
", doi="10.2196/publichealth.6586", url="http://publichealth.jmir.org/2016/2/e166/", url="http://www.ncbi.nlm.nih.gov/pubmed/27876690" } @Article{info:doi/10.2196/jmir.5439, author="Ben-Sasson, Ayelet and Yom-Tov, Elad", title="Online Concerns of Parents Suspecting Autism Spectrum Disorder in Their Child: Content Analysis of Signs and Automated Prediction of Risk", journal="J Med Internet Res", year="2016", month="Nov", day="22", volume="18", number="11", pages="e300", keywords="online queries", keywords="autistic disorders", keywords="parents", keywords="machine learning", keywords="early detection", abstract="Background: Online communities are used as platforms by parents to verify developmental and health concerns related to their child. The increasing public awareness of autism spectrum disorders (ASD) leads more parents to suspect ASD in their child. Early identification of ASD is important for early intervention. Objective: To characterize the symptoms mentioned in online queries posed by parents who suspect that their child might have ASD and determine whether they are age-specific. To test the efficacy of machine learning tools in classifying the child's risk of ASD based on the parent's narrative. Methods: To this end, we analyzed online queries posed by parents who were concerned that their child might have ASD and categorized the warning signs they mentioned according to ASD-specific and non-ASD--specific domains. We then used the data to test the efficacy with which a trained machine learning tool classified the degree of ASD risk. Yahoo Answers, a social site for posting queries and finding answers, was mined for queries of parents asking the community whether their child has ASD. A total of 195 queries were sampled for this study (mean child age=38.0 months; 84.7\% [160/189] boys). Content text analysis of the queries aimed to categorize the types of symptoms described and obtain clinical judgment of the child's ASD-risk level. 
Results: Concerns related to repetitive and restricted behaviors and interests (RRBI) were the most prevalent (75.4\%, 147/195), followed by concerns related to language (61.5\%, 120/195) and emotional markers (50.3\%, 98/195). Of the 195 queries, 18.5\% (36/195) were rated by clinical experts as low-risk, 30.8\% (60/195) as medium-risk, and 50.8\% (99/195) as high-risk. Risk groups differed significantly (P<.001) in the rate of concerns in the language, social, communication, and RRBI domains. When testing whether an automatic classifier (decision tree) could predict if a query was medium- or high-risk based on the text of the query and the coded symptoms, performance reached an area under the receiver operating curve (ROC) curve of 0.67 (CI 95\% 0.50-0.78), whereas predicting from the text and the coded signs resulted in an area under the curve of 0.82 (0.80-0.86). Conclusions: Findings call for health care providers to closely listen to parental ASD-related concerns, as recommended by screening guidelines. They also demonstrate the need for Internet-based screening systems that utilize parents' narratives using a decision tree questioning method. ", doi="10.2196/jmir.5439", url="http://www.jmir.org/2016/11/e300/", url="http://www.ncbi.nlm.nih.gov/pubmed/27876688" } @Article{info:doi/10.2196/medinform.6328, author="Zheng, Le and Wang, Yue and Hao, Shiying and Shin, Y. Andrew and Jin, Bo and Ngo, D. Anh and Jackson-Browne, S. Medina and Feller, J. Daniel and Fu, Tianyun and Zhang, Karena and Zhou, Xin and Zhu, Chunqing and Dai, Dorothy and Yu, Yunxian and Zheng, Gang and Li, Yu-Ming and McElhinney, B. Doff and Culver, S. Devore and Alfreds, T. Shaun and Stearns, Frank and Sylvester, G. 
Karl and Widen, Eric and Ling, Bruce Xuefeng", title="Web-based Real-Time Case Finding for the Population Health Management of Patients With Diabetes Mellitus: A Prospective Validation of the Natural Language Processing--Based Algorithm With Statewide Electronic Medical Records", journal="JMIR Med Inform", year="2016", month="Nov", day="11", volume="4", number="4", pages="e37", keywords="electronic medical record", keywords="natural language processing", keywords="diabetes mellitus", keywords="data mining", abstract="Background: Diabetes case finding based on structured medical records does not fully identify diabetic patients whose medical histories related to diabetes are available in the form of free text. Manual chart reviews have been used but involve high labor costs and long latency. Objective: This study developed and tested a Web-based diabetes case finding algorithm using both structured and unstructured electronic medical records (EMRs). Methods: This study was based on the health information exchange (HIE) EMR database that covers almost all health facilities in the state of Maine, United States. Using narrative clinical notes, a Web-based natural language processing (NLP) case finding algorithm was retrospectively (July 1, 2012, to June 30, 2013) developed with a random subset of HIE-associated facilities, which was then blind tested with the remaining facilities. The NLP-based algorithm was subsequently integrated into the HIE database and validated prospectively (July 1, 2013, to June 30, 2014). Results: Of the 935,891 patients in the prospective cohort, 64,168 diabetes cases were identified using diagnosis codes alone. Our NLP-based case finding algorithm prospectively found an additional 5756 uncodified cases (5756/64,168, 8.97\% increase) with a positive predictive value of .90. 
Of the 21,720 diabetic patients identified by both methods, 6616 patients (6616/21,720, 30.46\%) were identified by the NLP-based algorithm before a diabetes diagnosis was noted in the structured EMR (mean time difference = 48 days). Conclusions: The online NLP algorithm was effective in identifying uncodified diabetes cases in real time, leading to a significant improvement in diabetes case finding. The successful integration of the NLP-based case finding algorithm into the Maine HIE database indicates a strong potential for application of this novel method to achieve a more complete ascertainment of diagnoses of diabetes mellitus. ", doi="10.2196/medinform.6328", url="http://medinform.jmir.org/2016/4/e37/", url="http://www.ncbi.nlm.nih.gov/pubmed/27836816" } @Article{info:doi/10.2196/medinform.5544, author="Kaufman, R. David and Sheehan, Barbara and Stetson, Peter and Bhatt, R. Ashish and Field, I. Adele and Patel, Chirag and Maisel, Mark James", title="Natural Language Processing--Enabled and Conventional Data Capture Methods for Input to Electronic Health Records: A Comparative Usability Study", journal="JMIR Med Inform", year="2016", month="Oct", day="28", volume="4", number="4", pages="e35", keywords="electronic health records", keywords="natural language processing", keywords="medical transcription", keywords="user-computer interface", abstract="Background: The process of documentation in electronic health records (EHRs) is known to be time consuming, inefficient, and cumbersome. The use of dictation coupled with manual transcription has become an increasingly common practice. In recent years, natural language processing (NLP)--enabled data capture has become a viable alternative for data entry. It enables the clinician to maintain control of the process and potentially reduce the documentation burden. 
The question remains how this NLP-enabled workflow will impact EHR usability and whether it can meet the structured data and other EHR requirements while enhancing the user's experience. Objective: The objective of this study is evaluate the comparative effectiveness of an NLP-enabled data capture method using dictation and data extraction from transcribed documents (NLP Entry) in terms of documentation time, documentation quality, and usability versus standard EHR keyboard-and-mouse data entry. Methods: This formative study investigated the results of using 4 combinations of NLP Entry and Standard Entry methods (``protocols'') of EHR data capture. We compared a novel dictation-based protocol using MediSapien NLP (NLP-NLP) for structured data capture against a standard structured data capture protocol (Standard-Standard) as well as 2 novel hybrid protocols (NLP-Standard and Standard-NLP). The 31 participants included neurologists, cardiologists, and nephrologists. Participants generated 4 consultation or admission notes using 4 documentation protocols. We recorded the time on task, documentation quality (using the Physician Documentation Quality Instrument, PDQI-9), and usability of the documentation processes. Results: A total of 118 notes were documented across the 3 subject areas. The NLP-NLP protocol required a median of 5.2 minutes per cardiology note, 7.3 minutes per nephrology note, and 8.5 minutes per neurology note compared with 16.9, 20.7, and 21.2 minutes, respectively, using the Standard-Standard protocol and 13.8, 21.3, and 18.7 minutes using the Standard-NLP protocol (1 of 2 hybrid methods). Using 8 out of 9 characteristics measured by the PDQI-9 instrument, the NLP-NLP protocol received a median quality score sum of 24.5; the Standard-Standard protocol received a median sum of 29; and the Standard-NLP protocol received a median sum of 29.5. 
The mean total score of the usability measure was 36.7 when the participants used the NLP-NLP protocol compared with 30.3 when they used the Standard-Standard protocol. Conclusions: In this study, the feasibility of an approach to EHR data capture involving the application of NLP to transcribed dictation was demonstrated. This novel dictation-based approach has the potential to reduce the time required for documentation and improve usability while maintaining documentation quality. Future research will evaluate the NLP-based EHR data capture approach in a clinical setting. It is reasonable to assert that EHRs will increasingly use NLP-enabled data entry tools such as MediSapien NLP because they hold promise for enhancing the documentation process and end-user experience. ", doi="10.2196/medinform.5544", url="http://medinform.jmir.org/2016/4/e35/", url="http://www.ncbi.nlm.nih.gov/pubmed/27793791" } @Article{info:doi/10.2196/medinform.5353, author="Zhang, Ye and Willis, Erin and Paul, J. Michael and Elhadad, No{\'e}mie and Wallace, C. Byron", title="Characterizing the (Perceived) Newsworthiness of Health Science Articles: A Data-Driven Approach", journal="JMIR Med Inform", year="2016", month="Sep", day="22", volume="4", number="3", pages="e27", keywords="natural language processing", keywords="text classification", keywords="press release", keywords="media coverage", abstract="Background: Health science findings are primarily disseminated through manuscript publications. Information subsidies are used to communicate newsworthy findings to journalists in an effort to earn mass media coverage and further disseminate health science research to mass audiences. Journal editors and news journalists then select which news stories receive coverage and thus public attention. Objective: This study aims to identify attributes of published health science articles that correlate with (1) journal editor issuance of press releases and (2) mainstream media coverage. 
Methods: We constructed four novel datasets to identify factors that correlate with press release issuance and media coverage. These corpora include thousands of published articles, subsets of which received press release or mainstream media coverage. We used statistical machine learning methods to identify correlations between words in the science abstracts and press release issuance and media coverage. Further, we used a topic modeling-based machine learning approach to uncover latent topics predictive of the perceived newsworthiness of science articles. Results: Both press release issuance for, and media coverage of, health science articles are predictable from corresponding journal article content. For the former task, we achieved average areas under the curve (AUCs) of 0.666 (SD 0.019) and 0.882 (SD 0.018) on two separate datasets, comprising 3024 and 10,760 articles, respectively. For the latter task, models realized mean AUCs of 0.591 (SD 0.044) and 0.783 (SD 0.022) on two datasets---in this case containing 422 and 28,910 pairs, respectively. We reported most-predictive words and topics for press release or news coverage. Conclusions: We have presented a novel data-driven characterization of content that renders health science ``newsworthy.'' The analysis provides new insights into the news coverage selection process. For example, it appears epidemiological papers concerning common behaviors (eg, alcohol consumption) tend to receive media attention. 
", doi="10.2196/medinform.5353", url="http://medinform.jmir.org/2016/3/e27/", url="http://www.ncbi.nlm.nih.gov/pubmed/27658571" } @Article{info:doi/10.2196/medinform.5490, author="Wongchaisuwat, Papis and Klabjan, Diego and Jonnalagadda, Reddy Siddhartha", title="A Semi-Supervised Learning Approach to Enhance Health Care Community--Based Question Answering: A Case Study in Alcoholism", journal="JMIR Med Inform", year="2016", month="Aug", day="02", volume="4", number="3", pages="e24", keywords="machine learning", keywords="natural language processing", keywords="question answering", keywords="Web-based health communities", keywords="consumer health informatics", abstract="Background: Community-based question answering (CQA) sites play an important role in addressing health information needs. However, a significant number of posted questions remain unanswered. Automatically answering the posted questions can provide a useful source of information for Web-based health communities. Objective: In this study, we developed an algorithm to automatically answer health-related questions based on past questions and answers (QA). We also aimed to understand information embedded within Web-based health content that are good features in identifying valid answers. Methods: Our proposed algorithm uses information retrieval techniques to identify candidate answers from resolved QA. To rank these candidates, we implemented a semi-supervised leaning algorithm that extracts the best answer to a question. We assessed this approach on a curated corpus from Yahoo! Answers and compared against a rule-based string similarity baseline. Results: On our dataset, the semi-supervised learning algorithm has an accuracy of 86.2\%. Unified medical language system--based (health related) features used in the model enhance the algorithm's performance by proximately 8\%. A reasonably high rate of accuracy is obtained given that the data are considerably noisy. 
Important features distinguishing a valid answer from an invalid answer include text length, number of stop words contained in a test question, a distance between the test question and other questions in the corpus, and a number of overlapping health-related terms between questions. Conclusions: Overall, our automated QA system based on historical QA pairs is shown to be effective according to the dataset in this case study. It is developed for general use in the health care domain, which can also be applied to other CQA sites. ", doi="10.2196/medinform.5490", url="http://medinform.jmir.org/2016/3/e24/", url="http://www.ncbi.nlm.nih.gov/pubmed/27485666" } @Article{info:doi/10.2196/publichealth.5308, author="Lyles, Rees Courtney and Godbehere, Andrew and Le, Gem and El Ghaoui, Laurent and Sarkar, Urmimala", title="Applying Sparse Machine Learning Methods to Twitter: Analysis of the 2012 Change in Pap Smear Guidelines. A Sequential Mixed-Methods Study", journal="JMIR Public Health Surveill", year="2016", month="Jun", day="10", volume="2", number="1", pages="e21", keywords="Twitter", keywords="machine learning", keywords="social media", keywords="cervical cancer", keywords="qualitative research", abstract="Background: It is difficult to synthesize the vast amount of textual data available from social media websites. Capturing real-world discussions via social media could provide insights into individuals' opinions and the decision-making process. Objective: We conducted a sequential mixed methods study to determine the utility of sparse machine learning techniques in summarizing Twitter dialogues. We chose a narrowly defined topic for this approach: cervical cancer discussions over a 6-month time period surrounding a change in Pap smear screening guidelines. 
The results were then qualitatively coded from top 10 relevant topics to determine the efficiency of the clustering method in grouping distinct ideas, and how the discussion differed before vs. after the change in guidelines.
", doi="10.2196/publichealth.5308", url="http://publichealth.jmir.org/2016/1/e21/", url="http://www.ncbi.nlm.nih.gov/pubmed/27288093" } @Article{info:doi/10.2196/mental.4822, author="Braithwaite, R. Scott and Giraud-Carrier, Christophe and West, Josh and Barnes, D. Michael and Hanson, Lee Carl", title="Validating Machine Learning Algorithms for Twitter Data Against Established Measures of Suicidality", journal="JMIR Mental Health", year="2016", month="May", day="16", volume="3", number="2", pages="e21", keywords="suicide", keywords="social media", keywords="twitter", keywords="machine learning", abstract="Background: One of the leading causes of death in the United States (US) is suicide and new methods of assessment are needed to track its risk in real time. Objective: Our objective is to validate the use of machine learning algorithms for Twitter data against empirically validated measures of suicidality in the US population. Methods: Using a machine learning algorithm, the Twitter feeds of 135 Mechanical Turk (MTurk) participants were compared with validated, self-report measures of suicide risk. Results: Our findings show that people who are at high suicidal risk can be easily differentiated from those who are not by machine learning algorithms, which accurately identify the clinically significant suicidal rate in 92\% of cases (sensitivity: 53\%, specificity: 97\%, positive predictive value: 75\%, negative predictive value: 93\%). Conclusions: Machine learning algorithms are efficient in differentiating people who are at a suicidal risk from those who are not. Evidence for suicidality can be measured in nonclinical populations using social media data. ", doi="10.2196/mental.4822", url="http://mental.jmir.org/2016/2/e21/", url="http://www.ncbi.nlm.nih.gov/pubmed/27185366" } @Article{info:doi/10.2196/jmir.4612, author="Park, Albert and Hartzler, L. Andrea and Huh, Jina and McDonald, W. 
David and Pratt, Wanda", title="Automatically Detecting Failures in Natural Language Processing Tools for Online Community Text", journal="J Med Internet Res", year="2015", month="Aug", day="31", volume="17", number="8", pages="e212", keywords="UMLS", keywords="natural language processing", keywords="automatic data processing", keywords="quantitative evaluation", keywords="information extraction", abstract="Background: The prevalence and value of patient-generated health text are increasing, but processing such text remains problematic. Although existing biomedical natural language processing (NLP) tools are appealing, most were developed to process clinician- or researcher-generated text, such as clinical notes or journal articles. In addition to being constructed for different types of text, other challenges of using existing NLP include constantly changing technologies, source vocabularies, and characteristics of text. These continuously evolving challenges warrant the need for applying low-cost systematic assessment. However, the primarily accepted evaluation method in NLP, manual annotation, requires tremendous effort and time. Objective: The primary objective of this study is to explore an alternative approach---using low-cost, automated methods to detect failures (eg, incorrect boundaries, missed terms, mismapped concepts) when processing patient-generated text with existing biomedical NLP tools. We first characterize common failures that NLP tools can make in processing online community text. We then demonstrate the feasibility of our automated approach in detecting these common failures using one of the most popular biomedical NLP tools, MetaMap. 
We used automated methods to detect almost half of MetaMap's 383,572 mappings as problematic.
", doi="10.2196/jmir.4612", url="http://www.jmir.org/2015/8/e212/", url="http://www.ncbi.nlm.nih.gov/pubmed/26323337" } @Article{info:doi/10.2196/medinform.3982, author="Ji, Xiaonan and Yen, Po-Yin", title="Using MEDLINE Elemental Similarity to Assist in the Article Screening Process for Systematic Reviews", journal="JMIR Med Inform", year="2015", month="Aug", day="31", volume="3", number="3", pages="e28", keywords="systematic review", keywords="evidence-based medicine", keywords="automatic document classification", keywords="relevance feedback", abstract="Background: Systematic reviews and their implementation in practice provide high quality evidence for clinical practice but are both time and labor intensive due to the large number of articles. Automatic text classification has proven to be instrumental in identifying relevant articles for systematic reviews. Existing approaches use machine learning model training to generate classification algorithms for the article screening process but have limitations. Objective: We applied a network approach to assist in the article screening process for systematic reviews using predetermined article relationships (similarity). The article similarity metric is calculated using the MEDLINE elements title (TI), abstract (AB), medical subject heading (MH), author (AU), and publication type (PT). We used an article network to illustrate the concept of article relationships. Using the concept, each article can be modeled as a node in the network and the relationship between 2 articles is modeled as an edge connecting them. The purpose of our study was to use the article relationship to facilitate an interactive article recommendation process. Methods: We used 15 completed systematic reviews produced by the Drug Effectiveness Review Project and demonstrated the use of article networks to assist article recommendation. 
We also used repeated analysis of variance and Hommel's multiple comparison adjustment to demonstrate statistical evidence.
The ability to analyze conversations about e-cigarettes in real-time can provide important insight into trends in the public's knowledge, attitudes, and beliefs surrounding e-cigarettes, and subsequently guide public health interventions. Objective: Our aim was to establish a supervised machine learning algorithm to build predictive classification models that assess Twitter data for a range of factors related to e-cigarettes. Methods: Manual content analysis was conducted for 17,098 tweets. These tweets were coded for five categories: e-cigarette relevance, sentiment, user description, genre, and theme. Machine learning classification models were then built for each of these five categories, and word groupings (n-grams) were used to define the feature space for each classifier. Results: Predictive performance scores for classification models indicated that the models correctly labeled the tweets with the appropriate variables between 68.40\% and 99.34\% of the time, and the percentage of maximum possible improvement over a random baseline that was achieved by the classification models ranged from 41.59\% to 80.62\%. Classifiers with the highest performance scores that also achieved the highest percentage of the maximum possible improvement over a random baseline were Policy/Government (performance: 0.94; \% improvement: 80.62\%), Relevance (performance: 0.94; \% improvement: 75.26\%), Ad or Promotion (performance: 0.89; \% improvement: 72.69\%), and Marketing (performance: 0.91; \% improvement: 72.56\%). The most appropriate word-grouping unit (n-gram) was 1 for the majority of classifiers. Performance continued to marginally increase with the size of the training dataset of manually annotated data, but eventually leveled off. Even at low dataset sizes of 4000 observations, performance characteristics were fairly sound. 
Conclusions: Social media outlets like Twitter can uncover real-time snapshots of personal sentiment, knowledge, attitudes, and behavior that are not as accessible, at this scale, through any other offline platform. Using the vast data available through social media presents an opportunity for social science and public health methodologies to utilize computational methodologies to enhance and extend research and practice. This study was successful in automating a complex five-category manual content analysis of e-cigarette-related content on Twitter using machine learning techniques. The study details machine learning model specifications that provided the best accuracy for data related to e-cigarettes, as well as a replicable methodology to allow extension of these methods to additional topics. ", doi="10.2196/jmir.4392", url="http://www.jmir.org/2015/8/e208/", url="http://www.ncbi.nlm.nih.gov/pubmed/26307512" } @Article{info:doi/10.2196/medinform.4211, author="Zhou, Xiaofang and Zheng, An and Yin, Jiaheng and Chen, Rudan and Zhao, Xianyang and Xu, Wei and Cheng, Wenqing and Xia, Tian and Lin, Simon", title="Context-Sensitive Spelling Correction of Consumer-Generated Content on Health Care", journal="JMIR Med Inform", year="2015", month="Jul", day="31", volume="3", number="3", pages="e27", keywords="spelling correction system", keywords="context sensitive", keywords="consumer-generated content", keywords="biomedical ontology", abstract="Background: Consumer-generated content, such as postings on social media websites, can serve as an ideal source of information for studying health care from a consumer's perspective. However, consumer-generated content on health care topics often contains spelling errors, which, if not corrected, will be obstacles for downstream computer-based text analysis. 
Objective: In this study, we proposed a framework with a spelling correction system designed for consumer-generated content and a novel ontology-based evaluation system which was used to efficiently assess the correction quality. Additionally, we emphasized the importance of context sensitivity in the correction process, and demonstrated why correction methods designed for electronic medical records (EMRs) failed to perform well with consumer-generated content. Methods: First, we developed our spelling correction system based on Google Spell Checker. The system processed postings acquired from MedHelp, a biomedical bulletin board system (BBS), and saved misspelled words (eg, sertaline) and corresponding corrected words (eg, sertraline) into two separate sets. Second, to reduce the number of words needing manual examination in the evaluation process, we respectively matched the words in the two sets with terms in two biomedical ontologies: RxNorm and Systematized Nomenclature of Medicine -- Clinical Terms (SNOMED CT). The ratio of words which could be matched and appropriately corrected was used to evaluate the correction system's overall performance. Third, we categorized the misspelled words according to the types of spelling errors. Finally, we calculated the ratio of abbreviations in the postings, which remarkably differed between EMRs and consumer-generated content and could largely influence the overall performance of spelling checkers. Results: An uncorrected word and the corresponding corrected word was called a spelling pair, and the two words in the spelling pair were its members. In our study, there were 271 spelling pairs detected, among which 58 (21.4\%) pairs had one or two members matched in the selected ontologies. The ratio of appropriate correction in the 271 overall spelling errors was 85.2\% (231/271). The ratio of that in the 58 spelling pairs was 86\% (50/58), close to the overall ratio. 
We also found that linguistic errors took up 31.4\% (85/271) of all errors detected, and only 0.98\% (210/21,358) of words in the postings were abbreviations, which was much lower than the ratio in the EMRs (33.6\%). Conclusions: We conclude that our system can accurately correct spelling errors in consumer-generated content. Context sensitivity is indispensable in the correction process. Additionally, it can be confirmed that consumer-generated content differs from EMRs in that consumers seldom use abbreviations. Also, the evaluation method, taking advantage of biomedical ontology, can effectively estimate the accuracy of the correction system and reduce manual examination time. ", doi="10.2196/medinform.4211", url="http://medinform.jmir.org/2015/3/e27/", url="http://www.ncbi.nlm.nih.gov/pubmed/26232246" } @Article{info:doi/10.2196/medinform.3783, author="Maramba, Daniel Inocencio and Davey, Antoinette and Elliott, N. Marc and Roberts, Martin and Roland, Martin and Brown, Finlay and Burt, Jenni and Boiko, Olga and Campbell, John", title="Web-Based Textual Analysis of Free-Text Patient Experience Comments From a Survey in Primary Care", journal="JMIR Med Inform", year="2015", month="May", day="06", volume="3", number="2", pages="e20", keywords="patient experience", keywords="patient feedback", keywords="free-text comments", keywords="quantitative content analysis", keywords="textual analysis", abstract="Background: Open-ended questions eliciting free-text comments have been widely adopted in surveys of patient experience. Analysis of free text comments can provide deeper or new insight, identify areas for action, and initiate further investigation. Also, they may be a promising way to progress from documentation of patient experience to achieving quality improvement. The usual methods of analyzing free-text comments are known to be time and resource intensive. 
To efficiently deal with a large amount of free-text, new methods of rapidly summarizing and characterizing the text are being explored. Objective: The aim of this study was to investigate the feasibility of using freely available Web-based text processing tools (text clouds, distinctive word extraction, key words in context) for extracting useful information from large amounts of free-text commentary about patient experience, as an alternative to more resource intensive analytic methods. Methods: We collected free-text responses to a broad, open-ended question on patients' experience of primary care in a cross-sectional postal survey of patients recently consulting doctors in 25 English general practices. We encoded the responses to text files which were then uploaded to three Web-based textual processing tools. The tools we used were two text cloud creators: TagCrowd for unigrams, and Many Eyes for bigrams; and Voyant Tools, a Web-based reading tool that can extract distinctive words and perform Keyword in Context (KWIC) analysis. The association of patients' experience scores with the occurrence of certain words was tested with logistic regression analysis. KWIC analysis was also performed to gain insight into the use of a significant word. Results: In total, 3426 free-text responses were received from 7721 patients (comment rate: 44.4\%). The five most frequent words in the patients' comments were ``doctor'', ``appointment'', ``surgery'', ``practice'', and ``time''. The three most frequent two-word combinations were ``reception staff'', ``excellent service'', and ``two weeks''. The regression analysis showed that the occurrence of the word ``excellent'' in the comments was significantly associated with a better patient experience (OR=1.96, 95\%CI=1.63-2.34), while ``rude'' was significantly associated with a worse experience (OR=0.53, 95\%CI=0.46-0.60). 
The KWIC results revealed that 49 of the 78 (63\%) occurrences of the word ``rude'' in the comments were related to receptionists and 17 (22\%) were related to doctors.
Objective: The objective of the study was to provide a recorded spoken handover, annotated verbatim transcriptions, and evaluations to support research in spoken and written natural language processing for filling out a clinical handover form. This dataset is based on synthetic patient profiles, thereby avoiding ethical and legal restrictions, while maintaining efficacy for research in speech-to-text conversion and information extraction, based on realistic clinical scenarios. We also introduce a Web app to demonstrate the system design and workflow. Methods: We experiment with Dragon Medical 11.0 for speech recognition and CRF++ for information extraction. To compute features for information extraction, we also apply CoreNLP, MetaMap, and Ontoserver. Our evaluation uses cross-validation techniques to measure processing correctness. Results: The data provided were a simulation of nursing handover, as recorded using a mobile device, built from simulated patient records and handover scripts, spoken by an Australian registered nurse. Speech recognition recognized 5276 of 7277 words in our 100 test documents correctly. We considered 50 mutually exclusive categories in information extraction and achieved the F1 (ie, the harmonic mean of Precision and Recall) of 0.86 in the category for irrelevant text and the macro-averaged F1 of 0.70 over the remaining 35 nonempty categories of the form in our 101 test documents. Conclusions: The significance of this study hinges on opening our data, together with the related performance benchmarks and some processing software, to the research and development community for studying clinical documentation and language-processing. The data are used in the CLEFeHealth 2015 evaluation laboratory for a shared task on speech recognition. 
Methods: We developed an annotation guideline and annotated medication information and adverse event related entities on 122 FAERS narratives comprising approximately 23,000 word tokens.
The best performing tagger achieves an overall performance of 0.73 F1 score for detection of medication, adverse event and other named entities. Conclusions: In this study, we developed an annotated corpus of FAERS narratives and machine learning based models for automatically extracting medication and adverse event information from the FAERS narratives. Our study is an important step towards enriching the FAERS data for postmarketing pharmacovigilance. ", doi="10.2196/medinform.3022", url="http://medinform.jmir.org/2014/1/e10/", url="http://www.ncbi.nlm.nih.gov/pubmed/25600332" }