<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e66973</article-id><article-id pub-id-type="doi">10.2196/66973</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Performance of Natural Language Processing versus International Classification of Diseases Codes in Building Registries for Patients With Fall Injury: Retrospective Analysis</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Taseh</surname><given-names>Atta</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sasanfar</surname><given-names>Souri</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chan</surname><given-names>Michelle</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Sirls</surname><given-names>Evan</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nazarian</surname><given-names>Ara</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Batmanghelich</surname><given-names>Kayhan</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bean</surname><given-names>Jonathan F</given-names></name><degrees>MD, MPH</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ashkani-Esfahani</surname><given-names>Soheil</given-names></name><degrees>MD, MPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Foot &#x0026; Ankle Research and Innovations Laboratory (FARIL), Department of Orthopaedic Surgery, Mass General Brigham, Harvard Medical School</institution><addr-line>158 Boston Post Road</addr-line><addr-line>Weston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Musculoskeletal Translational Innovation Initiative, Carl J. 
Shapiro Department of Orthopaedic Surgery, Beth Israel Deaconess Medical Center, Harvard Medical School</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Batman Laboratory, Department of Electrical and Computer Engineering, College of Engineering, Boston University</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff4"><institution>New England Geriatric Research Education and Clinical Center (GRECC), Veterans Affair Boston Healthcare System</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff5"><institution>Department of Physical Medicine and Rehabilitation, Harvard Medical School</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff6"><institution>Spaulding Rehabilitation</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Lovis</surname><given-names>Christian</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Martin</surname><given-names>Elliot</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Raza</surname><given-names>Shaina</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Atta Taseh, MD, Foot &#x0026; Ankle Research and Innovations Laboratory (FARIL), Department of Orthopaedic Surgery, Mass General Brigham, Harvard Medical School, 158 Boston Post Road, Weston, MA, 02493, United States, 1 7818279613; <email>ataseh@mgh.harvard.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date 
pub-type="epub"><day>14</day><month>7</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e66973</elocation-id><history><date date-type="received"><day>30</day><month>09</month><year>2024</year></date><date date-type="rev-recd"><day>20</day><month>02</month><year>2025</year></date><date date-type="accepted"><day>21</day><month>02</month><year>2025</year></date></history><copyright-statement>&#x00A9; Atta Taseh, Souri Sasanfar, Michelle Chan, Evan Sirls, Ara Nazarian, Kayhan Batmanghelich, Jonathan F Bean, Soheil Ashkani-Esfahani. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 14.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e66973"/><abstract><sec><title>Background</title><p>Standardized registries are commonly built using administrative codes, such as the <italic>International Classification of Diseases</italic> (<italic>ICD</italic>) codes, assigned to patient encounters. 
However, patients with fall injury are often coded using subsequent injury codes, such as hip fractures. This necessitates manual screening to ensure the accuracy of data registries.</p></sec><sec><title>Objective</title><p>This study aimed to automate the extraction of fall incidents and mechanisms using natural language processing (NLP) and compare this approach with the <italic>ICD</italic> method.</p></sec><sec sec-type="methods"><title>Methods</title><p>Clinical notes for patients with fall-induced hip fractures were retrospectively reviewed by medical experts. Fall incidences were detected, annotated, and classified among patients who had a fall-induced hip fracture (case group). The control group included patients with hip fractures without any evidence of falls. NLP models were developed using the annotated notes of the study groups to fulfill two separate tasks: fall occurrence detection and fall mechanism classification. The performances of the models were compared using accuracy, sensitivity, specificity, positive predictive value, negative predictive value, <italic>F</italic><sub>1</sub>-score, and area under the receiver operating characteristic curve.</p></sec><sec sec-type="results"><title>Results</title><p>A total of 1769 clinical notes were included in the final analysis for the fall occurrence task, and 783 clinical notes were analyzed for the fall mechanism classification task. The highest <italic>F</italic><sub>1</sub>-score using NLP for fall occurrence was 0.97 (specificity=0.96; sensitivity=0.97), and for fall mechanism classification was 0.61 (specificity=0.56; sensitivity=0.62). 
Natural language processing could detect up to 98% of the fall occurrences and 65% of the fall mechanisms accurately, compared to 26% and 12%, respectively, by <italic>ICD</italic> codes.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our findings showed promising performance with higher accuracy of NLP algorithms compared to the conventional method for detecting fall occurrence and mechanism in developing disease registries using clinical notes. Our approach can be introduced to other registries that are based on large data and are in need of accurate annotation and classification.</p></sec></abstract><kwd-group><kwd>automation</kwd><kwd>automate</kwd><kwd>data registry</kwd><kwd>ICD Codes</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>algorithms</kwd><kwd>predictive models</kwd><kwd>predictive analytics</kwd><kwd>machine learning</kwd><kwd>ML</kwd><kwd>large language models</kwd><kwd>LLMs</kwd><kwd>natural language processing</kwd><kwd>NLP</kwd><kwd>deep learning</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>With 3 million emergency room visits, 300,000 hospitalizations, and 30,000 fatalities annually, falls pose a major threat to public health [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. The financial impact is also substantial, with an estimated US $50 billion medical expenses for nonfatal falls [<xref ref-type="bibr" rid="ref3">3</xref>]. Therefore, researching and understanding the nature of falls and fall-related injuries are crucial for developing effective prevention and treatment strategies as populations age [<xref ref-type="bibr" rid="ref4">4</xref>]. 
Given the multifactorial nature of falls and the difficulties involved in conducting prospective research in the field, developing fall registries comprising large and accurate medical data is very important [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Standardized registries are commonly built using administrative codes, such as the <italic>International Classification of Diseases</italic> (<italic>ICD</italic>) and Current Procedural Terminology (CPT) codes, assigned to patient encounters [<xref ref-type="bibr" rid="ref7">7</xref>]. Previous studies have used these codes to extract patients with a history of falls [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. However, this method has limitations that may lead to an underestimation of actual fall frequency and might not reveal the history of falls in patients [<xref ref-type="bibr" rid="ref11">11</xref>]. Reporting falls using the External Causes of Morbidity codes is usually recommended but not mandatory in all health care settings. Since falls are not typically considered stand-alone conditions, many health care providers may prefer to use the diagnosis <italic>ICD</italic> codes and assign codes to the end result of a fall, for example, a hip fracture, rather than the fall itself [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. This makes it difficult for investigators to identify falls in the patient&#x2019;s medical history and to determine the true frequency of falls within populations. Given these limitations, the review of clinical notes has been suggested as a more reliable method of detecting falls, fall mechanisms, and fall-induced injuries [<xref ref-type="bibr" rid="ref14">14</xref>]. This process, however, is expert-dependent and time-consuming, particularly if the dataset is large. 
To address these obstacles, natural language processing (NLP), which combines computational linguistics and deep learning models to process narrative data, can be used to automate the review process of clinical notes to detect falls [<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>Several studies have demonstrated the capability of supervised models to detect fall incidents, which have been documented in clinical notes [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Although these models are effective at identifying fall events, they fall short of providing detailed insights into fall-related <italic>ICD</italic> codes that capture the specific mechanisms (eg, how the fall occurred) or the physical consequences (eg, the force of the impact) [<xref ref-type="bibr" rid="ref11">11</xref>]. Gaining a better understanding of these factors is essential for designing strategies to prevent falls since individuals who experience severe or high-impact falls often face a higher risk of recurrent falls and injuries [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Tremblay et al [<xref ref-type="bibr" rid="ref11">11</xref>] highlighted the importance of studying fall mechanisms as a research priority. However, automated methods for extracting detailed fall mechanisms and their impact from clinical notes remain largely unexplored in the current literature.</p><p>This study aimed to assess the performance of NLP algorithms compared to conventional methods for detecting fall incidence and the mechanism of falls obtained from clinical notes of patients with hip fractures. 
We hypothesize that NLP algorithms outperform fall <italic>ICD</italic> codes in detecting falls and their mechanisms in patients with hip fractures.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design and Cohort</title><p>A retrospective case-control study was conducted, including the data from 4 tertiary hospitals in Greater Boston, Massachusetts. Data were retrieved from the institution&#x2019;s data repository using CPT codes for hip fractures (27125, 27130, 27226, 27228, 27235, 27236, 27244, 27245, and 27248) between January 2010 and December 2019.</p><p>Patients &#x2265;18 years old who were hospitalized because of hip fracture as a result of an outpatient fall (cases) or other reasons (controls) were included in the study. Falls resulting from violent encounters, animal attacks, significant external forces such as car or motor vehicle accidents, high-impact sports like skiing, and fractures caused by underlying pathological conditions were excluded to reduce the heterogeneity of fall mechanics. This exclusion helps avoid the influence of confounding injuries that differ significantly from typical accidental falls, ensuring that the study focuses on more clinically relevant fall types (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Given that the majority of hip fractures happen due to falls, we had a reasonable number of patients in the case group and included a single note for each patient. In contrast, multiple notes were reviewed and included per patient in the control group.</p></sec><sec id="s2-2"><title>Data Labeling</title><p>Expert annotations, serving as the ground truth for training the NLP models, were derived directly from clinical notes. The annotations covered two specific tasks: (1) fall occurrence and (2) mechanism of falls (the way falls happened). One expert orthopedic researcher (AT) conducted the annotations, and the decisions for equivocal or debatable cases were made by a senior scientist (SAE). 
All clinical notes were evaluated in chronological order, starting from the date of the hip fracture CPT code. The first note documenting a fall was selected for analysis. A fall was defined as &#x201C;an unintentional event that results in the person coming to rest on the ground or another lower level&#x201D; [<xref ref-type="bibr" rid="ref19">19</xref>]. Fall mechanisms were classified into 3 categories: same level (occurring on the same plane or surface), multilevel (descent from one level to a different one), and unclassified (not classifiable due to lack of sufficient information) [<xref ref-type="bibr" rid="ref20">20</xref>]. In rare cases, discrepancies between the documented fall mechanisms in the clinical notes and the corresponding fall <italic>ICD</italic> codes compromised the validity of comparisons between <italic>ICD</italic> and NLP-based approaches. Consequently, patients with conflicting information between clinical notes and <italic>ICD</italic> codes regarding the fall mechanism were excluded to ensure the integrity of the analysis (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p></sec><sec id="s2-3"><title>Data Preprocessing</title><p>A variety of inpatient unstructured clinical notes, including history and physical examination, discharge summary, progress, operation, and emergency department notes, were obtained. Due to the diverse formatting of these clinical notes, specialized preprocessing methodologies were required, which diverged significantly from the conventional text-processing approaches. Following annotation, the clinical notes underwent various preprocessing steps, including de-identification, segmentation, and cleaning [<xref ref-type="bibr" rid="ref21">21</xref>]. The specific techniques used in preprocessing, which address the unique challenges posed by the clinical notes&#x2019; formatting, are outlined in <xref ref-type="table" rid="table1">Table 1</xref>. 
Detailed information about the segmentation process is provided in Tables S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. This detailed account ensures the data are optimally prepared for the subsequent analytical phases.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>An overview of the data preprocessing stages.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Stages</td><td align="left" valign="bottom">Tool or Method</td><td align="left" valign="bottom">Purpose</td><td align="left" valign="bottom">Output</td></tr></thead><tbody><tr><td align="left" valign="top">De-identification</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Stanford de-identifier</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Remove personal identifiers to ensure privacy and compliance with data protection regulations. This involves replacing all Protected Health Information entities with synthetic variants to maintain data integrity and eliminate biases. The model chosen was the Stanford-de-identifier-base-model developed by Chambon et al [<xref ref-type="bibr" rid="ref21">21</xref>], with an <italic>F</italic><sub>1</sub>-score of 98.9 on the I2b2 2014 test set [<xref ref-type="bibr" rid="ref22">22</xref>].</p></list-item></list></td><td align="left" valign="top">Anonymized text ready for analysis.</td></tr><tr><td align="left" valign="top">Segmentation</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Bespoke parser, Finite State Machine, and regular expressions</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Segment notes into distinct sections for enhanced text processing accuracy. The parser identifies section headings and concatenates segments, refined through manual evaluation and iterative improvements. 
More details are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></list-item></list></td><td align="left" valign="top">Accurately segmented text with sections tagged for reassembly.</td></tr><tr><td align="left" valign="top">Filtering uninformative data</td><td align="left" valign="top"><named-content content-type="#000000">Identification and removal:</named-content><list list-type="bullet"><list-item><p>Duplicates</p></list-item><list-item><p>Uninformative sections</p></list-item><list-item><p>Administrative content</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Remove duplicated sections from notes to prevent skewing results.</p></list-item><list-item><p>Discard sections containing only headings without informative text.</p></list-item><list-item><p>Remove document finalization and signature sections marked with terms like &#x201C;signed&#x201D; and &#x201C;FINAL.&#x201D;</p></list-item></list></td><td align="left" valign="top">Dataset free of redundant and uninformative sections.</td></tr><tr><td align="left" valign="top">Elimination of non-essential elements</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Regular expressions and manual filtering</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Exclude conversion error notifications, Unicode or hexadecimal sections, and other irrelevant elements.</p></list-item></list></td><td align="left" valign="top">Dataset without non-contributory headers, unreadable sections, and irregular patterns.</td></tr><tr><td align="left" valign="top">Removal of irrelevant metadata</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Manual filtering</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Remove timestamps, de-identified placeholders, and other non-analytical 
metadata.</p></list-item></list></td><td align="left" valign="top">Dataset without timestamps and placeholder text, ensuring grammatical consistency.</td></tr><tr><td align="left" valign="top">Splitting the data</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Random allocation</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Partition the dataset into training and testing subsets for unbiased model evaluation.</p></list-item></list></td><td align="left" valign="top">Training and testing subsets for model development and performance evaluation.</td></tr></tbody></table></table-wrap></sec><sec id="s2-4"><title>Model Development</title><p>Models were developed to automate two distinct tasks: fall occurrence detection and fall mechanism classification. All models except Bidirectional Encoder Representations from Transformers (BERT) used a Term Frequency-Inverse Document Frequency (TF-IDF) representation of the text data. Specifically, TF-IDF vectorization with unigrams, bigrams, and trigrams (ngram_range=(1,3)) was applied to transform the processed text into numerical features before training these models. For the binary task of fall occurrence (fall vs no fall), a data split of 80:20 was used for training and testing purposes, respectively. The split was stratified by the binary outcome (fall vs no fall) to ensure a balanced representation of both classes in the training and testing subsets. Our methodology harnessed the text analysis capabilities of a modified BERT model described by Fu et al [<xref ref-type="bibr" rid="ref17">17</xref>]. We used a maximum sequence length of 512 tokens, consistent with the recommendations in the original study by Devlin et al [<xref ref-type="bibr" rid="ref23">23</xref>], used a batch size of 8, and conducted training over 3 epochs. 
Moreover, the adaptive boosting (AdaBoost) algorithm was applied to fall identification, using single-layer decision trees (stumps) as described by Quinlan et al [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. AdaBoost assigns coefficients based on each classifier&#x2019;s performance and adjusts sample weights during training to emphasize previously misclassified samples. Finally, extreme gradient boosting (XGBoost) was used, which is a refined version of gradient boosting recognized for its precision and versatility. XGBoost constructs additive training models in stages and optimizes a differentiable loss function, making it suitable for handling structured data derived from text [<xref ref-type="bibr" rid="ref25">25</xref>].</p><p>To address the challenges posed by the complex multiclass scenario in the fall mechanism classification task, which involved detailed classification into 3 categories (same level, multilevel, and unclassified classes), we designated 70% of the data for training and 30% for testing, ensuring stratification to maintain class distribution. We used a comprehensive suite of advanced machine learning models, including AdaBoost, support vector machine (SVM), XGBoost, and random forest (RF). Each model was chosen for its proven ability to decipher complex data relationships and offer detailed insights into the correlated factors of falls across the varied categories [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. The SVM model is a two-layer recognition method that excels in high-dimensional spaces and allows for class weighting to address class imbalance, which makes it suitable for detecting fall mechanisms from clinical notes [<xref ref-type="bibr" rid="ref32">32</xref>]. RF is an ensemble learning method that constructs multiple decision trees during training and merges their results to improve predictive accuracy and control overfitting. 
RF is also effective in handling class imbalance through class weighting [<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>The hyperparameter configurations used for the models are provided in Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-5"><title>Statistical Analysis</title><p>Comparison of the baseline characteristics was made using SPSS software (version 28.0; IBM Corp), where the <italic>t</italic> and chi-square tests were used for continuous and categorical data, respectively. Several metrics were used to evaluate the models&#x2019; performance in identifying and classifying falls. These metrics included sensitivity, specificity, <italic>F</italic><sub>1</sub>-score, positive predictive value (PPV), negative predictive value (NPV), accuracy, and area under the receiver operating characteristic curve (AUC-ROC). A weighted-averaging approach was used for multiclass classifications to report the overall model performance [<xref ref-type="bibr" rid="ref34">34</xref>]. Furthermore, the percentages of notes correctly classified for each task by the machine learning and <italic>ICD</italic> approaches were calculated and compared using the chi-square test. A type 1 error probability of .05 was used as the threshold for statistical significance.</p></sec><sec id="s2-6"><title>Ethical Considerations</title><p>The study protocol was approved by the Mass General Brigham Institutional Review Board (number 2023P000741). The board waived participant consent due to the retrospective nature of the study. All the notes were de-identified in the preprocessing stage to avoid the inclusion of any protected health information (PHI) and to ensure patient privacy and compliance with Health Insurance Portability and Accountability Act (HIPAA) regulations.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>A total of 1769 clinical notes were analyzed for the fall occurrence task. 
Of these, 791 notes corresponded to the case group (one note per patient, n=791), and 978 notes were from the control group (representing 317 individuals with multiple notes per individual) (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Moreover, for the fall mechanism classification task, 783 notes (one note per patient, n=783) were included, comprising 511 same-level falls, 151 multilevel falls, and 121 unclassified falls. The case group was older, with a mean age of 77.7 (SD 14.3) years versus 65.3 (SD 19.6) years in the control group (<italic>P</italic>&#x003C;.001; <xref ref-type="table" rid="table2">Table 2</xref>). Furthermore, although both groups had a higher proportion of females, the case group had a notably higher percentage of female patients than the control group (<italic>P</italic>=.01; <xref ref-type="table" rid="table2">Table 2</xref>).</p><p>All 3 models performed well for detecting fall occurrences, although the BERT model showed a lower <italic>F</italic><sub>1</sub>-score and AUC-ROC than the boosting models (<xref ref-type="table" rid="table3">Table 3</xref>, <xref ref-type="fig" rid="figure2">Figure 2</xref>). The models could successfully classify a large proportion of patient notes (XGBoost=97%, AdaBoost=98%) as opposed to the <italic>ICD</italic> approach, which identified only 26% of them (<italic>P</italic>&#x003C;.001; <xref ref-type="table" rid="table4">Table 4</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Study population flowchart. 
RPDR: Research Patient Data Registry.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e66973_fig01.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Comparison of the baseline characteristics of the study groups.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Group</td><td align="left" valign="bottom">Age (years), mean (SD)</td><td align="left" valign="bottom">Gender (female), n (%)</td><td align="left" valign="bottom">Race (White), n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Fall (n=791)</td><td align="left" valign="top">77.7 (14.3)</td><td align="left" valign="top">520 (65.7)</td><td align="left" valign="top">700 (88.5)</td></tr><tr><td align="left" valign="top">No fall (n=317)</td><td align="left" valign="top">65.3 (19.6)</td><td align="left" valign="top">183 (57.7)</td><td align="left" valign="top">278 (87.7)</td></tr><tr><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">.01</td><td align="left" valign="top">.61<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Based on the comparison between the White and non-White races.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>The performance metrics of the study models for detection of fall occurrence and fall mechanism classification. 
Algorithms were trained on an expert annotated database.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Outcomes and models</td><td align="left" valign="bottom">PPV<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="bottom">NPV<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="bottom">Sensitivity</td><td align="left" valign="bottom">Specificity</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom">AUC-ROC<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="8">Fall occurrence detection</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>BERT<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top">0.94</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.84</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.90</td><td align="left" valign="top">0.97</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>AdaBoost<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.99</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td><td align="left" valign="top">0.96</td><td 
align="left" valign="top">0.98</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.99</td></tr><tr><td align="left" valign="top" colspan="8">Fall mechanism classification<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SVM<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup></td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.50</td><td align="left" valign="top">0.62</td><td align="left" valign="top">0.36</td><td align="left" valign="top">0.57</td><td align="left" valign="top">0.62</td><td align="left" valign="top">0.67</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>AdaBoost</td><td align="left" valign="top">0.55</td><td align="left" valign="top">0.43</td><td align="left" valign="top">0.60</td><td align="left" valign="top">0.39</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.60</td><td align="left" valign="top">0.61</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="left" valign="top">0.60</td><td align="left" valign="top">0.51</td><td align="left" valign="top">0.62</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.62</td><td align="left" valign="top">0.65</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>RF<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup></td><td align="left" valign="top">0.60</td><td align="left" valign="top">0.52</td><td align="left" valign="top">0.65</td><td align="left" 
valign="top">0.35</td><td align="left" valign="top">0.60</td><td align="left" valign="top">0.65</td><td align="left" valign="top">0.70</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>PPV: positive predictive value.</p></fn><fn id="table3fn2"><p><sup>b</sup>NPV: negative predictive value.</p></fn><fn id="table3fn3"><p><sup>c</sup>AUC-ROC: area under the receiver operating characteristic curve.</p></fn><fn id="table3fn4"><p><sup>d</sup>BERT: Bidirectional Encoder Representations from Transformers.</p></fn><fn id="table3fn5"><p><sup>e</sup>AdaBoost: adaptive boosting.</p></fn><fn id="table3fn6"><p><sup>f</sup>XGBoost: extreme gradient boosting.</p></fn><fn id="table3fn7"><p><sup>g</sup>Weighted metrics are presented.</p></fn><fn id="table3fn8"><p><sup>h</sup>SVM: support vector machine.</p></fn><fn id="table3fn9"><p><sup>i</sup>RF: random forest.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Receiver operating characteristic curve for the fall occurrence detection task. 
AdaBoost: adaptive boosting; BERT: Bidirectional Encoder Representations from Transformers; ROC: receiver operating characteristic; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e66973_fig02.png"/></fig><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Percentage of fall notes correctly classified by natural language processing approach versus <italic>International Classification of Diseases</italic> codes.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Model</td><td align="left" valign="bottom" rowspan="2">Fall occurrence</td><td align="left" valign="bottom" colspan="4">Fall mechanism</td></tr><tr><td align="left" valign="bottom">Overall</td><td align="left" valign="bottom">Class S<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="bottom">Class M<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="bottom">Class U<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top"><italic>ICD</italic><sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="top">26%</td><td align="left" valign="top">12%</td><td align="left" valign="top">8.4%</td><td align="left" valign="top">15.2%</td><td align="left" valign="top">22.2%</td></tr><tr><td align="left" valign="top">BERT<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="top">84%</td><td align="left" valign="top">&#x2013;<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="left" valign="top">&#x2013;</td><td align="left" valign="top">&#x2013;</td><td align="left" valign="top">&#x2013;</td></tr><tr><td align="left" valign="top">AdaBoost<sup><xref ref-type="table-fn" 
rid="table4fn7">g</xref></sup></td><td align="left" valign="top">98%</td><td align="left" valign="top">60%</td><td align="left" valign="top">82%</td><td align="left" valign="top">26.1%</td><td align="left" valign="top">11%</td></tr><tr><td align="left" valign="top">XGBoost<sup><xref ref-type="table-fn" rid="table4fn8">h</xref></sup></td><td align="left" valign="top">97%</td><td align="left" valign="top">62%</td><td align="left" valign="top">78%</td><td align="left" valign="top">37%</td><td align="left" valign="top">28%</td></tr><tr><td align="left" valign="top">SVM<sup><xref ref-type="table-fn" rid="table4fn9">i</xref></sup></td><td align="left" valign="top">&#x2013;</td><td align="left" valign="top">62%</td><td align="left" valign="top">87%</td><td align="left" valign="top">17.4%</td><td align="left" valign="top">14%</td></tr><tr><td align="left" valign="top">RF<sup><xref ref-type="table-fn" rid="table4fn10">j</xref></sup></td><td align="left" valign="top">&#x2013;</td><td align="left" valign="top">65%</td><td align="left" valign="top">88.3%</td><td align="left" valign="top">15.2%</td><td align="left" valign="top">28%</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>Class S: same-level.</p></fn><fn id="table4fn2"><p><sup>b</sup>Class M: multi-level.</p></fn><fn id="table4fn3"><p><sup>c</sup>Class U: unclassified.</p></fn><fn id="table4fn4"><p><sup>d</sup><italic>ICD</italic>: <italic>International Classification of Diseases</italic>.</p></fn><fn id="table4fn5"><p><sup>e</sup>BERT: Bidirectional Encoder Representations from Transformers.</p></fn><fn id="table4fn6"><p><sup>f</sup>Not available.</p></fn><fn id="table4fn7"><p><sup>g</sup>AdaBoost: adaptive boosting.</p></fn><fn id="table4fn8"><p><sup>h</sup>XGBoost: extreme gradient boosting.</p></fn><fn id="table4fn9"><p><sup>i</sup>SVM: support vector machine.</p></fn><fn id="table4fn10"><p><sup>j</sup>RF: random forest.</p></fn></table-wrap-foot></table-wrap><p>Regarding fall mechanism 
classification, the RF model slightly outperformed the others with an AUC-ROC of 0.70 and an <italic>F</italic><sub>1</sub>-score of 0.60 (<xref ref-type="table" rid="table3">Table 3</xref>, <xref ref-type="fig" rid="figure3">Figure 3</xref>). Moreover, the RF model correctly classified fall mechanism in 65% of the fall notes compared to the 12% of the <italic>ICD</italic> method (<italic>P</italic>&#x003C;.001, <xref ref-type="table" rid="table4">Table 4</xref>). However, all 4 NLP models showed high classification performance in identifying same-level class falls only (<xref ref-type="table" rid="table4">Table 4</xref>).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Receiver operating characteristic curve for the fall mechanism classification task. AdaBoost: adaptive boosting; RF: random forest; ROC: receiver operating characteristic; SVM: support vector machine; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e66973_fig03.png"/></fig></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>This study aimed to automate fall identification and classification based on its mechanism from clinical notes and subsequently compare the results with the traditional <italic>ICD</italic> approach for building fall registries. Our results demonstrated the superior performance of NLP models, which correctly identified 98% of the notes for fall occurrence compared to the 26% detected by the <italic>ICD</italic> approach. Furthermore, the models could classify 65% of fall mechanisms, while the <italic>ICD</italic> approach detected 12% of these cases.</p><p>Automated identification of fall incidents from clinical notes is an emerging topic in biomedical sciences. 
It serves multiple purposes, such as insurance claim processing, cost analysis for falls, and enhancing fall prevention measures for inpatient safety [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. Despite these varied objectives, there are commonalities in the methodologies and models used. However, the interpretation of results can vary significantly and must be tailored to the specific study goals. Cheligeer et al [<xref ref-type="bibr" rid="ref38">38</xref>] highlighted the superior performance of BERT and machine learning models in detecting inpatient falls compared to traditional <italic>ICD</italic> coding. Their findings underscored these models&#x2019; ability to accurately identify nonfall cases, as evidenced by high NPV and specificity. Nevertheless, when aiming to develop a comprehensive registry, achieving optimal sensitivity to maximize the inclusion of fall patients, alongside a high <italic>F</italic><sub>1</sub>-score to balance PPV and sensitivity, becomes crucial.</p><p>Classical machine learning methods are commonly used in fall classification studies. Luther et al developed an SVM model using free-text clinical notes and a term-document matrix for feature selection, achieving an <italic>F</italic><sub>1</sub>-score of 0.87 [<xref ref-type="bibr" rid="ref39">39</xref>]. Our study extends this by using a TF-IDF feature selection method, which weighs terms based on their importance in capturing nuanced information from the notes. We found that ensemble methods achieved optimal performance with an <italic>F</italic><sub>1</sub>-score of up to 0.97. Santos et al demonstrated superior performance of neural networks over classical machine learning methods [<xref ref-type="bibr" rid="ref40">40</xref>]. This finding is supported by Fu et al, who showed high performance of context-aware models like BERT in fall detection tasks [<xref ref-type="bibr" rid="ref17">17</xref>]. 
However, in our study, BERT did not outperform other machine learning models. BERT&#x2019;s effectiveness depends on the availability of sufficient training data due to its deep learning architecture [<xref ref-type="bibr" rid="ref41">41</xref>]. Therefore, the sample size in our study may have influenced the effectiveness of training within this framework.</p><p>Identifying fall mechanisms from patient records presents a significant challenge, which, if addressed properly, can provide invaluable information for clinical and quality improvement purposes. Roudsari et al investigated the acute cost of care for falls in patients over 65 years of age, categorized by <italic>ICD</italic> codes for mechanisms [<xref ref-type="bibr" rid="ref13">13</xref>]. They found that same-level falls were the most common mechanism of injury (28%). However, most falls (60%) were coded as unspecified falls without mentioning the mechanism. In our study, only 11% of the notes were coded specifically for falls, and surprisingly, there were occasional discrepancies between the coded mechanisms and those described in clinical notes. Whether this discrepancy stems from insufficient clinical information or a tendency among providers to prioritize documenting immediate medical needs requires further investigation. Relying solely on medical coding is unreliable for identifying fall mechanisms.</p><p>While NLP has shown promise in retrieving data from medical records, its application in fall mechanism extraction remains underexplored. Liu et al automated the extraction of inpatient fall severity from incident reports, leveraging structured features to improve the <italic>F</italic><sub>1</sub>-score by 8%, achieving 0.78 [<xref ref-type="bibr" rid="ref22">22</xref>]. Our study incorporated diverse types of unstructured clinical notes, including discharge summaries and progress notes. 
These notes were authored by various medical professionals with differing styles and descriptions of falls, introducing significant variability that posed challenges for extracting features. Our results indicated that the XGBoost and RF models achieved the highest <italic>F</italic><sub>1</sub>-scores (0.6). These findings are consistent with previous research demonstrating improved disease classification accuracy using ensemble methods applied to medical notes [<xref ref-type="bibr" rid="ref22">22</xref>]. Additionally, using ensemble methods, Albano et al have shown promise in enhancing the classification accuracy when dealing with rare classes [<xref ref-type="bibr" rid="ref42">42</xref>]. However, our study revealed suboptimal performance of the models in managing the &#x201C;multilevel&#x201D; and &#x201C;unclassified&#x201D; subclasses, likely due to the overall limited number of notes available for these classes. Although reflective of real-life scenarios, the imbalance in fall mechanism classes may have impacted the performance of the models. Ensemble models like XGBoost and RF are prone to overfitting patterns in the training data, especially when managing imbalanced datasets. Similarly, even after fine-tuning, BERT may carry over biases from its general-purpose pretraining, limiting its ability to capture domain-specific nuances in clinical notes fully. To address these challenges, we applied weighted evaluation metrics to ensure a fair performance assessment across all classes. Additionally, hyperparameters were systematically optimized to mitigate class imbalance, and BERT was fine-tuned explicitly on clinical notes to enhance its applicability to the domain. However, relying on weighted metrics and fine-tuning may not entirely overcome the inherent limitations of dataset imbalance and pretraining biases. 
Future work should focus on augmenting the dataset to improve class balance and explore alternative architectures or pretraining strategies to reduce bias and overfitting.</p><p>Different approaches can be adopted for planning health care registries based on the registry&#x2019;s purpose, target population, and source data structure [<xref ref-type="bibr" rid="ref43">43</xref>]. Administrative codes are commonly used to build retrospective registries when using health records. However, the accuracy of this method is not universally reliable across all medical conditions [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. For example, a study by Dal et al evaluated the accuracy of the <italic>ICD</italic>-based Danish National Registry of Patients in identifying individuals with acromegaly, reporting a PPV of only 54.2% (CI 48.3&#x2010;60) compared to expert-confirmed diagnoses [<xref ref-type="bibr" rid="ref46">46</xref>]. Similarly, <italic>ICD</italic> codes for falls are often inconsistently applied, making them an unreliable sole method for identifying fall incidents. Our results highlight the potential of automated clinical note screening using NLP as an alternative for building registries. However, NLP can be computationally intensive due to the broad scope of falls, which spans diverse patient populations and clinical scenarios. This study proposes a combined approach using administrative codes related to fall conditions as a prescreening step to narrow the dataset, followed by NLP-based automated screening of clinical notes. This strategy balances computational efficiency with improved accuracy in registry development. 
Furthermore, this approach offers the advantage of extracting additional clinical details, such as the fall mechanism, which are often unavailable in administrative codes but crucial for understanding and preventing falls [<xref ref-type="bibr" rid="ref47">47</xref>].</p><p>Although this study made important strides in developing fall registries, there are a few areas for improvement. The sample size was adequate for a retrospective analysis; however, larger and more diverse populations would enhance the robustness of machine learning models. Additionally, our dataset was predominantly composed of individuals of White race, reflecting the demographic characteristics of the region. This provides an opportunity to expand the research to include more diverse groups. We also acknowledge recent advancements in data preprocessing, including automated entity resolution and noise handling, which can be used in future studies to enhance robustness and scalability [<xref ref-type="bibr" rid="ref48">48</xref>]. To address these issues, our future efforts will be focused on external validation, incorporating broader and more representative populations to improve the generalizability and impact of the findings.</p><p>In conclusion, our findings demonstrated a promising performance of NLP methods in identifying patients with a history of falls and hip fractures and their fall mechanisms from clinical notes. This approach can significantly enhance the accuracy and efficiency of developing fall registries. Moreover, the models were particularly effective in classifying the mechanisms of falls in patients who experienced same-level falls. Future studies with larger sample sizes and a broader spectrum of pathologies can further validate these findings and address the class imbalance issue. 
If well-expanded and developed, our approach can be introduced to the health care systems as an efficient and cost-effective approach for developing valid and reliable registry systems of diseases or clinical conditions that greatly burden the health care systems and the patients.</p></sec></body><back><ack><p>We gratefully acknowledge the patients whose clinical data served as the foundation for this research, enabling us to advance the field of automated fall detection.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AdaBoost</term><def><p>adaptive boosting</p></def></def-item><def-item><term id="abb2">AUC-ROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb3">BERT</term><def><p>Bidirectional Encoder Representations from Transformers</p></def></def-item><def-item><term id="abb4">CPT</term><def><p>Current Procedural Terminology</p></def></def-item><def-item><term id="abb5"><italic>ICD</italic></term><def><p><italic>International Classification of Diseases</italic></p></def></def-item><def-item><term id="abb6">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb7">NPV</term><def><p>negative predictive value</p></def></def-item><def-item><term id="abb8">PPV</term><def><p>positive predictive value</p></def></def-item><def-item><term id="abb9">RF</term><def><p>random forest</p></def></def-item><def-item><term id="abb10">SVM</term><def><p>support vector machine</p></def></def-item><def-item><term id="abb11">TF-IDF</term><def><p>Term Frequency-Inverse Document Frequency</p></def></def-item><def-item><term id="abb12">XGBoost</term><def><p>extreme gradient boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Bergen</surname><given-names>G</given-names> </name><name name-style="western"><surname>Stevens</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Burns</surname><given-names>ER</given-names> </name></person-group><article-title>Falls and fall injuries among adults aged &#x2265;65 years - United States, 2014</article-title><source>MMWR Morb Mortal Wkly Rep</source><year>2016</year><month>09</month><day>23</day><volume>65</volume><issue>37</issue><fpage>993</fpage><lpage>998</lpage><pub-id pub-id-type="doi">10.15585/mmwr.mm6537a2</pub-id><pub-id pub-id-type="medline">27656914</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moreland</surname><given-names>B</given-names> </name><name name-style="western"><surname>Kakara</surname><given-names>R</given-names> </name><name name-style="western"><surname>Henry</surname><given-names>A</given-names> </name></person-group><article-title>Trends in nonfatal falls and fall-related injuries among adults aged &#x2265;65 years - United States, 2012-2018</article-title><source>MMWR Morb Mortal Wkly Rep</source><year>2020</year><month>07</month><day>10</day><volume>69</volume><issue>27</issue><fpage>875</fpage><lpage>881</lpage><pub-id pub-id-type="doi">10.15585/mmwr.mm6927a5</pub-id><pub-id pub-id-type="medline">32644982</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>Cost of older adult falls</article-title><source>US Centers for Disease Control and Prevention</source><access-date>2024-07-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://stacks.cdc.gov/view/cdc/122747">https://stacks.cdc.gov/view/cdc/122747</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Florence</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Bergen</surname><given-names>G</given-names> </name><name name-style="western"><surname>Atherly</surname><given-names>A</given-names> </name><name name-style="western"><surname>Burns</surname><given-names>E</given-names> </name><name name-style="western"><surname>Stevens</surname><given-names>J</given-names> </name><name name-style="western"><surname>Drake</surname><given-names>C</given-names> </name></person-group><article-title>The medical costs of fatal falls and fall injuries among older adults</article-title><source>J Am Geriatr Soc</source><year>2018</year><month>04</month><volume>66</volume><issue>4</issue><fpage>693</fpage><lpage>698</lpage><pub-id pub-id-type="doi">10.1111/jgs.15304</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berg</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Carlson</surname><given-names>T</given-names> </name><name name-style="western"><surname>Fairchild</surname><given-names>J</given-names> </name><name name-style="western"><surname>Edwards</surname><given-names>C</given-names> </name><name name-style="western"><surname>Sorell</surname><given-names>R</given-names> </name></person-group><article-title>Development of a falls registry: a pilot study</article-title><source>J Trauma Nurs</source><year>2017</year><volume>24</volume><issue>4</issue><fpage>224</fpage><lpage>230</lpage><pub-id pub-id-type="doi">10.1097/JTN.0000000000000295</pub-id><pub-id pub-id-type="medline">28692616</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Trotter</surname><given-names>JP</given-names> </name></person-group><article-title>Patient registries: a 
new gold standard for &#x201C;real world&#x201D; research</article-title><source>Ochsner J</source><year>2002</year><volume>4</volume><issue>4</issue><fpage>211</fpage><lpage>214</lpage><pub-id pub-id-type="medline">22826660</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schmidt</surname><given-names>M</given-names> </name><name name-style="western"><surname>Schmidt</surname><given-names>SAJ</given-names> </name><name name-style="western"><surname>Sandegaard</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Ehrenstein</surname><given-names>V</given-names> </name><name name-style="western"><surname>Pedersen</surname><given-names>L</given-names> </name><name name-style="western"><surname>S&#x00F8;rensen</surname><given-names>HT</given-names> </name></person-group><article-title>The Danish National Patient Registry: a review of content, data quality, and research potential</article-title><source>Clin Epidemiol</source><year>2015</year><volume>7</volume><fpage>449</fpage><lpage>490</lpage><pub-id pub-id-type="doi">10.2147/CLEP.S91125</pub-id><pub-id pub-id-type="medline">26604824</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khorgami</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Fleischer</surname><given-names>WJ</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>YJA</given-names> </name><name name-style="western"><surname>Mushtaq</surname><given-names>N</given-names> </name><name name-style="western"><surname>Charles</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Howard</surname><given-names>CA</given-names> </name></person-group><article-title>Ten-year trends in traumatic injury 
mechanisms and outcomes: a trauma registry analysis</article-title><source>Am J Surg</source><year>2018</year><month>04</month><volume>215</volume><issue>4</issue><fpage>727</fpage><lpage>734</lpage><pub-id pub-id-type="doi">10.1016/j.amjsurg.2018.01.008</pub-id><pub-id pub-id-type="medline">29397887</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Unguryanu</surname><given-names>TN</given-names> </name><name name-style="western"><surname>Grjibovski</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Trovik</surname><given-names>TA</given-names> </name><name name-style="western"><surname>Ytterstad</surname><given-names>B</given-names> </name><name name-style="western"><surname>Kudryavtsev</surname><given-names>AV</given-names> </name></person-group><article-title>Mechanisms of accidental fall injuries and involved injury factors: a registry-based study</article-title><source>Inj Epidemiol</source><year>2020</year><month>03</month><day>16</day><volume>7</volume><issue>1</issue><fpage>8</fpage><pub-id pub-id-type="doi">10.1186/s40621-020-0234-7</pub-id><pub-id pub-id-type="medline">32172689</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sumrein</surname><given-names>BO</given-names> </name><name name-style="western"><surname>Huttunen</surname><given-names>TT</given-names> </name><name name-style="western"><surname>Launonen</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Berg</surname><given-names>HE</given-names> </name><name name-style="western"><surname>Fell&#x00E4;nder-Tsai</surname><given-names>L</given-names> </name><name name-style="western"><surname>Mattila</surname><given-names>VM</given-names> </name></person-group><article-title>Proximal 
humeral fractures in Sweden-a registry-based study</article-title><source>Osteoporos Int</source><year>2017</year><month>03</month><volume>28</volume><issue>3</issue><fpage>901</fpage><lpage>907</lpage><pub-id pub-id-type="doi">10.1007/s00198-016-3808-z</pub-id><pub-id pub-id-type="medline">27787593</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tremblay</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Berndt</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Luther</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Foulis</surname><given-names>PR</given-names> </name><name name-style="western"><surname>French</surname><given-names>DD</given-names> </name></person-group><article-title>Identifying fall-related injuries: text mining the electronic medical record</article-title><source>Inf Technol Manag</source><year>2009</year><month>12</month><volume>10</volume><issue>4</issue><fpage>253</fpage><lpage>265</lpage><pub-id pub-id-type="doi">10.1007/s10799-009-0061-6</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jensen</surname><given-names>PB</given-names> </name><name name-style="western"><surname>Jensen</surname><given-names>LJ</given-names> </name><name name-style="western"><surname>Brunak</surname><given-names>S</given-names> </name></person-group><article-title>Mining electronic health records: towards better research applications and clinical care</article-title><source>Nat Rev Genet</source><year>2012</year><month>05</month><day>2</day><volume>13</volume><issue>6</issue><fpage>395</fpage><lpage>405</lpage><pub-id pub-id-type="doi">10.1038/nrg3208</pub-id><pub-id 
pub-id-type="medline">22549152</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roudsari</surname><given-names>BS</given-names> </name><name name-style="western"><surname>Ebel</surname><given-names>BE</given-names> </name><name name-style="western"><surname>Corso</surname><given-names>PS</given-names> </name><name name-style="western"><surname>Molinari</surname><given-names>NAM</given-names> </name><name name-style="western"><surname>Koepsell</surname><given-names>TD</given-names> </name></person-group><article-title>The acute medical care costs of fall-related injuries among the U.S. older adults</article-title><source>Injury</source><year>2005</year><month>11</month><volume>36</volume><issue>11</issue><fpage>1316</fpage><lpage>1322</lpage><pub-id pub-id-type="doi">10.1016/j.injury.2005.05.024</pub-id><pub-id pub-id-type="medline">16214476</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Gliklich</surname><given-names>RE</given-names> </name><name name-style="western"><surname>Leavy</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Dreyer</surname><given-names>NA</given-names> </name></person-group><source>Registries for Evaluating Patient Outcomes: A User&#x2019;s Guide</source><year>2020</year><publisher-name>Agency for Healthcare Research and Quality (AHRQ)</publisher-name><pub-id pub-id-type="doi">10.23970/AHRQEPCREGISTRIES4</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patterson</surname><given-names>BW</given-names> </name><name name-style="western"><surname>Jacobsohn</surname><given-names>GC</given-names> </name><name 
name-style="western"><surname>Shah</surname><given-names>MN</given-names> </name><etal/></person-group><article-title>Development and validation of a pragmatic natural language processing approach to identifying falls in older adults in the emergency department</article-title><source>BMC Med Inform Decis Mak</source><year>2019</year><month>07</month><day>22</day><volume>19</volume><issue>1</issue><fpage>138</fpage><pub-id pub-id-type="doi">10.1186/s12911-019-0843-7</pub-id><pub-id pub-id-type="medline">31331322</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shiner</surname><given-names>B</given-names> </name><name name-style="western"><surname>Neily</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mills</surname><given-names>PD</given-names> </name><name name-style="western"><surname>Watts</surname><given-names>BV</given-names> </name></person-group><article-title>Identification of inpatient falls using automated review of text-based medical records</article-title><source>J Patient Saf</source><year>2020</year><month>09</month><volume>16</volume><issue>3</issue><fpage>e174</fpage><lpage>e178</lpage><pub-id pub-id-type="doi">10.1097/PTS.0000000000000275</pub-id><pub-id pub-id-type="medline">27331601</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Thorsteinsdottir</surname><given-names>B</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><etal/></person-group><article-title>A hybrid model to identify fall occurrence from electronic health records</article-title><source>Int J Med 
Inform</source><year>2022</year><month>03</month><day>7</day><volume>162</volume><fpage>104736</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2022.104736</pub-id><pub-id pub-id-type="medline">35316697</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leucht</surname><given-names>P</given-names> </name><name name-style="western"><surname>Fischer</surname><given-names>K</given-names> </name><name name-style="western"><surname>Muhr</surname><given-names>G</given-names> </name><name name-style="western"><surname>Mueller</surname><given-names>EJ</given-names> </name></person-group><article-title>Epidemiology of traumatic spine fractures</article-title><source>Injury</source><year>2009</year><month>02</month><volume>40</volume><issue>2</issue><fpage>166</fpage><lpage>172</lpage><pub-id pub-id-type="doi">10.1016/j.injury.2008.06.040</pub-id><pub-id pub-id-type="medline">19233356</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="book"><person-group person-group-type="author"><collab>Institute of Medicine (US) Division of Health Promotion and Disease Prevention</collab><name name-style="western"><surname>Berg</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Cassells</surname><given-names>JS</given-names> </name></person-group><article-title>Falls in older persons: risk factors and prevention</article-title><source>The Second Fifty Years: Promoting Health and Preventing Disability, National Academies Press (US)</source><year>1992</year><access-date>2024-07-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK235613">https://www.ncbi.nlm.nih.gov/books/NBK235613</ext-link></comment></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Sterling</surname><given-names>DA</given-names> </name><name name-style="western"><surname>O'Connor</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Bonadies</surname><given-names>J</given-names> </name></person-group><article-title>Geriatric falls: injury severity is high and disproportionate to mechanism</article-title><source>The Journal of Trauma: Injury, Infection, and Critical Care</source><year>2001</year><month>01</month><volume>50</volume><issue>1</issue><fpage>116</fpage><lpage>119</lpage><pub-id pub-id-type="doi">10.1097/00005373-200101000-00021</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chambon</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Steinkamp</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Adleberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cook</surname><given-names>TS</given-names> </name><name name-style="western"><surname>Langlotz</surname><given-names>CP</given-names> </name></person-group><article-title>Automated deidentification of radiology reports combining transformer and &#x201C;hide in plain sight&#x201D; rule-based methods</article-title><source>J Am Med Inform Assoc</source><year>2023</year><month>01</month><day>18</day><volume>30</volume><issue>2</issue><fpage>318</fpage><lpage>328</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocac219</pub-id><pub-id pub-id-type="medline">36416419</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>ZSY</given-names> </name><name name-style="western"><surname>So</surname><given-names>HY</given-names> </name><name name-style="western"><surname>Tsui</surname><given-names>KL</given-names> </name></person-group><article-title>Evaluating resampling methods and structured features to improve fall incident report identification by the severity level</article-title><source>J Am Med Inform Assoc</source><year>2021</year><month>07</month><day>30</day><volume>28</volume><issue>8</issue><fpage>1756</fpage><lpage>1764</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocab048</pub-id><pub-id pub-id-type="medline">34010385</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Burstein</surname><given-names>J</given-names> </name><name name-style="western"><surname>Doran</surname><given-names>C</given-names> </name><name name-style="western"><surname>Solorio</surname><given-names>T</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><year>2019</year><month>06</month><conf-name>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name><publisher-name>Association for Computational 
Linguistics</publisher-name><fpage>4171</fpage><lpage>4186</lpage><pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quinlan</surname><given-names>JR</given-names> </name></person-group><article-title>Induction of decision trees</article-title><source>Mach Learn</source><year>1986</year><month>03</month><volume>1</volume><issue>1</issue><fpage>81</fpage><lpage>106</lpage><pub-id pub-id-type="doi">10.1007/BF00116251</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Freund</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Schapire</surname><given-names>RE</given-names> </name></person-group><article-title>A decision-theoretic generalization of on-line learning and an application to boosting</article-title><source>J Comput Syst Sci</source><year>1997</year><month>08</month><volume>55</volume><issue>1</issue><fpage>119</fpage><lpage>139</lpage><pub-id pub-id-type="doi">10.1006/jcss.1997.1504</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zi&#x0119;ba</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tomczak</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Tomczak</surname><given-names>JM</given-names> </name></person-group><article-title>Ensemble boosted trees with synthetic features generation in application to bankruptcy prediction</article-title><source>Expert Syst Appl</source><year>2016</year><month>10</month><volume>58</volume><fpage>93</fpage><lpage>101</lpage><pub-id pub-id-type="doi">10.1016/j.eswa.2016.04.001</pub-id></nlm-citation></ref><ref 
id="ref27"><label>27</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Guestrin</surname><given-names>C</given-names> </name></person-group><article-title>XGBoost: a scalable tree boosting system</article-title><year>2016</year><month>08</month><conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD &#x2019;16)</conf-name><publisher-name>Association for Computing Machinery</publisher-name><fpage>785</fpage><lpage>794</lpage><pub-id pub-id-type="doi">10.1145/2939672.2939785</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Jung</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Classification of short single-lead electrocardiograms (ECGs) for atrial fibrillation detection using piecewise linear spline and XGBoost</article-title><source>Physiol Meas</source><year>2018</year><month>10</month><day>24</day><volume>39</volume><issue>10</issue><fpage>104006</fpage><pub-id pub-id-type="doi">10.1088/1361-6579/aadf0f</pub-id><pub-id pub-id-type="medline">30183685</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Torlay</surname><given-names>L</given-names> </name><name name-style="western"><surname>Perrone-Bertolotti</surname><given-names>M</given-names> </name><name name-style="western"><surname>Thomas</surname><given-names>E</given-names> </name><name 
name-style="western"><surname>Baciu</surname><given-names>M</given-names> </name></person-group><article-title>Machine learning&#x2013;XGBoost analysis of language networks to classify patients with epilepsy</article-title><source>Brain Inf</source><year>2017</year><month>09</month><volume>4</volume><issue>3</issue><fpage>159</fpage><lpage>169</lpage><pub-id pub-id-type="doi">10.1007/s40708-017-0065-7</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Rosendael</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Maliakal</surname><given-names>G</given-names> </name><name name-style="western"><surname>Kolli</surname><given-names>KK</given-names> </name><etal/></person-group><article-title>Maximization of the usage of coronary CTA derived plaque information using a machine learning based algorithm to improve risk stratification; insights from the CONFIRM registry</article-title><source>J Cardiovasc Comput Tomogr</source><year>2018</year><month>05</month><volume>12</volume><issue>3</issue><fpage>204</fpage><lpage>209</lpage><pub-id pub-id-type="doi">10.1016/j.jcct.2018.04.011</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Alizadehsani</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hosseini</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Sani</surname><given-names>ZA</given-names> </name><name name-style="western"><surname>Ghandeharioun</surname><given-names>A</given-names> </name><name name-style="western"><surname>Boghrati</surname><given-names>R</given-names> </name></person-group><article-title>Diagnosis of coronary artery disease using cost-sensitive algorithms</article-title><source>Proceedings of 2012 IEEE 12th 
International Conference on Data Mining Workshops</source><year>2012</year><fpage>9</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.1109/ICDMW.2012.29</pub-id><pub-id pub-id-type="other">978-1-4673-5164-5</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pan</surname><given-names>D</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Qu</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name></person-group><article-title>Human falling detection algorithm based on multisensor data fusion with SVM</article-title><source>Mobile Information Systems</source><year>2020</year><month>10</month><day>31</day><volume>2020</volume><issue>1</issue><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1155/2020/8826088</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Parmar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Katariya</surname><given-names>R</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>V</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Hemanth</surname><given-names>J</given-names> </name><name name-style="western"><surname>Fernando</surname><given-names>X</given-names> </name><name name-style="western"><surname>Lafata</surname><given-names>P</given-names> </name><name name-style="western"><surname>Baig</surname><given-names>Z</given-names> </name></person-group><article-title>A review on random forest: an ensemble classifier</article-title><source>International Conference on Intelligent Data Communication 
Technologies and Internet of Things (ICICI)</source><year>2019</year><publisher-name>Springer, Cham</publisher-name><fpage>758</fpage><lpage>763</lpage><series>Lecture Notes on Data Engineering and Communications Technologies</series><pub-id pub-id-type="doi">10.1007/978-3-030-03146-6_86</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Opitz</surname><given-names>J</given-names> </name></person-group><article-title>From bias and prevalence to macro F1, kappa, and MCC: a structured overview of metrics for multi-class evaluation</article-title><source>Semantic Scholar</source><access-date>2025-06-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.semanticscholar.org/paper/From-Bias-and-Prevalence-to-Macro-F1%2C-Kappa%2C-and-A-Opitz/ff334e8cee3550eb5c80a61213ad0aecb549f48f">https://www.semanticscholar.org/paper/From-Bias-and-Prevalence-to-Macro-F1%2C-Kappa%2C-and-A-Opitz/ff334e8cee3550eb5c80a61213ad0aecb549f48f</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nakatani</surname><given-names>H</given-names> </name><name name-style="western"><surname>Nakao</surname><given-names>M</given-names> </name><name name-style="western"><surname>Uchiyama</surname><given-names>H</given-names> </name><name name-style="western"><surname>Toyoshiba</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ochiai</surname><given-names>C</given-names> </name></person-group><article-title>Predicting inpatient falls using natural language processing of nursing records obtained from Japanese electronic medical records: case-control study</article-title><source>JMIR Med 
Inform</source><year>2020</year><month>04</month><day>22</day><volume>8</volume><issue>4</issue><fpage>e16970</fpage><pub-id pub-id-type="doi">10.2196/16970</pub-id><pub-id pub-id-type="medline">32319959</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Popowich</surname><given-names>F</given-names> </name></person-group><article-title>Using text mining and natural language processing for health care claims processing</article-title><source>SIGKDD Explor Newsl</source><year>2005</year><month>06</month><volume>7</volume><issue>1</issue><fpage>59</fpage><lpage>66</lpage><pub-id pub-id-type="doi">10.1145/1089815.1089824</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hoffman</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Hays</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Shapiro</surname><given-names>MF</given-names> </name><name name-style="western"><surname>Wallace</surname><given-names>SP</given-names> </name><name name-style="western"><surname>Ettner</surname><given-names>SL</given-names> </name></person-group><article-title>Claims-based identification methods and the cost of fall-related injuries among US older adults</article-title><source>Med Care</source><year>2016</year><month>07</month><volume>54</volume><issue>7</issue><fpage>664</fpage><lpage>671</lpage><pub-id pub-id-type="doi">10.1097/MLR.0000000000000531</pub-id><pub-id pub-id-type="medline">27057747</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cheligeer</surname><given-names>C</given-names> </name><name 
name-style="western"><surname>Wu</surname><given-names>G</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>S</given-names> </name><etal/></person-group><article-title>BERT-based neural network for inpatient fall detection from electronic medical records: retrospective cohort study</article-title><source>JMIR Med Inform</source><year>2024</year><month>01</month><day>30</day><volume>12</volume><issue>1</issue><fpage>e48995</fpage><pub-id pub-id-type="doi">10.2196/48995</pub-id><pub-id pub-id-type="medline">38289643</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luther</surname><given-names>SL</given-names> </name><name name-style="western"><surname>McCart</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Berndt</surname><given-names>DJ</given-names> </name><etal/></person-group><article-title>Improving identification of fall-related injuries in ambulatory care using statistical text mining</article-title><source>Am J Public Health</source><year>2015</year><month>06</month><volume>105</volume><issue>6</issue><fpage>1168</fpage><lpage>1173</lpage><pub-id pub-id-type="doi">10.2105/AJPH.2014.302440</pub-id><pub-id pub-id-type="medline">25880936</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>dos Santos</surname><given-names>HDP</given-names> </name><name name-style="western"><surname>Silva</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Maciel</surname><given-names>MCO</given-names> </name><name name-style="western"><surname>Burin</surname><given-names>HMV</given-names> </name><name name-style="western"><surname>Urbanetto</surname><given-names>JS</given-names> </name><name 
name-style="western"><surname>Vieira</surname><given-names>R</given-names> </name></person-group><article-title>Fall detection in EHR using word embeddings and deep learning</article-title><conf-name>2019 IEEE 19th International Conference on Bioinformatics and Bioengineering (BIBE)</conf-name><conf-date>Oct 28-30, 2019</conf-date><conf-loc>Athens, Greece</conf-loc><publisher-name>IEEE</publisher-name><fpage>265</fpage><lpage>268</lpage><pub-id pub-id-type="doi">10.1109/BIBE.2019.00054</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Gani</surname><given-names>R</given-names> </name><name name-style="western"><surname>Chalaguine</surname><given-names>L</given-names> </name></person-group><article-title>Feature engineering vs BERT on Twitter data</article-title><source>arXiv</source><comment>Preprint posted online on Oct 28, 2022</comment><pub-id pub-id-type="doi">10.48550/ARXIV.2210.16168</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Albano</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sciandra</surname><given-names>M</given-names> </name><name name-style="western"><surname>Plaia</surname><given-names>A</given-names> </name></person-group><article-title>Ensemble method for text classification in medicine with multiple rare classes</article-title><source>CLADAG 2023</source><year>2023</year><access-date>2025-07-06</access-date><publisher-name>Pearson, Italy</publisher-name><fpage>17</fpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.statlab-unisa.it/cladag2023/book-of-abstracts/">https://www.statlab-unisa.it/cladag2023/book-of-abstracts/</ext-link></comment></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="book"><person-group 
person-group-type="author"><name name-style="western"><surname>Gliklich</surname><given-names>RE</given-names> </name><name name-style="western"><surname>Dreyer</surname><given-names>NA</given-names> </name><name name-style="western"><surname>Leavy</surname><given-names>MB</given-names> </name></person-group><source>Registries for Evaluating Patient Outcomes: A User&#x2019;s Guide</source><year>2014</year><publisher-name>Rockville (MD): Agency for Healthcare Research and Quality (US)</publisher-name><pub-id pub-id-type="other">24945055</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Anand</surname><given-names>N</given-names> </name><name name-style="western"><surname>Edwards</surname><given-names>L</given-names> </name><name name-style="western"><surname>Baker</surname><given-names>LX</given-names> </name><name name-style="western"><surname>Chren</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Wheless</surname><given-names>L</given-names> </name></person-group><article-title>Validity of using billing codes from electronic health records to estimate skin cancer counts</article-title><source>JAMA Dermatol</source><year>2021</year><month>09</month><day>1</day><volume>157</volume><issue>9</issue><fpage>1089</fpage><lpage>1094</lpage><pub-id pub-id-type="doi">10.1001/jamadermatol.2021.2856</pub-id><pub-id pub-id-type="medline">34379079</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qureshi</surname><given-names>AI</given-names> </name><name name-style="western"><surname>Harris-Lane</surname><given-names>P</given-names> </name><name name-style="western"><surname>Siddiqi</surname><given-names>F</given-names> </name><name 
name-style="western"><surname>Kirmani</surname><given-names>JF</given-names> </name></person-group><article-title>International classification of diseases and current procedural terminology codes underestimated thrombolytic use for ischemic stroke</article-title><source>J Clin Epidemiol</source><year>2006</year><month>08</month><volume>59</volume><issue>8</issue><fpage>856</fpage><lpage>858</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2006.01.004</pub-id><pub-id pub-id-type="medline">16828680</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dal</surname><given-names>J</given-names> </name><name name-style="western"><surname>Skou</surname><given-names>N</given-names> </name><name name-style="western"><surname>Nielsen</surname><given-names>EH</given-names> </name><name name-style="western"><surname>Pedersen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Joergensen</surname><given-names>JOL</given-names> </name></person-group><article-title>Acromegaly according to the Danish National Registry of Patients: how valid are ICD diagnoses and how do patterns of registration affect the accuracy of registry data?</article-title><source>Clin Epidemiol</source><year>2014</year><month>09</month><fpage>295</fpage><pub-id pub-id-type="doi">10.2147/CLEP.S63758</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bedard</surname><given-names>NA</given-names> </name><name name-style="western"><surname>Katz</surname><given-names>JN</given-names> </name><name name-style="western"><surname>Losina</surname><given-names>E</given-names> </name><name name-style="western"><surname>Opare-Addo</surname><given-names>MB</given-names> </name><name 
name-style="western"><surname>Kopp</surname><given-names>PT</given-names> </name></person-group><article-title>Administrative data use in national registry efforts: blessing or curse?</article-title><source>Journal of Bone and Joint Surgery</source><year>2022</year><volume>104</volume><issue>Suppl 3</issue><fpage>39</fpage><lpage>46</lpage><pub-id pub-id-type="doi">10.2106/JBJS.22.00565</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raza</surname><given-names>S</given-names> </name><name name-style="western"><surname>Schwartz</surname><given-names>B</given-names> </name></person-group><article-title>Constructing a disease database and using natural language processing to capture and standardize free text clinical information</article-title><source>Sci Rep</source><year>2023</year><month>05</month><day>26</day><volume>13</volume><issue>1</issue><fpage>8591</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-35482-0</pub-id><pub-id pub-id-type="medline">37237101</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplementary tables detailing an overview of the text segmentation process and model development.</p><media xlink:href="medinform_v13i1e66973_app1.docx" xlink:title="DOCX File, 23 KB"/></supplementary-material></app-group></back></article>