<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e66466</article-id><article-id pub-id-type="doi">10.2196/66466</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Identifying Patient-Reported Outcome Measure Documentation in Veterans Health Administration Chiropractic Clinic Notes: Natural Language Processing Analysis</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Coleman</surname><given-names>Brian C</given-names></name><degrees>MHS, DC</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Corcoran</surname><given-names>Kelsey L</given-names></name><degrees>DC</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Brandt</surname><given-names>Cynthia A</given-names></name><degrees>MPH, MD</degrees><xref ref-type="aff" 
rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Goulet</surname><given-names>Joseph L</given-names></name><degrees>MS, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Luther</surname><given-names>Stephen L</given-names></name><degrees>MA, PhD</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lisi</surname><given-names>Anthony J</given-names></name><degrees>DC</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Pain Research, Informatics, Multimorbidities, and Education Center, VA Connecticut Healthcare System</institution><addr-line>950 Campbell Ave</addr-line><addr-line>West Haven</addr-line><addr-line>CT</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Emergency Medicine, Yale School of Medicine, Yale University</institution><addr-line>New Haven</addr-line><addr-line>CT</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Biomedical Informatics and Data Science, Yale School of Medicine, Yale University</institution><addr-line>New Haven</addr-line><addr-line>CT</addr-line><country>United States</country></aff><aff id="aff4"><institution>Center of Innovation for Complex Chronic Healthcare, Edward Hines, Jr. 
VA Hospital</institution><addr-line>Hines</addr-line><addr-line>IL</addr-line><country>United States</country></aff><aff id="aff5"><institution>College of Public Health, University of South Florida</institution><addr-line>Tampa</addr-line><addr-line>FL</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Lovis</surname><given-names>Christian</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Chrimes</surname><given-names>Dillon</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Malhotra</surname><given-names>Meetu</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Hou</surname><given-names>Zhen</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Brian C Coleman, MHS, DC, Pain Research, Informatics, Multimorbidities, and Education Center, VA Connecticut Healthcare System, 950 Campbell Ave, West Haven, CT, 06516, United States, 1 2039325711; <email>brian.coleman@yale.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>2</day><month>4</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e66466</elocation-id><history><date date-type="received"><day>13</day><month>09</month><year>2024</year></date><date date-type="rev-recd"><day>11</day><month>03</month><year>2025</year></date><date date-type="accepted"><day>15</day><month>03</month><year>2025</year></date></history><copyright-statement>&#x00A9; Brian C Coleman, Kelsey L Corcoran, Cynthia A Brandt, Joseph L Goulet, Stephen L Luther, Anthony J Lisi. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 2.4.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e66466"/><abstract><sec><title>Background</title><p>The use of patient-reported outcome measures (PROMs) is an expected component of high-quality, measurement-based chiropractic care. The largest health care system offering integrated chiropractic care is the Veterans Health Administration (VHA). Challenges limit monitoring PROM use as a care quality metric at a national scale in the VHA. Structured data are unavailable, with PROMs often embedded within clinic text notes as unstructured data requiring time-intensive, peer-conducted chart review for evaluation. Natural language processing (NLP) of clinic text notes is one promising solution to extracting care quality data from unstructured text.</p></sec><sec><title>Objective</title><p>This study aims to test NLP approaches to identify PROMs documented in VHA chiropractic text notes.</p></sec><sec sec-type="methods"><title>Methods</title><p>VHA chiropractic notes from October 1, 2017, to September 30, 2020, were obtained from the VHA Musculoskeletal Diagnosis/Complementary and Integrative Health Cohort. 
A rule-based NLP model built using medspaCy and spaCy was evaluated on text matching and note categorization tasks. SpaCy was used to build bag-of-words, convolutional neural networks, and ensemble models for note categorization. Performance metrics for each model and task included precision, recall, and F-measure. Cross-validation was used to validate performance metric estimates for the statistical and machine-learning models.</p></sec><sec sec-type="results"><title>Results</title><p>Our sample included 377,213 visit notes from 56,628 patients. The rule-based model performance was good for soft-boundary text-matching (precision=81.1%, recall=96.7%, and F-measure=88.2%) and excellent for note categorization (precision=90.3%, recall=99.5%, and F-measure=94.7%). Cross-validation performance of the statistical and machine learning models for the note categorization task was very good overall, but lower than rule-based model performance. The overall prevalence of PROM documentation was low (17.0%).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>We evaluated multiple NLP methods across a series of tasks, with optimal performance achieved using a rule-based method. By leveraging NLP approaches, we can overcome the challenges posed by unstructured clinical text notes to track documented PROM use. Overall documented use of PROMs in chiropractic notes was low and highlights a potential for quality improvement. 
This work represents a methodological advancement in the identification and monitoring of documented use of PROMs to ensure consistent, high-quality chiropractic care for veterans.</p></sec></abstract><kwd-group><kwd>Veterans Health Administration</kwd><kwd>natural language processing</kwd><kwd>quality of health care</kwd><kwd>chiropractic</kwd><kwd>patient reported outcome measures</kwd><kwd>NLP</kwd><kwd>AI</kwd><kwd>artificial intelligence</kwd><kwd>veteran</kwd><kwd>chiropractor</kwd><kwd>integrated health cohort</kwd><kwd>musculoskeletal diagnosis</kwd><kwd>musculoskeletal</kwd><kwd>quality of care</kwd><kwd>care</kwd><kwd>PROM</kwd><kwd>neural network</kwd><kwd>chiropractic care</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Patient-reported outcome measures (PROMs) are standardized, validated questionnaires completed by patients to identify and quantify their perceptions of their health status [<xref ref-type="bibr" rid="ref1">1</xref>]. These measures are often of interest to clinicians to assess condition severity and response to treatment as a component of a measurement-based care approach. Measurement-based care is a recommended practice in the management of musculoskeletal pain conditions [<xref ref-type="bibr" rid="ref2">2</xref>], where biomarkers of disease severity are lacking and baseline and serial reassessment for progress may influence clinical decision-making. Using PROMs can improve communication and shared decision-making between patients and clinicians, enable contextualization of pain within a patient&#x2019;s life, and may positively influence health and pain status [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>In the Veterans Health Administration (VHA), patients with musculoskeletal pain may receive chiropractic care, where the use of PROMs is an expected component of high-quality care [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. 
The VHA Office of Specialty Care Ongoing Professional Practice Evaluation quality program for chiropractors includes a quality metric stating &#x201C;appropriate pain, functional, and/or other measures are documented and used to inform clinical decision making.&#x201D; However, substantial challenges limit monitoring this important metric of high-quality care at a national scale. Digital data systems integration is limited for both remote and point-of-care data collection that may otherwise facilitate structured data collection, thus any PROM documentation often occurs in unstructured clinic notes. Quality evaluation of these notes typically requires time-intensive, peer-conducted chart review, with substantial human effort limiting ongoing monitoring [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>The VHA Chiropractic Program has expanded rapidly in recent years driven by policy change and natural growth [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], with 299 facilities now offering on-site chiropractic care in fiscal year (FY) 2024, up 244% from 87 facilities in FY2017. Scalable solutions are needed to monitor established care quality metrics, such as PROM use, and ensure high-quality chiropractic care delivery across the enterprise. Further, such solutions could also be widely applicable to other disciplines managing musculoskeletal pain, which is highly prevalent and burdensome in the VHA system and beyond.</p><p>Natural language processing (NLP) of clinic text notes is one promising solution to extracting care quality data from unstructured text [<xref ref-type="bibr" rid="ref9">9</xref>], with previous studies focusing on pain care quality in primary care and chiropractic care settings [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Use cases for NLP across other clinical domains highlight its potential utility in information extraction and analysis tasks. 
NLP methods have been used to accurately identify functional status impairment for patients with dementia based on electronic health record (EHR) clinic notes [<xref ref-type="bibr" rid="ref12">12</xref>]. Additionally, NLP techniques have shown utility in information extraction, classification, and risk prediction tasks for patient-reported outcomes among cancer patients [<xref ref-type="bibr" rid="ref13">13</xref>]. Relevant to pain management, NLP has been used to identify opioid use and misuse in clinic notes [<xref ref-type="bibr" rid="ref14">14</xref>] and support decision support systems to impact clinical care [<xref ref-type="bibr" rid="ref15">15</xref>]. More recently, the use of large-language models in data mining of EHR data, including clinical text data, has demonstrated promise and efficiency in information extraction and content analysis [<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>The objective of this study was to develop an NLP approach to identify PROMs documented in VHA chiropractic clinic text notes. We aimed to iteratively develop a rule-based pipeline and evaluate performance for a text span pattern matching task and a note categorization task. We also aimed to compare the performance of the rule-based method to statistical and machine learning methods for the note categorization task and estimate the overall prevalence of documented PROM use in the corpus. 
We hypothesize a rule-based NLP approach, when iteratively developed and refined, will demonstrate comparable performance to statistical and machine learning methods in identifying and categorizing PROM documentation within VHA chiropractic clinic text notes.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Setting, Data Sources, and Cohort</title><p>We conducted a secondary, retrospective analysis of the Musculoskeletal Diagnosis/Complementary and Integrative Health (MSD/CIH) Cohort [<xref ref-type="bibr" rid="ref17">17</xref>]&#x2014;an EHR data cohort of VHA patients receiving VHA health care for musculoskeletal conditions, with updated cohort entry through September 30, 2020. Study reporting was informed by published recommendations for reporting machine learning and NLP studies [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>].</p><p>We identified patients in the MSD/CIH Cohort who received VHA chiropractic care at a VHA facility between October 1, 2017, and September 30, 2020. All chiropractic clinic visits were identified using an administrative clinic identifier denoting &#x201C;Chiropractic Care.&#x201D; Initiating chiropractic care in the VHA most often requires the placement of a referral order for consultation, with the consultation visit linked to this order in the EHR. We included only patients with an initial consultation visit during the study period and included all visits occurring within 1 year of their first consultation visit date. Population demographic and clinical characteristics were extracted from the EHR for each patient included in the sample, including age at first chiropractic consultation, sex of record, race/ethnicity, marital status, smoking status, service-connected disability percentage, and BMI. 
<italic>International Classification of Diseases, 10th Revision</italic> (<italic>ICD-10</italic>) codes were also extracted for each visit, and flags were used to denote whether a visit included a low back pain diagnosis, neck pain diagnosis, other spinal pain diagnosis, or any other diagnosis.</p><p>We obtained all clinic text notes linked to the identified visits to build a corpus of clinic visit notes. We excluded note types related to telecommunications and administrative events (eg, appointment scheduling, secure messaging), including only notes describing chiropractic care in the ambulatory, in-hospital, or telehealth setting.</p></sec><sec id="s2-2"><title>Data Preparation</title><p>Multiple notes can be written to describe the same identified chiropractic visit; for example, a resident chiropractor note and an attending chiropractor note may each contain data relevant to a single visit. We concatenated all notes linked to the same unique visit identifier on the same date of service (regardless of note author) to create a 1-to-1 relationship between visits and clinic notes. A unique character set was used as a delimiter to separate individual notes. To evaluate the amount of text present across notes in the corpus, we quantified each note length using tokenization based on whitespace splitting and visualized the data using a histogram of token lengths. We compared tokenized lengths across different types of visits and across FYs through visual comparison, Kruskal-Wallis tests across all groups, and Dunn tests across group pairs with a significance level <italic>&#x03B1;</italic> of 0.05 and a Bonferroni correction for multiple comparisons.</p><p>Additional metadata about each chiropractic care visit was extracted from the EHR and written into a header for each note, offset from the rest of the note using a unique character series delimiter. 
This included a patient identifier, visit identifier, visit date, VHA facility identifier code, the date of the first consultation visit, and the visit number with the total number of visits within the year after consultation (eg, &#x201C;visit 3 of 6&#x201D;), the number of days since the previous visit (if applicable), and the number of days to the next visit (if applicable).</p><p>Visits were tagged into the following five exclusive categories using conditional logic: (1) first consultation visit, (2) final visit within 1 year, (3) visit immediately preceding a 60-day gap in care, (4) visit immediately following a 60-day gap in care, and (5) other intermediate visit. These categories were identified to stratify visits on having a higher likelihood of potential use of PROMs at the beginning or end of an episode of chiropractic care or before or after a gap in chiropractic care.</p><p>A nationally representative, random stratified sample of 300 notes was selected for human annotation to be used as an initial training set. The stratification approach was designed to select an approximately equal number of notes for each visit category and was randomly repeated until a maximum number of facilities was included in the training set (78 of 79, with 1 facility excluded due to very few notes included in the corpus).</p></sec><sec id="s2-3"><title>Corpus Annotation</title><p>Two study investigators (BCC and KLC) with VHA chiropractic care subject-matter expertise annotated the training corpus using eHost, a Java-based annotation tool [<xref ref-type="bibr" rid="ref20">20</xref>]. Annotators identified and tagged spans of text referencing documented use of PROMs and assigned a span attribute for the specific type of PROM based on an a priori list (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
The initial seed list of PROMs was a sample of validated PROMs potentially used by chiropractors addressing pain and function, particularly for spinal conditions. An additional attribute class was included for tagging an unspecified PROM that was not included in the list, which was reviewed for inclusion during subsequent iterations. We excluded all versions of the Numerical Rating Scale and Visual Analog Scales as unidimensional measures that have been criticized as having potentially limited clinical importance [<xref ref-type="bibr" rid="ref21">21</xref>], especially when trying to measure a complex, multifaceted condition like musculoskeletal pain [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. An annotation guide was developed and iteratively revised/validated using a preparatory random note sample from a separate corpus during an annotation pilot. Interannotator agreement (IAA) was high across 3 samples of pilot notes (n<sub>1</sub>=50 notes, IAA<sub>1</sub>=78.8%; n<sub>2</sub>=100 notes, IAA<sub>2</sub>=84.5%; and n<sub>3</sub>=100 notes, IAA<sub>3</sub>=86.1%).</p><p>Annotation of the initial training set (n=300 notes) was iteratively completed in 100-note batches. IAA remained high across the 3 iterations of annotating the initial training set (IAA<sub>1</sub>=71.7%, IAA<sub>2</sub>=81.2%, and IAA<sub>3</sub>=87.1%). Adjudication of disagreement was achieved through review and discussion between the annotators. A third-person adjudicator was available to provide a final adjudication decision in the event of unresolved disagreement, though this did not become necessary.</p></sec><sec id="s2-4"><title>Initial NLP Model Development</title><p>A rule-based NLP pipeline (<xref ref-type="fig" rid="figure1">Figure 1</xref>) was built using medspaCy [<xref ref-type="bibr" rid="ref24">24</xref>] (v1.0.0) and spaCy [<xref ref-type="bibr" rid="ref25">25</xref>] (v3.6.0; Explosion) in Python (v3.8.5; Python Software Foundation). 
The initial pipeline used the medspaCy TargetMatcher and TargetRule functions to match spans of text in a spaCy Doc object. Matches were based on an initial set of rules to define text pattern rules for 15 predetermined PROMs, with an associated attribute referencing a specific measure assigned for each identified span.</p><p>For all iterations of NLP model development, evaluation statistics (precision, recall, and F-measure) were calculated based on true positives, false positives, and false negatives for three defined matching tasks: (1) strict-boundary matching, (2) soft-boundary matching, and (3) note categorization. Strict-boundary matching considered only the perfect overlap of the human annotation and NLP target matching methods to be a match in performance metric calculation. Soft-boundary matching was a fuzzy matching approach that allowed for flexibility in the overlap between the span start and span end positions of the human annotation and NLP target matching methods to define a match. Note categorization cast the results of the human annotation and NLP target matching to a binary document classification question, defining documented PROM use at the note level. All matching methods required matching on the assigned attribute for a specified PROM to be considered a match.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Rule-based natural language processing (NLP) medspaCy pipeline overview. PROM: patient-reported outcome measure.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e66466_fig01.png"/></fig></sec><sec id="s2-5"><title>NLP Model Refinement</title><p>The initial pipeline was run on the annotated first training set and false positives and false negatives were manually reviewed to revise the TargetMatcher rules and the overall NLP pipeline. 
Pipeline modifications were attentively designed to address only generalizable false positives and negatives and avoid overfitting the training data. During this review, false negatives were addressed by modifying TargetMatcher rules to better represent broader matching patterns (eg, abbreviations), supporting fuzzy matching (eg, typographic errors), and by adding TargetMatcher rules for 3 additional measures identified during annotation. False positives were addressed by adding sectionizer and postprocessor steps to the NLP pipeline. A medspaCy sectionizer component was added to define sections of the clinical note, with a Postprocessor component added to set rules for sections to ignore in defining PROM spans. This allowed the exclusion of inappropriate matches that were being identified in abbreviation lists, medication lists, goals of care, and other irrelevant sections of the note. The final rule-based model is available on GitHub [<xref ref-type="bibr" rid="ref26">26</xref>].</p><p>After the development of an initial NLP rule-based model to identify PROM documentation, a second high-probability training set (n=200) was identified based on a preliminary model NLP output as programmatic labeling and annotated by a single investigator (BCC). This created a full annotation set of 500 notes and increased the prevalence of positive notes in the training set to balance the prediction problem.</p><p>Using the full training set, we trained an initial set of statistical and machine learning models to complete the note categorization task prediction and compared their performance to the rule-based model. The full annotated training set was randomly partitioned with 75% allocated to the model training set, 15% to the development set to tune hyperparameters, and 10% to the test set to evaluate performance. 
We tested 3 model architecture configurations built in spaCy: (1) a bag-of-words (BOW) model, (2) a convolutional neural network model, and (3) an ensemble model combining a linear BOW model and Tok2Vec model. Architecture configurations for each model are detailed in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> with source code available on GitHub [<xref ref-type="bibr" rid="ref26">26</xref>]. All computational analysis was conducted using a secure virtual machine with a Windows 10 operating system, 8 virtual processors (x86-64, 2.60 GHz), and 16 GB memory, with variable overall runtime based on competing user demands.</p></sec><sec id="s2-6"><title>NLP Model Evaluation and Validation</title><p>For the initial evaluation of the rule-based model performance on the text matching and note categorization tasks, we evaluated performance based on precision, recall, and F-measure. To compare the performance of the rule-based model to the initial statistical and machine learning models for the note categorization task, precision, recall, F-measure, and accuracy were calculated. The area under the receiver operating characteristic curve (AUC-ROC) was calculated for the statistical and machine learning models. AUC-ROC was not calculated for the rule-based model as it is a deterministic model generating a binary decision output rather than a probability score output.</p><p>We used Monte Carlo and k-folds cross-validation (stratified and unstratified) to validate the calculated performance metrics for the initial statistical and machine learning models across multiple simulations. Monte Carlo cross-validation consisted of 100 simulation cycles randomly partitioning the annotated data set into 75% training/15% development/10% testing splits. 
k-folds cross-validation was performed using 10 cycles of 10-fold repeated cross-validation (100 total cycles), with and without stratification, and sampled 8 training folds, 1 development fold, and 1 test fold during each cycle. Precision, recall, F-measure, and AUC-ROC were calculated for each cycle across both cross-validation methods.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study received exemption approval from the institutional review boards of the VA Connecticut Healthcare System (1690344-1) and Yale University (2000032830).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>We identified 56,628 patients for inclusion in this study, with a total of 377,213 visits across the study period. Patient and visit characteristics are presented in <xref ref-type="table" rid="table1">Table 1</xref>. The patient population was consistent with those usually receiving VHA chiropractic care. Patients had a median of 5 (IQR 3-6) chiropractic care visits, with most visits occurring for low back pain. There were 14,198 patients (25.1%) who had at least one 60-day care gap. The tokenized text across the entire corpus had a mean length of 565 tokens (SD 434), with 4617 notes (1.2%) greater than 2000 tokens in length.</p><p>Assessing the tokenized text lengths by visit type for the entire corpus split by whitespaces (<xref ref-type="fig" rid="figure2">Figure 2</xref>) showed a greater number of tokens in the first (consult) visits compared with the other visit types. The distribution of tokenized length of first (consult) visits was right-shifted compared with all other visit types, with a mean length of 1069 tokens (SD 565) and 3198 notes (5.6%) greater than 2000 tokens in length. The mean token length was 464 (SD 316) for other intermediate visits, 514 (SD 404) for visits preceding a 60-day care gap, 587 (SD 427) for visits following a 60-day care gap, and 490 (SD 358) for final visits within 1 year. 
A Kruskal-Wallis test demonstrated statistically significant differences across the 5 visit types (<italic>P</italic>&#x003C;.001), with all corrected pairwise comparisons significant at <italic>P</italic>&#x003C;.001 except visits preceding a 60-day care gap compared with final visits within 1 year (<italic>P</italic>=.008). Tokenized text lengths by FY (<xref ref-type="fig" rid="figure3">Figure 3</xref>) showed a mean length of 579 (SD 456) tokens in FY2018, 571 (SD 435) tokens in FY2019, and 496 (SD 348) tokens in FY2020. A Kruskal-Wallis test showed a statistically significant difference across the FY groups (<italic>P</italic>&#x003C;.001), with corrected pairwise comparisons demonstrating significant differences between FY2020 and each of FY2018 and FY2019 (<italic>P</italic>&#x003C;.001), but not between FY2018 and FY2019 (<italic>P</italic>&#x2265;.99).</p><p>For each included PROM, the text span match frequency and note categorization frequency between human annotation and the rule-based model output on the full training set and in the full-text corpus are presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. When the rule-based model was run on the full note corpus, there were 112,131 PROM text spans identified across 64,027 notes (17.0% of the full corpus), a prevalence consistent with that of human annotation in the initial annotation training set (53 of 300 notes, 17.7%). PROM documentation was identified in 13.8% (n=32,341) of other intermediate visits, 32.7% (n=18,519) of first (consult) visits, 13.2% (n=2365) of visits preceding a 60-day gap in care, 13.9% (n=1733) of visits following a 60-day gap in care, and 16.0% (n=9069) of final visits within 1 year. The prevalence of documented PROM use, by visit, decreased over time (19.1% in FY2018, 16.5% in FY2019, and 13.2% in FY2020) (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). 
The most documented PROMs were the Bournemouth Questionnaire (back and neck versions) and the Oswestry Disability Index.</p><p>Rule-based model performance across the strict- and soft-boundary matching and note categorization tasks is shown in <xref ref-type="table" rid="table2">Table 2</xref>. Performance for the strict-boundary matching task was low across all metrics. When relaxing the matching criteria to allow for soft-boundary overlap, the model performance improved substantially with good to excellent model performance that was balanced across precision and recall with few false positives and very few false negatives. Rule-based model performance in the note categorization task was excellent, with high precision and near-perfect recall.</p><p>The comparison between the rule-based model and the initial statistical and machine learning model performance on the note categorization task is shown in <xref ref-type="table" rid="table3">Table 3</xref>. Using the rule-based model output as a binary text categorization yielded better performance across all metrics and high accuracy (95.8%) compared with the spaCy models. Performance metric distributions (<xref ref-type="fig" rid="figure4">Figure 4</xref>) and the mean metric with 95% CI (<xref ref-type="table" rid="table4">Table 4</xref>) were consistent across each cross-validation method, with acceptable to good model performance across all metrics for all spaCy models. 
The ensemble model consistently outperformed both the BOW and convoluted neural network models across all metrics, with a good balance between precision and recall and a high AUC-ROC.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Patient and visit sample characteristics.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristics</td><td align="left" valign="bottom">Total</td></tr></thead><tbody><tr><td align="left" valign="top">Patient characteristics</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total patients, n</td><td align="left" valign="top">56,628</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> (years), median (IQR)</td><td align="left" valign="top">53 (39-66)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sex, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Female</td><td align="left" valign="top">9119 (16.1)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Male</td><td align="left" valign="top">47,509 (83.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Race or ethnicity, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;White</td><td align="left" valign="top">38,921 (68.7)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Black or African American</td><td align="left" valign="top">9899 (17.5)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Hispanic or Latino</td><td align="left" valign="top">4410 (7.8)</td></tr><tr><td align="left" 
valign="top">&#x2003;&#x2003;Other or unknown</td><td align="left" valign="top">3398 (6.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Marital status, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Married</td><td align="left" valign="top">29,653 (52.4)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Single or never married</td><td align="left" valign="top">9456 (16.7)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Divorced or separated</td><td align="left" valign="top">15,650 (27.6)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Other or unknown</td><td align="left" valign="top">1869 (3.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Smoking status, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Current smoker</td><td align="left" valign="top">19,133 (33.8)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Former smoker</td><td align="left" valign="top">15,872 (28.0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Never smoker</td><td align="left" valign="top">21,571 (38.1)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Missing</td><td align="left" valign="top">52 (0.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Service-connected percentage, median (IQR)</td><td align="left" valign="top">70.0 (10.0-90.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>BMI<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (kg/m<sup>2</sup>), median (IQR)</td><td align="left" valign="top">29.4 (26.2-33.1)</td></tr><tr><td align="left" valign="top">Visit characteristics</td><td 
align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total visits and notes, n</td><td align="left" valign="top">377,213</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Note token length, median (IQR)</td><td align="left" valign="top">447 (261-718)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Chiropractic care visits per patient, median (IQR)</td><td align="left" valign="top">5.0 (3.0-9.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Visit diagnoses, median (IQR)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Low back pain visits</td><td align="left" valign="top">4.0 (2.0-7.0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Neck pain visits</td><td align="left" valign="top">1.0 (0.0-5.0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Other spinal pain visits</td><td align="left" valign="top">0.0 (0.0-4.0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Other diagnosis visits</td><td align="left" valign="top">2.0 (0.0-5.0)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Visit types, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2003;First (consult) visit</td><td align="left" valign="top">56,628 (15.0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Final visit within 1 year</td><td align="left" valign="top">56,628 (15.0)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Visits preceding a 60-day care gap</td><td align="left" valign="top">17,890 (4.7)</td></tr><tr><td align="left" 
valign="top">&#x2003;&#x2003;Visits following a 60-day care gap</td><td align="left" valign="top">12,466 (3.3)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2003;Other intermediate visit</td><td align="left" valign="top">233,601 (62.0)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Age as of initial chiropractic consult.</p></fn><fn id="table1fn2"><p><sup>b</sup>7108 missing.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Tokenized text length of notes in the study corpus, split on whitespace characters, by visit type with an overflow bin for notes greater than 2000 tokens (n=4617).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e66466_fig02.png"/></fig><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Tokenized text length of notes in the study corpus, split on whitespace characters, by fiscal year, with an overflow bin for notes greater than 2000 tokens (n=4617). FY: fiscal year.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e66466_fig03.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Rule-based model evaluation summary.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Rule-based model task</td><td align="left" valign="bottom">Precision (%)</td><td align="left" valign="bottom">Recall (%)</td><td align="left" valign="bottom">F-measure (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Strict-boundary matching task</td><td align="char" char="." valign="top">47.4</td><td align="char" char="." valign="top">58.0</td><td align="char" char="." valign="top">52.2</td></tr><tr><td align="left" valign="top">Soft-boundary matching task</td><td align="char" char="." 
valign="top">81.1</td><td align="char" char="." valign="top">96.7</td><td align="char" char="." valign="top">88.2</td></tr><tr><td align="left" valign="top">Note categorization task</td><td align="char" char="." valign="top">90.3</td><td align="char" char="." valign="top">99.5</td><td align="char" char="." valign="top">94.7</td></tr></tbody></table></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Note categorization task model evaluation summary.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Precision (%)</td><td align="left" valign="bottom">Recall (%)</td><td align="left" valign="bottom">F-measure (%)</td><td align="left" valign="bottom">Accuracy (%)</td><td align="left" valign="bottom">AUC-ROC<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Rule-based model</td><td align="char" char="." valign="top">90.3</td><td align="char" char="." valign="top">99.5</td><td align="char" char="." valign="top">94.7</td><td align="char" char="." valign="top">95.8</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td></tr><tr><td align="left" valign="top">Bag-of-words model</td><td align="char" char="." valign="top">75.0</td><td align="char" char="." valign="top">85.7</td><td align="char" char="." valign="top">80.0</td><td align="char" char="." valign="top">82.0</td><td align="char" char="." valign="top">93.8</td></tr><tr><td align="left" valign="top">Convoluted neural network model</td><td align="char" char="." valign="top">93.8</td><td align="char" char="." valign="top">71.4</td><td align="char" char="." valign="top">81.1</td><td align="char" char="." valign="top">86.0</td><td align="char" char="." valign="top">94.7</td></tr><tr><td align="left" valign="top">Ensemble model</td><td align="char" char="." 
valign="top">86.4</td><td align="char" char="." valign="top">90.5</td><td align="char" char="." valign="top">88.4</td><td align="char" char="." valign="top">90.0</td><td align="char" char="." valign="top">96.5</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>AUC-ROC: area under the receiver operator characteristic curve.</p></fn><fn id="table3fn2"><p><sup>b</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Statistical and machine learning model performance metrics from Monte Carlo (100 iterations) and k-folds (10x10-folds) cross-validation (stratified and unstratified).</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Precision (%), mean (95% CI)</td><td align="left" valign="bottom">Recall (%), mean (95% CI)</td><td align="left" valign="bottom">F-measure (%), mean (95% CI)</td><td align="left" valign="bottom">AUC-ROC<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> (%), mean (95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="5">Monte Carlo cross-validation</td></tr><tr><td align="left" valign="top">&#x2003;Bag-of-words</td><td align="char" char="." valign="top">82.2 (80.4-84.0)</td><td align="char" char="." valign="top">80.2 (78.1-82.2)</td><td align="char" char="." valign="top">80.6 (79.2-82.0)</td><td align="char" char="." valign="top">91.2 (90.3-92.2)</td></tr><tr><td align="left" valign="top">&#x2003;Convoluted neural network</td><td align="char" char="." valign="top">79.9 (77.9-81.9)</td><td align="char" char="." valign="top">78.5 (76.6-80.5)</td><td align="char" char="." valign="top">78.6 (77.1-80.1)</td><td align="char" char="." valign="top">91.5 (90.7-92.3)</td></tr><tr><td align="left" valign="top">&#x2003;Ensemble</td><td align="char" char="." valign="top">89.1 (87.4-90.8)</td><td align="char" char="." 
valign="top">87.2 (85.1-89.2)</td><td align="char" char="." valign="top">87.7 (86.2-89.2)</td><td align="char" char="." valign="top">95.1 (94.3-95.9)</td></tr><tr><td align="left" valign="top" colspan="5">k-folds cross-validation</td></tr><tr><td align="left" valign="top">&#x2003;Bag-of-words</td><td align="char" char="." valign="top">81.3 (79.5-83.2)</td><td align="char" char="." valign="top">82.1 (80.0-84.2)</td><td align="char" char="." valign="top">81.1 (79.6-82.6)</td><td align="char" char="." valign="top">92.2 (91.4-92.9)</td></tr><tr><td align="left" valign="top">&#x2003;Convoluted neural network</td><td align="char" char="." valign="top">79.6 (77.5-81.7)</td><td align="char" char="." valign="top">79.5 (77.4-81.7)</td><td align="char" char="." valign="top">78.9 (77.2-80.5)</td><td align="char" char="." valign="top">92.2 (91.5-93.0)</td></tr><tr><td align="left" valign="top">&#x2003;Ensemble</td><td align="char" char="." valign="top">88.7 (87.1-90.3)</td><td align="char" char="." valign="top">87.2 (85.4-89.1)</td><td align="char" char="." valign="top">87.6 (86.1-89.0)</td><td align="char" char="." valign="top">95.0 (94.1-95.8)</td></tr><tr><td align="left" valign="top" colspan="5">Stratified k-folds cross-validation</td></tr><tr><td align="left" valign="top">&#x2003;Bag-of-words</td><td align="char" char="." valign="top">83.9 (82.1-85.6)</td><td align="char" char="." valign="top">79.9 (77.6-82.3)</td><td align="char" char="." valign="top">81.2 (79.6-82.8)</td><td align="char" char="." valign="top">92.0 (91.0-92.9)</td></tr><tr><td align="left" valign="top">&#x2003;Convoluted neural network</td><td align="char" char="." valign="top">82.2 (80.4-84.0)</td><td align="char" char="." valign="top">77.9 (75.6-80.2)</td><td align="char" char="." valign="top">79.2 (77.8-80.6)</td><td align="char" char="." valign="top">91.9 (91.1-92.8)</td></tr><tr><td align="left" valign="top">&#x2003;Ensemble</td><td align="char" char="." 
valign="top">88.7 (87.1-90.3)</td><td align="char" char="." valign="top">88.4 (86.6-90.1)</td><td align="char" char="." valign="top">88.2 (86.9-89.4)</td><td align="char" char="." valign="top">95.2 (94.5-96.0)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>AUC-ROC: area under the receiver operator characteristic curve.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Performance metric distributions (with mean value and 95% CIs) for the note text categorization task using Monte Carlo and stratified k-folds cross-validation for the bag-of-words (BOW), convoluted neural network (CNN), and ensemble (ENS) models. AUC of ROC: area under the receiver operator characteristic curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e66466_fig04.png"/></fig></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>In this study, we tested NLP approaches to identify PROM use documented in VHA chiropractic clinic notes as a method to monitor an important marker of high-quality chiropractic care. Our iterative process targeting a series of prediction tasks showed overall strong performance across rule-based, statistical, and machine-learning approaches, affirmed by multiple approaches to cross-validation. The rule-based NLP model had good to excellent performance in identifying text spans referencing PROMs when flexible span boundaries were used but not when span boundaries were strict. When using the rule-based model output as a note categorization prediction, overall performance was excellent, with high precision and near-perfect recall. 
While our initial hypothesis predicted comparable performance between the rule-based approach and the statistical and machine learning methods, our findings demonstrate that the rule-based approach achieved superior performance in identifying and categorizing PROM documentation within this sample of VHA chiropractic clinic text notes. We suspect improved performance may be attributable to data volume and sparsity of text indicators of PROMs relative to the remaining document content describing the rest of the chiropractic care encounter. Rule-based models are generally less susceptible to data volume issues given their methodical approach, whereas machine learning approaches thrive on large data sets where more complex patterns and relationships can be trained on and identified in the text. Important benefits of the rule-based approach include simplicity, interpretability, and efficiency&#x2014;all of which provide added value alongside slightly superior performance.</p><p>Overall model performance was sufficient to have clinical utility for quality metric monitoring, especially given the low prevalence of false negatives. The error analysis process used during model development identified that remaining false positives were frequently due to unique circumstances difficult to address by creating exclusion rules without overfitting the training data. For example, the text &#x201C;Goals of Care: Improve outcome measures, reduce ODI by 20%&#x201D; would identify a section span to exclude (&#x201C;Goals of Care&#x201D;) followed by a section span to include (&#x201C;outcome measures&#x201D;) and a PROM span (&#x201C;ODI&#x201D;), therefore marking the PROM span for inclusion.</p><p>Consistent findings across multiple methods of cross-validation, with and without stratification accounting for baseline PROM use prevalence, showed acceptable performance for each of the statistical and machine learning models tested on the note categorization task. 
Of note, several performance metrics in the initial model evaluation fell outside the cross-validation estimated 95% CI, attesting to the benefit of cross-validation in generating a more appropriate representation of performance.</p></sec><sec id="s4-2"><title>Clinical Implications and Future Work</title><p>While all models performed adequately for note categorization, we used the rule-based model, having the best performance, on the full corpus to estimate the overall prevalence of PROM documentation. The overall prevalence was low and consistent with estimates from the annotation data, highlighting the potential limited documented use of PROMs by VHA chiropractors despite being an identified quality care metric. This is in contrast with data from surveys of US chiropractors in which 60% of respondents reported using PROMs several times per day [<xref ref-type="bibr" rid="ref27">27</xref>]. Qualitative evaluation in a non-VHA setting has highlighted multiple barriers and facilitators to implementing PROMs in chiropractic care [<xref ref-type="bibr" rid="ref28">28</xref>]. These included clinician knowledge and training, engagement and purpose of collecting PROM data, perceived utility versus burden for clinicians and patients, and organizational and administrative factors (such as in-visit time availability and ability to use electronic data collection systems). Evidence of external determinants influencing PROM use can be hypothesized from our results showing a decreasing trend in documented PROM use across each year of the study period. The VHA Chiropractic Program rapidly expanded nationally during this time, with an increase in the number of employed VHA chiropractors [<xref ref-type="bibr" rid="ref29">29</xref>] and increasing service penetration across the national VHA system [<xref ref-type="bibr" rid="ref30">30</xref>]. 
Thus, it is possible that our results reflect an increasing number of new chiropractors not using PROMs, not documenting PROM use in clinic notes, or a combined effect of the 2, highlighting a potential opportunity for quality improvement and education.</p><p>Given our findings, we hypothesize there is a relationship between documentation quantity, as a proxy for comprehensiveness of evaluation, and the use of PROMs. PROM use was most prevalent in first (consult) visits at more than double the rate found in other visit types. Paired with significantly longer text lengths compared with other visit types, this suggests that the increased complexity and information gathering inherent in initial consultations necessitate more thorough documentation, including the application of outcome measures. Additionally, the decrease in documented PROM use during FY2020, paired with shorter text length in FY2020, is potentially attributable to the onset of the COVID-19 pandemic with changes in the quantity and mode of delivery of chiropractic care [<xref ref-type="bibr" rid="ref31">31</xref>] and challenges in administration due to impacted face-to-face care with limited availability of remote data collection information systems.</p><p>Future work should consider patient, visit, facility, clinician, and system factors, including qualitative perspectives of VHA chiropractors, that may influence the use of specific PROMs and their documentation by VHA chiropractors. These determinants can inform intervention development and implementation strategies related to improving PROM use and documentation. 
However, these potential external influences do not affect our confidence in the performance of the developed NLP models to evaluate documented PROM use by VHA chiropractors on a national scale as a quality metric.</p><p>The implications of tracking PROM use by VHA chiropractors using this approach include enhanced patient outcomes, improved clinical decision-making, and ensuring consistent, high-quality chiropractic care for veterans. These are consistent with core practical considerations for the development of NLP systems to address pragmatic clinical needs and improve patient outcomes, including offering an organization incentive for use and supporting ongoing monitoring with implementation feasibility [<xref ref-type="bibr" rid="ref32">32</xref>]. Within the VHA chiropractic care setting, our approach allows for the monitoring of an important quality metric at a national scale using centralized resources. Additionally, this approach standardizes and enhances the objectivity and rigor of assessment of documentation content, while minimizing the individual burden of chart review by practicing VHA chiropractors. Further, our methods may have application in other clinical settings providing musculoskeletal pain care, in the VHA and beyond, given the high prevalence of musculoskeletal pain in both veteran and nonveteran populations. Future research should focus on refining these NLP models to enhance their applicability across diverse clinical settings, including other pain care clinics, and explore the integration of additional data sources to further enrich patient care insights. 
Additionally, future research can evaluate the efficiency of our approach in terms of financial, human, computational, and other resource costs compared with traditional manual review methods to best understand the potential value and system resource use implications of this work.</p></sec><sec id="s4-3"><title>Limitations</title><p>There are several limitations to our study that are inherent to any observational research or studies using NLP. The text notes used in this study were originally intended for clinical care purposes. Thus, their secondary use for research is subject to limitations in the quality of the notes and the content documented for that purpose, which may not always fully reflect what was done during the patient encounter. Observing the variation in the quantity of text (ie, number of tokens) present in each note across the corpus showed an expectedly right-skewed distribution and highlighted heterogeneity in the quantity of text content included in VHA chiropractic clinic notes. Similar findings were evident in tokenized length differences between visit types and the year during which a visit occurred. We suspect this correlates with heterogeneity in comprehensiveness of documentation, and by proxy of care delivered, highlighting a future opportunity for VHA chiropractor education or other quality improvement interventions to ensure consistent quality on a national scale. Templated material in clinic notes, while difficult to quantify across the corpus, may limit the richness and variability of the text data, potentially affecting the capture of patient- and note-specific details. It was evident during annotation that templates were, at times, shared across VHA facilities, which may affect our intention to capture variability across the VHA by stratifying our random sampling of notes by facility. Further, our sampling by facility may not account for variations between individual chiropractors at a given facility. 
Stratifying our random sampling by chiropractor was an alternative strategy but risked increasing the heterogeneity of the sample.</p><p>Our strategy to capture clinic notes is consistent with previous practices to identify VHA chiropractic clinic visits and associated notes. However, facility variation in clinic workflow may also influence the use and the documentation of PROMs separately. While it is expected that these would be documented in the EHR, clinics may use alternative strategies for the collection and recording of PROMs from patients, limiting our findings to representing &#x201C;documented PROM use.&#x201D; Further, the initial seed list of PROMs suspected to potentially be used by VHA chiropractors was established a priori without empirical validation. We allowed flexible expansion of the initial list during the annotation process to include PROMs that had use, but were not considered in advance, yet it is possible we still failed to include PROMs that may have been used more rarely than our random sampling could capture. As future PROMs are developed and adopted, maintenance of the proposed NLP approach to incorporate these has not been assessed in this study. While this would require some degree of manual effort, we do not suspect that incorporating additional PROMs into the model is particularly challenging given our experience in expanding the initial seed list during this study. We also did not incorporate pretrained or large language models into this analysis due to restricted use in our computing environment at the time of this study. If, in future research, these types of models are successfully able to be validated with adequate performance and implemented, their adoption may mitigate the requirement of ongoing manual efforts. We also did not conduct a formal text feature analysis on the output from the tested machine learning models, which could provide insight into the contributions of specific text to the model prediction. 
This may be an important contribution in future work to compare the explainability of the machine learning approaches to the generally interpretable rule-based approach and potentially optimize machine learning model parameters to enhance prediction performance.</p><p>Our sample of patients receiving VHA chiropractic care from the MSD/CIH Cohort allowed overlapping entry into the parent cohort and our study sample through the end of the study period, ideally representing all chiropractic care occurring during this time. However, by limiting our follow-up period for an individual patient to 1 year after their initial chiropractic visit, we may have excluded chiropractic care received later in the study period. This, along with the onset of the COVID-19 pandemic, may account for the rapid reduction in patients and visits during the final year of the cohort with unknown potential impact on our findings.</p><p>As all notes in this study originated during VHA chiropractic care, there is unknown utility of the developed NLP models in non-VHA chiropractic documentation. Variations in the documentation requirements between the VHA and non-VHA settings for administrative purposes (eg, billing) may influence the use and documentation of PROMs. Nonetheless, our sample originates from the largest collection of chiropractic care EHR data in an integrated medical setting, with evidence from VHA studies of chiropractic care having the potential to influence chiropractic care in the non-VHA setting.</p></sec><sec id="s4-4"><title>Conclusions</title><p>Our study demonstrates the effective use of NLP to accurately identify documented PROM use from VHA chiropractic clinic notes, highlighting the potential for improved data use in quality monitoring of patient care. By leveraging an NLP approach, we can overcome the challenges posed by unstructured clinical text notes to track an identified quality care metric for chiropractic care. 
Overall documented use of PROMs was low and highlights the need for quality improvement. Future work should evaluate determinants influencing PROM use and develop intervention and implementation strategies to improve their use and documentation in VHA chiropractic care to ensure consistent, high-quality chiropractic care for veterans.</p></sec></sec></body><back><ack><p>The contents of this manuscript represent the view of the authors and do not necessarily reflect the position or policy of the US Department of Veterans Affairs, the National Institutes of Health, or the United States Government. This material is based upon work supported by the National Center for Complementary &#x0026; Integrative Health of the National Institutes of Health under award number K08AT011570. Successful completion of this study also builds on the support of previous work by the Department of Veterans Affairs, Veterans Health Administration, Office of Research and Development, Health Services Research and Development IIR-12-118, and the NCMIC Foundation, with resources and the use of facilities at the VA Connecticut Healthcare System.</p></ack><notes><sec><title>Data Availability</title><p>The code developed in this research has been made publicly available on GitHub (bccolemanva/outcomemeasure-nlp). The datasets generated and analyzed during this study are not publicly available. To maximize the protection and security of veterans&#x2019; data while making these data available to researchers, the US Department of Veterans Affairs (VA) developed the VA Informatics and Computing Infrastructure (VINCI). VA researchers must log onto VINCI via a secure gateway or virtual private network connection (VPN) and use a virtual workspace on VINCI to access and analyze VA data. As per the VA Office of Research and Development Policy, VINCI does not allow the transfer of any patient-level data out of its secure environment without special permission. 
Researchers who are not VA employees must be vetted and receive &#x201C;without compensation&#x201D; (WOC) employee status to gain access to VINCI. All analyses performed for this study took place on the VINCI platform. For questions about data access, contact study lead, BCC (Brian.Coleman2@va.gov) or the VA Office of Research and Development (VHACOORDRegulatory@va.gov).</p></sec></notes><fn-group><fn fn-type="conflict"><p>BCC receives grant funding related to this work from the National Center for Complementary &#x0026; Integrative Health of the National Institutes of Health under award number K08AT011570.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AUC-ROC</term><def><p>area under the receiver operator characteristic curve</p></def></def-item><def-item><term id="abb2">BOW</term><def><p>bag-of-words</p></def></def-item><def-item><term id="abb3">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb4">FY</term><def><p>fiscal year</p></def></def-item><def-item><term id="abb5">IAA</term><def><p>interannotator agreement</p></def></def-item><def-item><term id="abb6"><italic>ICD-10</italic></term><def><p><italic>International Classification of Diseases, 10th Revision</italic></p></def></def-item><def-item><term id="abb7">MSD/CIH</term><def><p>Musculoskeletal Diagnosis/Complementary and Integrative Health</p></def></def-item><def-item><term id="abb8">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb9">PROM</term><def><p>patient-reported outcome measure</p></def></def-item><def-item><term id="abb10">VHA</term><def><p>Veterans Health Administration</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dawson</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Doll</surname><given-names>H</given-names> </name><name name-style="western"><surname>Fitzpatrick</surname><given-names>R</given-names> </name><name name-style="western"><surname>Jenkinson</surname><given-names>C</given-names> </name><name name-style="western"><surname>Carr</surname><given-names>AJ</given-names> </name></person-group><article-title>The routine use of patient reported outcome measures in healthcare settings</article-title><source>BMJ</source><year>2010</year><month>01</month><day>18</day><volume>340</volume><fpage>c186</fpage><pub-id pub-id-type="doi">10.1136/bmj.c186</pub-id><pub-id pub-id-type="medline">20083546</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>I</given-names> </name><name name-style="western"><surname>Wiles</surname><given-names>L</given-names> </name><name name-style="western"><surname>Waller</surname><given-names>R</given-names> </name><etal/></person-group><article-title>What does best practice care for musculoskeletal pain look like? 
Eleven consistent recommendations from high-quality clinical practice guidelines: systematic review</article-title><source>Br J Sports Med</source><year>2020</year><month>01</month><volume>54</volume><issue>2</issue><fpage>79</fpage><lpage>86</lpage><pub-id pub-id-type="doi">10.1136/bjsports-2018-099878</pub-id><pub-id pub-id-type="medline">30826805</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holmes</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Lewith</surname><given-names>G</given-names> </name><name name-style="western"><surname>Newell</surname><given-names>D</given-names> </name><name name-style="western"><surname>Field</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bishop</surname><given-names>FL</given-names> </name></person-group><article-title>The impact of patient-reported outcome measures in clinical practice for pain: a systematic review</article-title><source>Qual Life Res</source><year>2017</year><month>02</month><volume>26</volume><issue>2</issue><fpage>245</fpage><lpage>257</lpage><pub-id pub-id-type="doi">10.1007/s11136-016-1449-5</pub-id><pub-id pub-id-type="medline">27815820</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hawk</surname><given-names>C</given-names> </name><name name-style="western"><surname>Whalen</surname><given-names>W</given-names> </name><name name-style="western"><surname>Farabaugh</surname><given-names>RJ</given-names> </name><etal/></person-group><article-title>Best practices for chiropractic management of patients with chronic musculoskeletal pain: a clinical practice guideline</article-title><source>J Altern Complement 
Med</source><year>2020</year><month>10</month><volume>26</volume><issue>10</issue><fpage>884</fpage><lpage>901</lpage><pub-id pub-id-type="doi">10.1089/acm.2020.0181</pub-id><pub-id pub-id-type="medline">32749874</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lisi</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Salsbury</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Hawk</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Chiropractic integrated care pathway for low back pain in veterans: results of a Delphi consensus process</article-title><source>J Manipulative Physiol Ther</source><year>2018</year><month>02</month><volume>41</volume><issue>2</issue><fpage>137</fpage><lpage>148</lpage><pub-id pub-id-type="doi">10.1016/j.jmpt.2017.10.001</pub-id><pub-id pub-id-type="medline">29482827</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vassar</surname><given-names>M</given-names> </name><name name-style="western"><surname>Holzmann</surname><given-names>M</given-names> </name></person-group><article-title>The retrospective chart review: important methodological considerations</article-title><source>J Educ Eval Health Prof</source><year>2013</year><volume>10</volume><issue>12</issue><fpage>12</fpage><pub-id pub-id-type="doi">10.3352/jeehp.2013.10.12</pub-id><pub-id pub-id-type="medline">24324853</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><source>Military Construction, Veterans Affairs, and Related Agencies Appropriations Act, HR 2998, 115th Cong (2018)</source><access-date>2025-03-31</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.congress.gov/bill/115th-congress/house-bill/2998/text">https://www.congress.gov/bill/115th-congress/house-bill/2998/text</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="report"><article-title>VA Directive 2018-1210 - Chiropractic Care</article-title><year>2018</year><publisher-name>United States Department of Veterans Affairs</publisher-name></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ohno-Machado</surname><given-names>L</given-names> </name></person-group><article-title>Realizing the full potential of electronic health records: the role of natural language processing</article-title><source>J Am Med Inform Assoc</source><year>2011</year><volume>18</volume><issue>5</issue><fpage>539</fpage><pub-id pub-id-type="doi">10.1136/amiajnl-2011-000501</pub-id><pub-id pub-id-type="medline">21846784</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luther</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Finch</surname><given-names>DK</given-names> </name><name name-style="western"><surname>Bouayad</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Measuring pain care quality in the Veterans Health Administration primary care setting</article-title><source>Pain</source><year>2022</year><month>06</month><day>1</day><volume>163</volume><issue>6</issue><fpage>e715</fpage><lpage>e724</lpage><pub-id pub-id-type="doi">10.1097/j.pain.0000000000002477</pub-id><pub-id pub-id-type="medline">34724683</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Coleman</surname><given-names>B</given-names> </name><name name-style="western"><surname>Finch</surname><given-names>D</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Extracting pain care quality indicators from U.S. Veterans Health Administration chiropractic care using natural language processing</article-title><source>Appl Clin Inform</source><year>2023</year><month>05</month><volume>14</volume><issue>3</issue><fpage>600</fpage><lpage>608</lpage><pub-id pub-id-type="doi">10.1055/a-2091-1162</pub-id><pub-id pub-id-type="medline">37164327</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Laurentiev</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>DH</given-names> </name><name name-style="western"><surname>Mahesri</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Identifying functional status impairment in people living with dementia through natural language processing of clinical documents: cross-sectional study</article-title><source>J Med Internet Res</source><year>2024</year><month>02</month><day>13</day><volume>26</volume><fpage>e47739</fpage><pub-id pub-id-type="doi">10.2196/47739</pub-id><pub-id pub-id-type="medline">38349732</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sim</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Horan</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Baker</surname><given-names>JN</given-names> </name><name 
name-style="western"><surname>Huang</surname><given-names>IC</given-names> </name></person-group><article-title>Using natural language processing to analyze unstructured patient-reported outcomes data derived from electronic health records for cancer populations: a systematic review</article-title><source>Expert Rev Pharmacoecon Outcomes Res</source><year>2024</year><month>04</month><volume>24</volume><issue>4</issue><fpage>467</fpage><lpage>475</lpage><pub-id pub-id-type="doi">10.1080/14737167.2024.2322664</pub-id><pub-id pub-id-type="medline">38383308</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Poulsen</surname><given-names>MN</given-names> </name><name name-style="western"><surname>Freda</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Troiani</surname><given-names>V</given-names> </name><name name-style="western"><surname>Mowery</surname><given-names>DL</given-names> </name></person-group><article-title>Developing a framework to infer opioid use disorder severity from clinical notes to inform natural language processing methods: characterization study</article-title><source>JMIR Ment Health</source><year>2024</year><month>01</month><day>15</day><volume>11</volume><issue>1</issue><fpage>e53366</fpage><pub-id pub-id-type="doi">10.2196/53366</pub-id><pub-id pub-id-type="medline">38224481</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Afshar</surname><given-names>M</given-names> </name><name name-style="western"><surname>Adelaine</surname><given-names>S</given-names> </name><name name-style="western"><surname>Resnik</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Deployment of real-time natural language processing and deep learning clinical 
decision support in the electronic health record: pipeline implementation for an opioid misuse screener in hospitalized adults</article-title><source>JMIR Med Inform</source><year>2023</year><month>04</month><day>20</day><volume>11</volume><issue>1</issue><fpage>e44977</fpage><pub-id pub-id-type="doi">10.2196/44977</pub-id><pub-id pub-id-type="medline">37079367</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wals Zurita</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Miras del Rio</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ugarte Ruiz de Aguirre</surname><given-names>N</given-names> </name><etal/></person-group><article-title>The transformative potential of large language models in mining electronic health records data: content analysis</article-title><source>JMIR Med Inform</source><year>2025</year><month>01</month><day>2</day><volume>13</volume><issue>1</issue><fpage>e58457</fpage><pub-id pub-id-type="doi">10.2196/58457</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goulet</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Kerns</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Bair</surname><given-names>M</given-names> </name><etal/></person-group><article-title>The musculoskeletal diagnosis cohort: examining pain and pain care among veterans</article-title><source>Pain</source><year>2016</year><month>08</month><volume>157</volume><issue>8</issue><fpage>1696</fpage><lpage>1703</lpage><pub-id pub-id-type="doi">10.1097/j.pain.0000000000000567</pub-id><pub-id pub-id-type="medline">27023420</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luo</surname><given-names>W</given-names> </name><name name-style="western"><surname>Phung</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tran</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Guidelines for developing and reporting machine learning predictive models in biomedical research: a multidisciplinary view</article-title><source>J Med Internet Res</source><year>2016</year><month>12</month><day>16</day><volume>18</volume><issue>12</issue><fpage>e323</fpage><pub-id pub-id-type="doi">10.2196/jmir.5870</pub-id><pub-id pub-id-type="medline">27986644</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Moon</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Recommended practices and ethical considerations for natural language processing-assisted observational research: a scoping review</article-title><source>Clin Transl Sci</source><year>2023</year><month>03</month><volume>16</volume><issue>3</issue><fpage>398</fpage><lpage>411</lpage><pub-id pub-id-type="doi">10.1111/cts.13463</pub-id><pub-id pub-id-type="medline">36478394</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>South</surname><given-names>B</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Leng</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Forbush</surname><given-names>T</given-names> </name><name name-style="western"><surname>DuVall</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chapman</surname><given-names>W</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Cohen</surname><given-names>KB</given-names> </name><name name-style="western"><surname>Demner-Fushman</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ananiadou</surname><given-names>S</given-names> </name><name name-style="western"><surname>Webber</surname><given-names>B</given-names> </name><name name-style="western"><surname>Tsujii</surname><given-names>J</given-names> </name><name name-style="western"><surname>Pestian</surname><given-names>J</given-names> </name></person-group><article-title>A prototype tool set to support machine-assisted annotation</article-title><conf-name>BioNLP: Proceedings of the 2012 Workshop on Biomedical Natural Language Processing</conf-name><conf-date>Jun 8, 2012</conf-date><conf-loc>Montr&#x00E9;al, Canada</conf-loc></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krebs</surname><given-names>EE</given-names> </name><name name-style="western"><surname>Carey</surname><given-names>TS</given-names> </name><name name-style="western"><surname>Weinberger</surname><given-names>M</given-names> </name></person-group><article-title>Accuracy of the pain numeric rating scale as a screening test in primary care</article-title><source>J Gen Intern Med</source><year>2007</year><month>10</month><volume>22</volume><issue>10</issue><fpage>1453</fpage><lpage>1458</lpage><pub-id pub-id-type="doi">10.1007/s11606-007-0321-2</pub-id><pub-id pub-id-type="medline">17668269</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Williams</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Davies</surname><given-names>HTO</given-names> </name><name name-style="western"><surname>Chadury</surname><given-names>Y</given-names> </name></person-group><article-title>Simple pain rating scales hide complex idiosyncratic meanings</article-title><source>Pain</source><year>2000</year><month>04</month><volume>85</volume><issue>3</issue><fpage>457</fpage><lpage>463</lpage><pub-id pub-id-type="doi">10.1016/S0304-3959(99)00299-7</pub-id><pub-id pub-id-type="medline">10781919</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huber</surname><given-names>A</given-names> </name><name name-style="western"><surname>Suman</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Rendo</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Biasi</surname><given-names>G</given-names> </name><name name-style="western"><surname>Marcolongo</surname><given-names>R</given-names> </name><name name-style="western"><surname>Carli</surname><given-names>G</given-names> </name></person-group><article-title>Dimensions of &#x201C;unidimensional&#x201D; ratings of pain and emotions in patients with chronic musculoskeletal pain</article-title><source>Pain</source><year>2007</year><month>08</month><volume>130</volume><issue>3</issue><fpage>216</fpage><lpage>224</lpage><pub-id pub-id-type="doi">10.1016/j.pain.2006.11.014</pub-id><pub-id pub-id-type="medline">17240067</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eyre</surname><given-names>H</given-names> </name><name 
name-style="western"><surname>Chapman</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Peterson</surname><given-names>KS</given-names> </name><etal/></person-group><article-title>Launching into clinical space with medspaCy: a new clinical text processing toolkit in Python</article-title><source>AMIA Annu Symp Proc</source><year>2021</year><volume>2021</volume><fpage>438</fpage><lpage>447</lpage><pub-id pub-id-type="medline">35308962</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Honnibal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Montani</surname><given-names>I</given-names> </name><name name-style="western"><surname>Van Landeghem</surname><given-names>S</given-names> </name><name name-style="western"><surname>Boyd</surname><given-names>A</given-names> </name></person-group><source>spaCy: industrial-strength natural language processing in Python</source><year>2020</year><access-date>2024-08-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://spacy.io">https://spacy.io</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>BCColemanVA/OutcomeMeasure-NLP</article-title><source>GitHub</source><access-date>2024-09-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/BCColemanVA/OutcomeMeasure-NLP">https://github.com/BCColemanVA/OutcomeMeasure-NLP</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Himelfarb</surname><given-names>I</given-names> </name><name name-style="western"><surname>Hyland</surname><given-names>JK</given-names> </name><name 
name-style="western"><surname>Ouzts</surname><given-names>NE</given-names> </name><etal/></person-group><article-title>Practice analysis of chiropractic 2020: a project report, survey analysis, and summary of the practice of chiropractic within the United States</article-title><source>National Board of Chiropractic Examiners</source><year>2020</year><access-date>2025-01-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.nbce.org/wp-content/uploads/2020-Practice-Analysis-of-Chiropractic-2020-3.pdf">https://www.nbce.org/wp-content/uploads/2020-Practice-Analysis-of-Chiropractic-2020-3.pdf</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holmes</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Bishop</surname><given-names>FL</given-names> </name><name name-style="western"><surname>Newell</surname><given-names>D</given-names> </name><name name-style="western"><surname>Field</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lewith</surname><given-names>G</given-names> </name></person-group><article-title>Chiropractors&#x2019; views on the use of patient-reported outcome measures in clinical practice: a qualitative study</article-title><source>Chiropr Man Therap</source><year>2018</year><volume>26</volume><issue>1</issue><fpage>50</fpage><pub-id pub-id-type="doi">10.1186/s12998-018-0219-6</pub-id><pub-id pub-id-type="medline">30568787</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Corcoran</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Peterson</surname><given-names>DR</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>X</given-names> </name><name 
name-style="western"><surname>Moran</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Lisi</surname><given-names>AJ</given-names> </name></person-group><article-title>Characteristics and productivity of the chiropractic workforce of the Veterans Health Administration</article-title><source>Chiropr Man Therap</source><year>2022</year><month>04</month><day>11</day><volume>30</volume><issue>1</issue><fpage>18</fpage><pub-id pub-id-type="doi">10.1186/s12998-022-00429-1</pub-id><pub-id pub-id-type="medline">35410303</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Burdick</surname><given-names>R</given-names> </name><name name-style="western"><surname>Corcoran</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Lisi</surname><given-names>A</given-names> </name></person-group><article-title>The rate of use of Veterans Affairs chiropractic care: a 5-year analysis</article-title><source>Chiropr Man Therap</source><year>2022</year><month>01</month><day>21</day><volume>30</volume><issue>1</issue><fpage>4</fpage><pub-id pub-id-type="doi">10.1186/s12998-022-00413-9</pub-id><pub-id pub-id-type="medline">35062971</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roytman</surname><given-names>GR</given-names> </name><name name-style="western"><surname>Coleman</surname><given-names>BC</given-names> </name><name name-style="western"><surname>Corcoran</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Goertz</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Long</surname><given-names>CR</given-names> </name><name 
name-style="western"><surname>Lisi</surname><given-names>AJ</given-names> </name></person-group><article-title>Changes in the use of telehealth and face-to-face chiropractic care in the Department of Veterans Affairs before and after the COVID-19 pandemic</article-title><source>J Manipulative Physiol Ther</source><year>2021</year><month>09</month><volume>44</volume><issue>7</issue><fpage>584</fpage><lpage>590</lpage><pub-id pub-id-type="doi">10.1016/j.jmpt.2021.12.002</pub-id><pub-id pub-id-type="medline">35249749</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tamang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Humbert-Droz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gianfrancesco</surname><given-names>M</given-names> </name><name name-style="western"><surname>Izadi</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Schmajuk</surname><given-names>G</given-names> </name><name name-style="western"><surname>Yazdany</surname><given-names>J</given-names> </name></person-group><article-title>Practical considerations for developing clinical natural language processing systems for population health management and measurement</article-title><source>JMIR Med Inform</source><year>2023</year><month>01</month><day>3</day><volume>11</volume><issue>1</issue><fpage>e37805</fpage><pub-id pub-id-type="doi">10.2196/37805</pub-id><pub-id pub-id-type="medline">36595345</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Patient-reported outcome measure (PROM) span match and note frequencies in annotation set and full text corpus.</p><media xlink:href="medinform_v13i1e66466_app1.docx" xlink:title="DOCX File, 24 KB"/></supplementary-material><supplementary-material 
id="app2"><label>Multimedia Appendix 2</label><p>Supplemental methods describing detailed architectures of statistical and machine learning models.</p><media xlink:href="medinform_v13i1e66466_app2.docx" xlink:title="DOCX File, 21 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Trends in patient-reported outcome measure use, by unique patient and visit, across the study period, by fiscal year (FY) and month. Patient-reported outcome measure use (PROM +) was identified based on the note (visit) categorization output from the rule-based natural language processing (NLP) model. Individual patients may be counted in multiple months and fiscal years.</p><media xlink:href="medinform_v13i1e66466_app3.docx" xlink:title="DOCX File, 26 KB"/></supplementary-material></app-group></back></article>