<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">45105</article-id><article-id pub-id-type="doi">10.2196/45105</article-id><title-group><article-title>Identifying Risk Factors Associated With Lower Back Pain in Electronic Medical Record Free Text: Deep Learning Approach Using Clinical Note Annotations</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Jaiswal</surname><given-names>Aman</given-names></name><degrees>BTech</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Katz</surname><given-names>Alan</given-names></name><degrees>MBChB, MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nesca</surname><given-names>Marcello</given-names></name><degrees>BCom, BA, MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Milios</surname><given-names>Evangelos</given-names></name><degrees>EE, SM, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Faculty of Computer Science, Dalhousie University</institution>, 
<addr-line>Halifax</addr-line><addr-line>NS</addr-line>, <country>Canada</country></aff><aff id="aff2"><institution>Manitoba Centre for Health Policy, Department of Community Health Sciences, University of Manitoba</institution>, <addr-line>Winnipeg</addr-line><addr-line>MB</addr-line>, <country>Canada</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Lovis</surname><given-names>Christian</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Liu</surname><given-names>Gengbo</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Wang</surname><given-names>Haoze</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Abeysinghe</surname><given-names>Rashmie</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Alan Katz, MBChB, MSc<email>Alan.katz@umanitoba.ca</email></corresp></author-notes><pub-date pub-type="collection"><year>2023</year></pub-date><pub-date pub-type="epub"><day>9</day><month>8</month><year>2023</year></pub-date><volume>11</volume><elocation-id>e45105</elocation-id><history><date date-type="received"><day>15</day><month>12</month><year>2022</year></date><date date-type="rev-recd"><day>11</day><month>05</month><year>2023</year></date><date date-type="accepted"><day>03</day><month>06</month><year>2023</year></date></history><copyright-statement>&#x00A9; Aman Jaiswal, Alan Katz, Marcello Nesca, Evangelos Milios. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 09.08.2023. 
</copyright-statement><copyright-year>2023</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2023/1/e45105"/><abstract><sec><title>Background</title><p>Lower back pain is a common weakening condition that affects a large population. It is a leading cause of disability and lost productivity, and the associated medical costs and lost wages place a substantial burden on individuals and society. Recent advances in artificial intelligence and natural language processing have opened new opportunities for the identification and management of risk factors for lower back pain. In this paper, we propose and train a deep learning model on a data set of clinical notes that have been annotated with relevant risk factors, and we evaluate the model&#x2019;s performance in identifying risk factors in new clinical notes.</p></sec><sec><title>Objective</title><p>The primary objective is to develop a novel deep learning approach to detect risk factors for underlying disease in patients presenting with lower back pain in clinical encounter notes. 
The secondary objective is to propose solutions to potential challenges of using deep learning and natural language processing techniques for identifying risk factors in electronic medical record free text and make practical recommendations for future research in this area.</p></sec><sec sec-type="methods"><title>Methods</title><p>We manually annotated clinical notes for the presence of six risk factors for severe underlying disease in patients presenting with lower back pain. Data were highly imbalanced, with only 12% (n=296) of the annotated notes having at least one risk factor. To address imbalanced data, a combination of semantic textual similarity and regular expressions was used to further capture notes for annotation. Further analysis was conducted to study the impact of downsampling, binary formulation of multi-label classification, and unsupervised pretraining on classification performance.</p></sec><sec sec-type="results"><title>Results</title><p>Of 2749 labeled clinical notes, 347 exhibited at least one risk factor, while 2402 exhibited none. The initial analysis shows that downsampling the training set to equalize the ratio of clinical notes with and without risk factors improved the macro&#x2013;area under the receiver operating characteristic curve (AUROC) by 2%. The Bidirectional Encoder Representations from Transformers (BERT) model improved the macro-AUROC by 15% over the traditional machine learning baseline. In experiment 2, the proposed BERT&#x2013;convolutional neural network (CNN) model for longer texts improved (4% macro-AUROC) over the BERT baseline, and the multitask models are more stable for minority classes. In experiment 3, domain adaptation of BERTCNN using masked language modeling improved the macro-AUROC by 2%.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Primary care clinical notes are likely to require manipulation to perform meaningful free-text analysis. 
The application of BERT models for multi-label classification on downsampled annotated clinical notes is useful in detecting risk factors suggesting an indication for imaging for patients with lower back pain.</p></sec></abstract><kwd-group><kwd>machine learning</kwd><kwd>lower back pain</kwd><kwd>natural language processing</kwd><kwd>semantic textual similarity</kwd><kwd>electronic medical records</kwd><kwd>risk factors</kwd><kwd>deep learning</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Lower back pain (LBP) is recognized as a common disability worldwide [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. While there is no agreed-upon definition of LBP, in a systematic review, it was primarily defined through routinely collected electronic health data, which include <italic>International Classification of Diseases, Ninth Revision</italic> (<italic>ICD-9</italic>) and <italic>International Statistical Classification of Diseases, Tenth Revision</italic> (<italic>ICD-10</italic>) codes [<xref ref-type="bibr" rid="ref4">4</xref>]. One estimate of the burden of LBP is that 13% of adults in the United States live with LBP, while in Canada, among those living with chronic pain, 50.9% identified the location of their pain in the upper or lower back [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. In a systematic review [<xref ref-type="bibr" rid="ref4">4</xref>], the mean prevalence of LBP among the studies collected ranged between 1.4% and 15.6%.</p><p>While the burden of LBP remains high, it is important to understand the indicators for possible serious underlying causes that require imaging, also known as &#x201C;risk factors&#x201D; [<xref ref-type="bibr" rid="ref5">5</xref>]. 
According to Choosing Wisely Canada, risk factors may include [<xref ref-type="bibr" rid="ref6">6</xref>]:</p><list list-type="bullet"><list-item><p>A history of cancer</p></list-item><list-item><p>Unexplained weight loss</p></list-item><list-item><p>A recent infection</p></list-item><list-item><p>Fever</p></list-item><list-item><p>Loss of bowel or bladder control</p></list-item><list-item><p>Abnormal reflexes or the loss of muscle power in the legs</p></list-item></list><p>Radiological (diagnostic) imaging includes procedures such as x-rays, computed tomography scans, or magnetic resonance imaging scans. Recommendations from clinical practice guidelines state that, unless risk factors are present, radiological imaging is not needed for patients with LBP [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Moreover, ordering radiological imaging when it is unnecessary puts the patient at risk for radiation exposure and other negative consequences [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Despite these recommendations, patients with LBP are frequently subjected to unnecessary imaging [<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>The data for this study come from clinical practices that use electronic medical records (EMRs). The widespread use of this information technology has introduced the feasibility of analyzing large numbers of clinical notes without having to manually access paper charts and perform the analyses using automated approaches such as natural language processing (NLP) [<xref ref-type="bibr" rid="ref9">9</xref>]. The Canadian Primary Care Sentinel Surveillance Network [<xref ref-type="bibr" rid="ref10">10</xref>] routinely extracts clinical information such as clinical encounter notes, note type, and the date of the notes from primary care clinical practices with the permission of the providers. 
Applying NLP methods to EMR data makes it possible to detect LBP risk factors and understand the use of imaging in this common clinical presentation.</p><p>Since the introduction of transformers in 2019 [<xref ref-type="bibr" rid="ref11">11</xref>], which are large language models that can be fine-tuned for specific tasks, deep language models have achieved a significant milestone in natural language understanding. The transfer learning paradigm of unsupervised pretraining and fine-tuning [<xref ref-type="bibr" rid="ref12">12</xref>] using Bidirectional Encoder Representations from Transformers (BERT) has reduced the requirement for large labeled data sets to achieve state-of-the-art analytic performance. Previous research [<xref ref-type="bibr" rid="ref13">13</xref>] has explored the use of topic models and deep neural networks to automatically distinguish acute LBP episodes using free-text clinical notes.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>The following steps were undertaken to achieve our goal: preparation of EMR data, EMR annotation process, addressing imbalanced data, and application of the proposed model.</p><sec id="s2-1"><title>Preparation of EMR Data</title><p>We accessed a random sample of deidentified EMR data, and using the regular expressions created in SAS (SAS Institute), we identified a cohort of patients with any indication of LBP. Notes were further filtered by note type to only include provider-generated clinical notes. The data were then split randomly into three files. Ethics approval for the study was provided by the University of Manitoba Health Research Ethics Board and the Health Information Privacy Committee.</p></sec><sec id="s2-2"><title>EMR Annotation Process</title><p>Six medical students reviewed the EMR notes to identify the six LBP risk factors in accordance with Choosing Wisely Canada. 
They worked in teams of two to validate the application of the inclusion and exclusion criteria, each note being annotated by two students. The inclusion criteria listed in <xref ref-type="other" rid="box1">Textbox 1</xref> were the presence of specific clinical notes suggestive of at least one of the six risk factors indicating the need for imaging. The exclusion criteria were the presence of clinical conditions that could lead to symptoms that may be confused with any of the underlying conditions represented by the six risk factors and clinical notes that do not represent relevant visits.</p><boxed-text id="box1"><title> Inclusion and exclusion criteria for risk factors.</title><p><bold>Inclusion criteria</bold></p><list list-type="bullet"><list-item><p>Lower extremities for loss of muscle function</p></list-item><list-item><p>Positive straight leg test</p></list-item><list-item><p>Nerve impingement</p></list-item><list-item><p>Sciatica, but need to confirm radiculopathy</p></list-item><list-item><p>Incontinence related to a nerve issue</p></list-item><list-item><p>If back pain has improved</p></list-item><list-item><p>Follow-up discussions of imaging results</p></list-item><list-item><p>Saddle anesthesia</p></list-item><list-item><p>Notes that do not specify upper vs lower back pain</p></list-item></list><p><bold>Exclusion criteria</bold></p><list list-type="bullet"><list-item><p>HIV is not a relevant infection (regardless of viral load and strain/location)</p></list-item><list-item><p>Urinary symptoms other than incontinence are neither risk factors nor symptoms of relevant infection</p></list-item><list-item><p>Shingles as an infection if it is a lumbar dermatome</p></list-item><list-item><p>Nocturnal enuresis</p></list-item><list-item><p>Degenerative diseases or osteoarthritis with an indication of back pain</p></list-item><list-item><p>Copy/pasted imaging results onto the electronic medical record note</p></list-item><list-item><p>Notes that mention previous 
or resolved back pain</p></list-item><list-item><p>Well child/adolescent visit</p></list-item></list></boxed-text><p>An experienced clinician (AK) arbitrated any disagreements between student annotators. This supported the inclusion of correctly labeled records in the classification model. For the annotation process, we used Microsoft Forms (Microsoft Corporation), which enabled us to collect the relevant data in a systematic and organized manner. Specifically, the output from Microsoft Forms was linked to a secure CSV file containing the clinical notes, using a unique identifier to facilitate data merging and subsequent analysis.</p></sec><sec id="s2-3"><title>Addressing Imbalanced Data</title><p>Our data collection process consisted of two rounds. In the first round, we established the initial distribution of risk factors. Analysis of this round revealed an imbalanced distribution of labels, a well-known factor that can impact the performance of deep learning methods [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Specifically, we observed an imbalance in both the infrequent occurrence of individual risk factors and the high frequency of the &#x201C;null class,&#x201D; which denotes the absence of risk factors.</p><p>To address this imbalance, we adopted a 2-pronged approach. First, we collected additional clinical notes specifically targeting minority risk factors. Second, we downsampled the majority of notes with &#x201C;null class.&#x201D; Notably, the initial data set lacked any clinical notes for unexplained weight loss. <xref ref-type="table" rid="table1">Table 1</xref> depicts the distribution of risk factors after the first labeling round, revealing that only 12% (n=296) of the 2487 annotated notes exhibited any risk factors.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Risk factor distribution after the first labeling round. 
Zero notes exhibit the unexplained weight loss risk factor.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Risk factors</td><td align="left" valign="top">Annotations (round 1), n</td></tr></thead><tbody><tr><td align="left" valign="top">Cancer</td><td align="left" valign="top">26</td></tr><tr><td align="left" valign="top">Weight<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Fever</td><td align="left" valign="top">8</td></tr><tr><td align="left" valign="top">Infection</td><td align="left" valign="top">8</td></tr><tr><td align="left" valign="top">Bowel</td><td align="left" valign="top">9</td></tr><tr><td align="left" valign="top">Abreflex</td><td align="left" valign="top">233</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Zero notes exhibit the unexplained weight loss risk factor.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-4"><title>Acquiring More Notes to Annotate</title><p>Prior studies have explored methods for addressing the challenge of obtaining sufficient data for training [<xref ref-type="bibr" rid="ref16">16</xref>]. To acquire clinical notes for labeling that are more likely to exhibit a minority risk factor, we used unsupervised semantic textual similarity (STS). It is a ranking task where given a text query and a list of clinical notes, the STS model ranks the clinical notes that are semantically like the query. We trained two unsupervised STS models, Transformers and Sequential Denoising Auto-Encoder (TSDAE) [<xref ref-type="bibr" rid="ref17">17</xref>] and Simple Contrastive Learning of Sentence Embeddings (SimCSE) [<xref ref-type="bibr" rid="ref18">18</xref>], implemented using the SentenceTransformer Python library [<xref ref-type="bibr" rid="ref19">19</xref>]. 
To rank the unlabeled clinical notes (ie, 55,000 notes with any LBP indication), we formed the queries using rationales, collected as part of the first labeling round. Here, we refer to &#x201C;rationale&#x201D; as an extracted snippet or text from the clinical note the annotators highlighted as evidence for a risk factor.</p><p><xref ref-type="fig" rid="figure1">Figure 1</xref> illustrates the STS sampling process with numbered steps. First, we group the clinical notes based on the exhibited risk factors. We then concatenate the rationales for each group of clinical notes to form queries and rank the unlabeled clinical notes using the unsupervised STS models. If the rationales were unavailable from the first labeling round (eg, &#x201C;weight loss&#x201D;), we used risk factor definition or custom text as the query. We selected the top K notes from the ranked clinical notes, where &#x201C;K&#x201D; is set within the 10-50 range. We further filtered noisy outputs using phrases such as &#x201C;has fever,&#x201D; &#x201C;has back pain,&#x201D; and &#x201C;lost weight.&#x201D; Finally, we iterated the process for each risk factor and provided the selected notes for the second labeling round.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Semantic textual similarity sampling process, followed for the second labeling round. STS: semantic textual similarity.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v11i1e45105_fig01.png"/></fig><p>This approach helped maximize annotations for clinical notes that exhibited risk factors. <xref ref-type="table" rid="table2">Table 2</xref> depicts the complete distribution of risk factors after both rounds of labeling. 
Of the 262 annotated clinical notes in the second round, 19.5% (n=51) of the clinical notes exhibited risk factors, in contrast to 12% (n=296) in the first round.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Risk factor distribution after both rounds of labeling.<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Risk factors</td><td align="left" valign="top">Annotations (round 1 + 2), n</td></tr></thead><tbody><tr><td align="left" valign="top">Cancer</td><td align="left" valign="top">53</td></tr><tr><td align="left" valign="top">Weight</td><td align="left" valign="top">32</td></tr><tr><td align="left" valign="top">Fever</td><td align="left" valign="top">17</td></tr><tr><td align="left" valign="top">Infection</td><td align="left" valign="top">9</td></tr><tr><td align="left" valign="top">Bowel</td><td align="left" valign="top">9</td></tr><tr><td align="left" valign="top">Abreflex</td><td align="left" valign="top">236</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>This includes 2487 notes from the first round and 262 notes from the second round. In the second labeling round, we collected 32 clinical notes for the unexplained weight loss risk factor.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-5"><title>Treating Class Imbalance With Downsampling</title><p>Following the second round of labeling, a significant class imbalance was observed in the resulting distribution of labels. Specifically, out of the total 2749 annotated clinical notes, only 347 were labeled as having one or more risk factors, while the remaining 2402 notes were labeled with no risk factor. To mitigate this issue, two common approaches are oversampling the minority class or downsampling the majority class. In a multi-label data set, each instance can be assigned to one or more classes. 
For instance, in the case of clinical notes, they may have one or more risk factors, making it challenging to oversample the minority class. This is because generating synthetic instances requires randomly selecting a minority clinical note that may have a combination of labels rather than a single label. However, this approach may bias the model toward the minority class and lead to overfitting. Consequently, we opted for downsampling the majority class to balance the class distribution and prevent the model from being biased toward the majority class.</p><p>Specifically, a subset of the clinical notes with &#x201C;no risk factors&#x201D; was randomly selected to match the number of clinical notes with &#x201C;any risk factor.&#x201D; This approach aimed to balance the class distribution and enable the model to learn from both positive and negative examples. To assess the effectiveness of the downsampling strategy, we conducted a comparative analysis of the model&#x2019;s performance with and without downsampling.</p></sec><sec id="s2-6"><title>Application of Proposed Model</title><p>Transformer-based BERT [<xref ref-type="bibr" rid="ref11">11</xref>] models can be fine-tuned for detecting risk factors in clinical notes using a small labeled data set. The requirement for large labeled data sets is eased with models that are pretrained on large clinical text. In this work, we used BlueBERT [<xref ref-type="bibr" rid="ref20">20</xref>] as our back-end model that is pretrained on PubMed abstracts and clinical notes from the Medical Information Mart for Intensive Care (MIMIC-III) data set [<xref ref-type="bibr" rid="ref21">21</xref>]. However, BERT models are limited to a maximum input length of 512 tokens. The length of clinical notes in our data set ranges from 7 to 1400 tokens with 8% (n=221) of the notes having more than 512 tokens. 
To overcome this limitation, we propose a novel architecture called BERT&#x2013;convolutional neural network (CNN) that chunks the inputs and processes them using convolution layers. The proposed chunking method is illustrated in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>BERT input chunking: a clinical note is first separated into chunks of 512 tokens. Each chunk is then independently processed by the BERT-based back-end model. The chunk embedding is obtained by averaging the token embeddings from the last five layers of BERT. Finally, all the chunk embeddings are concatenated and processed using convolution layers, as defined by Kim [<xref ref-type="bibr" rid="ref22">22</xref>]. Note: The sample clinical note does not belong to the real data set. BERT: Bidirectional Encoder Representations from Transformers.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v11i1e45105_fig02.png"/></fig></sec><sec id="s2-7"><title>Experimental Setup</title><p>The study used a repeated 2-fold cross-validation approach with two repetitions to improve the estimated performance of the machine learning models. As the data set was multi-label, we adopted the iterative stratification method [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>] provided by the scikit-multilearn library [<xref ref-type="bibr" rid="ref25">25</xref>] to generate stratified splits for the folds. This ensured that every split had a similar distribution of risk factors. The 2-fold cross-validation was repeated twice, resulting in a total of four runs. Wherever applicable, we implemented the downsampling technique (as described earlier) on the training set. Our results are reported in terms of the area under the receiver operating characteristic curve (AUROC) of individual risk factors and their macroaverage across the folds. 
<xref ref-type="table" rid="table3">Table 3</xref> reports the frequency of positive risk factors in each split of the folds.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Frequency of positive risk factors in train-test splits. We report the approximate counts of each risk factor across folds. Note: the counts do not include the clinical notes with no risk factors, which are approximately 1198 and 1195 for the train and test split, respectively.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Positive risk factors</td><td align="left" valign="bottom">Train split (n=1374 notes), n</td><td align="left" valign="bottom">Test split (n=1375 notes), n</td></tr></thead><tbody><tr><td align="left" valign="top">Cancer</td><td align="left" valign="top">26</td><td align="left" valign="top">27</td></tr><tr><td align="left" valign="top">Weight</td><td align="left" valign="top">16</td><td align="left" valign="top">16</td></tr><tr><td align="left" valign="top">Fever</td><td align="left" valign="top">8</td><td align="left" valign="top">9</td></tr><tr><td align="left" valign="top">Infection</td><td align="left" valign="top">4</td><td align="left" valign="top">5</td></tr><tr><td align="left" valign="top">Bowel</td><td align="left" valign="top">4</td><td align="left" valign="top">5</td></tr><tr><td align="left" valign="top">Abreflex</td><td align="left" valign="top">118</td><td align="left" valign="top">118</td></tr></tbody></table></table-wrap></sec><sec id="s2-8"><title>Ethics Approval</title><p>The study received ethics approval from the Health Research Ethics Board of the University of Manitoba (study number HS20263; review number H2016:408).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>In this section, we report the analysis of the data collection and classification performance of the transformer-based models with different 
configurations, including traditional machine learning and BERT-based baselines. The transformer-based models were trained for 10 epochs each, with a learning rate ranging from 5e-5 to 6e-5. Unless specified otherwise, all the BERT-based models use BlueBERT [<xref ref-type="bibr" rid="ref20">20</xref>] as the back end.</p></sec><sec id="s3-2"><title>Data Collection Analysis</title><p>Each annotation was added at the clinical note level independently. These notes are associated with patient- and site-level information, allowing for further analysis based on the patient and site as the unit of analysis. <xref ref-type="table" rid="table4">Table 4</xref> presents an analysis of the LBP characteristics reported in the collected data, using notes, patient, and site ID as the units of analysis. This enables a multilevel analysis of the reported characteristics, providing a detailed understanding of their distribution across various units of analysis.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Lower back pain characteristics gathered from collected data, with notes, patient, and site ID each serving as the units of analysis.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Unit of analysis</td><td align="left" valign="bottom" colspan="2">Values, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4"><bold>Notes (N=2749)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">History of cancer</td><td align="left" valign="top" colspan="2">53 (1.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Signs of fever</td><td align="left" valign="top" colspan="2">17 (0.6)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Unexplained weight loss</td><td align="left" valign="top" colspan="2">32 (1.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Recent 
infection</td><td align="left" valign="top" colspan="2">9 (0.3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Loss of bowel or bladder control</td><td align="left" valign="top" colspan="2">9 (0.3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Abnormal reflexes</td><td align="left" valign="top" colspan="2">236 (8.6)</td></tr><tr><td align="left" valign="top" colspan="4"><bold>Patients (N=1943)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">History of cancer</td><td align="left" valign="top" colspan="2">40 (2.1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Signs of fever</td><td align="left" valign="top" colspan="2">17 (0.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Unexplained weight loss</td><td align="left" valign="top" colspan="2">32 (1.6)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Recent infection</td><td align="left" valign="top" colspan="2">9 (0.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Loss of bowel or bladder control</td><td align="left" valign="top" colspan="2">8 (0.4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Abnormal reflexes</td><td align="left" valign="top" colspan="2">201 (10.3)</td></tr><tr><td align="left" valign="top" colspan="4"><bold>Site ID (N=22)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">History of cancer</td><td align="left" valign="top" colspan="2">12 (55)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Signs of fever</td><td align="left" valign="top" colspan="2">11 (50)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Unexplained weight loss</td><td align="left" valign="top" colspan="2">12 (55)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Recent infection</td><td align="left" 
valign="top" colspan="2">5 (23)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Loss of bowel or bladder control</td><td align="left" valign="top" colspan="2">7 (32)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Abnormal reflexes</td><td align="left" valign="top" colspan="2">13 (59)</td></tr></tbody></table></table-wrap><p>A total of 2749 clinical notes were annotated to collect information on risk factors for LBP. The most reported risk factor was &#x201C;abnormal reflexes,&#x201D; with 236 annotations, followed by &#x201C;history of cancer&#x201D; with 53 annotations. Out of the 1943 patients covered by the annotation process, only 40 were labeled with a &#x201C;history of cancer,&#x201D; accounting for 2.1% (n=40) of the total patients. More than 10% of patients were reported with &#x201C;abnormal reflexes,&#x201D; while &#x201C;recent infection&#x201D; and &#x201C;loss of bowel control&#x201D; were reported in only 9 and 8 patients, respectively.</p><p>The analysis of clinical sites associated with the clinical notes revealed that 12 of 22 sites reported at least two risk factors, with &#x201C;recent infection&#x201D; and &#x201C;loss of bowel or bladder control&#x201D; being the least commonly reported risk factors, mentioned in only 5 and 7 clinical sites, respectively. These findings indicate that &#x201C;abnormal reflexes&#x201D; is the most reported characteristic of LBP across all units of analysis, with &#x201C;history of cancer,&#x201D; &#x201C;unexplained weight loss,&#x201D; and &#x201C;signs of fever&#x201D; being reported less frequently. The frequency of &#x201C;loss of bowel or bladder control&#x201D; and &#x201C;recent infection&#x201D; was relatively low across all units of analysis, indicating that these characteristics may not be as common as others in cases of LBP. 
The distribution of these characteristics varies across different units of analysis, which highlights the importance of examining LBP characteristics at multiple levels.</p></sec><sec id="s3-3"><title>Performance With and Without Downsampling</title><p>In our initial analysis, we compared the impact of downsampling the training set, as described earlier, on the average and label-wise performance of the models. <xref ref-type="fig" rid="figure3">Figure 3</xref> displays the results of this comparison. We also included a tf-idf (term frequency&#x2013;inverse document frequency) + logistic regression model trained with a multi-output classifier [<xref ref-type="bibr" rid="ref26">26</xref>] as a baseline, which was the best-performing baseline (among 7 candidates, including k-nearest neighbors, naive Bayes, random forest, and models from the scikit-multilearn Library [<xref ref-type="bibr" rid="ref25">25</xref>]). On average, the BERT models performed 15% better than the baseline. Downsampling the training set improved performance by 2% for BERT-Multi models and reduced the SD as reflected by the error bars for minority labels (eg, &#x201C;bowel&#x201D; and &#x201C;fever&#x201D;). Downsampling of the majority class (ie, &#x201C;No Risk factor notes&#x201D;) also helped stabilize the performance of the models, as indicated by the smaller error bars. We used the downsampled training set for further analysis.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Comparison of BERT-Multitask models trained on complete and downsampled data. A tf-idf + logistic regression model trained with a multi-output classifier is included as a baseline. The AUROC for each risk factor and their macroaverage are reported, with the SDs reflected in the error bars. 
AUROC: area under the receiver operating characteristic curve; BERT: Bidirectional Encoder Representations from Transformers; tf-idf: term frequency&#x2013;inverse document frequency.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v11i1e45105_fig03.png"/></fig></sec><sec id="s3-4"><title>Performance With BERTCNN and Independent Binary Classifiers</title><p>Using the downsampled training set for all the models, we compared the performance of four different models chosen by architecture (BERT, BERTCNN) and task formulation (multitask learning, binary classification). <xref ref-type="fig" rid="figure4">Figure 4</xref> shows the results. The comparison of BERT and BERTCNN highlights the importance of not truncating longer inputs. The comparison of the proposed model (BERTCNN) with its binary variant helps in understanding the trade-off between parameter efficiency and performance. The average AUROCs of all the models are comparable, with BERTCNN-Multi performing 4% better than BERT-Multi. The multitask BERT and BERTCNN models match the performance of their binary alternative with six times fewer parameters. When sufficient positive samples are present for a risk factor (eg, abreflex), all the models perform comparably with a low SD. When the samples are insufficient (eg, &#x201C;infection&#x201D; and &#x201C;bowel&#x201D;), the binary models have high SD (indicated by the error bars), as few-sample BERT fine-tuning is known to be unstable [<xref ref-type="bibr" rid="ref27">27</xref>]. In such cases, the multitask models generally produce more stable results, with the BERTCNN-Multi performing 9% better than BERT-Multi. 
In general, the BERTCNN model can benefit from the extra context found in the complete clinical note to improve prediction performance.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>BERT-Multi, BERT-Binary, BERTCNN-Multi, and BERTCNN-Binary trained on the downsampled training data. The AUROC for each risk factor and their macroaverage are reported, with the SDs reflected in the error bars. AUROC: area under the receiver operating characteristic curve; BERT: Bidirectional Encoder Representations from Transformers; BERTCNN: Bidirectional Encoder Representations from Transformers&#x2013;convolutional neural network.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v11i1e45105_fig04.png"/></fig></sec><sec id="s3-5"><title>Performance With Domain Adaptation Using Unsupervised Training</title><p>The best-performing model can further benefit from pretraining [<xref ref-type="bibr" rid="ref28">28</xref>] the underlying transformer model using the clinical notes. In this analysis, we investigate the effect of domain adaptation using pretraining on classification performance. We used BERTCNN and further pretrained the back-end model (BlueBERT [<xref ref-type="bibr" rid="ref20">20</xref>]) with the complete corpus of relevant clinical notes (N=57,000) for 3 epochs. Two choices for pretraining the BERT architecture were considered: masked language modeling (MLM; BERTCNN-MLM-Multi) [<xref ref-type="bibr" rid="ref12">12</xref>] and causal language modeling (CLM; BERTCNN-CLM-Multi) [<xref ref-type="bibr" rid="ref29">29</xref>]. In addition, we also report results of the recent transformers-based model for long text in the clinical domain, called clinical-longformer [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], which was pretrained on clinical notes from the MIMIC-III data set [<xref ref-type="bibr" rid="ref21">21</xref>]. 
Our results, shown in <xref ref-type="fig" rid="figure5">Figure 5</xref>, indicate that the MLM method performed 2% better than no domain adaptation and improved the performance for &#x201C;cancer&#x201D; by 5%. The longformer model further improves performance over MLM by 2%. It is worth noting that while the performance improvement of domain adaptation using MLM [<xref ref-type="bibr" rid="ref32">32</xref>] is not significant, it is comparable to that of the already pretrained BlueBERT [<xref ref-type="bibr" rid="ref20">20</xref>] and clinical-longformer [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], which were pretrained on a much larger corpus of over 2 million notes.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Effect of domain adaptation using MLM, CLM, and comparison with the clinical-longformer model. The AUROC for each risk factor and their macroaverage are reported, with the SDs reflected in the error bars. AUROC: area under the receiver operating characteristic curve; BERTCNN: Bidirectional Encoder Representations from Transformers&#x2013;convolutional neural network; CLM: causal language modeling; MLM: masked language modeling.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v11i1e45105_fig05.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>The analysis of electronic clinical notes using machine learning techniques provides the opportunity to explore and evaluate clinical care, previously not possible when clinical experts had to read each clinical record. NLP of clinical records is still a relatively new research endeavor that is rapidly evolving. This study encountered and addressed several challenges that are likely to be common in the analysis of clinical notes. 
For example, the initially collected data were imbalanced, with most clinical notes having no risk factors for cancer, suggesting the need for further investigation of LBP. By sampling additional clinical notes from the unlabeled pool using unsupervised semantic matching techniques for a limited second round of labeling, we captured 7.5% more clinical notes with at least one risk factor. Strategic resampling can decrease bias in multi-label data sets, which substantially helps in classification performance. The analysis comparing multitask learning and binary classification suggests we can match the performance of independent binary classifiers and produce more stable results while using a fraction of the learned parameters required for binary classifiers. This study demonstrates the value of domain adaptation as an additional technique to improve the classification results of transformer-based models and improve clinical free-text classification using unsupervised methods.</p><p>A strength of this study is the comparison of different models and approaches using a random sample of real clinical notes. We compared the BERT-based model, which does not truncate longer clinical notes and uses the complete context to make predictions, to the more commonly used truncated note model. 
The extensive empirical analysis on the impact of different modeling choices, including comparisons of multitask and single-task learning, resampling of data, and domain adaptation using unsupervised methods for the detection of LBP risk factors in clinical notes, provides guidance for future analysis of clinical text data.</p><p>While the low number of samples for certain risk factors in the test set is a limitation, this was addressed in reporting the AUROC for each individual risk factor, including their macroaverage for each model, and using the repeated k-fold cross-validation approach for better estimation of performance.</p><p>Future research will involve linking the outcomes of imaging studies to the identification of risk factors in this data set. It is anticipated that patients without risk factors would have normal imaging, while those with risk factors should be more likely to have abnormal imaging suggestive of disease requiring further treatment. Those analyses will need to address the imbalance in the data, as a minority of patients have undergone imaging.</p><p>Deep learning models, specifically BERT-based models, are suitable for capturing and detecting risk factors for LBP in clinical notes. Semantic matching techniques are effective during data collection in providing minority samples for labeling and improving data set distribution. The proposed method BERTCNN can be successfully applied for clinical notes that may be longer than the input limit of BERT-based models. Detecting risk factors in clinical notes is better formulated as multitask learning, which is more efficient and provides stable results. Furthermore, transformer-based models are successfully adopted for clinical text using transfer learning and MLM.</p></sec></body><back><ack><p>The authors acknowledge the clinicians and patients whose data were accessed for this study through the Manitoba Primary Care Research Network, a node of the Canadian Sentinel Surveillance Network. 
The authors also thank medical student annotators Elvina Mukhamedshina, Gem Newman, JaeYeon Park, Mehrin Ahmed, Sue Zhang, and Will Siemens.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AUROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb2">BERT</term><def><p>Bidirectional Encoder Representations from Transformers</p></def></def-item><def-item><term id="abb3">BERTCNN</term><def><p>Bidirectional Encoder Representations from Transformers&#x2013;convolutional neural network</p></def></def-item><def-item><term id="abb4">CLM</term><def><p>causal language modeling</p></def></def-item><def-item><term id="abb5">EMR</term><def><p>electronic medical record</p></def></def-item><def-item><term id="abb6"><italic>ICD-10</italic></term><def><p><italic>International Statistical Classification of Diseases, Tenth Revision</italic></p></def></def-item><def-item><term id="abb7"><italic>ICD-9</italic></term><def><p><italic>International Classification of Diseases, Ninth Revision</italic></p></def></def-item><def-item><term id="abb8">LBP</term><def><p>lower back pain</p></def></def-item><def-item><term id="abb9">MIMIC-III</term><def><p>Medical Information Mart for Intensive Care</p></def></def-item><def-item><term id="abb10">MLM</term><def><p>masked language modeling</p></def></def-item><def-item><term id="abb11">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb12">SimCSE</term><def><p>Simple Contrastive Learning of Sentence Embeddings</p></def></def-item><def-item><term id="abb13">STS</term><def><p>semantic textual similarity</p></def></def-item><def-item><term id="abb14">tf-idf</term><def><p>term frequency&#x2013;inverse document frequency</p></def></def-item><def-item><term id="abb15">TSDAE</term><def><p>Transformers and Sequential Denoising 
Auto-Encoder</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>Centers for Disease Control and Prevention</collab></person-group><article-title>Acute low back pain</article-title><year>2022</year><access-date>2022-06-1</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://web.archive.org/web/20220709154456/https://www.cdc.gov/acute-pain/low-back-pain/index.html">web.archive.org/web/20220709154456/www.cdc.gov/acute-pain/low-back-pain/index.html</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stevans</surname><given-names>JM</given-names></name><name name-style="western"><surname>Delitto</surname><given-names>A</given-names></name><name name-style="western"><surname>Khoja</surname><given-names>SS</given-names></name><name name-style="western"><surname>Patterson</surname><given-names>CG</given-names></name><name name-style="western"><surname>Smith</surname><given-names>CN</given-names></name><name name-style="western"><surname>Schneider</surname><given-names>MJ</given-names></name><etal/></person-group><article-title>Risk factors associated with transition from acute to chronic low back pain in US patients seeking primary care</article-title><source>JAMA Netw Open</source><year>2021</year><month>02</month><day>1</day><volume>4</volume><issue>2</issue><fpage>e2037371</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.37371</pub-id><pub-id pub-id-type="medline">33591367</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>MacDougall</surname><given-names>HL</given-names></name><name 
name-style="western"><surname>George</surname><given-names>SZ</given-names></name><name name-style="western"><surname>Dover</surname><given-names>GC</given-names></name></person-group><article-title>Low back pain treatment by athletic trainers and athletic therapists: biomedical or biopsychosocial orientation?</article-title><source>J Athl Train</source><year>2019</year><month>08</month><day>6</day><volume>54</volume><issue>7</issue><fpage>772</fpage><lpage>779</lpage><pub-id pub-id-type="doi">10.4085/1062-6050-430-17</pub-id><pub-id pub-id-type="medline">31386578</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fatoye</surname><given-names>F</given-names></name><name name-style="western"><surname>Gebrye</surname><given-names>T</given-names></name><name name-style="western"><surname>Odeyemi</surname><given-names>I</given-names></name></person-group><article-title>Real-world incidence and prevalence of low back pain using routinely collected data</article-title><source>Rheumatol Int</source><year>2019</year><month>03</month><day>8</day><volume>39</volume><issue>4</issue><fpage>619</fpage><lpage>626</lpage><pub-id pub-id-type="doi">10.1007/s00296-019-04273-0</pub-id><pub-id pub-id-type="medline">30848349</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chou</surname><given-names>R</given-names></name></person-group><article-title>Low back pain</article-title><source>Ann Intern Med</source><year>2021</year><month>08</month><day>10</day><volume>174</volume><issue>8</issue><fpage>ITC113</fpage><lpage>ITC128</lpage><pub-id pub-id-type="doi">10.7326/AITC202108170</pub-id><pub-id pub-id-type="medline">34370518</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><person-group 
person-group-type="author"><collab>Choosing Wisely Canada</collab></person-group><article-title>Imaging tests for lower back pain</article-title><year>2022</year><access-date>2022-06-2</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://choosingwiselycanada.org/pamphlet/imaging-tests-for-lower-back-pain/">choosingwiselycanada.org/pamphlet/imaging-tests-for-lower-back-pain/</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bach</surname><given-names>SM</given-names></name><name name-style="western"><surname>Holten</surname><given-names>KB</given-names></name></person-group><article-title>Guideline update: what's the best approach to acute low back pain?</article-title><source>J Fam Pract</source><year>2009</year><volume>58</volume><issue>12</issue><fpage>E1</fpage><pub-id pub-id-type="medline">19961812</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rao</surname><given-names>D</given-names></name><name name-style="western"><surname>Scuderi</surname><given-names>G</given-names></name><name name-style="western"><surname>Scuderi</surname><given-names>C</given-names></name><name name-style="western"><surname>Grewal</surname><given-names>R</given-names></name><name name-style="western"><surname>Sandhu</surname><given-names>SJ</given-names></name></person-group><article-title>The use of imaging in management of patients with low back pain</article-title><source>J Clin Imaging Sci</source><year>2018</year><month>08</month><day>24</day><volume>8</volume><fpage>30</fpage><pub-id pub-id-type="doi">10.4103/jcis.JCIS_16_18</pub-id><pub-id pub-id-type="medline">30197821</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Esteva</surname><given-names>A</given-names></name><name name-style="western"><surname>Robicquet</surname><given-names>A</given-names></name><name name-style="western"><surname>Ramsundar</surname><given-names>B</given-names></name><name name-style="western"><surname>Kuleshov</surname><given-names>V</given-names></name><name name-style="western"><surname>DePristo</surname><given-names>M</given-names></name><name name-style="western"><surname>Chou</surname><given-names>K</given-names></name><etal/></person-group><article-title>A guide to deep learning in healthcare</article-title><source>Nat Med</source><year>2019</year><month>01</month><volume>25</volume><issue>1</issue><fpage>24</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1038/s41591-018-0316-z</pub-id><pub-id pub-id-type="medline">30617335</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Birtwhistle</surname><given-names>RV</given-names></name></person-group><article-title>Canadian Primary Care Sentinel Surveillance Network: a developing resource for family medicine and public health</article-title><source>Can Fam Physician</source><year>2011</year><month>10</month><volume>57</volume><fpage>10</fpage><lpage>1221</lpage><pub-id pub-id-type="medline">21998241</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Vaswani</surname><given-names>A</given-names></name><name name-style="western"><surname>Shazeer</surname><given-names>N</given-names></name><name name-style="western"><surname>Parmar</surname><given-names>N</given-names></name><name name-style="western"><surname>Uszkoreit</surname><given-names>J</given-names></name><name 
name-style="western"><surname>Jones</surname><given-names>L</given-names></name><name name-style="western"><surname>Gomez</surname><given-names>AN</given-names></name><etal/></person-group><article-title>Attention is all you need</article-title><year>2017</year><conf-name>Presented at Advances in Neural Information Processing Systems 30 (NIPS 2017)</conf-name><conf-date>December 4-9</conf-date><conf-loc>Long Beach, CA</conf-loc><comment><ext-link ext-link-type="uri" xlink:href="https://papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html">papers.nips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html</ext-link></comment></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names></name><name name-style="western"><surname>Chang</surname><given-names>M-W</given-names></name><name name-style="western"><surname>Lee</surname><given-names>K</given-names></name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names></name></person-group><article-title>BERT: pre-training of deep Bidirectional transformers for language understanding</article-title><year>2019</year><conf-name>Presented at Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</conf-name><conf-date>June</conf-date><conf-loc>Minneapolis, MN</conf-loc><fpage>4171</fpage><lpage>4186</lpage><pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miotto</surname><given-names>R</given-names></name><name name-style="western"><surname>Percha</surname><given-names>BL</given-names></name><name 
name-style="western"><surname>Glicksberg</surname><given-names>BS</given-names></name><name name-style="western"><surname>Lee</surname><given-names>H-C</given-names></name><name name-style="western"><surname>Cruz</surname><given-names>L</given-names></name><name name-style="western"><surname>Dudley</surname><given-names>JT</given-names></name><etal/></person-group><article-title>Identifying acute low back pain episodes in primary care practice from clinical notes: observational study</article-title><source>JMIR Med Inform</source><year>2020</year><month>02</month><day>27</day><volume>8</volume><issue>2</issue><fpage>e16878</fpage><pub-id pub-id-type="doi">10.2196/16878</pub-id><pub-id pub-id-type="medline">32130159</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Japkowicz</surname><given-names>N</given-names></name><name name-style="western"><surname>Stephen</surname><given-names>S</given-names></name></person-group><article-title>The class imbalance problem: a systematic study</article-title><source>Intelligent Data Analysis</source><year>2002</year><volume>6</volume><issue>5</issue><fpage>429</fpage><lpage>449</lpage><pub-id pub-id-type="doi">10.3233/IDA-2002-6504</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krawczyk</surname><given-names>B</given-names></name></person-group><article-title>Learning from imbalanced data: open challenges and future directions</article-title><source>Prog Artif Intelligence</source><year>2016</year><month>11</month><volume>5</volume><issue>4</issue><fpage>221</fpage><lpage>232</lpage><pub-id pub-id-type="doi">10.1007/s13748-016-0094-0</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Humbert-Droz</surname><given-names>M</given-names></name><name name-style="western"><surname>Mukherjee</surname><given-names>P</given-names></name><name name-style="western"><surname>Gevaert</surname><given-names>O</given-names></name></person-group><article-title>Strategies to address the lack of labeled data for supervised machine learning training with electronic health records: case study for the extraction of symptoms from clinical notes</article-title><source>JMIR Med Inform</source><year>2022</year><month>03</month><day>14</day><volume>10</volume><issue>3</issue><fpage>e32903</fpage><pub-id pub-id-type="doi">10.2196/32903</pub-id><pub-id pub-id-type="medline">35285805</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>K</given-names></name><name name-style="western"><surname>Reimers</surname><given-names>N</given-names></name><name name-style="western"><surname>Gurevych</surname><given-names>I</given-names></name></person-group><article-title>TSDAE: using transformer-based sequential denoising auto-encoder for unsupervised sentence embedding learning</article-title><year>2021</year><conf-name>Presented at Findings of the Association for Computational Linguistics: EMNLP 2021</conf-name><conf-date>November</conf-date><conf-loc>Punta Cana, Dominican Republic</conf-loc><fpage>671</fpage><lpage>688</lpage><pub-id pub-id-type="doi">10.18653/v1/2021.findings-emnlp.59</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>T</given-names></name><name name-style="western"><surname>Yao</surname><given-names>X</given-names></name><name 
name-style="western"><surname>Chen</surname><given-names>D</given-names></name></person-group><article-title>SimCSE: simple contrastive learning of sentence embeddings</article-title><year>2021</year><conf-name>Presented at Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</conf-name><conf-date>November</conf-date><conf-loc>Online and Punta Cana, Dominican Republic</conf-loc><fpage>6894</fpage><lpage>6910</lpage><pub-id pub-id-type="doi">10.18653/v1/2021.emnlp-main.552</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Reimers</surname><given-names>N</given-names></name><name name-style="western"><surname>Gurevych</surname><given-names>I</given-names></name></person-group><article-title>Sentence-BERT: sentence embeddings using siamese BERT-networks</article-title><year>2019</year><conf-name>Presented at Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name><conf-date>November</conf-date><conf-loc>Hong Kong, China</conf-loc><fpage>3982</fpage><lpage>3992</lpage><pub-id pub-id-type="doi">10.18653/v1/D19-1410</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Peng</surname><given-names>Y</given-names></name><name name-style="western"><surname>Yan</surname><given-names>S</given-names></name><name name-style="western"><surname>Lu</surname><given-names>Z</given-names></name></person-group><article-title>Transfer learning in biomedical natural language processing: an evaluation of BERT and ELMo on ten benchmarking datasets</article-title><year>2019</year><conf-name>Presented at Proceedings of the 18th BioNLP Workshop and Shared 
Task</conf-name><conf-date>August</conf-date><conf-loc>Florence, Italy</conf-loc><fpage>58</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.18653/v1/W19-5006</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Johnson</surname><given-names>AEW</given-names></name><name name-style="western"><surname>Pollard</surname><given-names>TJ</given-names></name><name name-style="western"><surname>Shen</surname><given-names>L</given-names></name><name name-style="western"><surname>Lehman</surname><given-names>LWH</given-names></name><name name-style="western"><surname>Feng</surname><given-names>M</given-names></name><name name-style="western"><surname>Ghassemi</surname><given-names>M</given-names></name><etal/></person-group><article-title>MIMIC-III, a freely accessible critical care database</article-title><source>Sci Data</source><year>2016</year><month>05</month><day>24</day><volume>3</volume><fpage>160035</fpage><pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id><pub-id pub-id-type="medline">27219127</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>Y</given-names></name></person-group><article-title>Convolutional neural networks for sentence classification</article-title><year>2014</year><conf-name>Presented at Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name><conf-date>October</conf-date><conf-loc>Doha, Qatar</conf-loc><fpage>1746</fpage><lpage>1751</lpage><pub-id pub-id-type="doi">10.3115/v1/D14-1181</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name 
name-style="western"><surname>Szyma&#x0144;ski</surname><given-names>P</given-names></name><name name-style="western"><surname>Kajdanowicz</surname><given-names>T</given-names></name></person-group><article-title>A network perspective on stratification of multi-label data</article-title><year>2017</year><conf-name>Presented at Proceedings of the First International Workshop on Learning With Imbalanced Domains: Theory and Applications</conf-name><conf-date>September 22</conf-date><conf-loc>Skopje, Macedonia</conf-loc><fpage>22</fpage><lpage>35</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://proceedings.mlr.press/v74/szyma%C5%84ski17a.html">proceedings.mlr.press/v74/szyma%C5%84ski17a.html</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Sechidis</surname><given-names>K</given-names></name><name name-style="western"><surname>Tsoumakas</surname><given-names>G</given-names></name><name name-style="western"><surname>Vlahavas</surname><given-names>I</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Gunopulos</surname><given-names>D</given-names></name><name name-style="western"><surname>Hofmann</surname><given-names>T</given-names></name><name name-style="western"><surname>Malerba</surname><given-names>D</given-names></name><name name-style="western"><surname>Vazirgiannis</surname><given-names>M</given-names></name></person-group><article-title>On the stratification of multi-label data</article-title><source>Machine Learning and Knowledge Discovery in Databases: European Conference, ECML PKDD 2011, Athens, Greece, September 5-9, 2011, Proceedings, Part III</source><year>2011</year><publisher-loc>Berlin, Heidelberg</publisher-loc><publisher-name>Springer</publisher-name><fpage>145</fpage><lpage>158</lpage><pub-id 
pub-id-type="doi">10.1007/978-3-642-23808-6</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Szyma&#x0144;ski</surname><given-names>P</given-names></name><name name-style="western"><surname>Kajdanowicz</surname><given-names>T</given-names></name></person-group><article-title>A scikit-based Python environment for performing multi-label classification</article-title><source>arXiv. Preprint posted online on February 5, 2017</source><pub-id pub-id-type="doi">10.48550/arXiv.1702.01460</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pedregosa</surname><given-names>F</given-names></name><name name-style="western"><surname>Varoquaux</surname><given-names>G</given-names></name><name name-style="western"><surname>Gramfort</surname><given-names>A</given-names></name><name name-style="western"><surname>Michel</surname><given-names>V</given-names></name><name name-style="western"><surname>Thirion</surname><given-names>B</given-names></name><name name-style="western"><surname>Grisel</surname><given-names>O</given-names></name><etal/></person-group><article-title>Scikit-learn: machine learning in Python</article-title><source>J Mach Learn Res</source><year>2011</year><volume>12</volume><issue>85</issue><fpage>2825</fpage><lpage>2830</lpage></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>T</given-names></name><name name-style="western"><surname>Wu</surname><given-names>F</given-names></name><name name-style="western"><surname>Katiyar</surname><given-names>A</given-names></name><name name-style="western"><surname>Weinberger</surname><given-names>KQ</given-names></name><name 
name-style="western"><surname>Artzi</surname><given-names>Y</given-names></name></person-group><article-title>Revisiting few-sample BERT fine-tuning</article-title><year>2021</year><conf-name>Presented at 9th International Conference on Learning Representations</conf-name><conf-date>May 3-7</conf-date><conf-loc>Virtual Event, Austria</conf-loc><comment><ext-link ext-link-type="uri" xlink:href="https://openreview.net/forum?id=cO1IH43yUF">openreview.net/forum?id=cO1IH43yUF</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Gururangan</surname><given-names>S</given-names></name><name name-style="western"><surname>Marasovi&#x0107;</surname><given-names>A</given-names></name><name name-style="western"><surname>Swayamdipta</surname><given-names>S</given-names></name><name name-style="western"><surname>Lo</surname><given-names>K</given-names></name><name name-style="western"><surname>Beltagy</surname><given-names>I</given-names></name><name name-style="western"><surname>Downey</surname><given-names>D</given-names></name><etal/></person-group><article-title>Don&#x2019;t stop pretraining: adapt language models to domains and tasks</article-title><year>2020</year><conf-name>Presented at Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</conf-name><conf-loc>Online</conf-loc><fpage>8342</fpage><lpage>8360</lpage><pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.740</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>Z</given-names></name><name name-style="western"><surname>Dai</surname><given-names>Z</given-names></name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names></name><name 
name-style="western"><surname>Carbonell</surname><given-names>J</given-names></name><name name-style="western"><surname>Salakhutdinov</surname><given-names>R</given-names></name><name name-style="western"><surname>Le</surname><given-names>QV</given-names></name></person-group><article-title>XLNet: generalized autoregressive pretraining for language understanding</article-title><source>arXiv. Preprint posted online on June 19, 2019</source><pub-id pub-id-type="doi">10.48550/arXiv.1906.08237</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Y</given-names></name><name name-style="western"><surname>Wehbe</surname><given-names>RM</given-names></name><name name-style="western"><surname>Ahmad</surname><given-names>FS</given-names></name><name name-style="western"><surname>Wang</surname><given-names>H</given-names></name><name name-style="western"><surname>Luo</surname><given-names>Y</given-names></name></person-group><article-title>Clinical-Longformer and Clinical-BigBird: transformers for long clinical sequences</article-title><source>arXiv. 
Preprint posted online on January 27, 2022</source><pub-id pub-id-type="doi">10.48550/arXiv.2201.11838</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Y</given-names></name><name name-style="western"><surname>Wehbe</surname><given-names>RM</given-names></name><name name-style="western"><surname>Ahmad</surname><given-names>FS</given-names></name><name name-style="western"><surname>Wang</surname><given-names>H</given-names></name><name name-style="western"><surname>Luo</surname><given-names>Y</given-names></name></person-group><article-title>A comparative study of pretrained language models for long clinical text</article-title><source>J Am Med Inform Assoc</source><year>2023</year><month>01</month><day>18</day><volume>30</volume><issue>2</issue><fpage>340</fpage><lpage>347</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocac225</pub-id><pub-id pub-id-type="medline">36451266</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names></name><name name-style="western"><surname>Chang</surname><given-names>M-W</given-names></name><name name-style="western"><surname>Lee</surname><given-names>K</given-names></name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names></name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><year>2019</year><conf-name>Presented at Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</conf-name><conf-date>June</conf-date><conf-loc>Minneapolis, MN</conf-loc><fpage>4171</fpage><lpage>4186</lpage><pub-id 
pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref></ref-list></back></article>