<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i4e37771</article-id>
      <article-id pub-id-type="pmid">35442903</article-id>
      <article-id pub-id-type="doi">10.2196/37771</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Predicting COVID-19 Symptoms From Free Text in Medical Records Using Artificial Intelligence: Feasibility Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Tiberius</surname>
            <given-names>Victor</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Van Olmen</surname>
            <given-names>Josefien</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Family Medicine and Population Health</institution>
            <institution>University of Antwerp</institution>
            <addr-line>Prinsstraat 13</addr-line>
            <addr-line>Antwerp, 2000</addr-line>
            <country>Belgium</country>
            <phone>32 475892225</phone>
            <email>josefien.vanolmen@uantwerpen.be</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9724-1887</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Van Nooten</surname>
            <given-names>Jens</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0165-5709</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Philips</surname>
            <given-names>Hilde</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9503-470X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Sollie</surname>
            <given-names>Annet</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0281-3526</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Daelemans</surname>
            <given-names>Walter</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9832-7890</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Family Medicine and Population Health</institution>
        <institution>University of Antwerp</institution>
        <addr-line>Antwerp</addr-line>
        <country>Belgium</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Computational Linguistics, Psycholinguistics and Sociolinguistics Research Centre</institution>
        <institution>University of Antwerp</institution>
        <addr-line>Antwerp</addr-line>
        <country>Belgium</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Josefien Van Olmen <email>josefien.vanolmen@uantwerpen.be</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>4</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>27</day>
        <month>4</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>4</issue>
      <elocation-id>e37771</elocation-id>
      <history>
        <date date-type="received">
          <day>6</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>31</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>11</day>
          <month>4</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Josefien Van Olmen, Jens Van Nooten, Hilde Philips, Annet Sollie, Walter Daelemans. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 27.04.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/4/e37771" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Electronic medical records have opened opportunities to analyze clinical practice at large scale. Structured registries and coding procedures such as the International Classification of Primary Care further improved these procedures. However, a large part of the information about the state of the patient and the doctors’ observations is still entered in free text fields. The main function of those fields is to report the doctor’s line of thought, to remind oneself and one’s colleagues of follow-up actions, and to be accountable for clinical decisions. These fields contain rich information that can be complementary to that in coded fields, and until now, they have hardly been used for analysis.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop a prediction model to convert the free text information on COVID-19–related symptoms from out-of-hours care electronic medical records into usable symptom-based data that can be analyzed at large scale.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The design was a feasibility study in which we examined the content of the raw data, steps and methods for modelling, as well as the precision and accuracy of the models. A data prediction model for 27 preidentified COVID-19–relevant symptoms was developed for a data set derived from the database of primary-care out-of-hours consultations in Flanders. A multiclass, multilabel categorization classifier was developed. We tested two approaches, which were (1) a classical machine learning–based text categorization approach, Binary Relevance, and (2) a deep neural network learning approach with BERTje, including a domain-adapted version. Ethical approval was acquired through the Institutional Review Board of the Institute of Tropical Medicine and the ethics committee of the University Hospital of Antwerpen (ref 20/50/693).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The sample set comprised 3957 fields. After cleaning, 2313 could be used for the experiments. Of the 2313 fields, 85% (n=1966) were used to train the model, and 15% (n=347) for testing. The normal BERTje model performed the best on the data. It reached a weighted F1 score of 0.70 and an exact match ratio or accuracy score of 0.38, indicating the instances for which the model has identified all correct codes. The other models achieved respectable results as well, ranging from 0.59 to 0.70 weighted F1. The Binary Relevance method performed the best on the data without a frequency threshold. As for the individual codes, the domain-adapted version of BERTje performs better on several of the less common objective codes, while BERTje reaches higher F1 scores for the least common labels especially, and for most other codes in general.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The artificial intelligence model BERTje can reliably predict COVID-19–related information from medical records using text mining from the free text fields generated in primary care settings. This feasibility study invites researchers to examine further possibilities to use primary care routine data.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>text mining</kwd>
        <kwd>electronic medical records</kwd>
        <kwd>COVID-19</kwd>
        <kwd>structured registry</kwd>
        <kwd>coding procedure</kwd>
        <kwd>prediction model</kwd>
        <kwd>feasibility study</kwd>
        <kwd>precision model</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>primary care</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Electronic medical records (EMRs) have opened the opportunity to analyze clinical practice at large scale, and to perform clinical-epidemiological research, which can inform health care managers and policy makers. Structured registries and coding procedures such as the International Classification of Primary Care have improved the way doctors put information into EMRs, which has facilitated the use of their output and accelerated research using these data. The free text fields that also remain available in EMR systems have hardly been used other than for clinical follow-up. Yet the usage of this information has great potential to contribute to monitoring and evaluation of clinical practice and to EMR-driven research. In 2016, US researchers compared the accuracy for case detection of diagnoses such as dementia, stroke, diabetes, and depression based upon coded information versus the procedure including free text, and they found a significant improvement in algorithm sensitivity in the latter [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
      <p>This is not surprising since these fields contain the core of clinical practice captured in the encounter notes. The encounter notes available in most EMRs have a structured “SOAP” format, which stands for Subjective (patient’s history), Objective (physical examination), Assessment (initial differential diagnosis), and Plan [<xref ref-type="bibr" rid="ref2">2</xref>]. The main function of these free text fields is to report the doctor’s line of thought, to remind oneself and colleagues on follow-up actions, and to be accountable for clinical decisions. Therefore, they contain the richest data about the state of the patient and the observations of the doctor. Yet their use is also challenging. Health care providers tend to write notes quickly, with personal styles and abbreviations, and they vary in their completeness and quality of reporting. Therefore, encounter notes have seldom been used for further analyses and research.</p>
      <p>A 2019 review on the use of free text fields in the EMR [<xref ref-type="bibr" rid="ref3">3</xref>] showed that the focus of most studies was on the development of methods to extract symptom information for disease classification tasks. For instance, a UK study validated a method for mining free text fields to link them to frequent medical conditions such as colic or renal failure [<xref ref-type="bibr" rid="ref4">4</xref>]. The analysis of symptoms themselves has been restricted to specific and rather narrow domains such as neuromuscular diseases [<xref ref-type="bibr" rid="ref5">5</xref>], psychiatry [<xref ref-type="bibr" rid="ref6">6</xref>], and veterinary medicine [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. A recent study demonstrates the feasibility of extracting information from free text notes and using this as input to a model for predicting patient outcomes [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      <p>To use the information from free text fields at a large scale, methods to recognize this information need to be developed and evaluated. A 2012 study found that a combination of a manually created filter and a rule learning algorithm yielded the best performance across two different data sets (radiology reports and general practitioner [GP] notes) [<xref ref-type="bibr" rid="ref10">10</xref>], but the performance for the GP set was considerably lower. The variation of symptoms and note-taking is peculiar to the GP domain. This implies that more such studies are necessary to develop robust methods for data recognition for GP data sets to improve the reproducibility of data and their value for routine use.</p>
      <p>The relevance of quick information based on real-time data was apparent in the COVID-19 pandemic. The collection, evaluation, and synthesis of information started quickly. Data mainly came from hospital settings, where most severe cases were admitted, and where resources could be mobilized quickly, for instance, to make decision-support algorithms for diagnosis and treatment based upon models that predict disease outcomes [<xref ref-type="bibr" rid="ref11">11</xref>]. This predominant use of data from severely ill patients led to a risk of bias in the models [<xref ref-type="bibr" rid="ref12">12</xref>]. This underlined the need to develop methods to extract data quickly and reliably from primary care health records at large scale.</p>
      <p>Our study contributes to this goal. The objective of this paper was to develop a robust method to transform the primary care notes into a list of symptoms that could feed improved COVID-19 prediction models through the development of a text classifier model that can predict the relevant symptoms (output) based upon the analysis of the free text fields (input). If this method proves robust, free text data from primary care clinical notes about COVID-19–related symptoms can be mined at large scale quickly and reliably.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Background</title>
        <p>This study is part of the project ID-CoV to develop procedures for data identification, harmonization, and linkage to develop robust methodologies to build a risk prediction tool based on primary care and hospital data for the identification of individuals at higher risk for severe COVID-19 outcomes (project ID 43639, funded by the University of Antwerp).</p>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>The iCAREdata database was used, which is a database of contacts in out-of-hours (OOH) care by general practice cooperatives, triage centers (additional centers organized during the COVID-19 pandemic to triage between infectious and noninfectious diseases), pharmacies, and a small number of first aid departments connected to the system (covering OOH care of roughly two-thirds of the population of Flanders) [<xref ref-type="bibr" rid="ref13">13</xref>]. One OOH service hosts between 80 and 150 different GPs. Data from EMRs at OOH services therefore cover a broad range of different physicians, with different approaches to medical care and registration of clinical data, leading to high variability of content, completeness, quality, and format of information in the data set, which adds methodological challenges to developing mining procedures. Nevertheless, the analysis of the data of this segment of primary care consultations is especially relevant in a pandemic context [<xref ref-type="bibr" rid="ref14">14</xref>]. The units of analysis in iCAREdata are records, each record being one contact (=consultation). Due to the exploratory nature of the study, sample size was not considered a limiting factor. We aimed to use as many observations (patient encounters) as possible in a given time period to reduce the uncertainty of our model estimates. A study database was created that comprises all records from January 1, 2019, to November 30, 2020. These are roughly 779,000 records, which include a pre–COVID-19 period and a COVID-19 epidemic period (March 1, 2020, to November 30, 2020).</p>
        <p>For each record, 15 fields were extracted (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). For the data mining study reported in this paper, only 5 fields were used (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>). The “field subjective” (physician’s report on the patient’s account of their problem) and “field objective” (findings and measurements of the physician) were explored for relevant text (combinations). We used supervised machine learning algorithms to classify information into one or more of predetermined symptoms via the multiclass, multilabel prediction model described below. Fields “DiagnTekst” and “DiagnCod” were used as control records for validation.</p>
        <p>The symptom list that needed to be the outcome of the classifier model was established starting from an initial list of 23 symptoms identified by the Belgian Public Health Institute as relevant [<xref ref-type="bibr" rid="ref15">15</xref>] and was then refined based on the data. A manual exploration of the data set yielded 62 symptoms, most of them with a negative counterpart, indicating the absence of that symptom. Negative symptoms were relevant because of their negative predictive value in a diagnostic or prognostic algorithm [<xref ref-type="bibr" rid="ref16">16</xref>]; for instance, the absence of cough contributes to the likelihood or non-likelihood of a COVID-19 diagnosis. The skewed distribution led to a regrouping of symptoms, resulting in a final list of 27 signs or symptoms (<xref ref-type="table" rid="table1">Table 1</xref>). There are two types of symptom codes, which are “objective,” based on the “objectief” text field, and “subjective,” based on the “subjectief” text field, respectively.</p>
        <boxed-text id="box1" position="float">
          <title>Relevant fields for input to machine learning algorithm to recognize signs and symptoms.</title>
          <p>Machine learning fields</p>
          <list list-type="bullet">
            <list-item>
              <p>IdContact: unique id for contact (date, guard post, time)</p>
            </list-item>
            <list-item>
              <p>Subjectief: subjective text field</p>
            </list-item>
            <list-item>
              <p>Objectief: objective text field</p>
            </list-item>
            <list-item>
              <p>DiagnTekst: diagnosis term (thesaurus)</p>
            </list-item>
            <list-item>
              <p>DiagnCod: diagnosis code from the International Classification of Primary Care [<xref ref-type="bibr" rid="ref17">17</xref>]</p>
            </list-item>
          </list>
        </boxed-text>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Final list with signs and symptoms to be coded from the free text.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="780"/>
            <thead>
              <tr valign="top">
                <td>Final symptoms—coded</td>
                <td>Explanation</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>S<sup>a</sup>1; SA<sup>b</sup>1</td>
                <td>Cough</td>
              </tr>
              <tr valign="top">
                <td>S100; SA100</td>
                <td>Upper respiratory tract infection complaints</td>
              </tr>
              <tr valign="top">
                <td>S101; SA101</td>
                <td>Dyspnea and shortness of breath</td>
              </tr>
              <tr valign="top">
                <td>S7; SA7</td>
                <td>Thoracic pain or chest pain</td>
              </tr>
              <tr valign="top">
                <td>S102; SA102</td>
                <td>Loss of taste or smell</td>
              </tr>
              <tr valign="top">
                <td>S10; SA10</td>
                <td>History of fever</td>
              </tr>
              <tr valign="top">
                <td>S112</td>
                <td>Pain or stiffness in muscles, joints, or neck</td>
              </tr>
              <tr valign="top">
                <td>S109</td>
                <td>Complaints of throat or voice</td>
              </tr>
              <tr valign="top">
                <td>S12</td>
                <td>Fatigue</td>
              </tr>
              <tr valign="top">
                <td>S15</td>
                <td>Headache</td>
              </tr>
              <tr valign="top">
                <td>S103; SA103</td>
                <td>Gastrointestinal complaints</td>
              </tr>
              <tr valign="top">
                <td>S104</td>
                <td>Significant acute event or change</td>
              </tr>
              <tr valign="top">
                <td>S105</td>
                <td>Chronic pulmonary complaints; smoking; potentially worsening</td>
              </tr>
              <tr valign="top">
                <td>S105</td>
                <td>Other comorbidities or being pregnant</td>
              </tr>
              <tr valign="top">
                <td>S106</td>
                <td>Known cardiovascular diseases or hypertension or relevant medication</td>
              </tr>
              <tr valign="top">
                <td>S107</td>
                <td>Known diabetes or diabetes medication</td>
              </tr>
              <tr valign="top">
                <td>S108</td>
                <td>Medication NSAID<sup>c</sup> or immunosuppressive drugs</td>
              </tr>
              <tr valign="top">
                <td>S113</td>
                <td>Palpitations or dizziness</td>
              </tr>
              <tr valign="top">
                <td>S110</td>
                <td>General complaints as malaise and illness</td>
              </tr>
              <tr valign="top">
                <td>S111</td>
                <td>Mental or sleeping problems</td>
              </tr>
              <tr valign="top">
                <td>S63</td>
                <td>Close contact with a sick person (COVID-19 symptoms) or COVID-19–positive case</td>
              </tr>
              <tr valign="top">
                <td>O<sup>d</sup>101</td>
                <td>Respiratory signs found during physical examination</td>
              </tr>
              <tr valign="top">
                <td>O6</td>
                <td>Fever measured by health care staff</td>
              </tr>
              <tr valign="top">
                <td>O102</td>
                <td>Ear-, nose-, or throat-positive signs during physical examination</td>
              </tr>
              <tr valign="top">
                <td>O104</td>
                <td>Neurological symptoms</td>
              </tr>
              <tr valign="top">
                <td>O103</td>
                <td>Circulatory positive signs: abnormal pulse rate, tension, or turgor of capillary refill</td>
              </tr>
              <tr valign="top">
                <td>O19</td>
                <td>Impression of being ill</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>S: Subjective.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>A: absence of the symptom.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>NSAID: nonsteroidal anti-inflammatory drugs.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>O: Objective.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Development of a Classifier Model</title>
        <p>Classification entails the task of predicting the class (or label of the output variable—the list with 27 signs or symptoms) based upon the input variables (the free text fields). Two approaches were examined to develop a multiclass, multilabel categorization classifier, which are as follows: (1) a classical machine learning–based text categorization approach; and (2) a deep neural network learning approach based on fine-tuning a pretrained model for domain adaptation and learning the classification task. The advantage of the latter approach is that, in general, less supervised training data (ie, annotated data) are needed for learning the task. A random sample from the data set was extracted for annotation, with a distribution of 1/3 records from before the start of the COVID-19 pandemic (operationalized as March 1, 2020) and 2/3 after that date, comprising 3957 entries in total. Character encoding problems in the text data were solved during preprocessing. Empty entries and entries that did not contain any information (eg, “/”) in either the subjective or objective fields were removed from the data set, which left 2313 entries to be used for the experiments. The subjective and objective text fields were merged into one text field in order to obtain sufficiently large text fragments for prediction. The same resulting text could be assigned multiple objective and subjective codes. Negative symptoms were kept apart by coding them with an A-label; for instance, SA10 indicated the absence of a history of fever. The A codes were frequent among the objective text fields. Entries that were annotated as irrelevant (without any symptom code) were used as negative examples for training of the models.</p>
        <p>The samples were annotated by 5 medical doctors or researchers. Inter-annotator variability was checked. All annotators started annotation of the same set and manually compared inconsistencies, discussed them, adapted the standard operating guidelines, and repeated this procedure until agreement of 90% was achieved. During the annotation phase, the inventory of symptom tags (classes) evolved, but all annotated data were made comparable through a common code book and standard operating procedure in the final data set. The number of entries, average number of tokens (instances of words and punctuation marks), and total amount of tokens for the training partition, test partition, and the total data set are summarized in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <p>The distribution of codes (labels) in the data set is shown in <xref rid="figure1" ref-type="fig">Figures 1</xref> and <xref rid="figure2" ref-type="fig">2</xref>. The majority of the codes are subjective codes; out of the 55 codes, 43 (78%) are subjective while the remaining 12 (22%) are objective. For the development of the classifier, experiments were conducted with all codes and only codes occurring at least 50 times, which meant 35 (63%) out of 55 codes (representing 93% of all used codes).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Total number of entries, average amount of tokens per entry, and total amount of tokens for the training, test portions, and the entire data set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="230"/>
            <col width="280"/>
            <col width="260"/>
            <thead>
              <tr valign="top">
                <td>Portion</td>
                <td>Entries, n (%)</td>
                <td>Average tokens per entry, n</td>
                <td>Total tokens, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Train</td>
                <td>1966 (85)</td>
                <td>24</td>
                <td>53,929</td>
              </tr>
              <tr valign="top">
                <td>Test</td>
                <td>347 (15)</td>
                <td>31</td>
                <td>10,779</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>2313 (100)</td>
                <td>28</td>
                <td>64,708</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Code distribution in the data set. Codes to the right of the threshold line were removed for the experiments where a frequency threshold was employed.</p>
          </caption>
          <graphic xlink:href="medinform_v10i4e37771_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Distribution of the percentage of entries in the data set assigned to a particular number of codes.</p>
          </caption>
          <graphic xlink:href="medinform_v10i4e37771_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The baseline accuracies (most frequent class prediction and random prediction) are 0.15 and 0.08, respectively. In the first set of experiments, we used classic machine learning methods. One of the most common approaches to multiclass, multilabel classification is Binary Relevance. With this method, the multilabel problem is translated to <italic>n</italic> binary classification problems, where <italic>n</italic> is equal to the number of labels present in the data set. Binary in this case means that the classifier attempts to predict whether a class (code) is present (1) or not (0) in the text. For the binary classifiers, we used the Stochastic Gradient Descent classifier [<xref ref-type="bibr" rid="ref18">18</xref>] and optimized the hyperparameters (including the loss function) by performing a grid search on them (a search for the best combination of algorithm parameters on a validation partition of the training data in the context of 5-fold cross-validation). The performance of this method is measured by taking the mean of all cross-validated results from the individual binary classifiers.</p>
        <p>Further experiments were then conducted with BERTje [<xref ref-type="bibr" rid="ref19">19</xref>], a Dutch version of BERT [<xref ref-type="bibr" rid="ref20">20</xref>]. BERT is a widely used model for natural language processing, and the availability of a Dutch version, BERTje, made it the first choice of the team. BERTje is an open-source pretrained language model that has been trained on a large amount of generic (nonmedical) Dutch text data. Thus, the model already has knowledge about language patterns before having been trained on data for a specific problem, in contrast to, for example, the Stochastic Gradient Descent classifier, which was limited to the training data. Additionally, we continued the pretraining of BERTje by using a selection of the text fields of the original data set (part of the iCAREdata database) in order to “adapt” BERTje to medical texts. This method has been proven to be successful on a wide range of tasks [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. For all experiments, the F1 macro score metric was used for evaluation, which is the average F1 score (harmonic mean of precision and recall) obtained for the classes. In our binary relevance setup and the implementation of F1 macro we used, only successful predictions of the minority class (correctly predicting that the code is present) are taken into account, which makes it the most critical (but also the most relevant) evaluation.</p>
        <p>For all experiments, we used a stratified train-test split, where 80% of the data were used for training and hyperparameter optimization, and 20% were used for testing. The best model on the test set (BERTje) was then fine-tuned on all annotated data and applied to the complete (unannotated) data set, predicting diagnostic codes based on the text fields.</p>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>Ethical approval was acquired through the Institutional Review Board of the Institute of Tropical Medicine and the ethics committee of the University Hospital of Antwerpen (ref 20/50/693).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>In the tables below, the results of the experiments on the test set are summarized. Across all models that were trained and tested on data with a frequency threshold for the labels, the normal BERTje model performed the best on the data, reaching a weighted F1 score of 0.70 and an exact match ratio or accuracy score of 0.38 (<xref ref-type="table" rid="table3">Table 3</xref>), indicating the instances for which the model has identified all correct codes. The results per code can be found in Table S1 of <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The other models achieved respectable results as well, ranging from 0.59 to 0.70 weighted F1. The Binary Relevance method performed the best on the data without a frequency threshold (Table S2 of <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      <p>Regarding the results on the individual codes themselves, the domain-adapted version of BERTje performs better on several of the less common objective codes (O101, O102, OA101, OA102, OA104, and OA6), while BERTje reaches higher F1 scores for the least common labels (S102 and SA102) especially, and most other codes in general.</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Average results for the different models on test data with a frequency threshold for the codes (codes occurring at least 50 times).</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="280"/>
          <col width="180"/>
          <col width="180"/>
          <col width="180"/>
          <col width="180"/>
          <thead>
            <tr valign="top">
              <td>Method</td>
              <td>Weighted precision</td>
              <td>Weighted specificity</td>
              <td>Weighted recall</td>
              <td>Weighted F1</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Binary Relevance (SGD<sup>a</sup> classifier)</td>
              <td>0.69</td>
              <td>0.93</td>
              <td>0.52</td>
              <td>0.59</td>
            </tr>
            <tr valign="top">
              <td>BERTje</td>
              <td>0.77</td>
              <td>0.97</td>
              <td>0.68</td>
              <td>0.70</td>
            </tr>
            <tr valign="top">
              <td>BERTje (domain adaptation)</td>
              <td>0.74</td>
              <td>0.96</td>
              <td>0.62</td>
              <td>0.67</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>SGD: Stochastic Gradient Descent.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this paper, we demonstrated the feasibility of developing a model to predict symptom codes from primary care clinical text notes. Across the three models tested, the pretrained neural network model BERTje performed the best. The reason for the lower performance of the domain-adapted BERTje needs further investigation. Neural networks can forget information they previously learned upon learning new information (catastrophic forgetting); however, from the current data, we are not able to explain if this was the reason for the lower performance.</p>
        <p>Our model resulted in the ability to predict symptoms from the free text with a weighted average F score of 0.66 (0.75 sensitivity and 0.97 specificity) on all codes, regardless of frequency, and an F score of 0.70 (0.77 sensitivity and 0.97 specificity) on codes that occurred more than 50 times in the data set. Very few studies have developed mining techniques for clinical notes in general [<xref ref-type="bibr" rid="ref23">23</xref>], and from primary care in particular. Yet the few other studies show feasibility and good results [<xref ref-type="bibr" rid="ref24">24</xref>]. A study using a Repeated Incremental Pruning to Produce Error Reduction rule learning model resulted in a sensitivity of 0.91 and a specificity of 0.76 [<xref ref-type="bibr" rid="ref10">10</xref>]. To our knowledge, this is the first study that mined data from OOH health care organizations.</p>
        <p>The strength of our study is that we used a large database representative of a population of 6 million people in Flanders and with many different GPs. The major limitation of our study relates to the quality of the raw data. The data set contained OOH primary care consultations. The notes in these consultations were often very brief, and the completeness and quality of information varied across entries. This is similar in studies from routine primary care [<xref ref-type="bibr" rid="ref25">25</xref>]; however, in OOH care, this is likely to be worse, making it more difficult to develop mining models. This reflects the reality of medical practice and the limitations of real-world data. Further research into minimal needs for reporting for both clinical and other purposes is warranted. Another limitation is that some symptom codes, for instance, SA100 (<italic>geen BLWI klachten-no respiratory tract complaints</italic>), could not be learned by the machine learning models. The explanation for this, as for similar cases, is that there were too few instances available in the data set for the model to learn from [<xref ref-type="bibr" rid="ref9">9</xref>]. For these codes, it would be useful to investigate the data for more cases to be annotated. Even if more elaborate annotating will improve the gain, not all free text fields can be transformed into coded information, which needs to be taken into account in the interpretation of the output.</p>
        <p>Notwithstanding the limitations, our study is relevant for primary care research and evaluation. Once coded, these symptoms can be monitored, evaluated, and processed, for the development and testing of algorithms, for near real time symptom surveillance [<xref ref-type="bibr" rid="ref26">26</xref>], or for assessing quality of history taking and record keeping. Our study focused on symptom detection, but wider applications of the text mining and natural language processing can be thought of, such as the analyses of adverse events or patient-reported experiences [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The BERTje prediction models can reliably predict COVID-19–related information from medical records using text mining from the free text fields generated in primary care settings. The feasibility to convert this rich but largely untapped source of clinical encounter into data usable for monitoring, evaluation, and research provides opportunities for comprehensive analysis of primary care consultations at large scale, as well as use for monitoring purposes, also in other primary care settings. This feasibility study invites researchers to examine further possibilities to use primary care routine data, for instance, to examine the process of clinical reasoning through EMR analysis or to assess the input of patient-related information into the diagnostic process.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Details about experiments.</p>
        <media xlink:href="medinform_v10i4e37771_app1.docx" xlink:title="DOCX File , 39 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">GP</term>
          <def>
            <p>general practitioner</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">OOH</term>
          <def>
            <p>out of hours</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We acknowledge the following people: Nathalie Wisse and Veronique Verhoeven for their contribution to the manual coding process; José Peñalvo, Elly Mertens, and Els van Gentbrugge for the development of the jointly funded project ID-COV; and the iCAREdata team for extracting the data set. This study was funded by the University of Antwerp (ID 43639) through a joint Pump Priming Proposal Fund.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ford</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>HE</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cassell</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Extracting information from the text of electronic medical records to improve case detection: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <month>09</month>
          <day>05</day>
          <volume>23</volume>
          <issue>5</issue>
          <fpage>1007</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26911811"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv180</pub-id>
          <pub-id pub-id-type="medline">26911811</pub-id>
          <pub-id pub-id-type="pii">ocv180</pub-id>
          <pub-id pub-id-type="pmcid">PMC4997034</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pearce</surname>
              <given-names>PF</given-names>
            </name>
            <name name-style="western">
              <surname>Ferguson</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Langford</surname>
              <given-names>CA</given-names>
            </name>
          </person-group>
          <article-title>The essential SOAP note in an EHR age</article-title>
          <source>Nurse Pract</source>
          <year>2016</year>
          <month>02</month>
          <day>18</day>
          <volume>41</volume>
          <issue>2</issue>
          <fpage>29</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1097/01.NPR.0000476377.35114.d7</pub-id>
          <pub-id pub-id-type="medline">26795838</pub-id>
          <pub-id pub-id-type="pii">00006205-201602000-00004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koleck</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Dreisbach</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bourne</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Bakken</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing of symptoms documented in free-text narratives of electronic health records: a systematic review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>04</month>
          <day>01</day>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>364</fpage>
          <lpage>379</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30726935"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy173</pub-id>
          <pub-id pub-id-type="medline">30726935</pub-id>
          <pub-id pub-id-type="pii">5307912</pub-id>
          <pub-id pub-id-type="pmcid">PMC6657282</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Duz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Parkin</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Validation of an Improved Computer-Assisted Technique for Mining Free-Text Electronic Medical Records</article-title>
          <source>JMIR Med Inform</source>
          <year>2017</year>
          <month>06</month>
          <day>29</day>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>e17</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2017/2/e17/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.7123</pub-id>
          <pub-id pub-id-type="medline">28663163</pub-id>
          <pub-id pub-id-type="pii">v5i2e17</pub-id>
          <pub-id pub-id-type="pmcid">PMC5509949</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaya</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Alcan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Zinnuroğlu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Karataş</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Çoban</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dolgun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deniz</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Analysis of free text in electronic health records by using text mining methods</article-title>
          <year>2018</year>
          <conf-name>7th International Conference on Advanced Technologies (ICAT’18)</conf-name>
          <conf-date>28 April - 01 May 2018</conf-date>
          <conf-loc>Antalya, Turkey</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karystianis</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nevado</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dehghan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Automatic mining of symptom severity from psychiatric evaluation notes</article-title>
          <source>Int J Methods Psychiatr Res</source>
          <year>2018</year>
          <month>03</month>
          <day>22</day>
          <volume>27</volume>
          <issue>1</issue>
          <fpage>e1602</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29271009"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/mpr.1602</pub-id>
          <pub-id pub-id-type="medline">29271009</pub-id>
          <pub-id pub-id-type="pmcid">PMC5888187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Anholt</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Berezowski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jamal</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ribble</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Stephen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Mining free-text medical records for companion animal enteric syndrome surveillance</article-title>
          <source>Prev Vet Med</source>
          <year>2014</year>
          <month>03</month>
          <day>01</day>
          <volume>113</volume>
          <issue>4</issue>
          <fpage>417</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1016/j.prevetmed.2014.01.017</pub-id>
          <pub-id pub-id-type="medline">24485708</pub-id>
          <pub-id pub-id-type="pii">S0167-5877(14)00018-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Welsh</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Duz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Parkin</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>JF</given-names>
            </name>
          </person-group>
          <article-title>Disease and pharmacologic risk factors for first and subsequent episodes of equine laminitis: A cohort study of free-text electronic medical records</article-title>
          <source>Prev Vet Med</source>
          <year>2017</year>
          <month>01</month>
          <day>01</day>
          <volume>136</volume>
          <fpage>11</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.1016/j.prevetmed.2016.11.012</pub-id>
          <pub-id pub-id-type="medline">28010903</pub-id>
          <pub-id pub-id-type="pii">S0167-5877(16)30581-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goh</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yeow</surname>
              <given-names>AYK</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Au</surname>
              <given-names>LSY</given-names>
            </name>
            <name name-style="western">
              <surname>Poh</surname>
              <given-names>HMN</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yeow</surname>
              <given-names>JJL</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>GYH</given-names>
            </name>
          </person-group>
          <article-title>Prediction of Readmission in Geriatric Patients From Clinical Notes: Retrospective Text Mining Study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>10</month>
          <day>19</day>
          <volume>23</volume>
          <issue>10</issue>
          <fpage>e26486</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/10/e26486/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26486</pub-id>
          <pub-id pub-id-type="medline">34665149</pub-id>
          <pub-id pub-id-type="pii">v23i10e26486</pub-id>
          <pub-id pub-id-type="pmcid">PMC8564665</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>'t Jong</surname>
              <given-names>GW</given-names>
            </name>
            <name name-style="western">
              <surname>van Soest</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Sturkenboom</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Kors</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Automating classification of free-text electronic health records for epidemiological studies</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2012</year>
          <month>06</month>
          <day>24</day>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>651</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1002/pds.3205</pub-id>
          <pub-id pub-id-type="medline">22271492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jimenez-Solem</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Petersen</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lioma</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Igel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Boomsma</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Krause</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenzen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Selvan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Petersen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nyeland</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Ankarfeldt</surname>
              <given-names>MZ</given-names>
            </name>
            <name name-style="western">
              <surname>Virenfeldt</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Winther-Jensen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Linneberg</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ghazi</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Detlefsen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lauritzen</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>de Bruijne</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ibragimov</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Petersen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lillholm</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Middleton</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mogensen</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Thorsen-Meyer</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Perner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Helleberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaas-Hansen</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Bonde</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bonde</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nielsen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sillesen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Developing and validating COVID-19 adverse outcome risk prediction models from a bi-national European cohort of 5594 patients</article-title>
          <source>Sci Rep</source>
          <year>2021</year>
          <month>02</month>
          <day>05</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>3246</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-021-81844-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-021-81844-x</pub-id>
          <pub-id pub-id-type="medline">33547335</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-021-81844-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC7864944</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wynants</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Van Calster</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Heinze</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Schuit</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bonten</surname>
              <given-names>MMJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dahly</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Damen</surname>
              <given-names>JAA</given-names>
            </name>
            <name name-style="western">
              <surname>Debray</surname>
              <given-names>TPA</given-names>
            </name>
            <name name-style="western">
              <surname>de Jong</surname>
              <given-names>VMT</given-names>
            </name>
            <name name-style="western">
              <surname>De Vos</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dhiman</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Haller</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Harhay</surname>
              <given-names>MO</given-names>
            </name>
            <name name-style="western">
              <surname>Henckaerts</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Heus</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kammer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kreuzberger</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lohmann</surname>
              <given-names>Anna</given-names>
            </name>
            <name name-style="western">
              <surname>Luijken</surname>
              <given-names>Kim</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Jie</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>Glen P</given-names>
            </name>
            <name name-style="western">
              <surname>McLernon</surname>
              <given-names>David J</given-names>
            </name>
            <name name-style="western">
              <surname>Andaur Navarro</surname>
              <given-names>Constanza L</given-names>
            </name>
            <name name-style="western">
              <surname>Reitsma</surname>
              <given-names>Johannes B</given-names>
            </name>
            <name name-style="western">
              <surname>Sergeant</surname>
              <given-names>Jamie C</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Chunhu</given-names>
            </name>
            <name name-style="western">
              <surname>Skoetz</surname>
              <given-names>Nicole</given-names>
            </name>
            <name name-style="western">
              <surname>Smits</surname>
              <given-names>Luc J M</given-names>
            </name>
            <name name-style="western">
              <surname>Snell</surname>
              <given-names>Kym I E</given-names>
            </name>
            <name name-style="western">
              <surname>Sperrin</surname>
              <given-names>Matthew</given-names>
            </name>
            <name name-style="western">
              <surname>Spijker</surname>
              <given-names>René</given-names>
            </name>
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>Ewout W</given-names>
            </name>
            <name name-style="western">
              <surname>Takada</surname>
              <given-names>Toshihiko</given-names>
            </name>
            <name name-style="western">
              <surname>Tzoulaki</surname>
              <given-names>Ioanna</given-names>
            </name>
            <name name-style="western">
              <surname>van Kuijk</surname>
              <given-names>Sander M J</given-names>
            </name>
            <name name-style="western">
              <surname>van Bussel</surname>
              <given-names>Bas</given-names>
            </name>
            <name name-style="western">
              <surname>van der Horst</surname>
              <given-names>Iwan C C</given-names>
            </name>
            <name name-style="western">
              <surname>van Royen</surname>
              <given-names>Florien S</given-names>
            </name>
            <name name-style="western">
              <surname>Verbakel</surname>
              <given-names>Jan Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wallisch</surname>
              <given-names>Christine</given-names>
            </name>
            <name name-style="western">
              <surname>Wilkinson</surname>
              <given-names>Jack</given-names>
            </name>
            <name name-style="western">
              <surname>Wolff</surname>
              <given-names>Robert</given-names>
            </name>
            <name name-style="western">
              <surname>Hooft</surname>
              <given-names>Lotty</given-names>
            </name>
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>Karel G M</given-names>
            </name>
            <name name-style="western">
              <surname>van Smeden</surname>
              <given-names>Maarten</given-names>
            </name>
          </person-group>
          <article-title>Prediction models for diagnosis and prognosis of covid-19: systematic review and critical appraisal</article-title>
          <source>BMJ</source>
          <year>2020</year>
          <month>04</month>
          <day>07</day>
          <volume>369</volume>
          <fpage>m1328</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=32265220"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.m1328</pub-id>
          <pub-id pub-id-type="medline">32265220</pub-id>
          <pub-id pub-id-type="pmcid">PMC7222643</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Colliers</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bartholomeeusen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Remmen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Coenen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Michiels</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bastiaens</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Van Royen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Verhoeven</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Holmgren</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>De Ruyck</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Philips</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Improving Care And Research Electronic Data Trust Antwerp (iCAREdata): a research database of linked data on out-of-hours primary care</article-title>
          <source>BMC Res Notes</source>
          <year>2016</year>
          <month>05</month>
          <day>04</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>259</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcresnotes.biomedcentral.com/articles/10.1186/s13104-016-2055-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13104-016-2055-x</pub-id>
          <pub-id pub-id-type="medline">27142361</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13104-016-2055-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC4855754</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morreel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Philips</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Verhoeven</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Organisation and characteristics of out-of-hours primary care during a COVID-19 outbreak: A real-time observational study</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <month>08</month>
          <day>13</day>
          <volume>15</volume>
          <issue>8</issue>
          <fpage>e0237629</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0237629"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0237629</pub-id>
          <pub-id pub-id-type="medline">32790804</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-15141</pub-id>
          <pub-id pub-id-type="pmcid">PMC7425859</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>Gevalsdefinitie, indicaties voor testen en verplichte melding van covid-19</article-title>
          <source>Sciensano</source>
          <year>2020</year>
          <access-date>2021-12-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://covid-19.sciensano.be/sites/default/files/Covid19/COVID-19_Case%20definition_Testing_NL.pdf">https://covid-19.sciensano.be/sites/default/files/Covid19/COVID-19_Case%20definition_Testing_NL.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tostmann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bradley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bousema</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yiek</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Holwerda</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bleeker-Rovers</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ten Oever</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Meijer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rahamat-Langendoen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hopman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van der Geest-Blankert</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wertheim</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Strong associations and moderate predictive value of early symptoms for SARS-CoV-2 test positivity among healthcare workers, the Netherlands, March 2020</article-title>
          <source>Euro Surveill</source>
          <year>2020</year>
          <month>04</month>
          <volume>25</volume>
          <issue>16</issue>
          <fpage>2000508</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.eurosurveillance.org/content/10.2807/1560-7917.ES.2020.25.16.2000508"/>
          </comment>
          <pub-id pub-id-type="doi">10.2807/1560-7917.ES.2020.25.16.2000508</pub-id>
          <pub-id pub-id-type="medline">32347200</pub-id>
          <pub-id pub-id-type="pmcid">PMC7189649</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <article-title>ICPC-2 International Classification of Primary Care - 2nd edition</article-title>
          <source>Universiteit Gent</source>
          <access-date>2022-04-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.transhis.nl/wp-content/uploads/2014/12/icpc-2-2pager-nederlands.pdf">https://www.transhis.nl/wp-content/uploads/2014/12/icpc-2-2pager-nederlands.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robbins</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Monro</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A Stochastic Approximation Method</article-title>
          <source>Ann Math Statist</source>
          <year>1951</year>
          <month>09</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>400</fpage>
          <lpage>407</lpage>
          <pub-id pub-id-type="doi">10.1214/aoms/1177729586</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Vries</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Van Cranenburgh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bisazza</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Caselli</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Van Noord</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nissim</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>BERTje: A Dutch BERT Model</article-title>
          <source>GitHub</source>
          <access-date>2022-02-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/wietsedv/bertje">https://github.com/wietsedv/bertje</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</article-title>
          <source>GitHub</source>
          <access-date>2022-02-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/tensorflow/tensor2tensor">https://github.com/tensorflow/tensor2tensor</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rietzler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stabinger</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Opitz</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Engl</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Adapt or Get Left Behind: Domain Adaptation through BERT Language Model Finetuning for Aspect-Target Sentiment Classification</article-title>
          <source>arXiv</source>
          <year>2020</year>
          <fpage>11</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.48550/arXiv.1908.11860</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Eisenstein</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Unsupervised Domain Adaptation of Contextualized Embeddings for Sequence Labeling</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <fpage>4238</fpage>
          <lpage>48</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/xhan77/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1433</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hendrickx</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Voets</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>van Dyk</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kool</surname>
              <given-names>RB</given-names>
            </name>
          </person-group>
          <article-title>Using Text Mining Techniques to Identify Health Care Providers With Patient Safety Problems: Exploratory Study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>07</month>
          <day>27</day>
          <volume>23</volume>
          <issue>7</issue>
          <fpage>e19064</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/7/e19064/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19064</pub-id>
          <pub-id pub-id-type="medline">34313604</pub-id>
          <pub-id pub-id-type="pii">v23i7e19064</pub-id>
          <pub-id pub-id-type="pmcid">PMC8367101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hardjojo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gunachandran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Abdullah</surname>
              <given-names>MRB</given-names>
            </name>
            <name name-style="western">
              <surname>Wah</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chong</surname>
              <given-names>JWC</given-names>
            </name>
            <name name-style="western">
              <surname>Goh</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Teo</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Phang</surname>
              <given-names>JSK</given-names>
            </name>
          </person-group>
          <article-title>Validation of a Natural Language Processing Algorithm for Detecting Infectious Disease Symptoms in Primary Care Electronic Medical Records in Singapore</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>06</month>
          <day>11</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e36</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2018/2/e36/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.8204</pub-id>
          <pub-id pub-id-type="medline">29907560</pub-id>
          <pub-id pub-id-type="pii">v6i2e36</pub-id>
          <pub-id pub-id-type="pmcid">PMC6026305</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Im</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bae</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A pilot study on the evaluation of medical student documentation: assessment of SOAP notes</article-title>
          <source>Korean J Med Educ</source>
          <year>2016</year>
          <month>06</month>
          <volume>28</volume>
          <issue>2</issue>
          <fpage>237</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.3946/kjme.2016.26"/>
          </comment>
          <pub-id pub-id-type="doi">10.3946/kjme.2016.26</pub-id>
          <pub-id pub-id-type="medline">26996436</pub-id>
          <pub-id pub-id-type="pii">kjme.2016.26</pub-id>
          <pub-id pub-id-type="pmcid">PMC4951742</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birtwhistle</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Primary care electronic medical records: a new data source for research in Canada</article-title>
          <source>CMAJ</source>
          <year>2015</year>
          <month>03</month>
          <day>03</day>
          <volume>187</volume>
          <issue>4</issue>
          <fpage>239</fpage>
          <lpage>240</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cmaj.ca/cgi/pmidlookup?view=long&#38;pmid=25421989"/>
          </comment>
          <pub-id pub-id-type="doi">10.1503/cmaj.140473</pub-id>
          <pub-id pub-id-type="medline">25421989</pub-id>
          <pub-id pub-id-type="pii">cmaj.140473</pub-id>
          <pub-id pub-id-type="pmcid">PMC4347766</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
