<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v13i1e68863</article-id>
      <article-id pub-id-type="pmid">40053805</article-id>
      <article-id pub-id-type="doi">10.2196/68863</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Performance Improvement of a Natural Language Processing Tool for Extracting Patient Narratives Related to Medical States From Japanese Pharmaceutical Care Records by Increasing the Amount of Training Data: Natural Language Processing Analysis and Validation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yamamoto</surname>
            <given-names>Koujirou</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yeow</surname>
            <given-names>Adrian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Ohno</surname>
            <given-names>Yukiko</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-2378-2629</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Aomori</surname>
            <given-names>Tohru</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7697-6446</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Nishiyama</surname>
            <given-names>Tomohiro</given-names>
          </name>
          <degrees>MEng</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1538-8266</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Kato</surname>
            <given-names>Riri</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-0029-743X</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Fujiki</surname>
            <given-names>Reina</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0005-8682-9294</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Ishikawa</surname>
            <given-names>Haruki</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-0633-5482</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Kiyomiya</surname>
            <given-names>Keisuke</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-1586-6357</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Isawa</surname>
            <given-names>Minae</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-3068-0800</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Mochizuki</surname>
            <given-names>Mayumi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6772-4327</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Aramaki</surname>
            <given-names>Eiji</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0201-3609</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Ohtani</surname>
            <given-names>Hisakazu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Graduate School of Pharmaceutical Sciences</institution>
            <institution>Keio University</institution>
            <addr-line>1-5-30, Shibakouen, Minato</addr-line>
            <addr-line>Tokyo, 105-8512</addr-line>
            <country>Japan</country>
            <phone>81 3 5400 2486</phone>
            <email>ohtani@keio.jp</email>
          </address>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4089-9407</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Graduate School of Pharmaceutical Sciences</institution>
        <institution>Keio University</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Faculty of Pharmacy</institution>
        <institution>Takasaki University of Health and Welfare</institution>
        <addr-line>Takasaki</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Nara Institute of Science and Technology</institution>
        <addr-line>Ikoma</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Faculty of Pharmacy</institution>
        <institution>Keio University</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Pharmacy</institution>
        <institution>Keio University Hospital</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>School of Medicine</institution>
        <institution>Keio University</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Hisakazu Ohtani <email>ohtani@keio.jp</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>4</day>
        <month>3</month>
        <year>2025</year>
      </pub-date>
      <volume>13</volume>
      <elocation-id>e68863</elocation-id>
      <history>
        <date date-type="received">
          <day>25</day>
          <month>11</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>25</day>
          <month>12</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>28</day>
          <month>1</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>31</day>
          <month>1</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Yukiko Ohno, Tohru Aomori, Tomohiro Nishiyama, Riri Kato, Reina Fujiki, Haruki Ishikawa, Keisuke Kiyomiya, Minae Isawa, Mayumi Mochizuki, Eiji Aramaki, Hisakazu Ohtani. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 04.03.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2025/1/e68863" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Patients’ oral expressions serve as valuable sources of clinical information to improve pharmacotherapy. Natural language processing (NLP) is a useful approach for analyzing unstructured text data, such as patient narratives. However, few studies have focused on using NLP for narratives in the Japanese language.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to develop a high-performance NLP system for extracting clinical information from patient narratives by examining the performance progression with a gradual increase in the amount of training data.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used subjective texts from the pharmaceutical care records of Keio University Hospital from April 1, 2018, to March 31, 2019, comprising 12,004 records from 6559 cases. After preprocessing, we annotated diseases and symptoms within the texts. We then trained and evaluated a deep learning model (bidirectional encoder representations from transformers combined with a conditional random field [BERT-CRF]) through 10-fold cross-validation. The annotated data were divided into 10 subsets, and the amount of training data was progressively increased over 10 steps. We also analyzed the causes of errors. Finally, we applied the developed system to the analysis of case report texts to evaluate its usability for texts from other sources.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The <italic>F</italic><sub>1</sub>-score of the system improved from 0.67 to 0.82 as the amount of training data increased from 1200 to 12,004 records. The <italic>F</italic><sub>1</sub>-score reached 0.78 with 3600 records and was largely similar thereafter. As performance improved, errors from incorrect extractions decreased significantly, which resulted in an increase in precision. For case reports, the <italic>F</italic><sub>1</sub>-score also increased from 0.34 to 0.41 as the training dataset expanded from 1200 to 12,004 records. Performance was lower for extracting symptoms from case report texts compared with pharmaceutical care records, suggesting that this system is more specialized for analyzing subjective data from pharmaceutical care records.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We successfully developed a high-performance system specialized in analyzing subjective data from pharmaceutical care records by training it on a large dataset, with near-complete saturation of system performance at about 3600 training records. This system will be useful for monitoring symptoms, offering benefits for both clinical practice and research.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>NLP</kwd>
        <kwd>named entity recognition</kwd>
        <kwd>NER</kwd>
        <kwd>deep learning</kwd>
        <kwd>pharmaceutical care record</kwd>
        <kwd>electronic medical record</kwd>
        <kwd>EMR</kwd>
        <kwd>Japanese</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>In clinical settings, information, such as changes in a patient’s condition and the occurrence of adverse events, is essential for providing optimal pharmaceutical care. However, physicians commonly underestimate or underreport symptoms expressed by patients themselves [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. For example, a clinical study involving patients with advanced non–small cell lung cancer showed that physicians underreported grade 2 skin toxicity and fatigue and all grades of diarrhea compared with patients, with only a few grade 3 and grade 4 adverse events reported [<xref ref-type="bibr" rid="ref2">2</xref>]. In contrast, patient self-reports, including patient questionnaires and the PRO-CTCAE (patient-reported outcomes version of the Common Terminology Criteria for Adverse Events) Measurement System, are considered useful and are increasingly used in cross-sectional surveys of adverse events [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Therefore, the clinical information contained in a patient’s self-report can be used, when appropriately identified, to prevent an underestimation of patient symptoms.</p>
      <p>Unlike structured databases, such as the Japanese Adverse Drug Reaction Report Database, text data, such as patient narratives, comprising natural language, are often unstructured and ambiguous in meaning. To enable computers to handle and analyze natural languages, appropriate natural language processing (NLP) technology is necessary. For example, named entity recognition (NER) technology can be used to extract disease and symptom names from natural language text and determine whether the extracted terms are positively or negatively expressed (positive-negative classification). NER is expected to be useful for analyzing symptoms, as in adverse event monitoring. NER has been applied to data from social media platforms to investigate patient outcomes related to post–COVID-19 conditions [<xref ref-type="bibr" rid="ref6">6</xref>] and to assess the frequency of adverse drug reactions [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>Patient narratives are accumulated in pharmacist records as well as posts on social media platforms. Because pharmacists routinely assess the efficacy and safety of pharmacotherapy, their records contain a large amount of patient information regarding adverse drug events. Pharmacist records are often structured using the SOAP format: subjective data (S), objective data (O), assessment (A), and plan (P). The subjective data in the SOAP format contain the patients’ raw words. Therefore, the subjective data in pharmacist records could serve as a rich source of patient narratives related to adverse events.</p>
      <p>As mentioned above, some investigations have been conducted to extract information, such as diseases and symptoms, from text data. For example, Nikfarjam et al [<xref ref-type="bibr" rid="ref7">7</xref>] developed a neural network–based system to extract adverse drug events from patient postings on social health networks in English. Similarly, Batbaatar et al [<xref ref-type="bibr" rid="ref8">8</xref>] employed deep learning methods to extract not only diseases and symptoms but also pharmacological substances and other health-related information from social media service posts in English. However, most NER research has focused on physician records [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>] or data in English [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref18">18</xref>], with few studies targeting Japanese pharmacist records.</p>
      <p>In a study focusing on Japanese pharmacist records, Usui et al [<xref ref-type="bibr" rid="ref19">19</xref>] developed a rule-based system to extract patient complaints from the electronic medication records of Japanese community pharmacies and standardize them using International Statistical Classification of Diseases and Related Health Problems 10th Revision (ICD-10) codes. However, the rule-based system could not handle patient words not appearing in its predefined list of rules. Therefore, we hypothesized that a deep learning model could extract diseases and symptoms from the text more accurately than the rule-based system of Usui et al [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
      <p>A system has already been developed to target physician records based on a deep learning model, bidirectional encoder representations from transformers (BERT)–conditional random field (CRF). BERT is an effective NLP model that can be fine-tuned for a variety of tasks [<xref ref-type="bibr" rid="ref20">20</xref>]. In addition, several studies have reported that BERT-CRF [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], which adds a CRF layer to the output, performs well in the NER task. We previously examined whether the current system could be adapted to pharmaceutical care records without modification [<xref ref-type="bibr" rid="ref23">23</xref>]. The system showed relatively high performance for the assessment data from pharmaceutical care records written in SOAP format. However, the performance of the system was inadequate for subjective data, objective data, and the plan. Furthermore, taking into account the importance of subjective data in adverse event analysis, it was determined that the system needed to be trained using subjective data.</p>
      <p>In this study, we aimed to develop a new NLP system to accurately extract disease and symptom names (disease name extraction) from patients’ subjective data and determine whether these conditions are affirmed or denied (ie, present or absent). In addition, it is generally accepted that system performance improves with the amount of training data used. However, case reports and physician summaries, on which more research has been conducted than on patient narratives, are organized in physicians’ own words. On the other hand, patients’ narratives are freely worded and unorganized. Therefore, it is important to know how the system’s performance changes as the amount of training data increases when targeting such unorganized data. Furthermore, from an annotation cost perspective, annotation of medical documents requires more cost and effort due to its highly specialized and complicated nature. Therefore, resources can be distributed efficiently by predicting the approximate amount of annotation with a high learning effect. Thus, we also investigated how the system performance changed as the amount of training data increased.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>We aimed to develop a system that extracts disease names from patients’ subjective texts and determines whether the conditions are affirmed or denied in the sentence (<xref rid="figure1" ref-type="fig">Figure 1</xref>). The input was processed sentence by sentence. To develop and evaluate the system, this study included 5 parts: training data preparation, model training, evaluation, error analysis, and evaluation of the usability of the system for case reports (<xref rid="figure2" ref-type="fig">Figure 2</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Conceptual diagram of the disease-name extraction system operation. After inputting preprocessed text data into the system sentence by sentence, the system extracts diseases and symptoms as outputs and predicts the presence or absence of each finding. BERT: bidirectional encoder representations from transformers; CRF: conditional random field.</p>
          </caption>
          <graphic xlink:href="medinform_v13i1e68863_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Overview of this study. Subjective data were collected from pharmaceutical care records in the electronic medical record system. Data were preprocessed, and diseases and symptoms in the data were annotated to prepare the training data. A disease-name extraction system was developed, and its performance was evaluated through 10-fold cross-validation with progressive increases in the amount of training data over 10 steps. Errors that occurred during the performance evaluation were sampled, and the causes of the errors were analyzed. The developed system was applied to case reports, and its performance for case reports was investigated. BERT: bidirectional encoder representations from transformers; CRF: conditional random field.</p>
          </caption>
          <graphic xlink:href="medinform_v13i1e68863_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In the training data preparation, subjective texts were preprocessed and annotated. In the deep learning part, a disease-name extraction system was constructed based on a deep learning model by increasing the amount of training data in 10 steps. In the evaluation part, cross-validation was used to evaluate performance at the same time as training. We sampled the evaluation results and classified the causes of errors in the error analysis part. In the evaluation of the usability of the system for case reports, we evaluated the performance of the developed system when applied to case reports. Each step has been described in the following sections.</p>
      </sec>
      <sec>
        <title>Training Data Preparation</title>
        <p>The pharmaceutical care records in the electronic medical record system of Keio University Hospital, written by pharmacists in Japanese, were used as the source of training data. These records comprise 5 columns: subjective, objective, assessment, plan, and free text. In some cases, SOAP-formatted sentences are included in the free-text column. In this study, we collected 12,004 subjective data records from 6559 cases in the subjective and free-text columns described from April 1, 2018, to March 31, 2019.</p>
        <p>All text data in the pharmaceutical care records were converted to full-width (2-byte) characters. Line breaks or periods were used as sentence separators, and spaces at the beginning and end of sentences, as well as blank lines, were deleted. Additionally, the string “_X000D_,” indicating a line break, was removed. Two researchers independently annotated 400 records based on the annotation criteria described in the following paragraph. A comparison between their annotations obtained a high κ coefficient (κ coefficient=0.92), indicating almost perfect agreement. Therefore, the remaining data were annotated by a single researcher.</p>
        <p>The annotation criteria were originally developed by our group to collect information for adverse event monitoring in clinical practice and epidemiological studies. Patient expressions related to diseases or symptoms, including nouns, verbs, adjectives, and adverbs, were extracted. The time of onset, site, severity, and triggers of symptoms were also extracted when they were described near the clinical findings. Numerical information was excluded because such data should be obtained from the structured electronic medical record database rather than text data. However, if the text verbally referred to changes in laboratory values, such as “increase in blood pressure,” it was extracted as a target. Texts referring to normal renal function, liver function, blood electrolyte levels, appetite, sleep, and bowel movements were interpreted as negative references to abnormalities (ie, denial of symptoms) and were included in the extraction.</p>
      </sec>
      <sec>
        <title>Deep Learning Method</title>
        <p>In this study, we used BERT-CRF as a deep learning method. Several pretrained Japanese BERT models have been developed and published. In this study, we used the character-based BERT of Tohoku University, which is the most commonly used pretrained BERT model for Japanese. BERTJapaneseTokenizer [<xref ref-type="bibr" rid="ref24">24</xref>], which uses MeCab as a morphological analyzer, was used as a tokenizer. Fine-tuning was performed with a batch size of 32 and with 10-fold cross-validation. In the 10-fold cross-validation, data were shuffled by sentence and then split into 10 groups (folds). The amount of training data was increased in 10 steps from 1200 to 12,004 records.</p>
      </sec>
      <sec>
        <title>Evaluation</title>
        <p>Precision, recall, and <italic>F</italic><sub>1</sub>-score were used as indices for performance evaluation. Precision was calculated as follows: number of true positives/(number of true positives + number of false positives). Recall was calculated as follows: number of true positives/(number of true positives + number of false negatives). <italic>F</italic><sub>1</sub>-score was calculated as follows: 2 × precision × recall/(precision + recall).</p>
        <p>The final accuracy was calculated with the cross-validation method. The number of epochs was determined based on the <italic>F</italic><sub>1</sub>-scores obtained during cross-validation. We also investigated the performance by taking partial matches into consideration, as partially matched terms can still convey valuable clinical information. The Levenshtein distance [<xref ref-type="bibr" rid="ref25">25</xref>] was used as the criterion for partial matches. The Levenshtein distance measures the resemblance between 2 strings (W1 and W2) and is defined as the minimum number of character deletion, insertion, and replacement operations required to convert one string into the other. Similarity was calculated using the following formula: similarity (W1, W2) = {max (|W1|, |W2|) − Levenshtein distance}/max (|W1|, |W2|), where |Wn| represents the number of characters (length) of Wn. Similarity values ranged from 0 to 1. We sampled the results of the cross-validation to list the partial matches and investigated any failures to extract essential terms that were correctly detected by researchers due to partial matches. Detailed results of this survey are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. In this survey, the number of incomplete extractions without essential terms increased rapidly when the similarity was below 0.667. Of the 63 extracted terms with a similarity of 0.667 or higher, 4 terms had partially or entirely missing essential terms. However, at the next lowest similarity levels of 0.636 and 0.625, 5 of 9 extracted terms had partially or entirely missing essential terms. Therefore, in this study, “the number of matches, including partial matches” was defined as the sum of the number of complete matches and partial matches with a similarity of 0.66 or greater. In the error analysis, “partial matches with a lower similarity,” described in the next section, were defined as partial matches with a similarity of less than 0.66. During the process of information processing in BERT, infrequent characters were converted to unknown keys ([UNK]). Because the data that we used to evaluate partial matches contained [UNK], we converted all [UNK] into full-width asterisks to calculate the similarity of the Levenshtein distances by regarding them as single characters.</p>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>For 5 steps of the amount of training data, the results of the system were compared with the annotated diseases and symptoms in the validation data, and the causes of mismatches were classified into 4 categories (<xref ref-type="table" rid="table1">Table 1</xref>): error 1, failure of the system to extract; error 2, incorrect extraction by the system; error 3, difference in positive-negative classification; and error 4, partial matches with low similarity. Because the amount of data contained in 1 fold was different for each training amount, we used the number of folds that contained close to 1200 records: 10 folds, 3 folds, 2 folds, 1 fold, and 1 fold in the training data of 1200, 3600, 6002, 8402, and 12,004 records, respectively. The choice of which fold of the validation data to use for this analysis was based on the proximity of the <italic>F</italic><sub>1</sub>-score to the average rates when they were used in the evaluation. In this study, we defined the total number of extractions by the following formula: total number of extractions = number of researchers’ extractions + number of system extractions − (number of exact matches + number of partial matches with a similarity of 0.66 or greater). The error rate was given as the ratio of the number of errors to the total number of extractions: error rate (%) = number of errors × 100/total number of extractions.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Definitions of error cause categories.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="280"/>
            <col width="160"/>
            <col width="200"/>
            <col width="230"/>
            <col width="130"/>
            <thead>
              <tr valign="bottom">
                <td>P-N<sup>a</sup> classification</td>
                <td colspan="4">Named entity recognition</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Exact match</td>
                <td colspan="2">Partial match</td>
                <td>Unmatch</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>1&gt;similarity≥0.66</td>
                <td>0.66&gt;similarity&gt;0</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="bottom">
                <td>Match</td>
                <td>Exact match</td>
                <td>Partial match</td>
                <td>Error 4<sup>b</sup></td>
                <td>Error 1<sup>c</sup>/2<sup>d</sup></td>
              </tr>
              <tr valign="bottom">
                <td>Unmatch</td>
                <td>Error 3<sup>e</sup></td>
                <td>Error 3<sup>e</sup></td>
                <td>Error 1<sup>c</sup>/2<sup>d</sup></td>
                <td>Error 1<sup>c</sup>/2<sup>d</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>P-N: positive-negative.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Error 4: partial matches with low similarity.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>Error 1: system extraction failure.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>Error 2: incorrect extraction by the system.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>Error 3: difference in the P-N classification.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>In cases where the number of extracted terms differed between the researchers and the system, the number of errors was counted for the larger number. For example, when one extracted 2 symptoms and the other extracted them as a single (long-named) symptom, we counted them as 2 errors. Therefore, the sum of the error rate and the correct answer rate could exceed 100%. We further investigated changes in the error rates of the 4 categories with an increase in the amount of training data. For error categories with a greater increase in variation of the error rate obtained by the following formula, a detailed descriptive error analysis was performed: variation of error rates (%) = error rates after increasing training amount/error rates before increasing training amount × 100.</p>
        <p>When the error rate before increasing the training amount was 0% and the error rate after increasing the training amount was greater than 0%, the variation of error rates was regarded as 100%.</p>
      </sec>
      <sec>
        <title>Evaluation for Case Reports</title>
        <p>To evaluate whether the developed system can be applied to other types of texts, we assessed the system performance using Japanese case report data. A series of 148 case reports was collected as validation data from the Real-MedNLP Test Collection. These data were distributed in the Real-MedNLP Test Collection and permitted for redistribution by the respective journal publisher [<xref ref-type="bibr" rid="ref10">10</xref>]. These reports are available as open access through the “Japan Science and Technology Information Aggregator, Electronic,” an electronic journal platform operated by the Japan Science and Technology Agency.</p>
        <p>Case report validation data were also preprocessed and annotated in the same manner as the training data. Two researchers independently annotated 15 cases, approximately 10% of the total cases. Because the agreement between their annotations was almost perfect, with a κ coefficient of 0.93, the remainder was annotated by a single researcher. The validation data were applied to the 10 systems that were developed in the 10-fold cross-validation of each training amount to evaluate performance. The individual performances of the 10 systems were averaged and regarded as the performance of the system for each training amount.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was conducted with the approval of the Keio University School of Medicine Ethics Committee (approval number: 20200067).</p>
        <p>An opt-out method was used to ensure that study subjects had the opportunity to refuse the use of their information, since obtaining informed consent was difficult due to the large number of subjects and the fact that many of them had already been discharged from the hospital and had no direct contact with the hospital. Information on the research has been disclosed on the website of the Clinical Translational Research Center, Keio University Hospital.</p>
        <p>Each subject’s electronic medical record information was assigned a dummy ID at the time of registration, and information was managed through pseudonymization using dummy IDs.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Training Data Features</title>
        <p>The 12,004 subjective data records in the pharmaceutical care records used as training data contained 43,553 sentences. These records included 12,287 affirmed diseases and symptoms and 8822 denied diseases and symptoms.</p>
      </sec>
      <sec>
        <title>Evaluation</title>
        <p>The training curves for precision, recall, and <italic>F</italic><sub>1</sub>-score values for exact matches are shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. As the training data increased from 1200 to 12,004 records, precision, recall, and <italic>F</italic><sub>1</sub>-score improved from 0.66 to 0.81, from 0.69 to 0.83, and from 0.67 to 0.82, respectively. A power approximation expression for the <italic>F</italic><sub>1</sub>-score of exact matches yielded the equation: y = 0.40x<sup>0.08</sup>, predicting an <italic>F</italic><sub>1</sub>-score of 0.90 with 25,251 training records. Performance improvement saturated after training with 3600 records, indicating that a large amount of training data is needed to improve the <italic>F</italic><sub>1</sub>-score beyond 0.78. Comparing the <italic>F</italic><sub>1</sub>-scores for exact matches and those including partial matches, the performance gap tended to diminish as the amount of training data increased (<xref rid="figure4" ref-type="fig">Figure 4</xref>). The system trained with 12,004 records achieved an <italic>F</italic><sub>1</sub>-score of 0.82 for exact matches and 0.84 when partial matches were included.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Training curve during cross-validation (only exact matches). Trends in the mean precision, recall, and F1-score values are shown from 1200- to 12,004-record trainings.</p>
          </caption>
          <graphic xlink:href="medinform_v13i1e68863_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Training curve of the F1-scores during cross-validation (only exact matches/including partial matches). Trends in the mean F1-scores in which only exact matches were considered matches and in the mean F1-scores in which partial matches were included as matches are shown from 1200- to 12,004-record trainings.</p>
          </caption>
          <graphic xlink:href="medinform_v13i1e68863_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>Error analysis was carried out for 4395, 3924, 4396, 3071, and 4356 sentences from the validation data during cross-validation using 1200, 3600, 6002, 8402, and 12,004 records, respectively. The causes of errors were classified, and <xref rid="figure5" ref-type="fig">Figure 5</xref> shows the transition of error rates for each cause. The overall error rate decreased from 31.9% (945/2963) to 18.2% (444/2446). The rate of error 2 (incorrect extraction by the system) showed the most significant decrease with an increase in the training records, followed by errors 4, 3, and 1. Error 2 decreased from 9.4% (280/2963) to 3.7% (90/2446) (–5.7 percentage points). Errors 4, 3, and 1 decreased from 9.4% (278/2963) to 6.1% (148/2446) (–3.3 percentage points), from 7.1% (209/2963) to 4.7% (114/2446) (–2.4 percentage points), and from 6.0% (178/2963) to 3.8% (92/2446) (–2.2 percentage points), respectively. Error 4 was the primary cause of error for the system trained with 12,004 records, followed by errors 3, 1, and 2. The 3600-record training, at which performance reached a plateau, was used as a reference point. Comparing the variation in the error rates associated with the increase in the training amount from 1200 to 3600 records to the variation in the error rates associated with the increase in the training amount from 3600 to 12,004 records, the variation for errors 1 and 4 decreased by 9 percentage points and 5 percentage points, respectively, while the variation for errors 2 and 3 increased by 24 percentage points and 10 percentage points, respectively, indicating that improvement was stagnant. Therefore, we categorized errors 2 and 3 into subcategories and investigated the difference in the variation in error rates before and after the 3600-record training.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Trends in error rates for each error cause category. A sample study of performance evaluation results was conducted from 1200- to 12,004-record trainings. The errors are classified into 4 cause categories, and trends in error rates for each category are shown. P-N: positive-negative.</p>
          </caption>
          <graphic xlink:href="medinform_v13i1e68863_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Error 2 was further classified into 7 subgroups (<xref ref-type="table" rid="table2">Table 2</xref>). The most common errors in the 1200-record training involved error 2a (the extraction of information that was neither symptoms nor supplemental information related to symptoms). This subcategory encompassed a wide variety of patient expressions, including behaviors, such as “strained in the bathroom,” indications of healthiness, such as “my blood pressure has been quite good,” and medication-related issues unrelated to patient symptoms, such as “the remaining number (of medications) became limited.” The second most common subcategory, error 2b (incomplete extraction without essential terms), involved errors in extracting terms that were not considered symptoms due to the absence of a subject, such as “loose” or “didn’t go well with me.” It also included cases where only supplemental words were extracted, such as “at night” or “very much.” The third most common subcategory, error 2c, involved errors related to the extraction of conditional terms that were annotated or not annotated by researchers depending on the context or were not finally annotated after the researcher was faced with a difficult decision. For example, researchers would annotate the patient expression “I couldn’t eat” as it indicates a symptom of the loss of appetite, while the expression “I couldn’t eat because I was too sleepy” would not be attributed to appetite. In addition, the expression “my stomach went like this” may have been regarded as a symptom because it represented an abnormal condition. However, the researchers finally decided not to annotate it. The next most common error subcategories were errors 2d and 2e (partial matches with low similarity and an incorrect positive-negative classification), which involved either entire or partial extraction of essential terms.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Number of errors and the ratio of errors to total extraction in the subcategories of error 2.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="340"/>
            <col width="220"/>
            <col width="220"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td>Subgroup</td>
                <td>Number of errors in 1200-record training (n=2963), n (%)</td>
                <td>Number of errors in 3600-record training (n=2387), n (%)</td>
                <td>Number of errors in 12,004-record training (n=2446), n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Total errors of category 2</td>
                <td>280 (9.4)</td>
                <td>130 (5.4)</td>
                <td>90 (3.7)</td>
              </tr>
              <tr valign="top">
                <td>Error 2a: Extraction of information that was neither symptoms nor supplemental information</td>
                <td>86 (2.9)</td>
                <td>32 (1.3)</td>
                <td>12 (0.5)</td>
              </tr>
              <tr valign="top">
                <td>Error 2b: Incomplete extraction without essential terms</td>
                <td>76 (2.6)</td>
                <td>27 (1.1)</td>
                <td>16 (0.7)</td>
              </tr>
              <tr valign="top">
                <td>Error 2c: Extraction of conditional terms that were annotated or not by researchers depending on the context or were not finally annotated after the researcher was faced with a difficult decision</td>
                <td>55 (1.9)</td>
                <td>47 (2.0)</td>
                <td>32 (1.3)</td>
              </tr>
              <tr valign="top">
                <td>Error 2d: Partial matches with low similarity and incorrect P-N<sup>a</sup> classifications that involved entire extractions of essential terms</td>
                <td>35 (1.2)</td>
                <td>15 (0.6)</td>
                <td>12 (0.5)</td>
              </tr>
              <tr valign="top">
                <td>Error 2e: Partial matches with low similarity and incorrect P-N classifications that involved partial extractions of essential terms or incomplete extractions without essential terms</td>
                <td>19 (0.6)</td>
                <td>2 (0.08)</td>
                <td>3 (0.1)</td>
              </tr>
              <tr valign="top">
                <td>Error 2f: Symptoms that the researchers forgot to extract</td>
                <td>7 (0.2)</td>
                <td>6 (0.3)</td>
                <td>12 (0.5)</td>
              </tr>
              <tr valign="top">
                <td>Error 2g: Partial matches with low similarity involving entire extractions of symptoms for which the researchers made incorrect P-N classifications</td>
                <td>2 (0.1)</td>
                <td>1 (0.04)</td>
                <td>3 (0.1)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>P-N: positive-negative.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>From 1200- to 12,004-record trainings, significant improvements were observed in the following order: “extraction of information that was neither symptoms nor supplemental information (error 2a),” “incomplete extraction without essential terms (error 2b),” and “partial matches with low similarity and incorrect positive-negative classifications that involved entire extractions of essential terms (error 2d).” In contrast, “symptoms that the researchers forgot to extract (error 2f)” and “partial matches with low similarity involving entire extractions of symptoms for which the researchers made incorrect positive-negative classifications (error 2g)” increased from 0.2% (7/2963) to 0.5% (12/2446) and from 0.07% (2/2963) to 0.12% (3/2446), respectively. However, other subgroups showed overall improvements.</p>
        <p>The differences in the variation of each subcategory before and after the 3600-record training were large in the following order: “partial matches with low similarity involving entire extractions of symptoms for which the researchers made incorrect positive-negative classifications (error 2g)” (231 percentage points), “partial matches with low similarity and incorrect positive-negative classifications that involved partial extractions of essential terms or incomplete extractions without essential terms (error 2e)” (133 percentage points), “symptoms that the researchers forgot to extract (error 2f)” (89 percentage points), “partial matches with low similarity and incorrect positive-negative classifications that involved entire extractions of essential terms (error 2d)” (25 percentage points), “incomplete extraction without essential terms (error 2b)” (14 percentage points), “extraction of information that was neither symptoms nor supplemental information (error 2a)” (–10 percentage points), and “extraction of conditional terms that were annotated or not by researchers depending on the context or were not finally annotated after the researcher was faced with a difficult decision (error 2c)” (–40 percentage points). A larger difference in the variation indicates a relative stagnation in the improvement of errors on increasing the training amount beyond 3600 records.</p>
        <p>Error 3 was classified into 5 subcategories (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). The top 3 subcategories with the largest differences in variation before and after the 3600-record training in each subcategory were as follows: “determined from positive expressions (error 3b)” (125 percentage points), “determined from negative expressions (error 3c)” (64 percentage points), and “determined from other information (error 3a)” (–8.4 percentage points).</p>
        <p>The contents of errors 3b and 3c, which showed particularly large differences in variation rates, were classified into further subcategories based on 2 points: the position of the positive-negative expression and the way in which the positive-negative expression was stated (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> and <xref ref-type="supplementary-material" rid="app4">4</xref>). Based on the method focusing on the position and the way of positive-negative expressions, we classified them into 5 and 6 subcategories, respectively. The differences in the variation before and after the 3600-record training in the subcategories focusing on the position of positive-negative expressions were large in this order: “more than three words are in between from the extracted terms (error 3bc-1-4)” (369 percentage points), “determination with reference to the positive-negative classification of another extracted term in the same sentence (error 3bc-1-5)” (200 percentage points), “included within the extracted terms (error 3bc-1-2)” (195 percentage points), “one or two words are in between from the extracted terms (error 3bc-1-3)” (90 percentage points), and “immediately before or/and after the symptom (error 3bc-1-1)” (60 percentage points).</p>
        <p>The differences in the variation before and after the 3600-record training in the subcategories focusing on the way of positive-negative expressions were large in this order: “determination with reference to the positive-negative classification of another extracted term in the same sentence (error 3bc-2-5)” (200 percentage points), “reversal of positive-negative classification by partial match (error 3bc-2-6)” (200 percentage points), “included within the extracted terms (error 3bc-2-2)” (157 percentage points), “mild negative expressions (error 3bc-2-3)” (119 percentage points), “simple positive or negative expressions (error 3bc-2-1)” (76 percentage points), and “negated positive or negative expressions (error 3bc-2-4)” (–15 percentage points).</p>
        <p>“Error 3bc-1-5 (3bc-2-5): determination with reference to the positive-negative classification of another extracted term in the same sentence” and “error 3bc-2-6: reversal of positive-negative classification by partial match” had an error rate of 0% at the 3600-record training. Therefore, the difference in variability was large, as only one or two errors at the 12,004-record training resulted in 100% variation.</p>
      </sec>
      <sec>
        <title>Evaluation of Case Reports</title>
        <p><xref rid="figure6" ref-type="fig">Figure 6</xref> shows the improvement in system performance for the handling of the text data of case reports with an increase in the number of training datasets. The <italic>F</italic><sub>1</sub>-score increased from 0.34 to 0.41 with an increase in the training dataset and saturated at 3600 records of the training dataset. The differences between the <italic>F</italic><sub>1</sub>-scores for case reports and those for pharmaceutical care records were 0.33 and 0.41 at 1200 and 12,004 training dataset records, respectively. The differences between the <italic>F</italic><sub>1</sub>-scores for exact matches and those including partial matches were larger than those at cross-validation. In the performance evaluation for case reports, the <italic>F</italic><sub>1</sub>-scores for partial matches were always 0.04-0.05 higher than those for exact matches (<xref rid="figure7" ref-type="fig">Figure 7</xref>).</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Performance of the case report analysis (only exact matches). Case reports were analyzed using systems trained with between 1200 and 12,004 pharmaceutical care records. Trends in the mean precision, recall, and F1-score values, which included only exact matches as matches, are shown.</p>
          </caption>
          <graphic xlink:href="medinform_v13i1e68863_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>F1-scores of the case report analysis (only exact matches/including partial matches). Case reports were analyzed using systems trained with between 1200 and 12,004 pharmaceutical care records. Trends in the mean F1-scores in which only exact matches were considered matches and in the mean F1-scores in which partial matches were included as matches are shown.</p>
          </caption>
          <graphic xlink:href="medinform_v13i1e68863_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We developed a disease-name extraction system targeting subjective data in Japanese pharmaceutical care records by fine-tuning BERT-CRF. The performance of the system, evaluated by exact matches with a correct positive-negative classification, improved as the amount of training data increased, even though the system targeted subjective data, which has not been adequately addressed before. A similar trend was observed when evaluated by matches including partial matches, with a similarity score of 0.66 or higher. Although there was a certain number of partial match extractions, the extraction performance with partial matches was close to that for exact matches. This could be attributed to the strict criterion for similarity used in this study. The system performance increased slowly and reached a plateau at the 3600-record training. The results were presumably influenced by a combination of factors, including the deep learning model used (BERT-CRF), the complex annotation criteria, and human error in the annotation, which cannot be determined from this study. To resolve this point and further improve system performance, the use of deep learning models other than BERT-CRF should be considered. In addition, system performance will vary depending on the content of the training data. Therefore, when other training data are used to develop the system, it is difficult to say whether the system performance will reach a plateau with the same training amount as in this study. However, by utilizing the characteristics of a gradual curve in performance transition, efficient training can be achieved by increasing the amount of training data step by step from a small amount and using an approximate formula to predict the training amount when performance reaches a plateau.</p>
        <p>The analytical performance of the system was lower for case reports than for pharmaceutical care records. When records from different sources were used as training data, the improvement in system performance with increasing training data was inferior to that observed when training with records from the same source type. In addition, the analytical performance gap between text from the same type of training data and that from different types of data persisted, regardless of the amount of training data. In other words, the developed system was not well-suited for analyzing case reports but proved especially useful for analyzing patient narratives when trained on subjective data from pharmaceutical care records. This indicates that separate system development is necessary to analyze different types of records. The difference in performance between the case report analysis and the pharmaceutical care record analysis is assumed to be largely due to the existence of raw patient statements. However, another important feature is the different occupations of the health care professionals who create the records. Physicians create case reports covering the patients’ conditions comprehensively and summarizing the patients’ backgrounds and disease processes in their own words, while pharmacists mainly monitor drug efficacy and side effects and create pharmaceutical care records. Therefore, pharmacists naturally create SOAP format records that contain a lot of information about adverse events. We would like to emphasize again that our system is likely to be suitable for the collection of adverse event information since the system learned from the subjective data in pharmaceutical care records. Furthermore, since the pharmaceutical care records were written in SOAP format, we were able to selectively collect subjective data and use them for the training data in this study. As a result, a system specialized for subjective data was efficiently developed. This suggests that SOAP-format data may be a useful source for deep learning.</p>
        <p>Error analysis revealed that the reduction in error 2 with increased training data was 5.7 percentage points (from 9.4% to 3.7%), a larger decrease than that observed for any other error type. This reduction in false-positive extractions significantly contributed to the improvement in precision. Although we prioritized recall over precision during system development to avoid missing symptoms, the increase in training data led to higher precision without compromising recall. Therefore, we believe that increasing the training data helped to develop a more user-friendly system with fewer false extractions.</p>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>While errors in most subgroups of error 2 decreased, there were increases in error 2f (symptoms that the researchers forgot to extract) and error 2g (partial matches with low similarity involving entire extractions of symptoms for which the researchers made incorrect positive-negative classifications). Researchers may be more prone to making annotation mistakes as the amount of data to be prepared increases and as they encounter a wider variety of expressions. However, the percentage of these errors in total extractions was only 0.3% (9/2963) for the 1200-record training and 0.6% (15/2446) for the 12,004-record training, and thus, their impacts on overall system performance were small. Additionally, the system showed improvements in other types of false positives, indicating that increasing the training data can help to reduce a wide variety of false-positive errors.</p>
        <p>Next, we looked at the variation in error rates for error 2. Among errors 2g, 2e, 2f, and 2d, which showed large differences in error rate variation, “partial matches with low similarity involving entire extractions of symptoms for which the researchers made incorrect positive-negative classifications (error 2g)” and “partial matches with low similarity and incorrect positive-negative classifications that involved partial extractions of essential terms or incomplete extractions without essential terms (error 2e)” were almost eliminated at the 3600-record training. Errors related to “symptoms that the researchers forgot to extract (error 2f)” and “partial matches with low similarity and incorrect positive-negative classifications that involved entire extractions of essential terms (error 2d)” were caused by researchers’ mistakes or errors in the extraction range or positive-negative classification, but the system was able to partially recognize the information to be extracted. Therefore, these errors may have been difficult to resolve.</p>
        <p>Among errors 2b, 2a, and 2c, which showed limited differences in error rate variation, the extracted terms or pre/post descriptions of “extraction of information that was neither symptoms nor supplemental information (error 2a)” and “incomplete extraction without essential terms (error 2b)” were different from the real extraction target. Concerning the system’s context-readable specification, the system appears to have learned that the contexts before and after the incorrect extractions were not symptom-related contexts, reducing errors 2a and 2b, as it learned from large amounts of training data. “Error 2c” error rates increased at the 3600-record training. This finding indicates that error 2c improved less than errors 2a and 2b when comparing the 1200- and 12,004-record trainings, although the difference in variation was small. Error 2c is more difficult to improve than errors 2a and 2b, presumably because error 2c contains expressions that researchers are also unsure about and that may be extracted depending on the situation.</p>
        <p>Among the subcategories of error 3 focusing on the position of positive-negative expressions, “included within the extracted terms (error 3bc-1-2)” (195 percentage points) had a larger difference in error rate variation than “one or two words are in between from the extracted terms (error 3bc-1-3)” (90 percentage points) and “immediately before or/and after the symptom (error 3bc-1-1)” (60 percentage points). This suggests that the system may be able to perform correct positive-negative classification when the extracted terms and the positive-negative expressions are close to each other but not contained within the extracted terms.</p>
        <p>Among the subcategories of error 3 focusing on the way of positive-negative expressions, simple sentences were considered easy to judge: “simple positive or negative expressions (error 3bc-2-1)” (76 percentage points) and “negated positive or negative expressions (error 3bc-2-4)” (–15 percentage points). In “mild negative expressions (error 3bc-2-3)” (119 percentage points), on the other hand, the researchers focused on the description of the degree of denial to determine whether the symptoms were completely denied. However, due to the variety of degree descriptions, these annotation criteria could have been cumbersome for the system.</p>
        <p>Through this error analysis, it was inferred that while the system can easily improve the extraction results with reference to the context near the extracted terms due to increased learning, the system cannot handle cases where the researchers are unsure of the decision or complex annotation criteria in which the extraction results change depending on the situation.</p>
      </sec>
      <sec>
        <title>Evaluation</title>
        <p>The system developed in this study performed better than the systems in previous studies focused on the Japanese language. In a previous study of Japanese patient narratives, Usui et al [<xref ref-type="bibr" rid="ref19">19</xref>] aimed to thoroughly extract patient complaints from subjective data using a rule-based system. The high performance achieved by our deep learning–based system suggests that our system had considerable flexibility in handling data where symptoms could not be extracted through rules alone. As a result, the performance of our system, with an <italic>F</italic><sub>1</sub>-score of 0.82, surpassed that of the system by Usui et al [<xref ref-type="bibr" rid="ref19">19</xref>] (<italic>F</italic><sub>1</sub>-score of 0.65), although it should be noted that the output functions of the 2 systems were not exactly the same, complicating a direct comparison. Similarly, Aramaki et al [<xref ref-type="bibr" rid="ref26">26</xref>] used Japanese case reports as training data to develop a system for extracting a variety of symptoms. The <italic>F</italic><sub>1</sub>-scores of their system were 0.87 for NER and 0.63 for NER with positive-negative classification [<xref ref-type="bibr" rid="ref26">26</xref>], both of which were lower than the scores achieved by our system. Although many previous studies of NER have focused on specific symptoms or patient populations, our system has the advantage of covering a broader range of patients and extracting a wider variety of symptoms. This makes it particularly useful for comprehensively collecting information on patient narratives.</p>
        <p>The performance of the developed system is the same as or even higher than that of similar systems in English, which aimed to extract a variety of symptomatic adverse drug events from patient narratives. For example, the neural network–based system developed by Nikfarjam et al [<xref ref-type="bibr" rid="ref7">7</xref>] achieved a micro-average <italic>F</italic><sub>1</sub>-score of 0.74 in NER for drug adverse events in patient posts on social health networks. Another system by Batbaatar et al [<xref ref-type="bibr" rid="ref8">8</xref>] achieved an <italic>F</italic><sub>1</sub>-score of 0.82 for NER of diseases or syndromes and an <italic>F</italic><sub>1</sub>-score of 0.88 for signs and symptoms from social media posts. Of these 2 systems, one extracted only the adverse events experienced by the patient, while the other did not involve positive-negative classification. Therefore, our system, despite its added complexity with positive-negative classification, appears to have comparable or even superior performance compared with the aforementioned systems.</p>
      </sec>
      <sec>
        <title>Future Applications</title>
        <p>The developed system is expected to be applied to monitor adverse events based on patient reports and narratives. Among previous studies using NER systems, the study by Ujiie et al [<xref ref-type="bibr" rid="ref27">27</xref>] aimed to reduce the screening burden of drug safety information for pharmaceutical companies by identifying medical articles that contained descriptions of adverse drug events. Similarly, the system developed by Nishioka et al [<xref ref-type="bibr" rid="ref28">28</xref>] sought to detect a specific adverse reaction—hand-foot syndrome—by identifying blog posts from patients with breast cancer, analyzing each text and post to determine whether it involved the adverse reaction. However, they did not use an NER approach. Given that our system has the advantage of extracting a wide variety of symptoms from diverse patient expressions, it is better positioned for broader use in monitoring adverse events across a wider range of conditions.</p>
        <p>Although we focused on patient narratives, assessments recorded by health care professionals are undoubtedly another important source of patient information. Specifically, patient summaries, such as discharge and transfer summaries, contain information that has been reviewed, selected, and organized by health care professionals. However, evaluations by health care professionals carry the risk of underestimation and underreporting [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. When using any medical records for research, including pharmaceutical care records, there is a concern about the filtering of information by health care professionals. However, we succeeded in establishing a system for analyzing patient narratives. The next step is to target patient narratives automatically transcribed by voice input so that the analysis of patient narratives can be achieved with fewer omissions in the recordings. By comparing patient narratives collected by either method with professional assessments using a system such as ours, the gap between patients and health professionals can be identified. The developed system could contribute to achieving gap-free assessment and care between patients and health care professionals through the identification and analysis of the gap.</p>
      </sec>
      <sec>
        <title>Limitations of This Study</title>
        <p>One limitation of this study is that we used data from a single facility. Therefore, further verification is needed to determine whether this system can be used for subjective data from other facilities. Since subjective data include raw patient speech, differences in dialect and the patient’s disease and treatment are assumed to be factors that cause differences in the content of the descriptions between facilities. Therefore, while this study used data from an acute care hospital in Tokyo, the system may perform differently for data from chronic care hospitals, community pharmacies, and medical facilities in areas where dialects are spoken. Validation is expected to be conducted in the future using data from medical facilities with the same features and different features to reveal the versatility of the system.</p>
        <p>Another issue is the evaluation of the validity of the system in clinical settings. For example, <italic>F</italic><sub>1</sub>-score, the evaluation index used in this study, provides a relative evaluation and can be used to compare systems under the same conditions. Since there is no standard value for judging practicality, the results cannot be used to judge the applicability in actual clinical settings. However, since different applications require different performances, the system is potentially usable in a real clinical setting even if it does not have 100% performance. For example, in signal detection, capturing overall trends is more important than avoiding the oversight of individual cases, so the system is useful for processing large amounts of data. Another limitation is the lack of standardization for extracted terms, which could be addressed by developing additional databases or systems.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This is the first study to develop an NER system to extract disease and symptom names from subjective data in Japanese hospital pharmaceutical care records and to investigate how its performance improves with increasing training data. The developed system demonstrated high performance in extracting disease and symptom names when trained on a large dataset, although performance improved gradually as the training amount increased, reaching a plateau after training on approximately 3600 records.</p>
        <p>The system enables the monitoring of various symptoms using patient narratives as a source and is expected to be a valuable tool for supporting both clinical practice and research.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Results of the sample survey for the determination of the Levenshtein distance similarity threshold.</p>
        <media xlink:href="medinform_v13i1e68863_app1.docx" xlink:title="DOCX File, 43 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Number of errors and the ratio of errors to total extraction in the subcategories of error 3.</p>
        <media xlink:href="medinform_v13i1e68863_app2.docx" xlink:title="DOCX File, 21 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Number of errors and the ratio of errors to total extraction in the subcategories of error 3b and 3c focused on the position of positive-negative expressions.</p>
        <media xlink:href="medinform_v13i1e68863_app3.docx" xlink:title="DOCX File, 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Number of errors and the ratio of errors to total extraction in the subcategories of error 3b and 3c focused on the way of positive-negative expressions.</p>
        <media xlink:href="medinform_v13i1e68863_app4.docx" xlink:title="DOCX File, 21 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CRF</term>
          <def>
            <p>conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">NER</term>
          <def>
            <p>named entity recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">UNK</term>
          <def>
            <p>unknown keys</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by JST SPRING (grant number: JPMJSP2123).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Di Maio</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gallo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Leighl</surname>
              <given-names>NB</given-names>
            </name>
            <name name-style="western">
              <surname>Piccirillo</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Daniele</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nuzzo</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gridelli</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gebbia</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ciardiello</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>De Placido</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ceribelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Favaretto</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>de Matteis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Feld</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Butts</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bryce</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Signoriello</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Morabito</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rocco</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Perrone</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Symptomatic toxicities experienced during anticancer treatment: agreement between patient and physician reporting in three randomized trials</article-title>
          <source>J Clin Oncol</source>
          <year>2015</year>
          <month>03</month>
          <day>10</day>
          <volume>33</volume>
          <issue>8</issue>
          <fpage>910</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/2318/1572016"/>
          </comment>
          <pub-id pub-id-type="doi">10.1200/JCO.2014.57.9334</pub-id>
          <pub-id pub-id-type="medline">25624439</pub-id>
          <pub-id pub-id-type="pii">JCO.2014.57.9334</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Novello</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Capelletto</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cortinovis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tiseo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Galetta</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Valmadre</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Casartelli</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rapetti</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rossi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Italian multicenter survey to evaluate the opinion of patients and their reference clinicians on the "tolerance" to targeted therapies already available for non-small cell lung cancer treatment in daily clinical practice</article-title>
          <source>Transl Lung Cancer Res</source>
          <year>2014</year>
          <month>06</month>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>173</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/2318/1588384"/>
          </comment>
          <pub-id pub-id-type="doi">10.3978/j.issn.2218-6751.2014.06.10</pub-id>
          <pub-id pub-id-type="medline">25806297</pub-id>
          <pub-id pub-id-type="pii">tlcr-03-03-173</pub-id>
          <pub-id pub-id-type="pmcid">PMC4367690</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ohta</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Miki</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kimura</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Abe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sakuma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Koike</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Morimoto</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of adverse events and medical errors in the care of cardiology patients</article-title>
          <source>J Patient Saf</source>
          <year>2019</year>
          <month>09</month>
          <volume>15</volume>
          <issue>3</issue>
          <fpage>251</fpage>
          <lpage>256</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27465298"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/PTS.0000000000000291</pub-id>
          <pub-id pub-id-type="medline">27465298</pub-id>
          <pub-id pub-id-type="pmcid">PMC6727907</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davies</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Measuring health-related quality of life in cancer patients</article-title>
          <source>Nurs Stand</source>
          <year>2009</year>
          <month>04</month>
          <day>02</day>
          <volume>23</volume>
          <issue>30</issue>
          <fpage>42</fpage>
          <lpage>49</lpage>
          <pub-id pub-id-type="doi">10.7748/ns2009.04.23.30.42.c6930</pub-id>
          <pub-id pub-id-type="medline">19408494</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sim</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Surveillance of Symptom Burden Using the Patient-Reported Outcome Version of the Common Terminology Criteria for Adverse Events in Patients With Various Types of Cancers During Chemoradiation Therapy: Real-World Study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2023</year>
          <month>03</month>
          <day>08</day>
          <volume>9</volume>
          <fpage>e44105</fpage>
          <pub-id pub-id-type="doi">10.2196/44105</pub-id>
          <pub-id pub-id-type="medline">36884274</pub-id>
          <pub-id pub-id-type="pii">v9i1e44105</pub-id>
          <pub-id pub-id-type="pmcid">PMC10034615</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dolatabadi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Moyano</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bales</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Spasojevic</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bhambhoria</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bhatti</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Debnath</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hoell</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Leng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nanda</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Saab</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sahak</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sie</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Uppal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vadlamudi</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Vladimirova</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yakimovich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kocak</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Using social media to help understand patient-reported health outcomes of post-COVID-19 condition: natural language processing approach</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>09</month>
          <day>19</day>
          <volume>25</volume>
          <fpage>e45767</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e45767/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/45767</pub-id>
          <pub-id pub-id-type="medline">37725432</pub-id>
          <pub-id pub-id-type="pii">v25i1e45767</pub-id>
          <pub-id pub-id-type="pmcid">PMC10510753</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ransohoff</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Loew</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kwong</surname>
              <given-names>BY</given-names>
            </name>
            <name name-style="western">
              <surname>Sarin</surname>
              <given-names>KY</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Early detection of adverse drug reactions in social health networks: a natural language processing pipeline for signal detection</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2019</year>
          <month>06</month>
          <day>03</day>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>e11264</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2019/2/e11264/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/11264</pub-id>
          <pub-id pub-id-type="medline">31162134</pub-id>
          <pub-id pub-id-type="pii">v5i2e11264</pub-id>
          <pub-id pub-id-type="pmcid">PMC6684218</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batbaatar</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>KH</given-names>
            </name>
          </person-group>
          <article-title>Ontology-based healthcare named entity recognition from twitter messages using a recurrent neural network approach</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2019</year>
          <month>09</month>
          <day>27</day>
          <volume>16</volume>
          <issue>19</issue>
          <fpage>3628</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph16193628"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph16193628</pub-id>
          <pub-id pub-id-type="medline">31569654</pub-id>
          <pub-id pub-id-type="pii">ijerph16193628</pub-id>
          <pub-id pub-id-type="pmcid">PMC6801946</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jagannatha</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Overview of the first natural language processing challenge for extracting medication, indication, and adverse drug events from electronic health record notes (MADE 1.0)</article-title>
          <source>Drug Saf</source>
          <year>2019</year>
          <month>01</month>
          <volume>42</volume>
          <issue>1</issue>
          <fpage>99</fpage>
          <lpage>111</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30649735"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40264-018-0762-z</pub-id>
          <pub-id pub-id-type="medline">30649735</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40264-018-0762-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC6860017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yada</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nakamura</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wakamiya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aramaki</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Real-MedNLP: overview of REAL document-based MEDical Natural Language Processing Task</article-title>
          <source>Proceedings of the 16th NTCIR Conference on Evaluation of Information Access Technologies</source>
          <year>2022</year>
          <conf-name>16th NTCIR Conference on Evaluation of Information Access Technologies</conf-name>
          <conf-date>June 14-17, 2022</conf-date>
          <conf-loc>Tokyo, Japan</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://research.nii.ac.jp/ntcir/workshop/OnlineProceedings16/pdf/ntcir/01-NTCIR16-OV-MEDNLP-YadaS.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Buchan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Filannino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stubbs</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>2018 n2c2 shared task on adverse drug events and medication extraction in electronic health records</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>01</month>
          <day>01</day>
          <volume>27</volume>
          <issue>1</issue>
          <fpage>3</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31584655"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz166</pub-id>
          <pub-id pub-id-type="medline">31584655</pub-id>
          <pub-id pub-id-type="pii">5581277</pub-id>
          <pub-id pub-id-type="pmcid">PMC7489085</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dirkson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Verberne</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>van Oortmerssen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gelderblom</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kraaij</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>How do others cope? Extracting coping strategies for adverse drug events from social media</article-title>
          <source>J Biomed Inform</source>
          <year>2023</year>
          <month>03</month>
          <volume>139</volume>
          <fpage>104228</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(22)00233-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2022.104228</pub-id>
          <pub-id pub-id-type="medline">36309197</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(22)00233-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dreyfus</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhary</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bhardwaj</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shree</surname>
              <given-names>VK</given-names>
            </name>
          </person-group>
          <article-title>Application of natural language processing techniques to identify off-label drug usage from various online health communities</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>09</month>
          <day>18</day>
          <volume>28</volume>
          <issue>10</issue>
          <fpage>2147</fpage>
          <lpage>2154</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34333625"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab124</pub-id>
          <pub-id pub-id-type="medline">34333625</pub-id>
          <pub-id pub-id-type="pii">6333355</pub-id>
          <pub-id pub-id-type="pmcid">PMC8449611</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raza</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lakamana</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A framework for multi-faceted content analysis of social media chatter regarding non-medical use of prescription medications</article-title>
          <source>BMC Digit Health</source>
          <year>2023</year>
          <month>08</month>
          <day>07</day>
          <volume>1</volume>
          <fpage>29</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37680768"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s44247-023-00029-w</pub-id>
          <pub-id pub-id-type="medline">37680768</pub-id>
          <pub-id pub-id-type="pii">29</pub-id>
          <pub-id pub-id-type="pmcid">PMC10483682</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tutubalina</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Miftahutdinov</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Alimova</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dirkson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Verberne</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>DeepADEMiner: a deep learning pharmacovigilance pipeline for extraction and normalization of adverse drug event mentions on Twitter</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>09</month>
          <day>18</day>
          <volume>28</volume>
          <issue>10</issue>
          <fpage>2184</fpage>
          <lpage>2192</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34270701"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab114</pub-id>
          <pub-id pub-id-type="medline">34270701</pub-id>
          <pub-id pub-id-type="pii">6322900</pub-id>
          <pub-id pub-id-type="pmcid">PMC8449608</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Whitfield</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hauser</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Reynolds</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Anwar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Monitoring COVID-19 pandemic through the lens of social media using natural language processing and machine learning</article-title>
          <source>Health Inf Sci Syst</source>
          <year>2021</year>
          <month>12</month>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>25</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34188896"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13755-021-00158-4</pub-id>
          <pub-id pub-id-type="medline">34188896</pub-id>
          <pub-id pub-id-type="pii">158</pub-id>
          <pub-id pub-id-type="pmcid">PMC8226148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Consumers' Use of UMLS Concepts on Social Media: Diabetes-Related Textual Data Analysis in Blog and Social Q&amp;A Sites</article-title>
          <source>JMIR Med Inform</source>
          <year>2016</year>
          <month>11</month>
          <day>24</day>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e41</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2016/4/e41/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.5748</pub-id>
          <pub-id pub-id-type="medline">27884812</pub-id>
          <pub-id pub-id-type="pii">v4i4e41</pub-id>
          <pub-id pub-id-type="pmcid">PMC5146325</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Preiss</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Baumgartner</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Edlund</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bobashev</surname>
              <given-names>GV</given-names>
            </name>
          </person-group>
          <article-title>Using Named Entity Recognition to Identify Substances Used in the Self-medication of Opioid Withdrawal: Natural Language Processing Study of Reddit Data</article-title>
          <source>JMIR Form Res</source>
          <year>2022</year>
          <month>03</month>
          <day>30</day>
          <volume>6</volume>
          <issue>3</issue>
          <fpage>e33919</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2022/3/e33919/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/33919</pub-id>
          <pub-id pub-id-type="medline">35353047</pub-id>
          <pub-id pub-id-type="pii">v6i3e33919</pub-id>
          <pub-id pub-id-type="pmcid">PMC9008522</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Usui</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aramaki</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Iwao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wakamiya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sakamoto</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mochizuki</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Extraction and Standardization of Patient Complaints from Electronic Medication Histories for Pharmacovigilance: Natural Language Processing Analysis in Japanese</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>09</month>
          <day>27</day>
          <volume>6</volume>
          <issue>3</issue>
          <fpage>e11021</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2018/3/e11021/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/11021</pub-id>
          <pub-id pub-id-type="medline">30262450</pub-id>
          <pub-id pub-id-type="pii">v6i3e11021</pub-id>
          <pub-id pub-id-type="pmcid">PMC6231790</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source>
          <year>2019</year>
          <conf-name>2019 Conference of the North American Chapter of the Association for Computational Linguistics</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yue</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Entity extraction of electrical equipment malfunction text by a hybrid natural language processing algorithm</article-title>
          <source>IEEE Access</source>
          <year>2021</year>
          <volume>9</volume>
          <fpage>40216</fpage>
          <lpage>40226</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2021.3063354</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sugimoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wada</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Konishi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Okada</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Manabe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Matsumura</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Takeda</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Extracting clinical information from Japanese radiology reports using a 2-stage deep learning approach: algorithm development and validation</article-title>
          <source>JMIR Med Inform</source>
          <year>2023</year>
          <month>11</month>
          <day>14</day>
          <volume>11</volume>
          <fpage>e49041</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2023/1/e49041/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/49041</pub-id>
          <pub-id pub-id-type="medline">37991979</pub-id>
          <pub-id pub-id-type="pii">v11i1e49041</pub-id>
          <pub-id pub-id-type="pmcid">PMC10686535</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ohno</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kato</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ishikawa</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nishiyama</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Isawa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mochizuki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aramaki</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Aomori</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Using the natural language processing system medical named entity recognition-Japanese to analyze pharmaceutical care records: natural language processing analysis</article-title>
          <source>JMIR Form Res</source>
          <year>2024</year>
          <month>06</month>
          <day>04</day>
          <volume>8</volume>
          <fpage>e55798</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2024/1/e55798/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/55798</pub-id>
          <pub-id pub-id-type="medline">38833694</pub-id>
          <pub-id pub-id-type="pii">v8i1e55798</pub-id>
          <pub-id pub-id-type="pmcid">PMC11185902</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <article-title>BertJapanese</article-title>
          <source>Hugging Face</source>
          <access-date>2025-02-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/docs/transformers/ja/model_doc/bert-japanese">https://huggingface.co/docs/transformers/ja/model_doc/bert-japanese</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levenshtein</surname>
              <given-names>VI</given-names>
            </name>
          </person-group>
          <article-title>Binary codes capable of correcting deletions, insertions and reversals</article-title>
          <source>Soviet Physics Doklady</source>
          <year>1966</year>
          <month>02</month>
          <volume>10</volume>
          <issue>8</issue>
          <fpage>707</fpage>
          <lpage>710</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://nymity.ch/sybilhunting/pdf/Levenshtein1966a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aramaki</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yano</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wakamiya</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>MedEx/J: a one-scan simple and fast NLP tool for Japanese clinical texts</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2017</year>
          <volume>245</volume>
          <fpage>285</fpage>
          <lpage>288</lpage>
          <pub-id pub-id-type="medline">29295100</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ujiie</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yada</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wakamiya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aramaki</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Identification of adverse drug event-related Japanese articles: natural language processing analysis</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>11</month>
          <day>27</day>
          <volume>8</volume>
          <issue>11</issue>
          <fpage>e22661</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/11/e22661/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22661</pub-id>
          <pub-id pub-id-type="medline">33245290</pub-id>
          <pub-id pub-id-type="pii">v8i11e22661</pub-id>
          <pub-id pub-id-type="pmcid">PMC7732716</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nishioka</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Asano</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yada</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aramaki</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yajima</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kizaki</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hori</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Detection of adverse event signals with severity grade classification from cancer patient narrative</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2024</year>
          <month>01</month>
          <day>25</day>
          <volume>310</volume>
          <fpage>554</fpage>
          <lpage>558</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI231026</pub-id>
          <pub-id pub-id-type="medline">38269870</pub-id>
          <pub-id pub-id-type="pii">SHTI231026</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
