<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i4e35606</article-id>
      <article-id pub-id-type="pmid">35451969</article-id>
      <article-id pub-id-type="doi">10.2196/35606</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Multi-Label Classification in Patient-Doctor Dialogues With the RoBERTa-WWM-ext + CNN (Robustly Optimized Bidirectional Encoder Representations From Transformers Pretraining Approach With Whole Word Masking Extended Combining a Convolutional Neural Network) Model: Named Entity Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Elbattah</surname>
            <given-names>Mahmoud</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Monday</surname>
            <given-names>Happy</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Yuanyuan</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7894-865X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Gao</surname>
            <given-names>Dongping</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Institute of Medical Information</institution>
            <institution>Chinese Academy of Medical Sciences</institution>
            <institution>Peking Union Medical College</institution>
            <addr-line>No 3 Yabao Road</addr-line>
            <addr-line>Chaoyang District</addr-line>
            <addr-line>Beijing, 100020</addr-line>
            <country>China</country>
            <phone>86 10 5232 8720</phone>
            <email>gaodp_gaodp@126.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8699-8195</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>Xifeng</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3446-6741</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Meiting</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1555-7728</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Nan</surname>
            <given-names>Jiale</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0577-4510</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Weining</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2326-9400</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Institute of Medical Information</institution>
        <institution>Chinese Academy of Medical Sciences</institution>
        <institution>Peking Union Medical College</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Internal Medicine</institution>
        <institution>Chinese Academy of Medical Sciences</institution>
        <institution>Peking Union Medical College Hospital</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Dongping Gao <email>gaodp_gaodp@126.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>4</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>21</day>
        <month>4</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>4</issue>
      <elocation-id>e35606</elocation-id>
      <history>
        <date date-type="received">
          <day>10</day>
          <month>12</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>13</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>24</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>25</day>
          <month>2</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Yuanyuan Sun, Dongping Gao, Xifeng Shen, Meiting Li, Jiale Nan, Weining Zhang. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 21.04.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/4/e35606" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>With the prevalence of online consultation, many patient-doctor dialogues have accumulated, which, in an authentic language environment, are of significant value to the research and development of intelligent question answering and automated triage in recent natural language processing studies.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The purpose of this study was to design a front-end task module for the network inquiry of intelligent medical services. Through the study of automatic labeling of real doctor-patient dialogue text on the internet, a method of identifying the negative and positive entities of dialogues with higher accuracy has been explored.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The data set used for this study was from the Spring Rain Doctor internet online consultation, which was downloaded from the official data set of Alibaba Tianchi Lab. We proposed a composite abutting joint model, which was able to automatically classify the types of clinical finding entities into the following 4 attributes: positive, negative, other, and empty. We adapted a downstream architecture in Chinese Robustly Optimized Bidirectional Encoder Representations from Transformers Pretraining Approach (RoBERTa) with whole word masking (WWM) extended (RoBERTa-WWM-ext) combining a text convolutional neural network (CNN). We used RoBERTa-WWM-ext to express sentence semantics as a text vector and then extracted the local features of the sentence through the CNN, which was our new fusion model. To verify its knowledge learning ability, we chose Enhanced Representation through Knowledge Integration (ERNIE), original Bidirectional Encoder Representations from Transformers (BERT), and Chinese BERT with WWM to perform the same task, and then compared the results. Precision, recall, and macro-F1 were used to evaluate the performance of the methods.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We found that the ERNIE model, which was trained with a large Chinese corpus, had a total score (macro-F1) of 65.78290014, while BERT and BERT-WWM had scores of 53.18247117 and 69.2795315, respectively. Our composite abutting joint model (RoBERTa-WWM-ext + CNN) had a macro-F1 value of 70.55936311, showing that our model outperformed the other models in the task.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The accuracy of the original model can be greatly improved by giving priority to WWM and replacing the word-based mask with unit to classify and label medical entities. Better results can be obtained by effectively optimizing the downstream tasks of the model and the integration of multiple models later on. The study findings contribute to the translation of online consultation information into machine-readable information.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>online consultation</kwd>
        <kwd>named entity</kwd>
        <kwd>automatic classification</kwd>
        <kwd>ERNIE</kwd>
        <kwd>Enhanced Representation through Knowledge Integration</kwd>
        <kwd>BERT</kwd>
        <kwd>Bidirectional Encoder Representations from Transformers</kwd>
        <kwd>machine learning</kwd>
        <kwd>neural network</kwd>
        <kwd>model</kwd>
        <kwd>China</kwd>
        <kwd>Chinese</kwd>
        <kwd>classification</kwd>
        <kwd>patient-physician dialogue</kwd>
        <kwd>patient doctor dialogue</kwd>
        <kwd>semantics</kwd>
        <kwd>natural language processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Internet hospitals in China are in high demand due to limited and unevenly distributed health care resources, lack of family physicians, increasing burden of chronic diseases, and rapid growth of the aging population [<xref ref-type="bibr" rid="ref1">1</xref>]. Gong et al researched online epidemic-related consultations by multicenter internet hospitals in China during the COVID-19 epidemic, and proved that internet hospitals can offer essential medical support to the public, reduce social panic, and reduce the chance of nosocomial cross-infection, thus playing an important role in preventing and controlling COVID-19 [<xref ref-type="bibr" rid="ref2">2</xref>]. The COVID-19 outbreak catalyzed the expansion of online health care services. During online consultation, large amounts of text data are accumulated, and contextual data that contain patient-doctor dialogues are of significant value. Network inquiry technology is still in the popularization stage in China, and the text record of inquiry is seldom used in research in the area of natural language processing (NLP), which involves patient privacy and information security [<xref ref-type="bibr" rid="ref3">3</xref>]. Recently, there has been a lot of work in this area, for instance, a study on the problem of corpus-level entity typing [<xref ref-type="bibr" rid="ref4">4</xref>]. Chinese scholars have reported on multi-instance learning in the 27th ACM International Conference [<xref ref-type="bibr" rid="ref5">5</xref>]. Moreover, Wentong et al introduced named entity recognition of electronic medical records based on Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref6">6</xref>] and Piao et al researched a Chinese named entity recognition method based on BERT embedding, which improved entity recognition and attribute labeling [<xref ref-type="bibr" rid="ref7">7</xref>]. These are significant studies in the NLP domain. 
Entity studies of clinical text data commonly involve electronic medical records. Dun-Wei et al performed a study based on multi-feature embedding and the attention mechanism [<xref ref-type="bibr" rid="ref8">8</xref>], and Xue et al researched cross-department chunking [<xref ref-type="bibr" rid="ref9">9</xref>]. Moreover, Zhang et al studied automatic identification of Chinese clinical entities from free text in electronic health records and contributed to translating human-readable health information into machine-readable information [<xref ref-type="bibr" rid="ref10">10</xref>]. Furthermore, Jiang et al used machine learning approaches to mine massive service data from the largest China-based online medical consultation platform, which covers 1,582,564 consultation records of patient-physician pairs from 2009 to 2018, and showed that promoting multiple timely responses in patient-provider interactions is essential to encourage payment [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>However, there is limited clinical dialogue data, and the development of sentence compression for aspect-based sentiment analysis is constantly improving [<xref ref-type="bibr" rid="ref12">12</xref>]. Chinese researchers have used the BERT model to analyze public emotion during the epidemic of COVID-19 and have substantiated that the fine-tuning of BERT has higher accuracy in the training process [<xref ref-type="bibr" rid="ref13">13</xref>]. A team from Drexel University used a transformer-based machine learning model to analyze the nuances of vaccine sentiment in Twitter discourse [<xref ref-type="bibr" rid="ref14">14</xref>]. Patient-doctor dialogues, which are different from daily communication or other universal Q&#38;A, contain important data, such as a patient’s symptoms and the diagnosis by a doctor, and these are called “clinical findings” or named entities in patient-doctor dialogues.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>The purpose of this study was to design a front-end task module for the network inquiry of intelligent medical services. Through the study of automatic labeling of real doctor-patient dialogue text on the internet, a method of identifying the negative and positive entities of the dialogue with higher accuracy was explored. This work significantly eliminates the human work involved in feature engineering.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Sets</title>
        <p>In this paper, our task was named entity automatic classification in patient-doctor dialogues, which was divided into the following 4 attributes: positive, negative, other, and empty. The details are presented below.</p>
        <p>The tag “positive (POS)” is used when it can be determined that a patient has dependent symptoms, diseases, and corresponding entities that are likely to cause a certain disease. The tag “negative (NEG)” is used when the disease and symptoms are not related. The tag “other (OTHER)” is used when the user does not know or the answer is unclear/ambiguous, which is difficult to infer. The tag “empty (EMPTY)” is used when there is no practical meaning to determine the patient’s condition, such as interpretation of some medical knowledge by the doctor, independent of the patient’s current condition, inspection items, drug names, etc.</p>
        <p>The data set is from the <italic>Spring Rain Doctor</italic> internet online consultation, which has been downloaded from the official data set of Alibaba Tianchi Lab [<xref ref-type="bibr" rid="ref15">15</xref>]. The training set consists of 6000 dialogues, and each set of dialogues contains more than a dozen statements and a total of 186,305 sentences. The test set consists of 2000 dialogues and a total of 61,207 sentences.</p>
        <p>On analysis, we found that online consultation data had the below features.</p>
        <p>1. The patient description information was scattered, had slang, and had some spelling mistakes:</p>
        <p>患者：经常放屁，很丑(臭) (sentence_id:20); Patient: Fart often. It stnks (stinks)</p>
        <p>医生：杀菌治疗的话应该重新换药 (sentence_id:21); Doctor: For bactericidal treatment, the drugs should be replaced</p>
        <p>患者：现在安(按)肚脐左边，感觉按着涨涨的感觉 (sentence_id:22); Patient: Now prress (press) the left side of the navel, I feel it like a balloon.</p>
        <p>医生：我觉得这种疼痛应该有中药的影响。(sentence_id:23); Doctor: I think this pain should be affected by traditional Chinese medicine.</p>
        <p>2. Interval answers were common:</p>
        <p>医生：咳嗽咳痰？(sentence_id:4); Doctor: Any cough or expectoration?</p>
        <p>医生：头痛头晕脑胀？(sentence_id:5); Doctor: Headache, dizziness, or brain swelling?</p>
        <p>医生：从资料分析看，有可能是过敏性鼻炎。(sentence_id:6); Doctor: According to the previous examination, it may be allergic rhinitis.</p>
        <p>患者：应该是里面，表面上没有鼓包或红肿之类的，没有感冒或咳嗽过最近，头晕脑胀有时会 (sentence_id:7); Patient: It should be inside. There is no bulge or swelling on the surface. There is no cold or cough recently. Dizziness and brain swelling sometimes occur.</p>
        <p>3. The main symptoms were mixed with other symptoms:</p>
        <p>医生：你好，是10岁的孩子<bold><italic>头痛</italic></bold>吗？(sentence_id:2); Doctor: Hello, is it a 10-year-old child with a <italic>headache</italic>?</p>
        <p>患者：是的 (sentence_id:3); Patient: Yes</p>
        <p>患者：不知道头疼<italic>恶心吐</italic>，是不是<italic>感冒</italic> (sentence_id:19); Patient: I'm not sure whether headache, <italic>nausea</italic>, or <italic>vomiting</italic> is <italic>colds</italic></p>
        <p>医生：但是感冒一般不会呕吐 (sentence_id:28); Doctor: But a cold usually doesn't cause vomiting</p>
        <p>患者：恶心之前<italic>没劲</italic>，<italic>反酸水</italic> (sentence_id:30); Patient: <italic>No strength</italic> before nausea, <italic>sour stomach</italic></p>
        <p>医生：需要详细的问诊和查体，建议到医院<italic>神经内科</italic>或儿童神经内科面诊 (sentence_id:36); Doctor: Need detailed consultation and physical examination, I suggest going to the hospital <italic>neurology department</italic> or children’s neurology department for a face-to-face diagnosis</p>
        <p>The above aspects introduce many difficulties in entity recognition and attribute annotation.</p>
        <p>The format of raw data was multilayer nested JSON. According to the aspects of the models, we split the innermost text into pairs of splicing contextual sentences. “Jsonlite” is a unique package of R language [<xref ref-type="bibr" rid="ref16">16</xref>], and the built-in “stream_in” statement does well with tiling JSON into an Excel table, making it intuitive and convenient for us to compare the differences in output data. We then extracted the corresponding subform data according to the analysis requirements. All models shared the same data set. Before input into our model, in addition to the sentence content, we appended the speech role information (ie, sender).</p>
      </sec>
      <sec>
        <title>Composite Abutting Joint Model for Clinical Named Entity Classification</title>
        <p>We proposed a composite abutting joint model and adapted a downstream architecture in Chinese Robustly Optimized Bidirectional Encoder Representations from Transformers Pretraining Approach (RoBERTa) with whole word masking (WWM) extended (RoBERTa-WWM-ext), which combines a text convolutional neural network (CNN) [<xref ref-type="bibr" rid="ref17">17</xref>]. We used RoBERTa-WWM-ext to express sentence semantics as a text vector [<xref ref-type="bibr" rid="ref18">18</xref>] and then extracted the local features of the sentence through the CNN, which was our new fusion model.</p>
        <sec>
          <title>Construction of the Composite Abutting Joint Model</title>
          <p>Chinese RoBERTa-WWM-ext is an open-source model from the Harbin Institute of Technology, which uses WWM combined with the RoBERTa model [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. We adapted a downstream architecture in Chinese RoBERTa-WWM, which combines a text CNN [<xref ref-type="bibr" rid="ref21">21</xref>]. Our training objective was to use RoBERTa-WWM-ext to express sentence semantics as a text vector and then extract the local features of the sentence through the CNN. The construction of our model is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Construction of our model. BERT: Bidirectional Encoder Representations from Transformers; CNN: convolutional neural network.</p>
            </caption>
            <graphic xlink:href="medinform_v10i4e35606_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>The Input Layer of the Composite Abutting Joint Model</title>
          <p>The input layer is the same as BERT [<xref ref-type="bibr" rid="ref22">22</xref>]. It uses a masked language model (MLM) to generate deep 2-way linguistic representations that combine adjacent and contextual information. Its structure involves stacking traditional transformers, and taking BERT as an example, each of its 12 transformer layers combine left and right contexts to form a deeper 2-way self-attention architecture neural network. Text-input BERT is characterized by 3 levels (<xref rid="figure2" ref-type="fig">Figure 2</xref>), namely, token embeddings, segment embeddings, and position embeddings.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Bidirectional Encoder Representations from Transformers input characterization.</p>
            </caption>
            <graphic xlink:href="medinform_v10i4e35606_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Text Vector Calculation Layer of the Composite Abutting Joint Model</title>
          <p>To maintain continuity between sentences, the beginning and end of the original text are marked with a special symbol [CLS], and the 2 sentences are split with [SEP]. The coded information in the discrete state is transformed into N-dimensional space vectors and transmitted to the encoder unit of the transformer through a continuous and distributed representation. Similarity and distance are computed at the self-attention level to capture word dependencies within sentences. For the calculation of the self-attention function, Vaswani et al introduced “Scaled Dot-Product Attention” [<xref ref-type="bibr" rid="ref23">23</xref>]. The input includes queries and keys for dimension <italic>d<sub>k</sub></italic> and the value for dimension <italic>d<sub>v</sub></italic>. The dot products of a query are computed with all keys, and each is divided by each key. Then, the softmax function is applied to the values. In fact, during the model computation, it has a set of queries packed together into a matrix Q. The keys and values are packed together into matrices K and V. The output matrix is as follows [<xref ref-type="bibr" rid="ref23">23</xref>]:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v10i4e35606_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>The model could project the queries, keys, and values linearly <italic>h</italic> times with different learned linear projections to <italic>d<sub>k</sub></italic>, <italic>d<sub>k</sub></italic>, and <italic>d<sub>v</sub></italic> dimensions, respectively. On each projected version of the queries, keys, and values, it executes the attention function in parallel to generate <italic>d<sub>v</sub>-</italic>dimensional output values. These values are connected and projected again to obtain the final result. This is multihead attention [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
          <p>
            <disp-formula>Multihead (<italic>Q, K, V</italic>) = Concat (head<sub>1</sub>, ..., head<sub>h</sub>)W<sup>O</sup> <bold>(2)</bold></disp-formula>
          </p>
          <p>where head<sub>i</sub> = Attention(<italic>QW<sub>i</sub><sup>Q</sup></italic>, <italic>KW<sub>i</sub><sup>K</sup></italic>, <italic>VW<sub>i</sub><sup>V</sup></italic>) and where the projections are parameter matrices <inline-graphic xlink:href="medinform_v10i4e35606_fig6.png" xlink:type="simple" mimetype="image"/>, <inline-graphic xlink:href="medinform_v10i4e35606_fig7.png" xlink:type="simple" mimetype="image"/>, <inline-graphic xlink:href="medinform_v10i4e35606_fig8.png" xlink:type="simple" mimetype="image"/>, and <inline-graphic xlink:href="medinform_v10i4e35606_fig9.png" xlink:type="simple" mimetype="image"/>.</p>
          <p>The inputs and outputs of the self-attention layer are added and normalized, which makes the output mean of the self-attention layer 0 and the standard deviation 1, and then, it is transferred to the feed-forward layer of the feed-forward neural network. Mean and normalization are processed again. The transformer encoder structure of the model has been described by Vaswani et al [<xref ref-type="bibr" rid="ref23">23</xref>] (<xref rid="figure3" ref-type="fig">Figure 3</xref>).</p>
          <p>In transformers, location coding is computed using a trigonometric function as follows [<xref ref-type="bibr" rid="ref23">23</xref>]:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v10i4e35606_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v10i4e35606_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>The positional encoding vector results are added to the embedding vector sequence corresponding to each input word instead of being concatenated with it. Similar to BERT in our model, 15% of the word-piece tokens are masked at random during training. These masked tokens are divided into 3 parts, with 80% of them using [MASK], 10% of them being replaced with a random word, and 10% of them using the original word. Related research by Dandan et al showed that the downstream task of the pretraining model can improve the performance of the model through FINETUNE [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
          <p>During the pretraining phase, the BERT model takes on 2 tasks, MLM and next sentence prediction (NSP). Piao et al have explained the process of predictive masking in MLM tasks, which obtains the semantic representation of a word in a specific context through self-supervised learning [<xref ref-type="bibr" rid="ref7">7</xref>]. Not the same as BERT, RoBERTa-WWM-ext cancels the NSP and uses max_len = 512 during the pretraining, and the number of training steps is appropriately extended [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
          <p>Another feature of RoBERTa-WWM-ext is that it uses WWM. An example to illustrate the characteristics of WWM is provided in <xref rid="figure4" ref-type="fig">Figure 4</xref> [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
          <p>BERT can only divide Chinese into characters, not words (units). WWM makes the Chinese mask more like English. A complete word will be shielded; otherwise, it will not be shielded, which can maintain the integrity of the Chinese word as a unit, to improve the accuracy of model learning.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Transformer encoder structure.</p>
            </caption>
            <graphic xlink:href="medinform_v10i4e35606_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>An example of whole word masking in our model.</p>
            </caption>
            <graphic xlink:href="medinform_v10i4e35606_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Sentence Feature Computing Layer of the Composite Abutting Joint Model</title>
          <p>The output word vector of RoBERTa-WWM-ext was further extracted by a CNN, which is expected to enhance the robustness of the model. The computing formula is as follows [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v10i4e35606_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v10i4e35606_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v10i4e35606_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>where W<sub>A</sub> and W<sub>B</sub> are 2 matrices that are randomly initialized by adding an attention layer to deal with the location characteristics, and b is the RoBERTa-WWM-ext hidden layer dimension, with b<sub>1</sub> being the offset. Moreover, E<sub>Ro</sub> represents the output of the coding layer of RoBERTa-WWM-ext, and feature<sub>text</sub> represents the weighted feature obtained by the product of the score weight and the output of the encoder, which is also the output text vector feature of RoBERTa-WWM-ext. After CNN calculation, the predicted emotion label is finally obtained [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Evaluation Criteria</title>
        <p>We adopted Alibaba Cloud’s official evaluation standard, and Macro-F1 was used as the evaluation index. Suppose we have n categories, C<sub>1</sub>, ..., C<sub>i</sub>, ..., C<sub>n</sub>; the calculation is as follows:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v10i4e35606_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>where precision (P<sub>i</sub>) is the number of samples correctly predicted as category C<sub>i</sub> divided by the number of samples predicted as category C<sub>i</sub>, and recall rate (R<sub>i</sub>) is the number of samples correctly predicted as category C<sub>i</sub> divided by the number of samples in the real C<sub>i</sub> category.</p>
      </sec>
      <sec>
        <title>Graphics Processing Unit Server Requirements</title>
        <p>The server requirements are as follows: CPU, 8 cores at 2.5 GHz; memory, 32 GB; hard disk, 500 GB; GPU/Field Programmable Gate Array, 1×NVIDIA V100.</p>
      </sec>
      <sec>
        <title>Results of Our Composite Abutting Joint Model</title>
        <p>Our data involved a 3-layer nested JSON file. The first layer was regarded as the index of each dialogue, the second layer was the specific dialogue content between patients and doctors in each dialogue, and the third layer was the entity part corresponding to a single sentence. Not every sentence had an entity part, and not every entity needed to be marked with an entity attribute. We expanded the training set data and all the models’ training results. The distribution of entity attribute labels is shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <p>From <xref ref-type="table" rid="table1">Table 1</xref>, we know that the BERT results of the test data have more positive labels, with a value nearly 10 percentage points higher than that for the training data, and the negative labels were nearly 4 percentage points lower than that for the training data. After optimizing WWM, the attribute proportion was close to the training data, but there was still a certain gap. We used the fine-tuning approach with CNN for RoBERTa-WWM-ext, but it did not change the label proportion. In the Enhanced Representation through Knowledge Integration (ERNIE) model training results, the attribute proportion was closer to that for the training data when compared with BERT. Next, we compared the 4 models, and the results are shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Attribute statistics in the training data and the model training results of the test data.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="170"/>
            <col width="160"/>
            <col width="130"/>
            <col width="130"/>
            <col width="160"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Data set</td>
                <td>Training data (N=118,976)</td>
                <td colspan="4">Test data (N=39,204)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>ERNIE<sup>a</sup></td>
                <td>BERT<sup>b</sup></td>
                <td>BERT-WWM<sup>c</sup></td>
                <td>RoBERTa-WWM-ext + CNN<sup>d</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>POS<sup>e</sup>, n (%)</td>
                <td>74,774 (62.85%)</td>
                <td>25,163 (64.18%)</td>
                <td>27,866 (71.08%)</td>
                <td>26,116 (66.62%)</td>
                <td>26,116 (66.62%)</td>
              </tr>
              <tr valign="top">
                <td>NEG<sup>f</sup>, n (%)</td>
                <td>14,086 (11.84%)</td>
                <td>4271 (10.89%)</td>
                <td>3125 (7.97%)</td>
                <td>3871 (9.87%)</td>
                <td>3871 (9.87%)</td>
              </tr>
              <tr valign="top">
                <td>OTHER<sup>g</sup>, n (%)</td>
                <td>6167 (5.18%)</td>
                <td>1006 (2.57%)</td>
                <td>684 (1.74%)</td>
                <td>2587 (6.60%)</td>
                <td>2587 (6.60%)</td>
              </tr>
              <tr valign="top">
                <td>EMPTY<sup>h</sup>, n (%)</td>
                <td>23,949 (20.13%)</td>
                <td>8764 (22.35%)</td>
                <td>7529 (19.20%)</td>
                <td>6630 (16.91%)</td>
                <td>6630 (16.91%)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>ERNIE: Enhanced Representation through Knowledge Integration.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>BERT: Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>BERT-WWM: Bidirectional Encoder Representations from Transformers with whole word masking.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>RoBERTa-WWM-ext + CNN: Robustly Optimized BERT Pretraining Approach with whole word masking extended plus a convolutional neural network.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>The tag “positive (POS)” is used when it can be determined that a patient has dependent symptoms, diseases, and corresponding entities that are likely to cause a certain disease.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>NEG: The tag “negative (NEG)” is used when the disease and symptoms are not related.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>OTHER: The tag “other (OTHER)” is used when the user does not know or the answer is unclear/ambiguous, which is difficult to infer.</p>
            </fn>
            <fn id="table1fn8">
              <p><sup>h</sup>EMPTY: The tag “empty (EMPTY)” is used when there is no practical meaning to determine the patient’s condition, such as interpretation of some medical knowledge by the doctor, independent of the patient’s current condition, inspection items, drug names, etc.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The scores of the 4 models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <col width="260"/>
            <thead>
              <tr valign="top">
                <td>Data set</td>
                <td>ERNIE<sup>a</sup></td>
                <td colspan="3">BERT<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>BERT</td>
                <td>BERT-WWM<sup>c</sup></td>
                <td>RoBERTa-WWM-ext + CNN<sup>d</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>POS<sup>e</sup>-Rr</td>
                <td>87.32461545</td>
                <td>87.10998052</td>
                <td>89.81676537</td>
                <td>89.23248142</td>
              </tr>
              <tr valign="top">
                <td>POS-Pr<sup>f</sup></td>
                <td>87.35933834</td>
                <td>78.69582391</td>
                <td>86.57854406</td>
                <td>88.20871479</td>
              </tr>
              <tr valign="top">
                <td>POS-F1</td>
                <td>87.34197344</td>
                <td>82.68940537</td>
                <td>88.16793149</td>
                <td>88.71764473</td>
              </tr>
              <tr valign="top">
                <td>NEG<sup>g</sup>-Rr<sup>h</sup></td>
                <td>67.70158588</td>
                <td>41.50100514</td>
                <td>66.96448515</td>
                <td>70.13625195</td>
              </tr>
              <tr valign="top">
                <td>NEG-Pr</td>
                <td>71.03351301</td>
                <td>59.45600000</td>
                <td>77.50775595</td>
                <td>77.30182176</td>
              </tr>
              <tr valign="top">
                <td>NEG-F1</td>
                <td>69.32753888</td>
                <td>48.88187319</td>
                <td>71.85140803</td>
                <td>73.54491158</td>
              </tr>
              <tr valign="top">
                <td>OTHER<sup>i</sup>-Rr</td>
                <td>27.30551262</td>
                <td>12.98299845</td>
                <td>58.06285420</td>
                <td>57.13549717</td>
              </tr>
              <tr valign="top">
                <td>OTHER-Pr</td>
                <td>52.68389662</td>
                <td>36.84210526</td>
                <td>43.58081980</td>
                <td>45.06298253</td>
              </tr>
              <tr valign="top">
                <td>OTHER-F1</td>
                <td>35.96878181</td>
                <td>19.20000000</td>
                <td>49.79014800</td>
                <td>50.38618810</td>
              </tr>
              <tr valign="top">
                <td>EMPTY<sup>j</sup>-Rr</td>
                <td>75.84846093</td>
                <td>61.62851881</td>
                <td>62.98342541</td>
                <td>67.77163904</td>
              </tr>
              <tr valign="top">
                <td>EMPTY-Pr</td>
                <td>65.84446728</td>
                <td>62.29224837</td>
                <td>72.27169811</td>
                <td>71.50589868</td>
              </tr>
              <tr valign="top">
                <td>EMPTY-F1</td>
                <td>70.49330644</td>
                <td>61.95860610</td>
                <td>67.30863850</td>
                <td>69.58870804</td>
              </tr>
              <tr valign="top">
                <td>Macro-Rr</td>
                <td>64.54504372</td>
                <td>50.80562573</td>
                <td>69.45688253</td>
                <td>71.06896740</td>
              </tr>
              <tr valign="top">
                <td>Macro-Pr</td>
                <td>69.23030381</td>
                <td>59.32154439</td>
                <td>69.98470448</td>
                <td>70.51985444</td>
              </tr>
              <tr valign="top">
                <td>Total score (Macro-F1)</td>
                <td>65.78290014</td>
                <td>53.18247117</td>
                <td>69.27953150</td>
                <td>70.55936311</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>ERNIE: Enhanced Representation through Knowledge Integration.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>BERT: Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>BERT-WWM: Bidirectional Encoder Representations from Transformers with whole word masking.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>RoBERTa-WWM-ext + CNN: Robustly Optimized BERT Pretraining Approach with whole word masking extended plus a convolutional neural network.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>The tag “positive (POS)” is used when it can be determined that a patient has dependent symptoms, diseases, and corresponding entities that are likely to cause a certain disease.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>Pr: precision rate.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>NEG: The tag “negative (NEG)” is used when the disease and symptoms are not related.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>Rr: recall rate.</p>
            </fn>
            <fn id="table2fn9">
              <p><sup>i</sup>OTHER: The tag “other (OTHER)” is used when the user does not know or the answer is unclear/ambiguous, which is difficult to infer.</p>
            </fn>
            <fn id="table2fn10">
              <p><sup>j</sup>EMPTY: The tag “empty (EMPTY)” is used when there is no practical meaning to determine the patient’s condition, such as interpretation of some medical knowledge by the doctor, independent of the patient’s current condition, inspection items, drug names, etc.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>From the scoring results, the ERNIE model, which has been trained on a large Chinese corpus, had a total score 12.6 points higher than that of the BERT model in our task. BERT-WWM surpassed ERNIE, with a score of 69.28. Our RoBERTa-WWM-ext + CNN model improved the overall score by 1.28. With the addition of the message sender in the corpus of RoBERTa-WWM-ext, the correct rate of answering sentences also improved.</p>
      <p>A previous report assessed BERT fine-tuning as embedding input into the text CNN model and showed that the accuracy rate was 0.31% higher than that of the original BERT model and was more stable [<xref ref-type="bibr" rid="ref28">28</xref>]. We used CNN to compute sentence features. To verify our model’s knowledge learning ability, we chose ERNIE [<xref ref-type="bibr" rid="ref29">29</xref>], original BERT, and Chinese BERT with WWM to do the same task, and then compared the results of these models.</p>
      <p>In this study, we showed that our model outperformed the other models on the task. The test was not manually modified, and the error of the training data limited the role of manual rules. We tried to add rules to correct the positive labeling, but the total score was only 29.31 points. The accuracy of the positive label was 92.33, but the recall was only 16.46. Due to the false-positive interference of the original data, it was difficult to improve the accuracy of the model itself through artificial rules. The longest sequence length supported by BERT is 512. The text tasks suitable for processing include short texts, such as comments on social platforms and article titles, but for a medical dialogue composed of more than 50 single sentences, the length is obviously not enough. We can only use the truncation method to preprocess text, that is, head truncation, tail truncation, and head-to-tail truncation, which adds some difficulty to the preliminary work. According to the work of Zeng et al, the base model did improve the accuracy rate by adjusting the downstream tasks [<xref ref-type="bibr" rid="ref30">30</xref>]. For the single model, XLNet and RoBERTa were better than BERT and ERNIE, and the integration of multiple models will improve the model by 2.58% on average. The results of this study indicated that the accuracy of the model improved with small and middle sample sizes. The multimodel joint integration was an effective way to improve the accuracy of the entity attribute annotation.</p>
      <p>“Internet medical+” was part of China’s rapid development after “Internet+” became China’s national strategy in 2015 [<xref ref-type="bibr" rid="ref31">31</xref>]. In 2019, the novel coronavirus pneumonia outbreak occurred globally, and traditional medical treatment brought many malpractices, which stimulated the technical development of internet inquiry [<xref ref-type="bibr" rid="ref32">32</xref>]. In the 9th IEEE International Conference on Health Care Informatics (ICHI) in 2021, some scholars proposed to integrate structured data with unstructured text annotation recorded in the classification stage, and use NLP methods for admission prediction and triage notes [<xref ref-type="bibr" rid="ref33">33</xref>]. This study hopes to further optimize medical information and pave the way for the automatic generation of medical cases through the automatic entity annotation of doctor-patient real dialogue text generated in the process of consultation. It is speculated that our study findings will contribute to the application of NLP methods in the field of health care.</p>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ERNIE</term>
          <def>
            <p>Enhanced Representation through Knowledge Integration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">MLM</term>
          <def>
            <p>masked language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NSP</term>
          <def>
            <p>next sentence prediction</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RoBERTa</term>
          <def>
            <p>Robustly Optimized Bidirectional Encoder Representations from Transformers Pretraining Approach</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RoBERTa-WWM-ext</term>
          <def>
            <p>Robustly Optimized Bidirectional Encoder Representations from Transformers Pretraining Approach with whole word masking extended</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">WWM</term>
          <def>
            <p>whole word masking</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank the 7th China Health Information Processing Conference organizers for providing the training, development, and test corpora. This research is funded by the National Key Research and Development Program of China (grant ID: 2020AAA0104905).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Characteristics of Online Health Care Services From China's Largest Online Medical Platform: Cross-sectional Survey Study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>04</month>
          <day>15</day>
          <volume>23</volume>
          <issue>4</issue>
          <fpage>e25817</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/4/e25817/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25817</pub-id>
          <pub-id pub-id-type="medline">33729985</pub-id>
          <pub-id pub-id-type="pii">v23i4e25817</pub-id>
          <pub-id pub-id-type="pmcid">PMC8051434</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Internet Hospitals Help Prevent and Control the Epidemic of COVID-19 in China: Multicenter User Profiling Study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>04</month>
          <day>14</day>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>e18908</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/4/e18908/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/18908</pub-id>
          <pub-id pub-id-type="medline">32250962</pub-id>
          <pub-id pub-id-type="pii">v22i4e18908</pub-id>
          <pub-id pub-id-type="pmcid">PMC7159055</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Vogel</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Privacy Protection in Online Health Communities: Natural Experimental Empirical Study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>05</month>
          <day>21</day>
          <volume>22</volume>
          <issue>5</issue>
          <fpage>e16246</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/5/e16246/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16246</pub-id>
          <pub-id pub-id-type="medline">32436851</pub-id>
          <pub-id pub-id-type="pii">v22i5e16246</pub-id>
          <pub-id pub-id-type="pmcid">PMC7273234</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yaghoobzadeh</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Adel</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetze</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Corpus-Level Fine-Grained Entity Typing</article-title>
          <source>Journal of Artificial Intelligence</source>
          <year>2018</year>
          <month>04</month>
          <day>17</day>
          <volume>61</volume>
          <fpage>835</fpage>
          <lpage>862</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.5601</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>METIC: Multi-Instance Entity Typing from Corpus</article-title>
          <source>CIKM '18: Proceedings of the 27th ACM International Conference on Information and Knowledge Management</source>
          <year>2018</year>
          <conf-name>27th ACM International Conference on Information and Knowledge Management</conf-name>
          <conf-date>October 22-26, 2018</conf-date>
          <conf-loc>Torino, Italy</conf-loc>
          <fpage>903</fpage>
          <lpage>912</lpage>
          <pub-id pub-id-type="doi">10.1145/3269206.3271804</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wentong</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yanhui</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Fei</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xiangbing</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Named Entity Recognition of Electronic Medical Records Based on BERT</article-title>
          <source>Journal of Hunan University of Technology</source>
          <year>2020</year>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>34</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.3969/j.issn.1673-9833.2020.04.009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Piao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wenyong</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Chinese Named Entity Recognition Method Based on BERT Embedding</article-title>
          <source>Computer Engineering</source>
          <year>2020</year>
          <volume>46</volume>
          <issue>4</issue>
          <fpage>52</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.19678/j.issn.1000-3428.0054272</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dun-Wei</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yong-Kai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yi-Nan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Bin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kuan-Lu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Named entity recognition of Chinese electronic medical records based on multifeature embedding and attention mechanism</article-title>
          <source>Chinese Journal of Engineering</source>
          <year>2021</year>
          <volume>43</volume>
          <issue>9</issue>
          <fpage>1190</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.13374/j.issn2095-9389.2021.01.12.006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhipeng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Cross-department chunking based on Chinese electronic medical record</article-title>
          <source>Application Research of Computers</source>
          <year>2017</year>
          <volume>34</volume>
          <issue>7</issue>
          <fpage>2084</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://open.oriprobe.com/articles/51487196/Cross_department_chunking_based_on_Chinese_electro.htm"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Clinical Named Entity Recognition From Chinese Electronic Health Records via Machine Learning Methods</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>12</month>
          <day>17</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e50</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2018/4/e50/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.9965</pub-id>
          <pub-id pub-id-type="medline">30559093</pub-id>
          <pub-id pub-id-type="pii">v6i4e50</pub-id>
          <pub-id pub-id-type="pmcid">PMC6315256</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cameron</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Analysis of Massive Online Medical Consultation Service Data to Understand Physicians' Economic Return: Observational Data Mining Study</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>02</month>
          <day>18</day>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>e16765</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/2/e16765/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16765</pub-id>
          <pub-id pub-id-type="medline">32069213</pub-id>
          <pub-id pub-id-type="pii">v8i2e16765</pub-id>
          <pub-id pub-id-type="pmcid">PMC7055801</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Che</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Sentence Compression for Aspect-Based Sentiment Analysis</article-title>
          <source>IEEE/ACM Trans. Audio Speech Lang. Process</source>
          <year>2015</year>
          <month>12</month>
          <volume>23</volume>
          <issue>12</issue>
          <fpage>2111</fpage>
          <lpage>2124</lpage>
          <pub-id pub-id-type="doi">10.1109/taslp.2015.2443982</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 Sensing: Negative Sentiment Analysis on Social Media in China via BERT Model</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>138162</fpage>
          <lpage>138169</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.3012595</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kummervold</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dada</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kilich</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Paterson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>Categorizing Vaccine Confidence With a Transformer-Based Machine Learning Model: Analysis of Nuances of Vaccine Sentiment in Twitter Discourse</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>10</month>
          <day>08</day>
          <volume>9</volume>
          <issue>10</issue>
          <fpage>e29584</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/10/e29584/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/29584</pub-id>
          <pub-id pub-id-type="medline">34623312</pub-id>
          <pub-id pub-id-type="pii">v9i10e29584</pub-id>
          <pub-id pub-id-type="pmcid">PMC8538052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>CBLUE: A Chinese Biomedical Language Understanding Evaluation Benchmark</article-title>
          <source>Alibaba Group</source>
          <year>2021</year>
          <access-date>2022-03-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tianchi.aliyun.com/dataset/dataDetail?dataId=95414">https://tianchi.aliyun.com/dataset/dataDetail?dataId=95414</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ihaka</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gentleman</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>R: A Language for Data Analysis and Graphics</article-title>
          <source>Journal of Computational and Graphical Statistics</source>
          <year>1996</year>
          <month>09</month>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>299</fpage>
          <lpage>314</lpage>
          <pub-id pub-id-type="doi">10.1080/10618600.1996.10474713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pattanayak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Convolutional Neural Networks</article-title>
          <source>Pro Deep Learning with TensorFlow</source>
          <year>2017</year>
          <publisher-loc>Berkeley, CA</publisher-loc>
          <publisher-name>Apress</publisher-name>
          <fpage>153</fpage>
          <lpage>221</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: A Robustly Optimized BERT Pretraining Approach</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <access-date>2022-03-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692">https://arxiv.org/abs/1907.11692</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Che</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Pre-Training With Whole Word Masking for Chinese BERT</article-title>
          <source>IEEE/ACM Trans. Audio Speech Lang. Process</source>
          <year>2021</year>
          <volume>29</volume>
          <fpage>3504</fpage>
          <lpage>3514</lpage>
          <pub-id pub-id-type="doi">10.1109/taslp.2021.3124365</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Debut</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sanh</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chaumond</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Delangue</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Moi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cistac</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rault</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>HuggingFace's Transformers: State-of-the-art Natural Language Processing</article-title>
          <source>arXiv</source>
          <year>2020</year>
          <access-date>2022-03-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1910.03771">https://arxiv.org/abs/1910.03771</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Barzilay</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jaakkola</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Molding CNNs for text: non-linear, non-consecutive convolutions</article-title>
          <source>arXiv</source>
          <year>2015</year>
          <access-date>2022-03-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1508.04112">https://arxiv.org/abs/1508.04112</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <access-date>2022-03-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1810.04805">https://arxiv.org/abs/1810.04805</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention Is All You Need</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <access-date>2022-03-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1706.03762">https://arxiv.org/abs/1706.03762</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dandan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jiashan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yong</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kehai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiashan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yong</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Chinese Short Text Classification Algorithm Based on BERT Model</article-title>
          <source>Computer Engineering</source>
          <year>2021</year>
          <volume>47</volume>
          <issue>1</issue>
          <fpage>47</fpage>
          <lpage>86</lpage>
          <pub-id pub-id-type="doi">10.19678/j.issn.1000-3428.0056222</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dauphin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Auli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grangier</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Language Modeling with Gated Convolutional Networks</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <access-date>2022-03-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1612.08083">https://arxiv.org/abs/1612.08083</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Transfer Capsule Network for Aspect Level Sentiment Classification</article-title>
          <source>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2019</year>
          <conf-name>57th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <fpage>547</fpage>
          <lpage>556</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/p19-1052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kun</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shuya</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Shouyin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Long text aspect-level sentiment analysis based on text filtering and improved BERT</article-title>
          <source>Journal of Computer Applications</source>
          <year>2020</year>
          <volume>40</volume>
          <issue>10</issue>
          <fpage>2838</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.11772/j.issn.1001-9081.2020020164</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiaowei</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jianfei</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Research on News Text Classification Based on Improved BERT-CNN Model</article-title>
          <source>Video Engineering</source>
          <year>2021</year>
          <volume>45</volume>
          <issue>7</issue>
          <fpage>146</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.16280/j.videoe.2021.07.040</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>ERNIE: Enhanced Representation through Knowledge Integration</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <access-date>2022-03-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1904.09223">https://arxiv.org/abs/1904.09223</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Automated classification of clinical trial eligibility criteria text based on ensemble learning and metric learning</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2021</year>
          <month>07</month>
          <day>30</day>
          <volume>21</volume>
          <issue>Suppl 2</issue>
          <fpage>129</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-021-01492-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-021-01492-z</pub-id>
          <pub-id pub-id-type="medline">34330259</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-021-01492-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC8323220</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiaoyan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jing</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rong</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Studying on The Existing Modes of “Internet Plus Medical Services” in China</article-title>
          <source>Chinese Health Service Management</source>
          <year>2019</year>
          <volume>36</volume>
          <issue>1</issue>
          <fpage>8</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://36.112.18.13/Qikan/Article/Detail?id=6100173853"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Qiong</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xiaoli</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bochun</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Opportunity and Reflection of the Internet+Medical Under COVID-19 Epidemic Situation</article-title>
          <source>Chinese Hospital Management</source>
          <year>2020</year>
          <volume>40</volume>
          <issue>6</issue>
          <fpage>38</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cnki.com.cn/Article/CJFDTOTAL-YYGL202006017.htm"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arnaud</surname>
              <given-names>É</given-names>
            </name>
            <name name-style="western">
              <surname>Elbattah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gignon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dequen</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>NLP-Based Prediction of Medical Specialties at Hospital Admission Using Triage Notes</article-title>
          <year>2021</year>
          <conf-name>9th International Conference on Healthcare Informatics (ICHI)</conf-name>
          <conf-date>August 9-12, 2021</conf-date>
          <conf-loc>Victoria, BC, Canada</conf-loc>
          <fpage>548</fpage>
          <lpage>553</lpage>
          <pub-id pub-id-type="doi">10.1109/ICHI52183.2021.00103</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
