<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i7e28218</article-id>
      <article-id pub-id-type="pmid">34057414</article-id>
      <article-id pub-id-type="doi">10.2196/28218</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Head and Tail Entity Fusion Model in Medical Knowledge Graph Construction: Case Study for Pituitary Adenoma</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhu</surname>
            <given-names>Shanfeng</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lin</surname>
            <given-names>Hongfei</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Fang</surname>
            <given-names>An</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9526-9306</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Lou</surname>
            <given-names>Pei</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1426-670X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>Jiahui</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4352-3250</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Wanqing</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3705-5737</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Feng</surname>
            <given-names>Ming</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9943-5941</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Ren</surname>
            <given-names>Huiling</given-names>
          </name>
          <degrees>MSL</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1067-408X</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Xianlai</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <address>
            <institution>Big Data Institute</institution>
            <institution>Central South University</institution>
            <addr-line>932 South Lushan Road</addr-line>
            <addr-line>Changsha, 410083</addr-line>
            <country>China</country>
            <phone>86 731 88879583</phone>
            <email>chenxianlai@csu.edu.cn</email>
          </address>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4338-015X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Life Science College</institution>
        <institution>Central South University</institution>
        <addr-line>Changsha</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Institute of Medical Information</institution>
        <institution>Chinese Academy of Medical Sciences</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Peking Union Medical College Hospital</institution>
        <institution>Chinese Academy of Medical Sciences</institution>
        <institution>Peking Union Medical College</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Big Data Institute</institution>
        <institution>Central South University</institution>
        <addr-line>Changsha</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>National Engineering Lab for Medical Big Data Application Technology</institution>
        <institution>Central South University</institution>
        <addr-line>Changsha</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Xianlai Chen <email>chenxianlai@csu.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>22</day>
        <month>7</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>7</issue>
      <elocation-id>e28218</elocation-id>
      <history>
        <date date-type="received">
          <day>25</day>
          <month>2</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>12</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>11</day>
          <month>4</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>30</day>
          <month>5</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©An Fang, Pei Lou, Jiahui Hu, Wanqing Zhao, Ming Feng, Huiling Ren, Xianlai Chen. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 22.07.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/7/e28218" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Pituitary adenoma is one of the most common central nervous system tumors. The diagnosis and treatment of pituitary adenoma remain very difficult. Misdiagnosis and recurrence often occur, and experienced neurosurgeons are in serious shortage. A knowledge graph can help interns quickly understand the medical knowledge related to pituitary tumor.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to develop a data fusion method suitable for medical data using data of pituitary adenomas integrated from different sources. The overall goal was to construct a knowledge graph for pituitary adenoma (KGPA) to be used for knowledge discovery.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A complete framework suitable for the construction of a medical knowledge graph was developed, which was used to build the KGPA. The schema of the KGPA was manually constructed. Information of pituitary adenoma was automatically extracted from Chinese electronic medical records (CEMRs) and medical websites through a conditional random field model and newly designed web wrappers. An entity fusion method is proposed based on the head-and-tail entity fusion model to fuse the data from heterogeneous sources.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Data were extracted from 300 CEMRs of pituitary adenoma and 4 health portals. Entity fusion was carried out using the proposed data fusion model. The F1 scores of the head and tail entity fusions were 97.32% and 98.57%, respectively. Triples from the constructed KGPA were selected for evaluation, demonstrating 95.4% accuracy.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This paper introduces an approach to fuse triples extracted from heterogeneous data sources, which can be used to build a knowledge graph. The evaluation results showed that the data in the KGPA are of high quality. The constructed KGPA can help physicians in clinical practice.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>knowledge graph</kwd>
        <kwd>pituitary adenoma</kwd>
        <kwd>entity fusion</kwd>
        <kwd>similarity calculation</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Pituitary adenoma is one of the most common central nervous system tumors. Most of the benign adenomas are characterized by swelling growth, which can be cured by surgery or medicine [<xref ref-type="bibr" rid="ref1">1</xref>]. However, a small number of pituitary adenomas are not sensitive to surgery, radiotherapy, and drug therapy, and metastasis will lead to pituitary adenocarcinoma [<xref ref-type="bibr" rid="ref2">2</xref>]. At present, there are difficulties in the diagnosis and treatment of pituitary adenoma [<xref ref-type="bibr" rid="ref3">3</xref>]. In some cases, pituitary adenocarcinoma can even be life-threatening [<xref ref-type="bibr" rid="ref4">4</xref>] and the prognosis is extremely poor. Therefore, pituitary adenoma has become a hot topic in life science research, and an open knowledgebase of pituitary adenoma is needed.</p>
      <p>A knowledge graph is a general framework for formal description of knowledge, which can describe knowledge in the form of triples as a “head entity-relation-tail entity,” one of the most popular knowledge representation methods currently adopted [<xref ref-type="bibr" rid="ref5">5</xref>]. Well-known open-domain knowledge graphs include Freebase, DBpedia, YAGO, and NELL, among others [<xref ref-type="bibr" rid="ref6">6</xref>]. Knowledge graphs are also widely used in the medical field. Gong et al [<xref ref-type="bibr" rid="ref7">7</xref>] proposed a method to build a diabetes knowledgebase by mining the web; they extracted knowledge from the semistructured content of the vertical portal and then mapped the information onto a unified knowledge graph. Ernst et al [<xref ref-type="bibr" rid="ref8">8</xref>] constructed a biomedical science knowledge graph in which they extracted data using distant supervision methods and used logical reasoning for consistency checks. Rotmensch et al [<xref ref-type="bibr" rid="ref9">9</xref>] designed an automatic extraction framework to directly extract diseases and symptoms from electronic medical records (EMRs), and automatically constructed a knowledge graph.</p>
      <p>Data fusion is an important step of the integration of heterogeneous data in the construction of knowledge graphs. Entity fusion includes methods based on character similarity, clustering, deep learning, and others. Zhang et al [<xref ref-type="bibr" rid="ref10">10</xref>] proposed a novel multisource medical data integration and mining solution for better health care services, which can search for similar medical records in a time-efficient and privacy-preserving manner. Wang et al [<xref ref-type="bibr" rid="ref11">11</xref>] extracted different semantic words using multimodal trees and performed multigranularity feature fusion on the data. Li et al [<xref ref-type="bibr" rid="ref12">12</xref>] proposed a novel fusion-embedding learning model, G2SKGE, which aims to learn the subgraph structure information of the entity in a knowledge graph. Li et al [<xref ref-type="bibr" rid="ref13">13</xref>] proposed an approach to build a knowledge graph for hepatocellular carcinoma, and applied a biomedical information extraction system to filter and fuse the data.</p>
      <p>In this study, we extracted data from patient EMRs and medical websites, fused the entities using our proposed head-and-tail entity fusion model, and constructed a medical knowledge graph for pituitary adenoma (KGPA). The main contributions of this study are as follows. First, there is currently no Chinese knowledgebase for pituitary adenoma. Therefore, this study presents the complete process of knowledge graph construction, which was used to construct the KGPA. Second, to integrate the data extracted from different sources, we propose a fusion method suitable for medical data that was used in the process of KGPA construction. The method includes two steps: tail entity fusion and head entity fusion. Finally, knowledge of pituitary adenoma, such as the typical symptoms of different pituitary adenoma–related diseases, can be clearly revealed by searching the KGPA. According to doctors’ feedback on use of the KGPA, the content displayed in the KGPA was considered to be consistent with the actual clinical situation.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>According to the characteristics of pituitary adenoma diseases combined with the characteristics of Chinese electronic medical records (CEMRs) and Chinese health websites, we designed the construction framework of the KGPA, as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, which includes 5 steps: raw data collection, schema design, data extraction, data fusion, and data storage and visualization. Each step is introduced in detail below, with emphasis on the proposed data fusion model.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Process for construction of the knowledge graph for pituitary adenoma. CEMR: Chinese electronic medical record; NLP: natural language processing; BERT: bidirectional encoder representations from transformers.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e28218_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Schema</title>
        <p>The knowledge graph includes a data layer and a schema layer [<xref ref-type="bibr" rid="ref14">14</xref>]. Entities, relations, and attributes in the data layer are regulated and restricted by the schema. The schema was based on several open-access authoritative terminologies and ontologies, including the UMLS Semantic Network [<xref ref-type="bibr" rid="ref15">15</xref>], the concept definitions in SNOMED-CT [<xref ref-type="bibr" rid="ref16">16</xref>], and the International Statistical Classification of Diseases and Related Health Problems (ICD-10). In addition, the natural language processing datasets defined by the Informatics for Integrating Biology &#38; the Bedside [<xref ref-type="bibr" rid="ref17">17</xref>] and CEMRs Entity and Relations Annotation Specifications defined by Harbin Institute of Technology [<xref ref-type="bibr" rid="ref18">18</xref>] were also referenced for this task. With the help of clinical experts, a combination of top-down and bottom-up approaches was used to construct the KGPA schema.</p>
        <p>In our previous study of CEMRs data extraction, we found that the medical diagnosis and treatment activities could be summarized based on symptoms (symptom) and abnormal results (examination) [<xref ref-type="bibr" rid="ref19">19</xref>]. The doctor will give a comprehensive diagnosis conclusion (disease) and corresponding treatment measures (surgery, medicine). Therefore, the mentioned entities and the relations between them were abstracted for design of the schema. The CEMRs are detailed but contain a limited number of concepts; therefore, we extracted data from medical websites to expand the concepts. Through analyzing the data types of the websites, six types of concepts were added to the schema: pathogeny, treatment, examination, treatment department, English name, and alternative name. The most frequently used disease term in websites was selected as the concept of the disease, and then treatment and examination were defined as related entities. Pathogeny, treatment department, English name, and alternative name were defined as the attributes of the disease. Attributes can be used to describe the internal characteristics of the disease entities; the more attributes there are, the more complete the information of the entity will be [<xref ref-type="bibr" rid="ref20">20</xref>]. The KGPA schema is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Schema of the knowledge graph for pituitary adenoma (KGPA). Concepts extracted from Chinese electronic medical records are in red. Concepts extracted from health websites are in blue. GH: growth hormone.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e28218_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Extraction</title>
        <sec>
          <title>Process</title>
          <p>In the process of data extraction, entities and relations were first extracted from unstructured information in CEMRs. For website data, specific HTML wrappers were constructed to directly extract the triples (eg, Cushing syndrome, Symptom, Lethargy). The details are described below.</p>
        </sec>
        <sec>
          <title>EMR Data Extraction</title>
          <p>CEMRs include information on admission, discharge summary, disease course, and a medical record summary, among other details. Since the history of present illness (HPI) in the admission record contains a large amount of detailed patient symptoms and preliminary examination information, the HPI was selected as the main data source in our study.</p>
          <p>The Chinese Clinical Natural Language Processing System (CCNLP) [<xref ref-type="bibr" rid="ref21">21</xref>] developed by our team was used to annotate entities and relations in CEMRs, as shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. The CCNLP allows users to customize the entities and relations. According to the definition of the schema, we defined 6 types of entities and 5 types of relations in the CCNLP. Two clinicians were invited to perform annotation. The conditional random field model is embedded in the system, which can train the annotated corpus and assist in annotation. The results of the two annotators were evaluated by the consistency evaluation function of the CCNLP [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Medical text annotation using the Chinese Clinical Natural Language Processing System (CCNLP).</p>
            </caption>
            <graphic xlink:href="medinform_v9i7e28218_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Web Data Extraction</title>
          <p>The web data were mainly collected from medical websites and high-quality encyclopedia websites. The extracted disease entities in the CEMRs were used as search terms on the medical websites. Since single medical website retrieval is not comprehensive, four websites with higher data quality were used: xywy [<xref ref-type="bibr" rid="ref23">23</xref>], UpToDate [<xref ref-type="bibr" rid="ref24">24</xref>], Baidu Encyclopedia [<xref ref-type="bibr" rid="ref25">25</xref>], and chunyuyisheng [<xref ref-type="bibr" rid="ref26">26</xref>]. All of these websites provide HTML pages of diseases, symptoms, treatments, and other relevant details. This enabled obtaining sufficient medical knowledge to construct the knowledge graph.</p>
          <p>Since the websites shared similar structures, xywy was selected as an example to illustrate the details of the pages and their structures used for data extraction. As shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>, the information in “Infobox” can be directly extracted and stored as triples. The “Medicines” data in the website are stored in a tabular format. We extracted the title and first lines of the tables, which were combined as triples. Different wrappers were designed to extract information from different web pages.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Web page structural analysis for knowledge extraction.</p>
            </caption>
            <graphic xlink:href="medinform_v9i7e28218_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Data Fusion</title>
        <sec>
          <title>Framework</title>
          <p>Triples from different sources may have complements, redundancies, or even conflicts among each other. To ensure accuracy of the data in the knowledge graph, a data fusion method was proposed as shown in <xref rid="figure5" ref-type="fig">Figure 5</xref>. The data were fused by calculating the similarity of head entities and tail entities. The purpose of similarity calculation is to find the optimal alignment between the website entities and CEMR entities. The fusion methods were carried out in two steps. First, the similarity of tail entities (symptoms and examinations contained in both data sources) was calculated based on bidirectional encoder representations from transformers (BERT), the TransR model, and the Jaccard coefficient. Tail entity fusion enabled obtaining a more consistent entity expression. Second, the structural information of the graph was used to merge the head entities (diseases) through the TransR model, Jaccard coefficient, and the count of same nodes.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Data fusion framework. CEMR: Chinese electronic medical record; BERT: bidirectional encoder representations from transformers.</p>
            </caption>
            <graphic xlink:href="medinform_v9i7e28218_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Tail Entity Fusion Model</title>
          <sec>
            <title>Features</title>
            <p>In the entity fusion task, there are only two types of training results (positive and negative); therefore, this can be converted into a binary classification problem. In the tail entity fusion experiment, three different features were constructed as model inputs: semantic similarity, TransR similarity, and Jaccard similarity.</p>
          </sec>
          <sec>
            <title>Semantic Similarity Calculation Based on BERT</title>
            <p>A semantic model is widely used in the similarity calculation of textual data. In this study, the semantic classification model was trained with labeled data. BERT-Base, Chinese [<xref ref-type="bibr" rid="ref27">27</xref>] was used to construct the embedding of the tail entities in CEMRs and website data, as shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>. Tail entities can be regarded as short sentences, and the matching problem of entity pairs can be modeled as a classification task. The first output vector of the coding layer “C” is taken as the semantic representation of the entity pair. “[CLS]” represents the beginning of a sentence and “[SEP]” separates the two sentences. “E” represents the word embedding of the input character and “T” represents the contextual representation of the input character. The semantic categories are then calculated using two full connection layers: full connection layer 1 uses a tanh activation function and full connection layer 2 normalizes the probability of each class with the softmax function.</p>
            <fig id="figure6" position="float">
              <label>Figure 6</label>
              <caption>
                <p>Semantic similarity calculation model based on bidirectional encoder representations from transformers (BERT).</p>
              </caption>
              <graphic xlink:href="medinform_v9i7e28218_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Knowledge Representation Learning</title>
            <p>Knowledge representation learning methods do not rely on textual information but rather obtain the depth characteristics of the data by mapping the entities to low-dimensional space vectors. A total of 4684 pituitary adenoma triples were used to test the data representation ability of the Trans models [<xref ref-type="bibr" rid="ref28">28</xref>]. We evaluated the performance of the models using hits@10 (ie, the proportion of correctly aligned entities ranked in the top 10 predictions); a higher hits@10 value indicates better performance. The evaluation results were 0.27 for TransE, 0.37 for TransH, and 0.39 for TransR. Therefore, TransR was selected for knowledge representation learning. The extracted triples were used as positive examples (head [<italic>h</italic>], relation [<italic>r</italic>]<italic>,</italic> tail [<italic>t</italic>]). For each positive triple, we randomly replaced its head entity (<italic>h’, r, t</italic>) or tail entity (<italic>h, r, t’</italic>) to generate a negative triple. A mapping matrix <italic>M<sub>r</sub></italic> was used to describe the relational space of relation <italic>r</italic>. Using the gradient descent method to update the parameters, we obtained the vector of the tail entities <italic>trans_vec</italic>. The cosine similarity cos was used to calculate the tail entity similarity of the two data sources, as shown in Equation 1:</p>
            <p>
              <disp-formula>Simtail_trans(m<sub>i</sub>,n<sub>i</sub>)=argmax(cos[trans_vec<sub>mi</sub>,trans_vec<sub>ni</sub>])</disp-formula>
              <bold>(1)</bold>
            </p>
          </sec>
          <sec>
            <title>Jaccard Coefficient</title>
            <p>The Jaccard coefficient was selected as the third feature of tail entity fusion. The Jaccard coefficient refers to the ratio of the number of elements in the intersection of two sets to the number of elements in their union; the higher the Jaccard value, the higher the similarity. We assigned each tail entity in the CEMRs and websites to sets <italic>t<sub>1</sub></italic> and <italic>t<sub>2</sub></italic>, respectively. The Jaccard coefficient represents the ratio of the same number of Chinese characters in the two words to the total number of characters, as shown in Equation 2:</p>
            <p>
              <disp-formula>Jaccard(t<sub>1</sub>,t<sub>2</sub>)=&#124;t<sub>1</sub>∩t<sub>2</sub>&#124;/(&#124;t<sub>1</sub>&#124;+&#124;t<sub>2</sub>&#124;–&#124;t<sub>1</sub>∩t<sub>2</sub>&#124;)</disp-formula>
              <bold>(2)</bold>
            </p>
          </sec>
        </sec>
        <sec>
          <title>Head Entity Fusion Model</title>
          <sec>
            <title>Features</title>
            <p>When merging head entities (diseases), the similarity of the attributes and structures of the two entities was mainly considered. That is, if two head entities are the same, their neighboring entities should also be similar.</p>
          </sec>
          <sec>
            <title>Attribute Similarity</title>
            <p>Entity alignment can be performed using the alternative name attribute or the English name attribute of the disease. If the head entities in the two data sources have the same alternative name or English name, the two entities can be considered the same. For example, “垂体生长激素腺瘤” (growth hormone–secreting pituitary adenoma) has alternative names of “pituitary growth hormone secreting adenoma” and “GH adenoma.” Therefore, we can align “pituitary growth hormone secreting adenoma” and “GH adenoma” to “growth hormone–secreting pituitary adenoma.”</p>
          </sec>
          <sec>
            <title>Structural Similarity Fusion Model</title>
            <p>When the head entities cannot be aligned by the attribute, we propose using the structural similarity model to fuse entities. Three different features were chosen as the classifier model’s inputs: the number of identical tail nodes, Jaccard similarity, and TransR similarity, as shown in Equation 3.</p>
            <p>The head entity and the tail entity have a 1-N relationship. Taking two disease sets <inline-graphic xlink:href="medinform_v9i7e28218_fig8.png" xlink:type="simple" mimetype="image"/> from two data sources as an example, <inline-graphic xlink:href="medinform_v9i7e28218_fig9.png" xlink:type="simple" mimetype="image"/> represents the number of identical tail nodes in different sets and <inline-graphic xlink:href="medinform_v9i7e28218_fig10.png" xlink:type="simple" mimetype="image"/> represents the ratio of the same number of characters to the total number of characters of two sets. The order of words in the set is not considered. For the attribute similarity, the vector representation of entities was trained using the TransR model, whereas in this case, we calculated the vector of the head entity using the TransR model.</p>
            <p>
              <disp-formula>
                <graphic xlink:href="medinform_v9i7e28218_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
              </disp-formula>
              <bold>(3)</bold>
            </p>
            <p>After the head entities of two heterogeneous data sources were fused, the triples containing all of the disease information were obtained. Finally, to standardize the disease names in the knowledge graph, we mapped them to the ICD codes.</p>
          </sec>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Extraction</title>
        <p>Three hundred clinical medical records and 4 portal websites were selected as data sources to construct the KGPA. Although these are all Chinese resources, our proposed approach is not dependent on a particular language and can be applied to data resources in other languages in the same way. The data in CEMRs were annotated by two doctors using the CCNLP system [<xref ref-type="bibr" rid="ref21">21</xref>]. With the consistency test function of the system, the consistency of the annotations reached 95.2%. Website data were extracted according to the wrapper defined in this study. <xref ref-type="table" rid="table1">Table 1</xref> shows the number of all entities extracted from the two types of data sources. The concepts are abundant in websites, whereas the CEMRs included more symptom entities, which can help to expand more data types for the KGPA. The “Prefusion” column of <xref ref-type="table" rid="table1">Table 1</xref> shows the number of all relations extracted from the two types of data sources.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Number of relations before and after data fusion.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Relation</td>
                <td>Head entity</td>
                <td>Tail entity</td>
                <td>Prefusion</td>
                <td>After fusion</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Diseases_rel_Symptom</td>
                <td>disease</td>
                <td>symptom</td>
                <td>3154</td>
                <td>1940</td>
              </tr>
              <tr valign="top">
                <td>Diseases_rel_Surgery</td>
                <td>disease</td>
                <td>surgery</td>
                <td>55</td>
                <td>45</td>
              </tr>
              <tr valign="top">
                <td>Diseases_rel_Medicines</td>
                <td>disease</td>
                <td>medicine</td>
                <td>245</td>
                <td>182</td>
              </tr>
              <tr valign="top">
                <td>Diseases_rel_Examination</td>
                <td>disease</td>
                <td>examination</td>
                <td>437</td>
                <td>274</td>
              </tr>
              <tr valign="top">
                <td>Symptoms_rel_Body structure</td>
                <td>symptom</td>
                <td>body</td>
                <td>396</td>
                <td>281</td>
              </tr>
              <tr valign="top">
                <td>Diseases_rel_Treatment</td>
                <td>disease</td>
                <td>treatment</td>
                <td>110</td>
                <td>109</td>
              </tr>
              <tr valign="top">
                <td>Diseases_attr_Pathogeny</td>
                <td>disease</td>
                <td>pathogeny</td>
                <td>122</td>
                <td>104</td>
              </tr>
              <tr valign="top">
                <td>Diseases_attr_Department</td>
                <td>disease</td>
                <td>department</td>
                <td>71</td>
                <td>44</td>
              </tr>
              <tr valign="top">
                <td>Diseases_attr_English name</td>
                <td>disease</td>
                <td>English name</td>
                <td>71</td>
                <td>42</td>
              </tr>
              <tr valign="top">
                <td>Diseases_attr_Alternative name</td>
                <td>disease</td>
                <td>alternative name</td>
                <td>23</td>
                <td>20</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Fusion</title>
        <p>Two hundred medical records were randomly selected for the fusion experiment. The ratio of the training set and test set was 8:2. The experiment was run under Windows 10, and the model was implemented with the TensorFlow framework.</p>
        <p>The proposed tail entity fusion model was used to perform entity fusion for symptoms and examinations. Before the fusion began, different entities with the same conceptual semantics extracted from different websites were merged to reduce duplication and computation. A vector representation of 768 dimensions was constructed through the Chinese BERT model, and then the similarity results were obtained by full connection layers. A 50-dimensional vector was obtained by the TransR model and the cosine similarity was used to calculate the entity pair similarity values. The Jaccard coefficient was used as a numerical feature. These three results were taken as features into the classification model. Three different classification models were adopted for training: logistic regression, decision tree, and neural network. The results are shown in <xref ref-type="table" rid="table2">Table 2</xref>. The neural network showed the best performance.</p>
        <p>Subsequently, the triples completed by the tail entity fusion model were used for the head entity fusion experiment. A total of 65 head entities were fused between CEMRs and websites. Among them, 17 entities could be directly mapped by disease name, 6 entities could be fused by attribute (eg, growth hormone–secreting pituitary adenoma, pituitary microadenoma, Cushing syndrome, hypothyroidism), and 42 head entities were fused based on the proposed structural similarity fusion model. The three classification models above were used for training. As shown in <xref ref-type="table" rid="table2">Table 2</xref>, the decision trees performed better when fusing head entities because the data inputs to the model were smaller than those for fusing tail entities. With the increase of data volume, the advantages of the neural network were reflected in the fusion of tail entities.</p>
        <p>Additionally, we divided the features into four variants for an ablation study. We selected logistic regression as the classification model to explore the contribution of different features to the model, and these results are also shown in <xref ref-type="table" rid="table2">Table 2</xref>. These three features had nearly the same contributions to the model in the head entity fusion. For a specific disease knowledge graph, the Jaccard similarity feature played a major role in the tail entity ablation experiment, and the features based on BERT and TransR simply contributed by fine-tuning the model.</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows that our proposed model achieves a higher F1-score than previous models. Compared with previous models, we divided the entities into head entities and tail entities and fused them according to different characteristics. Different concepts were considered separately in the step-by-step fusion process, which improved the precision of the fusion.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Head and tail fusion model performance.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="30"/>
            <col width="580"/>
            <col width="0"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="0"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td colspan="5">Fusion model</td>
                <td colspan="3">Precision (%)</td>
                <td colspan="3">Recall (%)</td>
                <td>F-score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Head entity fusion</bold>
                </td>
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="3">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">
                  <bold>Logistic regression models</bold>
                </td>
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Ja<sup>a</sup>+TransR</td>
                <td colspan="3">83.37</td>
                <td colspan="3">84.06</td>
                <td colspan="3">83.71</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Sa<sup>b</sup>+TransR</td>
                <td colspan="3">83.37</td>
                <td colspan="3">84.55</td>
                <td colspan="3">83.95</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Ja+Sa</td>
                <td colspan="3">83.85</td>
                <td colspan="3">84.55</td>
                <td colspan="3">84.19</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Ja+Sa+TransR</td>
                <td colspan="3">83.92</td>
                <td colspan="3">84.61</td>
                <td colspan="3">84.26</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">Neural network</td>
                <td colspan="3">97.29</td>
                <td colspan="3">97.03</td>
                <td colspan="2">97.16</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">Decision tree</td>
                <td colspan="3">97.47</td>
                <td colspan="3">97.18</td>
                <td colspan="2">97.32</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Tail entity fusion</bold>
                </td>
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="3">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">
                  <bold>Logistic regression models</bold>
                </td>
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>BERT<sup>c</sup>+TransR</td>
                <td colspan="3">61.73</td>
                <td colspan="3">61.74</td>
                <td colspan="3">61.73</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Ja+BERT</td>
                <td colspan="3">95.76</td>
                <td colspan="3">95.83</td>
                <td colspan="3">95.79</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Ja+TransR</td>
                <td colspan="3">95.89</td>
                <td colspan="3">95.93</td>
                <td colspan="3">95.90</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Ja+BERT+TransR</td>
                <td colspan="3">95.92</td>
                <td colspan="3">95.94</td>
                <td colspan="3">95.93</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">Neural network</td>
                <td colspan="3">98.43</td>
                <td colspan="3">98.72</td>
                <td colspan="2">98.57</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">Decision tree</td>
                <td colspan="3">98.18</td>
                <td colspan="3">98.05</td>
                <td colspan="2">98.11</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Ja: Jaccard similarity.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Sa: identical tail nodes in different sets: <inline-graphic xlink:href="medinform_v9i7e28218_fig9.png" xlink:type="simple" mimetype="image"/>.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>BERT: bidirectional encoder representations from transformers.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Model comparison.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="200"/>
            <col width="600"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Research field</td>
                <td>Method</td>
                <td>F1-score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Ruan et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td>
                <td>Symptom</td>
                <td>Align entities according to the string similarities of the entity names and attribute values</td>
                <td>—<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>Yang et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td>
                <td>Disease, medicine</td>
                <td>Align entities according to the entity’s attribute types (attr<sub>bool</sub>, attr<sub>numeric</sub>, attr<sub>string</sub>, attr<sub>time</sub>)</td>
                <td>0.60</td>
              </tr>
              <tr valign="top">
                <td>Sun et al [<xref ref-type="bibr" rid="ref31">31</xref>]</td>
                <td>Disease, medicine, symptom</td>
                <td>Character similarity of entity pairs and degree centrality of entities in the graph</td>
                <td>0.76</td>
              </tr>
              <tr valign="top">
                <td>Liu et al [<xref ref-type="bibr" rid="ref32">32</xref>]</td>
                <td>Disease, medicine, examination</td>
                <td>Semantic classification model based on pretrained BERT<sup>b</sup></td>
                <td>0.83</td>
              </tr>
              <tr valign="top">
                <td>Our model</td>
                <td>Symptom, examination, disease</td>
                <td>Multifeature learning based on head-and-tail entities</td>
                <td>0.97</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Not provided.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>BERT: bidirectional encoder representations from transformers.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The triples obtained after data fusion were stored and visualized in Neo4j [<xref ref-type="bibr" rid="ref33">33</xref>]. The KGPA contained 1789 entities and 3041 pairs of relations of 73 pituitary adenoma–related diseases. For a knowledge graph, accuracy is of great importance. However, there is currently no gold standard for pituitary adenoma knowledge graph validation. To evaluate the quality of the knowledge graph, the accuracy of triples was used as an indicator. Three hundred triples were randomly sampled and each triple was manually evaluated by two physicians; the accuracy reached 95.4%.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>A knowledge graph was constructed by mining CEMRs and web resources. In the process of KGPA construction, to solve the problem of knowledge duplication between heterogeneous data sources, we proposed a head-and-tail entity fusion model. The model showed good performance on the fusion of medical data.</p>
        <p>The KGPA was proven to be effective when displaying the typical symptoms of pituitary adenoma–related diseases. For example, the symptoms of the disease “prolactin (PRL)-secreting pituitary adenomas” that differ from those of the disease “nonfunctioning pituitary adenoma” can be retrieved using the following query in Cypher: “MATCH (p:dis{disease: 垂体泌乳素腺瘤})-[:dis_rel_sym]-&#62;(n), (m)&#60;-[:dis_rel_sym] -(q:dis{disease:垂体无功能腺瘤}), WHERE (m)&#60;&#62;(n), RETURN p,n,q.” As shown in <xref rid="figure7" ref-type="fig">Figure 7</xref>, the entities in the middle of the graph are symptoms of both diseases and the entities on the right are typical symptoms unique to the disease “PRL-secreting pituitary adenomas.”</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Differences of typical symptoms between “prolactin-secreting pituitary adenomas” and “nonfunctioning pituitary adenoma” in the knowledge graph for pituitary adenoma.</p>
          </caption>
          <graphic xlink:href="medinform_v9i7e28218_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>By querying the KGPA with Cypher, we found that most pituitary adenoma–related diseases have the following basic symptoms: headache, vision problems, fatigue, slow reaction, mood problems, changes in height and weight, changes in appetite, and changes in sleep. Nonfunctioning pituitary adenoma has all of the basic symptoms listed above. In addition to the basic symptoms, pituitary thyroid-stimulating hormone adenoma is also associated with symptoms of goiter, palpitation, and exophthalmos. The typical symptoms of PRL-secreting pituitary adenomas are associated with the reproductive system, such as decreased libido and menstrual changes in women. The typical symptoms of pituitary growth hormone adenoma are altered facial features, enlarged hands and feet, snoring, and metabolic disorders. Cushing syndrome is characterized by obesity, altered skin color, increased hair, and edema. Based on clinicians’ feedback on the use of the KGPA, the knowledge in the KGPA was consistent with the actual clinical situation. The KGPA will be useful for clinical interns in diagnosis and treatment, and may also be helpful for medical students to quickly master knowledge of pituitary adenoma–related diseases.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The KGPA was constructed by integrating CEMRs and web data related to pituitary adenoma. However, since we only focused on pituitary tumors, the data volume was relatively small. In the next step, we plan to try to extend the method proposed in this study to the entire neurosurgery field or even larger fields and apply the knowledge graph to clinical practice.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>This study shows that entities and relations extracted from heterogeneous data sources such as CEMRs and health websites can be used to construct a knowledge graph after entity fusion. The head-and-tail entity fusion model proposed in this paper achieved an F1-score of 97%, which is higher than that reported for previous models. The KGPA constructed in this study can be used to discover the knowledge hidden in the source text, such as typical symptoms unique to the disease “PRL-secreting pituitary adenomas.” Based on clinicians’ feedback, the knowledge in the KGPA was consistent with the actual clinical situation. The knowledge graph constructed will be useful and helpful for patients, medical students, and interns to assist in obtaining information for symptoms, diagnosis, treatment, and disease pathogenesis.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CCNLP</term>
          <def>
            <p>Chinese Clinical Natural Language Processing System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CEMR</term>
          <def>
            <p>Chinese electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HPI</term>
          <def>
            <p>history of present illness</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">KGPA</term>
          <def>
            <p>knowledge graph for pituitary adenoma</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">PRL</term>
          <def>
            <p>prolactin</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research has been funded by the Science and Technology Innovation 2030-Major Project (2020AAA0104902), the Chinese Academy of Medical Sciences Initiative for Innovative Medicine (2017-I2M-3-014), the Chinese Academy of Medical Sciences and Peking Union Medical College Fundamental Scientific Research Funds Project of the Central Public Welfare Research Institution (2018PT33005), and the Hunan Provincial Key Research and Development Program (2020SK2089).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>AF designed the methods, analyzed the results of experiments, and drafted the paper. PL, JH, and WZ extracted the data and performed the data fusion. MF collected the electronic medical records and annotated the dataset. MF and HR evaluated the pituitary adenoma knowledge graph. XC supervised the research and revised the paper. All authors read and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Osamura</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Egashira</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Miyai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yamazaki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Takekoshi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sanno</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Teramoto</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Molecular pathology of the pituitary. Development and functional differentiation of pituitary adenomas</article-title>
          <source>Front Horm Res</source>
          <year>2004</year>
          <volume>32</volume>
          <fpage>20</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.1159/000079036</pub-id>
          <pub-id pub-id-type="medline">15281338</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kinoshita</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tominaga</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Usui</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Arita</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sugiyama</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kurisu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Impact of subclinical haemorrhage on the pituitary gland in patients with pituitary adenomas</article-title>
          <source>Clin Endocrinol (Oxf)</source>
          <year>2014</year>
          <month>05</month>
          <volume>80</volume>
          <issue>5</issue>
          <fpage>720</fpage>
          <lpage>725</lpage>
          <pub-id pub-id-type="doi">10.1111/cen.12349</pub-id>
          <pub-id pub-id-type="medline">24125536</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>YJ</given-names>
            </name>
          </person-group>
          <article-title>Pituitary apoplexy due to pituitary adenoma infarction</article-title>
          <source>J Korean Neurosurg Soc</source>
          <year>2008</year>
          <month>05</month>
          <volume>43</volume>
          <issue>5</issue>
          <fpage>246</fpage>
          <lpage>249</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jkns.or.kr/journal/view.php?doi=10.3340/jkns.2008.43.5.246"/>
          </comment>
          <pub-id pub-id-type="doi">10.3340/jkns.2008.43.5.246</pub-id>
          <pub-id pub-id-type="medline">19096606</pub-id>
          <pub-id pub-id-type="pmcid">PMC2588219</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaushik</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ramakrishnaiah</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Angtuaco</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Ectopic pituitary adenoma in persistent craniopharyngeal canal</article-title>
          <source>J Comput Assist Tomogr</source>
          <year>2010</year>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>612</fpage>
          <lpage>614</lpage>
          <pub-id pub-id-type="doi">10.1097/rct.0b013e3181dbe5d1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Byambasuren</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sui</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zan</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Preliminary study on the construction of Chinese medical knowledge graph</article-title>
          <source>J Chinese Inf Process</source>
          <year>2019</year>
          <volume>33</volume>
          <issue>10</issue>
          <fpage>1</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://jcip.cipsc.org.cn/EN/Y2019/V33/I10/1"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Real-world data medical knowledge graph: construction and applications</article-title>
          <source>Artif Intell Med</source>
          <year>2020</year>
          <month>03</month>
          <volume>103</volume>
          <fpage>101817</fpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2020.101817</pub-id>
          <pub-id pub-id-type="medline">32143785</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(19)30954-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>On building a diabetes centric knowledge base via mining the web</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>04</month>
          <day>09</day>
          <volume>19</volume>
          <issue>Suppl 2</issue>
          <fpage>49</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0771-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0771-6</pub-id>
          <pub-id pub-id-type="medline">30961582</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0771-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6454670</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ernst</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Siu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weikum</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>KnowLife: a versatile approach for constructing a large knowledge graph for biomedical sciences</article-title>
          <source>BMC Bioinformatics</source>
          <year>2015</year>
          <month>05</month>
          <day>14</day>
          <volume>16</volume>
          <fpage>157</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-015-0549-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12859-015-0549-5</pub-id>
          <pub-id pub-id-type="medline">25971816</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12859-015-0549-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC4448285</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rotmensch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Halpern</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tlimat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Horng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sontag</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Learning a health knowledge graph from electronic medical records</article-title>
          <source>Sci Rep</source>
          <year>2017</year>
          <month>07</month>
          <day>20</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>5994</fpage>
          <pub-id pub-id-type="doi">10.1038/s41598-017-05778-z</pub-id>
          <pub-id pub-id-type="medline">28729710</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-017-05778-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC5519723</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Lian</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Multi-source medical data integration and mining for healthcare services</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>165010</fpage>
          <lpage>165017</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.3023332</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Chinese medical named entity recognition based on multi-granularity semantic dictionary and multimodal tree</article-title>
          <source>J Biomed Inform</source>
          <year>2020</year>
          <month>11</month>
          <volume>111</volume>
          <fpage>103583</fpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2020.103583</pub-id>
          <pub-id pub-id-type="medline">33010427</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(20)30211-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Graph2Seq: fusion embedding learning for knowledge graph completion</article-title>
          <source>IEEE Access</source>
          <year>2019</year>
          <volume>7</volume>
          <fpage>157960</fpage>
          <lpage>157971</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2019.2950230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>KGHC: a knowledge graph for hepatocellular carcinoma</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <month>07</month>
          <day>09</day>
          <volume>20</volume>
          <issue>Suppl 3</issue>
          <fpage>135</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-1112-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-020-1112-5</pub-id>
          <pub-id pub-id-type="medline">32646496</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-020-1112-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7346328</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nickel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tresp</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Gabrilovich</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>A review of relational machine learning for knowledge graphs</article-title>
          <source>Proc IEEE</source>
          <year>2016</year>
          <month>01</month>
          <volume>104</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.1109/jproc.2015.2483592</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>Unified Medical Language System (UMLS)</article-title>
          <source>National Library of Medicine</source>
          <access-date>2021-02-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nlm.nih.gov/research/umls/index.html">https://www.nlm.nih.gov/research/umls/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <source>SNOMED International</source>
          <access-date>2021-02-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.snomed.org/">https://www.snomed.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>Ö</given-names>
            </name>
            <name name-style="western">
              <surname>South</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>DuVall</surname>
              <given-names>SL</given-names>
            </name>
          </person-group>
          <article-title>2010 i2b2/VA challenge on concepts, assertions, and relations in clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <month>09</month>
          <day>01</day>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>552</fpage>
          <lpage>556</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21685143"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000203</pub-id>
          <pub-id pub-id-type="medline">21685143</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000203</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Qu</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>QB</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>YX</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>YJ</given-names>
            </name>
          </person-group>
          <article-title>Corpus construction for named entities and entity relations on Chinese electronic medical records</article-title>
          <source>J Softw</source>
          <year>2016</year>
          <fpage>2725</fpage>
          <lpage>2746</lpage>
          <pub-id pub-id-type="doi">10.13328/j.cnki.jos.004880</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Study on the building of clinical text natural language processing system—taking cTAKES as an example</article-title>
          <source>J Med Inform</source>
          <year>2018</year>
          <volume>39</volume>
          <issue>12</issue>
          <fpage>48</fpage>
          <lpage>53</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Su</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving distributed data fusion based on attribute protection</article-title>
          <source>IEEE Trans Ind Inf</source>
          <year>2019</year>
          <month>10</month>
          <volume>15</volume>
          <issue>10</issue>
          <fpage>5765</fpage>
          <lpage>5777</lpage>
          <pub-id pub-id-type="doi">10.1109/tii.2019.2912175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <source>Chinese Clinical Natural Language Processing System (CCNLP)</source>
          <year>2021</year>
          <access-date>2021-02-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://ccnlp.imicams.ac.cn/">http://ccnlp.imicams.ac.cn/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Annotating Chinese e-medical record for knowledge discovery</article-title>
          <source>Data Anal Knowl Discov</source>
          <year>2019</year>
          <volume>3</volume>
          <issue>7</issue>
          <fpage>123</fpage>
          <lpage>132</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <source>xywy</source>
          <access-date>2020-12-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.xywy.com/">http://www.xywy.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <source>UpToDate</source>
          <access-date>2020-12-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.uptodate.cn/home/">https://www.uptodate.cn/home/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <source>Baidu Encyclopedia</source>
          <access-date>2020-12-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://baike.baidu.com/">https://baike.baidu.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <source>chunyuyisheng</source>
          <access-date>2020-12-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.chunyuyisheng.com/">https://www.chunyuyisheng.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <access-date>2021-07-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1810.04805">https://arxiv.org/abs/1810.04805</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yue</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Learning entity and relation embeddings for knowledge resolution</article-title>
          <source>Procedia Comput Sci</source>
          <year>2017</year>
          <volume>108</volume>
          <fpage>345</fpage>
          <lpage>354</lpage>
          <pub-id pub-id-type="doi">10.1016/j.procs.2017.05.045</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>An automatic approach for constructing a knowledge base of symptoms in Chinese</article-title>
          <source>J Biomed Semantics</source>
          <year>2017</year>
          <month>09</month>
          <day>20</day>
          <volume>8</volume>
          <issue>Suppl 1</issue>
          <fpage>33</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0145-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13326-017-0145-x</pub-id>
          <pub-id pub-id-type="medline">29297414</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13326-017-0145-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC5763289</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Qiao</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Construction of Chinese knowledge graph of heart disease</article-title>
          <source>J Wuhan Univ</source>
          <year>2020</year>
          <volume>66</volume>
          <issue>3</issue>
          <fpage>261</fpage>
          <lpage>267</lpage>
          <pub-id pub-id-type="doi">10.14188/j.1671-8836.2018.0217</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Knowledge extraction and alignment for respiratory disease</article-title>
          <source>Harbin Institute of Technology</source>
          <year>2019</year>
          <access-date>2021-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://kns.cnki.net/KCMS/detail/detail.aspx?dbname=CMFD202001&#38;filename=1019646460.nh/">https://kns.cnki.net/KCMS/detail/detail.aspx?dbname=CMFD202001&#38;filename=1019646460.nh/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Construction of an open dataset for clinical event graph</article-title>
          <source>J Chinese Inf Process</source>
          <year>2020</year>
          <volume>11</volume>
          <fpage>37</fpage>
          <lpage>48</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Balaur</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Mazein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Saqi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lysenko</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rawlings</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Auffray</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Recon2Neo4j: applying graph database technologies for managing comprehensive genome-scale networks</article-title>
          <source>Bioinformatics</source>
          <year>2017</year>
          <month>04</month>
          <day>01</day>
          <volume>33</volume>
          <issue>7</issue>
          <fpage>1096</fpage>
          <lpage>1098</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27993779"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btw731</pub-id>
          <pub-id pub-id-type="medline">27993779</pub-id>
          <pub-id pub-id-type="pii">btw731</pub-id>
          <pub-id pub-id-type="pmcid">PMC5408918</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
