<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i10e20291</article-id>
      <article-id pub-id-type="pmid">33084582</article-id>
      <article-id pub-id-type="doi">10.2196/20291</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Building a Pharmacogenomics Knowledge Model Toward Precision Medicine: Case Study in Melanoma</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>He</surname>
            <given-names>Zonglin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Friedrich</surname>
            <given-names>Christoph</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Kang</surname>
            <given-names>Hongyu</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9647-0645</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Jiao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6391-8343</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Meng</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4308-6660</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>Liu</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1144-0178</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Hou</surname>
            <given-names>Li</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Institute of Medical Information &#38; Library</institution>
            <institution>Chinese Academy of Medical Sciences/Peking Union Medical College</institution>
            <addr-line>3 Yabao Road, Chaoyang District</addr-line>
            <addr-line>Beijing</addr-line>
            <country>China</country>
            <phone>86 18910120178</phone>
            <email>hou.li@imicams.ac.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9226-2216</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Institute of Medical Information &#38; Library</institution>
        <institution>Chinese Academy of Medical Sciences/Peking Union Medical College</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Biomedical Engineering</institution>
        <institution>School of Life Science</institution>
        <institution>Beijing Institute of Technology</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Li Hou <email>hou.li@imicams.ac.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>10</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>21</day>
        <month>10</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>10</issue>
      <elocation-id>e20291</elocation-id>
      <history>
        <date date-type="received">
          <day>15</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>21</day>
          <month>6</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>11</day>
          <month>8</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>13</day>
          <month>9</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Hongyu Kang, Jiao Li, Meng Wu, Liu Shen, Li Hou. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 21.10.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/10/e20291/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Many drugs do not work the same way for everyone owing to distinctions in their genes. Pharmacogenomics (PGx) aims to understand how genetic variants influence drug efficacy and toxicity. It is often considered one of the most actionable areas of the personalized medicine paradigm. However, little prior work has included in-depth explorations and descriptions of drug usage, dosage adjustment, and so on.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We present a pharmacogenomics knowledge model to discover the hidden relationships between PGx entities such as drugs, genes, and diseases, especially details in precise medication.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>PGx open data such as DrugBank and RxNorm were integrated in this study, as well as drug labels published by the US Food and Drug Administration. We annotated 190 drug labels manually for entities and relationships. Based on the annotation results, we trained 3 different natural language processing models to complete entity recognition. Finally, the pharmacogenomics knowledge model was described in detail.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In entity recognition tasks, the Bidirectional Encoder Representations from Transformers–conditional random field model achieved better performance with micro-F1 score of 85.12%. The pharmacogenomics knowledge model in our study included 5 semantic types: drug, gene, disease, precise medication (population, daily dose, dose form, frequency, etc), and adverse reaction. Meanwhile, 26 semantic relationships were defined in detail. Taking melanoma caused by a <italic>BRAF</italic> gene mutation into consideration, the pharmacogenomics knowledge model covered 7 related drugs and 4846 triples were established in this case. All the corpora, relationship definitions, and triples were made publicly available.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We highlighted the pharmacogenomics knowledge model as a scalable framework for clinicians and clinical pharmacists to adjust drug dosage according to patient-specific genetic variation, and for pharmaceutical researchers to develop new drugs. In the future, a series of other antitumor drugs and automatic relation extractions will be taken into consideration to further enhance our framework with more PGx linked data.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>pharmacogenomics</kwd>
        <kwd>knowledge model</kwd>
        <kwd>BERT–CRF model</kwd>
        <kwd>named entity recognition</kwd>
        <kwd>melanoma</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Pharmacogenomics</title>
        <p>The field of pharmacogenomics (PGx) has developed rapidly since the initial scientific discoveries of genetic characteristics affecting individual response to drugs or other agents [<xref ref-type="bibr" rid="ref1">1</xref>]. Through these years of development, PGx aims at understanding how genetic variants influence drug efficacy and toxicity. It combines pharmacology (the science of drugs) and genomics (the study of genes and their functions), and is certain to improve new drug development and precision medication. Such studies can reveal how genetic variation across individuals affects a drug’s pharmacokinetics and pharmacodynamics [<xref ref-type="bibr" rid="ref2">2</xref>]. Many drugs do not work the same way for everyone. Consequently, PGx is often considered one of the most actionable areas of the personalized medicine paradigm [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>As of June 2019, more than 190 drugs [<xref ref-type="bibr" rid="ref4">4</xref>] approved by the US Food and Drug Administration (FDA) clearly stated in their medical specifications that they need to be deployed with greater precision based on individual genotype. The introduction of targeted drugs and targeted therapies provides a more feasible and effective way for cancer treatment, improves drug efficacy, and reduces adverse reactions. Therefore, studies of new therapies related to PGx such as drug combinations and new drug discoveries [<xref ref-type="bibr" rid="ref5">5</xref>] have become increasingly popular. A typical case of repurposing drugs is afatinib (40 mg q.d.), which was introduced [<xref ref-type="bibr" rid="ref6">6</xref>] for treating lung cancer after <italic>NRG1</italic> gene fusion.</p>
      </sec>
      <sec>
        <title>Named Entity Recognition</title>
        <p>Named entity recognition (NER) is a basic tool for natural language processing (NLP) tasks such as information extraction, question answering system, syntactic analysis, and machine translation. Its main goal is identifying entities with specific meaning in the text, mainly including people’s names, place names, organization names, proper nouns, etc. It is the foundation of identifying semantic relationships between entities and filling a knowledge base.</p>
        <p>The common statistical models of NER mainly include the Hidden Markov Model [<xref ref-type="bibr" rid="ref7">7</xref>] and the conditional random field (CRF) [<xref ref-type="bibr" rid="ref8">8</xref>]. In recent years, neural network deep learning methods based on the development of word vector technology, such as the convolutional neural network (CNN) [<xref ref-type="bibr" rid="ref9">9</xref>] and the recurrent neural network (RNN), have made a great breakthrough in the field of NLP. After that, long short-term memory (LSTM) [<xref ref-type="bibr" rid="ref10">10</xref>] added a memory cell to RNN, to overcome the problem of gradient explosion and gradient disappearance. Bidirectional RNN [<xref ref-type="bibr" rid="ref11">11</xref>] adopts a double-layer RNN structure, which can collect forward and backward information at the same time.</p>
        <p>In 2018, Devlin et al [<xref ref-type="bibr" rid="ref12">12</xref>] from Google AI Language proposed the Bidirectional Encoder Representations from Transformers (BERT) which provided outstanding performance in 11 NLP tasks, opening a new era for NLP. Similar to the general pretraining 2-stage training method, BERT uses the language model for pretraining as the first stage. In the second stage, it fine-tunes for downstream tasks, and achieves the best results in multiple NLP tasks. The BERT–CRF model [<xref ref-type="bibr" rid="ref13">13</xref>] and multilingual BERT model [<xref ref-type="bibr" rid="ref14">14</xref>] were trained on different languages such as Portuguese and the F1 score was ultimately improved. Today, the BERT model has also been applied in biomedical research. BERT-based models were investigated for their effectiveness in biomedical and clinical entity normalization, and achieved state-of-the-art performance on large-scale electronic health record notes [<xref ref-type="bibr" rid="ref15">15</xref>] and online corpus [<xref ref-type="bibr" rid="ref16">16</xref>]. The BioBERT model [<xref ref-type="bibr" rid="ref17">17</xref>] for biomedical text mining tasks and the ClinicalBERT [<xref ref-type="bibr" rid="ref18">18</xref>] for clinical notes were also introduced and outperformed previous models.</p>
      </sec>
      <sec>
        <title>Biomedical Knowledge Representation</title>
        <p>The Knowledge Representation Model can be understood as a structured set of directed graphs, in which the nodes of the graph represent entities or concepts, while the edges represent the semantic relationship between entities or concepts. During the development of the knowledge representation, semantic networks, ontology, and knowledge graphs/models are most commonly used in the field of biomedical science.</p>
        <p>A semantic network [<xref ref-type="bibr" rid="ref19">19</xref>], or frame network, is a knowledge base that represents semantic relations between concepts in a network.</p>
        <p>An ontology is a formal explicit description of concepts in a domain, properties of each concept, various features and attributes, and restrictions on these properties [<xref ref-type="bibr" rid="ref20">20</xref>]. The Drug Target Ontology [<xref ref-type="bibr" rid="ref21">21</xref>] provided a framework and formal classification, which included related information between protein, gene, protein domain, binding site, small-molecule drug, mechanism of action, and many other types of information. Dumontier and Villanuevarosales [<xref ref-type="bibr" rid="ref22">22</xref>] constructed a lightweight ontology, Pharmacogenomics Ontology, based on Pharmacogenomics Knowledge Base (PharmGKB) data, which contains 40 core concepts, involving phenotype, genotype, and drug therapy.</p>
        <p>A knowledge graph/model emphasizes data cleaning and knowledge fusion, and its essence is a semantic network, which allows access to knowledge inference. Since this concept was put forward by Google in 2012 [<xref ref-type="bibr" rid="ref23">23</xref>], researchers have conducted a series of discussions and research aimed at intelligent retrieval. High-quality heterogeneous graphs such as the Safe Medicine Recommendation (SMR) [<xref ref-type="bibr" rid="ref24">24</xref>] and KnowLife [<xref ref-type="bibr" rid="ref25">25</xref>] contain entities and relationships between disease, medicine, patient, gene, organ, and other biomedical entities constructed by bridging electronic medical records, ICD-9, DrugBank, electronic health record [<xref ref-type="bibr" rid="ref26">26</xref>], and other databases, which leads to more hidden relationships.</p>
        <p>Above all, the knowledge graph/model technology provides a means to extract structured knowledge from massive texts and images. It has broad applications in the biomedical field and can promote intelligent semantic retrieval, medical questions and answers, clinical decision support, and many other scenarios.</p>
      </sec>
      <sec>
        <title>Related Works</title>
        <p>With the rapid growth and accumulation of massive PGx data, there is an increasing need for scientific data collecting, organizing, modeling, and mining. These data reflect a hierarchy of relationships and detailed information between biomedical entities. Currently, the semantic types and relationships involved in PGx knowledge representation are usually limited to drug, gene, and disease.</p>
        <sec>
          <title>Drug–Gene Target Treatment</title>
          <p>Drug2Gene [<xref ref-type="bibr" rid="ref27">27</xref>] was a knowledge base combining information on compound, drug, gene, and protein from 19 publicly available databases. Sun et al [<xref ref-type="bibr" rid="ref28">28</xref>] designed a computational workflow to construct drug-target networks including drugs, genes, and diseases from different knowledge bases.</p>
        </sec>
        <sec>
          <title>Drug–Gene–Drug Interaction</title>
          <p>Bo et al [<xref ref-type="bibr" rid="ref29">29</xref>] extracted drug–gene–drug interactions from biomedical literature using the bidirectional LSTM (Bi-LSTM) model by combining biomedical resources with lexical information and entity position information. Coulet et al [<xref ref-type="bibr" rid="ref30">30</xref>] instantiated a description logics knowledge base to identify gene variant–drug response associations.</p>
        </sec>
        <sec>
          <title>Drug–Gene–Phenotype Relationship</title>
          <p>Dalleau et al [<xref ref-type="bibr" rid="ref31">31</xref>] assembled a set of linked PGx data from 6 distinct resources such as DisGeNET [<xref ref-type="bibr" rid="ref32">32</xref>] and ClinVar [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
        </sec>
        <sec>
          <title>Disease–Chemical–Gene Relationship</title>
          <p>Kim et al developed DigSee [<xref ref-type="bibr" rid="ref34">34</xref>] for disease–gene relationships and DigChem [<xref ref-type="bibr" rid="ref35">35</xref>] for disease–gene–chemical relationships from biomedical literature abstracts at a PubMed scale.</p>
          <p>However, there currently exist no in-depth explorations and descriptions of personalized medication, such as drug usage, dosage adjustment, and applicable population. Therefore, there is significance in applying the knowledge model to the field of PGx in further study, which will assist clinicians and clinical pharmacists in precise medication.</p>
        </sec>
      </sec>
      <sec>
        <title>Objective</title>
        <p>In this study, we proposed the following 2 objectives:</p>
        <list list-type="order">
          <list-item>
            <p>We aimed to present a pharmacogenomics knowledge model consisting of 5 semantic types related to PGx and precision medication, and also give definitions of relationships between these entities. The model mostly focuses on anticancer drugs, drug usage, and adjustments of daily dosage.</p>
          </list-item>
          <list-item>
            <p>We aimed to semiautomatically construct PGx corpora, which are relatively rare in the existing research, and make them open access. The NLP algorithms for PGx NER were also trained for facilitating corpus annotation.</p>
          </list-item>
        </list>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Steps</title>
        <p>There are 3 main steps in our study (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <list list-type="order">
          <list-item>
            <p>Data preparation: Data related to PGx were collected from DailyMed, DrugBank, and RxNorm.</p>
          </list-item>
          <list-item>
            <p>Data processing: Manual annotation for PGx entities and relationships was applied to drug labels in PDF/XML format from DailyMed. The BERT–CRF model was trained for entity recognition in this study. Data from DrugBank and RxNorm were also downloaded, parsed, and extracted for more drug attributes and relationships.</p>
          </list-item>
          <list-item>
            <p>Model construction: The PGx knowledge model was described in this aspect based on the entities and relationships extraction. Melanoma was also used as an example to verify the accuracy and validity of our model.</p>
          </list-item>
        </list>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The framework of our study.</p>
          </caption>
          <graphic xlink:href="medinform_v8i10e20291_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Preparation</title>
        <p>Data related to PGx need to be collected and integrated in this study, which are currently stored in DrugBank, PharmGKB, Comparative Toxicogenomics Database (CTD), RxNorm, and other databases. Based on the pharmacogenomics knowledge model built in our study, we chose the following 3 data sources to accomplish data crawling and data preparation.</p>
        <sec>
          <title>DailyMed</title>
          <p>The text of drug labels was obtained from DailyMed, which is a free drug information resource [<xref ref-type="bibr" rid="ref36">36</xref>] provided by the US National Library of Medicine (NLM). It consists of digitized versions of drug labels as submitted to the US FDA. DailyMed was of special interest because of its comprehensive coverage, open availability, and the package inserts’ combination of format consistency and rich detail. Drug labels in DailyMed give a detailed description of drugs’ indications and usage, adverse reaction, and applicable population, especially the dosage, dose form, and dosage adjustment. We downloaded 4067 drug labels randomly for pretraining tasks and 190 drug labels in the table of PGx biomarkers for annotation tasks.</p>
        </sec>
        <sec>
          <title>DrugBank</title>
          <p>DrugBank is a unique bioinformatics and cheminformatics resource that combines detailed drug (ie, chemical, pharmacological, and pharmaceutical) data with comprehensive drug target (ie, sequence, structure, and pathway) information [<xref ref-type="bibr" rid="ref37">37</xref>] provided by the University of Alberta. The latest release of DrugBank (version 5.1.4, released July 2, 2019) was parsed in this paper for drug attributes such as drug name, description, chemical formula, molecular weight, drug approval status, and so on.</p>
        </sec>
        <sec>
          <title>RxNorm</title>
          <p>RxNorm [<xref ref-type="bibr" rid="ref38">38</xref>] provides a suite of standards for clinical drugs in the form of “Ingredient–Strength–Dose Form–Brand name,” and is designed by NLM for the electronic exchange of clinical health information. Several attributes and drug–drug interactions of precise medication were selected from RxNorm, such as daily dose, dose form, and frequency as attributes, and has_dose_form, dose_form_of as relationships.</p>
        </sec>
      </sec>
      <sec>
        <title>Annotation Task</title>
        <p>We recruited 3 annotators, all of whom had a medical training background and curation experience. Each drug label was annotated independently by 2 annotators (ie, double annotation). Differences were resolved by a third and senior annotator. Besides this, we measured agreement of relationship annotations using the <italic>F</italic> score to assess consistency.</p>
        <p>Because all 190 drug labels in the FDA table of PGx biomarkers [<xref ref-type="bibr" rid="ref4">4</xref>] are in PDF format, the annotator needed to convert all of them into an editable format such as .txt (Notepad or other word processors) or .doc/.docx (Microsoft Word) before annotation.</p>
        <p>The main tasks involved in the annotation stage were the recognition of semantic types and semantic relationships from drug labels sections, including “Indications and Usage,” “Dosage and Administration,” “Use in Specific Populations,” “Warnings and Precautions,” and “Adverse Reactions.” For semantic types, different highlighted colors represented different entities according to the frame of the PGx knowledge model. In this work, drug was annotated in yellow, gene was annotated in red, disease was annotated in gray, dosage and dose form were annotated in green, adverse reaction was annotated in purple, and population was annotated in blue. For semantic relationships, the more important and difficult section, annotators read the drug labels and recorded the relation descriptions between diseases and drugs, diseases and genes, diseases and diseases, drugs and genes, drugs and drugs, and drugs and dosage manually. This formed the basis of relationship definition in the follow-up work. Before annotation, we also specified the annotation guidelines (<xref rid="figure2" ref-type="fig">Figure 2</xref>).</p>
        <p>An example of drug label annotation is shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. Finally, all the annotated semantic types and relationships were recorded in a structured database designed in advance.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Annotation guidelines.</p>
          </caption>
          <graphic xlink:href="medinform_v8i10e20291_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Annotation example of MEKINIST.</p>
          </caption>
          <graphic xlink:href="medinform_v8i10e20291_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>BERT–CRF for NER</title>
        <p>After the annotation of entities, we applied the BERT–CRF model for NER. The CRF model and BERT–Bi-LSTM–CRF model were also trained in our study as a comparison.</p>
        <p>The BERT–CRF architecture was composed of 4 sections: the input layer, the pretraining model, the full connection layer, and the CRF layer, which assigns a tag to each word based on its context in the output (<xref rid="figure4" ref-type="fig">Figure 4</xref>). We feed a sentence to the architecture to obtain contextual BERT embedding for each word as {Tok<sub>1</sub>,...,Tok<sub>N</sub>}. The context could be captured via many attention heads in each of its layers as well. These embeddings were then transported to a CRF layer to obtain the tag as {Tag<sub>1</sub>,...,Tag<sub>N</sub>} for each word block.</p>
        <p>The BERT-Base Multilingual, which has 110M parameters, was used in this NER task. We set the training batch size to 32, the max_seq to 80, and the learning rate to 0.00001. A total of 10 epochs were trained in each iteration to ensure model convergence. Other parameters related to BERT were set to default values. The dropout rate was set to 0.9 in fully connected layers to prevent overfitting. The transfer matrix in the CRF layer was learned by the model itself. Importantly, the Bi-LSTM layer was added in this architecture before feeding the tweet-level representation into the CRF layer, to compare the performance between BERT–CRF with Bi-LSTM and without Bi-LSTM.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>BERT–CRF architecture. BERT: Bidirectional Encoder Representations from Transformers; CRF: Conditional Random Field.</p>
          </caption>
          <graphic xlink:href="medinform_v8i10e20291_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Model Representation</title>
        <p>We extended the semantic types of our model from 3 common types of drug, gene, and disease to 5 types: drug, gene (gene name, gene mutation), disease (disease name, position, etc), precise medication (population, daily dose, dose form, frequency, take time for, take with a meal or not, etc), and adverse reaction.</p>
        <p>All the semantic types and attributes covered in the pharmacogenomics knowledge model are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <p>The entities model in the pharmacogenomics knowledge model was defined as follows, where EID represents the unique identifier for entities:</p>
        <p>     Entity={EID*,TERM*,Source,SEMANTICType*} (1)</p>
        <p>The relationships model in the pharmacogenomics knowledge model was defined as follows, where RID represents the unique identifier for relationships:</p>
        <p>     Relation={RID*,Relationship*,Domain*,Range*,Definition,TreeNumber*} (2)</p>
        <p>The whole pharmacogenomics knowledge model can be represented as the risk factors of precision medication for cancers. In this model, disease (C, especially for cancer in this paper) is usually caused by gene mutations (G), which determine the target drug (Dr) for treatment.</p>
        <p>     Dr = F(C,G) (3)</p>
        <p>During treatment, routine dosage/dose form (Ds) has already been provided by the FDA drug labels. However, it differs when the patient has an adverse reaction (A) or the disease occurs in special groups (P) such as pregnant, lactating, pediatric, and geriatric populations. Assuming that the 4 factors are independent in some cases, each factor can affect dosage/dose form separately.</p>
        <p>     Ds = F(Dr,G,A,P) (4)</p>
        <p>In summary, gene mutation, disease, adverse reaction, and patient population are the risk factors in the pharmacogenomics knowledge model that determine the drug to be used and, in particular, its suitable dosage and dose form.</p>
        <p>     Dr, Ds=F(C,G,A,P) (5)</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Semantic types and attributes in the knowledge model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="850"/>
            <thead>
              <tr valign="top">
                <td>Semantic Type</td>
                <td>Entity/Attribute</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug</td>
                <td>Drug Name, Description, Chemical Formula, Molecular Weight, Drug Approval Status, CAS<sup>a</sup>, UNII<sup>b</sup>, Pharmacology Indication</td>
              </tr>
              <tr valign="top">
                <td>Gene</td>
                <td>Gene name, Mutation</td>
              </tr>
              <tr valign="top">
                <td>Disease</td>
                <td>Disease Name, Position</td>
              </tr>
              <tr valign="top">
                <td>Adverse Reaction</td>
                <td>N/A<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>Population</td>
                <td>Pediatric Use Population, Applicable Population, Gender, Age, Race</td>
              </tr>
              <tr valign="top">
                <td>Drug Use</td>
                <td>Daily dose, Dose form, Frequency, Take time for, Take with a meal or not, etc</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>CAS: Chemical Abstracts Service Number.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>UNII: Unique Ingredient Identifier.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>N/A: not available.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Set Overview</title>
        <p>In this paper, we have collected 4067 drug labels in XML format downloaded from DailyMed as pretraining data for the BERT–CRF architecture, and 190 drug labels after annotation for model representation in which 90% (n=171) form the training set and 10% (n=19) form the test set, randomly assigned. Statistics of the annotated corpus are presented in <xref ref-type="table" rid="table2">Table 2</xref>. In addition, the number of unique unigrams was 2216 in the training set and 829 in the test set; the number of unique bigrams was 120,705 in the training set and 18,851 in the test set.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Number of entities in training and test sets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="350"/>
            <col width="350"/>
            <thead>
              <tr valign="top">
                <td>Entity</td>
                <td>Number of entities in the training set</td>
                <td>Number of entities in the test set</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug</td>
                <td>76</td>
                <td>31</td>
              </tr>
              <tr valign="top">
                <td>Gene</td>
                <td>60</td>
                <td>26</td>
              </tr>
              <tr valign="top">
                <td>Disease</td>
                <td>94</td>
                <td>33</td>
              </tr>
              <tr valign="top">
                <td>Body_Part</td>
                <td>23</td>
                <td>7</td>
              </tr>
              <tr valign="top">
                <td>Daily_Dose</td>
                <td>99</td>
                <td>27</td>
              </tr>
              <tr valign="top">
                <td>Dose_Form</td>
                <td>16</td>
                <td>8</td>
              </tr>
              <tr valign="top">
                <td>Frequency</td>
                <td>32</td>
                <td>12</td>
              </tr>
              <tr valign="top">
                <td>Adverse_Reaction</td>
                <td>372</td>
                <td>77</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Performance of Named Entity Recognition</title>
        <p>Three basic models are compared, with the specific results shown in <xref ref-type="table" rid="table3">Table 3</xref> in which micro-averaging for the F1 score was used. The BERT–CRF model achieved better performance than the other 2 models in this task. In some recent studies, the fully connected layer was replaced by a Bi-LSTM layer, which ultimately resulted in the BERT–Bi-LSTM–CRF model. However, the BERT–Bi-LSTM–CRF model presented a more complex structure and slower training speed than BERT–CRF. Besides this, there was only a small difference of about 2% between these 2 models, so BERT–CRF was selected in our study. The BERT–CRF model showed a high F1 score in drug, dose form, and body part, but a low F1 score in daily dose and disease, as shown in <xref ref-type="table" rid="table4">Table 4</xref>. However, these performances were only for the PGx corpus built semiautomatically in this work, and the 3 basic models may present different results in other studies with large-scale corpora.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Performance of the models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td>F1 (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>CRF<sup>a</sup></td>
                <td>88.03</td>
                <td>73.57</td>
                <td>80.16</td>
              </tr>
              <tr valign="top">
                <td>BERT–CRF<sup>b</sup></td>
                <td>85.12</td>
                <td>85.12</td>
                <td>85.12</td>
              </tr>
              <tr valign="top">
                <td>BERT–Bi-LSTM–CRF<sup>c</sup></td>
                <td>85.22</td>
                <td>81.00</td>
                <td>83.05</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>CRF: Conditional Random Field.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>BERT: Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>Bi-LSTM: Bidirectional Long Short-Term Memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Performance of the semantic type.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td rowspan="2">Semantic type</td>
                <td colspan="3">F1</td>
              </tr>
              <tr valign="bottom">
                <td>CRF<sup>a</sup> (%)</td>
                <td>BERT–Bi-LSTM–CRF<sup>b,c</sup> (%)</td>
                <td>BERT–CRF (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug</td>
                <td>94.12</td>
                <td>94.12</td>
                <td>100.00</td>
              </tr>
              <tr valign="top">
                <td>Gene</td>
                <td>66.67</td>
                <td>80.00</td>
                <td>71.43</td>
              </tr>
              <tr valign="top">
                <td>Disease</td>
                <td>61.54</td>
                <td>66.67</td>
                <td>57.14</td>
              </tr>
              <tr valign="top">
                <td>Body_Part</td>
                <td>57.14</td>
                <td>57.15</td>
                <td>85.71</td>
              </tr>
              <tr valign="top">
                <td>Daily_Dose</td>
                <td>31.58</td>
                <td>31.58</td>
                <td>42.11</td>
              </tr>
              <tr valign="top">
                <td>Dose_Form</td>
                <td>100.00</td>
                <td>100.00</td>
                <td>100.00</td>
              </tr>
              <tr valign="top">
                <td>Frequency</td>
                <td>62.50</td>
                <td>75.00</td>
                <td>75.00</td>
              </tr>
              <tr valign="top">
                <td>Adverse_Reaction</td>
                <td>68.15</td>
                <td>79.00</td>
                <td>73.74</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>CRF: Conditional Random Field.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>BERT: Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>Bi-LSTM: Bidirectional Long Short-Term Memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Semantic Relationships Extraction</title>
        <p>Because this study required a high accuracy of relationship extraction, we adopted a manual method in this task. Descriptions of semantic relationships were normalized at the same time during annotation, such as “in combination with” = “synergized by,” “recommended dosage” = “routine dosage.” The normalized descriptions are presented in <xref ref-type="table" rid="table5">Table 5</xref>. The other expressions in drug labels were stored as synonyms in our study at the same time. To make the pharmacogenomics knowledge model more portable, several semantic relationships were extended, such as “is biomarker-efficacy of,” “is biomarker-prognosis of.”</p>
        <p>In the end, 26 kinds of semantic relationships were extracted, and the consistency of the entity relationship annotation was 78.55%. Among them, there were 14 first-level semantic relationships and 12 second-level semantic relationships. Each kind of semantic relationship has been defined in detail, as shown in the accessory document.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Examples of semantic relationship–normalized description.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="850"/>
            <thead>
              <tr valign="top">
                <td>Normalized description</td>
                <td>Expressions in drug labels</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Treats</td>
                <td>for the prevention of, for relief of the signs and symptoms, for the treatment of, as monotherapy of</td>
              </tr>
              <tr valign="top">
                <td>Synergized by</td>
                <td>in combination with, coadministered with</td>
              </tr>
              <tr valign="top">
                <td>Antagonized by</td>
                <td>avoid concurrent administration of, avoid concomitant use of</td>
              </tr>
              <tr valign="top">
                <td>Have dosage</td>
                <td>total daily doses, recommended dosage</td>
              </tr>
              <tr valign="top">
                <td>Have mutation</td>
                <td>with *** mutation, the presence of *** mutation, be homozygous for</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Pharmacogenomics Knowledge Model</title>
        <p>Based on the entity recognition and relationship definitions mentioned above, the pharmacogenomics knowledge model is presented as <xref rid="figure5" ref-type="fig">Figure 5</xref>.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Overview of pharmacogenomics knowledge model.</p>
          </caption>
          <graphic xlink:href="medinform_v8i10e20291_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>The Case of Melanoma</title>
        <p>Melanoma is a malignant neoplasm derived from cells that are capable of forming melanin, which may occur in the skin of any part of body. It frequently metastasizes widely, and the regional lymph nodes, liver, lungs, and brain are likely to be involved. The incidence of malignant skin melanomas is rising rapidly in all parts of the world. Therefore, melanoma, which is caused by <italic>BRAF</italic> gene mutation, was taken as an example to verify our model.</p>
        <p>Seven drugs were included in the cases: binimetinib, cobimetinib, dabrafenib, encorafenib, nivolumab, trametinib, and vemurafenib. Most were newly indicated for the treatment of unresectable or metastatic melanoma with <italic>BRAF</italic> V600E or V600K mutations, as detected by FDA-approved tests in 2018. Among them, dabrafenib, encorafenib, and vemurafenib are targeted drugs for <italic>BRAF</italic> gene mutations.</p>
        <p>By researching the 7 drugs, 4846 triples were established in the pharmacogenomics knowledge model of melanoma; among them, 4713 triples were drug–drug relationships, 41 were drug–adverse reaction, 30 were drug–dosage, 24 were adverse reaction–dosage, 22 were drug–disease, 7 were drug–gene, 4 were drug–population, 2 were gene–mutation, and 3 were gene–disease. An example of data visualization of trametinib can be seen in <xref rid="figure6" ref-type="fig">Figure 6</xref>. Relationships can be displayed when the mouse hovers over the joint(s).</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>An example of pharmacogenomics knowledge model data visualization.</p>
          </caption>
          <graphic xlink:href="medinform_v8i10e20291_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Set Access</title>
        <p>We provided a user-friendly interface [<xref ref-type="bibr" rid="ref39">39</xref>] that enables users to access the pharmacogenomics knowledge model data set (<xref rid="figure7" ref-type="fig">Figure 7</xref>). On the “Home” page, users can learn the basic information and purpose of this knowledge model. On “The Case of Melanoma” page, users can obtain all the triples in melanoma cases and browse the triples by different groups of relationships. Visualizations of the triples are presented as well. On the “Download” page, users can download the melanoma data set, drug attribute data set, and annotated data set in Microsoft Excel format, as well as the relationships and definition document in Microsoft Word format for the user’s convenience.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>User interface of pharmacogenomics knowledge model data set.</p>
          </caption>
          <graphic xlink:href="medinform_v8i10e20291_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Potential Relationships in Pharmacogenomics Knowledge Model</title>
        <p>The pharmacogenomics knowledge model constructed in this paper reveals hidden relationships between drug, gene, disease, precise medication, and adverse reaction. Trametinib is used as an example, which is a kinase inhibitor indicated as a single agent for the treatment of BRAF-inhibitor treatment-naïve patients with unresectable or metastatic melanoma with <italic>BRAF</italic> V600E or V600K mutations as detected by an FDA-approved test. The recommended dosage is 2 mg orally once daily, and should be taken at least 1 hour before or at least 2 hours after a meal. However, we recognized from the pharmacogenomics knowledge model that more careful attention should be paid to dosing schedules when medication experience changes or other side effects occur. That is to say, trametinib needs to be stopped permanently in case of fever or interstitial lung disease, taken 1-2 hours before meals in case of metastatic thyroid cancer, and once a day in case of liver injury.</p>
      </sec>
      <sec>
        <title>Comparison With Relevant Data Sources</title>
        <p>The pharmacogenomics knowledge model includes 9 groups of PGx relationships, which can present more potential information than other relevant data sources such as DrugBank, PharmGKB, CTD, and RxNorm, as shown in <xref ref-type="table" rid="table6">Table 6</xref>.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Comparison between pharmacogenomics data sources.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="367"/>
            <col width="134"/>
            <col width="171"/>
            <col width="81"/>
            <col width="134"/>
            <col width="113"/>
            <thead>
              <tr valign="top">
                <td>Relationships</td>
                <td>DrugBank</td>
                <td>PharmGKB<sup>d</sup></td>
                <td>CTD<sup>e</sup></td>
                <td>RxNorm<sup>f</sup></td>
                <td>PGxKM<sup>g</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug–Gene</td>
                <td>√<sup>a</sup></td>
                <td>√</td>
                <td>√</td>
                <td>—</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Drug–Drug</td>
                <td>√</td>
                <td>√<bold>*</bold><sup>b</sup></td>
                <td>—</td>
                <td>—</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Gene–Disease</td>
                <td>—<sup>c</sup></td>
                <td>
                  <break/>
                </td>
                <td>√</td>
                <td>—</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Gene–Mutation</td>
                <td>—</td>
                <td>√</td>
                <td>—</td>
                <td>—</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Drug–Disease</td>
                <td>√</td>
                <td>√<bold>*</bold></td>
                <td>√</td>
                <td>—</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Drug–Adverse Reaction</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Drug–Dosage</td>
                <td>√</td>
                <td>—</td>
                <td>—</td>
                <td>√</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Drug–Population</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>√</td>
              </tr>
              <tr valign="top">
                <td>Adverse Reaction–Dosage</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>√</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>Have structured data and can be downloaded in the web set.</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup>Have information (unstructured data) for such relationships in the web set.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup>Have no information for such relationships in the web set.</p>
            </fn>
            <fn id="table6fn4">
              <p><sup>d</sup>PharmGKB: Pharmacogenomics Knowledge Base.</p>
            </fn>
            <fn id="table6fn5">
              <p><sup>e</sup>CTD: Comparative Toxicogenomics Database.</p>
            </fn>
            <fn id="table6fn6">
              <p><sup>f</sup>RxNorm: drug data interaction standard in American Clinical Information System.</p>
            </fn>
            <fn id="table6fn7">
              <p><sup>g</sup>PGxKM: pharmacogenomics knowledge model.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Limitations and Future Studies</title>
        <p>However, there are still some limitations in our study. First, this study aimed to build a pharmacogenomics knowledge model and semiautomatically annotate the corpus using the existing NLP tools. However, we did not validate the feasibility of NLP tools or compare the NLP performance using a benchmark data set, such as clinical records from the Third i2b2 Workshop on NLP Challenges [<xref ref-type="bibr" rid="ref40">40</xref>] or LabeledIn [<xref ref-type="bibr" rid="ref41">41</xref>], of labeled indications for human drugs. Our future research will explore BERT–CRF model verification on other standard drug corpora. Second, relation extraction was manually done by the 3 annotators, which will place restrictions on the application of the pharmacogenomics knowledge model, and an evaluation of automatic relation extraction will be conducted in the future. Common relation extraction methods such as CNN, LSTM, and BERT-based methods will be used to improve extraction efficiency.</p>
        <p>In future studies, we also plan to do the following jobs to improve our research. First, a series of other antitumor drugs will be taken into consideration to fill up our framework, such as ceritinib and afatinib for non–small-cell lung cancer. Second, linked data can also be extended to other sources, such as CTD, PharmGKB, and DisGeNET. We hope that this knowledge model for PGx interactions could serve as a framework and a resource for future drug research and development.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>A pharmacogenomics knowledge model was constructed for precision medication in our research, which reflected the multidimensional relationships between drug, gene, and disease, as well as relationships from gene to drug to dosage or frequency associations. The extraction task for PGx entities was performed using the BERT–CRF model with an F1 score of 85.12%. Our pharmacogenomics knowledge model contained 5 semantic types (drug, gene, disease, precise medication, and adverse reaction) and 26 semantic relationships, which were defined in detail. Using melanoma caused by <italic>BRAF</italic> gene mutation as an example, we verified the feasibility of this model using the FDA’s drug labels and relevant linked data. Finally, we highlighted this knowledge model as a scalable framework for clinicians and clinical pharmacists to adjust drug dosage according to patient-specific genetic variation, and to support pharmaceutical researchers during new drug discoveries.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ATC</term>
          <def>
            <p>Anaplastic thyroid cancer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">Bi-LSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CRF</term>
          <def>
            <p>conditional random field</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CTD</term>
          <def>
            <p>the Comparative Toxicogenomics Database</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">FDA</term>
          <def>
            <p>the US Food and Drug Administration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NLM</term>
          <def>
            <p>the US National Library of Medicine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">PGx</term>
          <def>
            <p>pharmacogenomics</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">PharmGKB</term>
          <def>
            <p>Pharmacogenomics Knowledge Base</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work is supported by the Special Research Fund for Central Universities-Peking Union Medical College (Grant No. 3332020049), the National Key Research and Development Program of China (Grant No. 2016YFC0901901), the National Natural Science Foundation of China (Grant No. 81601573), National Engineering Laboratory for Internet Medical Systems and Applications (Grant No. NELIMSA2018P02), the Key Laboratory of Knowledge Technology for Medical Integrative Publishing of China, and the program of China Knowledge Center for Engineering Sciences and Technology (Medical Knowledge Service System; Grant No. CKCEST-2019-1-10).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>HK designed the model, performed the experiments, and wrote this paper. The study was originally conceived of by JL, who also improved the experiments and made modifications to this paper. HK, MW, and LS designed the annotation framework, made the rules of annotation, and analyzed the results. LH guided the study and made modifications to this paper. All the authors wrote and revised the manuscript, and all the authors have read and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prasad</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Role of regulatory agencies in translating pharmacogenetics to the clinics</article-title>
          <source>Clin Cases Miner Bone Metab</source>
          <year>2009</year>
          <month>01</month>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <lpage>34</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22461095"/>
          </comment>
          <pub-id pub-id-type="medline">22461095</pub-id>
          <pub-id pub-id-type="pmcid">PMC2781218</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wheeler</surname>
              <given-names>HE</given-names>
            </name>
            <name name-style="western">
              <surname>Maitland</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Dolan</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ratain</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Cancer pharmacogenomics: strategies and challenges</article-title>
          <source>Nat Rev Genet</source>
          <year>2012</year>
          <month>11</month>
          <day>27</day>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>23</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1038/nrg3352</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Clinical Pharmacogenomics: Opportunities and Challenges at Point of Care</article-title>
          <source>Clin Pharmacol Ther</source>
          <year>2012</year>
          <month>12</month>
          <day>05</day>
          <volume>93</volume>
          <issue>1</issue>
          <fpage>33</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.1038/clpt.2012.196</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <source>Table of Pharmacogenomic Biomarkers in Drug Labeling</source>
          <access-date>2020-04-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/drugs/science-research-drugs/table-pharmacogenomic-biomarkers-drug-labeling">https://www.fda.gov/drugs/science-research-drugs/table-pharmacogenomic-biomarkers-drug-labeling</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hida</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nokihara</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kondo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Azuma</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Seto</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Takiguchi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nishio</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshioka</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Imamura</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hotta</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Watanabe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Goto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Satouchi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kozuki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Shukuya</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nakagawa</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mitsudomi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yamamoto</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Asakawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Asabe</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tamura</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Alectinib versus crizotinib in patients with ALK-positive non-small-cell lung cancer (J-ALEX): an open-label, randomised phase 3 trial</article-title>
          <source>The Lancet</source>
          <year>2017</year>
          <month>07</month>
          <volume>390</volume>
          <issue>10089</issue>
          <fpage>29</fpage>
          <lpage>39</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(17)30565-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gay</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Beadling</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Warrick</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Neff</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Corless</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Tolba</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Durable Response to Afatinib in Lung Adenocarcinoma Harboring NRG1 Gene Fusions</article-title>
          <source>Journal of Thoracic Oncology</source>
          <year>2017</year>
          <month>08</month>
          <volume>12</volume>
          <issue>8</issue>
          <fpage>e107</fpage>
          <lpage>e110</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jtho.2017.04.025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Recognizing names in biomedical texts: a machine learning approach</article-title>
          <source>Bioinformatics</source>
          <year>2004</year>
          <month>05</month>
          <day>01</day>
          <volume>20</volume>
          <issue>7</issue>
          <fpage>1178</fpage>
          <lpage>1190</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/bth060</pub-id>
          <pub-id pub-id-type="medline">14871877</pub-id>
          <pub-id pub-id-type="pii">bth060</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lafferty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>FCN</given-names>
            </name>
          </person-group>
          <article-title>Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data</article-title>
          <year>2001</year>
          <month>06</month>
          <conf-name>18th International Conference on Machine Learning</conf-name>
          <conf-date>June 28 to July 1</conf-date>
          <conf-loc>San Francisco</conf-loc>
          <fpage>282</fpage>
          <lpage>289</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Collobert</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bottou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Karlen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kavukcuoglu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kuksa</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Natural Language Processing (Almost) from Scratch</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2493</fpage>
          <lpage>2537</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jmlr.org/papers/volume12/collobert11a/collobert11a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sak</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Senior</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Beaufays</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <source>Long short-term memory recurrent neural network architectures for large scale acoustic modeling</source>
          <year>2014</year>
          <month>02</month>
          <day>05</day>
          <access-date>2020-04-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43905.pdf">https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43905.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schuster</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Paliwal</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Bidirectional recurrent neural networks</article-title>
          <source>IEEE Trans Signal Process</source>
          <year>1997</year>
          <volume>45</volume>
          <issue>11</issue>
          <fpage>2673</fpage>
          <lpage>2681</lpage>
          <pub-id pub-id-type="doi">10.1109/78.650093</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <source>BERT: pre-training of deep bidirectional transformers for language understanding</source>
          <year>2018</year>
          <month>10</month>
          <day>11</day>
          <access-date>2020-04-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/N19-1423.pdf">https://www.aclweb.org/anthology/N19-1423.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Souza</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lotufo</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Portuguese named entity recognition using BERT-CRF</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <month>09</month>
          <day>23</day>
          <access-date>2020-04-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arXiv.org/abs/1909.10649">http://arXiv.org/abs/1909.10649</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Awasthy</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Towards lingua franca named entity recognition with BERT</source>
          <year>2019</year>
          <month>11</month>
          <day>19</day>
          <access-date>2020-04-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arXiv.org/abs/1912.01389">http://arXiv.org/abs/1912.01389</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Rawat</surname>
              <given-names>BPS</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Fine-Tuning Bidirectional Encoder Representations From Transformers (BERT)-Based Models on Large-Scale Electronic Health Record Notes: An Empirical Study</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>09</month>
          <day>12</day>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>e14830</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/3/e14830/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/14830</pub-id>
          <pub-id pub-id-type="medline">31516126</pub-id>
          <pub-id pub-id-type="pii">v7i3e14830</pub-id>
          <pub-id pub-id-type="pmcid">PMC6746103</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>BERT-based ranking for biomedical entity normalization</source>
          <year>2019</year>
          <month>08</month>
          <access-date>2020-04-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1908.03548">https://arxiv.org/abs/1908.03548</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</source>
          <year>2019</year>
          <month>01</month>
          <day>25</day>
          <access-date>2020-04-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1901.08746">https://arxiv.org/abs/1901.08746</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Altosaar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ranganath</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>ClinicalBERT: modeling clinical notes and predicting hospital readmission</source>
          <year>2019</year>
          <month>04</month>
          <access-date>2020-04-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1904.05342">https://arxiv.org/abs/1904.05342</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Percha</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>RB</given-names>
            </name>
          </person-group>
          <article-title>A global network of biomedical relationships derived from text</article-title>
          <source>Bioinformatics</source>
          <year>2018</year>
          <month>08</month>
          <day>01</day>
          <volume>34</volume>
          <issue>15</issue>
          <fpage>2614</fpage>
          <lpage>2624</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29490008"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/bty114</pub-id>
          <pub-id pub-id-type="medline">29490008</pub-id>
          <pub-id pub-id-type="pii">4911883</pub-id>
          <pub-id pub-id-type="pmcid">PMC6061699</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>Ontology Development 101: A guide to creating your first ontology</article-title>
          <source>CiteSeerX</source>
          <access-date>2020-04-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bit.ly/3j6mM5H"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Küçük-McGinty</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Vidovic</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Forlin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Koleti</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Guha</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mathias</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Ursu</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Stathias</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nabizadeh</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mader</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Visser</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bologa</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Oprea</surname>
              <given-names>TI</given-names>
            </name>
            <name name-style="western">
              <surname>Schürer</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>Drug target ontology to classify and integrate drug discovery data</article-title>
          <source>J Biomed Semantics</source>
          <year>2017</year>
          <month>11</month>
          <day>09</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>50</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0161-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13326-017-0161-x</pub-id>
          <pub-id pub-id-type="medline">29122012</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13326-017-0161-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC5679337</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dumontier</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Villanueva-Rosales</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Towards pharmacogenomics knowledge discovery with the semantic web</article-title>
          <source>Brief Bioinform</source>
          <year>2009</year>
          <month>03</month>
          <volume>10</volume>
          <issue>2</issue>
          <fpage>153</fpage>
          <lpage>163</lpage>
          <pub-id pub-id-type="doi">10.1093/bib/bbn056</pub-id>
          <pub-id pub-id-type="medline">19240125</pub-id>
          <pub-id pub-id-type="pii">bbn056</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <source>Introducing the Knowledge Graph: Things, Not Strings</source>
          <access-date>2020-04-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://googleblog.blogspot.be/2012/05/introducing-knowledge-graph-things-not.html">http://googleblog.blogspot.be/2012/05/introducing-knowledge-graph-things-not.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Safe medicine recommendation via medical knowledge graph embedding</source>
          <year>2017</year>
          <month>10</month>
          <day>16</day>
          <access-date>2020-04-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arXiv.org/abs/1710.05980">http://arXiv.org/abs/1710.05980</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ernst</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Siu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weikum</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>KnowLife: a versatile approach for constructing a large knowledge graph for biomedical sciences</article-title>
          <source>BMC Bioinformatics</source>
          <year>2015</year>
          <month>05</month>
          <day>14</day>
          <volume>16</volume>
          <fpage>157</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-015-0549-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12859-015-0549-5</pub-id>
          <pub-id pub-id-type="medline">25971816</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12859-015-0549-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC4448285</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>An automatic approach for constructing a knowledge base of symptoms in Chinese</article-title>
          <source>J Biomed Semantics</source>
          <year>2017</year>
          <month>09</month>
          <day>20</day>
          <volume>8</volume>
          <issue>Suppl 1</issue>
          <fpage>33</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0145-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13326-017-0145-x</pub-id>
          <pub-id pub-id-type="medline">29297414</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13326-017-0145-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC5763289</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roider</surname>
              <given-names>HG</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlova</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kirov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Slavov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Slavov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Uzunov</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Drug2Gene: an exhaustive resource to explore effectively the drug-target relation network</article-title>
          <source>BMC Bioinformatics</source>
          <year>2014</year>
          <month>03</month>
          <day>11</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>68</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-68"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-15-68</pub-id>
          <pub-id pub-id-type="medline">24618344</pub-id>
          <pub-id pub-id-type="pii">1471-2105-15-68</pub-id>
          <pub-id pub-id-type="pmcid">PMC4234465</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>DTome: a web-based tool for drug-target interactome construction</article-title>
          <source>BMC Bioinformatics</source>
          <year>2012</year>
          <month>06</month>
          <day>11</day>
          <volume>13 Suppl 9</volume>
          <fpage>S7</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-13-S9-S7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-13-S9-S7</pub-id>
          <pub-id pub-id-type="medline">22901092</pub-id>
          <pub-id pub-id-type="pii">1471-2105-13-S9-S7</pub-id>
          <pub-id pub-id-type="pmcid">PMC3372450</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Leveraging Biomedical Resources in Bi-LSTM for Drug-Drug Interaction Extraction</article-title>
          <source>IEEE Access</source>
          <year>2018</year>
          <month>06</month>
          <volume>6</volume>
          <fpage>33432</fpage>
          <lpage>33439</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2018.2845840</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coulet</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smaïl-Tabbone</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Napoli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Devignes</surname>
              <given-names>M-D</given-names>
            </name>
          </person-group>
          <article-title>Ontology-based knowledge discovery in pharmacogenomics</article-title>
          <source>Adv Exp Med Biol</source>
          <year>2011</year>
          <volume>696</volume>
          <fpage>357</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.1007/978-1-4419-7046-6_36</pub-id>
          <pub-id pub-id-type="medline">21431576</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dalleau</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Marzougui</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Da Silva</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ringot</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ndiaye</surname>
              <given-names>NC</given-names>
            </name>
            <name name-style="western">
              <surname>Coulet</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Learning from biomedical linked data to suggest valid pharmacogenes</article-title>
          <source>J Biomed Semantics</source>
          <year>2017</year>
          <month>04</month>
          <day>20</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>16</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0125-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13326-017-0125-1</pub-id>
          <pub-id pub-id-type="medline">28427468</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13326-017-0125-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5399403</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <source>DisGeNET</source>
          <access-date>2020-09-08</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.disgenet.org/">https://www.disgenet.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <source>ClinVar</source>
          <access-date>2020-09-08</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.clinicalgenome.org/data-sharing/clinvar/">https://www.clinicalgenome.org/data-sharing/clinvar/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>An analysis of disease-gene relationship from Medline abstracts by DigSee</article-title>
          <source>Sci Rep</source>
          <year>2017</year>
          <month>01</month>
          <day>05</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>40154</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/srep40154"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/srep40154</pub-id>
          <pub-id pub-id-type="medline">28054646</pub-id>
          <pub-id pub-id-type="pii">srep40154</pub-id>
          <pub-id pub-id-type="pmcid">PMC5215527</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>DigChem: Identification of disease-gene-chemical relationships from Medline abstracts</article-title>
          <source>PLoS Comput Biol</source>
          <year>2019</year>
          <month>05</month>
          <day>15</day>
          <volume>15</volume>
          <issue>5</issue>
          <fpage>e1007022</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pcbi.1007022"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pcbi.1007022</pub-id>
          <pub-id pub-id-type="medline">31091224</pub-id>
          <pub-id pub-id-type="pii">PCOMPBIOL-D-18-01377</pub-id>
          <pub-id pub-id-type="pmcid">PMC6519793</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <source>About DailyMed</source>
          <access-date>2020-04-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dailymed.nlm.nih.gov/dailymed/about-dailymed.cfm">https://dailymed.nlm.nih.gov/dailymed/about-dailymed.cfm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <source>About DrugBank</source>
          <access-date>2020-04-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.drugbank.ca/about">https://www.drugbank.ca/about</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ganesan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>RxNorm: prescription for electronic drug information exchange</article-title>
          <source>IT Prof</source>
          <year>2005</year>
          <month>09</month>
          <volume>7</volume>
          <issue>5</issue>
          <fpage>17</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1109/MITP.2005.122</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <source>Pharmacogenomics knowledge model</source>
          <access-date>2020-04-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.phoc.org.cn/PGxKM/">http://www.phoc.org.cn/PGxKM/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Solti</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cadag</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Extracting medication information from clinical text</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <month>09</month>
          <day>01</day>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>514</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20819854"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2010.003947</pub-id>
          <pub-id pub-id-type="medline">20819854</pub-id>
          <pub-id pub-id-type="pii">17/5/514</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995677</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khare</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>LabeledIn: cataloging labeled indications for human drugs</article-title>
          <source>J Biomed Inform</source>
          <year>2014</year>
          <month>12</month>
          <volume>52</volume>
          <fpage>448</fpage>
          <lpage>56</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00185-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2014.08.004</pub-id>
          <pub-id pub-id-type="medline">25220766</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(14)00185-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC4260997</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
