<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i6e28247</article-id>
      <article-id pub-id-type="pmid">34142969</article-id>
      <article-id pub-id-type="doi">10.2196/28247</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>A Novel Metric to Quantify the Effect of Pathway Enrichment Evaluation With Respect to Biomedical Text-Mined Terms: Development and Feasibility Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Hao</surname>
            <given-names>Tianyong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hu</surname>
            <given-names>Baotian</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Heng</surname>
            <given-names>Weng</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Qin</surname>
            <given-names>Xuan</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4397-0328</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Yao</surname>
            <given-names>Xinzhi</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6795-2653</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Xia</surname>
            <given-names>Jingbo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Hubei Key Lab of Agricultural Bioinformatics</institution>
            <institution>College of Informatics</institution>
            <institution>Huazhong Agricultural University</institution>
            <addr-line>1#, Lion Rock Street, Hongshan District</addr-line>
            <addr-line>Hubei Province</addr-line>
            <addr-line>Wuhan, 430070</addr-line>
            <country>China</country>
            <phone>86 02787288509</phone>
            <email>xiajingbo.math@gmail.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7285-588X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Hubei Key Lab of Agricultural Bioinformatics</institution>
        <institution>College of Informatics</institution>
        <institution>Huazhong Agricultural University</institution>
        <addr-line>Wuhan</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jingbo Xia <email>xiajingbo.math@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>18</day>
        <month>6</month>
        <year>2021</year>
      </pub-date>
      <volume>9</volume>
      <issue>6</issue>
      <elocation-id>e28247</elocation-id>
      <history>
        <date date-type="received">
          <day>25</day>
          <month>2</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>30</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>5</day>
          <month>4</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>4</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Xuan Qin, Xinzhi Yao, Jingbo Xia. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 18.06.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2021/6/e28247" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Natural language processing has long been applied in various applications for biomedical knowledge inference and discovery. Enrichment analysis based on named entity recognition is a classic application for inferring enriched associations in terms of specific biomedical entities such as gene, chemical, and mutation.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to investigate the effect of pathway enrichment evaluation with respect to biomedical text-mining results and to develop a novel metric to quantify the effect.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Four biomedical text mining methods were selected to represent natural language processing methods on drug-related gene mining. Subsequently, a pathway enrichment experiment was performed by using the mined genes, and a series of inverse pathway frequency (IPF) metrics was proposed accordingly to evaluate the effect of pathway enrichment. Thereafter, 7 IPF metrics and traditional <italic>P</italic> value metrics were compared in simulation experiments to test the robustness of the proposed metrics.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>IPF metrics were evaluated in a case study of rapamycin-related gene set. By applying the best IPF metrics in a pathway enrichment simulation test, a novel discovery of drug efficacy of rapamycin for breast cancer was replicated from the data chosen prior to the year 2000. Our findings show the effectiveness of the best IPF metric in support of knowledge discovery in new drug use. Further, the mechanism underlying the drug-disease association was visualized by Cytoscape.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The results of this study suggest the effectiveness of the proposed IPF metrics in pathway enrichment evaluation as well as their application in drug use discovery.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>pathway enrichment</kwd>
        <kwd>metric</kwd>
        <kwd>evaluation</kwd>
        <kwd>text mining</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The rising health issues worldwide and outbreaks of drug resistance have drawn a great amount of attention to new drug development [<xref ref-type="bibr" rid="ref1">1</xref>]. However, drug development is expensive and time-consuming, and an average of US $800 million [<xref ref-type="bibr" rid="ref2">2</xref>] to US $1.8 billion [<xref ref-type="bibr" rid="ref3">3</xref>] and more than 10 years is invested in the development of 1 drug [<xref ref-type="bibr" rid="ref4">4</xref>]. Improving the efficiency of drug discovery has long been one of the most important research directions and goals of medical research. As per the data in the 2018 edition of the World Health Organization’s International Classification of Diseases and related health problems, there are 31,055 diseases [<xref ref-type="bibr" rid="ref5">5</xref>]. Direct drug-disease pairing validation will have 85,214,920 drug-disease treatment validations. This highlights the importance of understanding the mechanisms of disease pathology and the action mechanisms of the existing drugs. According to the data released by the US National Food and Drug Administration in 2018, 35,283 types of drugs and 2744 types of effective ingredients have been approved [<xref ref-type="bibr" rid="ref6">6</xref>]. Therefore, drug repositioning is recommended as a low-cost drug discovery method based on the clinical use of the drug, by which new indications of the marketed drug are discovered and an old drug is repurposed [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. The linking of drugs to diseases via enriched gene sets is the basis of the drug use strategy under pathway enrichment analysis, which has long been an investigative way to unveil the functional interpretation of known gene sets [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. 
The enrichment analysis mainly relies on the evaluation of the overexpressed gene set in a specific pathway, thereby leading to functional interpretation [<xref ref-type="bibr" rid="ref10">10</xref>]. Technically, for a given disease or drug, relevant pathway information is publicly available in pathway databases [<xref ref-type="bibr" rid="ref11">11</xref>]. For humans, the Kyoto Encyclopedia of Genes and Genomes (KEGG) database [<xref ref-type="bibr" rid="ref12">12</xref>] contains 38,680 <italic>Homo sapiens</italic> genes, and the abundance of data makes the correlation of disease-related genes or drug-related genes possible. In addition, there are multiple ways to identify a relevant gene set for a given disease [<xref ref-type="bibr" rid="ref13">13</xref>]. While genome-wide association studies [<xref ref-type="bibr" rid="ref14">14</xref>] or mRNA analysis [<xref ref-type="bibr" rid="ref15">15</xref>] is the typical method for drug-related knowledge discovery, biomedical natural language processing is an alternative [<xref ref-type="bibr" rid="ref16">16</xref>]. However, evaluating pathway enrichment in terms of a chosen gene set exclusively generated by a text mining system is still an unsolved issue [<xref ref-type="bibr" rid="ref17">17</xref>]. The text mining system extracts the drug-related genes from drug-related literature, and pathway enrichment is then subsequently performed upon the text-mined genes. Although it is believed that text mining takes advantage of the abundant information from text resources [<xref ref-type="bibr" rid="ref18">18</xref>], the diversity rooted from the various text mining systems leads to diversified results and effects in subsequent pathway enrichment. 
As representatives of the text mining system, PubTator [<xref ref-type="bibr" rid="ref19">19</xref>] in a co-occurrence manner and the Turku Event Extraction System (TEES) [<xref ref-type="bibr" rid="ref20">20</xref>] in a more semantic and syntactic manner play an important role in the biomedical named entity recognition and pathway enrichment.</p>
      <p>The framework of this study was as follows. First, we used various biomedical text mining strategies to investigate the drug-related gene sets. Second, we designed novel metrics for pathway enrichment of text-mined genes. Here, 7 novel inverse pathway frequency (IPF) metrics were proposed and they were compared with the traditional <italic>P</italic> values. Finally, we performed a case study to show the effectiveness of the IPF metrics in pathway enrichment as well as the promising application of the text mining pipeline for new drug use discovery.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Collection of Rapamycin-Centric Resources</title>
        <p>In this paradigm, a drug-centric text resource was obtained to extract the related genes. We chose rapamycin, also known as sirolimus, which is used for the treatment of renal cell carcinoma and malignant lymphoma, as the target drug. Relevant texts and pathway data were collected targeting rapamycin as follows:</p>
        <list list-type="order">
          <list-item>
            <p>Text resources: 31,118 abstracts reporting rapamycin were downloaded from PubMed.</p>
          </list-item>
          <list-item>
            <p>Rapamycin-related pathway data set: The drug pathway was retrieved from the Comparative Toxicogenomics Database (CTD) [<xref ref-type="bibr" rid="ref21">21</xref>], in which the KEGG pathway is enriched significantly among genes that interact with the drug or its downstream entity with a significant <italic>P</italic> value. In total, there are 166 pathways that are related to rapamycin.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Pathway Enrichment Evaluation in Terms of Text-Mined Genes</title>
        <p>As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, 4 text mining methods were applied to extract the gene pairs in rapamycin-related PubMed texts. They were (1) Method 1: <italic>ABSTRACT</italic> (co-occurrence in abstract) [<xref ref-type="bibr" rid="ref19">19</xref>], (2) Method 2: <italic>SENTENCE</italic> (co-occurrence in sentence), (3) Method 3: <italic>DEPENDENCY</italic> (under consideration of dependency tree structure) [<xref ref-type="bibr" rid="ref22">22</xref>], and (4) Method 4: <italic>TEES</italic> (Turku Event Extraction System) [<xref ref-type="bibr" rid="ref20">20</xref>]. By taking co-occurrence or relation from the above methods, genes were linked to form an undirected pathway. We then proposed 7 types of novel pathway enrichment metrics by introducing various weights to the mined genes. Since the genes were extracted from 4 types of text mining systems, metrics evaluation was compared with respect to different text mining systems. For a given gene set, the candidate pathway is derived from 329 pathways in KEGG. Therefore, the sorted pathways based on <italic>P</italic> values in KEGG enrichment are regarded as the ground truth of pathway enrichment without using the text-mined knowledge. Furthermore, the feasibility of the text mining system for drug mechanism prediction was investigated.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Text mining systems for gene extraction and pathway construction. TEES: Turku Event Extraction System.</p>
          </caption>
          <graphic xlink:href="medinform_v9i6e28247_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>State-of-the-art Text Mining Methods</title>
        <p>To extract gene pairs from the abstracts of papers, PubTator [<xref ref-type="bibr" rid="ref19">19</xref>] and TEES [<xref ref-type="bibr" rid="ref23">23</xref>] were selected as the 2 baseline text mining tools, which contribute to the following 4 text mining systems (<xref rid="figure2" ref-type="fig">Figure 2</xref>):</p>
        <list list-type="order">
          <list-item>
            <p>Method 1: <italic>ABSTRACT.</italic> Only abstracts containing the specific drug name were collected. If more than 2 genes showed up in one collected abstract, these genes were extracted and regarded as drug-related genes.</p>
          </list-item>
          <list-item>
            <p>Method 2: <italic>SENTENCE.</italic> Similar to the abstract-level extraction rule, gene pairs were extracted based on a sentence co-occurrence rule.</p>
          </list-item>
          <list-item>
            <p>Method 3: <italic>DEPENDENCY.</italic> Being stricter than sentence-level gene-pair extraction, the syntactic rule was introduced to restrict the co-occurrence filtering rule. Here, the Stanford parser was used to identify the gene subject or the gene object in a sentence. The gene pair is maintained only when the 2 genes act as sub or obj in the syntactic tree.</p>
          </list-item>
          <list-item>
            <p>Method 4: <italic>TEES.</italic> TEES [<xref ref-type="bibr" rid="ref20">20</xref>] is a sophisticated biomedical relation extraction system, which has been trained over 400,000 linguistic features. TEES is used to extract the genes that have interactions with other genes in drug-related abstracts. Thus, the TEES method provides a set of genes, which shows interaction information in drug-related abstracts.</p>
          </list-item>
        </list>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Gene pair extraction rule for the text mining systems.</p>
          </caption>
          <graphic xlink:href="medinform_v9i6e28247_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Traditional Metrics for Pathway Enrichment</title>
        <p>Based on the drug-related abstract text file, 1 text mining tool extracts 1 group of genes. This group of genes is considered to be associated with the drug. For the sake of new drug use discovery, a group of drug-related genes is obtained using a text mining tool. Meanwhile, in the KEGG database, 1 pathway contains a group of genes, which are related to the disease the pathway correlates with. Thus, the matching degree between the drug-related gene group and the disease-related pathway represents the potential of the matching degree between the drug and the disease. clusterProfiler [<xref ref-type="bibr" rid="ref24">24</xref>] is a known pathway enrichment tool, which applies the <italic>P</italic> value setting for the significance test of the relevant pathway for a given gene set.</p>
        <p>Assuming in total that there are <italic>N</italic> background genes related to a specific pathway and there is a given gene set with <italic>k</italic> genes, pathway enrichment is performed to evaluate the significance for the given gene set to be relevant to the specific pathway. The significance value is obtained via chance computation for the given gene set in comparison to a randomly sampled gene set. In random sampling, <italic>k</italic> genes are sampled and <italic>x</italic> out of <italic>k</italic> genes are related to the pathway. Then, the probability for this instance is as follows:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v9i6e28247_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>The <italic>P</italic> value used to address the significance of the pathway for the gene set is as follows:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v9i6e28247_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>The <italic>P</italic> value as a traditional enrichment metric reflects solid statistical concern in terms of chance computation. It relies on the hypothesis that the chance for each gene belonging to a given gene set is equal. However, this prerequisite is in some cases not met, for example, housekeeping genes have higher chances to appear in any given pathway, while on the contrary, certain specific genes only appear in a specific pathway.</p>
      </sec>
      <sec>
        <title>Proposed Metrics for Pathway Enrichment</title>
        <sec>
          <title>IPF for a Gene in a Given Pathway</title>
          <p>The 4 text mining methods extracted 4 different sets of drug-related genes. Through these gene-drug relations, a bridge between the genes and the drug was established. The aim of this study was to investigate how a drug is associated with its indication through the gene. The next part was to establish the bridge between these genes and the indication. Mature gene-disease relations were easily accessed through KEGG in the form of the KEGG pathway. The KEGG pathway is a collection of manually drawn pathway maps representing the knowledge on the molecular interaction, reactions, and relation network. Thus, a bridge between the genes and the indication was established via KEGG. The whole path in that mechanism was addressed by finding a gene bridge between the drug and its indication. The next step was to evaluate this strategy. We paid attention to which text mining method is more suitable in this strategy. We focused on the drug-related gene set extracted by the text mining method in terms of the quantity and importance. Thus, we needed to define the importance standard of the gene to the indication. The standard of the gene to the indication in this case is based on the KEGG pathway information. One gene specifically shows up in a specific pathway, which means that this pathway can be identified with this gene. In other words, the fewer pathways a gene appears in, the more important it is to its related pathway. To quantify this, we define the IPF value as follows.</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>Where <italic>P</italic> = {<italic>p</italic><sub>1</sub>, <italic>p</italic><sub>2</sub>, …, <italic>p</italic><sub>M</sub>} refers to the set of all KEGG pathways, and <italic>M=#{P}</italic> is the number of pathways in the KEGG database.</p>
          <p>{p<sub>m</sub>&#124;gene<sub>i</sub> ∈ p<sub>m</sub>} refers to the set of pathways that contain the <italic>i</italic>-th gene, denoted as <italic>gene<sub>i</sub></italic>. Thus, every gene in the KEGG database receives a basic score. However, simply adding all the gene scores together is unfair because all pathways show up in KEGG in the form of a map, and each map consists of a set of node boxes and several edges instead of genes and edges. Therefore, we need to figure out how to calculate the score that one text mining method receives from all node boxes in a specific pathway.</p>
        </sec>
        <sec>
          <title>Enrichment of Text Mining–Based Gene Sets in a Pathway in View of a Gene</title>
          <p>Assume <italic>T<sub>t</sub></italic> is a gene set that contains all of the genes mined by the <italic>t-</italic>th text mining method. In order to evaluate how <italic>T<sub>t</sub></italic> genes are enriched in a specific pathway, <italic>P<sub>m</sub></italic>, we define</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>Where <italic>IPF_gene<sub>T<sub>t</sub>,P<sub>m</sub></sub></italic> considers the number of genes that exist in a pathway as well as the weight of each gene. The sum of the IPFs can be used to evaluate the association of the group of genes to a pathway. By doing this, cumulative associations along with gene weights are represented.</p>
        </sec>
        <sec>
          <title>Enrichment of Text Mining–Based Gene Sets in a Pathway in View of a Node</title>
          <p>In KEGG, a node box in some cases represents 1 set of homologous genes, instead of 1 separate gene. Generally, although there exists more than 1 gene, these genes play the same role. Therefore, even if the text mining method mines more than one gene belonging to this pathway, when these genes play the same role in the same node box, we only applied the max gene score to represent the score that this text mining method receives in this node box in this pathway. If <italic>node<sub>j</sub></italic> is a single node,</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>where</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>If <italic>node<sub>j</sub></italic> has E subnodes,</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>Where <italic>g<sub>i</sub> ∈ {N<sub>node<sub>j</sub></sub> ∩ T<sub>t</sub>}, g<sub>i</sub> = g<sub>max</sub></italic></p>
          <p>For each <italic>gene<sub>i</sub></italic>, which belongs to gene set <italic>node<sub>j</sub></italic> as well as <italic>T<sub>t</sub></italic>, the maximum IPF<italic>gene<sub>i</sub></italic> is assigned, which means <italic>gene<sub>i</sub></italic> belongs to gene set <italic>N<sub>node<sub>j</sub></sub></italic>.</p>
          <p>It is noted that a node box sometimes represents 1 set of protein complex genes that need to work together to play a role in the pathway. Therefore, we applied the sum of all the gene scores that the text mining method received in this node and multiplied it with a coefficient to represent the score that this text mining method receives in this node box in this pathway.</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>where <italic>∣N<sub>node<sub>j</sub></sub>∣</italic> means the gene number of gene set <italic>N<sub>node<sub>j</sub></sub></italic>, while ∣<italic>g<sub>i</sub></italic> ∈ {<italic>node<sub>j</sub></italic> ∩ <italic>T<sub>t</sub></italic>}∣ means the gene number of the intersection of gene set <italic>N<sub>node<sub>j</sub></sub></italic> and gene set <italic>T<sub>t</sub></italic>.</p>
        </sec>
        <sec>
          <title>Enrichment of Text Mining-Based Gene Sets in a Pathway in View of the Shortest Path</title>
          <p>Besides the inclusion of genes in 1 node, the graph theory of the node in the pathway should be taken into consideration. In graph theory, the degree of a vertex is the number of edges associated with the vertex. In a pathway graph, one node holding a high degree indicates that this node connects with more vertices. In terms of genes, this means that the gene is associated with many other genes, and mutations and regulation of the gene affect more genes. In 1 pathway, the more a node shows up in the shortest path between the 2 genes, the more important this gene is in this pathway.</p>
          <p>First, assume <italic>SP<sub>node<sub>r</sub>,node<sub>s</sub></sub></italic> refers to the shortest path between 2 arbitrary nodes, that is, <italic>node<sub>r</sub></italic> and <italic>node<sub>s</sub></italic> in pathway <italic>P<sub>m</sub></italic>, then, we count the occurrence of <italic>node<sub>j</sub></italic> in <italic>SP<sub>node<sub>r</sub>,node<sub>s</sub></sub></italic> with respect to <italic>P<sub>m</sub></italic>.</p>
          <p>
            <disp-formula><italic>Count<sub>node<sub>j</sub>,P<sub>m</sub></sub></italic> = #{<italic>SP<sub>node<sub>r</sub>,node<sub>s</sub></sub>&#124;node<sub>j</sub> ∈ SP<sub>node<sub>r</sub>,node<sub>s</sub></sub></italic>} <bold>(9)</bold>
              </disp-formula>
          </p>
          <p>In addition, <italic>NShortPath<sub>node<sub>r</sub>,node<sub>j</sub>,node<sub>k</sub></sub></italic> is a binary value, which denotes whether or not <italic>node<sub>j</sub></italic> appears in the shortest path between <italic>node<sub>r</sub></italic>  and <italic>node<sub>k</sub></italic>.</p>
          <p>Thus, each node in the pathway holds a “count” score. To compare the importance of a node among all the nodes in one pathway, softmax function is applied to <italic>NShortPath<sub>node<sub>r</sub>,node<sub>j</sub>,node<sub>k</sub></sub></italic>. Here, the softmax function is the gradient logarithmic normalization of the discrete probability distribution of finite terms. The result of softmax is suitable for describing the importance of 1 node in 1 pathway.</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>Then, we added all <italic>IPF<sub>node<sub>j</sub></sub></italic> to represent the total score that the text mining method receives in this pathway,</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>where</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>Based on the above discussion on <italic>IPF_gene</italic> (equation 4), <italic>IPF_node</italic> (equation 8), and <italic>IPF_shortpath</italic> (equation 11), we formulate a generalized formula for <italic>IPF_node<sub>T<sub>t</sub>,P<sub>m</sub></sub></italic>.</p>
          <p>
            <disp-formula>
              <graphic xlink:href="medinform_v9i6e28247_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>Here, equation (13) summarizes all the above metric considerations and proposes a generalized form of IPF metrics. For instance, <italic>IPF_gene</italic> in equation (4) holds if 1 is assigned to <italic>Weight<sub>node<sub>j</sub>,P<sub>m</sub></sub></italic>, equation (12) is assigned to <italic>Score<sub>T<sub>t</sub>,node<sub>j</sub></sub></italic>, and equation (3) is assigned to <italic>Weight<sub>gene<sub>i</sub></sub></italic>. The full list of generalized IPF metrics is shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>The complete inverse pathway frequency metrics list.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="290"/>
              <col width="230"/>
              <col width="250"/>
              <col width="230"/>
              <thead>
                <tr valign="top">
                  <td>Inverse pathway frequency (IPF) metrics</td>
                  <td>
                    <italic>Weight<sub>node<sub>j</sub>,P<sub>m</sub></sub></italic>
                  </td>
                  <td>
                    <italic>Score<sub>T<sub>t</sub>,node<sub>j</sub></sub></italic>
                  </td>
                  <td>
                    <italic>Weight<sub>gene<sub>i</sub></sub></italic>
                  </td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>IPF_gene</td>
                  <td>1</td>
                  <td>Equation (12)</td>
                  <td>Equation (3)</td>
                </tr>
                <tr valign="top">
                  <td>IPF_node</td>
                  <td>1</td>
                  <td>Equations (5) and (7)</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td>IPF_shortpath</td>
                  <td>Equation (10)</td>
                  <td>Equation (12)</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td>IPF_shortpath_gene</td>
                  <td>Equation (10)</td>
                  <td>Equation (12)</td>
                  <td>Equation (3)</td>
                </tr>
                <tr valign="top">
                  <td>IPF_shortpath_node</td>
                  <td>Equation (10)</td>
                  <td>Equations (5) and (7)</td>
                  <td>1</td>
                </tr>
                <tr valign="top">
                  <td>IPF_gene_node</td>
                  <td>1</td>
                  <td>Equations (5) and (7)</td>
                  <td>Equation (3)</td>
                </tr>
                <tr valign="top">
                  <td>IPF_gene_node_shortpath</td>
                  <td>Equations (5), (7), and (10)</td>
                  <td>Equations (5) and (7)</td>
                  <td>Equation (3)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>IPF Metric Comparison Under the Evaluation of Relevance Gene Ranking</title>
        <p>We evaluated the effectiveness of IPF metrics by observing the rank counts of topic-related genes obtained from the 4 text mining methods. First, the 4 baseline text mining methods, that is, ABSTRACT, SENTENCE, DEPENDENCY, and TEES, were used to filter the vital genes in rapamycin-related texts. Afterwards, for each gene set obtained by the various text mining methods, 7 IPF metrics and traditional <italic>P</italic> values were used to map to obtain vital pathways and their pathway ranks. We then evaluated the pathway ranks by counting the occurrences of the key CTD pathways depicted in the Methods section. As shown in <xref rid="figure3" ref-type="fig">Figure 3</xref> and <xref ref-type="table" rid="table2">Table 2</xref>, the x-axis refers to the rank of the enriched pathways and the y-axis refers to the cumulative percentage (CumPer), which is the ratio of the vital CTD pathway among the top i-th enriched pathways.</p>
        <p>
          <disp-formula>
            <graphic xlink:href="medinform_v9i6e28247_fig17.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Comparison of the pathway-enrichment metrics based on the rapamycin-related gene set. CumPer: cumulative percentage; IPF: inverse pathway frequency; TEES: Turku Event Extraction System.</p>
          </caption>
          <graphic xlink:href="medinform_v9i6e28247_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Comparison of the areas under the cumulative percentage curve for the pathway-enriched methods based on the known rapamycin-related pathway.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="260"/>
            <col width="160"/>
            <col width="160"/>
            <col width="180"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Inverse pathway frequency metrics</td>
                <td>ABSTRACT</td>
                <td>SENTENCE</td>
                <td>DEPENDENCY</td>
                <td>Turku Event Extraction System</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>IPF_gene</td>
                <td>0.634</td>
                <td>0.638</td>
                <td>0.628</td>
                <td>0.529</td>
              </tr>
              <tr valign="top">
                <td>IPF_node</td>
                <td>0.647</td>
                <td>0.648</td>
                <td>0.672</td>
                <td>0.625</td>
              </tr>
              <tr valign="top">
                <td>IPF_shortpath</td>
                <td>0.680<sup>a</sup></td>
                <td>0.679<sup>a</sup></td>
                <td>0.688<sup>a</sup></td>
                <td>0.635<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>IPF_shortpath_gene</td>
                <td>0.675</td>
                <td>0.675</td>
                <td>0.682</td>
                <td>0.626</td>
              </tr>
              <tr valign="top">
                <td>IPF_shortpath_node</td>
                <td>0.675</td>
                <td>0.675</td>
                <td>0.682</td>
                <td>0.626</td>
              </tr>
              <tr valign="top">
                <td>IPF_gene_node</td>
                <td>0.675</td>
                <td>0.675</td>
                <td>0.682</td>
                <td>0.626</td>
              </tr>
              <tr valign="top">
                <td>IPF_gene_node_shortpath</td>
                <td>0.675</td>
                <td>0.675</td>
                <td>0.681</td>
                <td>0.626</td>
              </tr>
              <tr valign="top">
                <td><italic>P</italic> value</td>
                <td>.59</td>
                <td>.60</td>
                <td>.64</td>
                <td>.62</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Indicates that the area is significantly superior to this text mining method in terms of the pathway enrichment indicator.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The bars from 0 to 8 in the bar plot represent the <italic>P</italic> value and 7 IPF metrics in <xref ref-type="table" rid="table1">Table 1</xref>, respectively. The results show that genes ranked with <italic>P</italic> values map to fewer vital pathways than genes from IPF metrics. In detail, the cumulative percentage curves of <italic>P</italic> values are given in the left 4 plots, and it is straightforward to observe that the <italic>y</italic> value obtained with the <italic>P</italic> value ranks the lowest in all the text mining cases. When computing the area under the cumulative percentage curve, the areas are 0.634, 0.638, 0.625, and 0.529 for <italic>P</italic> values for each case, which are also the lowest in all cases. Overall, the consistently poor performance of the <italic>P</italic> value demonstrates the effectiveness of the IPF metric in support of the key pathway enrichment. Furthermore, among the 7 IPF metrics, the black bar, which represents <italic>IPF_node</italic>, performs the best with the highest value of area under the cumulative percentage curve. It achieves 0.68, 0.679, 0.688, and 0.635 in <italic>ABSTRACT, SENTENCE, DEPENDENCY</italic>, and <italic>TEES</italic>, respectively.</p>
      </sec>
      <sec>
        <title>Artificial Intelligence in Pathway Enrichment</title>
        <p>Although the area values among IPF metrics do not differ substantially from each other, the <italic>IPF_node</italic> consistently outperforms the others. The results show that the <italic>IPF_node</italic> represents the best semantic feature from the view of the natural language processing method and it is the most supportive for vital pathway enrichment.</p>
        <sec>
          <title>Replication in the Discovery of Efficacy of Rapamycin for Breast Cancer</title>
          <p>The discovery of the efficacy of rapamycin was replicated via a pathway enrichment experiment. PubReMiner was used to retrieve the research trend of rapamycin and breast cancer drugs. A total of 1502 abstracts were obtained, and the starting time was the year 2000. The experiment was designed to test if the gene interaction of rapamycin could be excavated by the text mining method from literature without reporting the relevance of breast cancer and rapamycin. All the gene pairs in the literature related to rapamycin from the years 1978 to 2000 were excavated, the active genes of rapamycin were obtained, and the enrichment analysis of the strategic gene pathway in this study was carried out. After applying the <italic>IPF_node</italic>, 1640 abstracts of rapamycin prior to the year 2000 were retrieved and 243 genes were obtained. Afterwards, a standard pathway enrichment was obtained, and the top 0.5% of the pathways under each enrichment path index were statistically analyzed. As expected, the breast cancer pathway was listed in the enrichment results, and the results indicated that the potential activity of rapamycin can be obtained by enriching the gene pathway by text mining interaction genes.</p>
        </sec>
        <sec>
          <title>Visualization of the Pharmaceutical Mechanism</title>
          <p>The text mining system was investigated to bridge the drug, protein, and disease pathway in order to explore the pharmaceutical mechanism of rapamycin. Starting from the Literature Network application, the disease-related gene network was constructed, and 480 genes obtained by rapamycin-centric text mining were used to highlight the overlapping parts in the breast cancer gene network. All the breast cancer–related genes were collected from the STRING database. According to all the existing databases and text information, each gene was sorted for rapamycin correlation, and in this verification section, 100 breast cancer–related genes from STRING were selected. The breast cancer gene network was constructed according to the gene interaction mentioned in more than 40,000 papers, and the network was constructed using the literature network application program. After gene pathway enrichment analysis, the drug was associated with the pathway and Cytoscape was used for network visualization. In view of the relation between the pathway information and the disease, the drug was further associated with the disease. In order to further analyze the relationship between drugs and diseases, the distribution of the drug-active genes excavated in the disease gene network was analyzed.</p>
          <p>In order to construct a disease-specific gene network, the genetic relationship of this network in nature was obtained from disease-related abstracts. Since Cytoscape is a high-quality visualization platform for network analysis, a literature network application program based on Cytoscape was applied to address the drug disease associations obtained after pathway enrichment. <xref rid="figure4" ref-type="fig">Figure 4</xref> highlights 38 vital genes plotted as yellow circles, namely, <italic>STAT3, TP53, CDK4, CTLA4, AR, MYC, NOTCH1, IL6, ERBB2, CXCL12, BECN1, IGF1R, CDK2, EGF, ERBB4, MMP9, PIK3CA, CXCL8, ABCB1, EZH2, CDK6, SOX2, AKT1, CDH1, SRC, MTOR, ABCG2, KDR, CCND1, VEGFA, EGFR, ZEB1, ATM, PTEN, CXCR4, ERBB3, MDM2,</italic> and <italic>GATA3</italic>. These 38 genes are based on the intersection of the breast cancer text network and the drug rapamycin-active gene obtained in this strategy. The size of the point in the graph represents the degree of the point (the greater the degree, the larger the point), and the degree in this network is the number of proteins that interact with the protein. The edge thickness in the figure represents the number of sentences that support the protein-protein relationship. The edge color in the figure also represents the number of sentences that support the protein-protein relationship. It can be seen from the figure that the yellow bright spot covers the vast majority of breast cancer gene networks with moderately large spots. The 38 genes were enriched by the <italic>P</italic> value pathway, and 16 of them, that is, <italic>EGFR, IL6, TP53, CDK6, CDK4, PTEN, CDK2, KDR, AKT1, IGF1R, CCND1, VEGFA, PIK3CA, MDM2, MTOR,</italic> and <italic>MYC</italic>, belong to one of the <italic>MTOR</italic> signaling pathways. Among them, <italic>MTOR</italic> is an important gene targeted by rapamycin.
The <italic>MTOR</italic> pathway plays an important role in multiple activities of rapamycin and is therefore linked to breast cancer. The reason that literature network is used to construct breast cancer–related network is that the protein interaction involved in constructing the network is obtained from the literature related to breast cancer, and it is the programmed realization of protein interaction based on sentence coexpression in this study. It is convenient for users to quickly construct interactive protein interaction networks based on text relationships. In this study, the breast cancer–related genes obtained from the STRING database were rearranged according to the text information, and the protein interaction information excavated from the text was reflected in the size of the protein gene points. Thus, breast cancer genes were given different weights. It is more convenient to give priority to the location of the active genes under the active conditions defined by the interaction. The overlap of disease and drug-active genes was observed and the possible mechanism of action was speculated.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Visualization of the extracted gene pairs from literature.</p>
            </caption>
            <graphic xlink:href="medinform_v9i6e28247_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>In this study, all text resources were obtained from a rapamycin-centric literature data set prior to the year 2000, and all predicted drug efficacies for rapamycin were based on knowledge ahead of this timeline. Therefore, it was interesting to “replicate” and evaluate a novel pathway-discovery method in our case study and to investigate the research paradigm based on pathway enrichment. Several studies after the year 2000 provide evidence that the mined rapamycin-centric pathway makes sense. For example, after Liu et al [<xref ref-type="bibr" rid="ref25">25</xref>] reported the effect of rapamycin in effectively inhibiting the growth of breast cancer in preclinical and clinical trials, the mechanism of action of rapamycin was elucidated. Rapamycin controls the growth, metabolism, and senescence of cells, as well as cells’ reactions to nutrients, energy levels, and growth factors. <italic>MTOR</italic>, the target of rapamycin, is often upregulated in a variety of cancers, while rapamycin is extremely selective in blocking <italic>MTOR</italic>. Interestingly, our case study pinpointed <italic>MTOR</italic> correctly and made our pathway enrichment method conceivable in the study of breast cancer. Hopefully, the investigation of rapamycin action in the treatment of breast cancer will be propelled by further extensive and abundant text mining results in the future.</p>
      <p>In conclusion, this research proposed a group of new pathway enrichment metrics by combining protein-interaction mechanisms, graph theories, information retrieval, and data mining weighting technology and by providing a new idea on pathway enrichment analysis. Moreover, the effectiveness of the best new enrichment metric for rapamycin was analyzed and the new activity of the drug shown by our method is supported by evidence from the literature. This research strategy sheds light on the investigation of the mechanism of action of drugs on diseases by using text-mined genes that are enriched in signaling pathways.</p>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CTD</term>
          <def>
            <p>Comparative Toxicogenomics Database</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">IPF</term>
          <def>
            <p>inverse pathway frequency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">KEGG</term>
          <def>
            <p>Kyoto Encyclopedia of Genes and Genomes</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">TEES</term>
          <def>
            <p>Turku Event Extraction System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to express their gratitude to Prof Lars Juhl Jensen, Dr Marc Legeay, and Ms Yuxing Wang for many valuable discussions. Data and code are available at https://github.com/RuringQinXuan/PathwayEnrichmentMetric.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>XQ was responsible for the coding, performed the whole text mining experiments, implemented the IPF metric evaluation, and drafted the manuscript. JX formulated the whole mathematical analysis, performed the TEES experiments, and modified the manuscript. XY performed PubMed term extraction.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Parfett</surname>
              <given-names>CLJ</given-names>
            </name>
          </person-group>
          <article-title>Somatic cell genetics: a review of drug resistance, lectin resistance and gene transfer in mammalian cells in culture</article-title>
          <source>Can J Genet Cytol</source>
          <year>1980</year>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>443</fpage>
          <lpage>96</lpage>
          <pub-id pub-id-type="doi">10.1139/g80-056</pub-id>
          <pub-id pub-id-type="medline">7016268</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alfiya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Paulius</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Clara</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nicole</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Jochen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Georg</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Treatment with imatinib prevents fibrosis in different preclinical models of systemic sclerosis and induces regression of established fibrosis</article-title>
          <source>Arthritis &#38; Rheumatology</source>
          <year>2010</year>
          <volume>60</volume>
          <issue>1</issue>
          <fpage>219</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.3410/f.1148053.605164</pub-id>
          <pub-id pub-id-type="medline">19116940</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Mytelka</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Dunwiddie</surname>
              <given-names>CT</given-names>
            </name>
            <name name-style="western">
              <surname>Persinger</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Munos</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Lindborg</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Schacht</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
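          <article-title>How to improve R&#38;D productivity: the pharmaceutical industry's grand challenge</article-title>
          <source>Nat Rev Drug Discov</source>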
          <year>2010</year>
          <month>03</month>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>203</fpage>
          <lpage>14</lpage>
          <pub-id pub-id-type="doi">10.1038/nrd3078</pub-id>
          <pub-id pub-id-type="medline">20168317</pub-id>
          <pub-id pub-id-type="pii">nrd3078</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Sirota</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shenoy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pai</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Roedder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chiang</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Sarwal</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Pasricha</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Computational repositioning of the anticonvulsant topiramate for inflammatory bowel disease</article-title>
          <source>Sci Transl Med</source>
          <year>2011</year>
          <month>08</month>
          <day>17</day>
          <volume>3</volume>
          <issue>96</issue>
          <fpage>96ra76</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://stm.sciencemag.org/cgi/pmidlookup?view=long&#38;pmid=21849664"/>
          </comment>
          <pub-id pub-id-type="doi">10.1126/scitranslmed.3002648</pub-id>
          <pub-id pub-id-type="medline">21849664</pub-id>
          <pub-id pub-id-type="pii">3/96/96ra76</pub-id>
          <pub-id pub-id-type="pmcid">PMC3479650</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gaebel</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zielasek</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Reed</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Mental and Behavioural Disorders in the ICD-11: Concepts, Methodologies, and Current Status</article-title>
          <source>Psychiatr Pol</source>
          <year>2017</year>
          <volume>51</volume>
          <issue>2</issue>
          <fpage>169</fpage>
          <lpage>195</lpage>
          <pub-id pub-id-type="doi">10.12740/pp/69660</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Sridhara</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>McGuinn</surname>
              <given-names>WD</given-names>
            </name>
            <name name-style="western">
              <surname>Morse</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lostritto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Baird</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pazdur</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>United States Food and Drug Administration Drug Approval summary: Gefitinib (ZD1839; Iressa) tablets</article-title>
          <source>Clin Cancer Res</source>
          <year>2004</year>
          <month>02</month>
          <day>15</day>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>1212</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://clincancerres.aacrjournals.org/cgi/pmidlookup?view=long&#38;pmid=14977817"/>
          </comment>
          <pub-id pub-id-type="doi">10.1158/1078-0432.ccr-03-0564</pub-id>
          <pub-id pub-id-type="medline">14977817</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kingsmore</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Grammer</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Lipsky</surname>
              <given-names>PE</given-names>
            </name>
          </person-group>
          <article-title>Drug repurposing to improve treatment of rheumatic autoimmune inflammatory diseases</article-title>
          <source>Nat Rev Rheumatol</source>
          <year>2020</year>
          <month>01</month>
          <day>12</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>32</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1038/s41584-019-0337-0</pub-id>
          <pub-id pub-id-type="medline">31831878</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41584-019-0337-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wadi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weiser</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Reimand</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Impact of outdated gene annotations on pathway enrichment analysis</article-title>
          <source>Nat Methods</source>
          <year>2016</year>
          <month>08</month>
          <day>30</day>
          <volume>13</volume>
          <issue>9</issue>
          <fpage>705</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27575621"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nmeth.3963</pub-id>
          <pub-id pub-id-type="medline">27575621</pub-id>
          <pub-id pub-id-type="pii">nmeth.3963</pub-id>
          <pub-id pub-id-type="pmcid">PMC7802636</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Parameter searching in attractor algorithm for community detection—an application in pathway enrichment analysis</article-title>
          <source>Journal of Physics: Conference Series</source>
          <year>2018</year>
          <month>08</month>
          <day>30</day>
          <conf-name>3rd Annual International Conference on Information System and Artificial Intelligence (ISAI2018)</conf-name>
          <conf-date>22-24 June 2018</conf-date>
          <conf-loc>Suzhou</conf-loc>
          <fpage>012051</fpage>
          <pub-id pub-id-type="doi">10.1088/1742-6596/1069/1/012051</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hung</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>DeLisi</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Gene set enrichment analysis: performance evaluation and usage guidelines</article-title>
          <source>Brief Bioinform</source>
          <year>2012</year>
          <month>05</month>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>281</fpage>
          <lpage>91</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21900207"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbr049</pub-id>
          <pub-id pub-id-type="medline">21900207</pub-id>
          <pub-id pub-id-type="pii">bbr049</pub-id>
          <pub-id pub-id-type="pmcid">PMC3357488</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tavallaie</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Drug discovery approaches targeting the incretin pathway</article-title>
          <source>Bioorg Chem</source>
          <year>2020</year>
          <month>06</month>
          <volume>99</volume>
          <fpage>103810</fpage>
          <pub-id pub-id-type="doi">10.1016/j.bioorg.2020.103810</pub-id>
          <pub-id pub-id-type="medline">32325333</pub-id>
          <pub-id pub-id-type="pii">S0045-2068(19)31981-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kanehisa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goto</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>KEGG: Kyoto encyclopedia of genes and genomes</article-title>
          <source>Nucleic Acids Res</source>
          <year>2000</year>
          <month>01</month>
          <day>28</day>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>27</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1093/nar/28.1.27</pub-id>
          <pub-id pub-id-type="medline">10592173</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Masoudi-Sobhanzadeh</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Omidi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Amanlou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Masoudi-Nejad</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Drug databases and their contributions to drug repurposing</article-title>
          <source>Genomics</source>
          <year>2020</year>
          <month>03</month>
          <volume>112</volume>
          <issue>2</issue>
          <fpage>1087</fpage>
          <lpage>1095</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ygeno.2019.06.021</pub-id>
          <pub-id pub-id-type="medline">31226485</pub-id>
          <pub-id pub-id-type="pii">S0888-7543(19)30128-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Burbridge</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kusalik</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Comparing four genome-wide association study (GWAS) programs with varied input data quantity</article-title>
          <year>2018</year>
          <conf-name>2018 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</conf-name>
          <conf-date>December 3, 2018</conf-date>
          <conf-loc>Madrid</conf-loc>
          <fpage>1800</fpage>
          <lpage>1802</lpage>
          <pub-id pub-id-type="doi">10.1109/bibm.2018.8621425</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yeganeh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mostafavi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Use of machine learning for diagnosis of cancer in ovarian tissues with a selected mRNA panel</article-title>
          <year>2018</year>
          <conf-name>2018 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</conf-name>
          <conf-date>December 3, 2018</conf-date>
          <conf-loc>Madrid</conf-loc>
          <fpage>2429</fpage>
          <lpage>2434</lpage>
          <pub-id pub-id-type="doi">10.1109/bibm.2018.8621371</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gachloo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A review of drug knowledge discovery using BioNLP and tensor or matrix decomposition</article-title>
          <source>Genomics Inform</source>
          <year>2019</year>
          <month>06</month>
          <volume>17</volume>
          <issue>2</issue>
          <fpage>e18</fpage>
          <pub-id pub-id-type="doi">10.5808/gi.2019.17.2.e18</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of the performance of BioNLP tools for discovering causal genes in terms with pathway enrichment</article-title>
          <source>Journal of Physics: Conference Series</source>
          <year>2018</year>
          <month>08</month>
          <day>30</day>
          <volume>1069</volume>
          <fpage>012037</fpage>
          <pub-id pub-id-type="doi">10.1088/1742-6596/1069/1/012037</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Percha</surname>
              <given-names>Bethany</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>Russ B</given-names>
            </name>
          </person-group>
          <article-title>A global network of biomedical relationships derived from text</article-title>
          <source>Bioinformatics</source>
          <year>2018</year>
          <month>08</month>
          <day>01</day>
          <volume>34</volume>
          <issue>15</issue>
          <fpage>2614</fpage>
          <lpage>2624</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29490008"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/bty114</pub-id>
          <pub-id pub-id-type="medline">29490008</pub-id>
          <pub-id pub-id-type="pii">4911883</pub-id>
          <pub-id pub-id-type="pmcid">PMC6061699</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Chih-Hsuan</given-names>
            </name>
            <name name-style="western">
              <surname>Kao</surname>
              <given-names>Hung-Yu</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Zhiyong</given-names>
            </name>
          </person-group>
          <article-title>PubTator: a web-based text mining tool for assisting biocuration</article-title>
          <source>Nucleic Acids Res</source>
          <year>2013</year>
          <month>07</month>
          <volume>41</volume>
          <issue>Web Server issue</issue>
          <fpage>W518</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23703206"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkt441</pub-id>
          <pub-id pub-id-type="medline">23703206</pub-id>
          <pub-id pub-id-type="pii">gkt441</pub-id>
          <pub-id pub-id-type="pmcid">PMC3692066</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tavallaie</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Drug discovery approaches targeting the incretin pathway</article-title>
          <source>Bioorg Chem</source>
          <year>2020</year>
          <month>06</month>
          <volume>99</volume>
          <fpage>103810</fpage>
          <pub-id pub-id-type="doi">10.1016/j.bioorg.2020.103810</pub-id>
          <pub-id pub-id-type="medline">32325333</pub-id>
          <pub-id pub-id-type="pii">S0045-2068(19)31981-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>Allan Peter</given-names>
            </name>
            <name name-style="western">
              <surname>Grondin</surname>
              <given-names>Cynthia J</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>Robin J</given-names>
            </name>
            <name name-style="western">
              <surname>Sciaky</surname>
              <given-names>Daniela</given-names>
            </name>
            <name name-style="western">
              <surname>McMorran</surname>
              <given-names>Roy</given-names>
            </name>
            <name name-style="western">
              <surname>Wiegers</surname>
              <given-names>Jolene</given-names>
            </name>
            <name name-style="western">
              <surname>Wiegers</surname>
              <given-names>Thomas C</given-names>
            </name>
            <name name-style="western">
              <surname>Mattingly</surname>
              <given-names>Carolyn J</given-names>
            </name>
          </person-group>
          <article-title>The Comparative Toxicogenomics Database: update 2019</article-title>
          <source>Nucleic Acids Res</source>
          <year>2019</year>
          <month>01</month>
          <day>08</day>
          <volume>47</volume>
          <issue>D1</issue>
          <fpage>D948</fpage>
          <lpage>D954</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30247620"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gky868</pub-id>
          <pub-id pub-id-type="medline">30247620</pub-id>
          <pub-id pub-id-type="pii">5106145</pub-id>
          <pub-id pub-id-type="pmcid">PMC6323936</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Debusmann</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kuhlmann</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Dependency grammar: classification and exploration</article-title>
          <source>Resource-Adaptive Cognitive Processes</source>
          <year>2010</year>
          <publisher-loc>Berlin</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>365</fpage>
          <lpage>388</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>A novel feature selection strategy for enhanced biomedical event extraction using the Turku system</article-title>
          <source>Biomed Res Int</source>
          <year>2014</year>
          <volume>2014</volume>
          <fpage>205239</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2014/205239"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2014/205239</pub-id>
          <pub-id pub-id-type="medline">24800214</pub-id>
          <pub-id pub-id-type="pmcid">PMC3997098</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>clusterProfiler: an R package for comparing biological themes among gene clusters</article-title>
          <source>OMICS</source>
          <year>2012</year>
          <month>05</month>
          <volume>16</volume>
          <issue>5</issue>
          <fpage>284</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22455463"/>
          </comment>
          <pub-id pub-id-type="doi">10.1089/omi.2011.0118</pub-id>
          <pub-id pub-id-type="medline">22455463</pub-id>
          <pub-id pub-id-type="pmcid">PMC3339379</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Targeting the mTOR pathway in breast cancer</article-title>
          <source>Tumour Biol</source>
          <year>2017</year>
          <month>06</month>
          <volume>39</volume>
          <issue>6</issue>
          <fpage>1010428317710825</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/1010428317710825?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1010428317710825</pub-id>
          <pub-id pub-id-type="medline">28639903</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
