<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">52210</article-id><article-id pub-id-type="doi">10.2196/52210</article-id><title-group><article-title>Knowledge Graph for Breast Cancer Prevention and Treatment: Literature-Based Data Analysis Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Jin</surname><given-names>Shuyan</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Liang</surname><given-names>Haobin</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Wenxia</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Li</surname><given-names>Huan</given-names></name><degrees>MM</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Health Department, Shenzhen Maternity and Child Healthcare Hospital</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country></aff><aff id="aff2"><institution>School of Economics and 
Statistics, Guangzhou University</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Benis</surname><given-names>Arriel</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Gaudet-Blavignac</surname><given-names>Christophe</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Yang</surname><given-names>Shu</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Chu</surname><given-names>Yuanchia</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Shuyan Jin, MPH<email>13823373712@163.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date pub-type="epub"><day>22</day><month>2</month><year>2024</year></pub-date><volume>12</volume><elocation-id>e52210</elocation-id><history><date date-type="received"><day>26</day><month>08</month><year>2023</year></date><date date-type="rev-recd"><day>02</day><month>01</month><year>2024</year></date><date date-type="accepted"><day>06</day><month>01</month><year>2024</year></date></history><copyright-statement>&#x00A9; Shuyan Jin, Haobin Liang, Wenxia Zhang, Huan Li. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 22.2.2024. 
</copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2024/1/e52210"/><abstract><sec><title>Background</title><p>The incidence of breast cancer has remained high and continues to rise since the 21st century. Consequently, there has been a significant increase in research efforts focused on breast cancer prevention and treatment. Despite the extensive body of literature available on this subject, systematic integration is lacking. To address this issue, knowledge graphs have emerged as a valuable tool. By harnessing their powerful knowledge integration capabilities, knowledge graphs offer a comprehensive and structured approach to understanding breast cancer prevention and treatment.</p></sec><sec><title>Objective</title><p>We aim to integrate literature data on breast cancer treatment and prevention, build a knowledge graph, and provide support for clinical decision-making.</p></sec><sec sec-type="methods"><title>Methods</title><p>We used Medical Subject Headings terms to search for clinical trial literature on breast cancer prevention and treatment published on PubMed between 2018 and 2022. 
We downloaded triplet data from the Semantic MEDLINE Database (SemMedDB) and matched them with the retrieved literature to obtain triplet data for the target articles. We visualized the triplet information using NetworkX for knowledge discovery.</p></sec><sec sec-type="results"><title>Results</title><p>Within the scope of literature research in the past 5 years, malignant neoplasms appeared most frequently (587/1387, 42.3%). Pharmacotherapy (267/1387, 19.3%) was the primary treatment method, with trastuzumab (209/1805, 11.6%) being the most commonly used therapeutic drug. Through the analysis of the knowledge graph, we have discovered a complex network of relationships between treatment methods, therapeutic drugs, and preventive measures for different types of breast cancer.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study constructed a knowledge graph for breast cancer prevention and treatment, which enabled the integration and knowledge discovery of relevant literature in the past 5 years. Researchers can gain insights into treatment methods, drugs, preventive knowledge regarding adverse reactions to treatment, and the associations between different knowledge domains from the graph.</p></sec></abstract><kwd-group><kwd>knowledge graph</kwd><kwd>breast cancer</kwd><kwd>treatment</kwd><kwd>prevention</kwd><kwd>adverse reaction</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Breast cancer is the most common malignant tumor in women worldwide, with a reported death toll exceeding 600,000 in 2018 alone [<xref ref-type="bibr" rid="ref1">1</xref>]. Breast cancer has emerged as the most prevalent cancer and a primary cause of mortality among women. The global incidence of new cases of female breast cancer witnessed a sharp increase from 1.05 million in 2000 to 2.09 million in 2018 [<xref ref-type="bibr" rid="ref2">2</xref>]. 
In 2020, global cancer burden data revealed that new breast cancer cases reached 2.26 million, constituting 11.7% of all newly diagnosed cancer cases worldwide. The newly reported mortality cases numbered 0.68 million, representing 6.9% of global newly reported deaths [<xref ref-type="bibr" rid="ref3">3</xref>]. Factors such as old age, young age at menarche, family history of breast cancer, smoking, and drinking alcohol increase the risk of breast cancer [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. On the contrary, regular physical exercise; breastfeeding; regular work and rest; and intake of fruits, vegetables, whole grains, and dietary fiber can appropriately reduce the risk of breast cancer [<xref ref-type="bibr" rid="ref7">7</xref>]. Various treatment methods are used for patients with breast cancer, including surgery, radiation therapy, endocrine therapy, chemotherapy, and targeted therapy. So far, most countries have primarily focused on population education for breast cancer prevention, including encouraging increased physical activity, controlling BMI, and limiting alcohol intake [<xref ref-type="bibr" rid="ref8">8</xref>]. Despite the increasing number of research literature, a large amount of literature on breast cancer prevention and treatment has not been systematically integrated. Knowledge graph technology allows for the independent connection and integration of disparate literature, resulting in a more comprehensive and cohesive knowledge framework.</p><p>Knowledge Graph is a knowledge repository proposed by Google in 2012 to enhance the functionality of search engines. It describes concepts and their relationships in the real world using triplets in the form of entity-relation-entity [<xref ref-type="bibr" rid="ref9">9</xref>]. Knowledge graphs can integrate information from diverse sources and domains, including text, databases, and web pages, and intricately interlink them. 
These integrations serve to mitigate information silos, fostering the establishment of a more comprehensive knowledge framework. Knowledge graphs have been widely used in various fields, such as medicine, network security, journalism, finance, and education [<xref ref-type="bibr" rid="ref10">10</xref>]. Knowledge graphs in the biomedical domain have applications in studies related to disease associations [<xref ref-type="bibr" rid="ref11">11</xref>], genomics [<xref ref-type="bibr" rid="ref12">12</xref>], drug interactions [<xref ref-type="bibr" rid="ref13">13</xref>], and support for physicians in formulating individualized treatment regimens [<xref ref-type="bibr" rid="ref14">14</xref>]. At present, there are well-established knowledge graphs, including DisGeNET [<xref ref-type="bibr" rid="ref15">15</xref>], which integrates information on the associations between genes and diseases; DrugBank [<xref ref-type="bibr" rid="ref16">16</xref>], a comprehensive bioinformatics and cheminformatics knowledge base; and ClinVar [<xref ref-type="bibr" rid="ref17">17</xref>], a compilation of genetic variation information from diverse laboratories worldwide. One study extracted breast cancer&#x2013;related features from Chinese breast cancer mammography reports and built a knowledge graph for diagnosing breast cancer by combining diagnosis and treatment guidelines and insights from clinical experts [<xref ref-type="bibr" rid="ref18">18</xref>]. Another study integrated triples from clinical guidelines, medical encyclopedias, and electronic medical records to build a breast cancer knowledge graph [<xref ref-type="bibr" rid="ref19">19</xref>]. Despite a small number of scholars having constructed knowledge graphs for breast cancer, the varied emphases and diverse data sources employed render their applicability limited. A knowledge graph specifically focused on the prevention and treatment of breast cancer has not been constructed at present. 
Therefore, this study primarily collects information related to the prevention and treatment of breast cancer to construct a knowledge graph.</p><p>In the biomedical field, there are already mature tools (eg, SemRep) for extracting knowledge from medical texts. SemRep is a natural language processing program based on the Unified Medical Language System (UMLS), which performs operations such as text tokenization, syntactic analysis, part-of-speech disambiguation, phrase mapping, semantic predicate normalization, and syntactic constraints [<xref ref-type="bibr" rid="ref20">20</xref>]. It extracts entities and relationships from biomedical texts and outputs triplets stored in the Semantic MEDLINE Database (SemMedDB) [<xref ref-type="bibr" rid="ref21">21</xref>]. SemMedDB currently encompasses details on approximately 96.3 million predications derived from all PubMed citations (around 29.1 million citations) and serves as the foundation for the Semantic MEDLINE application [<xref ref-type="bibr" rid="ref22">22</xref>]. We downloaded the entity and relationship data provided by SemMedDB. NetworkX is an open-source library for Python, primarily designed for creating, analyzing, and visualizing complex network structures. NetworkX plays a significant role in knowledge visualization, facilitating users in intuitively presenting and comprehending intricate knowledge graphs or network data.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethics Approval</title><p>This study was approved by the Board of Medical Ethics Committee of Shenzhen Maternal and Child Health Hospital (SFYLS[2022]003).</p></sec><sec id="s2-2"><title>Data Source</title><p>We conducted a search on PubMed using Medical Subject Headings terms &#x201C;breast cancer,&#x201D; &#x201C;prevention,&#x201D; and &#x201C;treatment,&#x201D; covering the period from January 1, 2018, to December 31, 2022, and the study type was clinical trials. 
A total of 3589 articles were retrieved. We obtained the entity and relationship data from SemMedDB.</p></sec><sec id="s2-3"><title>Data Processing and Construction of Knowledge Graph</title><p>We matched the PMIDs of the retrieved articles with the database and extracted the corresponding triplet information. We initially obtained 33,060 Subject-Predicate-Object (SPO) triplets of data.</p><p>Next, we made improvements according to the SPO cleaning principles proposed by Fiszman et al [<xref ref-type="bibr" rid="ref9">9</xref>] (ie, relevance, connectivity, novelty, and significance). We combined them with expert manual screening to ensure that the selected SPO triplets have a higher relevance. In the improved process, we did not predefine semantic patterns. Instead, we used a series of cleaning operations to select core SPO triplets and connected SPO triplets, eliminating SPO triplets lacking specific information and those that appeared only once. The specific process is as follows:</p><list list-type="order"><list-item><p>In the same article, there may be repeated occurrences of identical SPO triplets. To maintain equal contribution from each article, we counted the repeated SPO triplets once within the same article.</p></list-item><list-item><p>To ensure statistical reliability, we calculated the occurrence frequency of each SPO triplet across different articles. SPO triplets with low occurrence frequencies may lack statistical significance. Therefore, we retained only SPO triplets with frequencies greater than or equal to 2.</p></list-item><list-item><p>Based on expert domain knowledge, we manually screened the selected SPO triplets with frequencies greater than or equal to 2 to identify those of research value.</p></list-item></list><p>Finally, we obtained 25,449 SPO triplets. 
We imported the filtered SPO triplet information into NetworkX for visual analysis to explore knowledge and information related to breast cancer prevention and treatment.</p><p>All analyses were conducted in a Python program (version 3.11.3; Python Software Foundation), primarily using Pandas, Matplotlib, WordCloud, and NetworkX packages [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref26">26</xref>].</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Summary of Included Literatures</title><p>A total of 3589 articles were published in 618 different journals. Among them, 191 articles were published in a single journal, while 293 journals had only 1 article published. The journals were ranked based on the number of publications, and the top 100 journals accounted for 2631 articles, which is 73.3% of the total.</p></sec><sec id="s3-2"><title>Semantic Relationships and Semantic Patterns</title><p>We mainly summarize semantic associations into 3 types: treatment and prevention, influencing or associated factors, and related diseases (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Regarding treatment and prevention, the relationships include TREATS, ADMINISTERED_TO, USES, and PREVENTS, representing treatment drugs, surgeries, and preventive measures for breast cancer. Regarding influencing or associated factors, the relationships include ASSOCIATED_WITH, AFFECTS, and CAUSES, which represent diseases&#x2019; impact and etiological factors. Regarding related diseases, the relationship COEXISTS_WITH represents the coexistence between different diseases. 
In the semantic patterns involving treatment (TREATS), the topp-TREATS-neop and topp-TREATS-podg have appeared over 1000 times.</p></sec><sec id="s3-3"><title>Summary of SPO Triples</title><p>In terms of breast tumors, malignant neoplasms had the highest frequency, accounting for 42.3% (587/1387) of the total, followed by triple-negative breast neoplasms (56/1387, 4%) and human epidermal growth factor receptor 2 (<italic>HER</italic>2)&#x2013;positive carcinoma of breast (54/1387, 4%; <xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Summary of breast cancer subtypes and stages, treatment methods, and treatment drugs. The top 30 subtypes, treatment methods, and treatment drugs with higher frequencies in all data are presented for each group.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top" colspan="2">Group</td><td align="left" valign="top">Values, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3"><bold>Breast cancer subtypes and stages (n=1387)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Malignant neoplasm of breast</td><td align="left" valign="top">587 (42.3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Triple-negative breast neoplasms</td><td align="left" valign="top">56 (4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><italic>HER</italic>2<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>-positive carcinoma of breast</td><td align="left" valign="top">54 (3.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Carcinoma breast stage IV</td><td align="left" valign="top">48 (3.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Breast cancer metastatic</td><td align="left" 
valign="top">47 (3.4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Early-stage breast carcinoma</td><td align="left" valign="top">42 (3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Malignant neoplasms</td><td align="left" valign="top">31 (2.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Neoplasm</td><td align="left" valign="top">30 (2.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Metastatic triple-negative breast carcinoma</td><td align="left" valign="top">26 (1.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">High-risk cancer</td><td align="left" valign="top">24 (1.7)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Neoplasm metastasis</td><td align="left" valign="top">21 (1.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Advanced cancer</td><td align="left" valign="top">19 (1.4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Advanced breast carcinoma</td><td align="left" valign="top">19 (1.4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><italic>HER</italic>2-negative breast cancer</td><td align="left" valign="top">18 (1.3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Locally advanced malignant neoplasm</td><td align="left" valign="top">17 (1.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Advanced malignant neoplasm</td><td align="left" valign="top">15 (1.1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Nonsmall cell lung carcinoma</td><td align="left" valign="top">15 (1.1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Noninfiltrating intraductal carcinoma</td><td align="left" valign="top">14 (1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Locally advanced breast 
cancer</td><td align="left" valign="top">13 (0.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Breast cancer stage III</td><td align="left" valign="top">11 (0.8)</td></tr><tr><td align="left" valign="top" colspan="3"><bold>Treatment of breast cancer (n=1387)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Pharmacotherapy</td><td align="left" valign="top">267 (19.3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Neoadjuvant therapy</td><td align="left" valign="top">88 (6.3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Hormone therapy</td><td align="left" valign="top">68 (4.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Chemotherapy (adjuvant)</td><td align="left" valign="top">54 (3.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Therapeutic procedure</td><td align="left" valign="top">53 (3.8)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Radiation therapy</td><td align="left" valign="top">48 (3.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Treatment protocols</td><td align="left" valign="top">43 (3.1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Adjuvant therapy</td><td align="left" valign="top">36 (2.6)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Breast-conserving surgery</td><td align="left" valign="top">35 (2.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">First-line treatment</td><td align="left" valign="top">31 (2.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Single-agent therapy</td><td align="left" valign="top">27 (1.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Mastectomy</td><td align="left" valign="top">27 (1.9)</td></tr><tr><td align="left" valign="top"/><td align="left" 
valign="top">Operative surgical procedures</td><td align="left" valign="top">20 (1.4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Interventional procedure</td><td align="left" valign="top">16 (1.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Radiotherapy (adjuvant)</td><td align="left" valign="top">14 (1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Excision of axillary lymph nodes group</td><td align="left" valign="top">13 (0.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Combined modality therapy</td><td align="left" valign="top">12 (0.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Excision</td><td align="left" valign="top">11 (0.8)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Targeted therapy</td><td align="left" valign="top">11 (0.8)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Placebos</td><td align="left" valign="top">10 (0.7)</td></tr><tr><td align="left" valign="top" colspan="3"><bold>Drugs for breast cancer (n=1805)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Trastuzumab</td><td align="left" valign="top">209 (11.6)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Capecitabine</td><td align="left" valign="top">88 (4.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Paclitaxel</td><td align="left" valign="top">81 (4.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Aromatase inhibitors</td><td align="left" valign="top">64 (3.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Immunologic adjuvants</td><td align="left" valign="top">62 (3.4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Letrozole</td><td align="left" valign="top">58 (3.2)</td></tr><tr><td align="left" valign="top"/><td 
align="left" valign="top">Bevacizumab</td><td align="left" valign="top">48 (2.7)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Tamoxifen</td><td align="left" valign="top">40 (2.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Gemcitabine</td><td align="left" valign="top">36 (2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Pertuzumab</td><td align="left" valign="top">36 (2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Fulvestrant</td><td align="left" valign="top">36 (2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Cyclophosphamide</td><td align="left" valign="top">32 (1.8)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Pembrolizumab</td><td align="left" valign="top">30 (1.7)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Docetaxel</td><td align="left" valign="top">27 (1.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Taxane</td><td align="left" valign="top">27 (1.5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Ado-trastuzumab emtansine</td><td align="left" valign="top">22 (1.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">130-nm albumin-bound paclitaxel</td><td align="left" valign="top">22 (1.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Carboplatin</td><td align="left" valign="top">22 (1.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Eribulin</td><td align="left" valign="top">21 (1.2)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Palbociclib</td><td align="left" valign="top">19 (1.1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Exemestane</td><td align="left" valign="top">19 (1.1)</td></tr><tr><td align="left" valign="top"/><td align="left" 
valign="top">Everolimus</td><td align="left" valign="top">19 (1.1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Olaparib</td><td align="left" valign="top">18 (1)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Talazoparib</td><td align="left" valign="top">17 (0.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Pharmaceutical preparations</td><td align="left" valign="top">16 (0.9)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Protein-tyrosine kinase inhibitor</td><td align="left" valign="top">15 (0.8)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Cisplatin</td><td align="left" valign="top">14 (0.8)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Lapatinib</td><td align="left" valign="top">14 (0.8)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Fluorouracil</td><td align="left" valign="top">13 (0.7)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Preservative free ingredient</td><td align="left" valign="top">13 (0.7)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup><italic>HER</italic>2: human epidermal growth factor receptor 2.</p></fn></table-wrap-foot></table-wrap><p>Pharmacotherapy is the most common treatment method, accounting for 19.3% (267/1387) of the overall frequency. Additionally, other high-frequency treatment modalities include neoadjuvant therapy (88/1387, 6%), hormone therapy (68/1387, 5%), adjuvant chemotherapy (54/1387, 4%), and radiation therapy (48/1387, 3%; <xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). 
In breast cancer treatment drugs, trastuzumab (209/1805, 11.6%), capecitabine (88/1805, 5%), paclitaxel (81/1805, 4%), aromatase inhibitors (64/1805, 4%), and immunologic adjuvants (62/1805, 3%) have a relatively high frequency of occurrence (<xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p></sec><sec id="s3-4"><title>Breast Cancer Knowledge Graph</title><p>We visualized the SPO triples and displayed 3 subgroups: breast cancer treatment methods, therapeutic drugs, and relevant preventive measures. <xref ref-type="fig" rid="figure1">Figure 1</xref> shows the relationship between different subtypes and stages of breast cancer and treatment methods. In different subtypes of breast cancer, the highest frequency is observed in malignant neoplasm of the breast, with pharmacotherapy having the highest frequency among various treatment modalities. Different subtypes simultaneously correspond to multiple treatment modalities; likewise, a single treatment modality corresponds to multiple breast cancer subtypes.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Relationship between different subtypes and stages of breast cancer and treatment methods. <italic>HER</italic>2: human epidermal growth factor receptor 2.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e52210_fig01.png"/></fig><p><xref ref-type="fig" rid="figure2">Figure 2</xref> shows the relationship between different subtypes and stages of breast cancer and drugs. Among the therapeutic drugs for breast cancer, trastuzumab has the highest frequency and corresponds to the most types of breast cancer. Capecitabine, paclitaxel, aromatase inhibitors, and immunologic adjuvants also have relatively high frequencies. 
In comparison, immunologic adjuvants have the fewest connections with different types of breast cancer.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Relationship between different subtypes and stages of breast cancer and drugs. <italic>HER</italic>2: human epidermal growth factor receptor 2.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e52210_fig02.png"/></fig><p><xref ref-type="fig" rid="figure3">Figure 3</xref> shows the relationship between breast cancer treatment and adverse reactions. Pharmacotherapy is associated with neuropathy, onycholysis, heart failure, neutropenia, alopecia, febrile neutropenia, anemia, stomatitis, leukopenia, thrombocytopenia, premature menopause, and gastrointestinal dysfunction. Additionally, multiple nodes are connected, forming multiple pathways, such as pharmacotherapy-febrile neutropenia-adjuvant chemotherapy and pharmacotherapy-leukopenia-breast cancer therapeutic procedure-osteoporosis.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Relationship between breast cancer treatment and adverse reactions.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e52210_fig03.png"/></fig><p><xref ref-type="fig" rid="figure4">Figure 4</xref> shows the relationship between adverse reactions after breast cancer treatment and preventive measures. Peripheral neuropathy is associated with cryotherapy, low-level laser therapy, compression procedure, acupuncture procedure, pharmacotherapy, and massage. Lymphedema is associated with resistance education, axillary lymph node dissection, physical therapy, excision of axillary lymph nodes group, and drainage of lymphatics. Early radiation dermatitis is associated with topical administration and bleomycin, cisplatin, or methotrexate protocol. 
In addition, there are some adverse reactions with relatively few treatment measures, such as stomatitis-diet and alopecia-scalp cooling.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Relationship between adverse reactions after breast cancer treatment and preventive measures.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e52210_fig04.png"/></fig><p>We performed a relationship visualization to gain a better understanding of the association between types of breast cancer, treatments, drugs, and genes. <xref ref-type="fig" rid="figure5">Figure 5</xref> intuitively reflects the high frequency of malignant neoplasm of the breast, pharmacotherapy, and trastuzumab. In addition, breast malignant tumors are associated with multiple genes, such as the phosphatidylinositol-4,5-bisphosphate 3-kinase catalytic subunit alpha (<italic>PIK3CA</italic>) gene, platelet-derived growth factor receptor beta (<italic>PDGFRB</italic>) gene, phosphatase and tensin homolog (<italic>PTEN</italic>) gene, and erb-B2 receptor tyrosine kinase 2 (<italic>ERBB2</italic>) gene.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Relationship between types of breast cancer, treatments, drugs, and genes.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e52210_fig05.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The knowledge graphs constructed in this study help researchers understand the research hot spots in breast cancer over the past 5 years. 
The complex network involving treatment methods, drugs, adverse reactions, preventive measures, and genes in breast cancer can assist clinicians in making decisions that comprehensively consider multiple aspects, ultimately aiding in decisions that are the most beneficial to patients. Additionally, the knowledge graph allows for personalized considerations based on specific genes for individual patients.</p><p>This study found that from 2018 to 2022, breast malignancies appeared most frequently in the literature and were the primary concern for researchers. Research interest in triple-negative breast neoplasms is higher than in other subtypes. This phenomenon may be due to the higher risk of recurrence and poor prognosis in patients with early-stage triple-negative breast neoplasms [<xref ref-type="bibr" rid="ref10">10</xref>], making it a subject of greater concern to clinicians and researchers. Among treatment modalities, pharmacotherapy receives the highest attention. Pharmacotherapy for breast cancer primarily involves chemotherapy, endocrine therapy, and targeted therapy [<xref ref-type="bibr" rid="ref27">27</xref>]. Compared to traditional surgery and radiotherapy, pharmacotherapy can more precisely intervene in the growth and division of cancer cells by targeting specific molecules or cellular structures, which reduces damage to normal cells and allows for the formulation of personalized treatment plans based on the patient&#x2019;s genotype and molecular characteristics [<xref ref-type="bibr" rid="ref28">28</xref>]. Medications circulating through the bloodstream can also act on cancer cells throughout the body, preventing cancer cell metastasis. These advantages of pharmacotherapy may be related to the heightened emphasis on pharmacotherapy over the past 5 years. 
Trastuzumab receives the highest attention in breast cancer pharmacotherapy; it is a specific cancer-targeting medication used in the treatment of cancers characterized by elevated levels of HER2 protein [<xref ref-type="bibr" rid="ref29">29</xref>].</p><p>Pharmacotherapy is associated with various adverse reactions, including neutropenia, neuropathy, onycholysis, heart failure, alopecia, and febrile neutropenia. Among these adverse reactions, peripheral neuropathy and lymphedema have the most corresponding preventive and treatment measures, with lymphedema being a common complication after surgery [<xref ref-type="bibr" rid="ref30">30</xref>]. However, there is limited research on how to prevent and treat the potential adverse reactions of pharmacotherapy, and further studies are needed. Various adverse effects of breast cancer treatment may reduce patients&#x2019; adherence to treatment. Therefore, when clinicians choose different treatments and drugs, they should pay close attention to their potential adverse reactions and how to prevent or mitigate them.</p><p>In existing knowledge graphs related to breast cancer, one study from China constructed a knowledge graph using electronic medical records, clinical guidelines, and expert opinions, primarily focusing on breast cancer diagnosis [<xref ref-type="bibr" rid="ref18">18</xref>]. Another study by Chinese scholars also used data from various sources, including clinical guidelines, medical encyclopedias, and electronic medical records, to construct a knowledge graph primarily applied to medical knowledge question-answering and medical record retrieval [<xref ref-type="bibr" rid="ref19">19</xref>]. These studies used data from multiple sources, including structured, unstructured, and semistructured data. Data extraction and accuracy face challenges. Therefore, they used neural network models for training and calculated a series of metrics to ensure data accuracy. 
For instance, they utilized BERT + Bi-LSTM + CRF for textual data to achieve named entity recognition. In this study, SemMedDB was used as the data source, and the database was constructed by extracting semantic information from PubMed using SemRep, which demonstrated good performance on biomedical text [<xref ref-type="bibr" rid="ref21">21</xref>].</p><p>In summary, the knowledge graph constructed in this study for breast cancer treatment and prevention encompasses information on different stages, subtypes of breast cancer, treatment modalities, medications, adverse reactions, and preventive measures. This knowledge forms a complex network, providing clinical practitioners with a comprehensive and referenced knowledge base. We recommend that clinical practitioners apply our research findings in several aspects. First, clinicians can gain insights into the current state of breast cancer treatment and prevention research through our study. Additionally, there is a relative lack of preventive measures and strategies for mitigating postoperative and postmedication adverse reactions compared to breast cancer treatment, and more efforts are needed in these areas. Furthermore, our research can assist clinicians in making comprehensive decisions. For instance, when selecting a treatment approach for patients, the knowledge graph facilitates linking to available medications, associated adverse reactions, and measures to mitigate or prevent adverse effects.</p><p>Our research still has several limitations. First, SemRep, as a natural language processing program based on the UMLS, still exhibits shortcomings. Despite the extensive coverage and scale of the UMLS Metathesaurus, it has a relatively limited ability to recognize entities. There are still areas for improvement in processing natural language texts [<xref ref-type="bibr" rid="ref20">20</xref>]. 
Second, clinical researchers often prefer causal relationships rather than pure correlations; however, our study can only reveal the connections between pieces of information and cannot determine the magnitude and direction of their effects. Third, with the release of new literature, the knowledge graph also needs to be updated promptly, increasing the burden on researchers. Future improvements should focus on automating the mining of literature data to ensure timely updates to the knowledge graph for breast cancer prevention and treatment, thereby alleviating the burden on researchers.</p></sec><sec id="s4-2"><title>Conclusions</title><p>This study successfully constructed a knowledge graph for breast cancer prevention and treatment by integrating relevant literature from the past 5 years and conducting knowledge discovery. Through this knowledge graph, researchers can learn about breast cancer treatment methods, medications, and adverse reactions to preventive treatments and gain insights into the relationships between different pieces of knowledge.</p></sec></sec></body><back><ack><p>The authors would like to thank Feng Xixi, associate chief physician and member of the Chronic Disease Special Committee of the Chengdu City Preventive Medicine Association, for her suggestions at the initial stage of the study.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ERBB2</term><def><p>erb-B2 receptor tyrosine kinase 2</p></def></def-item><def-item><term id="abb2">HER2</term><def><p>human epidermal growth factor receptor 2</p></def></def-item><def-item><term id="abb3">PDGFRB</term><def><p>platelet-derived growth factor receptor beta</p></def></def-item><def-item><term id="abb4">PIK3CA</term><def><p>phosphatidylinositol-4,5-bisphosphate 3-kinase catalytic subunit alpha</p></def></def-item><def-item><term id="abb5">PTEN</term><def><p>phosphatase and tensin 
homolog</p></def></def-item><def-item><term id="abb6">SemMedDB</term><def><p>Semantic MEDLINE Database</p></def></def-item><def-item><term id="abb7">SPO</term><def><p>Subject-Predicate-Object</p></def></def-item><def-item><term id="abb8">UMLS</term><def><p>Unified Medical Language System</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bray</surname><given-names>F</given-names></name><name name-style="western"><surname>Ferlay</surname><given-names>J</given-names></name><name name-style="western"><surname>Soerjomataram</surname><given-names>I</given-names></name><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names></name><name name-style="western"><surname>Torre</surname><given-names>LA</given-names></name><name name-style="western"><surname>Jemal</surname><given-names>A</given-names></name></person-group><article-title>Global cancer statistics 2018: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title><source>CA Cancer J Clin</source><year>2018</year><month>11</month><volume>68</volume><issue>6</issue><fpage>394</fpage><lpage>424</lpage><pub-id pub-id-type="doi">10.3322/caac.21492</pub-id><pub-id pub-id-type="medline">30207593</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xiao</surname><given-names>Y</given-names></name><name name-style="western"><surname>Xia</surname><given-names>J</given-names></name><name name-style="western"><surname>Li</surname><given-names>L</given-names></name><etal/></person-group><article-title>Associations between dietary patterns and the risk of breast cancer: a systematic review and meta-analysis of observational studies</article-title><source>Breast Cancer 
Res</source><year>2019</year><month>01</month><day>29</day><volume>21</volume><issue>1</issue><fpage>16</fpage><pub-id pub-id-type="doi">10.1186/s13058-019-1096-1</pub-id><pub-id pub-id-type="medline">30696460</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sung</surname><given-names>H</given-names></name><name name-style="western"><surname>Ferlay</surname><given-names>J</given-names></name><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names></name><etal/></person-group><article-title>Global cancer statistics 2020: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title><source>CA Cancer J Clin</source><year>2021</year><month>05</month><volume>71</volume><issue>3</issue><fpage>209</fpage><lpage>249</lpage><pub-id pub-id-type="doi">10.3322/caac.21660</pub-id><pub-id pub-id-type="medline">33538338</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thakur</surname><given-names>P</given-names></name><name name-style="western"><surname>Seam</surname><given-names>RK</given-names></name><name name-style="western"><surname>Gupta</surname><given-names>MK</given-names></name><name name-style="western"><surname>Gupta</surname><given-names>M</given-names></name><name name-style="western"><surname>Sharma</surname><given-names>M</given-names></name><name name-style="western"><surname>Fotedar</surname><given-names>V</given-names></name></person-group><article-title>Breast cancer risk factor evaluation in a Western Himalayan state: a case-control study and comparison with the Western World</article-title><source>South Asian J Cancer</source><year>2017</year><volume>6</volume><issue>3</issue><fpage>106</fpage><lpage>109</lpage><pub-id 
pub-id-type="doi">10.4103/sajc.sajc_157_16</pub-id><pub-id pub-id-type="medline">28975116</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Badr</surname><given-names>LK</given-names></name><name name-style="western"><surname>Bourdeanu</surname><given-names>L</given-names></name><name name-style="western"><surname>Alatrash</surname><given-names>M</given-names></name><name name-style="western"><surname>Bekarian</surname><given-names>G</given-names></name></person-group><article-title>Breast cancer risk factors: a cross-cultural comparison between the west and the east</article-title><source>Asian Pac J Cancer Prev</source><year>2018</year><month>08</month><day>24</day><volume>19</volume><issue>8</issue><fpage>2109</fpage><lpage>2116</lpage><pub-id pub-id-type="doi">10.22034/APJCP.2018.19.8.2109</pub-id><pub-id pub-id-type="medline">30139209</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>X</given-names></name><name name-style="western"><surname>Dong</surname><given-names>XP</given-names></name><name name-style="western"><surname>Guan</surname><given-names>YZ</given-names></name><name name-style="western"><surname>Me</surname><given-names>R</given-names></name><name name-style="western"><surname>Guo</surname><given-names>DL</given-names></name><name name-style="western"><surname>He</surname><given-names>YT</given-names></name><etal/></person-group><article-title>Research progress on epidemiological trend and risk factors of female breast cancer</article-title><source>Cancer Res Prev Treat</source><year>2021</year><volume>48</volume><issue>1</issue><fpage>87</fpage><lpage>92</lpage></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>MM</given-names></name><name name-style="western"><surname>Ho</surname><given-names>WK</given-names></name><name name-style="western"><surname>Yoon</surname><given-names>SY</given-names></name><etal/></person-group><article-title>A case-control study of breast cancer risk factors in 7,663 women in Malaysia</article-title><source>PLoS One</source><year>2018</year><volume>13</volume><issue>9</issue><fpage>e0203469</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0203469</pub-id><pub-id pub-id-type="medline">30216346</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Britt</surname><given-names>KL</given-names></name><name name-style="western"><surname>Cuzick</surname><given-names>J</given-names></name><name name-style="western"><surname>Phillips</surname><given-names>KA</given-names></name></person-group><article-title>Key steps for effective breast cancer prevention</article-title><source>Nat Rev Cancer</source><year>2020</year><month>08</month><volume>20</volume><issue>8</issue><fpage>417</fpage><lpage>436</lpage><pub-id pub-id-type="doi">10.1038/s41568-020-0266-x</pub-id><pub-id pub-id-type="medline">32528185</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Fiszman</surname><given-names>M</given-names></name><name name-style="western"><surname>Rindflesch</surname><given-names>TC</given-names></name><name name-style="western"><surname>Kilicoglu</surname><given-names>H</given-names></name></person-group><article-title>Abstraction summarization for managing the biomedical research literature</article-title><source>Proceedings of the Computational Lexical Semantics Workshop at HLT-NAACL 
2004</source><year>2004</year><publisher-name>Association for Computational Linguistics</publisher-name><fpage>76</fpage><lpage>83</lpage><pub-id pub-id-type="doi">10.5555/1596431.1596442</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="web"><article-title>For the progress of adjuvant treatment of triple-negative breast cancer, just look at these 8 key clinical studies! [Article in Chinese]</article-title><source>Sohu</source><year>2021</year><month>12</month><day>14</day><access-date>2023-06-25</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.sohu.com/a/508222106_121118854">https://www.sohu.com/a/508222106_121118854</ext-link></comment></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Feng</surname><given-names>B</given-names></name><name name-style="western"><surname>Gao</surname><given-names>J</given-names></name></person-group><article-title>AnthraxKP: a knowledge graph-based, anthrax knowledge portal mined from biomedical literature</article-title><source>Database (Oxford)</source><year>2022</year><month>06</month><day>2</day><volume>2022</volume><fpage>baac037</fpage><pub-id pub-id-type="doi">10.1093/database/baac037</pub-id><pub-id pub-id-type="medline">35653350</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Feng</surname><given-names>F</given-names></name><name name-style="western"><surname>Tang</surname><given-names>F</given-names></name><name name-style="western"><surname>Gao</surname><given-names>Y</given-names></name><etal/></person-group><article-title>GenomicKB: a knowledge graph for the human genome</article-title><source>Nucleic Acids 
Res</source><year>2023</year><month>01</month><day>6</day><volume>51</volume><issue>D1</issue><fpage>D950</fpage><lpage>D956</lpage><pub-id pub-id-type="doi">10.1093/nar/gkac957</pub-id><pub-id pub-id-type="medline">36318240</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>James</surname><given-names>T</given-names></name><name name-style="western"><surname>Hennig</surname><given-names>H</given-names></name></person-group><article-title>Knowledge graphs and their applications in drug discovery</article-title><source>Methods Mol Biol</source><year>2024</year><volume>2716</volume><fpage>203</fpage><lpage>221</lpage><pub-id pub-id-type="doi">10.1007/978-1-0716-3449-3_9</pub-id><pub-id pub-id-type="medline">37702941</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lyu</surname><given-names>K</given-names></name><name name-style="western"><surname>Tian</surname><given-names>Y</given-names></name><name name-style="western"><surname>Shang</surname><given-names>Y</given-names></name><etal/></person-group><article-title>Causal knowledge graph construction and evaluation for clinical decision support of diabetic nephropathy</article-title><source>J Biomed Inform</source><year>2023</year><month>03</month><volume>139</volume><fpage>104298</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2023.104298</pub-id><pub-id pub-id-type="medline">36731730</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pi&#x00F1;ero</surname><given-names>J</given-names></name><name name-style="western"><surname>Bravo</surname><given-names>&#x00C0;</given-names></name><name 
name-style="western"><surname>Queralt-Rosinach</surname><given-names>N</given-names></name><etal/></person-group><article-title>DisGeNET: a comprehensive platform integrating information on human disease-associated genes and variants</article-title><source>Nucleic Acids Res</source><year>2017</year><month>01</month><day>4</day><volume>45</volume><issue>D1</issue><fpage>D833</fpage><lpage>D839</lpage><pub-id pub-id-type="doi">10.1093/nar/gkw943</pub-id><pub-id pub-id-type="medline">27924018</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wishart</surname><given-names>DS</given-names></name><name name-style="western"><surname>Feunang</surname><given-names>YD</given-names></name><name name-style="western"><surname>Guo</surname><given-names>AC</given-names></name><etal/></person-group><article-title>DrugBank 5.0: a major update to the DrugBank database for 2018</article-title><source>Nucleic Acids Res</source><year>2018</year><month>01</month><day>4</day><volume>46</volume><issue>D1</issue><fpage>D1074</fpage><lpage>D1082</lpage><pub-id pub-id-type="doi">10.1093/nar/gkx1037</pub-id><pub-id pub-id-type="medline">29126136</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><article-title>ClinVar</article-title><source>National Library of Medicine</source><access-date>2023-11-18</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/clinvar">https://www.ncbi.nlm.nih.gov/clinvar</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>X</given-names></name><name name-style="western"><surname>Sun</surname><given-names>S</given-names></name><name 
name-style="western"><surname>Tang</surname><given-names>T</given-names></name><etal/></person-group><article-title>Construction of a knowledge graph for breast cancer diagnosis based on Chinese electronic medical records: development and usability study</article-title><source>BMC Med Inform Decis Mak</source><year>2023</year><month>10</month><day>10</day><volume>23</volume><issue>1</issue><fpage>210</fpage><pub-id pub-id-type="doi">10.1186/s12911-023-02322-0</pub-id><pub-id pub-id-type="medline">37817193</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>An</surname><given-names>B</given-names></name></person-group><article-title>Construction and application of Chinese breast cancer knowledge graph based on multi-source heterogeneous data</article-title><source>Math Biosci Eng</source><year>2023</year><month>02</month><day>6</day><volume>20</volume><issue>4</issue><fpage>6776</fpage><lpage>6799</lpage><pub-id pub-id-type="doi">10.3934/mbe.2023292</pub-id><pub-id pub-id-type="medline">37161128</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>XY</given-names></name><name name-style="western"><surname>Li</surname><given-names>JL</given-names></name><name name-style="western"><surname>Li</surname><given-names>ZY</given-names></name></person-group><article-title>Integrated medical language system and its application in knowledge discovery</article-title><source>Digital Library Forum</source><year>2019</year><volume>9</volume><fpage>24</fpage><lpage>29</lpage></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kilicoglu</surname><given-names>H</given-names></name><name 
name-style="western"><surname>Rosemblat</surname><given-names>G</given-names></name><name name-style="western"><surname>Fiszman</surname><given-names>M</given-names></name><name name-style="western"><surname>Shin</surname><given-names>D</given-names></name></person-group><article-title>Broad-coverage biomedical relation extraction with SemRep</article-title><source>BMC Bioinformatics</source><year>2020</year><month>05</month><day>14</day><volume>21</volume><issue>1</issue><fpage>188</fpage><pub-id pub-id-type="doi">10.1186/s12859-020-3517-7</pub-id><pub-id pub-id-type="medline">32410573</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><article-title>Access to SemRep/SemMedDB/SKR resources</article-title><source>National Library of Medicine</source><access-date>2023-11-18</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://lhncbc.nlm.nih.gov/ii/tools/SemRep_SemMedDB_SKR.html">https://lhncbc.nlm.nih.gov/ii/tools/SemRep_SemMedDB_SKR.html</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>McKinney</surname><given-names>W</given-names></name></person-group><article-title>Pandas: a foundational Python library for data analysis and statistics</article-title><source>Python for High Performance and Scientific Computing</source><year>2010</year><publisher-name>Deutsches Zentrum f&#x00FC;r Luft- und Raumfahrt</publisher-name><fpage>293</fpage><lpage>296</lpage></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hunter</surname><given-names>JD</given-names></name></person-group><article-title>Matplotlib: a 2D graphics environment</article-title><source>Comput Sci 
Eng</source><year>2007</year><volume>9</volume><issue>3</issue><fpage>90</fpage><lpage>95</lpage><pub-id pub-id-type="doi">10.1109/MCSE.2007.55</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>WordCloud for Python documentation</article-title><source>Andreas C. M&#x00FC;ller - Machine Learning Scientist</source><access-date>2023-12-25</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://amueller.github.io/word_cloud/">https://amueller.github.io/word_cloud/</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Hagberg</surname><given-names>A</given-names></name><name name-style="western"><surname>Swart</surname><given-names>PJ</given-names></name><name name-style="western"><surname>Schult</surname><given-names>DA</given-names></name></person-group><source>Exploring Network Structure, Dynamics, and Function Using NetworkX</source><year>2008</year><publisher-name>Los Alamos National Lab (LANL)</publisher-name></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><article-title>The difference between breast cancer radiotherapy, targeted therapy and chemotherapy! 
[Article in Chinese]</article-title><source>Sohu</source><year>2018</year><month>12</month><day>7</day><access-date>2023-11-18</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.sohu.com/a/280208482_790163">https://www.sohu.com/a/280208482_790163</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nagini</surname><given-names>S</given-names></name></person-group><article-title>Breast cancer: current molecular therapeutic targets and new players</article-title><source>Anticancer Agents Med Chem</source><year>2017</year><volume>17</volume><issue>2</issue><fpage>152</fpage><lpage>163</lpage><pub-id pub-id-type="doi">10.2174/1871520616666160502122724</pub-id><pub-id pub-id-type="medline">27137076</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>Trastuzumab</article-title><source>Cancer Research UK</source><access-date>2023-11-18</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancerresearchuk.org/about-cancer/treatment/drugs/trastuzumab">https://www.cancerresearchuk.org/about-cancer/treatment/drugs/trastuzumab</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bernas</surname><given-names>M</given-names></name><name name-style="western"><surname>Thiadens</surname><given-names>SRJ</given-names></name><name name-style="western"><surname>Smoot</surname><given-names>B</given-names></name><name name-style="western"><surname>Armer</surname><given-names>JM</given-names></name><name name-style="western"><surname>Stewart</surname><given-names>P</given-names></name><name name-style="western"><surname>Granzow</surname><given-names>J</given-names></name></person-group><article-title>Lymphedema 
following cancer therapy: overview and options</article-title><source>Clin Exp Metastasis</source><year>2018</year><month>08</month><volume>35</volume><issue>5-6</issue><fpage>547</fpage><lpage>551</lpage><pub-id pub-id-type="doi">10.1007/s10585-018-9899-5</pub-id><pub-id pub-id-type="medline">29774452</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Table depicting the semantic relationship and semantic schema of breast cancer.</p><media xlink:href="medinform_v12i1e52210_app1.docx" xlink:title="DOCX File, 19 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Different subtypes and stages of breast cancer.</p><media xlink:href="medinform_v12i1e52210_app2.png" xlink:title="PNG File, 158 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Treatments of breast cancer.</p><media xlink:href="medinform_v12i1e52210_app3.png" xlink:title="PNG File, 214 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Drugs for breast cancer.</p><media xlink:href="medinform_v12i1e52210_app4.png" xlink:title="PNG File, 160 KB"/></supplementary-material></app-group></back></article>