<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e67513</article-id><article-id pub-id-type="doi">10.2196/67513</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Predicting Drug&#x2013;Side Effect Relationships From Parametric Knowledge Embedded in Biomedical BERT Models: Methodological Study With a Natural Language Processing Approach</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Jeon</surname><given-names>Woohyuk</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Park</surname><given-names>Minjae</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>An</surname><given-names>Doyeon</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib 
contrib-type="author"><name name-style="western"><surname>Nam</surname><given-names>Wonshik</given-names></name><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Shin</surname><given-names>Ju-Young</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Seunghee</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Lee</surname><given-names>Suehyun</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Computer Engineering, College of IT Convergence, Gachon University</institution><addr-line>AI&#x00B7;Engineering Building, 317A, 1342 Seongnam-daero, Sujeong-gu, Seongnam-si, Gyeonggi-do</addr-line><addr-line>Seongnam</addr-line><country>Republic of Korea</country></aff><aff id="aff2"><institution>School of Pharmacy, Sungkyunkwan University</institution><addr-line>Suwon</addr-line><country>Republic of Korea</country></aff><aff id="aff3"><institution>Konyang Medical Data Research Group, Konyang University Hospital</institution><addr-line>Daejeon</addr-line><country>Republic of Korea</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Simona</surname><given-names>Aurelien</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Yoon</surname><given-names>Dukyong</given-names></name></contrib><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Zaghir</surname><given-names>Jamil</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Mohd Yusoh</surname><given-names>Zeratul Izzah</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Suehyun Lee, PhD, Department of Computer Engineering, College of IT Convergence, Gachon University, AI&#x00B7;Engineering Building, 317A, 1342 Seongnam-daero, Sujeong-gu, Seongnam-si, Gyeonggi-do, Seongnam, 13120, Republic of Korea, 82 010-9012-9364, 82 031-750-5333; <email>leesh@gachon.ac.kr</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>10</day><month>7</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e67513</elocation-id><history><date date-type="received"><day>14</day><month>10</month><year>2024</year></date><date date-type="rev-recd"><day>13</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>13</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Woohyuk Jeon, Minjae Park, Doyeon An, Wonshik Nam, Ju-Young Shin, Seunghee Lee, Suehyun Lee. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 10.7.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e67513"/><abstract><sec><title>Background</title><p>Adverse drug reactions (ADRs) pose serious risks to patient health, and effectively predicting and managing them is an important public health challenge. Given the complexity and specificity of biomedical text data, the traditional context-independent word embedding model, Word2Vec, has limitations in fully reflecting the domain specificity of such data. Although Bidirectional Encoder Representations from Transformers (BERT)&#x2013;based models pretrained on biomedical corpora have demonstrated high performance in ADR-related studies, research using these models to predict previously unknown drug&#x2013;side effect relationships remains insufficient.</p></sec><sec><title>Objective</title><p>This study proposes a method for predicting drug&#x2013;side effect relationships by leveraging the parametric knowledge embedded in biomedical BERT models. 
Through this approach, we predict promising candidates for potential drug&#x2013;side effect relationships with unknown causal mechanisms by leveraging parametric knowledge from biomedical BERT models and embedding vector similarities of known relationships.</p></sec><sec sec-type="methods"><title>Methods</title><p>We used 158,096 pairs of drug&#x2013;side effect relationships from the side effect resource (SIDER) database to generate an adjacency matrix and calculate the cosine similarity between word embedding vectors of drugs and side effects. Relation scores were calculated for 8,235,435 drug&#x2013;side effect pairs using this similarity. To evaluate the prediction accuracy of drug-side effect relationships, the area under the curve (AUC) value was measured using the calculated relation score and 158,096 known drug&#x2013;side effect relationships from SIDER.</p></sec><sec sec-type="results"><title>Results</title><p>The clagator/biobert_v1.1 model achieved an AUC of 0.915 at an optimal threshold of 0.289, outperforming the existing Word2Vec model with an AUC of 0.848. The BERT-based models pretrained on the biomedical corpus outperformed the vanilla BERT model with an AUC of 0.857. External validation with the FDA (Food and Drug Administration) Adverse Event Reporting System data, using Fisher exact test based on 8,235,435 predicted drug&#x2013;side effect pairs and 901,361 known relationships, confirmed high statistical significance (<italic>P</italic>&#x003C;.001) with an odds ratio of 4.822. 
In addition, a literature review of predicted drug&#x2013;side effect relationships not confirmed in the SIDER database revealed that these relationships have been reported in recent studies published after 2016.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study introduces a method for extracting drug&#x2013;side effect relationships embedded in parameters of language models pretrained on biomedical corpora and using this information to predict previously unknown drug&#x2013;side effect relationships. We found that BERT-based models pretrained with biomedical corpora consider contextual information and achieve better performance in drug&#x2013;side effect relationship prediction. External validation using the FDA Adverse Event Reporting System dataset and the literature review of certain cases confirmed high statistical significance, demonstrating practical applicability. These results highlight the utility of natural language processing&#x2013;based approaches for predicting and managing ADR.</p></sec></abstract><kwd-group><kwd>adverse drug reaction</kwd><kwd>ADR prediction</kwd><kwd>NLP</kwd><kwd>BERT</kwd><kwd>word embedding</kwd><kwd>drug-side effect relationship</kwd><kwd>Bidirectional Encoder Representations from Transformers</kwd><kwd>natural language processing</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>An adverse drug reaction (ADR) is a harmful, unintended reaction that occurs despite the proper use of medication [<xref ref-type="bibr" rid="ref1">1</xref>]. In addition to causing serious health problems, ADRs are known to be one of the leading causes of prolonged patient hospitalization and increased health care spending [<xref ref-type="bibr" rid="ref2">2</xref>]. Approximately 2 million cases of serious ADRs are reported annually in the United States, resulting in 100,000 deaths [<xref ref-type="bibr" rid="ref3">3</xref>]. 
Therefore, early prediction and prevention of ADRs during drug development is a critical challenge for patient safety and public health.</p><p>Traditionally, ADR prediction has been based on approaches that analyze the chemical structure, mechanism of action, and pharmacokinetic properties of drugs [<xref ref-type="bibr" rid="ref4">4</xref>]. Subsequently, ADR prediction methodologies using machine learning techniques have been developed [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>], and with advances in natural language processing (NLP) techniques, attempts have been made to automatically extract and predict drug-side effect relationships from vast amounts of biomedical literature data [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. The prediction of ADRs using these techniques is accelerating, especially with the advent of word embedding methods such as Word2Vec [<xref ref-type="bibr" rid="ref11">11</xref>], which can effectively vectorize semantic information embedded in textual data [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>However, biomedical text data are characterized by a much more specialized and complex set of terms and concepts compared with the general literature, and the interactions between them are also highly diverse and dynamic [<xref ref-type="bibr" rid="ref14">14</xref>]. In fact, it has been pointed out that traditional word embedding models such as Word2Vec, which do not consider contextual information, do not sufficiently represent the relationships between complex biomedical concepts [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. 
Therefore, models that do not adequately reflect domain specificity are limited in their ability to accurately capture drug&#x2013;side effect relationships.</p><p>One solution to this problem is to use language models based on Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref17">17</xref>] to perform word embedding. BERT is a language model based on the transformer [<xref ref-type="bibr" rid="ref18">18</xref>] architecture, which has recently gained attention; unlike traditional one-way language models, it has richer language expressiveness by learning context in both directions. In addition, because BERT-based models are pretrained on large corpora, domain-specific models pretrained on large biomedical corpora can fully reflect the domain specificity of the biomedical text data [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. Recently, several BERT-based models have been proposed that use large biomedical corpora such as PubMed and PMC for domain-specific pretraining. Examples include BioBERT [<xref ref-type="bibr" rid="ref19">19</xref>], BioMedBERT [<xref ref-type="bibr" rid="ref20">20</xref>], and PharmBERT [<xref ref-type="bibr" rid="ref23">23</xref>], which have demonstrated high performance in various bio-NLP tasks.</p><p>BERT models and BERT-based models pretrained on biomedical corpora have demonstrated high performance in ADR-related studies [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. However, there is a lack of research leveraging these models to predict previously unknown drug&#x2013;side effect relationships. Therefore, this study aims to use a biomedical domain-specific BERT language model based on the ADR prediction relation score methodology proposed by Seungsoo et al [<xref ref-type="bibr" rid="ref12">12</xref>]. 
Specifically, we calculate the similarity between embedding vectors of known drug&#x2013;side effect relationships and derive promising candidates for potential relationships. In other words, our objective is to efficiently identify drug&#x2013;side effect relationships whose causal associations have not yet been clearly established, by computing relation scores from biomedical language model embeddings grounded in known relationships. Furthermore, we examine whether replacing the Word2Vec model with a BERT-based model leads to an actual improvement in ADR prediction accuracy, thus validating the performance advantages of context-dependent language models.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>System Overview</title><p><xref ref-type="fig" rid="figure1">Figure 1</xref> presents an overview of this study and illustrates the overall research flow from data collection to validation.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>System overview: the predictive performance of drug-side effect relationships was evaluated using area under the curve. AUC: area under the curve; BERT: Bidirectional Encoder Representations from Transformers; FAERS: FDA (Food and Drug Administration) Adverse Event Reporting System; SIDER: side effect resource.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67513_fig01.png"/></fig><p>First, we collected and refined the data for this study from the Side Effect Resource (SIDER), PubMed, and the FDA Adverse Event Reporting System (FAERS). From the abstract sentences collected from PubMed, we selectively extracted only sentences containing drugs and side effects mentioned in SIDER. These extracted sentences were used to train a Word2Vec model, from which embedding vectors for drugs and side effects were derived. 
Based on the drug&#x2013;side effect relationships in SIDER, BERT-based models were used to derive the embedding vectors of drugs and side effects.</p><p>Based on the derived embedding vectors, the cosine similarity between the drug and side effect pairs was calculated. Using the cosine similarity of drug and side effect pairs and existing known drug&#x2013;side effect relationships, a relation score was calculated for all drug&#x2013;side effect combinations. Drug&#x2013;side effect combinations with high relation scores were predicted to have a higher likelihood of being actually related [<xref ref-type="bibr" rid="ref12">12</xref>]. To evaluate the accuracy of these predictions, we calculated area under the curve (AUC) values and compared the results of the Word2Vec model pipeline with those of the BERT-based model pipeline. Statistical significance between predicted results and FAERS was assessed using the Fisher exact test.</p></sec><sec id="s2-2"><title>Data Collection and Preprocessing</title><p>SIDER is a database that provides information on marketed drugs and their side effects [<xref ref-type="bibr" rid="ref28">28</xref>]. The drug names recorded in SIDER followed those approved by the Food and Drug Administration (FDA), and side effect names used the Medical Dictionary for Regulatory Activities (MedDRA) terminology [<xref ref-type="bibr" rid="ref29">29</xref>]. To minimize data leakage that could occur from the same drug being listed under different names, we collected and integrated synonyms for each drug using PubChem compound identifiers provided by SIDER. We also ensured terminology standardization through MedDRA-based side effect names.</p><p>Using version 4.1 of SIDER, we collected 158,096 unique pairs of drug&#x2013;side effect relationships after removing duplicates. 
To use these 158,096 pairs as input values in the BERT-based models and for relation score calculations, we derived an adjacency matrix with drugs as rows and side effects as columns (<xref ref-type="fig" rid="figure2">Figure 2</xref>). In addition, 1345 drug names and 6123 side effect terms that appeared in the collected drug&#x2013;side effect relationships were extracted and used as dictionaries for drugs and side effects.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Drug-SE adjacency matrix. This is a method to derive an adjacency matrix using drug&#x2013;side effect relationships in the side effect resource. The relation R has the value of 1 if the drug&#x2013;side effect relationship exists and 0 if it does not. SE: side effect.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67513_fig02.png"/></fig><p>We collected biomedical literature from PubMed, a biological literature database [<xref ref-type="bibr" rid="ref30">30</xref>]. A total of 42,515,246 paper abstracts updated on December 8, 2022, were collected, and for training the Word2Vec model, only sentences in which the drugs and side effects mentioned in SIDER were mentioned at least once were extracted [<xref ref-type="bibr" rid="ref12">12</xref>]. There were 14,289,160 sentences in which a drug was mentioned at least once and 32,107,327 sentences in which a side effect was mentioned at least once.</p></sec><sec id="s2-3"><title>Calculating Cosine Similarity</title><p>For the 1345 drugs and 6123 side effects recorded in the adjacency matrix, we performed word embedding using BERT-based models and calculated the cosine similarity for all drug and side effect vector pairs. 
In this case, the cosine similarity is calculated using equation (1).</p><disp-formula id="equWL1"><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>A</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo symmetric="true">&#x2016;</mml:mo><mml:mi>A</mml:mi><mml:mo symmetric="true">&#x2016;</mml:mo></mml:mrow><mml:mrow><mml:mo symmetric="true">&#x2016;</mml:mo><mml:mi>B</mml:mi><mml:mo symmetric="true">&#x2016;</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:msub><mml:mi>B</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msqrt><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:msqrt><mml:mo>&#x00D7;</mml:mo><mml:msqrt><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>B</mml:mi><mml:mrow><mml:mi>i<
/mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:msqrt></mml:mrow></mml:mfrac><mml:mrow><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>This process yielded 1,809,025 drug vector pairwise similarities and 37,491,129 side effect vector pairwise similarities.</p></sec><sec id="s2-4"><title>Calculating Relation Score</title><p><xref ref-type="fig" rid="figure3">Figure 3</xref> shows the process of calculating the relation score. For all drug&#x2013;side effect pairs embedded as vectors, the cosine similarity values obtained in the previous step were used to calculate the drug&#x2013;side effect&#x2019;s relation score. The process of calculating the relation score between a specific <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub> and a specific side effect <italic>SE</italic><sub><italic>&#x03B2;</italic></sub> was done using equations (2) to (6).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Illustration of the computation of the relation score between a specific <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub> and a specific side effect <italic>SE</italic><sub><italic>&#x03B2;</italic></sub>.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67513_fig03.png"/></fig><disp-formula id="E2"><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mo>&#x223C;</mml:mo><mml:mo 
stretchy="false">(</mml:mo><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2223;</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>D</mml:mi><mml:mi>r</mml:mi><mml:mi>u</mml:mi><mml:mi>g</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mn>2</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E3"><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>D</mml:mi><mml:mi>r</mml:mi><mml:mi>u</mml:mi><mml:mi>g</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2223;</mml:mo><mml:mi>A</mml:mi><mml:mi>d</mml:mi><mml:mi>j</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>S</mml:mi><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo 
stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mtext>&#x00A0;</mml:mtext><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mn>1345</mml:mn><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mn>3</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E4"><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mo>&#x223C;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>S</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>S</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x2223;</mml:mo><mml:mi>S</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>S</mml:mi><mml:mi>E</mml:mi></mml:mrow></mml:msub><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mn>4</mml:mn><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E5"><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>S</mml:mi><mml:mi>E</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mi>S</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2223;</mml:mo><mml:mi>A</mml:mi><mml:mi>d</mml:mi><mml:mi>j</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>D</mml:mi><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>S</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>E</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mtext>&#x00A0;</mml:mtext><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mn>6123</mml:mn><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mn>5</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="E6"><mml:math id="eqn6"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:mrow><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>s</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mn>6</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Similarity <italic>sim</italic><sub><italic>x</italic></sub> takes the maximum of the similarity values of <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub> with each drug <italic>D</italic><sub><italic>i</italic></sub> in <italic>Related</italic><sub><italic>Drugs</italic></sub>, the set of drugs known to be associated with side effect <italic>SE</italic><sub><italic>&#x03B2;</italic></sub>, using equation (2). The set <italic>Related</italic><sub><italic>Drugs</italic></sub> is obtained using equation (3): by referring to the values in the adjacency matrix consisting of 1345 drugs and 6123 side effects, we construct the set of drugs associated with that side effect by including in the set <italic>Related</italic><sub><italic>Drugs</italic></sub> those drugs, out of a total of 1345 drugs, that have a value of 1 in the adjacency matrix for side effect <italic>SE</italic><sub><italic>&#x03B2;</italic></sub>. 
In other words, the highest similarity between <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub> and the drugs known to be associated with side effect <italic>SE</italic><sub><italic>&#x03B2;</italic></sub> is called similarity <italic>sim</italic><sub><italic>x</italic></sub>.</p><p><xref ref-type="fig" rid="figure4">Figure 4</xref> shows the process of obtaining similarity <italic>sim</italic><sub><italic>x</italic></sub> using equation (2) and equation (3). Based on the values in the adjacency matrix, the computational process is to extract the similarity of only those drugs that are related to the side effect <italic>SE</italic><sub><italic>&#x03B2;</italic></sub>, out of the total 1345 drugs, and take the maximum value. If <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub> is in the <italic>Related</italic><sub><italic>Drugs</italic></sub> set, exclude <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub> from the similarity calculation.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Process of calculating similarity <italic>sim</italic><sub><italic>x</italic></sub>. SE: side effect.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67513_fig04.png"/></fig><p>Similarity <italic>sim</italic><sub><italic>y</italic></sub> uses equation (4) to take the maximum of the similarity values of side effects <italic>SE</italic><sub><italic>&#x03B2;</italic></sub> and <italic>SE</italic><sub><italic>i</italic></sub> in <italic>Related</italic><sub><italic>SE</italic></sub>, a set of side effects known to be related to the <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub>. 
The set <italic>Related</italic><sub><italic>SE</italic></sub> is obtained using equation (5), and by referring to the values of the adjacency matrix mentioned above, we construct the set of side effects associated with the <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub> out of the total 6123 side effects by including in the set <italic>Related</italic><sub><italic>SE</italic></sub> the side effects that have a value of 1 in the adjacency matrix with the <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub>. In other words, the highest similarity value to the side effects that are known to be related to the drug is called the similarity <italic>sim</italic><sub><italic>y</italic></sub>.</p><p><xref ref-type="fig" rid="figure5">Figure 5</xref> shows the process of obtaining similarity <italic>sim</italic><sub><italic>y</italic></sub> using equation (4) and equation (5). Based on the values in the adjacency matrix, the computational process is to extract the similarity of only the side effects that are related to the <italic>Drug</italic><sub><italic>&#x03B1;</italic></sub>, out of the total 6123 side effects, and take the maximum value. If an <italic>SE</italic><sub><italic>&#x03B2;</italic></sub> belongs to the <italic>Related</italic><sub><italic>SE</italic></sub> set, exclude the <italic>SE</italic><sub><italic>&#x03B2;</italic></sub> from the similarity calculation.</p><p>Finally, the relation score between Drug and SE was obtained by multiplying <italic>sim</italic><sub><italic>x</italic></sub> and <italic>sim</italic><sub><italic>y</italic></sub> as shown in equation (6).</p><p>We applied the above calculation method to 1345 drugs and 6123 side effects to calculate the relation scores for all drug&#x2013;side effect pairs, resulting in a total of 8,235,435 drug&#x2013;side effect pairs. 
<xref ref-type="fig" rid="figure6">Figure 6</xref> illustrates the heatmap of the calculated relation scores.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Process of calculating similarity <italic>sim</italic><sub><italic>y</italic></sub>. SE: side effect.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67513_fig05.png"/></fig><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Heatmap of calculated relation score for 8,235,435 drug&#x2013;side effect pairs.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67513_fig06.png"/></fig></sec><sec id="s2-5"><title>Measuring AUC</title><p>In this study, AUC values were measured using 158,096 known drug&#x2013;side effect relationships provided by SIDER to evaluate the accuracy of predicting relationships based on scores calculated for a total of 8,235,435 drug&#x2013;side effect pairs. Of the 8,235,435 calculated drug&#x2013;side effect pairs, we assigned a class value of true to pairs that belonged to known drug&#x2013;side effect relationships in SIDER and false to pairs that did not. All drug&#x2013;side effect pairs were sorted by score, and a single receiver operating characteristic curve was calculated. The generated receiver operating characteristic curves and AUC values were used to establish the optimal threshold for predicting whether a drug&#x2013;side effect pair had a true relationship. 
If the drug&#x2013;side effect relation score exceeds this diagnostic threshold, it is predicted that there is a relationship between the drug and the side effect [<xref ref-type="bibr" rid="ref12">12</xref>].</p></sec><sec id="s2-6"><title>Ethical Considerations</title><p>This study uses the publicly accessible and anonymized SIDER and FAERS databases, which contain no personally identifiable information and do not involve human participant experimentation. Therefore, institutional review board approval is not required for this study.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Performance Comparison</title><p>The AUCs of the BERT-based models and the Word2Vec model using the proposed method are compared in <xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="fig" rid="figure7">Figure 7</xref>. The optimal threshold for prediction was set as the point at which the sum of the sensitivity and specificity was maximized. The models used in this study included clagator/biobert_v1.1 [<xref ref-type="bibr" rid="ref31">31</xref>], BiomedBERT [<xref ref-type="bibr" rid="ref20">20</xref>], dmis-lab/biobert_v1.1 [<xref ref-type="bibr" rid="ref19">19</xref>], PharmBERT-uncased [<xref ref-type="bibr" rid="ref23">23</xref>], bert-base-uncased [<xref ref-type="bibr" rid="ref17">17</xref>], and Word2Vec [<xref ref-type="bibr" rid="ref11">11</xref>]. 
dmis-lab/biobert-v1.1 is the original BioBERT model pretrained on biomedical text, while clagator/biobert_v1.1 is a model based on it, which has been additionally fine-tuned on natural language inference and semantic textual similarity tasks to enhance its ability to recognize semantic relationships.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Performance comparison of Bidirectional Encoder Representations from Transformers (BERT)&#x2013;based models and Word2Vec model.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom">Optimal threshold</td><td align="left" valign="bottom">Sensitivity</td><td align="left" valign="bottom">Specificity</td></tr></thead><tbody><tr><td align="left" valign="top">clagator/biobert_v1.1</td><td align="left" valign="top">0.915<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">0.289</td><td align="left" valign="top">0.870</td><td align="left" valign="top">0.830</td></tr><tr><td align="left" valign="top">BiomedBERT<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">0.907</td><td align="left" valign="top">0.925</td><td align="left" valign="top">0.857</td><td align="left" valign="top">0.821</td></tr><tr><td align="left" valign="top">dmis-lab/biobert_v1.1</td><td align="left" valign="top">0.901</td><td align="left" valign="top">0.780</td><td align="left" valign="top">0.851</td><td align="left" valign="top">0.814</td></tr><tr><td align="left" valign="top">PharmBERT-uncased</td><td align="left" valign="top">0.882</td><td align="left" valign="top">0.460</td><td align="left" valign="top">0.817</td><td align="left" valign="top">0.796</td></tr><tr><td align="left" valign="top">bert-base-uncased<sup><xref ref-type="table-fn" 
rid="table1fn4">d</xref></sup></td><td align="left" valign="top">0.857</td><td align="left" valign="top">0.617</td><td align="left" valign="top">0.769</td><td align="left" valign="top">0.793</td></tr><tr><td align="left" valign="top">Word2Vec</td><td align="left" valign="top">0.848<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td><td align="left" valign="top">0.112</td><td align="left" valign="top">0.762</td><td align="left" valign="top">0.780</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn><fn id="table1fn2"><p><sup>b</sup>Highest value.</p></fn><fn id="table1fn3"><p><sup>c</sup>The old model was named PubMedBERT.</p></fn><fn id="table1fn4"><p><sup>d</sup>Vanilla BERT model.</p></fn><fn id="table1fn5"><p><sup>e</sup>Lowest value.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Receiver operating characteristic curves for Bidirectional Encoder Representations from Transformers (BERT)&#x2013;based models (left) and Word2Vec model (right). AUC: area under the curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67513_fig07.png"/></fig><p>The clagator/biobert_v1.1 model achieved the highest AUC value of 0.915 at an optimal threshold of 0.289. In contrast, the bert-base-uncased model, a vanilla BERT model pretrained on general corpora, showed an AUC of 0.857 at an optimal threshold of 0.617. In other words, BERT pretrained on the biomedical corpus outperformed vanilla BERT. In addition, the Word2Vec model recorded an AUC of 0.848, which was lower than those of the BERT-based models and was the lowest among all models used in this study.</p><p><xref ref-type="fig" rid="figure8">Figure 8</xref> (left) shows the performance comparison results to evaluate the effectiveness of the cosine similarity-based extraction approach. 
The comparison was conducted using the clagator/biobert_v1.1 model, which achieved the highest performance. Other vector similarity-based extraction methods used for comparison included Euclidean distance, Manhattan distance, Jaccard similarity, and dot product. The results demonstrate that the cosine similarity-based relation extraction method used in this study exhibited the highest performance with an AUC of 0.915, outperforming all other methods.</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Receiver operating characteristic curves for comparing vector similarity metrics (left) and evaluating the efficacy of our dual-similarity approach (right). AUC: area under the curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67513_fig08.png"/></fig><p><xref ref-type="fig" rid="figure8">Figure 8</xref> (right) evaluates the effectiveness of our dual-similarity approach against single-similarity methods. Using the same clagator/biobert_v1.1 model, we compared 3 extraction strategies: using only drug similarity information (Sim_X), using only side effect similarity information (Sim_Y), and our dual-similarity approach (relation score). Our dual-similarity approach significantly outperformed single-similarity methods with an AUC of 0.915 compared with 0.799 and 0.881, demonstrating the effectiveness of leveraging information from both similarity perspectives.</p></sec><sec id="s3-2"><title>Validation</title><p>To validate the similarity between drug&#x2013;side effect relationships predicted by our model based on SIDER data and relationships derived from FAERS data, we extracted drug&#x2013;side effect relationships from FAERS, a database not used in our Methods. FAERS is a database containing adverse event information for drugs submitted to the FDA [<xref ref-type="bibr" rid="ref32">32</xref>]. In this study, we used FAERS data from October 2012 to June 2023. 
By leveraging the list of 1345 drugs and 6123 side effects registered in SIDER to extract relationships from FAERS data, we obtained a total of 901,361 known relationships.</p><p>For validation, we used the results from clagator/biobert_v1.1 [<xref ref-type="bibr" rid="ref31">31</xref>], which performed best in our study. We constructed a contingency table using the predicted results based on relation scores from 8,235,435 drug&#x2013;side effect pairs, along with 901,361 known drug&#x2013;side effect relationships and unknown relationships extracted from FAERS. However, due to the data imbalance where unknown relationships outnumbered known relationships, we randomly sampled unknown relationships to match the number of known relationships. Based on this selected list of drug&#x2013;side effect pairs, we conducted the Fisher exact test between FAERS data and our study&#x2019;s predictions, repeating this process 2000 times and calculating the average of all results. The results showed <italic>P</italic>&#x003C;.001, confirming that the drug&#x2013;side effect relationships predicted in our study were statistically significant.</p><p>Furthermore, the odds ratio, calculated as the ratio of the odds of an event occurring to the odds of it not occurring, was 4.822. This means that the odds of our model predicting a relationship for known relationships in FAERS were 4.822 times higher than the odds of predicting a relationship for unknown relationships. In other words, relationships reported in FAERS were significantly more likely to be predicted as related by our model, demonstrating that our model&#x2019;s predictions are reliable when compared with external data.</p><p>To validate the utility of our model&#x2019;s predictions, we conducted case studies on drug&#x2013;side effect relationships not present in the SIDER database that were ranked within the top 1000 according to relation scores calculated by our model. 
For these candidates, we performed literature searches using Google Scholar and verified whether these relationships had been mentioned in case reports or research [<xref ref-type="bibr" rid="ref33">33</xref>]. In addition, following the input from an author specializing in pharmacoepidemiology, we excluded steroid-class drugs that are used despite their side effects from a clinical utility perspective, thereby enhancing the reliability of our ability to detect meaningful signals in actual clinical environments. <xref ref-type="table" rid="table2">Table 2</xref> illustrates the cases of lenalidomide-arthropathy, rosuvastatin-sleep disturbance, gadolinium-acute pulmonary edema, and cefazolin-hepatic failure. Upon conducting a literature review of the drug&#x2013;side effect relationships presented in <xref ref-type="table" rid="table2">Table 2</xref>, we confirmed that these associations were reported in research findings published after 2016 [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref37">37</xref>].</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Case studies of model predictions.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Drug <italic>&#x03B1;</italic></td><td align="left" valign="bottom">Side effect <italic>&#x03B2;</italic></td><td align="left" valign="bottom">Similarity <italic>x</italic></td><td align="left" valign="bottom">Similarity <italic>y</italic></td><td align="left" valign="bottom">Relation score</td><td align="left" valign="bottom">Model prediction</td></tr></thead><tbody><tr><td align="left" valign="top">Lenalidomide</td><td align="left" valign="top">Arthropathy</td><td align="left" valign="top">0.953</td><td align="left" valign="top">0.837</td><td align="left" valign="top">0.799</td><td align="left" valign="top">True</td></tr><tr><td align="left" valign="top">Rosuvastatin</td><td align="left" valign="top">Sleep 
disturbance</td><td align="left" valign="top">0.952</td><td align="left" valign="top">0.825</td><td align="left" valign="top">0.786</td><td align="left" valign="top">True</td></tr><tr><td align="left" valign="top">Gadolinium</td><td align="left" valign="top">Acute pulmonary edema</td><td align="left" valign="top">0.891</td><td align="left" valign="top">0.866</td><td align="left" valign="top">0.771</td><td align="left" valign="top">True</td></tr><tr><td align="left" valign="top">Cefazolin</td><td align="left" valign="top">Hepatic failure</td><td align="left" valign="top">0.861</td><td align="left" valign="top">0.857</td><td align="left" valign="top">0.738</td><td align="left" valign="top">True</td></tr></tbody></table></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>In this study, we propose a method to extract information about drug&#x2013;side effect relationships inherent in the pretrained parameters of language models and predict relation scores, indicating the possibility of unknown drug&#x2013;side effect relationships. This is accomplished using known drug&#x2013;side effect relationship data and embedding vectors from language models trained on biomedical corpora.</p><p>Our study confirmed that BERT-based models demonstrated superior performance in predicting drug&#x2013;side effect relationships. We evaluated the performance of BERT-based models using the relation score methodology proposed by Seungsoo et al [<xref ref-type="bibr" rid="ref12">12</xref>], and the clagator/biobert_v1.1 model [<xref ref-type="bibr" rid="ref31">31</xref>] achieved the highest performance with an AUC of 0.915 at an optimal threshold of 0.289. This suggests that BERT-based models perform better in predicting drug&#x2013;side effect relationships compared to the 0.85 AUC achieved by the Word2Vec model in a previous study. 
Therefore, our findings support the notion that context-aware BERT-based models outperform context-independent Word2Vec models in terms of embedding performance [<xref ref-type="bibr" rid="ref38">38</xref>].</p><p>In addition, our study demonstrates that BERT models pretrained on biomedical corpora outperform vanilla BERT models pretrained on general corpora. Vanilla BERT models, trained on general corpora, have limitations in fully reflecting the specificity of the biomedical field [<xref ref-type="bibr" rid="ref20">20</xref>]. In contrast, BERT-based models pretrained on large biomedical corpora, such as PubMed and PMC, more richly reflect drug mechanisms of action and biological relationships observed in clinical settings [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. The results of this study demonstrate that BERT models specialized for biomedical applications can provide more accurate drug&#x2013;side effect relationship predictions based on a deeper understanding of the domain. This aligns with previous studies that emphasize the importance of domain-specific models in BERT model applications [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. While this study evaluated the effectiveness of the proposed extraction method using BERT-based models, considering the rapid advancements in the field of NLP, models with different architectures or more recent models have the potential to understand the complexity of relationships more effectively and provide further performance improvements.</p><p>We performed external validation using FAERS data and found a statistically significant association (<italic>P</italic>&#x003C;.001) between the predictions for the 8,235,435 drug&#x2013;side effect pairs and the 901,361 known relationships extracted from FAERS. 
In addition, to verify the real-world applicability of the model&#x2019;s predicted results, we conducted case studies on drug&#x2013;side effect relationships that were not confirmed in the SIDER database. We found that these drug&#x2013;side effect relationships have been reported in recent research findings published after 2016. This suggests that our methodology using the BERT-based model proposed in this study is applicable to the prediction of ADRs in practice. Considering this, we expect that our proposed methodology will allow for earlier detection of potential ADRs, increasing the likelihood of success in the drug development process and reducing the time and cost of ADR studies.</p><p>In the field of biomedical NLP, standardized terminology systems and synonym processing are important [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. In this study, we minimized the risk of data leakage by integrating drug synonyms using PubChem compound identifiers. We used MedDRA-based side effect names to ensure terminology standardization and reduce the likelihood of the model merely reidentifying variations of known relationships. However, we have not explicitly integrated the hierarchical information from MedDRA, and in future research, we plan to enhance our methodology by actively using MedDRA hierarchical information to integrate similar terms, further mitigating the reidentification issue.</p><p>In this study, we recognize that non&#x2013;dose-dependent adverse reactions present particular challenges for prediction models, and drug similarity may not necessarily be the main component in the development of such ADRs. While our dual-similarity approach partially addresses this by incorporating adverse event similarity patterns, future research would benefit from integrating NLP-based approaches with the chemical structure and mechanism of action of drugs. 
This multimodal approach would leverage literature-derived contextual relationships, molecular properties, and biological pathway insights to more accurately classify drug&#x2013;side effect relationships and would be particularly valuable for addressing current methodological limitations, including novel compound prediction and idiosyncratic reaction detection.</p></sec><sec id="s4-2"><title>Limitations</title><p>One of the primary limitations of this study is the lack of up-to-date data in the drug&#x2013;side effect database used. The SIDER database was last updated in 2015, meaning that despite using BERT-based models trained on the latest biomedical corpus, our prediction process may not fully reflect current drug&#x2013;side effect relationships. Consequently, incorporating more recent drug&#x2013;side effect data would likely improve the performance of our prediction model significantly.</p><p>Another significant limitation relates to the nature of case reports themselves. Such reports typically rely on a single clinically reported case, thereby making it difficult to establish clear causal relationships between drugs and side effects. In addition, their small sample sizes often limit their generalizability. These considerations become particularly relevant when extending our work to clinical applications, where patient care involves complex interactions of multiple factors, including diverse reporting patterns and polypharmacy. Although our model provides a systematic method for prioritizing potential associations for further investigation, all predictions should therefore be interpreted with appropriate caution and validated through additional pharmacovigilance methods before clinical application. 
For this reason, future research should utilize systematic clinical data or large-scale cohort studies to enhance the reliability of predictive models.</p><p>Furthermore, while our approach shows promise for identifying potential drug&#x2013;side effect relationships through vector space similarities, we recognize 2 additional important limitations. First, the current methodology provides generalized, population-level predictions and does not account for idiosyncratic reactions dependent on individual patient factors. As such, future work should explore integrating patient-specific data to enable more personalized adverse event predictions. Second, for entirely novel drug candidates absent from existing literature, the embedding vectors generated would be based primarily on semantic inference rather than established knowledge, potentially limiting prediction reliability. This underscores the importance of complementary approaches, particularly for new chemical entities.</p></sec><sec id="s4-3"><title>Conclusions</title><p>This study presents a novel approach for extracting drug&#x2013;side effect relationship information embedded within pretrained language model parameters and leveraging this information to predict unknown adverse reactions. Our methodology, using context-aware BERT-based language models, demonstrates that BERT models pretrained on biomedical corpora outperform vanilla BERT and Word2Vec. These results highlight how the contextual embedding capabilities of BERT architectures, coupled with domain-specific adaptation, enhance predictive performance in drug&#x2013;side effect relationship tasks. 
Furthermore, external validation using FAERS data and a literature review of selected cases confirmed the practical applicability of the proposed methodology.</p></sec></sec></body><back><ack><p>This research was supported by a grant of the Korea Health Technology R&#x0026;D Project through the Korea Health Industry Development Institute (KHIDI), funded by the Ministry of Health &#x0026; Welfare, Republic of Korea (grant RS-2022-KH125153).</p></ack><notes><sec><title>Data Availability</title><p>The datasets used in this study are available in the FDA Adverse Event Reporting System (FAERS) repository [<xref ref-type="bibr" rid="ref32">32</xref>] and the SIDER repository [<xref ref-type="bibr" rid="ref41">41</xref>]. The datasets generated or analyzed during this study are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>MP and WJ designed the study and drafted the manuscript. WJ was responsible for data collection, preprocessing, and validation. MP was responsible for the relation score calculation methodology and implementation of the BERT model. WN conducted the reproduction study using Word2Vec. DA and JS contributed to the manuscript revision. Suehyun L and Seunghee L contributed equally as co-corresponding authors and supervised the study. 
All authors have read and agreed to the published version of the manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ADR</term><def><p>adverse drug reaction</p></def></def-item><def-item><term id="abb2">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb3">BERT</term><def><p>Bidirectional Encoder Representations from Transformers</p></def></def-item><def-item><term id="abb4">FAERS</term><def><p>FDA Adverse Event Reporting System</p></def></def-item><def-item><term id="abb5">FDA</term><def><p>Food and Drug Administration</p></def></def-item><def-item><term id="abb6">MedDRA</term><def><p>Medical Dictionary for Regulatory Activities</p></def></def-item><def-item><term id="abb7">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb8">SIDER</term><def><p>Side Effect Resource</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Edwards</surname><given-names>IR</given-names> </name><name name-style="western"><surname>Aronson</surname><given-names>JK</given-names> </name></person-group><article-title>Adverse drug reactions: definitions, diagnosis, and management</article-title><source>The Lancet</source><year>2000</year><month>10</month><volume>356</volume><issue>9237</issue><fpage>1255</fpage><lpage>1259</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(00)02799-9</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pirmohamed</surname><given-names>M</given-names> </name><name name-style="western"><surname>James</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Meakin</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Adverse drug reactions as cause of admission to hospital: prospective analysis of 18 820 patients</article-title><source>BMJ</source><year>2004</year><month>07</month><day>3</day><volume>329</volume><issue>7456</issue><fpage>15</fpage><lpage>19</lpage><pub-id pub-id-type="doi">10.1136/bmj.329.7456.15</pub-id><pub-id pub-id-type="medline">15231615</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lazarou</surname><given-names>J</given-names> </name><name name-style="western"><surname>Pomeranz</surname><given-names>BH</given-names> </name><name name-style="western"><surname>Corey</surname><given-names>PN</given-names> </name></person-group><article-title>Incidence of adverse drug reactions in hospitalized patients: a meta-analysis of prospective studies</article-title><source>JAMA</source><year>1998</year><month>04</month><day>15</day><volume>279</volume><issue>15</issue><fpage>1200</fpage><lpage>1205</lpage><pub-id pub-id-type="doi">10.1001/jama.279.15.1200</pub-id><pub-id pub-id-type="medline">9555760</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>JY</given-names> </name></person-group><article-title>Predicting adverse side effects of drugs</article-title><source>BMC Genomics</source><year>2011</year><month>12</month><day>23</day><volume>12 Suppl 5</volume><issue>Suppl 5</issue><fpage>S11</fpage><pub-id pub-id-type="doi">10.1186/1471-2164-12-S5-S11</pub-id><pub-id 
pub-id-type="medline">22369493</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Toni</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ayatollahi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Abbaszadeh</surname><given-names>R</given-names> </name><name name-style="western"><surname>Fotuhi Siahpirani</surname><given-names>A</given-names> </name></person-group><article-title>Machine learning techniques for predicting drug-related side effects: a scoping review</article-title><source>Pharmaceuticals (Basel)</source><year>2024</year><month>06</month><day>17</day><volume>17</volume><issue>6</issue><fpage>795</fpage><pub-id pub-id-type="doi">10.3390/ph17060795</pub-id><pub-id pub-id-type="medline">38931462</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kar</surname><given-names>S</given-names> </name></person-group><article-title>Application of artificial intelligence and machine learning in early detection of adverse drug reactions (ADRs) and drug-induced toxicity</article-title><source>Artificial Intelligence Chemistry</source><year>2023</year><month>12</month><volume>1</volume><issue>2</issue><fpage>100011</fpage><pub-id pub-id-type="doi">10.1016/j.aichem.2023.100011</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Seo</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>MH</given-names> </name><name 
name-style="western"><surname>Yoon</surname><given-names>Y</given-names> </name></person-group><article-title>Prediction of side effects using comprehensive similarity measures</article-title><source>Biomed Res Int</source><year>2020</year><volume>2020</volume><issue>1</issue><fpage>1357630</fpage><pub-id pub-id-type="doi">10.1155/2020/1357630</pub-id><pub-id pub-id-type="medline">32190647</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Leaman</surname><given-names>R</given-names> </name><name name-style="western"><surname>Wojtulewicz</surname><given-names>L</given-names> </name><name name-style="western"><surname>Sullivan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Skariah</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>J</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Gonzalez</surname><given-names>G</given-names> </name></person-group><article-title>Towards internet-age pharmacovigilance: extracting adverse drug reactions from user posts in health-related social networks</article-title><conf-name>Proceedings of the 2010 workshop on biomedical natural language processing</conf-name><conf-date>Jul 15, 2010</conf-date><conf-loc>Uppsala, Sweden</conf-loc><publisher-name>Association for Computational Linguistics</publisher-name><fpage>117</fpage><lpage>125</lpage></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gurulingappa</surname><given-names>H</given-names> </name><name name-style="western"><surname>Mateen-Rajput</surname><given-names>A</given-names> </name><name name-style="western"><surname>Toldo</surname><given-names>L</given-names> 
</name></person-group><article-title>Extraction of potential adverse drug events from medical case reports</article-title><source>J Biomed Semantics</source><year>2012</year><month>12</month><day>20</day><volume>3</volume><issue>1</issue><fpage>15</fpage><pub-id pub-id-type="doi">10.1186/2041-1480-3-15</pub-id><pub-id pub-id-type="medline">23256479</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nikfarjam</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sarker</surname><given-names>A</given-names> </name><name name-style="western"><surname>O&#x2019;Connor</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ginn</surname><given-names>R</given-names> </name><name name-style="western"><surname>Gonzalez</surname><given-names>G</given-names> </name></person-group><article-title>Pharmacovigilance from social media: mining adverse drug reaction mentions using sequence labeling with word embedding cluster features</article-title><source>J Am Med Inform Assoc</source><year>2015</year><month>05</month><volume>22</volume><issue>3</issue><fpage>671</fpage><lpage>681</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocu041</pub-id><pub-id pub-id-type="medline">25755127</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Mikolov</surname><given-names>T</given-names> </name><name name-style="western"><surname>Sutskever</surname><given-names>I</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>K</given-names> </name><name name-style="western"><surname>Corrado</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Dean</surname><given-names>J</given-names> </name></person-group><article-title>Distributed 
representations of words and phrases and their compositionality</article-title><source>Adv Neural Inf Process Syst</source><comment>Preprint posted online on 2013</comment><pub-id pub-id-type="doi">10.48550/arXiv.1310.4546</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>H</given-names> </name><name name-style="western"><surname>Yoon</surname><given-names>Y</given-names> </name></person-group><article-title>Prediction of new drug-side effect relation using Word2Vec model-based word similarity</article-title><source>JKIIT</source><year>2020</year><volume>18</volume><issue>11</issue><fpage>25</fpage><lpage>33</lpage><pub-id pub-id-type="doi">10.14801/jkiit.2020.18.11.25</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>F</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>B</given-names> </name><name name-style="western"><surname>Diao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>W</given-names> </name><name name-style="western"><surname>Shu</surname><given-names>T</given-names> </name></person-group><article-title>Prediction of adverse drug reactions based on knowledge graph embedding</article-title><source>BMC Med Inform Decis Mak</source><year>2021</year><month>02</month><day>4</day><volume>21</volume><issue>1</issue><fpage>38</fpage><pub-id pub-id-type="doi">10.1186/s12911-021-01402-3</pub-id><pub-id pub-id-type="medline">33541342</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name 
name-style="western"><surname>Cohen</surname><given-names>KB</given-names> </name><name name-style="western"><surname>Demner-Fushman</surname><given-names>D</given-names> </name></person-group><source>Biomedical Natural Language Processing</source><year>2014</year><publisher-name>John Benjamins Publishing Company</publisher-name><pub-id pub-id-type="other">9027271062</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>H</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>Z</given-names> </name></person-group><article-title>BioWordVec, improving biomedical word embeddings with subword information and MeSH</article-title><source>Sci Data</source><year>2019</year><volume>6</volume><issue>1</issue><fpage>52</fpage><pub-id pub-id-type="doi">10.1038/s41597-019-0055-0</pub-id><pub-id pub-id-type="medline">31076572</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hassan</surname><given-names>NA</given-names> </name><name name-style="western"><surname>Seoud</surname><given-names>RAA</given-names> </name><name name-style="western"><surname>Salem</surname><given-names>DA</given-names> </name></person-group><article-title>Bridging the gap: a hybrid approach to medical relation extraction using pretrained language models and traditional machine learning</article-title><source>JAIT</source><year>2024</year><volume>15</volume><issue>6</issue><fpage>723</fpage><lpage>734</lpage><pub-id pub-id-type="doi">10.12720/jait.15.6.723-734</pub-id></nlm-citation></ref><ref 
id="ref17"><label>17</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><source>arXiv</source><comment>Preprint posted online on 2018</comment><pub-id pub-id-type="doi">10.48550/arXiv.1810.04805</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Vaswani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shazeer</surname><given-names>N</given-names> </name><name name-style="western"><surname>Parmar</surname><given-names>N</given-names> </name><name name-style="western"><surname>Uszkoreit</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jones</surname><given-names>L</given-names> </name><name name-style="western"><surname>Gomez</surname><given-names>AN</given-names> </name><etal/></person-group><article-title>Attention is all you need</article-title><source>Adv Neural Inf Process Syst</source><comment>Preprint posted online on 2017</comment><pub-id pub-id-type="doi">10.48550/arXiv.1706.03762</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yoon</surname><given-names>W</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>S</given-names> 
</name><etal/></person-group><article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title><source>Bioinformatics</source><year>2020</year><month>02</month><day>15</day><volume>36</volume><issue>4</issue><fpage>1234</fpage><lpage>1240</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id><pub-id pub-id-type="medline">31501885</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Tinn</surname><given-names>R</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Domain-specific language model pretraining for biomedical natural language processing</article-title><source>ACM Trans Comput Healthcare</source><year>2022</year><month>01</month><day>31</day><volume>3</volume><issue>1</issue><fpage>1</fpage><lpage>23</lpage><pub-id pub-id-type="doi">10.1145/3458754</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Peng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>Z</given-names> </name></person-group><article-title>Transfer learning in biomedical natural language processing: an evaluation of BERT and ELMo on ten benchmarking datasets</article-title><conf-name>Proceedings of the 18th BioNLP Workshop and Shared Task</conf-name><conf-date>Aug 1, 2019</conf-date><conf-loc>Florence, Italy</conf-loc><fpage>58</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.18653/v1/W19-5006</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elbiach</surname><given-names>O</given-names> </name><name name-style="western"><surname>Grissette</surname><given-names>H</given-names> </name><name name-style="western"><surname>Nfaoui</surname><given-names>EH</given-names> </name></person-group><article-title>Leveraging transformer models for enhanced pharmacovigilance: a comparative analysis of ADR extraction from biomedical and social media texts</article-title><source>AI</source><year>2025</year><volume>6</volume><issue>2</issue><fpage>31</fpage><pub-id pub-id-type="doi">10.3390/ai6020031</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>ValizadehAslani</surname><given-names>T</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ren</surname><given-names>P</given-names> </name><etal/></person-group><article-title>PharmBERT: a domain-specific BERT model for drug labels</article-title><source>Brief Bioinform</source><year>2023</year><month>07</month><day>20</day><volume>24</volume><issue>4</issue><fpage>bbad226</fpage><pub-id pub-id-type="doi">10.1093/bib/bbad226</pub-id><pub-id pub-id-type="medline">37317617</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nishioka</surname><given-names>S</given-names> </name><name name-style="western"><surname>Watanabe</surname><given-names>T</given-names> </name><name name-style="western"><surname>Asano</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Identification of hand-foot syndrome from cancer patients&#x2019; blog posts: BERT-based deep-learning approach to detect potential adverse drug reaction 
symptoms</article-title><source>PLoS ONE</source><year>2022</year><volume>17</volume><issue>5</issue><fpage>e0267901</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0267901</pub-id><pub-id pub-id-type="medline">35507636</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bergman</surname><given-names>E</given-names> </name><name name-style="western"><surname>D&#x00FC;rlich</surname><given-names>L</given-names> </name><name name-style="western"><surname>Arthurson</surname><given-names>V</given-names> </name><etal/></person-group><article-title>BERT based natural language processing for triage of adverse drug reaction reports shows close to human-level performance</article-title><source>PLOS Digit Health</source><year>2023</year><month>12</month><volume>2</volume><issue>12</issue><fpage>e0000409</fpage><pub-id pub-id-type="doi">10.1371/journal.pdig.0000409</pub-id><pub-id pub-id-type="medline">38055685</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hussain</surname><given-names>S</given-names> </name><name name-style="western"><surname>Afzal</surname><given-names>H</given-names> </name><name name-style="western"><surname>Saeed</surname><given-names>R</given-names> </name><name name-style="western"><surname>Iltaf</surname><given-names>N</given-names> </name><name name-style="western"><surname>Umair</surname><given-names>MY</given-names> </name></person-group><article-title>Pharmacovigilance with transformers: a framework to detect adverse drug reactions using BERT fine-tuned with FARM</article-title><source>Comput Math Methods Med</source><year>2021</year><volume>2021</volume><issue>1</issue><fpage>5589829</fpage><pub-id pub-id-type="doi">10.1155/2021/5589829</pub-id><pub-id 
pub-id-type="medline">34422092</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Mahendran</surname><given-names>D</given-names> </name><name name-style="western"><surname>McInnes</surname><given-names>BT</given-names> </name></person-group><article-title>Extracting Adverse Drug Events from Clinical Notes</article-title><source>AMIA Jt Summits Transl Sci Proc</source><year>2021</year><volume>2021</volume><fpage>420</fpage><lpage>429</lpage><pub-id pub-id-type="doi">10.48550/arXiv.2104.10791</pub-id><pub-id pub-id-type="medline">34457157</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kuhn</surname><given-names>M</given-names> </name><name name-style="western"><surname>Letunic</surname><given-names>I</given-names> </name><name name-style="western"><surname>Jensen</surname><given-names>LJ</given-names> </name><name name-style="western"><surname>Bork</surname><given-names>P</given-names> </name></person-group><article-title>The SIDER database of drugs and side effects</article-title><source>Nucleic Acids Res</source><year>2016</year><month>01</month><day>4</day><volume>44</volume><issue>D1</issue><fpage>D1075</fpage><lpage>D1079</lpage><pub-id pub-id-type="doi">10.1093/nar/gkv1075</pub-id><pub-id pub-id-type="medline">26481350</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brown</surname><given-names>EG</given-names> </name><name name-style="western"><surname>Wood</surname><given-names>L</given-names> </name><name name-style="western"><surname>Wood</surname><given-names>S</given-names> </name></person-group><article-title>The medical dictionary for regulatory activities 
(MedDRA)</article-title><source>Drug Saf</source><year>1999</year><month>02</month><volume>20</volume><issue>2</issue><fpage>109</fpage><lpage>117</lpage><pub-id pub-id-type="doi">10.2165/00002018-199920020-00002</pub-id><pub-id pub-id-type="medline">10082069</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Canese</surname><given-names>K</given-names> </name><name name-style="western"><surname>Weis</surname><given-names>S</given-names> </name></person-group><article-title>PubMed: The Bibliographic Database</article-title><source>The NCBI Handbook</source><year>2013</year><volume>2</volume><publisher-name>National Center for Biotechnology Information</publisher-name><fpage>13</fpage><lpage>24</lpage></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>Biobert_v1.1_pubmed_nli_sts</article-title><source>Hugging Face Hub</source><access-date>2023-08-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://huggingface.co/clagator/biobert_v1.1_pubmed_nli_sts">https://huggingface.co/clagator/biobert_v1.1_pubmed_nli_sts</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="web"><article-title>Questions and answers on FDA&#x2019;s adverse event reporting system (FAERS)</article-title><source>US Food and Drug Administration</source><access-date>2023-08-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/drugs/surveillance/questions-and-answers-fdas-adverse-event-reporting-system-faers">https://www.fda.gov/drugs/surveillance/questions-and-answers-fdas-adverse-event-reporting-system-faers</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Shariff</surname><given-names>SZ</given-names> </name><name name-style="western"><surname>Bejaimal</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Sontrop</surname><given-names>JM</given-names> </name><etal/></person-group><article-title>Retrieving clinical evidence: a comparison of PubMed and Google Scholar for quick clinical searches</article-title><source>J Med Internet Res</source><year>2013</year><month>08</month><day>15</day><volume>15</volume><issue>8</issue><fpage>e164</fpage><pub-id pub-id-type="doi">10.2196/jmir.2624</pub-id><pub-id pub-id-type="medline">23948488</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Icard</surname><given-names>C</given-names> </name><name name-style="western"><surname>Mocquot</surname><given-names>P</given-names> </name><name name-style="western"><surname>Nogaro</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Despas</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gauthier</surname><given-names>M</given-names> </name></person-group><article-title>Lenalidomide-induced arthritis: a case report and review of literature and pharmacovigilance databases</article-title><source>J Oncol Pharm Pract</source><year>2022</year><month>03</month><volume>28</volume><issue>2</issue><fpage>453</fpage><lpage>456</lpage><pub-id pub-id-type="doi">10.1177/10781552211038001</pub-id><pub-id pub-id-type="medline">34590522</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Trambowicz</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gorzelak-Pabi&#x015B;</surname><given-names>P</given-names> </name><name 
name-style="western"><surname>Broncel</surname><given-names>M</given-names> </name></person-group><article-title>Statins and sleep &#x2013; clinical effects</article-title><source>Atherosclerosis</source><year>2019</year><month>08</month><volume>287</volume><fpage>e202</fpage><pub-id pub-id-type="doi">10.1016/j.atherosclerosis.2019.06.613</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lucas</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mohan</surname><given-names>G</given-names> </name><name name-style="western"><surname>Winkler</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gardner</surname><given-names>K</given-names> </name><name name-style="western"><surname>Whalen</surname><given-names>M</given-names> </name></person-group><article-title>Acute lung injury following gadolinium contrast: a case report</article-title><source>J Emerg Crit Care Med</source><year>2021</year><volume>5</volume><fpage>18</fpage><lpage>18</lpage><pub-id pub-id-type="doi">10.21037/jeccm-20-117</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gekhman</surname><given-names>D</given-names> </name><name name-style="western"><surname>Correa</surname><given-names>E</given-names> </name></person-group><article-title>One dose of cefazolin &#x2014; months of misery: a case of acute liver failure with grave consequences</article-title><source>American Journal of Gastroenterology</source><year>2018</year><month>10</month><volume>113</volume><issue>Supplement</issue><fpage>S1664</fpage><lpage>S1665</lpage><pub-id pub-id-type="doi">10.14309/00000434-201810001-03038</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Si</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Roberts</surname><given-names>K</given-names> </name></person-group><article-title>Enhancing clinical concept extraction with contextual embeddings</article-title><source>J Am Med Inform Assoc</source><year>2019</year><month>11</month><day>1</day><volume>26</volume><issue>11</issue><fpage>1297</fpage><lpage>1304</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocz096</pub-id><pub-id pub-id-type="medline">31265066</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khadhraoui</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bellaaj</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ammar</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Hamam</surname><given-names>H</given-names> </name><name name-style="western"><surname>Jmaiel</surname><given-names>M</given-names> </name></person-group><article-title>Survey of BERT-base models for scientific text classification: COVID-19 case study</article-title><source>Appl Sci (Basel)</source><year>2022</year><volume>12</volume><issue>6</issue><fpage>2891</fpage><pub-id pub-id-type="doi">10.3390/app12062891</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arbabi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Adams</surname><given-names>DR</given-names> </name><name name-style="western"><surname>Fidler</surname><given-names>S</given-names> 
</name><name name-style="western"><surname>Brudno</surname><given-names>M</given-names> </name></person-group><article-title>Identifying clinical terms in medical text using ontology-guided machine learning</article-title><source>JMIR Med Inform</source><year>2019</year><month>05</month><day>10</day><volume>7</volume><issue>2</issue><fpage>e12596</fpage><pub-id pub-id-type="doi">10.2196/12596</pub-id><pub-id pub-id-type="medline">31094361</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="web"><article-title>Side effect resource</article-title><source>SIDER 4.1</source><access-date>2023-08-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="http://sideeffects.embl.de">http://sideeffects.embl.de</ext-link></comment></nlm-citation></ref></ref-list></back></article>