<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v12i1e60665</article-id><article-id pub-id-type="doi">10.2196/60665</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>An Automatic and End-to-End System for Rare Disease Knowledge Graph Construction Based on Ontology-Enhanced Large Language Models: Development Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Cao</surname><given-names>Lang</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sun</surname><given-names>Jimeng</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Cross</surname><given-names>Adam</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Computer Science, University of Illinois 
Urbana-Champaign</institution><addr-line>Urbana</addr-line><addr-line>IL</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Pediatrics, University of Illinois College of Medicine Peoria</institution><addr-line>Peoria</addr-line><addr-line>IL</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Castonguay</surname><given-names>Alexandre</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Shyr</surname><given-names>Cathy</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Torii</surname><given-names>Manabu</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Sinha</surname><given-names>Urjoshi</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Li</surname><given-names>Yanzeng</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Adam Cross, MD, Department of Pediatrics, University of Illinois College of Medicine Peoria, 1 Illini Drive, Peoria, IL, 61605, United States, 1 309-671-3000; <email>arcross@uic.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date pub-type="epub"><day>18</day><month>12</month><year>2024</year></pub-date><volume>12</volume><elocation-id>e60665</elocation-id><history><date date-type="received"><day>22</day><month>05</month><year>2024</year></date><date date-type="rev-recd"><day>18</day><month>09</month><year>2024</year></date><date date-type="accepted"><day>18</day><month>09</month><year>2024</year></date></history><copyright-statement>&#x00A9; Lang Cao, Jimeng Sun, Adam Cross. 
Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 18.12.2024. </copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2024/1/e60665"/><abstract><sec><title>Background</title><p>Rare diseases affect millions worldwide but sometimes face limited research focus individually due to low prevalence. Many rare diseases do not have specific <italic>International Classification of Diseases, Ninth Edition</italic> (<italic>ICD-9</italic>) and <italic>Tenth Edition</italic> (<italic>ICD-10</italic>), codes and therefore cannot be reliably extracted from granular fields like &#x201C;Diagnosis&#x201D; and &#x201C;Problem List&#x201D; entries, which complicates tasks that require identification of patients with these conditions, including clinical trial recruitment and research efforts. Recent advancements in large language models (LLMs) have shown promise in automating the extraction of medical information, offering the potential to improve medical research, diagnosis, and management. 
However, most LLMs lack professional medical knowledge, especially concerning specific rare diseases, and cannot effectively manage rare disease data in its various ontological forms, making them unsuitable for these tasks.</p></sec><sec><title>Objective</title><p>Our aim is to create an end-to-end system called automated rare disease mining (AutoRD), which automates the extraction of rare disease&#x2013;related information from medical text, focusing on entities and their relations to other medical concepts, such as signs and symptoms. AutoRD integrates up-to-date ontologies with other structured knowledge and demonstrates superior performance in rare disease extraction tasks. We conducted various experiments to evaluate AutoRD&#x2019;s performance, aiming to surpass common LLMs and traditional methods.</p></sec><sec sec-type="methods"><title>Methods</title><p>AutoRD is a pipeline system that involves data preprocessing, entity extraction, relation extraction, entity calibration, and knowledge graph construction. We implemented this system using GPT-4 and medical knowledge graphs developed from the open-source Human Phenotype and Orphanet ontologies, using techniques such as chain-of-thought reasoning and prompt engineering. We quantitatively evaluated our system&#x2019;s performance in entity extraction, relation extraction, and knowledge graph construction. The experiment used the well-curated dataset RareDis2023, which contains medical literature focused on rare disease entities and their relations, making it an ideal dataset for training and testing our methodology.</p></sec><sec sec-type="results"><title>Results</title><p>On the RareDis2023 dataset, AutoRD achieved an overall entity extraction <italic>F</italic><sub>1</sub>-score of 56.1% and a relation extraction <italic>F</italic><sub>1</sub>-score of 38.6%, marking a 14.4% improvement over the baseline LLM. 
Notably, the <italic>F</italic><sub>1</sub>-score for rare disease entity extraction reached 83.5%, indicating high precision and recall in identifying rare disease mentions. These results demonstrate the effectiveness of integrating LLMs with medical ontologies in extracting complex rare disease information.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>AutoRD is an automated end-to-end system for extracting rare disease information from text to build knowledge graphs, addressing critical limitations of existing LLMs by improving identification of these diseases and connecting them to related clinical features. This work underscores the significant potential of LLMs in transforming health care, particularly in the rare disease domain. By leveraging ontology-enhanced LLMs, AutoRD constructs a robust medical knowledge base that incorporates up-to-date rare disease information, facilitating improved identification of patients and resulting in more inclusive research and trial candidacy efforts.</p></sec></abstract><kwd-group><kwd>rare disease</kwd><kwd>clinical informatics</kwd><kwd>LLM</kwd><kwd>natural language processing</kwd><kwd>machine learning</kwd><kwd>artificial intelligence</kwd><kwd>large language models</kwd><kwd>data extraction</kwd><kwd>ontologies</kwd><kwd>knowledge graphs</kwd><kwd>text mining</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Objectives</title><p>Rare diseases, also known as orphan diseases, are relatively uncommon in isolation and sometimes receive less individual attention in medical research due to their low prevalence [<xref ref-type="bibr" rid="ref1">1</xref>]. The likelihood of an individual being affected by a rare disease is relatively low. However, when considering the global population, many individuals are impacted. 
In the United States, rare diseases affect approximately 30 million people [<xref ref-type="bibr" rid="ref2">2</xref>]; globally, the number rises to between 300 and 400 million [<xref ref-type="bibr" rid="ref3">3</xref>]. Furthermore, the rare disease patient population, distributed across 5000 to 10,000 distinct diseases [<xref ref-type="bibr" rid="ref4">4</xref>], suffers from a significant lack of medical knowledge due to the rarity of a given illness. Consequently, patients often face prolonged and costly diagnostic processes and intensive treatments, with many of these diseases lacking approved therapies [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. This situation underscores the substantial burden placed on both patients and health care systems [<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>Online resources, including open-source databases, offer valuable references for medical professionals, contributing to the development of a comprehensive rare disease knowledge system. Examples of such databases include the Unified Medical Language System [<xref ref-type="bibr" rid="ref8">8</xref>], the Human Phenotype Ontology [<xref ref-type="bibr" rid="ref9">9</xref>] and the Orphanet [<xref ref-type="bibr" rid="ref10">10</xref>]. Specifically, Orphanet&#x2019;s database provides detailed information linking rare diseases, genes, and phenotypes, which greatly aids in the identification and diagnosis of rare diseases, among other related processes. However, these databases require considerable human effort for curation and maintenance. Therefore, there is an urgent need to develop methods that can support the process of establishing and enhancing rare disease medical knowledge systems.</p><p>Natural language processing (NLP) techniques are instrumental in automatically processing unstructured text to extract structured and clinically relevant information. 
This technique is especially beneficial for information extraction and knowledge discovery in the medical field. Recently, large language models (LLMs) have demonstrated exceptional proficiency in language understanding and generation, garnering significant attention in the NLP domain [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Their ease of use allows humans to complete a wide range of complex tasks in everyday life. Moreover, the extensive knowledge stored within their parameters equips them to excel in domain-specific applications, such as medicine and health care [<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>Current research is beginning to evaluate the capabilities of the most powerful LLMs, such as ChatGPT and GPT-4, across various medical applications. These applications include licensing examinations [<xref ref-type="bibr" rid="ref14">14</xref>], question answering [<xref ref-type="bibr" rid="ref15">15</xref>], and medical education [<xref ref-type="bibr" rid="ref16">16</xref>]. Notably, several studies have demonstrated that LLMs are effective few-shot medical named entity recognition extractors, exhibiting superior few-shot learning capabilities compared with other NLP methods [<xref ref-type="bibr" rid="ref17">17</xref>]. In the context of rare diseases, where resources are often limited, LLMs emerge as valuable tools for extracting information about these conditions, showcasing their use in enhancing medical knowledge systems.</p><p>In this paper, we introduce automated rare disease mining (AutoRD) as an efficient tool for extracting information about rare diseases and constructing corresponding knowledge graphs (KGs). The system processes unstructured medical text as input and outputs extraction results and a KG. It comprises several key stages: data preprocessing, entity extraction, relation extraction, entity calibration, and KG construction. 
Among these, entity and relation extraction are the most critical parts. AutoRD is an LLM-based system built upon GPT-4 [<xref ref-type="bibr" rid="ref12">12</xref>]. We use prompts as instructions to guide the LLMs through the entity and relation extraction processes. The model leverages its strong zero-shot capabilities to identify and extract entities and to analyze relationships between them. Although LLMs are pretrained with extensive knowledge, they sometimes lack precise medical information. To address this, we enhance the LLM&#x2019;s medical knowledge using rare disease and phenotype ontologies. This is achieved by designing sophisticated prompts that incorporate relevant knowledge, including few-shot learning, structured output formats, and detailed guidance for LLMs. We conducted experiments to evaluate the system and identified the advantages and limitations of AutoRD. In summary, our contributions are as follows:</p><list list-type="order"><list-item><p>We propose AutoRD, an automated end-to-end system that efficiently extracts rare disease information from text and builds KGs. This is a useful and practical system that can help medical professionals discover information about rare diseases.</p></list-item><list-item><p>We use ontology-enhanced LLMs in the module of rare disease entity extraction and relation extraction. This approach harnesses the few-shot learning capabilities of LLMs and integrates medical knowledge from ontologies, resulting in an improved performance beyond what LLMs achieve alone.</p></list-item><list-item><p>We conduct experiments and provide extensive analysis to demonstrate the effectiveness of AutoRD on the carefully processed RareDis2023 dataset.</p></list-item></list></sec><sec id="s1-2"><title>Background and Significance</title><p>Several studies have used machine learning methods to support and enhance the medical management and process of rare diseases. 
Sanjak et al [<xref ref-type="bibr" rid="ref18">18</xref>] introduced an innovative method for clustering over 3000 rare diseases using node embeddings within a KG. This approach facilitates a deeper understanding of the relationships between different diseases and opens possibilities for drug repurposing. Alsentzer et al [<xref ref-type="bibr" rid="ref19">19</xref>] developed Shepherd, a deep learning model designed for diagnosing rare diseases. This model effectively leverages clinical and genetic patient data along with existing medical knowledge to uncover new disease-gene associations. This work exemplifies the potential of artificial intelligence in medical diagnostics, even in scenarios with limited labeled data. Rashid et al [<xref ref-type="bibr" rid="ref20">20</xref>] explored a unique approach in rare disease research through the National Mesothelioma Virtual Bank. They used REDCap (Research Electronic Data Capture; Vanderbilt University) and a web portal query tool to integrate and manage clinical data from multiple institutions. This method demonstrates the power of combining data management tools and web technologies to enhance research and collaboration in the field of rare diseases.</p><p>The advent of LLMs has led to their increasing application in the medical field. Datta et al [<xref ref-type="bibr" rid="ref21">21</xref>] developed AutoCriteria, an LLM-based information extraction system that has shown high accuracy and generalizability in extracting detailed eligibility criteria from clinical trial documents for various diseases. This represents a scalable solution for clinical trial applications. In the context of rare diseases, there have been specific research efforts using LLMs. Shyr et al [<xref ref-type="bibr" rid="ref22">22</xref>] explored the performance of ChatGPT in extracting rare disease phenotypes from unstructured text, using zero- and few-shot learning techniques. 
This study demonstrated potential in certain scenarios, particularly with tailored prompts and minimal data. Oniani et al [<xref ref-type="bibr" rid="ref23">23</xref>] proposed Models-Vote Prompting, an approach that improves LLM performance in few-shot learning scenarios by aggregating outputs from multiple LLMs through majority voting.</p><p>However, these studies on LLM applications for rare diseases are still preliminary. Both focused on evaluating the basic capabilities of LLMs in identifying rare diseases or used simple prompt ensembles to slightly enhance LLM performance. They did not explore the task of extracting relationships between rare diseases and related phenotypes. In addition, these studies primarily explore basic applications of LLMs and do not extend to a comprehensive LLM-based system. Building on these foundational works, our research continues to delve into the use of LLMs for rare disease applications. Unlike prior efforts, we propose an integrated and useful system aimed at extracting rare disease information from unstructured text. The elaborate methods incorporated into this system significantly enhance extraction accuracy compared with the use of pure LLMs, marking a substantial advancement in this field.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>This project was approved under Exempt Review by the Peoria Institutional Review Board as Protocol Number 1994008. The RareDis-v1 dataset used in this publication is open-access and deidentified. As such, the original Institutional Review Board approval covers secondary analysis without additional consent; therefore, participants were neither consented nor compensated for this study. 
No identifiable features are included in this publication.</p></sec><sec id="s2-2"><title>Data</title><p>To improve the medical understanding of LLMs, we incorporated 3 medical ontologies into AutoRD: Orphanet Rare Disease Ontology (ORDO) [<xref ref-type="bibr" rid="ref10">10</xref>], Human Phenotype Ontology-Orphanet Rare Disease Ontology Ontological Module (HOOM) [<xref ref-type="bibr" rid="ref10">10</xref>], and Mondo Disease Ontology (Mondo) [<xref ref-type="bibr" rid="ref24">24</xref>].</p><p>For assessing the entity and relation extraction capabilities of AutoRD, the RareDis-v1 dataset [<xref ref-type="bibr" rid="ref25">25</xref>] was used. Before use, this dataset underwent several reprocessing steps including manual review and revision of annotation errors followed by reshuffling. We have named this new dataset RareDis2023.</p></sec><sec id="s2-3"><title>Automated Rare Disease Mining Framework</title><p>AutoRD is an innovative system designed to automatically extract rare disease information from medical texts and create a KG. The AutoRD framework is illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref> and consists of a pipeline structure that includes data preprocessing, entity extraction, relation extraction, entity calibration, and KG construction. The extraction steps, which include entity and relation extraction, are the core components of the system. In these steps, we use LLMs, along with medical ontologies, to effectively extract information from the texts.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>The automated rare disease mining framework processes medical texts as input data and outputs entities related to rare diseases and rare disease triples, which are the results of the extraction process. Subsequently, it constructs a knowledge graph based on these extraction results. 
During the entity and relation extraction steps, ontology-enhanced large language models are used to enhance performance.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e60665_fig01.png"/></fig></sec><sec id="s2-4"><title>Task Definition</title><p>Given a medical text (T), AutoRD is designed to first extract entities (E = {E1, E2, ..., En}) and relations (R = {R1, R2, ..., Rm}), and then output a KG based on E and R.</p><p>The medical text (T) can include clinical notes, research articles, or any text containing potential rare disease information. The final output, a KG, represents a set of rare diseases and related entities (such as diseases, symptoms, etc) along with their relationships within the text.</p><p>The entity types listed in <xref ref-type="table" rid="table1">Table 1</xref> include &#x201C;rare_disease,&#x201D; &#x201C;disease,&#x201D; &#x201C;symptom_and_sign,&#x201D; and &#x201C;anaphor.&#x201D; We group &#x201C;symptom&#x201D; and &#x201C;sign&#x201D; together because they both represent phenotypic abnormalities that may suggest a disease or medical condition. 
Distinguishing between them is not crucial in the context of rare disease research.</p><p>Relation types are displayed in <xref ref-type="table" rid="table2">Table 2</xref> and include &#x201C;produces,&#x201D; &#x201C;increases_risk_of,&#x201D; &#x201C;is_a,&#x201D; &#x201C;is_acron,&#x201D; &#x201C;is_synon,&#x201D; and &#x201C;anaphora.&#x201D; Each relation type represents a specific kind of relationship between a subject and an object, both of which can be any medical term entity.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Entity types in the entity extraction task and their definitions and examples, based on the original RareDis dataset definitions [<xref ref-type="bibr" rid="ref25">25</xref>].</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Entity type</td><td align="left" valign="bottom">Definition</td><td align="left" valign="bottom">Example</td></tr></thead><tbody><tr><td align="left" valign="top">rare_disease</td><td align="left" valign="top">Diseases that affect a small number of people compared with the general population. A disease is often considered to be rare when it affects less than 1 in 2000 individuals [<xref ref-type="bibr" rid="ref26">26</xref>].</td><td align="left" valign="top">Acquired aplastic anemia, Fryns syndrome, giant cell myocarditis</td></tr><tr><td align="left" valign="top">disease</td><td align="left" valign="top">An abnormal condition of a part, organ, or system of an organism resulting from various causes such as infection, inflammation, environmental factors, or genetic defect, and characterized by a patterned group of signs or symptoms.</td><td align="left" valign="top">Cancer, Alzheimer, cardiovascular disease</td></tr><tr><td align="left" valign="top">symptom_and_sign</td><td align="left" valign="top">Signs and symptoms are abnormalities that may suggest a disease. 
A symptom is a physical or mental problem that a person experiences that may indicate a disease or condition; it is a subjective finding reported by the patient. In contrast, a sign is an observable or otherwise discoverable feature that is considered abnormal.</td><td align="left" valign="top">Fatigue, dyspnea, pain, inflammation, rash, abnormal heart rate, hypothermia</td></tr><tr><td align="left" valign="top">anaphor</td><td align="left" valign="top">Pronouns, words, or nominal phrases that refer to a rare disease (which is the antecedent of the anaphor)</td><td align="left" valign="top">This disease, these diseases</td></tr></tbody></table></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Relation types in the relation extraction task and their definitions, based on those in the original RareDis dataset [<xref ref-type="bibr" rid="ref25">25</xref>].</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Relation type</td><td align="left" valign="bottom">Definition</td></tr></thead><tbody><tr><td align="left" valign="top">produces</td><td align="left" valign="top">Relation between any disease and a sign or a symptom produced by that disease.</td></tr><tr><td align="left" valign="top">increases_risk_of</td><td align="left" valign="top">Relation between a disease and a disorder, in which the presence of the disease increases the likelihood of the presence of the disorder.</td></tr><tr><td align="left" valign="top">is_a</td><td align="left" valign="top">Relation between a given disease and its classification as a more general disease.</td></tr><tr><td align="left" valign="top">is_acron</td><td align="left" valign="top">Relation between an acronym and its full or expanded form.</td></tr><tr><td align="left" valign="top">is_synon</td><td align="left" valign="top">Relation between two different names designating the same disease.</td></tr><tr><td align="left" 
valign="top">anaphora</td><td align="left" valign="top">Relation between an antecedent and an anaphor entity. The antecedent must be a rare disease.</td></tr></tbody></table></table-wrap></sec><sec id="s2-5"><title>Data Preprocessing</title><p>Before the system performs entity and relation extraction, we first preprocess the data due to the token limit of LLMs. In our system, we use GPT-4, which has a token limit of 8000. Our maximum length for prompts is approximately 1000 tokens, including the length of both input and output in the prompt slot. We divide lengthy input documents into segments containing fewer than 2000 tokens to adhere to the token limit. To minimize entity relations across segments, we recognize that relations typically occur within a single natural paragraph. Therefore, we segment documents at natural paragraph boundaries, ensuring each segment contains fewer than 2000 tokens. When paragraphs are segmented during preprocessing, some relations might span across the segmented parts, leading to incomplete extraction. To address this, we re-extract relations from the middle portions of previously segmented text to capture any new or missed relationships that may not have been fully identified in the initial extraction. This ensures that all relevant relationships within the text are accurately identified and included in the final KG.</p><p>We process medical knowledge data from ontology files downloaded from official websites. ORDO encompasses rare diseases that have been discovered up to the current day. From this ontology, we extract the names and definitions of all rare diseases. Mondo offers a unified medical terminology covering various medical concepts, from which we extract all disease, symptom, and sign concepts along with their definitions. In addition, HOOM is an ontology that annotates the relationships between clinical entities and phenotypic abnormalities and reports their frequencies of occurrence. 
We extract information from HOOM as triples, consisting of (rare disease, frequency, and phenotype). After preprocessing the ontology files, we can easily integrate medical knowledge from these ontologies into LLMs to enhance their knowledge base.</p><p>Data in the RareDis dataset also requires preprocessing for evaluation. The input texts in RareDis are all shorter than 512 tokens and consist of single paragraphs from medical literature; the dataset contains a total of 1040 data elements (texts and labels). We corrected some errors in the annotations of the original dataset. To evaluate performance and compare it with the fine-tuning baseline, we divided the dataset into training, validation, and test sets in a 6:2:2 ratio, resulting in 624, 208, and 208 entries, respectively. The training set is used for training fine-tuning models and selecting some exemplars for LLMs, while the validation set is used to select the best fine-tuning models during training. In alignment with our task definition, we merged &#x201C;Symptom&#x201D; and &#x201C;Sign&#x201D; from the original dataset into one entity type, &#x201C;symptom_and_sign.&#x201D; We named the newly processed dataset RareDis2023.</p></sec><sec id="s2-6"><title>Entity Extraction</title><p>After preprocessing, AutoRD subsequently carries out entity extraction. We drew inspiration from the concept of chain-of-thought (CoT) [<xref ref-type="bibr" rid="ref27">27</xref>] to structure the entity extraction process. CoT proposes that tackling complex problems step by step can enhance the performance of LLMs. Similarly, we divided entity extraction into 3 substeps. In each step, an LLM completes a specific, smaller task. This division of the task allows us to integrate external medical knowledge more effectively from ontologies into the LLMs during this process. 
The 3 steps are extract medical terms, extract more terms, and extract entities.</p><p>The first step, extract medical terms, extracts basic medical terms from the text. We only use a string-match algorithm with negation detection in this process. A dictionary is built from the medical ontology, Mondo. We use Mondo here because it encompasses nearly all standard medical terms. For each text, we use a string-match algorithm to search any medical terms in ontologies and save candidate medical terms as temporary results. For negation detection, we initially make a list of negation keywords manually, followed by the creation of a regular expression template. This template is then used to identify these keywords and extract the complete terms together. In this ontology, many terms include a free-text definition useful for model comprehension, so we make use of this information in subsequent steps.</p><p>The next step, extracting more terms, uses LLMs. The prompt template can be found in the left side of <xref ref-type="fig" rid="figure2">Figure 2</xref>. We input the original text and medical terms extracted from the previous step into the LLMs. The LLMs then output additional medical terms. These include terms that are medically relevant but did not directly match an ontology term, including lemmatizations. This process leverages the strong language comprehension capabilities of LLMs for more flexible term extraction. In this step, LLMs also identify anaphors. In the prompt, we first outline the specifics of the current task and provide clear definitions of the entity types. In addition, we include guidelines for the LLMs on identifying entities that are difficult to recognize. This part is significant and can be continuously improved by medical experts based on the performance of the LLMs and the results they produce. In many cases, LLMs may have misunderstandings in this task, so we need to use prompts to adjust for and correct their interpretations. 
Finally, we define the output format of the LLMs to be easily parsed, such as in JSON format.</p><p>The final step, entity extraction, also involves the use of LLMs. The instruction prompt template can be found in the central part of <xref ref-type="fig" rid="figure2">Figure 2</xref>. The input for this step includes the medical terms and anaphors extracted during the previous step, while the output is comprised of all extracted entities with their appropriate categorizations. The framework of the prompt is formatted like earlier steps; however, additional external information is incorporated into the prompt slots, including medical terms, exemplars, anaphors, and rare disease knowledge. Here, &#x201C;rare disease knowledge&#x201D; refers to terms that can be definitively classified as rare diseases, achieved by matching candidate entities with terms in ORDO.</p><p>Furthermore, we use the concept of in-context learning [<xref ref-type="bibr" rid="ref28">28</xref>]. In-context learning uses exemplars to enhance the performance of LLMs. Each exemplar is a gold input-output pair, demonstrating the correct method of processing input and generating output for LLMs. This approach is beneficial for guiding the output format of LLMs and providing them with reference material and knowledge to inform their responses. Exemplars can be randomly selected from the training set. After completing these 3 steps, we can extract entities from text using LLMs.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Content of all prompt templates in automated rare disease mining. This figure presents the simplified content of all prompts to provide a clear framework of the prompt structure. The black text represents the original text of the instructions. Gray text indicates a summary of each part of the instructions. 
Blue text highlights the prompt slots, where external information and inputs can be inserted.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e60665_fig02.png"/></fig></sec><sec id="s2-7"><title>Relation Extraction</title><p>In our methodology, relation extraction is conducted after entity extraction. All identified entities are fed into LLMs, which then output the extracted relations. The prompt template used for instructing the LLMs is depicted in the central part of <xref ref-type="fig" rid="figure2">Figure 2</xref>. The underlying logic of this process is akin to that of entity extraction. In the prompt, we initially provide an overview of the current task and establish clear definitions for both entity and relation types. We also include additional considerations for the LLMs to consider during relation extraction. Finally, we define the output format for the LLMs, which is structured to be easily parsed in JSON format. The prompt also contains examples of relation extraction.</p><p>For the extraction of rare disease knowledge, we use HOOM, an ontology that consists of rare disease-phenotype triples. This ontology provides information on symptoms and signs associated with rare diseases. We use rare diseases as keys to construct a dictionary, enabling the identification of related triples through string matching. This external medical knowledge aids the LLMs in acquiring information about existing relationships between rare diseases and certain phenotypes.</p></sec><sec id="s2-8"><title>Entity Calibration</title><p>Our goal is to construct a KG based on the extraction results. We consider that many entities might not have defined relationships with other entities. Moreover, after analyzing the extraction results, we observed that entities without any relationships are more likely to be irrelevant or falsely ascribed as medical entities within the context. 
For instance, the system identifies the term &#x201C;disorder&#x201D; during the entity extraction phase. However, in the relation extraction, the system fails to detect any &#x201C;anaphora&#x201D; or other relations, indicating that it is merely a generic term and can be disregarded in this context. In other instances, some false symptoms and signs are also effectively eliminated. Therefore, we introduce entity calibration as an additional step after relation extraction. The prompt template for this task can be seen on the right side of <xref ref-type="fig" rid="figure2">Figure 2</xref>. In this step, we provide all results obtained from the previous steps and use the LLMs to reanalyze the relationships, filtering out unrelated entities. By combining the results from both entity and relation extraction phases, we obtain the comprehensive outcome of the entire extraction process.</p></sec><sec id="s2-9"><title>Knowledge Graph Construction</title><p>After extracting entities and relations, we postprocess the data to prepare for KG construction. This includes aligning entities, which involves merging identical nodes in the KG. For each triple, we assess whether the subjects or objects are the same. We begin by converting the names to lowercase and then determining if they match. In addition, we transform all anaphoric relations to their original names.</p><p>After postprocessing, we can easily construct the KG based on these triples. Specifically, we use Neo4j [<xref ref-type="bibr" rid="ref29">29</xref>] for this purpose. Neo4j is a highly flexible and scalable graph database, designed to store and process complex networks of data. It enables efficient querying and management of interconnected information. Using the application programming interface of Neo4j, we add the rare disease triples into the graph database one by one. 
As a result, we can visualize our rare disease KG within the Neo4j platform.</p></sec><sec id="s2-10"><title>Evaluation</title><p>For the entity and relation extraction component, we quantitatively evaluated the performance of AutoRD using the processed RareDis2023 dataset.</p><p>Regarding our method, AutoRD, we specifically selected &#x201C;gpt-4&#x2010;0613,&#x201D; a version of GPT-4 from OpenAI, for the LLMs. We set the LLM&#x2019;s temperature to 0 to ensure the most stable output. For each prediction with exemplars, we randomly chose 5 exemplars from the training set. The performance of AutoRD is evaluated exclusively on the test set, and detailed prompt templates are available in the source code.</p><p>To analyze the improvement our method brings compared with using only LLMs, we evaluate the performance of the base LLM. We use the same LLM, &#x201C;gpt-4&#x2010;0613,&#x201D; and maintain all other settings identical to AutoRD. The prompts were developed collaboratively by clinicians and computer engineers. The detailed prompt template can be found in the source code.</p><p>For our fine-tuning model baseline, we selected BioClinicalBERT [<xref ref-type="bibr" rid="ref30">30</xref>]. Entity recognition is performed through token classification based on BIO labels, and relationships are identified by concatenating the embeddings of two entities, followed by a linear classification. This model is trained on the training set, optimized according to the validation set, and finally evaluated on the test set. Detailed experimental settings are available in the source code.</p><p>In terms of evaluation metrics, we use Precision, Recall, and <italic>F</italic><sub>1</sub>-score metrics in a named entity recognition setting. For entity and relation extraction, we measure entity <italic>F</italic><sub>1</sub>-score and relation <italic>F</italic><sub>1</sub>-score, respectively. 
The final overall results are represented by the overall <italic>F</italic><sub>1</sub>-score, calculated as the mean of entity <italic>F</italic><sub>1</sub>-score and relation <italic>F</italic><sub>1</sub>-score. We consider replicated entities in our extraction measurements, which are instances of the same entity occurring at different positions within the text. If the name of an extracted entity is correct, we regard it as true. The evaluation of relation extraction is not limited to correctly identified entities but covers all true entities. We use the average score of entity <italic>F</italic><sub>1</sub>-score and relation <italic>F</italic><sub>1</sub>-score as the overall evaluation metric because both tasks are essential to AutoRD&#x2019;s performance. In some scenarios, simply identifying key rare disease entities is sufficient, while in others, understanding their relationships is equally important. By averaging these scores, we ensure a balanced assessment of the system&#x2019;s effectiveness across different scenarios.</p><p>In the test set, the entity instances include the following numbers of each type: 463 &#x201C;disease,&#x201D; 1054 &#x201C;rare_disease,&#x201D; 1255 &#x201C;symptom_and_sign,&#x201D; and 334 &#x201C;anaphor.&#x201D; Numbers of each type of relation include 1261 &#x201C;produces,&#x201D; 62 &#x201C;increases_risk_of,&#x201D; 188 &#x201C;is_a,&#x201D; 55 &#x201C;is_acron,&#x201D; 22 &#x201C;is_synon,&#x201D; and 331 &#x201C;anaphora.&#x201D;</p><p>For the KG construction, we conducted qualitative experiments and provide examples of the KG results. 
In this case, we only used the extraction results from the test dataset of RareDis2023.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Main Experimental Results</title><p>The primary experimental results are presented in <xref ref-type="table" rid="table3">Table 3</xref>, which includes comparisons of entity and relation extraction performance among BioClinicalBERT (a fine-tuning model), Base GPT-4 (a base LLM), and AutoRD (our method). Overall, AutoRD achieves an overall <italic>F</italic><sub>1</sub>-score of 47.3%. The system demonstrates superior performance over these two baselines, with an improvement of 0.8% in overall <italic>F</italic><sub>1</sub>-score compared with the fine-tuning model and a 14.4% improvement compared with the base LLM. Recall is deemed more important than precision in this context because human effort can be used to validate extracted results. The primary goal is to extract all gold entities initially. In terms of recall, our overall recall improved by 18.4% compared with Base GPT-4 and by 6.6% compared with the fine-tuning models. For each extraction objective, AutoRD achieves an overall entity extraction <italic>F</italic><sub>1</sub>-score of 56.1% (&#x201C;rare_disease&#x201D;: 83.5%, &#x201C;disease&#x201D;: 35.8%, &#x201C;symptom_and_sign&#x201D;: 46.1%, &#x201C;anaphor&#x201D;: 67.5%) and an overall relation extraction <italic>F</italic><sub>1</sub>-score of 38.6% (&#x201C;produces&#x201D;: 34.7%, &#x201C;increases_risk_of&#x201D;: 12.4%, &#x201C;is_a&#x201D;: 37.4%, &#x201C;is_acron&#x201D;: 44.1%, &#x201C;is_synon&#x201D;: 16.3%, &#x201C;anaphora&#x201D;: 57.5%).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>The main experimental results of entity and relation extraction. Our method surpasses both the fine-tuning model (BioClinicalBERT) and the base LLM (Base GPT-4) in terms of overall <italic>F</italic><sub>1</sub>-score (%). 
Bolded values indicate overall performance as represented by the <italic>F</italic><sub>1</sub>-score.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Method</td><td align="left" valign="bottom" colspan="2">Type</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td></tr></thead><tbody><tr><td align="left" valign="top">BioClinicalBERT</td><td align="left" valign="top" colspan="2">Entity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;rare_disease</td><td align="left" valign="top">80.5</td><td align="left" valign="top">87.7</td><td align="left" valign="top">83.9</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;disease</td><td align="left" valign="top">53.2</td><td align="left" valign="top">46.0</td><td align="left" valign="top">49.3</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;symptom_and_sign</td><td align="left" valign="top">62.3</td><td align="left" valign="top">62.5</td><td align="left" valign="top">62.4</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;anaphor</td><td align="left" valign="top">89.9</td><td align="left" valign="top">93.7</td><td align="left" valign="top">91.7</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;entity_overall</td><td align="left" valign="top">70.9</td><td align="left" valign="top">72.0</td><td align="left" valign="top">71.4</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Relation</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" 
valign="top"/><td align="left" valign="top" colspan="2">&#x2003;produces</td><td align="left" valign="top">49.7</td><td align="left" valign="top">13.6</td><td align="left" valign="top">21.4</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;increases_risk_of</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;is_a</td><td align="left" valign="top">80.0</td><td align="left" valign="top">4.3</td><td align="left" valign="top">8.1</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;is_acron</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;is_synon</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;anaphora</td><td align="left" valign="top">82.9</td><td align="left" valign="top">23.3</td><td align="left" valign="top">36.3</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;relation_overall</td><td align="left" valign="top">57.0</td><td align="left" valign="top">13.4</td><td align="left" valign="top">21.7</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Overall</td><td align="left" valign="top">64.0</td><td align="left" valign="top">42.7</td><td align="left" valign="top"><bold>46.5</bold></td></tr><tr><td align="left" valign="top">Base GPT4</td><td align="left" valign="top" colspan="2">Entity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" 
valign="top" colspan="2">&#x2003;rare_disease</td><td align="left" valign="top">94.8</td><td align="left" valign="top">38.4</td><td align="left" valign="top">54.7</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;disease</td><td align="left" valign="top">22.5</td><td align="left" valign="top">59.8</td><td align="left" valign="top">32.7</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;symptom_and_sign</td><td align="left" valign="top">48.7</td><td align="left" valign="top">41.7</td><td align="left" valign="top">44.9</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;anaphor</td><td align="left" valign="top">45.2</td><td align="left" valign="top">69.5</td><td align="left" valign="top">54.7</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;entity_overall</td><td align="left" valign="top">43.2</td><td align="left" valign="top">46.3</td><td align="left" valign="top">44.7</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Relation</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;produces</td><td align="left" valign="top">26.5</td><td align="left" valign="top">3.3</td><td align="left" valign="top">5.8</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;increases_risk_of</td><td align="left" valign="top">9.6</td><td align="left" valign="top">8.1</td><td align="left" valign="top">8.8</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;is_a</td><td align="left" valign="top">33.0</td><td align="left" valign="top">30.9</td><td align="left" valign="top">31.9</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" 
colspan="2">&#x2003;is_acron</td><td align="left" valign="top">17.1</td><td align="left" valign="top">21.8</td><td align="left" valign="top">19.2</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;is_synon</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;anaphora</td><td align="left" valign="top">41.7</td><td align="left" valign="top">55.3</td><td align="left" valign="top">47.6</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;relation_overall</td><td align="left" valign="top">32.5</td><td align="left" valign="top">15.6</td><td align="left" valign="top">21.1</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Overall</td><td align="left" valign="top">37.9</td><td align="left" valign="top">30.9</td><td align="left" valign="top"><bold>32.9</bold></td></tr><tr><td align="left" valign="top">AutoRD (ours)</td><td align="left" valign="top" colspan="2">Entity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;rare_disease</td><td align="left" valign="top">93.1</td><td align="left" valign="top">75.6</td><td align="left" valign="top">83.5</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;disease</td><td align="left" valign="top">26.6</td><td align="left" valign="top">54.9</td><td align="left" valign="top">35.8</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;symptom_and_sign</td><td align="left" valign="top">45.8</td><td align="left" valign="top">46.5</td><td align="left" valign="top">46.1</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" 
colspan="2">&#x2003;anaphor</td><td align="left" valign="top">59.0</td><td align="left" valign="top">79.0</td><td align="left" valign="top">67.5</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;entity_overall</td><td align="left" valign="top">51.8</td><td align="left" valign="top">61.1</td><td align="left" valign="top">56.1</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Relation</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;produces</td><td align="left" valign="top">37.2</td><td align="left" valign="top">32.4</td><td align="left" valign="top">34.7</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;increases_risk_of</td><td align="left" valign="top">11.8</td><td align="left" valign="top">13.1</td><td align="left" valign="top">12.4</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;is_a</td><td align="left" valign="top">41.4</td><td align="left" valign="top">34.0</td><td align="left" valign="top">37.4</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;is_acron</td><td align="left" valign="top">49.2</td><td align="left" valign="top">40.0</td><td align="left" valign="top">44.1</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;is_synon</td><td align="left" valign="top">12.8</td><td align="left" valign="top">22.7</td><td align="left" valign="top">16.3</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x2003;anaphora</td><td align="left" valign="top">52.4</td><td align="left" valign="top">63.7</td><td align="left" valign="top">57.5</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" 
colspan="2">&#x2003;relation_overall</td><td align="left" valign="top">39.8</td><td align="left" valign="top">37.5</td><td align="left" valign="top">38.6</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="2">Overall</td><td align="left" valign="top">45.8</td><td align="left" valign="top">49.3</td><td align="left" valign="top"><bold>47.3</bold></td></tr></tbody></table></table-wrap><p>First, comparing Base GPT-4 with BioClinicalBERT reveals that BioClinicalBERT, with its ample training data, aligns well with the original dataset&#x2019;s distribution and excels in multiple metrics. However, LLMs were not trained to fit this distribution. This discrepancy leads to issues such as misclassifications and ambiguous entity boundaries in LLMs. In relation extraction, however, the fine-tuned model fails to detect relations like &#x201C;increases_risk_of,&#x201D; &#x201C;is_acron,&#x201D; and &#x201C;is_synon,&#x201D; whereas Base GPT-4 detects all but &#x201C;is_synon.&#x201D; This demonstrates the strong zero-shot capabilities of LLMs, especially for relations sparsely represented in the training set.</p><p>When comparing AutoRD with BioClinicalBERT, it is apparent that while our method falls somewhat short in entity extraction, it excels in relation extraction. Specifically, the entity <italic>F</italic><sub>1</sub>-score is 15.3% lower than this baseline, but the relation <italic>F</italic><sub>1</sub>-score is 16.9% higher. This results in an overall performance that is 0.8% better. Relation extraction plays a pivotal role in the construction of KGs, as it is essential to understanding the underlying relationships between entities. AutoRD leverages the few-shot learning capability of LLMs to better analyze relationships between medical entities. 
However, we observed a lower precision in AutoRD, primarily because it identifies too many entities as &#x201C;diseases&#x201D; and sometimes misclassifies &#x201C;symptom_and_sign&#x201D; entities, according to its extraction results.</p><p>Furthermore, when comparing AutoRD with Base GPT-4, it is evident that our method significantly improves performance, raising the overall recall by 18.4% and the overall <italic>F</italic><sub>1</sub>-score by 14.4%. The most notable improvement is a 37.2% increase in the recall of rare disease entities from Base GPT-4. Base GPT-4, with its poor analysis capability and lack of sufficient medical knowledge, struggles to identify all types of rare diseases. Overall, our approach demonstrates substantial improvements in most metrics.</p></sec><sec id="s3-2"><title>Ablation Study</title><p>We conduct an ablation study to analyze the contribution of various components within AutoRD to the overall system. The results are presented in <xref ref-type="table" rid="table4">Table 4</xref>, which clearly shows that each key component contributes to the improvement of AutoRD. Note that &#x201C;Knowledge&#x201D; represents the external knowledge sourced from medical ontologies, while &#x201C;Notice&#x201D; refers to the reminders for the LLMs. This study suggests that AutoRD can effectively use knowledge from both medical ontologies and exemplars. The &#x201C;Notice&#x201D; component brings a significant improvement of 8.1% in overall <italic>F</italic><sub>1</sub>-score. The current notices for LLMs in AutoRD have been carefully fine-tuned, demonstrating their effectiveness in adjusting and correcting for the LLMs&#x2019; interpretations for extraction tasks.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>The results of the ablation experiment. It clearly shows that each key component contributes to the improvement of automated rare disease mining in terms of <italic>F</italic><sub>1</sub>-score (%). 
The symbol &#x201C;&#x2207;&#x201D; represents the magnitude of the performance drop.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Method</td><td align="left" valign="bottom">Entity <italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">&#x2207;</td><td align="left" valign="bottom">Relation <italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">&#x2207;</td><td align="left" valign="bottom">Overall <italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">&#x2207;</td></tr></thead><tbody><tr><td align="left" valign="top">AutoRD<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> (Ours)</td><td align="left" valign="top">56.1</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="top">38.6</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">47.3</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">AutoRD without knowledge</td><td align="left" valign="top">53.8</td><td align="left" valign="top">&#x2212;2.3</td><td align="left" valign="top">36.1</td><td align="left" valign="top">&#x2212;2.5</td><td align="left" valign="top">45.0</td><td align="left" valign="top">&#x2212;2.3</td></tr><tr><td align="left" valign="top">AutoRD without exemplars</td><td align="left" valign="top">52.9</td><td align="left" valign="top">&#x2212;3.2</td><td align="left" valign="top">34.9</td><td align="left" valign="top">&#x2212;3.7</td><td align="left" valign="top">43.9</td><td align="left" valign="top">&#x2212;3.4</td></tr><tr><td align="left" valign="top">AutoRD without notice</td><td align="left" valign="top">44.7</td><td align="left" valign="top">&#x2212;11.4</td><td align="left" valign="top">33.7</td><td align="left" valign="top">&#x2212;4.9</td><td align="left" valign="top">39.2</td><td align="left" 
valign="top">&#x2212;8.1</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>AutoRD: automated rare disease mining.</p></fn><fn id="table4fn2"><p><sup>b</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Error Analysis</title><p>We perform error analysis for each entity and relation type. To illustrate the distribution of extraction results, we use two confusion matrices: one for entity extraction results and the other for relation extraction results. The results are shown in <xref ref-type="fig" rid="figure3">Figures 3</xref> and <xref ref-type="fig" rid="figure4">4</xref>, respectively. The term &#x201C;Error&#x201D; in the &#x201C;Predicted&#x201D; axis refers to entities that have been incorrectly extracted, whereas &#x201C;Error&#x201D; in the &#x201C;True&#x201D; axis denotes real entities that were not extracted. We exclude replicated entities to simplify the computation of the confusion matrices.</p><p>In the entity extraction confusion matrix, there is significant confusion between the categories of &#x201C;disease&#x201D; and &#x201C;rare_disease,&#x201D; possibly due to overlapping textual features. The &#x201C;symptom_and_sign&#x201D; category exhibits high classification accuracy but is also prone to being misclassified as &#x201C;Error,&#x201D; suggesting the need for more distinctive features or additional contextual information in the dataset. The &#x201C;anaphor&#x201D; category was accurately classified with fewer errors, indicating that the system effectively captures its linguistic features. However, many predicted entities are incorrectly extracted and are labeled as &#x201C;Error,&#x201D; indicating that LLMs tend to extract more information with a low precision.</p><p>The confusion matrix for the relation extraction indicates varying degrees of performance across different categories. 
The &#x201C;produces&#x201D; relation is often identified correctly but also often misclassified as &#x201C;Error,&#x201D; indicating recognition issues. &#x201C;increases_risk_of&#x201D; is more frequently an &#x201C;Error&#x201D; than correct, demonstrating the recognition difficulty of this relation. &#x201C;is_a&#x201D; has moderate success but high error rates as well. &#x201C;is_acron&#x201D; and &#x201C;is_synon&#x201D; rarely hit true positives and mostly fall into &#x201C;Error,&#x201D; possibly due to acronym variability and synonym recognition failure.</p><p>&#x201C;anaphora&#x201D; resolution is relatively accurate but also misclassified, hinting at context comprehension challenges. The &#x201C;Error&#x201D; category&#x2019;s high rate of both true and false positives is primarily affected by the false results from entity extraction step before it.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>The confusion matrix of the entity extraction task results in RareDis2023.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e60665_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>The confusion matrix of the relation extraction task results in RareDis2023.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e60665_fig04.png"/></fig></sec><sec id="s3-4"><title>Qualitative Results</title><p>Qualitative results are showcased in <xref ref-type="fig" rid="figure5">Figure 5</xref>, which depicts all the extracted results from the RareDis2023 dataset. Our qualitative results have been validated by medical experts and have shown satisfactory outcomes. 
This visualization provides a global perspective, highlighting the relationships among various rare diseases and their associated signs and symptoms in a concise KG.</p><p>Specific extraction results of the KG are depicted in <xref ref-type="fig" rid="figure6">Figure 6</xref>. This figure offers visualizations from a local perspective, illustrating an ideal structure of the KG. In this structure, rare diseases are positioned at the center of radial formations, with connections extending to entities like symptoms and signs. For example, the rare disease &#x201C;Turcot syndrome&#x201D; is associated with &#x201C;abdominal pain,&#x201D; &#x201C;bleeding,&#x201D; &#x201C;fatigue,&#x201D; etc.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>An example of a constructed knowledge graph from RareDis2023. The result is a clear and well-structured knowledge graph.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e60665_fig05.png"/></fig><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>An example provides a detailed view of a specific local section of the constructed knowledge graph.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v12i1e60665_fig06.png"/></fig><p>In addition, we experimented with training-specialized medical LLMs and compared their performance. Specifically, we used Camel-Platypus2-70B [<xref ref-type="bibr" rid="ref31">31</xref>], a health care&#x2013;tailored model that is an extension of Llama-2 [<xref ref-type="bibr" rid="ref32">32</xref>], through continuous training. Our experiments revealed that, without specific training, this type of model struggles to execute complex tasks, such as joint entity and relation extraction. 
It appears that the inherent medical knowledge is not readily applicable in these scenarios.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>Our experimentation demonstrates the effectiveness of our proposed system, AutoRD. It significantly improves upon the base LLM and even outperforms fine-tuning models without requiring any training. Within several designs, the incorporation of medical ontologies has notably enhanced the LLMs by addressing gaps in medical knowledge. Furthermore, the results achieved in KG construction by our system are commendable. We highlight the advantage of LLMs in low-resource scenarios such as rare disease extraction, showcasing their vast potential. Our meticulously designed system, AutoRD, substantiates this claim. The emergence of LLMs is generating unparalleled opportunities in the phenotyping of rare diseases. These models facilitate the automatic identification and extraction of concepts related to these diseases. Our prompts are easily adjustable due to their clear structure, allowing for simple modifications. In addition, medical knowledge derived from external sources can be updated at any time within the AutoRD system.</p></sec><sec id="s4-2"><title>Limitations</title><p>Nevertheless, there is considerable potential for further improvement with respect to AutoRD. For instance, integrating advanced text processing tools and specialized medical tools into our system could amplify its capabilities. In the future, we can deploy more powerful medical LLMs as base models to enhance medical understanding. Moreover, medical experts can contribute more tailored prompts to improve LLMs&#x2019; performance.</p><p>Our work has potential limitations and avenues for extension. 
For example, we have only evaluated AutoRD on a single dataset, so the results may not fully reflect the system&#x2019;s performance across the entire spectrum of rare diseases or in other long-text scenarios. In addition, the prompts we designed are intuitive, but there is still room for continuous tuning and experimentation with different prompts. We acknowledge that AutoRD may not be the optimal LLM application for this task, yet it significantly improves upon the baseline performance of LLMs. This work aims at demonstrating the potential of LLM applications in the health care field.</p></sec><sec id="s4-3"><title>Conclusions</title><p>AutoRD represents a significant advancement in the extraction of rare disease information, directly addressing the critical gaps associated with common LLMs used in rare disease medical research. By streamlining the process of building comprehensive KGs from unstructured medical texts, AutoRD tackles the substantial burden placed on patients and health care systems due to prolonged and costly diagnostic processes associated with rare diseases. By integrating ontology-enhanced LLMs, AutoRD overcomes the limitations of existing systems, particularly the significant human effort required for curation and maintenance of rare disease databases and the inability to handle complex and up-to-date rare disease information effectively.</p><p>Our experimental results demonstrate the system&#x2019;s effectiveness, achieving a 14.4% improvement over the most advanced LLM in both entity and relation extraction <italic>F</italic><sub>1</sub>-scores. This enhancement effectively fills critical gaps in rare disease research by providing an automated method to support the establishment and enhancement of rare disease medical knowledge systems. 
By leveraging LLMs&#x2019; strong zero-shot capabilities and integrating medical knowledge from ontologies, AutoRD contributes to a more robust and comprehensive medical knowledge base, ultimately facilitating faster diagnoses and improved management of rare diseases.</p><p>This study highlights AutoRD&#x2019;s potential to transform rare disease diagnostics and treatment by offering a scalable, automated solution for medical information extraction. The enhanced precision and recall in identifying rare disease entities and their relationships provide valuable insights for health care professionals, ultimately supporting better clinical decision-making and improved patient outcomes. Furthermore, AutoRD&#x2019;s flexible architecture, incorporating techniques such as CoT and prompt engineering, offers promising opportunities for adaptation in other health care domains, especially in low-resource environments where medical expertise may be scarce.</p><p>In conclusion, AutoRD not only elevates the accuracy and efficiency of rare disease information extraction but also paves the way for future applications in medical diagnostics and personalized health care. By bridging the gap between vast unstructured medical data and actionable knowledge, AutoRD stands to significantly impact the fight against rare diseases, offering renewed hope to patients and clinicians alike as we move toward a future where advanced artificial intelligence technologies play a central role in health care innovation.</p></sec></sec></body><back><ack><p>This project has been funded by the Jump ARCHES endowment through the Health Care Engineering Systems Center at Illinois and the Order of Saint Francis Foundation.</p></ack><fn-group><fn fn-type="con"><p>LC designed the main method, wrote the code, conducted experiments, and wrote the manuscript. AC and JS participated in the design, analysis, and discussion of the experiment and helped to improve the method. 
AC and JS also assisted with revisions to the manuscript.</p></fn><fn fn-type="conflict"><p>JS was an associate editor for <italic>JMIR AI</italic> at the time of this publication.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AutoRD</term><def><p>automatic rare disease mining system</p></def></def-item><def-item><term id="abb2">CoT</term><def><p>chain-of-thought</p></def></def-item><def-item><term id="abb3">HOOM</term><def><p>Human Phenotype Ontology-Orphanet Rare Disease Ontology Module</p></def></def-item><def-item><term id="abb4">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb5">Mondo</term><def><p>Mondo Disease Ontology</p></def></def-item><def-item><term id="abb6">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb7">ORDO</term><def><p>Orphanet Rare Disease Ontology</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marwaha</surname><given-names>S</given-names> </name><name name-style="western"><surname>Knowles</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Ashley</surname><given-names>EA</given-names> </name></person-group><article-title>A guide for the diagnosis of rare and undiagnosed disease: beyond the exome</article-title><source>Genome Med</source><year>2022</year><month>02</month><day>28</day><volume>14</volume><issue>1</issue><fpage>23</fpage><pub-id pub-id-type="doi">10.1186/s13073-022-01026-w</pub-id><pub-id pub-id-type="medline">35220969</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Boat</surname><given-names>TF</given-names> </name><name 
name-style="western"><surname>Field</surname><given-names>MJ</given-names> </name></person-group><source>Rare Diseases and Orphan Products: Accelerating Research and Development</source><year>2011</year><publisher-name>National Academies Press</publisher-name></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nguengang Wakap</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lambert</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Olry</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Estimating cumulative point prevalence of rare diseases: analysis of the Orphanet database</article-title><source>Eur J Hum Genet</source><year>2020</year><month>02</month><volume>28</volume><issue>2</issue><fpage>165</fpage><lpage>173</lpage><pub-id pub-id-type="doi">10.1038/s41431-019-0508-0</pub-id><pub-id pub-id-type="medline">31527858</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haendel</surname><given-names>M</given-names> </name><name name-style="western"><surname>Vasilevsky</surname><given-names>N</given-names> </name><name name-style="western"><surname>Unni</surname><given-names>D</given-names> </name><etal/></person-group><article-title>How many rare diseases are there?</article-title><source>Nat Rev Drug Discov</source><year>2020</year><month>02</month><volume>19</volume><issue>2</issue><fpage>77</fpage><lpage>78</lpage><pub-id pub-id-type="doi">10.1038/d41573-019-00180-y</pub-id><pub-id pub-id-type="medline">32020066</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><article-title>Rare diseases: although limited, available evidence suggests medical and other costs can be 
substantial</article-title><year>2021</year><access-date>2024-12-11</access-date><publisher-name>U.S. Government Accountability Office</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.gao.gov/assets/gao-22-104235.pdf">https://www.gao.gov/assets/gao-22-104235.pdf</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tisdale</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cutillo</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Nathan</surname><given-names>R</given-names> </name><etal/></person-group><article-title>The IDeaS initiative: pilot study to assess the impact of rare diseases on patients and healthcare systems</article-title><source>Orphanet J Rare Dis</source><year>2021</year><month>10</month><day>22</day><volume>16</volume><issue>1</issue><fpage>429</fpage><pub-id pub-id-type="doi">10.1186/s13023-021-02061-3</pub-id><pub-id pub-id-type="medline">34674728</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ferreira</surname><given-names>CR</given-names> </name></person-group><article-title>The burden of rare diseases</article-title><source>Am J Med Genet A</source><year>2019</year><month>06</month><volume>179</volume><issue>6</issue><fpage>885</fpage><lpage>892</lpage><pub-id pub-id-type="doi">10.1002/ajmg.a.61124</pub-id><pub-id pub-id-type="medline">30883013</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bodenreider</surname><given-names>O</given-names> </name></person-group><article-title>The Unified Medical Language System (UMLS): integrating biomedical 
terminology</article-title><source>Nucleic Acids Res</source><year>2004</year><month>01</month><day>1</day><volume>32</volume><fpage>D267</fpage><lpage>D270</lpage><pub-id pub-id-type="doi">10.1093/nar/gkh061</pub-id><pub-id pub-id-type="medline">14681409</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>K&#x00F6;hler</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gargano</surname><given-names>M</given-names> </name><name name-style="western"><surname>Matentzoglu</surname><given-names>N</given-names> </name><etal/></person-group><article-title>The Human Phenotype Ontology in 2021</article-title><source>Nucleic Acids Res</source><year>2021</year><month>01</month><day>8</day><volume>49</volume><issue>D1</issue><fpage>D1207</fpage><lpage>D1217</lpage><pub-id pub-id-type="doi">10.1093/nar/gkaa1043</pub-id><pub-id pub-id-type="medline">33264411</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="web"><article-title>Orphanet</article-title><source>Orphanet</source><access-date>2023-11-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.orpha.net/consor/cgi-bin/index.php">https://www.orpha.net/consor/cgi-bin/index.php</ext-link></comment></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>WX</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>K</given-names> </name><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name><etal/></person-group><article-title>A survey of large language models</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 31, 2023</comment><pub-id 
pub-id-type="doi">arXiv:2303.18223</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><collab>OpenAI</collab><name name-style="western"><surname>Achiam</surname><given-names>J</given-names> </name><name name-style="western"><surname>Adler</surname><given-names>S</given-names> </name><etal/></person-group><article-title>GPT-4 technical report</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 15, 2023</comment><pub-id pub-id-type="doi">arXiv:2303.08774</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karabacak</surname><given-names>M</given-names> </name><name name-style="western"><surname>Margetis</surname><given-names>K</given-names> </name></person-group><article-title>Embracing large language models for medical applications: opportunities and challenges</article-title><source>Cureus</source><year>2023</year><month>05</month><volume>15</volume><issue>5</issue><fpage>e39305</fpage><pub-id pub-id-type="doi">10.7759/cureus.39305</pub-id><pub-id pub-id-type="medline">37378099</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Kasai</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kasai</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sakaguchi</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Evaluating GPT-4 and ChatGPT on Japanese medical licensing examinations</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 31, 2023</comment><pub-id pub-id-type="doi">arXiv:2303.18027</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation 
citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Nori</surname><given-names>H</given-names> </name><name name-style="western"><surname>King</surname><given-names>N</given-names> </name><name name-style="western"><surname>McKinney</surname><given-names>SM</given-names> </name><etal/></person-group><article-title>Capabilities of GPT-4 on medical challenge problems</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 20, 2023</comment><pub-id pub-id-type="doi">arXiv:2303.13375</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kung</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Cheatham</surname><given-names>M</given-names> </name><name name-style="western"><surname>Medenilla</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Performance of ChatGPT on USMLE: Potential for AI-assisted medical education using large language models</article-title><source>PLOS Digit Health</source><year>2023</year><month>02</month><volume>2</volume><issue>2</issue><fpage>e0000198</fpage><pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id><pub-id pub-id-type="medline">36812645</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>R</given-names> </name></person-group><article-title>How far is language model from 100% few-shot named entity recognition in medical domain</article-title><source>arXiv</source><comment>Preprint posted online on  Jul 1, 2023</comment><pub-id pub-id-type="doi">arXiv:2307.00186</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sanjak</surname><given-names>J</given-names> </name><name name-style="western"><surname>Binder</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yadaw</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Math&#x00E9;</surname><given-names>EA</given-names> </name></person-group><article-title>Clustering rare diseases within an ontology-enriched knowledge graph</article-title><source>J Am Med Inform Assoc</source><year>2023</year><month>12</month><day>22</day><volume>31</volume><issue>1</issue><fpage>154</fpage><lpage>164</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocad186</pub-id><pub-id pub-id-type="medline">37759342</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Alsentzer</surname><given-names>E</given-names> </name><name name-style="western"><surname>Li</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Kobren</surname><given-names>SN</given-names> </name><etal/></person-group><article-title>Deep learning for diagnosing patients with rare genetic diseases</article-title><source>medRxiv</source><comment>Preprint posted online on 2022</comment><pub-id pub-id-type="doi">10.1101/2022.12.07.22283238</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rashid</surname><given-names>R</given-names> </name><name name-style="western"><surname>Copelli</surname><given-names>S</given-names> </name><name name-style="western"><surname>Silverstein</surname><given-names>JC</given-names> </name><name 
name-style="western"><surname>Becich</surname><given-names>MJ</given-names> </name></person-group><article-title>REDCap and the National Mesothelioma Virtual Bank-a scalable and sustainable model for rare disease biorepositories</article-title><source>J Am Med Inform Assoc</source><year>2023</year><month>09</month><day>25</day><volume>30</volume><issue>10</issue><fpage>1634</fpage><lpage>1644</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocad132</pub-id><pub-id pub-id-type="medline">37487555</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Datta</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Paek</surname><given-names>H</given-names> </name><etal/></person-group><article-title>AutoCriteria: a generalizable clinical trial eligibility criteria extraction system powered by large language models</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>01</month><day>18</day><volume>31</volume><issue>2</issue><fpage>375</fpage><lpage>385</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocad218</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Shyr</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Harris</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>H</given-names> </name></person-group><article-title>Identifying and extracting rare disease phenotypes with large language models</article-title><source>arXiv</source><comment>Preprint posted online on  Jun 22, 2023</comment><pub-id 
pub-id-type="doi">arXiv:2306.12656</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Oniani</surname><given-names>D</given-names> </name><name name-style="western"><surname>Hilsman</surname><given-names>J</given-names> </name><name name-style="western"><surname>Dong</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>F</given-names> </name><name name-style="western"><surname>Verma</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name></person-group><article-title>Large language models vote: prompting for rare disease identification</article-title><source>arXiv</source><comment>Preprint posted online on  Aug 23, 2023</comment><pub-id pub-id-type="doi">arXiv:2308.12890</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Vasilevsky</surname><given-names>NA</given-names> </name><name name-style="western"><surname>Matentzoglu</surname><given-names>NA</given-names> </name><name name-style="western"><surname>Toro</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Mondo: unifying diseases for the world, by the world</article-title><source>medRxiv</source><comment>Preprint posted online on  May 3, 2022</comment><pub-id pub-id-type="doi">10.1101/2022.04.13.22273750</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mart&#x00ED;nez-deMiguel</surname><given-names>C</given-names> </name><name name-style="western"><surname>Segura-Bedmar</surname><given-names>I</given-names> </name><name 
name-style="western"><surname>Chac&#x00F3;n-Solano</surname><given-names>E</given-names> </name><name name-style="western"><surname>Guerrero-Aspizua</surname><given-names>S</given-names> </name></person-group><article-title>The RareDis corpus: A corpus annotated with rare diseases, their signs and symptoms</article-title><source>J Biomed Inform</source><year>2022</year><month>01</month><volume>125</volume><fpage>103961</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2021.103961</pub-id><pub-id pub-id-type="medline">34879250</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sinan</surname><given-names>I</given-names> </name><name name-style="western"><surname>Mihdawi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Farahat</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Fida</surname><given-names>M</given-names> </name></person-group><article-title>Knowledge and awareness of rare diseases among healthcare professionals in the Kingdom of Bahrain</article-title><source>Cureus</source><year>2023</year><month>10</month><volume>15</volume><issue>10</issue><fpage>e47676</fpage><pub-id pub-id-type="doi">10.7759/cureus.47676</pub-id><pub-id pub-id-type="medline">38022232</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Wei</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Schuurmans</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Chain-of-thought prompting elicits reasoning in large language models</article-title><source>arXiv</source><comment>Preprint posted online on  Jan 28, 2022</comment><pub-id 
pub-id-type="doi">arXiv:2201.11903</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Dong</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Li</surname><given-names>L</given-names> </name><name name-style="western"><surname>Dai</surname><given-names>D</given-names> </name><etal/></person-group><article-title>A survey on in-context learning</article-title><source>arXiv</source><comment>Preprint posted online on  Dec 31, 2022</comment><pub-id pub-id-type="doi">arXiv:2301.00234</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>Neo4j graph database &#x0026; analytics</article-title><source>Neo4j</source><access-date>2024-12-11</access-date><comment><ext-link ext-link-type="uri" xlink:href="http://neo4j.org">http://neo4j.org</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Alsentzer</surname><given-names>E</given-names> </name><name name-style="western"><surname>Murphy</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Boag</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Publicly available clinical BERT embeddings</article-title><source>arXiv</source><comment>Preprint posted online on  Apr 6, 2019</comment><pub-id pub-id-type="doi">10.18653/v1/W19-1909</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Hunter</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Ruiz</surname><given-names>N</given-names> 
</name></person-group><article-title>Platypus: quick, cheap, and powerful refinement of LLMs</article-title><source>arXiv</source><comment>Preprint posted online on  Aug 14, 2023</comment><pub-id pub-id-type="doi">arXiv:2308.07317</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Touvron</surname><given-names>H</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>L</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Llama 2: open foundation and fine-tuned chat models</article-title><source>arXiv</source><comment>Preprint posted online on  Jul 18, 2023</comment><pub-id pub-id-type="doi">arXiv:2307.09288</pub-id></nlm-citation></ref></ref-list></back></article>