<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e69286</article-id><article-id pub-id-type="doi">10.2196/69286</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>A Weighted Voting Approach for Traditional Chinese Medicine Formula Classification Using Large Language Models: Algorithm Development and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Zhe</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Li</surname><given-names>Keqian</given-names></name><degrees>MM</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Peng</surname><given-names>Suyuan</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Liu</surname><given-names>Lihong</given-names></name><degrees>MM</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yang</surname><given-names>Xiaolin</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yao</surname><given-names>Keyu</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Herre</surname><given-names>Heinrich</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Zhu</surname><given-names>Yan</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences; School of Basic Medicine, Peking Union Medical College</institution><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff2"><institution>Institute for Medical Informatics, Statistics and Epidemiology, University of Leipzig</institution><addr-line>Leipzig</addr-line><country>Germany</country></aff><aff id="aff3"><institution>School of Medical Information, Changchun University of Chinese Medicine</institution><addr-line>Changchun</addr-line><country>China</country></aff><aff id="aff4"><institution>Institute of Information on Traditional Chinese Medicine, China Academy of Chinese Medical Sciences</institution><addr-line>No 16, Nanxiao Street, Dongzhimen</addr-line><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff5"><institution>Institute for Computer Science, University of 
Leipzig</institution><addr-line>Leipzig</addr-line><country>Germany</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Castonguay</surname><given-names>Alexandre</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Wang</surname><given-names>Mengyang</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Li</surname><given-names>Xiaoying</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Liu</surname><given-names>Yishen</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yan Zhu, PhD, Institute of Information on Traditional Chinese Medicine, China Academy of Chinese Medical Sciences, No 16, Nanxiao Street, Dongzhimen, Beijing, 100010, China, 86 010 64089639; <email>zhuyan166@126.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>24</day><month>7</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e69286</elocation-id><history><date date-type="received"><day>26</day><month>11</month><year>2024</year></date><date date-type="rev-recd"><day>30</day><month>04</month><year>2025</year></date><date date-type="accepted"><day>23</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Zhe Wang, Keqian Li, Suyuan Peng, Lihong Liu, Xiaolin Yang, Keyu Yao, Heinrich Herre, Yan Zhu. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 24.7.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e69286"/><abstract><sec><title>Background</title><p>Several clinical cases and experiments have demonstrated the effectiveness of traditional Chinese medicine (TCM) formulas in treating and preventing diseases. These formulas contain critical information about their ingredients, efficacy, and indications. Classifying TCM formulas based on this information can effectively standardize TCM formulas management, support clinical and research applications, and promote the modernization and scientific use of TCM. To further advance this task, TCM formulas can be classified using various approaches, including manual classification, machine learning, and deep learning. Additionally, large language models (LLMs) are gaining prominence in the biomedical field. 
Integrating LLMs into TCM research could significantly enhance and accelerate the discovery of TCM knowledge by leveraging their advanced linguistic understanding and contextual reasoning capabilities.</p></sec><sec><title>Objective</title><p>The objective of this study is to evaluate the performance of different LLMs in the TCM formula classification task. Additionally, by employing ensemble learning with multiple fine-tuned LLMs, this study aims to enhance classification accuracy.</p></sec><sec sec-type="methods"><title>Methods</title><p>The data for the TCM formula were manually refined and cleaned. We selected 10 LLMs that support Chinese for fine-tuning. We then employed an ensemble learning approach that combined the predictions of multiple models using both hard and weighted voting, with weights determined by the average accuracy of each model. Finally, we selected the top 5 most effective models from each series of LLMs for weighted voting (top 5) and the top 3 most accurate models of 10 for weighted voting (top 3).</p></sec><sec sec-type="results"><title>Results</title><p>A total of 2441 TCM formulas were curated manually from multiple sources, including the Coding Rules for Chinese Medicinal Formulas and Their Codes, the Chinese National Medical Insurance Catalog for proprietary Chinese medicines, textbooks of TCM formulas, and TCM literature. The dataset was divided into a training set of 1999 TCM formulas and test set of 442 TCM formulas. The testing results showed that Qwen-14B achieved the highest accuracy of 75.32% among the single models. The accuracy rates for hard voting, weighted voting, weighted voting (top 5), and weighted voting (top 3) were 75.79%, 76.47%, 75.57%, and 77.15%, respectively.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study aims to explore the effectiveness of LLMs in the TCM formula classification task. 
To this end, we propose an ensemble learning method that integrates multiple fine-tuned LLMs through a voting mechanism. This method not only improves classification accuracy but also enhances the existing classification system for classifying the efficacy of TCM formula.</p></sec></abstract><kwd-group><kwd>traditional Chinese medicine</kwd><kwd>TCM formula classification</kwd><kwd>large language models</kwd><kwd>ensemble learning</kwd><kwd>algorithm development</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Traditional Chinese medicine (TCM) formulas are combinations of medicinal substances developed through clinical experience and guided by TCM theory. They are developed through a systematic process involving syndrome differentiation, etiological analysis, determination of therapeutic principles, selection of appropriate herbs, dosage adjustment, formulation considerations, and specification of use, all in accordance with fundamental compositional structures [<xref ref-type="bibr" rid="ref1">1</xref>]. Several studies have shown that TCM formulas have significant efficacy in the treatment and prevention of disease in both clinical and experimental studies [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. A randomized clinical trial demonstrated that the TCM compound Tongxinluo significantly improved clinical outcomes in patients diagnosed with ST-segment elevation myocardial infarction [<xref ref-type="bibr" rid="ref5">5</xref>]; the traditional medicine Pien Tze Huang prevents colorectal cancer by influencing the gut microbiota, enhancing beneficial metabolites, and suppressing oncogenic and proinflammatory factors [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>Classical TCM formulas represent exemplary applications of TCM prescriptions. 
The selection and cataloging system for classical formulas streamlines the approval process for Chinese herbal compound preparations by exempting them from pharmacological studies and clinical trial data submissions. These formulas are defined as &#x201C;those derived from ancient classical prescriptions that remain widely used today, demonstrate proven efficacy, exhibit distinctive characteristics and advantages, and were documented in medical texts prior to or during the Qing Dynasty.&#x201D; However, after thousands of years of clinical practice, TCM had accumulated an immense number of formulas by the late Qing period; incomplete statistics indicate that over 100,000 had been recorded [<xref ref-type="bibr" rid="ref7">7</xref>]. Current TCM formula textbooks and national standards still follow the efficacy-oriented classification system established in Wang Ang&#x2019;s Qing Dynasty work <italic>Yi Fang Ji Jie</italic> (Compilation of Medical Formulas, &#x533B;&#x65B9;&#x96C6;&#x89E3;) [<xref ref-type="bibr" rid="ref8">8</xref>]. Prior to the introduction of this system, many formulas lacked clear categorization, making manual classification both labor-intensive and susceptible to inconsistencies.</p><p>As the foundation of TCM syndrome differentiation and treatment, systematic classification of TCM formulas based on efficacy enables a comprehensive exploration of their latent information and reveals herb-disease relationship patterns. Previous research primarily relied on expert experience to develop classification systems organized by therapeutic methods (efficacy) [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. With advances in artificial intelligence, new methodological pathways have emerged for automated formula classification. Gu [<xref ref-type="bibr" rid="ref11">11</xref>] used the k-nearest neighbor algorithm in machine learning to calculate and classify the similarity of TCM formulas. 
However, the complex relationships between herbs and diseases in TCM formulas complicate the classification process. Cheng et al [<xref ref-type="bibr" rid="ref12">12</xref>] present an improved deep learning model: S-TextBLCNN for the TCM formula classification task; it has an accuracy of 0.858 and an <italic>F</italic><sub>1</sub>-score of 0.762. To further explore the optimal deep learning models in the TCM formula classification task, Ren et al [<xref ref-type="bibr" rid="ref13">13</xref>] combined several deep learning models to classify TCM formulas and found that bidirectional encoder representation from transformers-convolutional neural network was the most effective, achieving an accuracy of 77.87%, as well as weighted precision, weighted recall, and weighted <italic>F</italic><sub>1</sub>-score of 79.46%, 77.87%, and 77.44%, respectively. In 2022, OpenAI released applications such as ChatGPT [<xref ref-type="bibr" rid="ref14">14</xref>], which have demonstrated strong performance in tasks like question answering and translation. Moreover, large language models (LLMs) have achieved remarkable results in the biomedical domain. For instance, Google&#x2019;s Med-PaLM2, which was fine-tuned from PaLM2 using data from the medical domain, achieved 86.5% accuracy on the MedQA dataset [<xref ref-type="bibr" rid="ref15">15</xref>], which is close to the response level of clinical doctors [<xref ref-type="bibr" rid="ref16">16</xref>]. Nijkamp et al [<xref ref-type="bibr" rid="ref17">17</xref>] have trained the ProGen2 model on many different sequence datasets and demonstrated state-of-the-art performance both in generating novel viable protein sequences and in predicting protein fitness tasks. Some scholars have tried to use LLMs to classify text [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]; therefore, we sought to verify the classification ability of LLMs. 
In our previous study [<xref ref-type="bibr" rid="ref20">20</xref>], we used the prompt templates and LLMs, such as ChatGLM-6B [<xref ref-type="bibr" rid="ref21">21</xref>], ChatGLM2-6B [<xref ref-type="bibr" rid="ref22">22</xref>], InternLM-20B [<xref ref-type="bibr" rid="ref23">23</xref>], ChatGLM-130B [<xref ref-type="bibr" rid="ref24">24</xref>], and ChatGPT, to classify TCM formula and validate the potential of LLMs in the field of TCM.</p><p>Based on our previous study [<xref ref-type="bibr" rid="ref20">20</xref>], this study further explores the classification of TCM formulas by fine-tuning 10 distinct LLMs. It introduces an ensemble voting method based on multiple fine-tuned LLMs. This approach emphasizes the integration of predictions from each model through voting mechanisms, including both hard voting and weighted voting. The adoption of multiple LLMs for TCM formula classification offers a novel perspective on the application of LLMs in the field of TCM.</p></sec><sec id="s1-2"><title>Research Question</title><p>In our previous work [<xref ref-type="bibr" rid="ref20">20</xref>], we used LLMs to investigate TCM formula classification. We used prompt templates and both fine-tuned and original LLMs. The experimental results demonstrated that fine-tuned LLMs can enhance classification task accuracy. 
Therefore, in this study, we aim to explore the potential of LLMs in TCM formula classification tasks and rationality by posing the following research questions (RQs):</p><list list-type="bullet"><list-item><p>RQ1: What is the performance of different LLMs for TCM formula classification?</p></list-item><list-item><p>RQ2: How to improve the performance of TCM formula classification by multiple LLMs?</p></list-item><list-item><p>RQ3: What is the rationality of TCM formula classification using LLMs?</p></list-item></list><p>To explore RQ1, we used 10 LLMs and fine-tuned them on a manually curated TCM formula dataset, followed by a comparative analysis of the results. To address RQ2, we used an ensemble learning approach. Using the average accuracy derived from the fine-tuned LLMs as model weights, we performed both hard and weighted voting on the results. In addition, we explored the effectiveness of different strategies, including selecting the best-performing model within each category and weighted voting based on the 3 best-performing models, to determine the approach that yields optimal results. To investigate RQ3, TCM experts manually reviewed the predicted results generated by the fine-tuned LLMs. This process involved analyzing discrepancies between the voting results of the LLMs and the reference answer, thereby exploring the rationality behind the classification by the LLMs in the TCM formula classification task.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Data Preparation</title><p>We used 2441 TCM formulas manually curated from the Coding Rules for Chinese Medicinal Formulas and Their Codes [<xref ref-type="bibr" rid="ref25">25</xref>], the Chinese National Medical Insurance Catalog for proprietary Chinese medicines (2023) [<xref ref-type="bibr" rid="ref26">26</xref>], and textbooks of formulas of Chinese medicine [<xref ref-type="bibr" rid="ref10">10</xref>]. 
After manual review [<xref ref-type="bibr" rid="ref13">13</xref>], each formula consists of the formula name, ingredients, efficacy, and indications. The data processing procedure is as follows: only efficacy-related classifications are retained, while non-efficacy classifications, such as ethnic minority medicine, are removed; formulas under identical or similar classifications are consolidated (with classification names standardized according to national guidelines), while unique formula classifications are preserved; finally, the resulting data are subjected to deduplication.</p><p>In the Qing Dynasty, Wang Ang, in his work <italic>Yifang Jijie</italic>, proposed an integrated classification method that prioritized the efficacy of the TCM formula. This method was not only used in the textbooks of Chinese medicine formulas throughout various dynasties [<xref ref-type="bibr" rid="ref27">27</xref>] and the secondary classification of the Chinese National Medical Insurance Catalog for proprietary Chinese medicines [<xref ref-type="bibr" rid="ref28">28</xref>] but also served as the method used in the national standard, the Coding Rules for Chinese Medicinal Formulas and Their Codes. Therefore, this study established a harmonized efficacy classification system based on national standards, integrating multisource TCM formula data through structural realignment and category consolidation (the data remain intact with only standardized nomenclature adjustments made to classification categories exhibiting terminological variations, following national regulatory requirements), resulting in 22 standardized categories. 
However, the emetic formulations category contained insufficient samples and was therefore excluded; based on this categorization, a total of 2441 formulations were identified across 21 categories, as detailed in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Statistical information of traditional Chinese medicine formula data.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Standard efficacy</td><td align="left" valign="bottom">Abbreviation</td><td align="left" valign="bottom">Chinese name</td><td align="left" valign="bottom">Formulations (N=2441), n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Supplementing and boosting formula</td><td align="left" valign="top">SBF</td><td align="left" valign="top">&#x8865;&#x76CA;&#x5242;</td><td align="left" valign="top">417 (17.08)</td></tr><tr><td align="left" valign="top">Heat-clearing formula</td><td align="left" valign="top">HCF</td><td align="left" valign="top">&#x6E05;&#x70ED;&#x5242;</td><td align="left" valign="top">411 (16.84)</td></tr><tr><td align="left" valign="top">Blood-regulating formula</td><td align="left" valign="top">BRF</td><td align="left" valign="top">&#x7406;&#x8840;&#x5242;</td><td align="left" valign="top">372 (15.24)</td></tr><tr><td align="left" valign="top">Desiccating formula</td><td align="left" valign="top">DF</td><td align="left" valign="top">&#x795B;&#x6E7F;&#x5242;</td><td align="left" valign="top">246 (10.08)</td></tr><tr><td align="left" valign="top">Superficies relieving formula</td><td align="left" valign="top">SF</td><td align="left" valign="top">&#x89E3;&#x8868;&#x5242;</td><td align="left" valign="top">160 (6.55)</td></tr><tr><td align="left" valign="top">Resolving phlegm relieving cough and relieving wheezing formula</td><td align="left" valign="top">RPRCRWF</td><td align="left" 
valign="top">&#x5316;&#x75F0;-&#x6B62;&#x54B3;-&#x5E73;&#x5598;&#x5242;</td><td align="left" valign="top">160 (6.55)</td></tr><tr><td align="left" valign="top">Qi regulated formula</td><td align="left" valign="top">QRF</td><td align="left" valign="top">&#x7406;&#x6C14;&#x5242;</td><td align="left" valign="top">134 (5.49)</td></tr><tr><td align="left" valign="top">Formula for wind disorder</td><td align="left" valign="top">FWD</td><td align="left" valign="top">&#x6CBB;&#x98CE;&#x5242;</td><td align="left" valign="top">119 (4.86)</td></tr><tr><td align="left" valign="top">Warming interior formula</td><td align="left" valign="top">WIF</td><td align="left" valign="top">&#x6E29;&#x91CC;&#x5242;</td><td align="left" valign="top">73 (2.99)</td></tr><tr><td align="left" valign="top">Formula for purgation</td><td align="left" valign="top">FP</td><td align="left" valign="top">&#x6CFB;&#x4E0B;&#x5242;</td><td align="left" valign="top">60 (2.46)</td></tr><tr><td align="left" valign="top">Reconciling formula</td><td align="left" valign="top">RF</td><td align="left" valign="top">&#x548C;&#x89E3;&#x5242;</td><td align="left" valign="top">43 (1.76)</td></tr><tr><td align="left" valign="top">Tranquillization formula</td><td align="left" valign="top">TF</td><td align="left" valign="top">&#x5B89;&#x795E;&#x5242;</td><td align="left" valign="top">39 (1.60)</td></tr><tr><td align="left" valign="top">Digestive formula</td><td align="left" valign="top">DIF</td><td align="left" valign="top">&#x6D88;&#x98DF;&#x5242;</td><td align="left" valign="top">38 (1.56)</td></tr><tr><td align="left" valign="top">Astringent formula</td><td align="left" valign="top">AF</td><td align="left" valign="top">&#x56FA;&#x6DA9;&#x5242;</td><td align="left" valign="top">32 (1.31)</td></tr><tr><td align="left" valign="top">Softening hard lumps and dispelling nodes formula</td><td align="left" valign="top">SHLDNF</td><td align="left" valign="top">&#x6D88;&#x80BF;&#x6563;&#x7ED3;&#x5242;</td><td align="left" 
valign="top">27 (1.11)</td></tr><tr><td align="left" valign="top">Formula for treating carbuncle and ulcer</td><td align="left" valign="top">FTCU</td><td align="left" valign="top">&#x75C8;&#x75A1;&#x5242;</td><td align="left" valign="top">25 (1.02)</td></tr><tr><td align="left" valign="top">Summer-heat-expelling formula</td><td align="left" valign="top">SHEF</td><td align="left" valign="top">&#x795B;&#x6691;&#x5242;</td><td align="left" valign="top">22 (0.90)</td></tr><tr><td align="left" valign="top">Formula for resuscitation</td><td align="left" valign="top">FR</td><td align="left" valign="top">&#x5F00;&#x7A8D;&#x5242;</td><td align="left" valign="top">21 (0.86)</td></tr><tr><td align="left" valign="top">Antidryness formula</td><td align="left" valign="top">ADF</td><td align="left" valign="top">&#x6CBB;&#x71E5;&#x5242;</td><td align="left" valign="top">16 (0.66)</td></tr><tr><td align="left" valign="top">Resolving turbidity and lowering lipids formula</td><td align="left" valign="top">RTLLF</td><td align="left" valign="top">&#x5316;&#x6D4A;&#x964D;&#x8102;&#x5242;</td><td align="left" valign="top">16 (0.66)</td></tr><tr><td align="left" valign="top">Antihelminthic formula</td><td align="left" valign="top">AHF</td><td align="left" valign="top">&#x9A71;&#x866B;&#x5242;</td><td align="left" valign="top">10 (0.41)</td></tr></tbody></table></table-wrap></sec><sec id="s2-2"><title>Selected LLMs</title><p>Since TCM formulas are documented in Chinese or ancient Chinese, in this study, the LLMs that have exceptional performance in Chinese or support multilingualism were selected to facilitate the model&#x2019;s understanding of this information [<xref ref-type="bibr" rid="ref29">29</xref>].</p><sec id="s2-2-1"><title>ChatGLM</title><p>ChatGLM-6B, ChatGLM2-6B, and ChatGLM3 are a series of open bilingual language models developed by the Knowledge Engineering Group and Data Mining of Tsinghua University. 
These models can be easily deployed and fine-tuned on standard consumer-grade graphics processing units, enabling users to perform personalized tasks [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. The official script was used to fine-tune these LLMs in this study.</p></sec><sec id="s2-2-2"><title>InternLM</title><p>The Shanghai Artificial Intelligence Laboratory, SenseTime Technology, the Chinese University of Hong Kong, and Fudan University jointly introduced InternLM-20B and InternLM-7B, which showed exceptional performance in areas such as mathematics, code, dialogue, and creative writing [<xref ref-type="bibr" rid="ref23">23</xref>]. In this study, Xtuner [<xref ref-type="bibr" rid="ref30">30</xref>] was used to fine-tune InternLM-20B for TCM formula classification. This tool helps users to fine-tune LLMs with limited hardware resources.</p></sec><sec id="s2-2-3"><title>Baichuan2</title><p>Baichuan2 is a multilanguage LLM developed by Baichuan-AI. It has been trained on 2.6 trillion tokens and performs well in medical and legal areas. Currently, Baichuan2 has released 7B and 13B to users [<xref ref-type="bibr" rid="ref31">31</xref>]. We fine-tuned Baichuan2-7B and Baichuan2-13B for our task using Xtuner.</p></sec><sec id="s2-2-4"><title>Qwen</title><p>Qwen is a series of language models introduced by AliCloud, including Qwen-1.8B, Qwen-7B, Qwen-14B, and Qwen-72B [<xref ref-type="bibr" rid="ref32">32</xref>]. The official script was used to fine-tune these models in this study. It is worth noting that we fine-tuned Qwen-1.8B with full parameters to improve the performance of the task.</p></sec><sec id="s2-2-5"><title>BLOOM</title><p>BigScience [<xref ref-type="bibr" rid="ref33">33</xref>] has launched a series of models of different sizes, known as the BigScience Large Open-Science Open-Access Multilingual Language Model (BLOOM), including 1B, 7B, 13B, and 176B. 
BLOOM&#x2019;s LLM generates text in multiple languages and codes. In this study, we fine-tuned the BLOOM-1.7B with full parameters using the LLMTuner tool [<xref ref-type="bibr" rid="ref34">34</xref>].</p></sec></sec><sec id="s2-3"><title>Ensemble Voting Algorithms for TCM Formula Classification</title><p>In our prior work, we used methods with prompt templates and fine-tuned LLMs to classify TCM formulas. The fine-tuned ChatGLM2-6B demonstrated optimal performance, achieving an accuracy rate of 71% in the classification task. However, other models did not surpass the 70% accuracy rate. Its performance failed to surpass that of the deep learning model trained on a single task. In a different study, Chatterjee et al [<xref ref-type="bibr" rid="ref35">35</xref>] proposed that an ensemble voting method outperforms traditional single classifiers in accurately diagnosing Alzheimer disease. To improve the accuracy of the fine-tuned LLMs in the TCM formula classification task, we adopted an ensemble learning approach; this methodology involved using several fine-tuned LLMs and developing a voting mechanism specifically tailored to their outputs. 
The goal of this methodology is to combine the predictions of multiple models thereby increasing the accuracy of the TCM formula classification task.</p><disp-formula id="equWL1"> <label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mtext>hard</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>arg</mml:mi><mml:mo>&#x2061;</mml:mo><mml:munder><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mi>c</mml:mi></mml:munder><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mn mathvariant="double-struck">1</mml:mn></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>In <xref ref-type="disp-formula" rid="equWL1">equation 1</xref>, we have <italic>N</italic> fine-tuned LLMs, where the predicted value for LLM<sub><italic>i</italic></sub> is denoted by <inline-formula><mml:math id="ieqn1"><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> , <italic>N</italic>=10, and <italic>c</italic> represents all classes of the TCM formula. 
<inline-formula><mml:math id="ieqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mn mathvariant="double-struck">1</mml:mn></mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula> is the indicator function, equal to 1 if <inline-formula><mml:math id="ieqn3"><mml:msub><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>c</mml:mi></mml:math></inline-formula>, and 0 otherwise. The final prediction result <inline-formula><mml:math id="ieqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mrow><mml:mi mathvariant="normal">h</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula> is determined by selecting the class with the highest number of votes.</p><disp-formula id="equWL2"> <label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mtext>weighted</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>arg</mml:mi><mml:mo>&#x2061;</mml:mo><mml:munder><mml:mo movablelimits="true" form="prefix">max</mml:mo><mml:mi>c</mml:mi></mml:munder><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mrow><mml:mn mathvariant="double-struck">1</mml:mn></mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>In <xref ref-type="disp-formula" rid="equWL2">equation 2</xref>, <inline-formula><mml:math id="ieqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula> represents the average accuracy of each LLM, the votes for each LLM are multiplied by the corresponding weights, and the resulting weighted votes are summed. Finally, the TCM formula class with the highest weighted sum is finally selected as the final prediction.</p></sec><sec id="s2-4"><title>Evaluation Metrics</title><p>To evaluate the performance of the fine-tuned LLMs on text classification tasks, we used the accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score, as defined in <xref ref-type="disp-formula" rid="equWL3 equWL4 equWL5 equWL6">equation 3-6</xref>, based on true positives (TP), true negatives (TN), false positives (FP), and false negatives (FN).</p><disp-formula id="equWL3"> <label>(3)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mi mathvariant="normal">A</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">u</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">y</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi 
mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL4"> <label>(4)</label><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mtable rowspacing="4pt" columnspacing="1em"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL5"> <label>(5)</label><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi 
mathvariant="normal">P</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">T</mml:mi><mml:mi mathvariant="normal">P</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">F</mml:mi><mml:mi mathvariant="normal">N</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL6"> <label>(6)</label><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="italic">F</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mtext>-</mml:mtext><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mo>&#x22C5;</mml:mo><mml:mrow><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow><mml:mo>&#x22C5;</mml:mo><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi 
mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Due to the unbalanced distribution of the TCM formula in our test set, we calculated the weighted precision, the weighted recall, and the weighted <italic>F</italic><sub>1</sub>-score to comprehensively evaluate the performance of the models in multiclassification tasks in <xref ref-type="disp-formula" rid="equWL7 equWL8 equWL9">equation 7-9</xref>.</p><disp-formula id="equWL7"> <label>(7)</label><mml:math id="eqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi></mml:mrow><mml:mrow><mml:mtext>weighted-avg</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">P</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi 
mathvariant="normal">n</mml:mi></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL8"> <label>(8)</label><mml:math id="eqn8"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow><mml:mrow><mml:mtext>weighted-avg</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="normal">R</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL9"><label>(9)</label><mml:math id="eqn9"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="italic">F</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mtext>-</mml:mtext><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi 
mathvariant="normal">e</mml:mi></mml:mrow><mml:mrow><mml:mtext>weighted-avg</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="italic">F</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mtext>-</mml:mtext><mml:mi mathvariant="normal">s</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">e</mml:mi></mml:mrow><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>With the weighted approach, the performance in detecting classes with a larger number of samples can be adequately represented by assigning weights based on the percentage of sample numbers. In our current investigation, we are dealing with a dataset containing 21 classes of TCM formulas. 
In <xref ref-type="disp-formula" rid="equWL7 equWL8 equWL9">equations 7-9</xref>, <italic>L</italic>=21, where <inline-formula><mml:math id="ieqn6"><mml:msub><mml:mrow><mml:mi mathvariant="normal">&#x03C9;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the proportion of each TCM formula in the total dataset.</p></sec><sec id="s2-5"><title>Experiment</title><sec id="s2-5-1"><title>Experimental Design and Verification</title><sec id="s2-5-1-1"><title>Overview</title><p>We designed the following experiment to explore RQ1, RQ2, and RQ3, as shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Workflow of traditional Chinese medicine formula classification using large language models. BLOOM: BigScience Large Open-Science Open-Access Multilingual Language Model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e69286_fig01.png"/></fig></sec><sec id="s2-5-1-2"><title>Experiment 1</title><p>To address RQ1, a prompt dataset was formulated from the curated TCM formula data. The processed prompt dataset was then divided into a training set and a test set containing 1999 and 442 formulas, respectively. The selected LLMs were fine-tuned using the training set, and their performance was evaluated using the test set. This process was iterated 10 times to obtain the average accuracy.</p></sec><sec id="s2-5-1-3"><title>Experiment 2</title><p>To validate RQ2, we used a multistage validation process. First, we implemented a hard voting approach to aggregate model predictions and derive a consolidated result. We then calculated the accuracy of this method. In the next step, we assigned weights to each model based on its average accuracy. Using a weighted voting strategy, we then calculated the accuracy of the weighted voting method. 
In the third step, we selected the model with the highest accuracy from each category and used these selections for weighted voting. Finally, we identified the top 3 models among the fine-tuned LLMs and conducted a weighted vote to determine the most effective voting method.</p></sec><sec id="s2-5-1-4"><title>Experiment 3</title><p>To validate RQ3, we extracted a subset of data from the voting results that met the following two conditions: (1) All model predictions were identical but different from the reference answer, or (2) more than 80% of the model predictions were the same but different from the reference answer. TCM experts subsequently reviewed this subset to evaluate the rationality of the LLMs&#x2019; classification results.</p></sec></sec></sec><sec id="s2-6"><title>Experimental Configuration Parameter and Platform</title><p>To ensure robust performance, we fine-tuned LLMs using the common configurations and selected the model that achieved the most stable and optimal results. The details of the configurations for the fine-tuned LLMs are shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The fine-tuning and verification were done by running and validating the LLMs on graphics processing unit computer servers.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>We confirm that this study did not involve human or animal subjects. It used publicly available, deidentified text data sourced from the web. Therefore, no ethics approval was required in accordance with relevant institutional guidelines and the JMIR editorial policy on ethics review requirements.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>The Experimental Results of Each Fine-Tuned LLM</title><p>In experiment 1, we performed a 10-iteration validation for each model using the test set. <xref ref-type="fig" rid="figure2">Figure 2</xref> shows the mean accuracy for each model. 
Notably, Qwen-14B (mean 75.32%, SD 0.48%) and Qwen-7B (mean 74.32%, SD 0.37%) showed the highest performance. Close behind were Qwen-1.8B (mean 72.96%, SD 0.53%), InternLM-20B (mean 72.40%, SD 0.47%), Baichuan2-7B (mean 70.86%, SD 0.32%), Baichuan2-13B (mean 71.63%, SD 0.38%), ChatGLM-6B (mean 70.09%, SD 0.30%), ChatGLM2-6B (mean 71.09%, SD 0.80%), and BLOOM-1.7B (mean 70.45%, SD 0.44%), all with accuracies above 70%. However, ChatGLM3-6B (mean 66.70%, SD 0.44%) did not exceed 70% accuracy (<xref ref-type="table" rid="table2">Table 2</xref>). The fine-tuning of individual LLMs does not lead to remarkable results for the TCM formula classification task in this study. It is noteworthy that the Qwen series of LLMs showed promising performance in the TCM classification task, and Qwen-1.8B and BLOOM-1.7B achieved significant accuracy after full-parameter fine-tuning.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>The average accuracy of each large language model in experiment 1. BLOOM: BigScience Large Open-Science Open-Access Multilingual Language Model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e69286_fig02.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>The experimental results of each fine-tuned large language model (LLM).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">LLMs</td><td align="left" valign="bottom">Accuracy (%), mean (SD)</td></tr></thead><tbody><tr><td align="left" valign="top">Qwen-14B</td><td align="char" char="." valign="top">75.32 (0.48)</td></tr><tr><td align="left" valign="top">Qwen-7B</td><td align="char" char="." valign="top">74.32 (0.37)</td></tr><tr><td align="left" valign="top">Qwen-1.8B</td><td align="char" char="." valign="top">72.96 (0.53)</td></tr><tr><td align="left" valign="top">InternLM-20B</td><td align="char" char="." 
valign="top">72.40 (0.47)</td></tr><tr><td align="left" valign="top">Baichuan2-7B</td><td align="char" char="." valign="top">70.86 (0.32)</td></tr><tr><td align="left" valign="top">Baichuan2-13B</td><td align="char" char="." valign="top">71.63 (0.38)</td></tr><tr><td align="left" valign="top">ChatGLM-6B</td><td align="char" char="." valign="top">70.09 (0.30)</td></tr><tr><td align="left" valign="top">ChatGLM2-6B</td><td align="char" char="." valign="top">71.09 (0.80)</td></tr><tr><td align="left" valign="top">ChatGLM3-6B</td><td align="char" char="." valign="top">66.70 (0.44)</td></tr><tr><td align="left" valign="top">BLOOM-1.7B</td><td align="char" char="." valign="top">70.45 (0.44)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>TCM Formula Classification Using Ensemble Learning</title><p>In experiment 2, to improve the accuracy of the classification task for validating RQ2, we used an ensemble learning approach by integrating fine-tuned LLMs for collective voting predictions. Given the clear result type of our fine-tuned model results (<xref ref-type="table" rid="table3">Table 3</xref>), we first applied hard voting, which resulted in an accuracy of 75.79%. The weighted precision, weighted recall, and weighted <italic>F</italic><sub>1</sub>-scores were calculated as 76.10%, 75.79%, and 75.31%, respectively. The confusion matrix is shown in <xref ref-type="fig" rid="figure3">Figure 3B</xref>. These results outperformed those of the single models; however, due to the generally low accuracy of our fine-tuned models, the single model results could potentially influence the final results. We therefore developed a weighted voting method using the average accuracy of each model as the weight. This approach resulted in an accuracy of 76.47%, with weighted precision, weighted recall, and weighted <italic>F</italic><sub>1</sub>-scores of 76.57%, 76.47%, and 75.98%, respectively. 
The weighted voting results not only outperformed direct hard voting but also showed superior performance on the 21 classifications within the test set, as shown in <xref ref-type="fig" rid="figure3">Figure 3C</xref>.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Accuracy, weighted precision, weighted recall, and weighted <italic>F</italic><sub>1</sub>-score and their confusion matrices in experiment 2. (A) Accuracy, weighted precision, weighted recall, and weighted <italic>F</italic><sub>1</sub>-score of different ensemble voting methods. (B) Confusion matrix of the hard voting method. (C) Confusion matrix of the weighted voting method. ADF: antidryness formula; AF: astringent formula; AHF: antihelminthic formula; BRF: blood-regulating formula; DF: desiccating formula; DIF: digestive formula; FP: formula for purgation; FR: formula for resuscitation; FTCU: formula for treating carbuncle and ulcer; FWD: formula for wind disorder; HCF: heat-clearing formula; QRF: Qi regulated formula; RF: reconciling formula; RPRCRWF: resolving phlegm relieving cough and relieving wheezing formula; RTLLF: resolving turbidity and lowering lipids formula; SBF: supplementing and boosting formula; SF: superficies relieving formula; SHEF: summer-heat-expelling formula; SHLDNF: softening hard lumps and dispelling nodes formula; TF: tranquilization formula; WIF: warming interior formula.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e69286_fig03.png"/></fig><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Findings of experiment 2.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Method</td><td align="left" valign="bottom">Accuracy (%)</td><td align="left" valign="bottom">Weighted precision (%)</td><td align="left" valign="bottom">Weighted recall (%)</td><td align="left" valign="bottom">Weighted 
<italic>F</italic><sub>1</sub>-score (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Hard voting</td><td align="char" char="." valign="top">75.79</td><td align="char" char="." valign="top">76.10</td><td align="char" char="." valign="top">75.79</td><td align="char" char="." valign="top">75.31</td></tr><tr><td align="left" valign="top">Weighted voting</td><td align="char" char="." valign="top">76.47</td><td align="char" char="." valign="top">76.57</td><td align="char" char="." valign="top">76.47</td><td align="char" char="." valign="top">75.98</td></tr><tr><td align="left" valign="top">Weighted voting (top 5)</td><td align="char" char="." valign="top">75.57</td><td align="char" char="." valign="top">75.72</td><td align="char" char="." valign="top">75.57</td><td align="char" char="." valign="top">75.05</td></tr><tr><td align="left" valign="top">Weighted voting (top 3)</td><td align="char" char="." valign="top">77.15</td><td align="char" char="." valign="top">78.69</td><td align="char" char="." valign="top">77.15</td><td align="char" char="." valign="top">76.90</td></tr></tbody></table></table-wrap><p>To further investigate the impact of multiple LLMs on the voting results, we selected the best-performing model within each category (top 5) and performed weighted voting, namely Baichuan2-13B, ChatGLM2-6B, InternLM-20B, Qwen-14B, and BLOOM-1.7B. The statistical results showed an accuracy of 75.57%, a weighted precision of 75.72%, a weighted recall of 75.57%, and a weighted <italic>F</italic><sub>1</sub>-score of 75.05%, as shown in <xref ref-type="fig" rid="figure3">Figure 3A</xref>. The confusion matrix is shown in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><p>We selected the 3 models with the highest accuracy, namely Qwen-14B, Qwen-7B, and Qwen-1.8B, to subject their prediction results to weighted voting. 
The resulting accuracy was 77.15%, the weighted precision was 78.69%, the weighted recall was 77.15%, and the weighted <italic>F</italic><sub>1</sub>-score was 76.90%, as shown in <xref ref-type="fig" rid="figure3">Figure 3A</xref>. The confusion matrix is shown in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. As can be seen in <xref ref-type="fig" rid="figure3">Figure 3A</xref>, the accuracy obtained by weighted voting (top 3) was the highest, surpassing the accuracy obtained by hard voting. Hard voting and weighted voting (top 5) were similar, with no significant differences.</p></sec><sec id="s3-3"><title>Results Analysis of Test Set</title><p>During the analysis of LLM voting results on the test set, through TCM expert discussions on the classification results, we observed that in some cases, all LLMs voted the same way, but the results did not match the standard answers. Experts judged that the voting results of the LLMs had a certain degree of rationality. In such cases, it is necessary to discuss why the models reached the same voting results that deviate from the original answers and the rationality of these voting results. We illustrate this with a typical example (<xref ref-type="fig" rid="figure4">Figure 4</xref>).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Results analysis of TCM experts. ADF: antidryness formula; LLM: large language model; SBF: supplementing and boosting formula; TCM: traditional Chinese medicine.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e69286_fig04.png"/></fig><p>TCM formula &#x201C;Yiweitang or Yiwei decoction (YWD)&#x201D; consists of Chinese herbs: Bei Sha Shen, Mai Dong, Bing Tang, Di Huang, and Yu Zhu. The indications for treatment are stomach yin injury syndrome. 
Symptoms include a burning and dull pain in the stomach, lack of appetite despite hunger, dry mouth and throat, dry and hard stool, dry retching, and hiccups. The existing classification of YWD is antidryness formula. However, current scholars have researched Yiwei decoction for the treatment of premature ovarian insufficiency [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>] and the prevention of osteoporosis related to it [<xref ref-type="bibr" rid="ref38">38</xref>], proving its efficacy through methods such as network pharmacology and molecular research. Zhang and Zhu [<xref ref-type="bibr" rid="ref39">39</xref>] have also used the spectrum-effect relationship and network pharmacology to screen for the antioxidant components of Yiwei decoction, demonstrating its function in nourishing stomach yin. Both the treatment or prevention of premature ovarian insufficiency and its related diseases, as well as the nourishing stomach yin function of YWD, fall under the category of supplementing and boosting formula.</p><p>In summary, through these discussion points and specific examples, we can gain a deeper understanding of the performance of LLMs in the classification of TCM formula. This can serve as a basis for exploring and reflecting on the existing classification system of formula efficacy. It provides reference and insights for future research on formula efficacy classification.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>TCM formulas contain valuable information on ingredients, efficacy, and indications. They serve as an important reference for researchers in both clinical and experimental contexts. In this study, we used fine-tuned LLMs in combination with an ensemble learning approach to classify TCM formulas and identify potential information, thereby improving the accuracy of TCM formula classification. 
Our approach provides a new method for studying formulaic information in TCM.</p><p>In this study, we posed 3 RQs and designed corresponding experiments to explore the potential and rationality of fine-tuned LLMs in the TCM formula classification task. The results showed that the fine-tuned Qwen-14B performed remarkably well in the task, achieving an average accuracy of 75.32% (SD 0.48%). However, the accuracy of a single model did not exceed that of deep learning models trained for a single task. To enhance accuracy, we used 2 ensemble methods: hard voting and weighted voting. These methods integrate the 10 fine-tuned LLMs. The accuracy of hard voting and weighted voting reached 75.79% and 76.47%, respectively. The results demonstrate that both hard voting and weighted voting outperform the individual LLMs. Notably, weighted voting (top 3) achieved the highest accuracy, reaching 77.15%. To evaluate the rationality of the LLMs in the task, we analyzed their prediction errors. In the selected example, all models produced the same prediction, which differed from the reference answer based on YWD. After being analyzed by TCM experts, some of the results predicted by the LLMs were deemed reasonable and can be used as a reference to improve the existing efficacy classification of TCM formulas.</p><p>For RQ3, compared to traditional rule-based or deep learning approaches, LLMs demonstrate superior capabilities in automatically identifying latent relationships between herbal formulas. By integrating multidimensional information, such as herbal composition, therapeutic effects, clinical indications, and modern medical research, they build more comprehensive classification systems to uncover potential information. Experimental results show that LLMs can detect potential categories that are not recognized in existing expert classification frameworks. 
In typical examples, LLM-predicted classifications were validated as clinically plausible by expert panel reviews and literature evidence, suggesting their potential to provide novel scientific foundations for updating and optimizing expert-based classification systems.</p><p>Therefore, from both theoretical and technical perspectives, we posit that LLMs can generate more meaningful outcomes for TCM formula classification. This serves as a basis for exploring and reflecting on the existing classification system of formula efficacy, providing references and insights for the subsequent screening of classic formulas.</p></sec><sec id="s4-2"><title>Limitations</title><p>However, despite numerous studies on the effectiveness of LLMs in classification tasks [<xref ref-type="bibr" rid="ref40">40</xref>-<xref ref-type="bibr" rid="ref42">42</xref>], there is a lack of research on text classification in the field of TCM. Our study confirmed that LLMs can achieve a certain effect in the TCM formula classification task. Due to the complexity of TCM knowledge, achieving high accuracy with simple fine-tuning and prompt engineering is difficult. In future research, the composition of herbs in formulas can be encoded through vectors to better explore potential relationships in TCM formula. LLMs in the Qwen series are trained on large-scale, high-quality, and diverse Chinese and English corpora, and have shown strong performance across various tasks. In our study, we fully fine-tuned Qwen-1.8B to enhance its capability in classifying TCM formulas and achieved significant accuracy. In the future, we plan to fully fine-tune more LLMs, such as internLM-1.8B [<xref ref-type="bibr" rid="ref43">43</xref>], and use a weighted voting approach to improve classification accuracy.</p><p>Several limitations may have influenced the predicted results. The predictive performance of the LLMs was affected by the relatively small and unevenly distributed dataset of TCM formula in our study. 
In the future, we plan to collect and curate a large, high-quality TCM formula dataset and integrate it with knowledge graphs to enhance the prediction accuracy of a single LLM. Additionally, we also aim to further improve the performance of the TCM formula classification task by refining our weighted voting methodology.</p></sec><sec id="s4-3"><title>Conclusions</title><p>This study explored the performance of various fine-tuned LLMs in the TCM formula classification task. To improve classification accuracy, both hard voting and weighted voting methods were employed. In conclusion, we also examined the rationality of using LLMs for TCM formula classification and discussed the potential of improving existing classification standards of TCM formulas through the application of LLMs.</p></sec></sec></body><back><ack><p>This work was supported by Beijing Natural Science Foundation (7254504 and 7252253), CAMS Innovation Fund for Medical Sciences (2021-I2M-1-057), National Natural Science Foundation of China (82174534), Noncommunicable Chronic Diseases-National Science and Technology Major Project (2024ZD0532900), and the Fundamental Research Funds for the Central Public Welfare Research Institutes (ZZ160311). 
The authors acknowledge the use of graphics processing unit and high-performance computing platform at the Center for Bioinformatics, Institute of Basic Medical Sciences, Chinese Academy of Medical Sciences; School of Basic Medicine, Peking Union Medical College.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">BLOOM</term><def><p>BigScience Large Open-Science Open-Access Multilingual Language Model</p></def></def-item><def-item><term id="abb2">FN</term><def><p>false negative</p></def></def-item><def-item><term id="abb3">FP</term><def><p>false positive</p></def></def-item><def-item><term id="abb4">TN</term><def><p>true negative</p></def></def-item><def-item><term id="abb5">TP</term><def><p>true positive</p></def></def-item><def-item><term id="abb6">GPUs</term><def><p>Graphics Processing Units</p></def></def-item><def-item><term id="abb7">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb8">RQ</term><def><p>research question</p></def></def-item><def-item><term id="abb9">TCM</term><def><p>traditional Chinese medicine</p></def></def-item><def-item><term id="abb10">YWD</term><def><p>Yiweitang or Yiwei decoction</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Deng</surname><given-names>Z</given-names> </name></person-group><source>Formulary Version 1</source><year>2017</year><publisher-name>China Press of Chinese Medicine</publisher-name></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kong</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name><name 
name-style="western"><surname>Gu</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Analysis of the molecular mechanism of Pudilan (PDL) treatment for COVID-19 by network pharmacology tools</article-title><source>Biomed Pharmacother</source><year>2020</year><month>08</month><volume>128</volume><fpage>110316</fpage><pub-id pub-id-type="doi">10.1016/j.biopha.2020.110316</pub-id><pub-id pub-id-type="medline">32505821</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>D</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>TCMID 2.0: a comprehensive resource for TCM</article-title><source>Nucleic Acids Res</source><year>2018</year><month>01</month><day>4</day><volume>46</volume><issue>D1</issue><fpage>D1117</fpage><lpage>D1120</lpage><pub-id pub-id-type="doi">10.1093/nar/gkx1028</pub-id><pub-id pub-id-type="medline">29106634</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Zheng</surname><given-names>X</given-names> </name><name name-style="western"><surname>Bai</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Association between use of Qingfei Paidu Tang and mortality in hospitalized patients with COVID-19: a national retrospective registry study</article-title><source>Phytomedicine</source><year>2021</year><month>05</month><volume>85</volume><fpage>153531</fpage><pub-id pub-id-type="doi">10.1016/j.phymed.2021.153531</pub-id></nlm-citation></ref><ref 
id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Traditional Chinese medicine compound (Tongxinluo) and clinical outcomes of patients with acute myocardial infarction: the CTS-AMI randomized clinical trial</article-title><source>JAMA</source><year>2023</year><month>10</month><day>24</day><volume>330</volume><issue>16</issue><fpage>1534</fpage><lpage>1545</lpage><pub-id pub-id-type="doi">10.1001/jama.2023.19524</pub-id><pub-id pub-id-type="medline">37874574</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Su</surname><given-names>H</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Traditional medicine Pien Tze Huang suppresses colorectal tumorigenesis through restoring gut microbiota and metabolites</article-title><source>Gastroenterology</source><year>2023</year><month>12</month><volume>165</volume><issue>6</issue><fpage>1404</fpage><lpage>1419</lpage><pub-id pub-id-type="doi">10.1053/j.gastro.2023.08.052</pub-id><pub-id pub-id-type="medline">37704113</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cui</surname><given-names>M</given-names> </name></person-group><article-title>An efficient approach of acquiring knowledge from ancient prescriptions and medicines based on information extraction</article-title><source>J Tradit Chin Med Pharm</source><year>2015</year><volume>30</volume><issue>5</issue><fpage>5</fpage></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name></person-group><article-title>An analysis of the academic thought of &#x201C;Collected Exegesis of Recipe&#x201D;</article-title><source>Trad Chin Med J</source><year>2019</year><volume>18</volume><issue>5</issue><fpage>9</fpage><lpage>11</lpage></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Jia</surname><given-names>B</given-names> </name></person-group><article-title>Shallow discussion about classification of prescriptions</article-title><source>Henan Tradit Chin Med</source><year>2018</year><volume>38</volume><issue>11</issue><fpage>4</fpage></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Xie</surname><given-names>M</given-names> </name></person-group><source>Formulaology Version 3</source><year>2016</year><publisher-name>People&#x2019;s Medical Publishing House</publisher-name></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Gu</surname><given-names>Z</given-names> </name></person-group><article-title>Research on prescription classification using text classification technology</article-title><source>J Liaoning Univ Tradit Chin Med</source><year>2010</year><volume>2</volume><fpage>45</fpage><lpage>46</lpage></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cheng</surname><given-names>N</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>W</given-names> </name><etal/></person-group><article-title>An improved deep learning model: S-TextBLCNN for traditional Chinese medicine formula classification</article-title><source>Front Genet</source><year>2021</year><volume>12</volume><fpage>807825</fpage><pub-id pub-id-type="doi">10.3389/fgene.2021.807825</pub-id><pub-id pub-id-type="medline">35003231</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ren</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>TCM function multi-classification approach using deep learning models</article-title><conf-name>Web Information Systems and Applications: 20th International Conference, WISA 2023</conf-name><conf-date>Sep 15-17, 2023</conf-date><conf-loc>Chengdu, China</conf-loc><fpage>246</fpage><lpage>258</lpage><pub-id pub-id-type="doi">10.1007/978-981-99-6222-8_21</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="web"><article-title>Introducing ChatGPT</article-title><source>OpenAI</source><access-date>2025-06-17</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://openai.com/blog/chatgpt">https://openai.com/blog/chatgpt</ext-link></comment></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jin</surname><given-names>D</given-names> </name><name name-style="western"><surname>Pan</surname><given-names>E</given-names> </name><name name-style="western"><surname>Oufattole</surname><given-names>N</given-names> </name><name name-style="western"><surname>Weng</surname><given-names>WH</given-names> </name><name name-style="western"><surname>Fang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Szolovits</surname><given-names>P</given-names> </name></person-group><article-title>What disease does this patient have? A large-scale open domain question answering dataset from medical exams</article-title><source>Appl Sci (Basel)</source><year>2021</year><volume>11</volume><issue>14</issue><fpage>6421</fpage><pub-id pub-id-type="doi">10.3390/app11146421</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Singhal</surname><given-names>K</given-names> </name><name name-style="western"><surname>Tu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Gottweis</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Toward expert-level medical question answering with large language models</article-title><source>Nat Med</source><year>2025</year><month>03</month><volume>31</volume><issue>3</issue><fpage>943</fpage><lpage>950</lpage><pub-id pub-id-type="doi">10.1038/s41591-024-03423-7</pub-id><pub-id pub-id-type="medline">39779926</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Nijkamp</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ruffolo</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Weinstein</surname><given-names>EN</given-names> </name><name name-style="western"><surname>Naik</surname><given-names>N</given-names> </name><name name-style="western"><surname>Madani</surname><given-names>A</given-names> </name></person-group><article-title>ProGen2: Exploring the boundaries of protein language models</article-title><source>Cell Syst</source><year>2023</year><month>11</month><day>15</day><volume>14</volume><issue>11</issue><fpage>968</fpage><lpage>978</lpage><pub-id pub-id-type="doi">10.1016/j.cels.2023.10.002</pub-id><pub-id pub-id-type="medline">37909046</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Kant</surname><given-names>N</given-names> </name><name name-style="western"><surname>Puri</surname><given-names>R</given-names> </name><name name-style="western"><surname>Yakovenko</surname><given-names>N</given-names> </name><name name-style="western"><surname>Catanzaro</surname><given-names>B</given-names> </name></person-group><article-title>Practical text classification with large pre-trained language models</article-title><source>arXiv</source><comment>Preprint posted online on  Dec 4, 2018</comment><pub-id pub-id-type="doi">10.48550/arXiv.1812.01207</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Text classification via large language 
models</article-title><year>2023</year><conf-name>Findings of the Association for Computational Linguistics</conf-name><conf-date>Dec 6-10, 2023</conf-date><conf-loc>Singapore</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2023.findings-emnlp.603</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Li</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ren</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Yao</surname><given-names>K</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Y</given-names> </name></person-group><article-title>Traditional Chinese medicine formula classification using large language models</article-title><conf-name>2023 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</conf-name><conf-date>Dec 5-8, 2023</conf-date><conf-loc>Istanbul, Turkiye</conf-loc><pub-id pub-id-type="doi">10.1109/BIBM58861.2023.10385776</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="web"><article-title>ChatGLM-6B</article-title><source>GitHub</source><access-date>2025-06-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/THUDM/ChatGLM-6B">https://github.com/THUDM/ChatGLM-6B</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><article-title>ChatGLM2-6B</article-title><source>GitHub</source><access-date>2025-06-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/THUDM/ChatGLM2-6B">https://github.com/THUDM/ChatGLM2-6B</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation 
citation-type="web"><article-title>InternLM</article-title><source>GitHub</source><access-date>2025-06-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/InternLM/InternLM">https://github.com/InternLM/InternLM</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Du</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Qian</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>X</given-names> </name><etal/></person-group><article-title>GLM: general language model pretraining with autoregressive blank infilling</article-title><conf-name>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name><conf-date>May 22-27, 2022</conf-date><conf-loc>Dublin, Ireland</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2022.acl-long.26</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>Coding rules for Chinese medicinal formulae and their codes (GB/T 31773-2015)</article-title><source>State Administration for Market Regulation</source><year>2015</year><access-date>2025-06-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno=A67D217316F6BCC4535E30586570D16F">https://openstd.samr.gov.cn/bzgk/gb/newGbInfo?hcno=A67D217316F6BCC4535E30586570D16F</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>Drug catalogue of national basic medical insurance, work related injury insurance and maternity insurance</article-title><source>Administration NHS, CM</source><year>2023</year><access-date>2025-06-17</access-date><comment><ext-link ext-link-type="uri"
xlink:href="https://www.gov.cn/zhengce/zhengceku/2023-01/18/content_5737840.htm">https://www.gov.cn/zhengce/zhengceku/2023-01/18/content_5737840.htm</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Li</surname><given-names>C</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Discussion on classification of Chinese herbal formulas</article-title><source>J Guangzhou Univ Tradit Chin Med</source><year>2019</year><volume>36</volume><issue>5</issue><fpage>746</fpage><lpage>751</lpage></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>W</given-names> </name><name name-style="western"><surname>Zhan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Category exploration to Chinese patent medicine on clinic application</article-title><source>Modern Tradit Chin Med Mater Medica-World Sci Technol</source><year>2012</year><volume>14</volume><issue>2</issue><fpage>1357</fpage><lpage>1362</lpage></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>ChatGLM3</article-title><source>GitHub</source><access-date>2025-06-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/THUDM/ChatGLM3">https://github.com/THUDM/ChatGLM3</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation 
citation-type="web"><article-title>XTuner</article-title><source>GitHub</source><access-date>2025-07-07</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/InternLM/xtuner">https://github.com/InternLM/xtuner</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>A</given-names> </name><name name-style="western"><surname>Xiao</surname><given-names>B</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Baichuan 2: open large-scale language models</article-title><source>arXiv</source><comment>Preprint posted online on  Apr 17, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2309.10305</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bai</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bai</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chu</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Qwen technical report</article-title><source>arXiv</source><year>2023</year><month>09</month><day>28</day><pub-id pub-id-type="doi">10.48550/arXiv.2309.16609</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="web"><article-title>A one-year long research workshop on large multilingual models and datasets</article-title><source>BigScience</source><access-date>2025-07-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://bigscience.huggingface.co">https://bigscience.huggingface.co</ext-link></comment></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><article-title>LLMTuner: large 
language model instruction tuning tools</article-title><source>GitHub</source><year>2023</year><access-date>2025-07-07</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/zejunwang1/LLMTuner">https://github.com/zejunwang1/LLMTuner</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chatterjee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Byun</surname><given-names>YC</given-names> </name></person-group><article-title>Voting ensemble approach for enhancing Alzheimer&#x2019;s disease classification</article-title><source>Sensors (Basel)</source><year>2022</year><month>10</month><day>9</day><volume>22</volume><issue>19</issue><fpage>19</fpage><pub-id pub-id-type="doi">10.3390/s22197661</pub-id><pub-id pub-id-type="medline">36236757</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fan</surname><given-names>W</given-names> </name><name name-style="western"><surname>Lei</surname><given-names>H</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name></person-group><article-title>Exploring the mechanism of Yiwei decoction in the intervention of a premature ovarian insufficiency rat based on network pharmacology and the miRNA-mRNA regulatory network</article-title><source>ACS Omega</source><year>2024</year><month>04</month><day>30</day><volume>9</volume><issue>17</issue><fpage>19009</fpage><lpage>19019</lpage><pub-id 
pub-id-type="doi">10.1021/acsomega.3c09551</pub-id><pub-id pub-id-type="medline">38708213</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fan</surname><given-names>W</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>J</given-names> </name></person-group><article-title>The impact of Yiwei decoction on the LncRNA and CircRNA regulatory networks in premature ovarian insufficiency</article-title><source>Heliyon</source><year>2023</year><month>09</month><volume>9</volume><issue>9</issue><fpage>e20022</fpage><pub-id pub-id-type="doi">10.1016/j.heliyon.2023.e20022</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fan</surname><given-names>W</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>J</given-names> </name><etal/></person-group><article-title>To investigate the mechanism of Yiwei decoction in the treatment of premature ovarian insufficiency-related osteoporosis using transcriptomics, network pharmacology and molecular docking techniques</article-title><source>Sci Rep</source><year>2023</year><month>11</month><day>3</day><volume>13</volume><issue>1</issue><fpage>19016</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-45699-8</pub-id><pub-id pub-id-type="medline">37923747</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name
name-style="western"><surname>Zhang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>W</given-names> </name></person-group><article-title>Screening antioxidant components in Yiwei decoction using spectrum-effect relationship and network pharmacology</article-title><source>J Anal Methods Chem</source><year>2024</year><volume>2024</volume><issue>1</issue><fpage>5514265</fpage><pub-id pub-id-type="doi">10.1155/2024/5514265</pub-id><pub-id pub-id-type="medline">39445127</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Gretz</surname><given-names>S</given-names> </name><name name-style="western"><surname>Halfon</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shnayderman</surname><given-names>I</given-names> </name><etal/></person-group><article-title>Zero-shot topical text classification with LLMs&#x2014;an experimental study</article-title><year>2023</year><conf-name>Findings of the Association for Computational Linguistics</conf-name><conf-date>Dec 6-10, 2023</conf-date><conf-loc>Singapore</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2023.findings-emnlp.647</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Abburi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Suesserman</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pudota</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Generative AI text classification using ensemble LLM approaches</article-title><source>arXiv</source><comment>Preprint posted online on  Sep 14, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2309.07755</pub-id></nlm-citation></ref><ref 
id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Evaluating the ChatGPT family of models for biomedical reasoning and classification</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>04</month><day>3</day><volume>31</volume><issue>4</issue><fpage>940</fpage><lpage>948</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocad256</pub-id><pub-id pub-id-type="medline">38261400</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><article-title>Internlm2-1_8b</article-title><source>Hugging Face</source><access-date>2025-07-07</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://huggingface.co/internlm/internlm2-1_8b">https://huggingface.co/internlm/internlm2-1_8b</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Configuration parameters for each fine-tuned large language model.</p><media xlink:href="medinform_v13i1e69286_app1.docx" xlink:title="DOCX File, 19 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Confusion matrix of the weighted voting method with the top 5 models.</p><media xlink:href="medinform_v13i1e69286_app2.png" xlink:title="PNG File, 68 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Confusion matrix of the weighted voting method with the top 3 models.</p><media xlink:href="medinform_v13i1e69286_app3.png" xlink:title="PNG File, 66 KB"/></supplementary-material></app-group></back></article>