<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e67859</article-id><article-id pub-id-type="doi">10.2196/67859</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Machine Learning and Deep Learning Techniques for Prediction and Diagnosis of Leptospirosis: Systematic Literature Review</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Sawesi</surname><given-names>Suhila</given-names></name><degrees>BPharm, MPharm, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Jadhav</surname><given-names>Arya</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Rashrash</surname><given-names>Bushra</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Health Informatics and Bioinformatics Program, College Of Computing, Grand Valley State 
University</institution><addr-line>333 Michigan St. NE</addr-line><addr-line>Grand Rapids</addr-line><addr-line>MI</addr-line><country>United States</country></aff><aff id="aff2"><institution>Data Science, College Of Computing, Grand Valley State University</institution><addr-line>Allendale</addr-line><addr-line>MI</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Biomedical Science, College of Liberal Arts and Sciences, Grand Valley State University</institution><addr-line>Allendale</addr-line><addr-line>MI</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Benis</surname><given-names>Arriel</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Ozek</surname><given-names>Burcu</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Sutrave</surname><given-names>Kruttika</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Khanna</surname><given-names>Munish</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Suhila Sawesi, BPharm, MPharm, PhD, Health Informatics and Bioinformatics Program, College Of Computing, Grand Valley State University, 333 Michigan St. 
NE, Grand Rapids, MI, 49503, United States, 1 616-331-7827 ext 17827; <email>sawesis@gvsu.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>29</day><month>5</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e67859</elocation-id><history><date date-type="received"><day>22</day><month>10</month><year>2024</year></date><date date-type="rev-recd"><day>11</day><month>04</month><year>2025</year></date><date date-type="accepted"><day>14</day><month>04</month><year>2025</year></date></history><copyright-statement>&#x00A9; Suhila Sawesi, Arya Jadhav, Bushra Rashrash. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 29.5.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e67859"/><abstract><sec><title>Background</title><p>Leptospirosis, a zoonotic disease caused by <italic>Leptospira</italic> bacteria, continues to pose significant public health risks, particularly in tropical and subtropical regions.</p></sec><sec><title>Objective</title><p>This systematic review aimed to evaluate the application of machine learning (ML) and deep learning (DL) techniques in predicting and diagnosing leptospirosis, focusing on the most used algorithms, validation methods, data types, and performance metrics.</p></sec><sec sec-type="methods"><title>Methods</title><p>Using Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) guidelines, Checklist for Critical Appraisal and Data Extraction for Systematic Reviews of Prediction Modelling Studies (CHARMS), and Prediction model Risk of Bias Assessment Tool (PROBAST) tools, we conducted a comprehensive review of studies applying ML and DL models for leptospirosis detection and prediction, examining algorithm performance, data sources, and validation approaches.</p></sec><sec sec-type="results"><title>Results</title><p>Out of a total of 374 articles screened, 17 studies were included in the qualitative synthesis, representing approximately 4.5% of the initial pool. The review identified frequent use of algorithms such as support vector machines, artificial neural networks, decision trees, and convolutional neural networks (CNNs). Among the included studies, 88% (15/17) used traditional ML methods, and 24% (4/17) used DL techniques. 
Several models demonstrated high predictive performance, with reported accuracy rates ranging from 80% to 98%, notably with the U-Net CNN achieving 98.02% accuracy. However, public datasets were underused, with only 35% (6/17) of studies incorporating publicly available data sources; the majority (65%, 11/17) relied primarily on private datasets from hospitals, clinical records, or regional surveillance systems.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>ML and DL techniques demonstrate potential for improving leptospirosis prediction and diagnosis, but future research should focus on using larger, more diverse datasets, adopting transfer learning strategies, and integrating advanced ensemble and validation techniques to strengthen model accuracy and generalization.</p></sec></abstract><kwd-group><kwd>leptospirosis</kwd><kwd>machine learning</kwd><kwd>deep learning</kwd><kwd>prediction models</kwd><kwd>diagnosis</kwd><kwd>artificial intelligence</kwd><kwd>convolutional neural networks</kwd><kwd>support vector machines</kwd><kwd>transfer learning</kwd><kwd>zoonotic diseases</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Overview of Leptospirosis and Its Diagnosis</title><p>Leptospirosis, a zoonotic disease caused by pathogenic <italic>Leptospira</italic> bacteria, is a global public health concern, with an estimated 1.03 million cases and approximately 58,900 deaths annually [<xref ref-type="bibr" rid="ref1">1</xref>]. The disease is particularly prevalent in tropical and subtropical regions, where environmental factors such as heavy rainfall, poor sanitation, and frequent flooding facilitate bacterial survival and transmission. 
Although less common, leptospirosis also occurs in temperate regions, including the United States, especially in areas prone to flooding or with high populations of animal carriers like rodents and livestock [<xref ref-type="bibr" rid="ref1">1</xref>].</p><p>In the United States, the Centers for Disease Control and Prevention (CDC) reports approximately 100&#x2010;150 cases annually, with the majority occurring in Puerto Rico and Hawaii due to their specific environmental conditions. Isolated cases in areas like New York City and California highlight the mobility of the disease and its potential for travel-related transmission [<xref ref-type="bibr" rid="ref1">1</xref>].</p><p>Transmission primarily occurs through direct contact with the urine or reproductive fluids of infected animals or exposure to contaminated water and soil, especially following periods of heavy rainfall [<xref ref-type="bibr" rid="ref2">2</xref>]. High-risk activities include wading, swimming, or boating in potentially contaminated freshwater. Effective control measures include improving sanitation, controlling rodent populations, and educating at-risk populations. The disease manifests with a broad range of clinical symptoms, from mild flu-like symptoms to severe complications such as Weil&#x2019;s disease, characterized by jaundice, renal failure, pulmonary hemorrhage, and multi-organ dysfunction, which can lead to death if not treated promptly [<xref ref-type="bibr" rid="ref1">1</xref>].</p><p>The diagnosis of leptospirosis is challenging due to the nonspecific nature of its early symptoms, which often overlap with other febrile illnesses. Traditional diagnostic methods include the microscopic agglutination test (MAT), considered the gold standard, and polymerase chain reaction (PCR). MAT is labor-intensive and requires specialized laboratory capabilities, making it less accessible in many endemic regions [<xref ref-type="bibr" rid="ref2">2</xref>]. 
PCR, while offering early detection by identifying <italic>Leptospira</italic> DNA in blood or urine, also requires advanced laboratory infrastructure. Rapid diagnostic tests (RDTs) provide quicker results, but their sensitivity and specificity can vary depending on the <italic>Leptospira</italic> serovars and disease stages, limiting their effectiveness in some settings [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>].</p></sec><sec id="s1-2"><title>Machine Learning and Deep Learning in Disease Detection</title><p>Machine learning (ML) and deep learning (DL) have emerged as powerful tools in the field of disease detection and management. ML involves training computers to apply past experiences to solve new problems, leveraging algorithms that enable the machine to identify patterns, make predictions, and produce insightful judgments based on data. The increasing availability of computational power and data storage has significantly boosted the application of ML across various fields, including public health. In the context of infectious diseases like leptospirosis, ML can analyze large datasets, including clinical and laboratory data, to identify patterns and relationships that might not be apparent through traditional statistical methods [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>DL, a subset of ML, further enhances these capabilities by using neural networks with multiple layers to automatically extract, analyze, and understand useful information from raw data. Unlike traditional ML techniques that rely on handcrafted features, DL models are capable of automatic feature engineering, which significantly enhances classification performance. 
DL techniques, driven by neural networks, are known for their accuracy and performance, particularly in complex tasks such as image recognition and analysis [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. For example, convolutional neural networks (CNNs) have been successfully applied to medical imaging, enabling the precise identification and classification of pathogens in microscopy images [<xref ref-type="bibr" rid="ref9">9</xref>], which is crucial for diseases like leptospirosis.</p><p>The application of ML and DL in leptospirosis diagnosis represents a significant advancement over traditional methods. ML algorithms can analyze clinical and laboratory data, including patient symptoms, demographic information, and test results, to predict the likelihood of leptospirosis. This capability is particularly valuable in settings where access to advanced diagnostics is limited, as it allows for earlier and more accurate detection, potentially reducing the time to diagnosis and improving patient outcomes [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>DL models, particularly CNNs, have shown great promise in analyzing blood and urine samples, medical imaging, and environmental data to predict the presence of <italic>Leptospira</italic> or the likelihood of an outbreak. These models can distinguish <italic>Leptospira</italic> bacteria in microscopy images with high accuracy, reducing the need for skilled microbiologists and improving diagnostic accessibility in low-resource settings [<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>One of the most significant advantages of using ML and DL in leptospirosis diagnosis is their ability to integrate diverse data types&#x2014;such as clinical, laboratory, and environmental data&#x2014;into comprehensive predictive models. These models can be used for individual patient diagnosis and public health surveillance, enabling more targeted and timely interventions. 
For example, predictive models that incorporate climatic and environmental factors, such as rainfall patterns and flooding data, can help identify regions at higher risk for leptospirosis outbreaks, allowing for proactive disease control measures [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Despite the significant potential of ML and DL to revolutionize the diagnosis and management of leptospirosis, comprehensive reviews focusing specifically on their application in this area are scarce. Most existing reviews have primarily concentrated on more prevalent conditions such as tuberculosis, malaria, and COVID-19, with minimal attention given to zoonotic diseases like leptospirosis [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Furthermore, there is a recognized gap in the literature concerning the application of advanced AI techniques in the context of neglected tropical diseases, such as leptospirosis, where the potential for these technologies to improve diagnostic accuracy remains underexplored [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. This gap underscores the need for a focused review that synthesizes current research, identifies the most effective ML and DL models, and evaluates their impact on public health outcomes related to leptospirosis.</p><p>The aim of this systematic review is to comprehensively evaluate the application of ML and DL techniques in the prediction and diagnosis of leptospirosis. This review will address the following key research questions:</p><list list-type="order"><list-item><p>Which ML and DL algorithms are most frequently used in leptospirosis prediction and diagnosis, and how well do they perform?</p></list-item><list-item><p>What validation methods are most used in the evaluation of ML and DL models for leptospirosis? 
How reliable are these methods?</p></list-item><list-item><p>What types of data are most used in ML and DL models for leptospirosis? How does the type of data influence the performance of these models?</p></list-item><list-item><p>What are the main challenges and limitations identified in the research studies regarding ML and DL applications in leptospirosis prediction and diagnosis?</p></list-item></list></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>We conducted a systematic review following the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) checklist [<xref ref-type="bibr" rid="ref12">12</xref>] (<xref ref-type="supplementary-material" rid="app5">Checklist 1</xref>). The Checklist for Critical Appraisal and Data Extraction for Systematic Reviews of Prediction Modelling Studies (CHARMS) was used to frame this review&#x2019;s objectives [<xref ref-type="bibr" rid="ref13">13</xref>].</p><sec id="s2-1-1"><title>Search Strategy</title><p>PubMed, IEEE, ACM, and Web of Science databases were searched for articles published from inception till May 29, 2024. Hand-searching of references within included articles was conducted to shortlist other potential articles. Our search strategy used a combination of subject terms related to &#x201C;machine learning&#x201D; and &#x201C;Leptospirosis&#x201D; (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p></sec><sec id="s2-1-2"><title>Eligibility Criteria</title><p>We included full-text English language articles that developed or validated diagnostic or predictive ML models for human leptospirosis. Our review focused specifically on ML and DL methods, including logistic regression, Bayesian learning, and generalized additive models when these were implemented within an ML or DL framework [<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>Several categories of studies were excluded. 
First, we omitted case reports, case series, letters, corrigenda, editorial commentaries, literature reviews, and meta-analyses. Second, we excluded purely applied statistical methods that were not integrated with ML or DL frameworks, including traditional statistical analyses that did not incorporate ML optimization techniques. Third, non-artificial intelligence methods as well as general artificial intelligence (AI) approaches that could not be classified as either ML or DL (such as rule-based expert systems without learning components or symbolic AI methods) were excluded [<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>The distinction between included and excluded methods was based on whether the approach involved automated learning from data. For instance, while standard logistic regression was excluded, logistic regression implemented with ML techniques like automated feature selection or hyperparameter tuning was included. Similarly, simple threshold-based diagnostic rules were excluded unless they were derived through ML processes. This approach ensured our review focused specifically on applications of ML and DL technologies in leptospirosis diagnosis and prediction.</p><p>In this review, diagnostic ML models refer to models that predict the disease status of an individual, while predictive models forecast the probability of future occurrence of the disease in an individual.</p></sec></sec><sec id="s2-2"><title>Study Selection</title><p>A total of 3 independent reviewers (SS, AJ, and BR) conducted the initial search across 4 databases using predefined search terms within the title and abstract, strictly following the inclusion and exclusion criteria. Zotero bibliography software was used to manage the search results by tracking reasons for inclusion and exclusion, grouping records, importing PDFs, and exporting data to Microsoft Excel for extraction. 
The interrater agreement between the coauthors was evaluated using Cohen&#x2019;s kappa (&#x03BA;&#x003E;0.80) [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>], ensuring a high level of consistency across the reviewers.</p><p>To enhance the screening process, we employed ChatGPT-4o (June 2024 version) as a fourth reviewer. This advanced large language model (LLM) was specifically selected for its proven capabilities in biomedical text analysis (OpenAI) and superior handling of technical medical terminology compared to previous versions. We opted to use only this single model to maintain consistency in evaluation criteria and avoid potential variability from multiple LLMs. During implementation, abstracts were systematically input to ChatGPT-4o with standardized prompts mirroring our inclusion and exclusion criteria, and all outputs were automatically logged for verification.</p><p>While ChatGPT-4o provided valuable preliminary classifications (achieving 88% initial alignment with human reviewers in our pilot test), we implemented a rigorous 3-stage human verification protocol: (1) initial matching of AI recommendations with human decisions, (2) consensus discussion for discrepancies (&#x03BA;&#x003C;0.80), and (3) final unanimous approval. For example, the model initially recommended including 12 statistical modeling studies that were properly excluded after human review. This AI-assisted process reduced initial screening time by 30% while maintaining 100% alignment with final human decisions through our verification protocol, which followed PRISMA-AI guidelines [<xref ref-type="bibr" rid="ref12">12</xref>] to mitigate potential AI limitations.</p><p>We emphasize that ChatGPT-4o served strictly in an advisory capacity, and no studies were included solely based on its recommendation. 
This approach aligns with emerging best practices for LLM-assisted systematic reviews [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>], with complete verification records available in our supplementary materials (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p></sec><sec id="s2-3"><title>Quality Assessment</title><p>The articles selected for inclusion were then subjected to a quality assessment using the Prediction model Risk of Bias Assessment Tool (PROBAST) tool [<xref ref-type="bibr" rid="ref13">13</xref>], which categorizes bias as low, medium, or high (see <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). All authors assessed the quality of their respective parts, starting with a pilot of 5 articles to ensure consistency. The PROBAST tool evaluates risk of bias based on 4 segments&#x2014;participants, predictors, outcome, and analysis. Each segment&#x2019;s risk of bias was rated as high, medium, low, or unclear. If any domain suggested a high risk of bias, the overall risk of bias for that study was considered high. These studies were not excluded but were analyzed to understand their limitations and impact on the overall findings. The assessment was conducted independently by all authors, ensuring a thorough evaluation process.</p></sec><sec id="s2-4"><title>Data Extraction</title><p>For the extraction process, a standardized form was used to collect data relevant to the review&#x2019;s objectives. This form was adapted from the CHARMS [<xref ref-type="bibr" rid="ref15">15</xref>] and the Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis (TRIPOD) guidelines [<xref ref-type="bibr" rid="ref19">19</xref>]. 
The extracted information included publication type, publication year, author, title, country of research, source of data, type of data (public or private), overall number of samples, and data collection methods (see <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p><p>Additional details were gathered on the ML and DL algorithms used, including whether the models were pretrained or developed from scratch, as well as the use of transfer learning, data augmentation, validation methods, and evaluation metrics. The strength of leptospirosis predictions was documented using performance metrics. Tasks were categorized into segmentation, classification, and object detection, noting the type of classification and any limitations.</p><p>To ensure consistency, reviewers conducted a pilot phase where they independently extracted data from the first 5 articles and compared results, achieving a high agreement (&#x03BA;=0.98). Afterward, all 17 articles were reviewed, and discrepancies were resolved through discussion. The studies were then rigorously categorized before moving to theme formation. One author (SS) defined the themes, which were reviewed and adjusted by the other authors (AJ and BR) to ensure comprehensive categorization.</p></sec><sec id="s2-5"><title>Outcomes Assessed</title><p>The primary outcomes assessed in this review include the diagnostic and predictive performance of various ML and DL methods for leptospirosis detection, focusing on metrics like accuracy, area under the curve (AUC), sensitivity, and specificity. It also evaluates the applicability and generalizability of these models in health care settings, emphasizing the integration of advanced neural network architectures, transfer learning, and data augmentation to enhance performance.</p></sec><sec id="s2-6"><title>Data Analysis</title><p>We grouped the collected studies into summary tables based on the type of ML and DL models used for leptospirosis detection. 
R (version 4.3.2; R Foundation for Statistical Computing) was used to perform both descriptive statistical analyses and create visualizations.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Search and Selection Results</title><p><xref ref-type="fig" rid="figure1">Figure 1</xref> illustrates the process of identifying relevant literature. A comprehensive search across 4 databases yielded a total of 374 articles. After removing 25 duplicate records, 349 unique articles were screened based on their titles and abstracts. Following this initial screening, 61 articles were selected for full-text review. Of these, 45 were excluded for various reasons, including not meeting inclusion criteria or insufficient data for analysis. Ultimately, 16 studies were included in the review, with 1 additional study identified through hand-searching reference lists, bringing the total to 17 studies included in the qualitative synthesis.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flow diagram illustrating the search and selection process used to identify relevant studies. AI: artificial intelligence; ML: machine learning; DL: deep learning.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67859_fig01.png"/></fig></sec><sec id="s3-2"><title>Study Description</title><p>In this systematic review, we examined 17 studies (n=17) published between 2012 and 2024 that used ML and DL techniques for the prediction and diagnosis of leptospirosis (see <xref ref-type="fig" rid="figure2">Figure 2</xref>). Most studies were published in 2019 (n=3, 18%), 2022 (n=3, 18%), and 2023 (n=3, 18%). Brazil was the most common country of research, contributing 4 studies (24%), followed by New Caledonia with 3 studies (18%). 
Regarding data sources, 6 studies (35%) used health records, 6 studies (35%) used environmental data, and 5 studies (29%) used epidemiological data.</p><p>Most studies (11/17, 65%) focused on predictive modeling, while 6/17 studies (35%) concentrated on diagnosis. ML algorithms were overwhelmingly preferred, with 15/17 studies (88%) using techniques such as support vector machine (SVM), decision tree (DT), and random forests. DL algorithms, including CNN and multilayer perceptrons (MLPs), were used in 4/17 studies (24%), and only 1/17 study (6%) combined both ML and DL methods. All studies developed models from scratch without using transfer learning, and only 1/17 study (6%) reported the application of data augmentation techniques.</p><p>Regarding model validation, cross-validation methods were most frequently used in 11/17 studies (65%), while holdout validation methods, such as train and test splits, were used in 6/17 studies (35%).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Distribution of studies on machine learning and deep learning applications for leptospirosis diagnosis and prediction by year and task type.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67859_fig02.png"/></fig></sec><sec id="s3-3"><title>Assessment of Risk of Bias in Machine Learning Models</title><p>In assessing the risk of bias across the included 17 studies, most were categorized as having a medium risk across key domains (<xref ref-type="fig" rid="figure3">Figure 3</xref>). A total of 14 studies (82%) were rated as having a medium risk of bias related to participant selection, primarily due to the selection of specific regions or populations that may not fully represent broader leptospirosis cases. Examples include studies by [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref32">32</xref>]. 
In addition, 2 studies (12%) [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>] were rated as low risk, while 1 study (6%) [<xref ref-type="bibr" rid="ref35">35</xref>] was rated as high risk due to narrower participant selection.</p><p>Regarding predictors, 16 studies (94%) demonstrated a medium risk of bias, often because they relied heavily on environmental or clinical data without fully accounting for confounding variables. Only 1 study (6%) [<xref ref-type="bibr" rid="ref34">34</xref>] was rated as low risk in this domain. For outcome bias, 13 studies (76%) [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] were assessed as low risk, with clear and consistent definitions applied across participants. 
A total of 4 studies (24%) [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref33">33</xref>] exhibited medium risk, mainly due to subjective outcome determinations or a lack of standardized measures.</p><p>In the analysis domain, 13 studies (76%) [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref33">33</xref>] demonstrated medium risk due to concerns about validation techniques, handling of missing data, and small sample sizes, while 4 studies (24%) [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] were rated as low risk, reflecting stronger analytical methodologies.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Distribution of risk of bias across domains in machine learning and deep learning studies for leptospirosis.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67859_fig03.png"/></fig></sec><sec id="s3-4"><title>Frequently Used ML and DL Algorithms for Leptospirosis Prediction and Diagnosis</title><p>This systematic review considered all ML and DL techniques used in the included studies and examined their applications in either leptospirosis prediction or diagnosis (classification). <xref ref-type="fig" rid="figure4">Figure 4</xref> illustrates the distribution of classifiers across the studies, highlighting the diversity of approaches and the frequency of use of certain models.</p><p>For prediction tasks, the most commonly used ML technique was support vector regression (SVR), applied in 2 studies [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. In addition to SVR, several other ML techniques were prominently featured. 
These included the na&#x00EF;ve networks and TAN (tree augmented na&#x00EF;ve) networks used by Mayfield et al [<xref ref-type="bibr" rid="ref24">24</xref>] for predictive risk mapping, and the random forest classifier and M1 mixed model used by Jayaramu et al [<xref ref-type="bibr" rid="ref30">30</xref>] for predictive risk modeling.</p><p>Models like LeptoScore and QuickLepto, applied by Galdino et al [<xref ref-type="bibr" rid="ref10">10</xref>], further illustrate the diversity of approaches taken in predictive modeling. SVM and MLP were used in predictive risk modeling by Ahangarcani et al [<xref ref-type="bibr" rid="ref26">26</xref>], while Mohammadinia et al [<xref ref-type="bibr" rid="ref27">27</xref>] applied geographically weighted regression, generalized linear models, artificial neural networks (ANNs), and SVM for similar tasks. In addition, feedforward neural networks were used by Thibeaux et al [<xref ref-type="bibr" rid="ref31">31</xref>] in prediction tasks, demonstrating the growing role of neural network models in this domain.</p><p>For diagnosis (classification) tasks, ANNs were widely used, appearing in the studies by Caicedo Torres et al [<xref ref-type="bibr" rid="ref20">20</xref>] and Rahmat et al [<xref ref-type="bibr" rid="ref28">28</xref>]. Fuzzy ARTMAP and ARTMAP-IC, both variants of ANN, were also used by Caicedo Torres et al [<xref ref-type="bibr" rid="ref20">20</xref>] to achieve classification. In addition, Bayesian classifiers such as na&#x00EF;ve Bayes were applied by Nery et al [<xref ref-type="bibr" rid="ref21">21</xref>] and Shenoy et al [<xref ref-type="bibr" rid="ref29">29</xref>], further showcasing the diversity of ML techniques in classification tasks.</p><p>CNNs were another frequently used DL model for classification. Specifically, U-Net, a variant of CNN, was used by Kulkarni et al [<xref ref-type="bibr" rid="ref34">34</xref>] and achieved an impressive accuracy of 98%. 
Other classifiers, such as k-nearest neighbors (KNNs), DTs (J48), and random forests, were used in multiple studies, with Lopez et al [<xref ref-type="bibr" rid="ref23">23</xref>] and Shenoy et al [<xref ref-type="bibr" rid="ref29">29</xref>] demonstrating their efficacy in disease classification.</p><p>Performance evaluations showed that many studies combined multiple classifiers. For example, Shenoy et al [<xref ref-type="bibr" rid="ref29">29</xref>] applied na&#x00EF;ve Bayes, KNN, MLP, J48 decision tree, random forest, multinomial logistic regression, and AdaBoost within the same study, with random forest achieving the highest performance at 87% accuracy and 91% sensitivity.</p><p>Hybrid approaches combining ML and DL were also present, such as the use of a genetic algorithm combined with both ML and DL techniques by Sonthayanon et al [<xref ref-type="bibr" rid="ref25">25</xref>], which attained an accuracy of 99%.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Heatmap of classifier usage across included studies [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref35">35</xref>]. 
ANN: artificial neural network; DL: deep learning; FAM: fuzzy adaptive resonance theory mapping; FFN: feed forward network; GA: genetic algorithm; GWR: geographically weighted regression; J48: J48 decision tree; JRIP: repeated incremental pruning to produce error reduction; LASSO: least absolute shrinkage and selection operator regression; Maxent: maximum entropy model; ML: machine learning; RF: random forest; SVM: support vector machine; SVR: support vector regression; TAN: tree augmented na&#x00EF;ve network; U-Net: U-Net convolutional neural network.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67859_fig04.png"/></fig><p>Upon comparing prediction and diagnosis (classification) tasks, it becomes evident that ML models dominated predictive risk mapping studies, while DL models, particularly U-Net, were more frequently used in classification tasks related to disease detection. For risk modeling, techniques like decision trees (J48) and random forest classifiers were commonly applied, with high specificity rates achieved in studies such as those by [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref30">30</xref>].</p><p>Interestingly, 50% of the studies (n=8) used more than one algorithm to assess performance, highlighting the importance of comparative evaluations in the field. Supervised-learning algorithms were predominant throughout the studies, with no mention of unsupervised-learning methods, such as K-means, or reinforcement-learning algorithms.</p><p>In summary, the most frequent algorithms in prediction tasks were SVR and random forest, while in diagnosis, ANN and U-Net stood out as the most frequently used. 
Across both types of tasks, the performance metrics indicated high accuracy and sensitivity, showcasing the reliability of ML and DL techniques in leptospirosis research.</p></sec><sec id="s3-5"><title>Algorithm Performance Metrics</title><p>Performance assessment is a crucial process in evaluating ML and DL models. Various metrics are used to measure model performance, including accuracy, sensitivity, specificity, precision, <italic>F</italic><sub>1</sub>-score, AUC, mean squared error (MSE), <italic>R</italic>-squared (<italic>R</italic>&#x00B2;), mean absolute error (MAE), and root mean squared error (RMSE). These metrics are typically evaluated using hidden or unseen examples to assess model generalizability. In the included studies, accuracy was the most frequently reported metric, followed by sensitivity, precision, specificity, and <italic>F</italic><sub>1</sub>-score. For prediction tasks, MSE, MAE, RMSE, and <italic>R</italic>&#x00B2; were used to assess regression performance.</p><p><xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref> demonstrate the widespread use of accuracy rates across various models. The studies examined used accuracy as the primary indicator of performance, although other metrics such as sensitivity and specificity were also highlighted. For instance, the performance of the ANN in the Seremban City dataset reached 80% accuracy, 83% sensitivity, and 75% specificity, while achieving an AUC of 87%. 
Models such as the Fuzzy ARTMAP applied to other datasets showed lower performance, with accuracy ranging between 60% and 80%, highlighting the variability in effectiveness across different methods.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Performance of best classification models from the research studies.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Study</td><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Accuracy, %</td><td align="left" valign="bottom">Sensitivity, %</td><td align="left" valign="bottom">Specificity, %</td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, %</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score, %</td></tr></thead><tbody><tr><td align="char" char="." valign="top">Rahmat et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">ANN<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">86.44</td><td align="left" valign="top">79.33</td><td align="left" valign="top">89.04</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." valign="top">Collins et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">ANN</td><td align="left" valign="top">80</td><td align="left" valign="top">80</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." 
valign="top">Collins et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">Fuzzy ARTMAP<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">80</td><td align="left" valign="top">80</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." valign="top">Nery et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">JRIP<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td><td align="left" valign="top">80.10</td><td align="left" valign="top">85</td><td align="left" valign="top">81</td><td align="left" valign="top">82.60</td><td align="left" valign="top">75</td></tr><tr><td align="char" char="." valign="top">Shenoy et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">Random forest</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">87</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">91</td><td align="left" valign="top">86</td></tr><tr><td align="char" char="." valign="top">Sonthayanon et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">GA<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup></td><td align="left" valign="top">98.90</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." valign="top">Nery Jr et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">JRIP</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">84</td><td align="left" valign="top">99</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." 
valign="top">Kulkarni et al [<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">U-Net<sup><xref ref-type="table-fn" rid="table1fn7">g</xref></sup></td><td align="left" valign="top">98.02</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." valign="top">Lopez et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">J48<sup><xref ref-type="table-fn" rid="table1fn8">h</xref></sup></td><td align="left" valign="top">70.5</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." valign="top">Zhao et al [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">Maxent<sup><xref ref-type="table-fn" rid="table1fn9">i</xref></sup> model</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">96</td><td align="left" valign="top">&#x2014;</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn><fn id="table1fn2"><p><sup>b</sup>ANN: artificial neural network.</p></fn><fn id="table1fn3"><p><sup>c</sup>Indicates metrics that were either not reported or not utilized in the original studies.</p></fn><fn id="table1fn4"><p><sup>d</sup>ARTMAP: adaptive resonance theory mapping.</p></fn><fn id="table1fn5"><p><sup>e</sup>JRIP: repeated incremental pruning to produce error reduction.</p></fn><fn id="table1fn6"><p><sup>f</sup>GA: genetic algorithm.</p></fn><fn id="table1fn7"><p><sup>g</sup>U-Net: U-Net convolutional neural network.</p></fn><fn id="table1fn8"><p><sup>h</sup>J48: J48 decision tree.</p></fn><fn id="table1fn9"><p><sup>i</sup>Maxent: 
maximum entropy.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance of best prediction models from the research studies.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Study</td><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Accuracy, %</td><td align="left" valign="bottom">Sensitivity, %</td><td align="left" valign="bottom">Specificity, %</td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>, %</td><td align="left" valign="bottom">MSE<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="bottom"><italic>R</italic><sup>2</sup></td><td align="left" valign="bottom">MAE<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="bottom">RMSE<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td></tr></thead><tbody><tr><td align="char" char="." valign="top">Douchet et al [<xref ref-type="bibr" rid="ref32">32</xref>]</td><td align="left" valign="top">SVR<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.19</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." 
valign="top">Mayfield et al [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">TAN<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">89</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." valign="top">Jayaramu et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">RFC<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup></td><td align="left" valign="top">82.60</td><td align="left" valign="top">60</td><td align="left" valign="top">96.60</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." valign="top">Galdino et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">LASSO<sup><xref ref-type="table-fn" rid="table2fn9">i</xref></sup></td><td align="left" valign="top">78.30</td><td align="left" valign="top">81.10</td><td align="left" valign="top">57.10</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." 
valign="top">Ahangarcani et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">SVM<sup><xref ref-type="table-fn" rid="table2fn10">j</xref></sup></td><td align="left" valign="top">86.55</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">85.48</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." valign="top">Mohammadinia et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">GWR<sup><xref ref-type="table-fn" rid="table2fn11">k</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.05</td><td align="left" valign="top">0.85</td><td align="left" valign="top">0.01</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." valign="top">Douchet et al [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">SVR</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.44</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="char" char="." 
valign="top">Thibeaux et al [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="left" valign="top">FFN<sup><xref ref-type="table-fn" rid="table2fn12">l</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.67</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn><fn id="table2fn2"><p><sup>b</sup>MSE: mean squared error.</p></fn><fn id="table2fn3"><p><sup>c</sup>MAE: mean absolute error.</p></fn><fn id="table2fn4"><p><sup>d</sup>RMSE: root mean squared error.</p></fn><fn id="table2fn5"><p><sup>e</sup>SVR: support vector regression.</p></fn><fn id="table2fn6"><p><sup>f</sup>Indicates metrics that were either not reported or not used in the original studies.</p></fn><fn id="table2fn7"><p><sup>g</sup>TAN: tree augmented na&#x00EF;ve network.</p></fn><fn id="table2fn8"><p><sup>h</sup>RFC: random forest classifier.</p></fn><fn id="table2fn9"><p><sup>i</sup>LASSO: least absolute shrinkage and selection operator regression.</p></fn><fn id="table2fn10"><p><sup>j</sup>SVM: support vector machine.</p></fn><fn id="table2fn11"><p><sup>k</sup>GWR: geographically weighted regression.</p></fn><fn id="table2fn12"><p><sup>l</sup>FFN: feed forward network.</p></fn></table-wrap-foot></table-wrap><p>However, as with all model comparisons, it is not possible to directly compare the efficiency of models trained and evaluated on dissimilar datasets. To provide a meaningful evaluation, studies that implemented multiple machine learning methods on the same datasets were carefully selected for comparison. 
This allows for an accurate ranking of the algorithms based on their mean scores for accuracy, sensitivity, specificity, <italic>F</italic><sub>1</sub>-score, and other metrics.</p><p>In several cases, regression models like SVR were assessed using MSE and <italic>R</italic>&#x00B2; values to gauge prediction performance. For instance, the SVR model in the Reunion Island dataset showed an MAE of 0.75 and an RMSE of 0.44, while other datasets revealed higher error rates, reflecting the challenges in prediction tasks [<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>The study also reveals that deep learning models, particularly those employing architectures like U-Net and CNN, achieved outstanding accuracy rates, with U-Net recording an accuracy of 98% [<xref ref-type="bibr" rid="ref34">34</xref>]. Random forest models also performed consistently well, particularly in classification tasks, achieving high sensitivity and specificity.</p><p><xref ref-type="fig" rid="figure5">Figure 5</xref> shows the number of studies that reported various performance metrics, highlighting that accuracy and AUC were the most frequently used measures across the reviewed articles. This pattern reflects the widespread reliance on these metrics to evaluate classification performance in leptospirosis-related models. For instance, in one of the best-performing models, a genetic algorithm achieved an accuracy rate of 99%, significantly outperforming other models.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Number of algorithm performance metrics used in reviewed articles of dataset types used (public and private). 
AUC: area under the curve; F1: <italic>F</italic><sub>1</sub>-score; MAE: mean absolute error; MSE: mean squared error; RMSE: root mean squared error.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e67859_fig05.png"/></fig></sec><sec id="s3-6"><title>Datasets and Data Sources</title><p>ML and DL studies for leptospirosis prediction and diagnosis rely on diverse datasets with distinct characteristics (see <xref ref-type="table" rid="table3">Table 3</xref>). Public datasets, typically sourced from government agencies, provide large-scale environmental and epidemiological data ideal for transmission modeling. For instance, the Thai Surveillance System [<xref ref-type="bibr" rid="ref32">32</xref>] offered monthly rainfall measurements (0&#x2010;450 mm range), soil pH values (4.5&#x2010;8.2), and 30-meter resolution elevation data across 5 Southeast Asian countries from 2003 to 2018, comprising over 15,000 data points.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Comprehensive dataset characteristics of included studies.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Study</td><td align="left" valign="bottom">Data type (source)</td><td align="left" valign="bottom">Data categories</td><td align="left" valign="bottom">Sample size</td><td align="left" valign="bottom">Temporal resolution</td><td align="left" valign="bottom">Spatial resolution</td></tr></thead><tbody><tr><td align="char" char="." 
valign="top">Douchet et al [<xref ref-type="bibr" rid="ref32">32</xref>]</td><td align="left" valign="top">Public (Thai surveillance system)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Environmental: daily rainfall (mm)<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>, soil pH</p></list-item><list-item><p>Climatic: max/min temperatures (&#x00B0;C)</p></list-item><list-item><p>Topographic: elevation (SRTM<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> 30m)</p></list-item></list></td><td align="left" valign="top">NS<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">Monthly 2003&#x2010;2018</td><td align="left" valign="top">Regional (5 countries)</td></tr><tr><td align="char" char="." valign="top">Rahmat et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Public (Malaysia Meteorological Department)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Meteorological: hourly rainfall (mm), RH<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup> (%)</p></list-item><list-item><p>Clinical: PCR-confirmed<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup> cases</p></list-item></list></td><td align="left" valign="top">364 weeks</td><td align="left" valign="top">Weekly 2011&#x2010;2017</td><td align="left" valign="top">District-level (n=12)</td></tr><tr><td align="char" char="." valign="top">Caicedo Torres et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">Private (hospital records)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Clinical: fever days, liver enzymes (U/L)</p></list-item><list-item><p>Demographic: age, gender, urban/rural</p></list-item></list></td><td align="left" valign="top">136 patients</td><td align="left" valign="top">Single admission</td><td align="left" valign="top">Hospital catchment</td></tr><tr><td align="char" char="." 
valign="top">Nery et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">Private (hospital records)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Clinical: serum creatinine (mg/dL)</p></list-item><list-item><p>Epidemiological: rodent exposure index</p></list-item><list-item><p>Geospatial: residence coordinates</p></list-item></list></td><td align="left" valign="top">4675 cases</td><td align="left" valign="top">2009&#x2010;2016</td><td align="left" valign="top">Household-level</td></tr><tr><td align="char" char="." valign="top">Nery Jr et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">Private (Gon&#x00E7;alo Moniz Institute [IGM], Federal University of Bahia [UFBA], Yale School of Public Health)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Clinical: patient records</p></list-item><list-item><p>Epidemiological: risk factors, daily activities</p></list-item></list></td><td align="left" valign="top">4675 suspected cases (2046 confirmed, 2629 unconfirmed)</td><td align="left" valign="top">Retrospective (hospital and community cohort)</td><td align="left" valign="top">Hospital or community level (Salvador, Brazil)</td></tr><tr><td align="char" char="." valign="top">Shenoy et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">Private (medical records)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Clinical: jaundice severity scale (0&#x2010;3)</p></list-item><list-item><p>Laboratory: ELISA<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup> optical densities</p></list-item><list-item><p>Comorbidities: diabetes status</p></list-item></list></td><td align="left" valign="top">800 patients</td><td align="left" valign="top">Retrospective 5y</td><td align="left" valign="top">Single tertiary center</td></tr><tr><td align="char" char="." 
valign="top">Sonthayanon et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">Private (bacterial cultures)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Genomic: 16S rRNA<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup> sequences</p></list-item><list-item><p>Proteomic: MALDI-TOF<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup> peaks (2k-20k m/z)</p></list-item></list></td><td align="left" valign="top">116 isolates</td><td align="left" valign="top">2015&#x2010;2018</td><td align="left" valign="top">Lab-level</td></tr><tr><td align="char" char="." valign="top">Mayfield et al [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">Private (serosurvey+ GIS<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup>)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p><sup><xref ref-type="table-fn" rid="table3fn10">j</xref></sup>Serological: MAT titers (1:50-1:6400)</p></list-item><list-item><p>Environmental: livestock density/km&#x00B2;</p></list-item><list-item><p>Village attributes: sanitation index</p></list-item></list></td><td align="left" valign="top">2152 people</td><td align="left" valign="top">Dry/wet season</td><td align="left" valign="top">GPS coordinates (82 villages)</td></tr><tr><td align="char" char="." valign="top">Jayaramu et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">Private (hydrological stations)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Streamflow (m&#x00B3;/s)</p></list-item><list-item><p>Water level (m)</p></list-item><list-item><p>Case reports (weekly)</p></list-item></list></td><td align="left" valign="top">517 weeks</td><td align="left" valign="top">Daily &#x2192; weekly</td><td align="left" valign="top">Watershed-level</td></tr><tr><td align="char" char="." 
valign="top">Galdino et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">Private (hospital EMR<sup><xref ref-type="table-fn" rid="table3fn11">k</xref></sup>)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Vital signs: MAP (mmHg)</p></list-item><list-item><p>Labs: creatinine (&#x03BC;mol/L)</p></list-item><list-item><p>Outcomes: mortality</p></list-item></list></td><td align="left" valign="top">295 patients</td><td align="left" valign="top">2009&#x2010;2022</td><td align="left" valign="top">3 hospitals</td></tr><tr><td align="char" char="." valign="top">Ahangarcani et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Mixed (MODIS<sup><xref ref-type="table-fn" rid="table3fn12">l</xref></sup>+ CDC<sup><xref ref-type="table-fn" rid="table3fn13">m</xref></sup>)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Satellite: NDVI<sup><xref ref-type="table-fn" rid="table3fn14">n</xref></sup>, LST (&#x00B0;C)</p></list-item><list-item><p>Case reports: district-level</p></list-item><list-item><p>Topography: slope (%)</p></list-item></list></td><td align="left" valign="top">1863 cases</td><td align="left" valign="top">Monthly 2009&#x2010;2014</td><td align="left" valign="top">District-level</td></tr><tr><td align="char" char="." valign="top">Kulkarni et al [<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">Public (microscopy images)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Pixels: 256&#x00D7;256 RGB<sup><xref ref-type="table-fn" rid="table3fn15">o</xref></sup></p></list-item><list-item><p>Annotations: spirochete masks</p></list-item></list></td><td align="left" valign="top">366 images</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Pixel-level</td></tr><tr><td align="char" char="." 
valign="top">Lopez et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">Public (SINAN<sup><xref ref-type="table-fn" rid="table3fn16">p</xref></sup> database)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Case reports: ICD-10<sup><xref ref-type="table-fn" rid="table3fn17">q</xref></sup> coded</p></list-item><list-item><p>Symptoms: 23-item checklist</p></list-item></list></td><td align="left" valign="top">890 cases</td><td align="left" valign="top">2007&#x2010;2016</td><td align="left" valign="top">State-level</td></tr><tr><td align="char" char="." valign="top">Mohammadinia et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Public (National Ministry of Health and Treatment of Iran, National Centre of Statistics of Iran, Meteorology Agency of Iran)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Disease: positive ELISA test results</p></list-item></list></td><td align="left" valign="top">1186 positive cases (2009&#x2010;2011)</td><td align="left" valign="top">Longitudinal (2009&#x2010;2011)</td><td align="left" valign="top">District-level (Gilan Province, Iran)</td></tr><tr><td align="char" char="." valign="top">Douchet et al [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">Public (island surveillance)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Climate: CHIRPS<sup><xref ref-type="table-fn" rid="table3fn18">r</xref></sup> rainfall (mm)</p></list-item><list-item><p>Case counts: ministry reports</p></list-item></list></td><td align="left" valign="top">Monthly NS</td><td align="left" valign="top">2010&#x2010;2022</td><td align="left" valign="top">Island-level</td></tr><tr><td align="char" char="." 
valign="top">Thibeaux et al [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="left" valign="top">Private (water monitoring)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Hydrological: turbidity (NTU)<sup><xref ref-type="table-fn" rid="table3fn19">s</xref></sup></p></list-item><list-item><p>Microbiological: qPCR<sup><xref ref-type="table-fn" rid="table3fn20">t</xref></sup> (copies/mL)</p></list-item><list-item><p>Weather: 5-min rainfall</p></list-item></list></td><td align="left" valign="top">226 samples</td><td align="left" valign="top">Event-based</td><td align="left" valign="top">3 km&#x00B2; watershed</td></tr><tr><td align="char" char="." valign="top">Zhao et al [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">Public (China CDC)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Environmental: river density (km/km&#x00B2;)</p></list-item><list-item><p>Socioeconomic: nightlight index</p></list-item><list-item><p>Cases: lab-confirmed</p></list-item></list></td><td align="left" valign="top">2741 cases</td><td align="left" valign="top">Annual 2004&#x2010;2014</td><td align="left" valign="top">County-level</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Standard units of measurement (mm, &#x00B0;C, m, etc.).</p></fn><fn id="table3fn2"><p><sup>b</sup>SRTM: Shuttle Radar Topography Mission.</p></fn><fn id="table3fn3"><p><sup>c</sup>NS: not specified.</p></fn><fn id="table3fn4"><p><sup>d</sup>RH: relative humidity.</p></fn><fn id="table3fn5"><p><sup>e</sup>PCR: polymerase chain reaction.</p></fn><fn id="table3fn6"><p><sup>f</sup>ELISA: Enzyme-Linked Immunosorbent Assay.</p></fn><fn id="table3fn7"><p><sup>g</sup>rRNA: ribosomal ribonucleic acid.</p></fn><fn id="table3fn8"><p><sup>h</sup>MALDI-TOF: Matrix-Assisted Laser Desorption/Ionization Time-of-Flight.</p></fn><fn id="table3fn9"><p><sup>i</sup>GIS: Geographic Information System.</p></fn><fn 
id="table3fn10"><p><sup>j</sup>MAT: microscopic agglutination test.</p></fn><fn id="table3fn11"><p><sup>k</sup>EMR: electronic medical record.</p></fn><fn id="table3fn12"><p><sup>l</sup>MODIS: Moderate Resolution Imaging Spectroradiometer.</p></fn><fn id="table3fn13"><p><sup>m</sup>CDC: Centers for Disease Control and Prevention.</p></fn><fn id="table3fn14"><p><sup>n</sup>NDVI: Normalized Difference Vegetation Index.</p></fn><fn id="table3fn15"><p><sup>o</sup>RGB: red-green-blue color model.</p></fn><fn id="table3fn16"><p><sup>p</sup>SINAN: Sistema de Informa&#x00E7;&#x00E3;o de Agravos de Notifica&#x00E7;&#x00E3;o.</p></fn><fn id="table3fn17"><p><sup>q</sup><italic>ICD-10: International Classification of Diseases, Tenth Revision.</italic></p></fn><fn id="table3fn18"><p><sup>r</sup>CHIRPS: Climate Hazards Group InfraRed Precipitation with Station data.</p></fn><fn id="table3fn19"><p><sup>s</sup>NTU: Nephelometric Turbidity Unit.</p></fn><fn id="table3fn20"><p><sup>t</sup>qPCR: quantitative polymerase chain reaction.</p></fn></table-wrap-foot></table-wrap><p>These datasets enabled regional risk prediction but lacked individual patient details. Similarly, Malaysia Meteorological Department records [<xref ref-type="bibr" rid="ref28">28</xref>] provided 364 weeks of hourly rainfall data (0&#x2010;65mm/hr) and relative humidity (45%&#x2010;100%) paired with PCR-confirmed cases across 12 districts, demonstrating how high-resolution temporal data improves ANN-based outbreak forecasting.</p><p>Private clinical datasets, while smaller in scale, delivered granular patient-level information crucial for diagnostic accuracy. The Napole&#x00F3;n Franco Pareja Children&#x2019;s Hospital dataset [<xref ref-type="bibr" rid="ref20">20</xref>] included 136 pediatric cases with detailed clinical parameters: fever duration (1&#x2010;21 d), liver enzyme levels (AST 15&#x2010;980 U/L), and urban/rural residence markers. 
More extensive Brazilian hospital records [<xref ref-type="bibr" rid="ref21">21</xref>] encompassed 4675 cases with serial creatinine measurements (0.2&#x2010;9.8 mg/dL) and household GPS coordinates, though missing 12% of lab results. These datasets typically included three key data categories: (1) clinical biomarkers (serum creatinine, MAT titers 1:50-1:6400), (2) demographic information (age, gender in 89% of studies), and (3) epidemiological risk factors (rodent exposure indices).</p><p>Advanced studies combined multiple data types to overcome individual limitations. Research in China [<xref ref-type="bibr" rid="ref35">35</xref>] integrated 2741 CDC case reports with satellite-derived nighttime light indices (0&#x2010;63 DN values) and river density maps (0&#x2010;5.7 km/km&#x00B2;), achieving exceptional predictive performance (AUC 0.95&#x2010;0.96). Hydrological studies in New Caledonia [<xref ref-type="bibr" rid="ref31">31</xref>] correlated 226 water samples (turbidity 0&#x2010;1,000 NTU, qPCR 10&#x2010;10&#x2076; copies/mL) with 5-minute rainfall events, demonstrating how microenvironmental data enhances transmission understanding. 
These multimodal approaches compensated for individual dataset constraints through: (1) temporal complementarity (monthly climate + daily case reports), (2) spatial layering (watershed hydrology + village coordinates), and (3) clinical-environmental linkages (serum markers + livestock density).</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This systematic review of 17 studies (2012&#x2010;2024) shows that while ML and DL techniques achieve promising accuracy (80%&#x2010;98%) in leptospirosis prediction and diagnosis, 3 critical limitations hinder clinical translation: (1) reliance on small, private clinical datasets limiting generalizability; (2) inconsistent validation methods, with only 11.8% employing temporal validation despite seasonality; and (3) underuse of advanced techniques like transfer learning (0% adoption) and data augmentation (5.9% adoption).</p><p>The most effective algorithms varied by task&#x2014;SVR and Random Forest for prediction, versus U-Net CNNs for microscopy-based diagnosis&#x2014;but all models faced challenges related to data quality, sample size, and geographic bias.</p><p><xref ref-type="other" rid="box1">Textbox 1</xref> shows the summary of the strengths and limitations identified in ML and DL studies for leptospirosis prediction and diagnosis.</p><boxed-text id="box1"><title>Summary of strengths and limitations identified in machine learning (ML) and deep learning (DL) studies for leptospirosis prediction and diagnosis.</title><p>Strengths:</p><list list-type="bullet"><list-item><p>High predictive performance (80%&#x2010;98% accuracy).</p></list-item><list-item><p>Variety of ML and DL algorithms applied (eg, random forests, support vector machines, and convolutional neural networks).</p></list-item><list-item><p>Integration of clinical and environmental data in some studies.</p></list-item><list-item><p>Growing research interest and recent
publications.</p></list-item></list><p>Limitations:</p><list list-type="bullet"><list-item><p>Small, private datasets limit generalizability.</p></list-item><list-item><p>Lack of external validation across datasets.</p></list-item><list-item><p>Underuse of transfer learning and ensemble methods.</p></list-item><list-item><p>Inconsistent evaluation metrics (accuracy, area under the curve, and sensitivity).</p></list-item></list></boxed-text><p>While these methods show strong performance (80%&#x2010;98% accuracy in some cases), their real-world applicability remains limited due to dataset constraints, validation inconsistencies, and underuse of advanced techniques such as transfer learning and ensemble learning. Addressing these gaps is essential to improve the robustness and clinical adoption of AI-driven leptospirosis diagnostics.</p><p>The most frequently used ML techniques for prediction tasks were SVR and Random Forest, while ANNs and CNNs, particularly U-Net, were commonly applied for diagnosis. This aligns with the increasing popularity of supervised ML methods in disease prediction.</p><p>Performance was primarily assessed using metrics such as accuracy, sensitivity, specificity, precision, and <italic>F</italic><sub>1</sub>-score, although AUC offers a more comprehensive measure of model performance, especially in binary classification tasks. Hybrid methods often produced better outcomes, with Random Forest and U-Net demonstrating strong accuracy and sensitivity in leptospirosis classification tasks. For instance, Shenoy et al [<xref ref-type="bibr" rid="ref29">29</xref>] achieved 87% accuracy and 91% sensitivity using Random Forest for diagnosis, suggesting that ensemble techniques could further enhance model performance.</p><p>Despite these advancements, there are still limitations in reaching clinician-level accuracy, especially when dealing with smaller datasets or limited training data.
Acceptable performance thresholds varied, with some studies using an AUC score of 0.96 or higher as a benchmark, but this was not universally applied. In addition, most studies relied on private, hospital-based datasets, limiting generalizability. The lack of external validation raises concerns about potential bias, emphasizing the need for public datasets and standardized validation protocols to improve cross-study comparability.</p><p>Cross-validation was the predominant method for evaluating the models, with k-fold or leave-one-out cross-validation being most common. However, due to the relatively small sample sizes in many studies, the conclusions drawn may not be as robust. Split validation (eg, 80:20 or 70:30 splits) was also used, but inconsistencies across studies hindered direct model comparisons. Future work should prioritize establishing standardized protocols to enhance consistency and reliability in ML and DL evaluations.</p><p>One significant finding of this review was the absence of pretrained models. Most studies developed models from scratch, limiting the generalizability and scalability of these models. Transfer learning involves using a pretrained model as a starting point and fine-tuning it for a specific task. It has proven effective in various fields, particularly in image analysis and natural language processing, by significantly improving performance on tasks with limited data.</p><p>The lack of transfer learning in these studies suggests a potential area for future research, as it could enhance the performance of DL models in leptospirosis prediction and diagnosis, especially in cases where training data is limited. 
Similarly, data augmentation, which helps expand training datasets through transformations (eg, rotations, translations, or noise), was only used in 1 study, highlighting a missed opportunity to improve model robustness.</p></sec><sec id="s4-2"><title>Challenges and Research Gaps in ML and DL Applications for Leptospirosis Prediction and Diagnosis</title><p>While ML and DL have significant potential in leptospirosis research, this review also identified key challenges that limit their broad application in clinical and public health settings.</p><sec id="s4-2-1"><title>Challenge 1: Limited Data Availability and Quality</title><p>A major challenge faced by the studies was the limited availability and quality of data. Many datasets, such as the one used in [<xref ref-type="bibr" rid="ref21">21</xref>], had missing or incomplete data, which reduced the accuracy and generalizability of the models. Small sample sizes were a frequent issue as well, as observed in [<xref ref-type="bibr" rid="ref20">20</xref>], where a dataset of only 136 patients resulted in high variability in model performance, especially for underrepresented cases like leptospirosis. Retrospective data collection posed further challenges, leading to biases in model training and evaluation, as noted by Shenoy et al [<xref ref-type="bibr" rid="ref29">29</xref>].</p></sec><sec id="s4-2-2"><title>Challenge 2: Generalizability and Regional Bias</title><p>Several studies, such as [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref33">33</xref>], identified biases in data collection, such as under- and over-reporting, regional disparities, and sampling biases, which hindered the broader applicability of the models. In addition, studies like [<xref ref-type="bibr" rid="ref35">35</xref>] noted issues with the spatial resolution of environmental data, affecting the precision of fine-scale risk mapping.
Moreover, the lack of external validation across different regions and datasets, as seen in studies like [<xref ref-type="bibr" rid="ref10">10</xref>], increased the risk of overfitting and limited the broader applicability of findings.</p><p>The review showed inconsistent performance across different ML and DL algorithms. For example, models like SVM, ANN, and CNNs performed well, with accuracy reaching up to 98% [<xref ref-type="bibr" rid="ref34">34</xref>]. However, advanced DL architectures like ResNet, Inception, and VGG were rarely used [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. Similarly, none of the studies applied transfer learning, a technique that could enhance performance, particularly when data is scarce [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Beyond data-related challenges, there are also limitations in the ML and DL techniques currently applied to leptospirosis diagnosis, as discussed next.</p></sec><sec id="s4-2-3"><title>Challenge 3: Underuse of Advanced Techniques</title><p>Advanced ensemble techniques, such as XGBoost and Adaboost, were notably underused [<xref ref-type="bibr" rid="ref26">26</xref>]. While Random Forest models and U-Net architectures performed well in specific tasks [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref34">34</xref>], ensemble methods could provide better predictive power when combining ML and DL approaches. The review also highlighted the minimal use of data augmentation techniques, which could help address the small sample size issues observed in many studies [<xref ref-type="bibr" rid="ref34">34</xref>].</p></sec></sec><sec id="s4-3"><title>Limitations of This Systematic Review</title><p>While this systematic review provides valuable insights, it has certain limitations.
The heterogeneity of study designs, dataset sizes, and performance metrics precluded a meta-analysis, limiting our ability to provide a standardized comparison of model performances. In addition, the reliance on published studies may have introduced publication bias, as studies with less favorable results may have remained unpublished. Future systematic reviews should aim to standardize reporting metrics and ensure broader dataset accessibility to improve comparability across studies.</p></sec><sec id="s4-4"><title>Recommendations for Future Research</title><p>Based on these findings, future research in ML and DL applications for leptospirosis should focus on the following areas:</p><list list-type="bullet"><list-item><p>Integration of advanced DL techniques: future studies should explore the potential of advanced DL architectures, such as ResNet and Inception, which are known to improve predictive performance, especially in image-based analysis.</p></list-item><list-item><p>Leveraging pretrained models and transfer learning: research should investigate how pretrained models can be fine-tuned for leptospirosis applications, particularly in data-limited scenarios.</p></list-item><list-item><p>Use of ensemble and hybrid approaches: advanced ensemble techniques like XGBoost and hybrid ML-DL approaches should be explored to improve model accuracy and robustness.</p></list-item><list-item><p>Broader geographic representation: most studies focused on regions like Southeast Asia and Brazil, with limited research in other high-risk areas like Africa and Central America. Expanding research to these regions will improve model generalizability.</p></list-item></list><p>The primary goal moving forward is to aggregate a comprehensive dataset from diverse sources and develop a robust data library to enhance the accuracy and reliability of leptospirosis prediction models. 
Given the heterogeneity of data features across different studies&#x2014;ranging from clinical records to environmental data&#x2014;the focus will be on standardizing and harmonizing these features for better model integration. By consolidating larger and more varied datasets, we aim to improve model generalization and tackle current challenges related to small sample sizes and overfitting. This unified dataset will serve as a foundation for applying advanced techniques, such as transfer learning and ensemble methods, to further enhance the predictive power of ML and DL models in leptospirosis detection.</p></sec><sec id="s4-5"><title>Conclusion</title><p>This systematic review examined ML and DL techniques for leptospirosis prediction and diagnosis by analyzing algorithm performance, evaluation methods, and challenges. While models such as SVM, ANN, decision trees, and CNNs have shown strong predictive power, most studies have relied on private hospital-based datasets, limiting generalizability.</p><p>A key reason for the predominance of private datasets is that they often include detailed patient-level clinical information (eg, laboratory values, comorbidities, and symptoms) essential for developing diagnostic models. In contrast, available public datasets mainly provide aggregated epidemiological or environmental data, which, while valuable for outbreak prediction, lack the granular patient-specific features necessary for individual diagnosis. As a result, limited use of public datasets reflects the inherent constraints in the nature and detail of publicly available data, rather than a preference by researchers.</p><p>Furthermore, the lack of advanced techniques like transfer learning and ensemble methods remains a concern, along with small sample sizes and inconsistent validation protocols. 
Overall, while significant progress has been made, there is considerable potential to improve the accuracy and generalizability of leptospirosis prediction models by integrating more comprehensive datasets and adopting advanced AI methodologies in future research.</p></sec></sec></body><back><fn-group><fn fn-type="con"><p>SS led and conducted this study and contributed to the conceptualization, data extraction, analysis, and manuscript preparation and submission. AJ assisted with all steps, including data extraction, analysis, and manuscript preparation. BR contributed to data extraction, analysis, and provided critical input throughout the study.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">ANN</term><def><p>artificial neural network</p></def></def-item><def-item><term id="abb3">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb4">CHARMS</term><def><p>Critical Appraisal and Data Extraction for Systematic Reviews of Prediction Modelling Studies</p></def></def-item><def-item><term id="abb5">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb6">DL</term><def><p>deep learning</p></def></def-item><def-item><term id="abb7">DT</term><def><p>decision tree</p></def></def-item><def-item><term id="abb8">KNN</term><def><p>k-nearest neighbor</p></def></def-item><def-item><term id="abb9">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb10">MAE</term><def><p>mean absolute error</p></def></def-item><def-item><term id="abb11">MAT</term><def><p>microscopic agglutination test</p></def></def-item><def-item><term id="abb12">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb13">MLP</term><def><p>multilayer perceptron</p></def></def-item><def-item><term
id="abb14">MSE</term><def><p>mean squared error</p></def></def-item><def-item><term id="abb15">PCR</term><def><p>polymerase chain reaction</p></def></def-item><def-item><term id="abb16">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb17">PROBAST</term><def><p>Prediction model Risk of Bias Assessment Tool</p></def></def-item><def-item><term id="abb18">RDT</term><def><p>rapid diagnostic test</p></def></def-item><def-item><term id="abb19">RMSE</term><def><p>root mean squared error</p></def></def-item><def-item><term id="abb20">SVM</term><def><p>support vector machine</p></def></def-item><def-item><term id="abb21">SVR</term><def><p>support vector regression</p></def></def-item><def-item><term id="abb22">TAN</term><def><p>tree augmented naive</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>Leptospirosis: fact sheet</article-title><source>World Health Organization</source><year>2009</year><access-date>2024-08-09</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/publications/i/item/B4221">https://www.who.int/publications/i/item/B4221</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>About leptospirosis</article-title><source>Centers for Disease Control and Prevention</source><year>2024</year><access-date>2024-08-09</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/leptospirosis/about/index.html">https://www.cdc.gov/leptospirosis/about/index.html</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Valente</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Bramugy</surname><given-names>J</given-names> </name><name name-style="western"><surname>Keddie</surname><given-names>SH</given-names> </name><etal/></person-group><article-title>Diagnosis of human leptospirosis: systematic review and meta-analysis of the diagnostic accuracy of the Leptospira microscopic agglutination test, PCR targeting LFB1, and IGM ELISA to Leptospira fainei serovar Hurstbridge</article-title><source>BMC Infect Dis</source><year>2024</year><month>02</month><day>7</day><volume>24</volume><issue>1</issue><fpage>168</fpage><pub-id pub-id-type="doi">10.1186/s12879-023-08935-0</pub-id><pub-id pub-id-type="medline">38326762</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ching</surname><given-names>T</given-names> </name><name name-style="western"><surname>Himmelstein</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Beaulieu-Jones</surname><given-names>BK</given-names> </name><etal/></person-group><article-title>Opportunities and obstacles for deep learning in biology and medicine</article-title><source>J R Soc Interface</source><year>2018</year><month>04</month><volume>15</volume><issue>141</issue><fpage>20170387</fpage><pub-id pub-id-type="doi">10.1098/rsif.2017.0387</pub-id><pub-id pub-id-type="medline">29618526</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Esteva</surname><given-names>A</given-names> </name><name name-style="western"><surname>Robicquet</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ramsundar</surname><given-names>B</given-names> </name><etal/></person-group><article-title>A guide to deep learning in healthcare</article-title><source>Nat 
Med</source><year>2019</year><month>01</month><volume>25</volume><issue>1</issue><fpage>24</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1038/s41591-018-0316-z</pub-id><pub-id pub-id-type="medline">30617335</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Topol</surname><given-names>EJ</given-names> </name></person-group><article-title>High-performance medicine: the convergence of human and artificial intelligence</article-title><source>Nat Med</source><year>2019</year><month>01</month><volume>25</volume><issue>1</issue><fpage>44</fpage><lpage>56</lpage><pub-id pub-id-type="doi">10.1038/s41591-018-0300-7</pub-id><pub-id pub-id-type="medline">30617339</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>LeCun</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bengio</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Hinton</surname><given-names>G</given-names> </name></person-group><article-title>Deep learning</article-title><source>Nature</source><year>2015</year><month>05</month><day>28</day><volume>521</volume><issue>7553</issue><fpage>436</fpage><lpage>444</lpage><pub-id pub-id-type="doi">10.1038/nature14539</pub-id><pub-id pub-id-type="medline">26017442</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Litjens</surname><given-names>G</given-names> </name><name name-style="western"><surname>Kooi</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bejnordi</surname><given-names>BE</given-names> </name><etal/></person-group><article-title>A survey on deep learning in medical image
analysis</article-title><source>Med Image Anal</source><year>2017</year><month>12</month><volume>42</volume><fpage>60</fpage><lpage>88</lpage><pub-id pub-id-type="doi">10.1016/j.media.2017.07.005</pub-id><pub-id pub-id-type="medline">28778026</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sarvamangala</surname><given-names>DR</given-names> </name><name name-style="western"><surname>Kulkarni</surname><given-names>RV</given-names> </name></person-group><article-title>Convolutional neural networks in medical image understanding: a survey</article-title><source>Evol Intell</source><year>2022</year><volume>15</volume><issue>1</issue><fpage>1</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.1007/s12065-020-00540-3</pub-id><pub-id pub-id-type="medline">33425040</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Galdino</surname><given-names>GS</given-names> </name><name name-style="western"><surname>de Sandes-Freitas</surname><given-names>TV</given-names> </name><name name-style="western"><surname>de Andrade</surname><given-names>LGM</given-names> </name><etal/></person-group><article-title>Development and validation of a simple machine learning tool to predict mortality in leptospirosis</article-title><source>Sci Rep</source><year>2023</year><month>03</month><day>18</day><volume>13</volume><issue>1</issue><fpage>4506</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-31707-4</pub-id><pub-id pub-id-type="medline">36934135</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Angermueller</surname><given-names>C</given-names> </name><name 
name-style="western"><surname>P&#x00E4;rnamaa</surname><given-names>T</given-names> </name><name name-style="western"><surname>Parts</surname><given-names>L</given-names> </name><name name-style="western"><surname>Stegle</surname><given-names>O</given-names> </name></person-group><article-title>Deep learning for computational biology</article-title><source>Mol Syst Biol</source><year>2016</year><month>07</month><day>29</day><volume>12</volume><issue>7</issue><fpage>878</fpage><pub-id pub-id-type="doi">10.15252/msb.20156651</pub-id><pub-id pub-id-type="medline">27474269</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moher</surname><given-names>D</given-names> </name><name name-style="western"><surname>Liberati</surname><given-names>A</given-names> </name><name name-style="western"><surname>Tetzlaff</surname><given-names>J</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name><collab>PRISMA Group</collab></person-group><article-title>Preferred reporting items for systematic reviews and meta-analyses: the PRISMA statement</article-title><source>PLoS Med</source><year>2009</year><month>07</month><day>21</day><volume>6</volume><issue>7</issue><fpage>e1000097</fpage><pub-id pub-id-type="doi">10.1371/journal.pmed.1000097</pub-id><pub-id pub-id-type="medline">19621072</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name><name name-style="western"><surname>de Groot</surname><given-names>JAH</given-names> </name><name name-style="western"><surname>Bouwmeester</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Critical appraisal and data extraction for systematic reviews of prediction 
modelling studies: the CHARMS checklist</article-title><source>PLoS Med</source><year>2014</year><month>10</month><volume>11</volume><issue>10</issue><fpage>e1001744</fpage><pub-id pub-id-type="doi">10.1371/journal.pmed.1001744</pub-id><pub-id pub-id-type="medline">25314315</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Christodoulou</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>J</given-names> </name><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Steyerberg</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Verbakel</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Van Calster</surname><given-names>B</given-names> </name></person-group><article-title>A systematic review shows no performance benefit of machine learning over logistic regression for clinical prediction models</article-title><source>J Clin Epidemiol</source><year>2019</year><month>06</month><volume>110</volume><fpage>12</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2019.02.004</pub-id><pub-id pub-id-type="medline">30763612</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Landis</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Koch</surname><given-names>GG</given-names> </name></person-group><article-title>The measurement of observer agreement for categorical data</article-title><source>Biometrics</source><year>1977</year><month>03</month><volume>33</volume><issue>1</issue><fpage>159</fpage><lpage>174</lpage><pub-id pub-id-type="doi">10.2307/2529310</pub-id><pub-id 
pub-id-type="medline">843571</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McHugh</surname><given-names>ML</given-names> </name></person-group><article-title>Interrater reliability: the kappa statistic</article-title><source>Biochem Med (Zagreb)</source><year>2012</year><volume>22</volume><issue>3</issue><fpage>276</fpage><lpage>282</lpage><pub-id pub-id-type="doi">10.11613/bm.2012.031</pub-id><pub-id pub-id-type="medline">23092060</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>He</surname><given-names>D</given-names> </name></person-group><article-title>The potential applications and challenges of ChatGPT in the medical field</article-title><source>Int J Gen Med</source><year>2024</year><volume>17</volume><fpage>817</fpage><lpage>826</lpage><pub-id pub-id-type="doi">10.2147/IJGM.S456659</pub-id><pub-id pub-id-type="medline">38476626</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Thongprayoon</surname><given-names>C</given-names> </name><name name-style="western"><surname>F&#x00FC;l&#x00F6;p</surname><given-names>T</given-names> </name><name name-style="western"><surname>Cheungpasitporn</surname><given-names>W</given-names> </name></person-group><article-title>Enhancing clinical decision&#x2010;making: optimizing ChatGPT&#x2019;s performance in hypertension care</article-title><source>J Clinical 
Hypertension</source><year>2024</year><month>05</month><volume>26</volume><issue>5</issue><fpage>588</fpage><lpage>593</lpage><pub-id pub-id-type="doi">10.1111/jch.14822</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Reitsma</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name></person-group><article-title>Transparent reporting of a multivariable prediction model for individual prognosis or diagnosis (TRIPOD): the TRIPOD statement</article-title><source>Ann Intern Med</source><year>2015</year><month>01</month><day>6</day><volume>162</volume><issue>1</issue><fpage>55</fpage><lpage>63</lpage><pub-id pub-id-type="doi">10.7326/M14-0697</pub-id><pub-id pub-id-type="medline">25560714</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Caicedo Torres</surname><given-names>W</given-names> </name><name name-style="western"><surname>Quintana</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pinz&#x00F3;n</surname><given-names>H</given-names> </name></person-group><article-title>Differential diagnosis of hemorrhagic fevers using ARTMAP and an Artificial Immune System</article-title><source>Int J Artif Intell</source><year>2013</year><access-date>2025-05-22</access-date><volume>11</volume><issue>13 A</issue><fpage>150</fpage><lpage>169</lpage><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.scopus.com/inward/record.uri?eid=2-s2.0-84879759875&#x0026;partnerID=40&#x0026;md5=e126f6afb294df43dee6cf023435ca5a">https://www.scopus.com/inward/record.uri?eid=2-s2.0-84879759875&#x0026;partnerID=40&#x0026;md5=e126f6afb294df43dee6cf023435ca5a</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Nery</surname><given-names>NRR</given-names> </name><name name-style="western"><surname>Claro</surname><given-names>DB</given-names> </name><name name-style="western"><surname>Lindow</surname><given-names>JC</given-names> </name></person-group><article-title>Classification model analysis for the prediction of leptospirosis cases</article-title><conf-name>2016 11th Iberian Conference on Information Systems and Technologies (CISTI)</conf-name><conf-date>Jun 15-18, 2016</conf-date><conf-loc>Gran Canaria, Spain</conf-loc><fpage>1</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1109/CISTI.2016.7521494</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nery</surname><given-names>NRR</given-names>  <suffix>Jr</suffix></name><name name-style="western"><surname>Claro</surname><given-names>DB</given-names> </name><name name-style="western"><surname>Lindow</surname><given-names>JC</given-names> </name></person-group><article-title>Prediction of leptospirosis cases using classification algorithms</article-title><source>IET Softw</source><year>2017</year><month>06</month><volume>11</volume><issue>3</issue><fpage>93</fpage><lpage>99</lpage><pub-id pub-id-type="doi">10.1049/iet-sen.2016.0193</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Lopez</surname><given-names>DM</given-names> </name><name name-style="western"><surname>de Mello</surname><given-names>FL</given-names> </name><name name-style="western"><surname>Giordano Dias</surname><given-names>CM</given-names> </name><etal/></person-group><article-title>Evaluating the surveillance system for spotted fever in Brazil using machine-learning techniques</article-title><source>Front Public Health</source><year>2017</year><volume>5</volume><fpage>323</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2017.00323</pub-id><pub-id pub-id-type="medline">29250519</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mayfield</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Lowry</surname><given-names>JH</given-names> </name><etal/></person-group><article-title>Predictive risk mapping of an environmentally-driven infectious disease using spatial Bayesian networks: a case study of leptospirosis in Fiji</article-title><source>PLOS Negl Trop Dis</source><year>2018</year><month>10</month><volume>12</volume><issue>10</issue><fpage>e0006857</fpage><pub-id pub-id-type="doi">10.1371/journal.pntd.0006857</pub-id><pub-id pub-id-type="medline">30307936</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sonthayanon</surname><given-names>P</given-names> </name><name name-style="western"><surname>Jaresitthikunchai</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mangmee</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Whole cell matrix assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOF MS) 
for identification of Leptospira spp. in Thailand and Lao PDR</article-title><source>PLOS Negl Trop Dis</source><year>2019</year><month>04</month><volume>13</volume><issue>4</issue><fpage>e0007232</fpage><pub-id pub-id-type="doi">10.1371/journal.pntd.0007232</pub-id><pub-id pub-id-type="medline">30969958</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahangarcani</surname><given-names>M</given-names> </name><name name-style="western"><surname>Farnaghi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Shirzadi</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Pilesj&#x00F6;</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mansourian</surname><given-names>A</given-names> </name></person-group><article-title>Predictive risk mapping of human leptospirosis using support vector machine classification and multilayer perceptron neural network</article-title><source>Geospat Health</source><year>2019</year><month>05</month><day>14</day><volume>14</volume><issue>1</issue><pub-id pub-id-type="doi">10.4081/gh.2019.711</pub-id><pub-id pub-id-type="medline">31099515</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mohammadinia</surname><given-names>A</given-names> </name><name name-style="western"><surname>Saeidian</surname><given-names>B</given-names> </name><name name-style="western"><surname>Pradhan</surname><given-names>B</given-names> </name><name name-style="western"><surname>Ghaemi</surname><given-names>Z</given-names> </name></person-group><article-title>Prediction mapping of human leptospirosis using ANN, GWR, SVM and GLM approaches</article-title><source>BMC Infect 
Dis</source><year>2019</year><month>11</month><day>13</day><volume>19</volume><issue>1</issue><fpage>971</fpage><pub-id pub-id-type="doi">10.1186/s12879-019-4580-4</pub-id><pub-id pub-id-type="medline">31722676</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rahmat</surname><given-names>F</given-names> </name><name name-style="western"><surname>Zulkafli</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Juraiza Ishak</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mohd Noor</surname><given-names>SB</given-names> </name><name name-style="western"><surname>Yahaya</surname><given-names>H</given-names> </name><name name-style="western"><surname>Masrani</surname><given-names>A</given-names> </name></person-group><article-title>Exploratory data analysis and artificial neural network for prediction of leptospirosis occurrence in Seremban, Malaysia based on meteorological data</article-title><source>Front Earth Sci</source><year>2020</year><volume>8</volume><pub-id pub-id-type="doi">10.3389/feart.2020.00377</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shenoy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rajan</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Rashid</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Artificial intelligence in differentiating tropical infections: a step ahead</article-title><source>PLOS Negl Trop Dis</source><year>2022</year><month>06</month><volume>16</volume><issue>6</issue><fpage>e0010455</fpage><pub-id pub-id-type="doi">10.1371/journal.pntd.0010455</pub-id><pub-id 
pub-id-type="medline">35771774</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jayaramu</surname><given-names>V</given-names> </name><name name-style="western"><surname>Zulkafli</surname><given-names>Z</given-names> </name><name name-style="western"><surname>De Stercke</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Leptospirosis modelling using hydrometeorological indices and random forest machine learning</article-title><source>Int J Biometeorol</source><year>2023</year><month>03</month><volume>67</volume><issue>3</issue><fpage>423</fpage><lpage>437</lpage><pub-id pub-id-type="doi">10.1007/s00484-022-02422-y</pub-id><pub-id pub-id-type="medline">36719482</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thibeaux</surname><given-names>R</given-names> </name><name name-style="western"><surname>Genthon</surname><given-names>P</given-names> </name><name name-style="western"><surname>Govan</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Rainfall-driven resuspension of pathogenic Leptospira in a leptospirosis hotspot</article-title><source>Sci Total Environ</source><year>2024</year><month>02</month><day>10</day><volume>911</volume><fpage>168700</fpage><pub-id pub-id-type="doi">10.1016/j.scitotenv.2023.168700</pub-id><pub-id pub-id-type="medline">37992819</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Douchet</surname><given-names>L</given-names> </name><name name-style="western"><surname>Menkes</surname><given-names>C</given-names> </name><name name-style="western"><surname>Herbreteau</surname><given-names>V</given-names> 
</name><etal/></person-group><article-title>Climate-driven models of leptospirosis dynamics in tropical islands from three oceanic basins</article-title><source>PLOS Negl Trop Dis</source><year>2024</year><month>04</month><volume>18</volume><issue>4</issue><fpage>e0011717</fpage><pub-id pub-id-type="doi">10.1371/journal.pntd.0011717</pub-id><pub-id pub-id-type="medline">38662800</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Douchet</surname><given-names>L</given-names> </name><name name-style="western"><surname>Goarant</surname><given-names>C</given-names> </name><name name-style="western"><surname>Mangeas</surname><given-names>M</given-names> </name><name name-style="western"><surname>Menkes</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hinjoy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Herbreteau</surname><given-names>V</given-names> </name></person-group><article-title>Unraveling the invisible leptospirosis in mainland Southeast Asia and its fate under climate change</article-title><source>Sci Total Environ</source><year>2022</year><month>08</month><day>1</day><volume>832</volume><fpage>155018</fpage><pub-id pub-id-type="doi">10.1016/j.scitotenv.2022.155018</pub-id><pub-id pub-id-type="medline">35390383</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kulkarni</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sai Dinesh Reddy</surname><given-names>P</given-names> </name><name name-style="western"><surname>Bassi</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kashyap</surname><given-names>SK</given-names> </name><name 
name-style="western"><surname>Vijayalakshmi</surname><given-names>M</given-names> </name></person-group><article-title>Spirochaeta bacteria detection using an effective semantic segmentation technique</article-title><source>Advances in Intelligent Systems and Computing</source><year>2022</year><fpage>355</fpage><lpage>365</lpage><pub-id pub-id-type="doi">10.1007/978-981-16-9573-5_26</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Mapping risk of leptospirosis in China using environmental and socioeconomic data</article-title><source>BMC Infect Dis</source><year>2016</year><month>07</month><day>22</day><volume>16</volume><issue>1</issue><fpage>343</fpage><pub-id pub-id-type="doi">10.1186/s12879-016-1653-5</pub-id><pub-id pub-id-type="medline">27448599</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Jadhav</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sawesi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rashrash</surname><given-names>B</given-names> </name></person-group><article-title>Bias and generalizability challenges in machine learning models for leptospirosis</article-title><conf-name>2024 IEEE International Conference on Big Data (BigData)</conf-name><conf-date>Dec 15-18, 2024</conf-date><conf-loc>Washington, DC, USA</conf-loc><fpage>4989</fpage><lpage>4995</lpage><pub-id 
pub-id-type="doi">10.1109/BigData62323.2024.10825588</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Search terms.</p><media xlink:href="medinform_v13i1e67859_app1.docx" xlink:title="DOCX File, 14 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Agreement between human reviewers and ChatGPT-4o in study screening.</p><media xlink:href="medinform_v13i1e67859_app2.docx" xlink:title="DOCX File, 74 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Quality assessment.</p><media xlink:href="medinform_v13i1e67859_app3.docx" xlink:title="DOCX File, 18 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Characteristics of included studies.</p><media xlink:href="medinform_v13i1e67859_app4.docx" xlink:title="DOCX File, 35 KB"/></supplementary-material><supplementary-material id="app5"><label>Checklist 1</label><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) checklist, detailing the sections, topics, checklist items, and their corresponding locations within the review.</p><media xlink:href="medinform_v13i1e67859_app5.docx" xlink:title="DOCX File, 24 KB"/></supplementary-material></app-group></back></article>