<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v14i1e78770</article-id><article-id pub-id-type="doi">10.2196/78770</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Mortality Prediction Among People Living With HIV on Antiretroviral Therapy in Public Health Facilities in Gondar City Administration, Northwest Ethiopia: Machine Learning&#x2013;Based Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Gedefaw</surname><given-names>Andualem Enyew</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Biwota</surname><given-names>Getaye Tizazu</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Mesele</surname><given-names>Abraraw Gebre</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Mengistu</surname><given-names>Abraham Keffale</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Teferi</surname><given-names>Gizaw Hailiye</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Health Informatics, Institute of Public Health, College of Medicine and Health Sciences, University of Gondar</institution><addr-line>P.O. Box 196, Hospita</addr-line><addr-line>Gondar</addr-line><country>Ethiopia</country></aff><aff id="aff2"><institution>Department of Health Informatics, College of Medicine and Health Sciences, Debre Markos University</institution><addr-line>Debre Markos</addr-line><country>Ethiopia</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Mpofu</surname><given-names>Rephaim</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Wei</surname><given-names>Wu Di</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Andualem Enyew Gedefaw, MSc, Department of Health Informatics, Institute of Public Health, College of Medicine and Health Sciences, University of Gondar, P.O. 
Box 196, Hospita, Gondar, Ethiopia, 251 918356802; <email>andualemenyew@gmail.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>20</day><month>4</month><year>2026</year></pub-date><volume>14</volume><elocation-id>e78770</elocation-id><history><date date-type="received"><day>09</day><month>06</month><year>2025</year></date><date date-type="rev-recd"><day>02</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>12</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Andualem Enyew Gedefaw, Getaye Tizazu Biwota, Abraraw Gebre Mesele, Abraham Keffale Mengistu, Gizaw Hailiye Teferi. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 20.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2026/1/e78770"/><abstract><sec><title>Background</title><p>Predicting mortality among people living with HIV enables clinicians to implement timely, targeted, and preventive interventions at the start of antiretroviral therapy (ART). However, prognostic models must rely strictly on baseline predictors to avoid look-ahead bias and ensure scientific validity. This study evaluates machine-learning (ML) algorithms for baseline mortality prediction using routine electronic medical record data.</p></sec><sec><title>Objective</title><p>This study aims to predict mortality among people living with HIV receiving ART using baseline clinical and sociodemographic characteristics through ML models in public health facilities of Gondar City Administration, Northwest Ethiopia.</p></sec><sec sec-type="methods"><title>Methods</title><p>The retrospective cohort study was conducted using electronic medical record data from 12,871 people living with HIV on ART (2005&#x2010;2024). Seven base classifiers were evaluated using stratified 10-fold cross-validation. Synthetic minority oversampling technique (SMOTE)&#x2013;balanced variants were used only for sensitivity analysis. SMOTE oversampling was applied only to training folds; the final evaluation used the original imbalanced test data. 
Shapley Additive Explanations (SHAP) analysis identified key baseline predictors.</p></sec><sec sec-type="results"><title>Results</title><p>Gradient boosting on the original data achieved superior performance (accuracy 87.0%, <italic>F</italic><sub>1</sub>-score 0.619, area under the receiver operating characteristic curve 0.859), outperforming extreme gradient boosting (<italic>F</italic><sub>1</sub>-score 0.609, area under the receiver operating characteristic curve 0.835) and SMOTE variants. The SHAP analysis identified education level (lack of formal education; +0.84) and a low baseline cluster of differentiation 4 (CD4; a type of immune cell) count of 140 cells/mm&#x00B3; (+0.54) as substantially increasing predicted mortality risk. Urban residence (&#x2212;0.35) and working functional status (&#x2212;0.12) showed protective effects, whereas age (45 y; &#x2212;0.02) had minimal influence in the illustrated case. Globally, lower CD4 counts and the absence of formal education were consistently associated with increased mortality risk.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Ensemble ML models demonstrated moderate-to-strong discrimination for predicting mortality among people living with HIV using strictly baseline routine electronic medical record data. SHAP-based interpretability confirmed that educational attainment and baseline CD4 count were the dominant determinants of predicted mortality risk, underscoring the combined influence of socioeconomic vulnerability and immunological status at ART initiation. 
These findings support the potential utility of interpretable ML models for early risk stratification and targeted clinical decision-making in resource-limited settings; however, external validation is required before routine clinical implementation.</p></sec></abstract><kwd-group><kwd>HIV mortality prediction</kwd><kwd>machine learning</kwd><kwd>ensemble algorithms</kwd><kwd>Shapley Additive Explanations</kwd><kwd>SHAP analysis</kwd><kwd>antiretroviral therapy</kwd><kwd>baseline predictors</kwd><kwd>electronic medical records</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Recent advances in machine learning (ML) have enabled the development of predictive models using routinely collected electronic medical record (EMR) data to support clinical decision-making and risk stratification. Several studies demonstrated the feasibility of applying ML algorithms, such as logistic regression, random forest, gradient boosting, and extreme gradient boosting (XGBoost), to predict mortality and other adverse outcomes across diverse clinical settings using EMR or electronic health record data. Systematic reviews highlighted that ML-based mortality prediction models often outperform traditional statistical approaches, while also emphasizing the importance of appropriate validation strategies and the prevention of data leakage through baseline-only predictors [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. EMR-based ML models have been successfully applied to predict short-term and in-hospital mortality among patients with conditions, such as cancer, heart failure, and COVID-19, using structured clinical and laboratory variables [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. 
These studies underscore the growing role of explainable artificial intelligence (XAI) methods, such as SHAP (Shapley Additive Explanations), in improving the interpretability and clinical relevance of complex ML models, which is a key consideration for adoption in real-world health care settings [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>HIV remains a major public health concern globally and in sub-Saharan Africa, where mortality among people living with HIV persists despite widespread ART coverage. Since the early 2000s, when the global HIV epidemic peaked, new infections and HIV-related deaths have declined. Expanded antiretroviral therapy (ART) coverage has reduced HIV-related mortality by almost 47%, from 2.1 million in 2004 to 630,000 in 2023 [<xref ref-type="bibr" rid="ref8">8</xref>]. However, the epidemic remains a significant global health issue, especially in regions with limited access to preventive, diagnostic, and treatment services. Currently, an estimated 39.9 million individuals are living with HIV, according to the UNAIDS (Joint United Nations Program on HIV/AIDS) 2024 Global HIV and AIDS Statistics Report.</p><p>Sub-Saharan Africa bears the highest global HIV burden, accounting for approximately 67% of HIV cases and over 60% of HIV-related deaths [<xref ref-type="bibr" rid="ref8">8</xref>]. Southern and Eastern African nations have made progress in reducing mortality and increasing access to ART. However, challenges, such as poverty, stigma, violence, and health care inequality, hinder efforts to decrease HIV-related fatalities. In 2023, approximately 390,000 Africans died from HIV-related causes. While ART rollouts have significantly reduced these numbers, the disease remains a serious public health concern.</p><p>Ethiopia has one of the largest HIV epidemics in East Africa, with approximately 610,000 people living with HIV in 2023 [<xref ref-type="bibr" rid="ref2">2</xref>]. 
Despite improvements in health care infrastructure and ART coverage, significant challenges persist, especially for individuals with advanced HIV or treatment failure. In 2023, Ethiopia recorded approximately 10,000 HIV-related deaths, with rural areas facing higher mortality due to limited health care access [<xref ref-type="bibr" rid="ref9">9</xref>]. HIV-related mortality remains high in sub-Saharan Africa, including Ethiopia, even though ART has transformed HIV into a chronic, manageable condition. Key contributors include late ART initiation, poor adherence, medication resistance, and virological failure, which prevent effective viral suppression [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. Virological failure accelerates disease progression and increases the risk of AIDS-related death. Reducing mortality among people living with HIV requires early diagnosis, improved ART regimens, and better patient monitoring.</p><p>A 2022 study at the University of Gondar Comprehensive and Specialized Hospital reported a virological failure rate of 14% among people living with HIV on ART [<xref ref-type="bibr" rid="ref12">12</xref>]. This highlights the urgent need for predictive tools to identify high-risk patients early and improve clinical outcomes. Mortality prediction plays a crucial role in health care, epidemiology, insurance, and policymaking. It helps health care providers identify high-risk individuals, enabling early interventions and personalized treatments that improve patient outcomes [<xref ref-type="bibr" rid="ref13">13</xref>]. It also supports epidemiological research by analyzing disease patterns and assessing intervention effectiveness [<xref ref-type="bibr" rid="ref8">8</xref>]. 
Advanced methodologies, such as statistical models, ML, and deep learning, have significantly improved the accuracy and applicability of mortality predictions [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>].</p><p>ML offers a data-driven solution for predicting mortality risk in people living with HIV on ART. ML algorithms analyze large datasets to detect complex patterns, risk factors, and interactions that traditional statistical methods may overlook [<xref ref-type="bibr" rid="ref16">16</xref>]. This study develops interpretable ML models using strictly baseline predictors (recorded at ART initiation) from the EMRs of 12,871 people living with HIV to enable prospective mortality risk stratification. There is a pressing need to implement advanced predictive tools, such as ML models, to identify individuals at greater risk of treatment failure or death [<xref ref-type="bibr" rid="ref17">17</xref>]. These tools can enable health care providers to take proactive measures, improve treatment outcomes, and reduce HIV-related mortality in Ethiopia.</p><p>This study aimed to develop and validate ensemble ML models predicting mortality among people living with HIV on ART in Gondar, Ethiopia, using baseline-only EMR data, with SHAP analysis to identify clinically actionable risk factors. Several studies in Africa and Asia have applied ML for HIV-related outcomes, including mortality and treatment interruption. A meta-analysis across 24 studies found that ML models achieved a C-index of ~0.83 for HIV mortality prediction [<xref ref-type="bibr" rid="ref18">18</xref>]. In China, an XGBoost and random forest ensemble achieved an area under the receiver operating characteristic (ROC) curve (AUC) of ~0.98 for in-hospital mortality among patients with HIV/AIDS with cryptococcal infection [<xref ref-type="bibr" rid="ref19">19</xref>]. 
In Nigeria, a large EMR-based ML study of 41,394 people living with HIV reported an AUC of ~0.8 for treatment interruption [<xref ref-type="bibr" rid="ref20">20</xref>]. However, these studies often used hospital-based cohorts or follow-up predictors, focused on treatment interruption or virological failure rather than long-term mortality, and lacked comprehensive interpretability analyses. Our study addresses these gaps through large-scale public health facility EMR data (n=12,871), strict baseline-only prediction, rigorous temporal validation, and SHAP-based clinical interpretability.</p><p>Traditional methods of monitoring and predicting patient outcomes rely on periodic clinical assessments, laboratory tests, and physician judgment. These approaches often detect complications only after symptoms have worsened, leading to delayed interventions that may not prevent adverse outcomes. Additionally, reliance on manual record keeping and retrospective data analysis limits the ability to identify high-risk patients early, reducing the effectiveness of timely medical interventions. In resource-constrained settings, such as the Gondar City Administration, integrating ML models could improve health care delivery, optimize resource distribution, and enhance proactive patient management [<xref ref-type="bibr" rid="ref21">21</xref>]. This approach aligns with Ethiopia&#x2019;s national HIV/AIDS control strategy and global health goals aimed at reducing HIV-related mortality.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Area and Period</title><p>Data were extracted from the ART EMRs covering the period from 2005 to 2024. Only patients on ART for &#x2265;6 months before analysis were included to ensure baseline predictor availability.</p><p>This study was conducted at public health facilities in Gondar City Administration (Amhara Region, Ethiopia) from October 10 to December 10, 2024. 
Gondar City (population 457,938) is located 748 km northwest of Addis Ababa. In the Gondar City Administration, several health care facilities provide ART services to people living with HIV. Among them, the University of Gondar Comprehensive and Specialized Hospital serves as a key provider of ART services, offering specialized care to patients. Additionally, multiple health centers, including Azezo Health Center, Gondar Health Center, Maraki Health Center, Mintwab Health Center, St. Gebriel Health Center, Teda Health Center, and Woleka Health Center, also provide ART services, ensuring accessibility to treatment at various levels of health care.</p><p>Health facility mapping was conducted to provide context to the data source; however, facility service gaps were not the focus of this predictive modeling study (see <xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Geographic location of the study area showing the Gondar City Administration, Amhara National Regional State, Northwest Ethiopia, where a retrospective cohort study of adults living with HIV receiving antiretroviral therapy (ART) was conducted using electronic medical records from public health facilities, 2005&#x2010;2024. The map of the study area was adopted from Tamiru AT, Rade BK et al. 
(2020) [<xref ref-type="bibr" rid="ref22">22</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e78770_fig01.png"/></fig></sec><sec id="s2-2"><title>Study Design</title><p>The retrospective cohort study analyzed baseline-only EMR data (ART initiation records) to predict prospective mortality (see <xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Workflow diagram of the machine-learning pipeline used to predict mortality among adults living with HIV on antiretroviral therapy (ART) in public health facilities of Gondar City Administration, Northwest Ethiopia, 2024. AUC: area under the receiver operating characteristic curve; EMR: electronic medical record; SMOTE: synthetic minority oversampling technique; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e78770_fig02.png"/></fig></sec><sec id="s2-3"><title>Data Source</title><p>Data were sourced from EMR-ART databases across Gondar City Administration public health facilities, containing baseline sociodemographic, clinical, laboratory, and treatment initiation data.</p><sec id="s2-3-1"><title>Description of the Dataset</title><p>The dataset used in this study focused on people living with HIV undergoing ART in public health facilities within the Gondar City Administration, Ethiopia. Data collection covered the entire period since the start of ART services at the facilities.</p></sec><sec id="s2-3-2"><title>Inclusion and Exclusion Criteria</title><p>Eligible participants were adults aged &#x2265;18 years with &#x2265;6 months of ART exposure. Participants were included if baseline variables were recorded at ART initiation; limited missing baseline values (&#x003C;10%) were addressed using imputation. 
We excluded patients with incomplete ART start dates, those transferred to other facilities without outcome documentation, and non-HIV&#x2013;related deaths (eg, accidents and trauma) to ensure outcome validity.</p></sec><sec id="s2-3-3"><title>Predictor Variables</title><p>The binary outcome was mortality (follow-up status: alive=0, dead=1). Baseline predictors (all recorded at ART initiation) included 10 variables across 3 domains: sociodemographic (age, sex, marital status, educational level, residence, and religion), clinical (tuberculosis [TB] screening result and functional status), and immunological (baseline cluster of differentiation 4 [CD4] count). The baseline CD4 count was modeled as a continuous numerical feature in all ML algorithms. Descriptive statistics are therefore presented as median and IQR. Variable selection used recursive feature elimination with cross-validation applied exclusively to training data. The subsequent SHAP analysis confirmed the clinical plausibility of retained predictors.</p></sec><sec id="s2-3-4"><title>Operational Definitions</title><p>Mortality, the outcome variable of interest in the study, is defined as the death of people living with HIV receiving ART that is directly or indirectly related to the progression of HIV, opportunistic infections, or complications arising from the virus or treatment.</p><p>A low CD4 count of less than 200 cells/mm&#x00B3; indicates a severely weakened immune system, placing the individual at high risk for opportunistic infections and HIV-related complications. A moderate CD4 count between 200 and 500 cells/mm&#x00B3; suggests a moderately functioning immune system. 
A high CD4 count greater than 500 cells/mm&#x00B3; is considered within the normal range and reflects a healthier immune system.</p><p>Comorbidities are the presence of other chronic health conditions in people living with HIV alongside HIV infection.</p></sec><sec id="s2-3-5"><title>Data Collection Tool and Procedures</title><p>The data collection tool used in this study was an electronic data extraction form specifically designed to retrieve relevant patient information from the EMR-ART database. The form was developed via a standardized template based on key study variables to ensure data consistency, completeness, and accuracy. The data collection process began with obtaining ethical approval and permission from relevant health authorities. Data collectors were trained in using the extraction tool and procedures to ensure uniformity in data retrieval. Data were extracted by reviewing patient records from the EMR-ART database using selected features and exported into Excel. Each record was thoroughly reviewed for accuracy, and a second data collector cross-verified entries to maintain data consistency.</p></sec></sec><sec id="s2-4"><title>Ethical Considerations</title><p>Ethical approval for this study was obtained from the College of Medicine and Health Sciences Institutional Research Ethics Review Committee (CMHS IRERC) at Debre Markos University (reference number RCSTTD/403/01/17). Due to the retrospective design, the requirement for informed consent was waived by the committee in accordance with national research ethics guidelines.</p><p>All data used were deidentified before analysis to ensure participant privacy and confidentiality. No personally identifiable information (eg, names or medical record numbers) was extracted. Data were stored on password-protected computers accessible only to authorized researchers.</p><p>There was no financial compensation to participants, as this study relied on existing EMRs. 
No images or identifiable information about individual participants are presented in this paper. Ethical approval was obtained from the institutional review board of the Amhara Public Health Institute (APHI) (approval number: APHI/322/007). The study was conducted in accordance with the ethical principles of the Declaration of Helsinki and complied with the Ethiopian National Research Ethics Review Guideline [<xref ref-type="bibr" rid="ref23">23</xref>].</p></sec><sec id="s2-5"><title>Data Quality Control</title><p>Data quality control procedures were applied throughout the study to ensure reliability and validity. These procedures involved consistency checks and the correction of missing entries. Data entry was supervised by experienced clerks, and predefined validation rules flagged inconsistencies.</p></sec><sec id="s2-6"><title>Data Management and Analysis</title><sec id="s2-6-1"><title>Association Rule Mining</title><p>Association rule mining was conducted to identify frequent co-occurrence patterns among clinical and treatment-related factors associated with mortality. The Apriori algorithm implemented in the mlxtend Python library was used. The mlxtend Python library was developed by Sebastian Raschka and provides useful extensions for machine learning and data science tasks. Key metrics included support (frequency of rule occurrence), confidence (conditional probability of the consequent given the antecedent), and lift (degree of dependence between antecedent and consequent). Rules with support &#x2265;0.065, confidence &#x2265;0.64, and lift &#x003E;4.6 were retained for interpretation. These analyses were used to explore unadjusted co-occurrence patterns and were not intended to estimate causal or adjusted effects.</p></sec><sec id="s2-6-2"><title>Data Cleaning</title><p>This phase involved handling missing values, detecting and removing outliers via Excel filters, and addressing class imbalance. 
The dataset contained less than 10% missing values for most variables. Median imputation was applied to numerical variables, such as CD4 count, while mode imputation was used for categorical baseline variables, such as marital status, residence, and religion. These simple imputation methods were chosen due to the relatively small proportion of missingness and to maintain data interpretability. One-hot encoding was used to transform categorical variables, creating separate columns for each category. To handle class imbalance, various techniques were tested, with SMOTE (synthetic minority oversampling technique) providing the best accuracy. Other balancing techniques, such as random undersampling and cost-sensitive learning, were initially tested. However, they resulted in reduced sensitivity for the minority class (deceased patients). SMOTE provided the best balance between precision and recall, ensuring robust generalization across folds.</p></sec><sec id="s2-6-3"><title>Feature Engineering</title><p>By converting raw data into meaningful features, feature engineering enhances model performance. One-hot encoding was used for categorical variables, and normalization ensured comparable feature scales [<xref ref-type="bibr" rid="ref24">24</xref>].</p></sec><sec id="s2-6-4"><title>Feature Selection and Dimensionality Reduction</title><p>Feature selection was performed exclusively using recursive feature elimination with cross-validation on the training data to identify the most informative subset of predictors while minimizing redundancy and overfitting.</p></sec><sec id="s2-6-5"><title>Data Splitting</title><p>A training-test split and K-fold cross-validation ensured robust evaluation. 
The dataset was divided into training and testing sets, with K-fold validation reducing overfitting and improving generalizability.</p></sec><sec id="s2-6-6"><title>Model Training and Evaluation</title><p>To prevent data leakage, the dataset was first split into training and hold-out test sets. All preprocessing steps, including imputation, feature scaling, encoding, and feature selection, were conducted exclusively within the training data using a cross-validation framework. Model performance was assessed using stratified 10-fold cross-validation on the training set and subsequently evaluated on the original, imbalanced hold-out test set. Sensitivity analyses were conducted by applying SMOTE to the training data only to assess the robustness of model performance under class imbalance.</p></sec><sec id="s2-6-7"><title>Data Leakage Assessment and Stress-Test Validation</title><p>To ensure the robustness and validity of the predictive modeling framework, a series of predefined stress tests were conducted to assess potential data leakage, proxy outcome variables, and look-ahead bias.</p></sec><sec id="s2-6-8"><title>Stress Test: Broken Split Check (Demographic-Only Model)</title><p>To verify independence between training and testing datasets, an XGBoost model was trained using only demographic variables (age and sex). The resulting model achieved an AUC of 0.631 on the hold-out test dataset, consistent with expected performance for demographic-only mortality prediction models. This finding confirmed that the data split procedure did not introduce artificial performance inflation due to overlapping observations or improper sampling procedures.</p></sec><sec id="s2-6-9"><title>Model Selection</title><p>After the model was trained, several classifiers were evaluated to identify the most suitable model for predicting mortality among people living with HIV. 
Given that the outcome variable is categorical and falls into 2 mutually exclusive groups, the problem was framed as a binary classification task. To select the best model, we compared the performance of these classifiers via evaluation metrics, such as accuracy, precision, recall, <italic>F</italic><sub>1</sub>-score, and AUC [<xref ref-type="bibr" rid="ref25">25</xref>]. During cross-validation, we examined the consistency of each model&#x2019;s performance across different subsets of the data. The model with the best trade-off between predictive power and generalization to unseen data was selected as the final model. This approach ensured that the selected model was appropriate for accurately predicting mortality among people living with HIV.</p></sec><sec id="s2-6-10"><title>Evaluation Criteria</title><p>In this study, the performance of the predictive models was evaluated by testing a dataset within a training-test split and cross-validation. The performance of the trained models was subsequently evaluated on the test set based on the criteria of accuracy score, ROC curve, precision, recall, and <italic>F</italic>-measure. The confusion matrix, which is a matrix of N&#x00D7;N, where N is the number of predicted classes, displaying the number of correct and incorrect predictions made by the classification model, was used in this study. 
Because mortality prevalence was 20%, <italic>F</italic><sub>1</sub>-score and AUC were prioritized over accuracy for model comparison.</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Description of Sociodemographic Characteristics</title><p>A total of 12,871 study participants were included in this study; the majority were between the ages of 38 and 47 years (n=4468, 34.7%), suggesting a largely middle-aged population; 7688 (59.7%) of the sample were female, indicating a gender imbalance; the majority were married (n=4921, 51.2%) and had completed secondary school education (n=2953, 31.3%), indicating a moderate level of education; a sizable portion lived in urban areas (n=8878, 77.1%), suggesting greater representation of urban areas; and the vast majority were Orthodox Christians (n=8714, 91.5%), representing a highly homogeneous religious composition (see <xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Sociodemographic characteristics of adult people living with HIV receiving antiretroviral therapy in public health facilities of Gondar City Administration, Northwest Ethiopia, based on a retrospective cohort extracted from electronic medical records, 2005&#x2010;2024 (N=12,871).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Features and categories</td><td align="left" valign="bottom">Values, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Age, y</td><td align="left" valign="top"/></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>18&#x2010;27</td><td align="char" char="." valign="top">783 (6.1)</td></tr><tr><td align="char" char="." 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>28&#x2010;37</td><td align="left" valign="top">1741 (13.5)</td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>38&#x2010;47</td><td align="char" char="." valign="top">4468 (34.7)</td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>48&#x2010;57</td><td align="left" valign="top">3998 (31.1)</td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>58 and above</td><td align="left" valign="top">1881 (14.6)</td></tr><tr><td align="left" valign="top">Sex</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="char" char="." valign="top">7688 (59.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="char" char="." 
valign="top">5183 (40.3)</td></tr><tr><td align="left" valign="top">Marital Status</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Married</td><td align="left" valign="top">4921 (51.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Divorced</td><td align="left" valign="top">2243 (23.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Never married</td><td align="left" valign="top">1449 (15.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Widowed</td><td align="left" valign="top">995 (10.4)</td></tr><tr><td align="left" valign="top">Education level</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Higher education</td><td align="char" char="." valign="top">1448 (15.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Secondary education</td><td align="left" valign="top">2953 (31.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Primary education</td><td align="left" valign="top">2602 (27.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No education</td><td align="left" valign="top">2443 (25.9)</td></tr><tr><td align="left" valign="top">Residence</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Urban</td><td align="char" char="." 
valign="top">8878 (77.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rural</td><td align="left" valign="top">2635 (22.9)</td></tr><tr><td align="left" valign="top">Religion</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Orthodox</td><td align="char" char="." valign="top">8714 (91.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Muslim</td><td align="left" valign="top">684 (7.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Protestant</td><td align="left" valign="top">96 (1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Catholic</td><td align="left" valign="top">23 (0.24)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other</td><td align="left" valign="top">6 (0.06)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Baseline Clinical and Immunological Characteristics</title><p>Among 12,871 people living with HIV at ART initiation, most exhibited preserved functional status (working: n=11,978, 93.1%) with low TB prevalence (n=216, 1.7% positive), consistent with late ART presentation in Ethiopian cohorts (see <xref ref-type="table" rid="table2">Table 2</xref>). 
The median baseline CD4 count was 225 (IQR 131&#x2010;367) cells/mm&#x00B3;.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Baseline clinical and immunological characteristics of people living with HIV receiving antiretroviral therapy in public health facilities, Gondar City Administration, Northwest Ethiopia, 2024.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Feature and category</td><td align="left" valign="bottom">Values</td></tr></thead><tbody><tr><td align="left" valign="top">Tuberculosis screening result, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">12,655 (98.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes</td><td align="char" char="." valign="top">216 (1.7)</td></tr><tr><td align="left" valign="top">Functional status, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Working</td><td align="left" valign="top">11,978 (93.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ambulatory</td><td align="left" valign="top">527 (4.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bedridden</td><td align="left" valign="top">366 (2.8)</td></tr><tr><td align="left" valign="top">Baseline CD4<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> count (cells/mm&#x00B3;), median (IQR)</td><td align="left" valign="top">225 (131-367)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>CD4: cluster of differentiation 4 (a type of immune cell count).</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Balancing 
Dataset</title><p>As seen in the descriptive statistics, the prevalence of mortality among people living with HIV was 20%. This shows that the dataset was imbalanced; most observations (80%) were concentrated in the majority class (ie, alive). The SMOTE oversampling strategy added 6186 synthetic observations to the minority group (ie, dead) to balance the distribution of the outcome variable. Therefore, the class distribution of the mortality outcome was balanced using SMOTE, resulting in a symmetric training dataset with 8241 observations in each class (alive and dead) to support the development of reliable and robust predictive models (see <xref ref-type="fig" rid="figure3">Figure 3</xref>). To avoid data leakage, the dataset was first split into training (80%) and testing (20%) subsets. SMOTE oversampling was then applied only to the training data, while the test set remained in its original, imbalanced form. This ensured unbiased performance evaluation on unseen data.</p><p>ROC curves for 7 ML models trained on the original imbalanced dataset (20% mortality prevalence) demonstrated clinically realistic discrimination (AUC range: 0.70&#x2010;0.86). Gradient boosting achieved superior performance (AUC=0.859), followed by XGBoost (AUC=0.835) and logistic regression (AUC=0.824). 
Random forest (AUC=0.809) and Naive Bayes (AUC=0.803) showed moderate discrimination, while k-nearest neighbor (KNN) and decision tree showed lower discrimination compared to ensemble models (see <xref ref-type="fig" rid="figure4">Figure 4</xref>).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Class distribution of mortality outcomes (alive vs deceased) among adults living with HIV receiving antiretroviral therapy (ART) in Gondar City Administration, Northwest Ethiopia, showing the original imbalanced dataset and the training set after SMOTE (synthetic minority oversampling technique) was applied to address class imbalance for model development, 2005&#x2010;2024.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e78770_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Receiver operating characteristic (ROC) curves for 7 machine-learning classifiers evaluated on the original imbalanced test dataset for mortality prediction among adults living with HIV on antiretroviral therapy (ART) in public health facilities of Gondar City Administration, Northwest Ethiopia, reporting discrimination performance before class balancing, 2005&#x2010;2024. AUC: area under the receiver operating characteristic curve; KNN: k-nearest neighbor; SVM: support vector machine; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e78770_fig04.png"/></fig><p>SMOTE-balanced models (training only, evaluated on the original imbalanced test set) demonstrated stable discrimination with modest recall improvements but precision trade-offs typical of oversampling. Logistic regression achieved AUC=0.813 (&#x2212;0.011 from original), decision tree AUC=0.700 (&#x2212;0.006), and KNN AUC=0.705 (&#x2212;0.009). 
Tree-based ensembles maintained robustness: random forest AUC=0.794 (&#x2212;0.015), gradient boosting AUC=0.837 (&#x2212;0.022), XGBoost AUC=0.819 (&#x2212;0.016), and Naive Bayes AUC=0.797 (&#x2212;0.006). Original configurations outperformed SMOTE variants (median &#x0394;AUC=&#x2212;0.013 [IQR &#x2212;0.019 to &#x2212;0.008]), favoring models trained on unaltered data for potential future clinical implementation, pending external validation (see <xref ref-type="fig" rid="figure5">Figure 5</xref>).</p><p>Model performance comparison across original and SMOTE-balanced configurations demonstrates gradient boosting original&#x2019;s superiority (<italic>F</italic><sub>1</sub>-score=0.619, AUC=0.859), outperforming XGBoost original (<italic>F</italic><sub>1</sub>-score=0.609, AUC=0.835). SMOTE-trained models showed recall gains but precision losses: XGBoost SMOTE (<italic>F</italic><sub>1</sub>-score=0.592, AUC=0.819; &#x0394;<italic>F</italic><sub>1</sub>-score=&#x2212;0.017), random forest SMOTE (<italic>F</italic><sub>1</sub>-score=0.576, AUC=0.794; &#x0394;<italic>F</italic><sub>1</sub>-score=&#x2212;0.021), and gradient boosting SMOTE (<italic>F</italic><sub>1</sub>-score=0.608, AUC=0.837; &#x0394;<italic>F</italic><sub>1</sub>-score=&#x2212;0.011). Ensemble methods maintained robust discrimination after balancing (AUC&#x003E;0.79), whereas simpler models showed greater variability. KNN and decision tree exhibited the lowest performance across configurations, reaffirming ensemble learning&#x2019;s advantage for high-dimensional, imbalanced HIV mortality prediction.</p><p>Original data configurations consistently outperformed SMOTE variants (median &#x0394;<italic>F</italic><sub>1</sub>-score=&#x2212;0.011, median &#x0394;AUC=&#x2212;0.013), supporting potential future clinical implementation pending external validation without synthetic oversampling. 
This finding aligns with real-world prevalence modeling priorities where precision preservation exceeds recall optimization for resource allocation in ART programs (see <xref ref-type="fig" rid="figure6">Figure 6</xref>).</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Receiver operating characteristic (ROC) curves for 7 machine-learning classifiers trained on SMOTE (synthetic minority oversampling technique)-balanced training data and evaluated on the original imbalanced test dataset for mortality prediction among adults living with HIV on antiretroviral therapy (ART) in Gondar City Administration, Northwest Ethiopia, illustrating postbalancing model discrimination, 2024. AUC: area under the receiver operating characteristic curve; KNN: k-nearest neighbor; SVM: support vector machine; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e78770_fig05.png"/></fig><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Comparative performance of machine-learning models (accuracy, precision, recall, <italic>F</italic><sub>1</sub>-score, and area under the receiver operating characteristic curve [AUC]) for mortality prediction among adults living with HIV receiving antiretroviral therapy (ART) in public health facilities of Gondar City Administration, Northwest Ethiopia, 2024. KNN: k-nearest neighbor; SMOTE: synthetic minority oversampling technique; SVM: support vector machine; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e78770_fig06.png"/></fig></sec><sec id="s3-4"><title>Model Building and Selection</title><p>The predictive performance of 7 ML models was comprehensively evaluated using accuracy, precision, recall, <italic>F</italic><sub>1</sub>-score (primary), and AUC on an 80:20 stratified train-test split. 
A critical distinction is that all preprocessing procedures, including SMOTE, were applied to training data only; the final evaluation used the original imbalanced test set (20% mortality prevalence) to reflect real-world deployment conditions.</p><p>On the original imbalanced test data, gradient boosting demonstrated superior balanced performance (accuracy=87.0%, precision=74.5%, recall=52.9%, <italic>F</italic><sub>1</sub>-score=0.619, AUC=0.859), outperforming XGBoost (<italic>F</italic><sub>1</sub>-score=0.609, AUC=0.835) and random forest (<italic>F</italic><sub>1</sub>-score=0.597, AUC=0.809). Ensemble methods consistently exceeded single classifiers, followed by Naive Bayes (<italic>F</italic><sub>1</sub>-score=0.562) and logistic regression (<italic>F</italic><sub>1</sub>-score=0.553).</p><p>SMOTE sensitivity analysis (training only) yielded recall gains but precision losses: XGBoost recall improved by +8.2% (<italic>F</italic><sub>1</sub>-score=0.592, &#x0394;<italic>F</italic><sub>1</sub>-score=&#x2212;0.017) and gradient boosting by +14.2% (<italic>F</italic><sub>1</sub>-score=0.608, &#x0394;<italic>F</italic><sub>1</sub>-score=&#x2212;0.011). 
Original configurations outperformed SMOTE variants across top models (median &#x0394;<italic>F</italic><sub>1</sub>-score=&#x2212;0.011 [IQR &#x2212;0.019 to &#x2212;0.006], &#x0394;AUC=&#x2212;0.013 [IQR &#x2212;0.019 to &#x2212;0.008]), confirming unaltered EMR data superiority for clinical precision.</p><p>Gradient boosting (original) was selected as optimal due to the highest <italic>F</italic><sub>1</sub>-score, cross-validation stability, and SHAP interpretability, establishing moderate-to-strong discrimination performance for 20% prevalence mortality prediction in resource-limited settings (see <xref ref-type="table" rid="table3">Table 3</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Performance comparison of 7 machine-learning classifiers for binary mortality prediction among adults living with HIV on antiretroviral therapy in Gondar City Administration, Northwest Ethiopia, showing accuracy, precision, recall, <italic>F</italic><sub>1</sub>-score, and area under the receiver operating characteristic curve (AUC) evaluated on the original imbalanced hold-out test dataset, with models trained on unbalanced and SMOTE (synthetic minority oversampling technique)&#x2013;balanced training data, 2024.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model and data</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom">Precision</td><td align="left" valign="bottom">Recall</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">AUC</td><td align="left" valign="bottom">&#x0394;<italic>F</italic><sub>1</sub>-score</td></tr></thead><tbody><tr><td align="left" valign="top">Gradient boosting</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Original</td><td align="char" char="." valign="top">0.870</td><td align="char" char="." valign="top">0.745</td><td align="char" char="." valign="top">0.529</td><td align="char" char="." valign="top">0.619</td><td align="char" char="." valign="top">0.859</td><td align="char" char="." valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SMOTE</td><td align="left" valign="top">0.827</td><td align="left" valign="top">0.556</td><td align="left" valign="top">0.671</td><td align="left" valign="top">0.608</td><td align="left" valign="top">0.837</td><td align="left" valign="top">&#x2212;0.011</td></tr><tr><td align="left" valign="top">XGBoost<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Original</td><td align="char" char="." valign="top">0.863</td><td align="char" char="." valign="top">0.704</td><td align="char" char="." valign="top">0.537</td><td align="char" char="." valign="top">0.609</td><td align="char" char="." 
valign="top">0.835</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SMOTE</td><td align="left" valign="top">0.830</td><td align="left" valign="top">0.568</td><td align="left" valign="top">0.619</td><td align="left" valign="top">0.592</td><td align="left" valign="top">0.819</td><td align="left" valign="top">&#x2212;0.017</td></tr><tr><td align="left" valign="top">Random forest</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Original</td><td align="char" char="." valign="top">0.855</td><td align="char" char="." valign="top">0.672</td><td align="char" char="." valign="top">0.537</td><td align="char" char="." valign="top">0.597</td><td align="char" char="." valign="top">0.809</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SMOTE</td><td align="left" valign="top">0.821</td><td align="left" valign="top">0.546</td><td align="left" valign="top">0.609</td><td align="left" valign="top">0.576</td><td align="left" valign="top">0.794</td><td align="left" valign="top">&#x2212;0.021</td></tr><tr><td align="left" valign="top">Logistic regression</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Original</td><td align="char" char="." valign="top">0.852</td><td align="char" char="." 
valign="top">0.696</td><td align="char" char="." valign="top">0.459</td><td align="char" char="." valign="top">0.553</td><td align="char" char="." valign="top">0.824</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SMOTE</td><td align="left" valign="top">0.785</td><td align="left" valign="top">0.471</td><td align="left" valign="top">0.638</td><td align="left" valign="top">0.542</td><td align="left" valign="top">0.813</td><td align="left" valign="top">&#x2212;0.011</td></tr><tr><td align="left" valign="top">Naive Bayes</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Original</td><td align="char" char="." valign="top">0.821</td><td align="char" char="." valign="top">0.548</td><td align="char" char="." valign="top">0.576</td><td align="char" char="." valign="top">0.562</td><td align="char" char="." 
valign="top">0.803</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SMOTE</td><td align="left" valign="top">0.696</td><td align="left" valign="top">0.376</td><td align="left" valign="top">0.794</td><td align="left" valign="top">0.510</td><td align="left" valign="top">0.797</td><td align="left" valign="top">&#x2212;0.052</td></tr><tr><td align="left" valign="top">Decision tree</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Original</td><td align="char" char="." valign="top">0.806</td><td align="char" char="." valign="top">0.513</td><td align="char" char="." valign="top">0.527</td><td align="char" char="." valign="top">0.520</td><td align="char" char="." valign="top">0.706</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SMOTE</td><td align="left" valign="top">0.779</td><td align="left" valign="top">0.458</td><td align="left" valign="top">0.572</td><td align="left" valign="top">0.509</td><td align="left" valign="top">0.700</td><td align="left" valign="top">&#x2212;0.011</td></tr><tr><td align="left" valign="top">KNN<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Original</td><td align="char" char="." 
valign="top">0.824</td><td align="char" char="." valign="top">0.605</td><td align="char" char="." valign="top">0.342</td><td align="char" char="." valign="top">0.437</td><td align="char" char="." valign="top">0.714</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SMOTE</td><td align="left" valign="top">0.712</td><td align="left" valign="top">0.359</td><td align="left" valign="top">0.560</td><td align="left" valign="top">0.437</td><td align="left" valign="top">0.705</td><td align="left" valign="top">0.000</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Not available.</p></fn><fn id="table3fn2"><p><sup>b</sup>XGBoost: extreme gradient boosting.</p></fn><fn id="table3fn3"><p><sup>c</sup>KNN: k-nearest neighbor.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-5"><title>Association Rule Mining</title><p>Association rule mining using the Apriori algorithm (mlxtend; rules with support &#x2265;0.065, confidence &#x2265;0.64, and lift &#x003E;4.6 were retained for interpretation) identified high-risk sociodemographic profiles among baseline characteristics. The dominant pattern&#x2014;&#x201C;rural residence + age 38&#x2010;47 years + no formal education + low baseline CD4 &#x2192; mortality&#x201D;&#x2014;exhibited support=0.0686 (6.9% prevalence), confidence=68.3%, and lift=4.76, indicating that this subgroup showed a 4.76-fold higher co-occurrence with mortality compared with the baseline mortality prevalence (20% baseline prevalence).</p><p>In addition, demographic context (Christian religion, TB-negative status) maintained high lift values (4.62&#x2010;4.74), indicating that this subgroup exhibited the strongest co-occurrence pattern with mortality in the unadjusted association analysis (see <xref ref-type="table" rid="table4">Table 4</xref>). 
These association rules reflect co-occurrence patterns and should not be interpreted as causal risk factors.</p><p>The gradient boosting model demonstrates strong and clinically plausible classification performance (see <xref ref-type="fig" rid="figure7">Figure 7</xref>, which presents the confusion matrix of the optimized gradient boosting classifier).</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Association rule mining results using the Apriori algorithm showing frequent baseline sociodemographic factor combinations associated with mortality among adults living with HIV on antiretroviral therapy (ART) in Gondar City Administration, Northwest Ethiopia, 2005&#x2010;2024 (N=12,871).</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Antecedent</td><td align="left" valign="bottom">Consequent</td><td align="left" valign="bottom">Support</td><td align="left" valign="bottom">Confidence</td><td align="left" valign="bottom">Lift</td></tr></thead><tbody><tr><td align="left" valign="top">Rural+age 38&#x2010;47+no education+low CD4<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">Mortality</td><td align="left" valign="top">0.0686</td><td align="left" valign="top">0.683</td><td align="left" valign="top">4.76</td></tr><tr><td align="left" valign="top">Christian+rural+age 38&#x2010;47+no education+low CD4</td><td align="left" valign="top">Mortality</td><td align="left" valign="top">0.0686</td><td align="left" valign="top">0.692</td><td align="left" valign="top">4.72</td></tr><tr><td align="left" valign="top">TB<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup>-+rural+age 38&#x2010;47+no education+low CD4</td><td align="left" valign="top">Mortality</td><td align="left" valign="top">0.0651</td><td align="left" valign="top">0.680</td><td align="left" valign="top">4.74</td></tr><tr><td align="left" valign="top">Rural+age 38&#x2010;47+no 
education</td><td align="left" valign="top">Mortality</td><td align="left" valign="top">0.0707</td><td align="left" valign="top">0.668</td><td align="left" valign="top">4.65</td></tr><tr><td align="left" valign="top">Christian+rural+age 38&#x2010;47+no education</td><td align="left" valign="top">Mortality</td><td align="left" valign="top">0.0707</td><td align="left" valign="top">0.677</td><td align="left" valign="top">4.62</td></tr><tr><td align="left" valign="top">TB-+rural+age 38&#x2010;47+no education</td><td align="left" valign="top">Mortality</td><td align="left" valign="top">0.0672</td><td align="left" valign="top">0.664</td><td align="left" valign="top">4.63</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>CD4: cluster of differentiation 4 (a type of immune cell count). </p></fn><fn id="table4fn2"><p><sup>b</sup>TB: tuberculosis.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Confusion matrix for the optimized gradient boosting classifier predicting mortality among adults living with HIV receiving antiretroviral therapy (ART) in public health facilities of Gondar City Administration, Northwest Ethiopia, 2024.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e78770_fig07.png"/></fig></sec><sec id="s3-6"><title>Feature Ranking</title><p>Gradient boosting trained on the original dataset was selected as the optimal model, and the SHAP summary analysis identified the most influential baseline predictors of mortality among 12,871 people living with HIV at ART initiation. The educational level was ranked as the strongest predictor, with lower education associated with positive SHAP values (increased mortality risk) and higher education demonstrating consistent protective effects. 
Residence was the second most important factor, with rural residence contributing to higher predicted mortality and urban residence showing protective effects. The baseline CD4 count exhibited a clear biological gradient: lower CD4 values clustered on the positive SHAP side, indicating increased mortality risk, whereas higher CD4 counts were associated with negative SHAP values, reflecting protection. Marital status and age showed moderate influence, with older age generally increasing predicted risk. Functional status also contributed meaningfully, as ambulatory or bedridden states increased mortality risk compared to working status. In contrast, sex, religion, and TB screening results demonstrated relatively small SHAP magnitudes, indicating limited impact on overall prediction. Overall, the SHAP ranking confirms that socioeconomic vulnerability and immunological status at ART initiation are the dominant determinants of predicted mortality risk (see <xref ref-type="fig" rid="figure8">Figure 8</xref>).</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Global feature importance ranking based on mean absolute Shapley Additive Explanations (SHAP) values from the gradient boosting model, identifying the top predictors of mortality among adults living with HIV on antiretroviral therapy (ART) in Gondar City Administration, Northwest Ethiopia, 2024. CD4: cluster of differentiation 4 (a type of immune cell count); TB: tuberculosis.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e78770_fig08.png"/></fig></sec><sec id="s3-7"><title>Waterfall Plot</title><p>The SHAP waterfall plot illustrates the individual prediction for a 45-year-old patient. 
The model output is expressed on the log-odds scale for the mortality class (dead=1), with the cohort baseline expected value of <italic>E</italic>[<italic>f</italic>(<italic>X</italic>)]=&#x2212;1.886, representing the average predicted mortality risk. For this patient, the cumulative contribution of key baseline features shifts the model output to <italic>f</italic>(<italic>x</italic>)=&#x2212;0.653. Because this value is substantially higher than the cohort baseline, it indicates an elevated predicted mortality risk relative to the average patient, although the absolute probability remains below 0.5.</p><p>Positive SHAP values (red bars) indicate features that increase predicted mortality risk, whereas negative SHAP values (blue bars) indicate protective effects. The strongest contributor to increased mortality risk was a lack of formal education (+0.84), followed by a low baseline CD4 count of 140 cells/mm&#x00B3; (+0.54). Married as marital status (+0.30) and male as sex (+0.03) provided additional, smaller risk contributions. In contrast, urban residence (&#x2212;0.35) and working functional status (&#x2212;0.12) exerted protective effects by reducing the predicted mortality risk. Age (45 y; &#x2212;0.02), religion (+0.02), and TB screening results (~0) had minimal influence on the individual prediction (see <xref ref-type="fig" rid="figure9">Figure 9</xref>).</p><fig position="float" id="figure9"><label>Figure 9.</label><caption><p>Shapley Additive Explanations (SHAP) waterfall plot illustrating an individual-level prediction from the gradient boosting mortality model for an adult living with HIV on antiretroviral therapy (ART) in Gondar City Administration, Northwest Ethiopia, 2024. 
CD4: cluster of differentiation 4 (a type of immune cell count); TB: tuberculosis.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e78770_fig09.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>To our knowledge, this represents one of the first Ethiopia-based applications of interpretable ensemble ML (gradient boosting combined with SHAP) using baseline EMR data to predict mortality among 12,871 people living with HIV, achieving moderate but clinically realistic performance (<italic>F</italic><sub>1</sub>-score=0.619, AUC=0.859). Unlike earlier exploratory analyses, the final model was developed strictly using predictors recorded at ART initiation, eliminating postbaseline variables to avoid look-ahead bias and ensure valid prognostic modeling. This study contributes new evidence by demonstrating the feasibility of applying XAI models to routine EMR data to support proactive risk stratification and clinical decision-making in resource-limited settings. However, the model should be interpreted as a decision-support tool rather than a standalone clinical triage system, given its moderate sensitivity. Consistent with findings from studies published in JMIR journals, our results demonstrate the feasibility of integrating XAI models into routine clinical care to support proactive risk stratification and decision-making, particularly in low-resource settings where the efficient use of existing EMR data is critical [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>Gradient boosting trained on verified baseline-only predictors achieved the best overall balance between discrimination and generalization performance (<italic>F</italic><sub>1</sub>-score=0.619, AUC=0.859), outperforming logistic regression, Naive Bayes, KNN, and decision tree models. 
While XGBoost and random forest also demonstrated competitive performance, the corrected baseline-only gradient boosting model provided the most stable and clinically interpretable results for the 20% mortality prevalence setting.</p><p>SHAP values were calculated with respect to the mortality class (dead=1). Accordingly, positive SHAP values indicate an increase in predicted mortality risk, whereas negative SHAP values represent a reduction in predicted mortality risk (protective effect).</p><p>The most influential baseline predictors included educational level, baseline CD4 count, residence, marital status, and functional status. In the illustrated case, lack of formal education and a low baseline CD4 count (140 cells/mm&#x00B3;) exerted strong positive contributions, substantially increasing the predicted mortality risk. Urban residence and working functional status demonstrated protective contributions.</p><p>Importantly, the baseline CD4 count was modeled as a continuous predictor in the ML algorithms, and descriptive statistics have been revised to report a median of 225 (IQR 131-367) cells/mm&#x00B3; to ensure methodological consistency. Unlike preliminary analyses that yielded inflated performance estimates, the corrected baseline-only model produced moderate but clinically realistic discrimination (AUC=0.859), consistent with previously published EMR-based mortality prediction studies [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>].</p></sec><sec id="s4-2"><title>Comparison to Prior Work</title><p>These results are consistent in accuracy with studies conducted in Nigeria and Thailand, where ensemble models achieved high predictive accuracy in the clinical context [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Several studies have explored the use of ML algorithms for mortality prediction among people living with HIV. 
Similarly, a study from China utilizing a support vector machine reported an accuracy of 86%, which aligns with this study&#x2019;s pre-SMOTE support vector machine performance [<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>The SHAP analysis provided valuable insights into the factors influencing mortality prediction.</p><p>The SHAP global importance analysis identified the educational level as the most influential baseline predictor of mortality, followed by residence, baseline CD4 count, marital status, age, and functional status. Specifically, a lack of formal education and lower baseline CD4 values were associated with an increased predicted mortality risk, whereas urban residence and working functional status were protective.</p><p>This hierarchy reflects the combined influence of socioeconomic vulnerability and biological immunosuppression at ART initiation, underscoring that both structural and clinical determinants contribute substantially to mortality risk prediction. For instance, a study conducted in Kenya found that patients on ART for extended periods had significantly lower mortality rates, reinforcing the critical role of early and consistent treatment adherence in improving survival [<xref ref-type="bibr" rid="ref27">27</xref>].</p><p>Ensemble methods, such as XGBoost, random forest, and gradient boosting, consistently outperformed simpler models, such as logistic regression and Naive Bayes. Similar findings were reported in studies from Northern Thailand, East Africa, and Nigeria, where ensemble models achieved superior performance in health prediction tasks [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>].</p><p>The model comparison plot demonstrated that gradient boosting achieved an accuracy of 87.0%, precision of 74.5%, recall of 52.9%, <italic>F</italic><sub>1</sub>-score of 0.619, and AUC of 0.859. 
Although discrimination was strong, the recall of 52.9% indicates that nearly half of the mortality cases were not identified at the default classification threshold. Therefore, while the model demonstrates predictive capability, its sensitivity limits immediate deployment as a high-stakes early-warning system without threshold optimization.</p></sec><sec id="s4-3"><title>Limitations</title><p>This study used retrospective secondary EMR data, which may contain incomplete or inaccurately recorded variables, introducing potential information bias. Only 1 baseline record per patient was retained to avoid correlated observations, limiting longitudinal analysis. Patients who transferred to other facilities were excluded, potentially introducing selection bias.</p><p>Although the cohort required documented baseline ART initiation records, limited variable-level missingness (&#x003C;10%) was addressed using median (continuous variables) and mode (categorical variables) imputation. Therefore, this was not a strict complete-case analysis. Excluding individuals on ART for less than 6 months may have introduced survivorship bias, potentially inflating performance metrics and limiting generalizability to newly initiated ART populations.</p><p>SMOTE-based oversampling may not fully replicate real-world population distributions. External validation using independent datasets was not performed, and model performance may vary in other geographic or clinical contexts. Data were drawn from a single city administration, limiting external validity.</p><p>Finally, given the moderate sensitivity (52.9%, 272/514), the model may miss a substantial proportion of high-risk patients at the default threshold. 
Future implementations should consider threshold calibration based on clinical priorities, particularly if minimizing false negatives is paramount.</p></sec><sec id="s4-4"><title>Future Directions</title><p>Future studies should use multicenter or national-level data to enhance generalizability and external validity. Prospective validation studies are needed to evaluate real-world performance. Threshold optimization strategies should be explored to improve sensitivity where early mortality detection is prioritized. Integrating temporal modeling approaches may capture longitudinal treatment dynamics. Additionally, embedding explainable models within clinical workflows could support structured risk assessment, provided that implementation is accompanied by clinician oversight and contextual calibration.</p></sec><sec id="s4-5"><title>Conclusion</title><p>This study demonstrates that interpretable ensemble ML models trained exclusively on baseline EMR data achieved moderate yet clinically meaningful discrimination in predicting mortality risk among people living with HIV initiating ART. SHAP-based interpretation confirmed that educational level was the most influential predictor, followed by residence, baseline CD4 count, marital status, age, and functional status, highlighting the combined impact of socioeconomic vulnerability and immunological status at treatment initiation. These findings suggest that transparent and explainable ML approaches may support early risk stratification and targeted intervention planning within HIV care programs. Nevertheless, external validation and prospective evaluation are required before integration into routine clinical decision-making.</p></sec></sec></body><back><ack><p>All authors declared that they had insufficient funding to support open-access publication of this manuscript, including from affiliated organizations or institutions, funding agencies, or other organizations.
JMIR Publications provided article processing fee (APF) support for the publication of this article.</p></ack><notes><sec><title>Funding</title><p>The authors declared that no financial support was received for this work.</p></sec><sec><title>Data Availability</title><p>The datasets analyzed during this study are available from the corresponding author upon reasonable request.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ART</term><def><p>antiretroviral therapy</p></def></def-item><def-item><term id="abb2">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb3">CD4</term><def><p>cluster of differentiation 4 (a type of immune cell count)</p></def></def-item><def-item><term id="abb4">EMR</term><def><p>electronic medical record</p></def></def-item><def-item><term id="abb5">KNN</term><def><p>k-nearest neighbor</p></def></def-item><def-item><term id="abb6">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb7">ROC</term><def><p>receiver operating characteristic</p></def></def-item><def-item><term id="abb8">SHAP</term><def><p>Shapley Additive Explanations</p></def></def-item><def-item><term id="abb9">SMOTE</term><def><p>synthetic minority oversampling technique</p></def></def-item><def-item><term id="abb10">TB</term><def><p>tuberculosis</p></def></def-item><def-item><term id="abb11">UNAIDS</term><def><p>Joint United Nations Program on HIV/AIDS</p></def></def-item><def-item><term id="abb12">XAI</term><def><p>explainable artificial intelligence</p></def></def-item><def-item><term id="abb13">XGBoost</term><def><p>extreme gradient boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name
name-style="western"><surname>Vasudevan</surname><given-names>L</given-names> </name><name name-style="western"><surname>Kibria</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Kucirka</surname><given-names>LM</given-names> </name><etal/></person-group><article-title>Machine learning models to predict risk of maternal morbidity and mortality from electronic medical record data: scoping review</article-title><source>J Med Internet Res</source><year>2025</year><month>08</month><day>14</day><volume>27</volume><fpage>e68225</fpage><pub-id pub-id-type="doi">10.2196/68225</pub-id><pub-id pub-id-type="medline">40811480</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tahir</surname><given-names>N</given-names> </name><name name-style="western"><surname>Jung</surname><given-names>CR</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SD</given-names> </name><name name-style="western"><surname>Azizah</surname><given-names>N</given-names> </name><name name-style="western"><surname>Ho</surname><given-names>WC</given-names> </name><name name-style="western"><surname>Li</surname><given-names>TC</given-names> </name></person-group><article-title>Federated learning-based model for predicting mortality: systematic review and meta-analysis</article-title><source>J Med Internet Res</source><year>2025</year><month>07</month><day>21</day><volume>27</volume><fpage>e65708</fpage><pub-id pub-id-type="doi">10.2196/65708</pub-id><pub-id pub-id-type="medline">40690657</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lu</surname><given-names>SC</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>C</given-names> </name><name 
name-style="western"><surname>Nguyen</surname><given-names>CH</given-names> </name><name name-style="western"><surname>Geng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Pfob</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sidey-Gibbons</surname><given-names>C</given-names> </name></person-group><article-title>Machine learning-based short-term mortality prediction models for patients with cancer using electronic health record data: systematic review and critical appraisal</article-title><source>JMIR Med Inform</source><year>2022</year><month>03</month><day>14</day><volume>10</volume><issue>3</issue><fpage>e33182</fpage><pub-id pub-id-type="doi">10.2196/33182</pub-id><pub-id pub-id-type="medline">35285816</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ikemura</surname><given-names>K</given-names> </name><name name-style="western"><surname>Bellin</surname><given-names>E</given-names> </name><name name-style="western"><surname>Yagi</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Using automated machine learning to predict the mortality of patients with COVID-19: prediction model development study</article-title><source>J Med Internet Res</source><year>2021</year><month>02</month><day>26</day><volume>23</volume><issue>2</issue><fpage>e23458</fpage><pub-id pub-id-type="doi">10.2196/23458</pub-id><pub-id pub-id-type="medline">33539308</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vaid</surname><given-names>A</given-names> </name><name name-style="western"><surname>Somani</surname><given-names>S</given-names> </name><name name-style="western"><surname>Russak</surname><given-names>AJ</given-names> 
</name><etal/></person-group><article-title>Machine learning to predict mortality and critical events in a cohort of patients with COVID-19 in New York City: model development and validation</article-title><source>J Med Internet Res</source><year>2020</year><month>11</month><day>6</day><volume>22</volume><issue>11</issue><fpage>e24018</fpage><pub-id pub-id-type="doi">10.2196/24018</pub-id><pub-id pub-id-type="medline">33027032</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lv</surname><given-names>H</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Machine learning-driven models to predict prognostic outcomes in patients hospitalized with heart failure using electronic health records: retrospective study</article-title><source>J Med Internet Res</source><year>2021</year><month>04</month><day>19</day><volume>23</volume><issue>4</issue><fpage>e24996</fpage><pub-id pub-id-type="doi">10.2196/24996</pub-id><pub-id pub-id-type="medline">33871375</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Diwan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gandhi</surname><given-names>V</given-names> </name><name name-style="western"><surname>Baidya Kayal</surname><given-names>E</given-names> </name><name name-style="western"><surname>Khanna</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mehndiratta</surname><given-names>A</given-names> </name></person-group><article-title>Explainable machine learning models for mortality prediction in patients with sepsis in tertiary care hospital ICU in 
low- to middle-income countries</article-title><source>Intensive Care Med Exp</source><year>2025</year><month>06</month><day>3</day><volume>13</volume><issue>1</issue><fpage>56</fpage><pub-id pub-id-type="doi">10.1186/s40635-025-00765-5</pub-id><pub-id pub-id-type="medline">40459817</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="web"><article-title>Global HIV &#x0026; AIDS statistics &#x2014; Fact sheet</article-title><source>UNAIDS</source><access-date>2026-04-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.unaids.org/en/resources/fact-sheet">https://www.unaids.org/en/resources/fact-sheet</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kloos</surname><given-names>H</given-names> </name><name name-style="western"><surname>Converse</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mariam</surname><given-names>DH</given-names> </name><name name-style="western"><surname>Mulatu</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Kaba</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mekonnen</surname><given-names>W</given-names> </name></person-group><article-title>Bibliography on HIV/AIDS in Ethiopia and Ethiopians in the diaspora: the 2017 update</article-title><source>Ethiop J Health Dev</source><year>2018</year><access-date>2026-03-27</access-date><volume>32</volume><issue>4</issue><comment><ext-link ext-link-type="uri" xlink:href="https://www.ejhd.org/index.php/ejhd/article/view/1973">https://www.ejhd.org/index.php/ejhd/article/view/1973</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Vitalis</surname><given-names>D</given-names> 
</name></person-group><source>Adherence to Antiretroviral Therapy among Perinatal Women in Guyana: Challenges and Lessons for Developing Nations</source><year>2021</year><publisher-name>Springer</publisher-name><fpage>183</fpage><lpage>199</lpage><pub-id pub-id-type="doi">10.1007/978-981-15-3974-9</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Argaw</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Gelaye</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Lakew</surname><given-names>AM</given-names> </name><etal/></person-group><article-title>Survival and predictors of mortality among HIV-infected adults after initiation of antiretroviral therapy in Eastern Ethiopia Governmental hospitals, from January 2015 to December 2021 (multi-center retrospective follow-up study)</article-title><source>BMC Infect Dis</source><year>2024</year><volume>24</volume><issue>1</issue><pub-id pub-id-type="doi">10.1186/s12879-024-10225-2</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mamo</surname><given-names>DN</given-names> </name><name name-style="western"><surname>Yilma</surname><given-names>TM</given-names> </name><name name-style="western"><surname>Tewelgne</surname><given-names>MF</given-names> </name><etal/></person-group><article-title>Machine learning to predict virological failure among HIV patients on antiretroviral therapy in the University of Gondar Comprehensive and Specialized Hospital, in Amhara Region, Ethiopia, 2022</article-title><source>BMC Med Inform Decis Mak</source><year>2023</year><month>04</month><day>21</day><volume>23</volume><issue>1</issue><fpage>75</fpage><pub-id pub-id-type="doi">10.1186/s12911-023-02167-7</pub-id><pub-id 
pub-id-type="medline">37085851</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goecks</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jalili</surname><given-names>V</given-names> </name><name name-style="western"><surname>Heiser</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Gray</surname><given-names>JW</given-names> </name></person-group><article-title>How machine learning will transform biomedicine</article-title><source>Cell</source><year>2020</year><month>04</month><day>2</day><volume>181</volume><issue>1</issue><fpage>92</fpage><lpage>101</lpage><pub-id pub-id-type="doi">10.1016/j.cell.2020.03.022</pub-id><pub-id pub-id-type="medline">32243801</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Breiman</surname><given-names>L</given-names> </name></person-group><article-title>Random forests</article-title><source>Mach Learn</source><year>2001</year><month>10</month><volume>45</volume><issue>1</issue><fpage>5</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Katzman</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Shaham</surname><given-names>U</given-names> </name><name name-style="western"><surname>Cloninger</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bates</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kluger</surname><given-names>Y</given-names> 
</name></person-group><article-title>DeepSurv: personalized treatment recommender system using a Cox proportional hazards deep neural network</article-title><source>BMC Med Res Methodol</source><year>2018</year><month>02</month><day>26</day><volume>18</volume><issue>1</issue><fpage>24</fpage><pub-id pub-id-type="doi">10.1186/s12874-018-0482-1</pub-id><pub-id pub-id-type="medline">29482517</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Machine learning-based in-hospital mortality prediction of HIV/AIDS patients with Talaromyces marneffei infection in Guangxi, China</article-title><source>PLoS Negl Trop Dis</source><year>2022</year><month>05</month><volume>16</volume><issue>5</issue><fpage>e0010388</fpage><pub-id pub-id-type="doi">10.1371/journal.pntd.0010388</pub-id><pub-id pub-id-type="medline">35507586</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gebremedhin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gebremariam</surname><given-names>S</given-names> </name><name name-style="western"><surname>Haile</surname><given-names>F</given-names> </name><name name-style="western"><surname>Weldearegawi</surname><given-names>B</given-names> </name><name name-style="western"><surname>Decotelli</surname><given-names>C</given-names> </name></person-group><article-title>Predictors of mortality among HIV infected children on anti-retroviral therapy in Mekelle Hospital, Northern Ethiopia: a retrospective cohort 
study</article-title><source>BMC Public Health</source><year>2013</year><month>11</month><day>6</day><volume>13</volume><issue>1</issue><fpage>1047</fpage><pub-id pub-id-type="doi">10.1186/1471-2458-13-1047</pub-id><pub-id pub-id-type="medline">24517533</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>He</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>The predictive accuracy of machine learning for the risk of death in HIV patients: a systematic review and meta-analysis</article-title><source>BMC Infect Dis</source><year>2024</year><month>05</month><day>6</day><volume>24</volume><issue>1</issue><fpage>474</fpage><pub-id pub-id-type="doi">10.1186/s12879-024-09368-z</pub-id><pub-id pub-id-type="medline">38711068</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhan</surname><given-names>B</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>W</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Machine learning-based prognostic prediction for hospitalized HIV/AIDS patients with cryptococcus infection in Guangxi, China</article-title><source>BMC Infect Dis</source><year>2024</year><month>10</month><day>8</day><volume>24</volume><issue>1</issue><fpage>1121</fpage><pub-id pub-id-type="doi">10.1186/s12879-024-10013-y</pub-id><pub-id pub-id-type="medline">39379851</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Ijaiya</surname><given-names>M</given-names> </name><name name-style="western"><surname>Troncoso</surname><given-names>E</given-names> </name><name name-style="western"><surname>Mutloatse</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Use of machine learning in predicting continuity of HIV treatment in selected Nigerian states</article-title><source>PLOS Glob Public Health</source><year>2025</year><volume>5</volume><issue>4</issue><fpage>e0004497</fpage><pub-id pub-id-type="doi">10.1371/journal.pgph.0004497</pub-id><pub-id pub-id-type="medline">40273279</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Odukoya</surname><given-names>O</given-names> </name><name name-style="western"><surname>Nwaneri</surname><given-names>S</given-names> </name><name name-style="western"><surname>Odeniyi</surname><given-names>I</given-names> </name><etal/></person-group><article-title>Development and comparison of three data models for predicting diabetes mellitus using risk factors in a Nigerian population</article-title><source>Healthc Inform Res</source><year>2022</year><month>01</month><volume>28</volume><issue>1</issue><fpage>58</fpage><lpage>67</lpage><pub-id pub-id-type="doi">10.4258/hir.2022.28.1.58</pub-id><pub-id pub-id-type="medline">35172091</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Tamiru</surname><given-names>AT</given-names> </name><name name-style="western"><surname>Rade</surname><given-names>BK</given-names> </name><name name-style="western"><surname>Taye</surname><given-names>EB</given-names> </name><etal/></person-group><article-title>Community Level of COVID-19 Information Exposure and Influencing Factors in 
Northwest Ethiopia</article-title></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><source>Ethiopia&#x2019;s national research ethics guidelines</source><access-date>2026-04-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.scribd.com/document/450446123/national-research-ethics-review-guidline">https://www.scribd.com/document/450446123/national-research-ethics-review-guidline</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Sarkar</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bali</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sharma</surname><given-names>T</given-names> </name></person-group><source>Practical Machine Learning with Python: A Problem-Solver&#x2019;s Guide to Building Real-World Intelligent Systems</source><year>2018</year><access-date>2026-03-27</access-date><publisher-name>Apress</publisher-name><fpage>25</fpage><lpage>30</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://link.springer.com/book/10.1007/978-1-4842-3207-1">https://link.springer.com/book/10.1007/978-1-4842-3207-1</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kuhn</surname><given-names>M</given-names> </name><name name-style="western"><surname>Johnson</surname><given-names>K</given-names> </name></person-group><source>Applied Predictive Modeling</source><year>2013</year><publisher-name>Springer</publisher-name><pub-id pub-id-type="other">978-1-4614-6848-6</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Soha</surname><given-names>K</given-names> </name><name name-style="western"><surname>Phuthomdee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Srichai</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kittiratanawasin</surname><given-names>L</given-names> </name><name name-style="western"><surname>Han</surname><given-names>WM</given-names> </name><name name-style="western"><surname>Teeraananchai</surname><given-names>S</given-names> </name></person-group><article-title>Evaluating machine learning algorithms for predicting HIV status among young Thai men who have sex with men</article-title><source>BMJ Health Care Inform</source><year>2025</year><month>05</month><day>15</day><volume>32</volume><issue>1</issue><fpage>e101189</fpage><pub-id pub-id-type="doi">10.1136/bmjhci-2024-101189</pub-id><pub-id pub-id-type="medline">40379266</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mugo</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Shkedy</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Mwalili</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Modelling trends of CD4 counts for patients on antiretroviral therapy (ART): a comprehensive health care clinic in Nairobi, Kenya</article-title><source>BMC Infect Dis</source><year>2022</year><month>01</month><day>4</day><volume>22</volume><issue>1</issue><fpage>29</fpage><pub-id pub-id-type="doi">10.1186/s12879-021-06977-w</pub-id><pub-id pub-id-type="medline">34983418</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quan</surname><given-names>VM</given-names> </name><name 
name-style="western"><surname>Vongchak</surname><given-names>T</given-names> </name><name name-style="western"><surname>Jittiwutikarn</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Predictors of mortality among injecting and non-injecting HIV-negative drug users in northern Thailand</article-title><source>Addiction</source><year>2007</year><month>03</month><volume>102</volume><issue>3</issue><fpage>441</fpage><lpage>446</lpage><pub-id pub-id-type="doi">10.1111/j.1360-0443.2006.01709.x</pub-id><pub-id pub-id-type="medline">17298652</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kibuuka</surname><given-names>H</given-names> </name><name name-style="western"><surname>Musingye</surname><given-names>E</given-names> </name><name name-style="western"><surname>Mwesigwa</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Predictors of all-cause mortality among people with human immunodeficiency virus (HIV) in a prospective cohort study in East Africa and Nigeria</article-title><source>Clin Infect Dis</source><year>2022</year><month>09</month><day>10</day><volume>75</volume><issue>4</issue><fpage>657</fpage><lpage>664</lpage><pub-id pub-id-type="doi">10.1093/cid/ciab995</pub-id><pub-id pub-id-type="medline">34864933</pub-id></nlm-citation></ref></ref-list></back></article>