<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v14i1e80829</article-id><article-id pub-id-type="doi">10.2196/80829</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>From Flow to Feature Using a Proof-of-Concept Spectral-Driven Machine Learning Approach Using Smart Urinary and Drainage Catheter Systems: Algorithm Development and Validation</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Poggi</surname><given-names>Leonardo</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Meckler</surname><given-names>Anastasia</given-names></name><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>K&#x00FC;nert</surname><given-names>Sebastian</given-names></name><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Jeske</surname><given-names>Julia</given-names></name><degrees>Dr rer nat</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Siaj</surname><given-names>Ramsi</given-names></name><degrees>MDRA, Dr rer nat</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Selvamoorthy</surname><given-names>Thanusiah</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Berger</surname><given-names>Michael Fabian</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nensa</surname><given-names>Felix</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kohnke</surname><given-names>Judith</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hosters</surname><given-names>Bernadette</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Brendt-M&#x00FC;ller</surname><given-names>Jennifer</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Roser</surname><given-names>Mario</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name 
name-style="western"><surname>Hosch</surname><given-names>Ren&#x00E9;</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Institute of Diagnostic and Interventional Radiology and Neuroradiology, Essen University Hospital</institution><addr-line>Hufelandstra&#x00DF;e 55</addr-line><addr-line>Essen</addr-line><country>Germany</country></aff><aff id="aff2"><institution>Institute for Artificial Intelligence in Medicine (IKIM), Essen University Hospital</institution><addr-line>Essen</addr-line><country>Germany</country></aff><aff id="aff3"><institution>Department of Pediatric Surgery, Essen University Hospital</institution><addr-line>Essen</addr-line><country>Germany</country></aff><aff id="aff4"><institution>Department of Nursing Development and Nursing Research, Essen University Hospital</institution><addr-line>Essen</addr-line><country>Germany</country></aff><aff id="aff5"><institution>Elixion Medical GmbH</institution><addr-line>D&#x00FC;sseldorf</addr-line><country>Germany</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Benis</surname><given-names>Arriel</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Wang</surname><given-names>Chunzhuo</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Lapcharoensuk</surname><given-names>Ravipat</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Ren&#x00E9; Hosch, PhD, Institute of Diagnostic and Interventional Radiology and Neuroradiology, Essen University Hospital, Hufelandstra&#x00DF;e 55, Essen, 45147, Germany, 43 20172377817; <email>rene.hosch@uk-essen.de</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors 
contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>14</day><month>5</month><year>2026</year></pub-date><volume>14</volume><elocation-id>e80829</elocation-id><history><date date-type="received"><day>17</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>20</day><month>02</month><year>2026</year></date><date date-type="accepted"><day>06</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Leonardo Poggi, Anastasia Meckler, Sebastian K&#x00FC;nert, Julia Jeske, Ramsi Siaj, Thanusiah Selvamoorthy, Michael Fabian Berger, Felix Nensa, Judith Kohnke, Bernadette Hosters, Jennifer Brendt-M&#x00FC;ller, Mario Roser, Ren&#x00E9; Hosch. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 14.5.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2026/1/e80829"/><abstract><sec><title>Background</title><p>Current urinary and drainage catheter systems collect fluids for visual inspection or manual sampling, offering limited diagnostic value while being labor-intensive and prone to error. Machine learning (ML) has the potential to automate the analysis of these fluids. However, existing methods rely on complex preprocessing steps, which hinder real-time analysis.</p></sec><sec><title>Objective</title><p>We aim to develop and evaluate a fully automated, real-time diagnostic approach for smart urinary and drainage catheter systems by leveraging spectral data and ML to differentiate pathological from healthy excreted fluids without the need for manual preprocessing.</p></sec><sec sec-type="methods"><title>Methods</title><p>This study proposes a novel, fully automated approach for smart urinary and drainage catheter systems that uses spectra and ML to extract features from excreted fluids, enabling real-time analysis directly. A total of 454 surgical drainage fluid samples (from 181 patients) and 401 urine catheter samples (from 168 patients) were analyzed using smart catheters and drains equipped with compact mini-spectrometer sensors. The collected spectral data were fed into 3 different ML models: a random forest, a partial least squares discriminant analysis regression, and a convolutional neural network (CNN). 
Each model aimed to extract features and differentiate between pathological and healthy urine and drainage samples based on the various biomarkers available from previously conducted laboratory analyses.</p></sec><sec sec-type="results"><title>Results</title><p>All 3 approaches (random forest, partial least squares discriminant analysis regression, and CNN) achieved promising results, demonstrating the potential of the overall approach. In particular, the CNN models trained on the drainage biomarkers hemoglobin and bilirubin achieved the best results. Matthews correlation coefficient scores of 0.83 and 0.81 were obtained for hemoglobin and bilirubin, respectively, when differentiating between pathological and healthy samples using the extracted spectral features.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This work demonstrates the potential of spectral-driven ML for smart urinary and drainage catheter systems. This approach offers a real-time, noninvasive method for analyzing excreted fluids, paving the way for improved diagnostics and personalized patient care. Further research will explore the optimal ML model for this application.</p></sec></abstract><kwd-group><kwd>AI in medicine</kwd><kwd>urine diagnostics</kwd><kwd>surgical drains</kwd><kwd>spectroscopy</kwd><kwd>digital health care</kwd><kwd>real-time monitoring</kwd><kwd>early warning systems</kwd><kwd>artificial intelligence</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Urinary and drainage catheters are indispensable medical devices widely used across various clinical settings. The primary usage of such devices is to aid the excretion of biological fluids from hospitalized patients. The collected liquids are periodically monitored by health care professionals to gain insight into the patients&#x2019; health status. 
For example, surgical drains are used to evacuate fluid from the postoperative site, allowing for the monitoring of wound healing progress by tracking the volume and quality of the fluid [<xref ref-type="bibr" rid="ref1">1</xref>]. In the case of urinary catheters, the different urine biomarkers are critical for identifying diseases and underlying conditions [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>While these devices play a crucial role in patient care, their extensive usage can predispose individuals to a variety of complications. In the case of urinary catheters, common complications include urinary tract infections, bladder stones, and urethral injuries [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. Similarly, the usage of drainage catheters can lead to complications such as catheter blockage, leakage, tissue trauma, and infections at the insertion site [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. These complications can lead to prolonged hospital stays and even systemic infections if not promptly identified and addressed.</p><p>As of today, the current approach consists of visual inspection and manual sampling of these fluids at specific, discrete time intervals by medical staff. To obtain a quantitative evaluation of the composition of the collected fluids, separate laboratory analyses must be performed. This implies a significant time delay in detecting potentially severe complications. For these reasons, a reliable automation of such monitoring processes would bring along numerous advantages. In times of severe medical staff shortages, automated and continuous monitoring could not only contribute to improved patient monitoring quality but also, at the same time, reduce the monitoring routine workload.</p><p>Research has been conducted in this field. 
For instance, in the case of catheter-associated urinary tract infections, electronic monitoring systems have been developed to accelerate the detection of such complications [<xref ref-type="bibr" rid="ref10">10</xref>]. Additionally, to enhance the performance of ordinary urine dipstick tests, a machine learning (ML)&#x2013;based approach has been proposed [<xref ref-type="bibr" rid="ref11">11</xref>]. For surgical drain outputs, digital solutions have been implemented for volumetric measurements [<xref ref-type="bibr" rid="ref12">12</xref>] and for determining the fluid&#x2019;s color [<xref ref-type="bibr" rid="ref13">13</xref>]. A more comprehensive solution for surgical drain outputs has been proposed by Roser et al [<xref ref-type="bibr" rid="ref14">14</xref>] by introducing a so-called SmartDrain (Elixion Medical GmbH) device that performs and analyzes spectral measurements on drainage fluids at the patient&#x2019;s bedside.</p><p>In recent years, artificial intelligence (AI) has garnered significant attention in the context of biomedical research. The application of ML methods to such research questions, coupled with the increasing computational power available, has demonstrated remarkable performance in analyzing vast amounts of biomedical data and extracting meaningful features [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>In the present study, we aim to build upon the approach proposed by Roser et al [<xref ref-type="bibr" rid="ref14">14</xref>] by implementing an AI-driven early warning system for the detection of pathological markers in urine and drainage samples. Specifically, we analyze spectral data acquired with a compact mini-spectrometer using classification algorithms such as partial least squares discriminant analysis regression (PLS-DA), random forest (RF), and convolutional neural networks (CNN). 
PLS-DA is an established, robust, and highly interpretable classification method mostly used in chemometrics in combination with high-dimensional spectral data [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. RF is a flexible, nonlinear ensemble-based approach capable of modeling complex interactions while maintaining a relatively high degree of interpretability [<xref ref-type="bibr" rid="ref19">19</xref>]. On the other hand, CNNs represent a data-driven deep learning strategy that has the potential to capture subtle patterns that may not be detectable by more traditional ML approaches [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Several studies have demonstrated superior performance when processing spectral data with 2D CNNs as opposed to 1D architectures [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. For this reason, in the present study, we implement a simple CNN model that classifies 3-channel images obtained from the raw spectral data.</p><p>Together, the selected ML approaches span a wide range of complexity and interpretability, enabling a systematic evaluation of their effectiveness and suitability for spectral-data-based catheter monitoring. Ultimately, our goal is to revolutionize the management of catheter-related complications, improving patient care and health care efficiency.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>In <xref ref-type="fig" rid="figure1">Figure 1</xref>, the entire pipeline implemented for this study is presented. In the first step, labeled data were generated by acquiring drainage and urine samples and performing spectral measurements as well as laboratory analyses on them. For each collected sample, 3 spectra and a series of laboratory markers were obtained. 
These labeled data were used to train AI models for each of the laboratory markers (urine and drainage). In the second step of the pipeline, the spectra were preprocessed in preparation for the AI models. This step involved normalizing the spectra. For the CNN model, an additional step was performed by converting the spectra to 3-channel images. The labeled data were eventually fed to 3 separate AI models: a CNN, a PLS-DA, and an RF classification model.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Classification pipeline adopted for this study. In the first step, urine and drainage samples are acquired from patients. For these samples, both spectral measurements and laboratory analysis are performed. To create labeled data, the values of the biomarkers measured in the laboratory are binarized into the categories healthy and pathological. The measured spectra undergo a preprocessing step that includes normalization. The transformed spectra are classified using RF and PLS-DA models. Additionally, the normalized spectra are transformed into 3-channel images, which are then fed to a CNN for classification. Ch: channel; CNN: convolutional neural network; P/H: pathological/healthy; PLS-DA: partial least squares discriminant analysis regression; RF: random forest.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80829_fig01.png"/></fig></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study was approved by the Ethics Committee of the University Hospital Essen (21-10402-BO). Informed consent was obtained from all patients. The data included in this study were fully anonymized. Participants received no compensation.</p></sec><sec id="s2-3"><title>Dataset and Data Generation</title><p>The dataset used in the present work consists of 454 (181 patients) drainage and 401 (168 patients) urine samples. 
The samples were acquired from patients aged between 0 and 85 years at the University Hospital Essen. The age distribution of the drainage dataset has a median and IQR of 57 and 22, respectively. For the urine data, the median amounts to 56 and the IQR to 20.25.</p><p>Each sample was divided into 2 batches. The samples were then frozen and stored at &#x2212;80&#x00B0;C until further processing. All samples from the first batch were analyzed at the central laboratory of the University Hospital Essen. A total of 14 drainage markers (<xref ref-type="table" rid="table1">Table 1</xref>) and a total of 11 markers for the urine samples (<xref ref-type="table" rid="table2">Table 2</xref>) were examined. For each sample, the measured markers were binarized using predefined cutoff values. If a specific marker was higher than the corresponding cutoff value, the marker was labeled as pathologic. Otherwise, it was labeled as healthy. Those cutoff values were defined by the guidelines provided by the central laboratory (version 1.4 dated September 21, 2021). As there are no predefined cutoff values for drainage fluids, standard values for serum were used as a reference for those samples. However, for the markers hemoglobin and erythrocytes, the cutoff value for pathology was set to 0 because their presence in surgical drain fluids is universally considered pathological and may even indicate relevant postoperative bleeding [<xref ref-type="bibr" rid="ref25">25</xref>].</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Overview of the drainage markers considered in this study. The marker of a sample is categorized as pathological (red) if its value is higher than the corresponding cutoff value. Otherwise, it is marked as healthy (green). 
Additionally, the ratio between the minority and majority classes of each binarized marker is listed in the table alongside a visual representation of the distribution between pathological (red bars) and healthy samples (green bars).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Drain marker</td><td align="left" valign="bottom">Cutoff value</td><td align="left" valign="bottom">Samples, n</td><td align="left" valign="bottom">Minority/majority class</td><td align="left" valign="bottom">Ratios P versus H</td></tr></thead><tbody><tr><td align="left" valign="top">Total protein</td><td align="left" valign="top">2.5 g/dL</td><td align="left" valign="top">454</td><td align="left" valign="top">0.99</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig02.png"/></td></tr><tr><td align="left" valign="top">Glucose</td><td align="left" valign="top">50 mg/dL</td><td align="left" valign="top">453</td><td align="left" valign="top">0.72</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig03.png"/></td></tr><tr><td align="left" valign="top">LDH<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top">247 U/L</td><td align="left" valign="top">449</td><td align="left" valign="top">0.54</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig04.png"/></td></tr><tr><td align="left" valign="top">Hemoglobin</td><td align="left" valign="top">0 mg/dL</td><td align="left" valign="top">425</td><td align="left" valign="top">0.5</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig05.png"/></td></tr><tr><td align="left" valign="top">Lipase</td><td align="left" valign="top">53 U/L</td><td align="left" valign="top">454</td><td align="left" valign="top">0.4</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig06.png"/></td></tr><tr><td align="left" 
valign="top">Amylase</td><td align="left" valign="top">53 U/L</td><td align="left" valign="top">447</td><td align="left" valign="top">0.24</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig07.png"/></td></tr><tr><td align="left" valign="top">Bilirubin</td><td align="left" valign="top">1.2 mg/dL</td><td align="left" valign="top">453</td><td align="left" valign="top">0.24</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig08.png"/></td></tr><tr><td align="left" valign="top">Albumin</td><td align="left" valign="top">2.5 mg/dL</td><td align="left" valign="top">449</td><td align="left" valign="top">0.19</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig09.png"/></td></tr><tr><td align="left" valign="top">Erythrocytes count</td><td align="left" valign="top">0</td><td align="left" valign="top">427</td><td align="left" valign="top">0.16</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig10.png"/></td></tr><tr><td align="left" valign="top">Uric acid</td><td align="left" valign="top">7.2 mg/dL</td><td align="left" valign="top">453</td><td align="left" valign="top">0.09</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig11.png"/></td></tr><tr><td align="left" valign="top">Mononuclear cells</td><td align="left" valign="top">0</td><td align="left" valign="top">420</td><td align="left" valign="top">0.09</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig12.png"/></td></tr><tr><td align="left" valign="top">Triglycerides</td><td align="left" valign="top">200 mg/dL</td><td align="left" valign="top">453</td><td align="left" valign="top">0.04</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig13.png"/></td></tr><tr><td align="left" valign="top">Polymorphonuclear cells</td><td align="left" valign="top">0</td><td align="left" valign="top">420</td><td align="left" 
valign="top">0.02</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig14.png"/></td></tr><tr><td align="left" valign="top">Leucocytes</td><td align="left" valign="top">0/nL</td><td align="left" valign="top">419</td><td align="left" valign="top">0.01</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig15.png"/></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>LDH: lactate dehydrogenase.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Overview of the urine markers considered in this study. The marker of a sample is categorized as pathological if its value is higher than the corresponding cutoff value or, for dipstick markers, if at least one &#x201C;+&#x201D; symbol was measured. Otherwise, it is marked as healthy. An exception is made for the marker pH, where the healthy samples are found in a range of pH values between 5 and 7.5. Additionally, the ratio between the minority and majority classes of each binarized marker is listed in the table, along with a visual representation of the distribution between pathological (red bars) and healthy samples (green bars).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Urine marker</td><td align="left" valign="bottom">Cutoff value</td><td align="left" valign="bottom">Samples, n</td><td align="left" valign="bottom">Minority/majority class</td><td align="left" valign="bottom">Ratios P versus H</td></tr></thead><tbody><tr><td align="left" valign="top">Protein</td><td align="left" valign="top">+</td><td align="left" valign="top">401</td><td align="left" valign="top">0.93</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig16.png"/></td></tr><tr><td align="left" valign="top">Leucocytes</td><td align="left" valign="top">+</td><td align="left" valign="top">401</td><td align="left" valign="top">0.74</td><td align="left" valign="top"><graphic 
xlink:href="medinform_v14i1e80829_fig17.png"/></td></tr><tr><td align="left" valign="top">Albumin</td><td align="left" valign="top">2 mg/dL</td><td align="left" valign="top">401</td><td align="left" valign="top">0.32</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig18.png"/></td></tr><tr><td align="left" valign="top">Erythrocytes</td><td align="left" valign="top">+</td><td align="left" valign="top">401</td><td align="left" valign="top">0.24</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig19.png"/></td></tr><tr><td align="left" valign="top">Glucose</td><td align="left" valign="top">16.5 mg/dL</td><td align="left" valign="top">401</td><td align="left" valign="top">0.18</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig20.png"/></td></tr><tr><td align="left" valign="top">Bilirubin</td><td align="left" valign="top">+</td><td align="left" valign="top">401</td><td align="left" valign="top">0.17</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig21.png"/></td></tr><tr><td align="left" valign="top">Urobilinogen</td><td align="left" valign="top">0.2 mg/dL</td><td align="left" valign="top">401</td><td align="left" valign="top">0.15</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig22.png"/></td></tr><tr><td align="left" valign="top">Nitrite</td><td align="left" valign="top">+</td><td align="left" valign="top">401</td><td align="left" valign="top">0.14</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig23.png"/></td></tr><tr><td align="left" valign="top">Ketones</td><td align="left" valign="top">+</td><td align="left" valign="top">401</td><td align="left" valign="top">0.13</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig24.png"/></td></tr><tr><td align="left" valign="top">Glucose (stick test)</td><td align="left" valign="top">+</td><td align="left" 
valign="top">401</td><td align="left" valign="top">0.12</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig25.png"/></td></tr><tr><td align="left" valign="top">pH</td><td align="left" valign="top">[5, 7.5]</td><td align="left" valign="top">401</td><td align="left" valign="top">0.1</td><td align="left" valign="top"><graphic xlink:href="medinform_v14i1e80829_fig26.png"/></td></tr></tbody></table></table-wrap><p>Two exceptions to the binarization procedure were made. The first concerns the majority of urine markers, which were measured using urine dipstick tests that estimate the concentration of the marker using a categorical scale. A measurement was marked with the minus symbol &#x201C;&#x2212;&#x201D; if the concentration of the marker was not high enough to be detectable. Pathologic concentrations of the marker were marked with a series of plus symbols &#x201C;+.&#x201D; Therefore, a marker was classified as pathologic if the laboratory stick measurement showed at least one &#x201C;+&#x201D; symbol. Otherwise, the marker was classified as healthy. The second exception to the binarization procedure was made for pH. This marker does not present a single cutoff value but rather a range of values for which the marker was considered normal and, therefore, healthy. Values measured outside the defined ranges were defined as pathological.</p><p>Additionally, in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>, the ratio between the minority and majority classes of each binarized drainage and urine marker is listed. This value provides important information on the balance between pathological (red bars) and healthy (green bars) samples within a specific fluid marker. Therefore, a perfectly balanced dataset where both classes include the same number of samples would produce a ratio of 1. On the contrary, a dataset where 1 of the 2 classes does not contain any sample would produce a ratio of 0. 
As shown in the results, this value greatly affects the quality and performance of the trained models.</p><p>For each sample in the second batch, spectral measurements were performed using a compact mini-spectrometer. The mini-spectrometer was integrated with a self-developed lens array and an electrical current- and temperature-controlled hyperspectral illumination source. This setup ensures broad-spectrum illumination of the samples. The evaluation platform, although relatively large in size, was designed to accommodate the assessment of multiple illumination angles simultaneously and did not yet focus on size reduction during its development. A schematic representation of the spectrometer is presented in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Schematic of the mini-spectrometer and its key components. Light from the hyperspectral light sources is directed via the light guides to the sample chamber where it interacts with the sample. The resulting light output is captured by a lens array and focused on the spectrometer head. This setup uses 3 light sources placed at different angles to form 3 distinct light paths: direct transmission (DT), angular transmission (AT), and angular reflection (AR). This figure was rendered using Autodesk Fusion 360 (Autodesk GmbH). AR: angular reflection; AT: angular transmission; DT: direct transmission.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80829_fig27.png"/></fig><p>The measured spectra consist of 288 data points captured between 313.08 and 874.27 nm. The data collection was improved by illuminating each sample from 3 different angles: direct transmission (DT), angular transmission (AT), and angular reflection (AR). 
The exposure time for each angle was fine-tuned to achieve an optimal signal-to-noise ratio, amounting to 20, 200, and 320 &#x00B5;s for the settings DT, AT, and AR, respectively. Therefore, the input data for the AI models can be thought of as a feature matrix with the shape of (N, 3, 288), where N is the number of samples of a specific fluid marker, 3 is the number of measured spectra per sample (DT, AT, and AR), and 288 is the number of data points measured for each spectrum. An example measurement of a drainage sample is shown in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Example spectral measurement on a drainage sample. Each sample is illuminated from 3 different angles, obtaining 3 spectra: DT (blue curve), AT (red curve), and AR (green curve). AR: angular reflection; AT: angular transmission; DT: direct transmission.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80829_fig28.png"/></fig><p>Following each measurement, the sample was discarded, and the evaluation platform&#x2019;s tubing was thoroughly flushed with sterile water to prevent cross-contamination between samples. After completing the batch measurement, the tubing was flushed with isopropyl alcohol followed by water to prevent sample cross-contamination and bacterial growth.</p></sec><sec id="s2-4"><title>Spectra Preprocessing</title><sec id="s2-4-1"><title>Spectra Normalization</title><p>In the present work, the effect of bias in the input data was mitigated by scaling the spectra using the standard normal variate (SNV) method [<xref ref-type="bibr" rid="ref26">26</xref>]. Therefore, all spectra were transformed into new spectra with 0 mean and unit variance, as shown in <xref ref-type="fig" rid="figure4">Figure 4A-C</xref>. 
For each wavelength, the SD of the intensities of the spectra before (blue line) and after (red line) the SNV correction is shown in a semilogarithmic plot. For all spectrometer settings (DT, AT, and AR) and liquid types, a drastic reduction in the SD of the intensities is obtained, from approximately 2500 (before SNV) to 0.2 (after SNV).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>SNV correction applied to the spectral data. (A) Original spectra. (B) Spectra after the SNV correction. (C) Semilogarithmic plot of the SD of the intensities before (blue line) and after (red line) the SNV correction. SNV: standard normal variate.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80829_fig29.png"/></fig><p>For each spectrometer setting, the drainage spectra exhibited a higher spectral variability when compared to the urine spectra. Quantitatively, the mean of the wavelength-wise intensity SDs showed increases of approximately 53% (DT), 140% (AT), and 133% (AR) for drainage relative to urine samples. A visual representation of this behavior is presented in Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-4-2"><title>Spectra to Image Conversion</title><p>For the CNN classification models, 3-channel images from the measured spectra were generated. The 3 spectra of each sample can be thought of as a 2D array with the shape of (3, 288). After normalizing the spectra to the range [0, 1], symmetric 0 padding with a length of 56 on each side was applied to the second dimension of the array to reach a total length of 400. After that, the array was reshaped into the shape (3, 20, 20) (<xref ref-type="fig" rid="figure5">Figure 5</xref>).</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Spectra to image conversion. 
The 3 spectra measured on each sample are transformed into a 3-channel image by padding and reshaping the arrays of intensity values. Those images were used as input for the CNN architecture. AR: angular reflection; AT: angular transmission; CNN: convolutional neural network; DT: direct transmission.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80829_fig30.png"/></fig></sec></sec><sec id="s2-5"><title>AI Classification Models</title><p>For each fluid marker dataset, the samples were grouped at the patient level to ensure that all measurements from a given patient were kept together. Thereafter, a patient-wise train-test split with an 80/20 ratio was performed. This approach prevents data leakage, ensuring that samples measured on a specific patient are not spread across multiple datasets [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Subsequently, the training set was further subdivided into 3 nonoverlapping subsets, grouped at the patient level and stratified with respect to the target label (eg, ratio between pathological and healthy subjects). Those datasets were used for cross-validation training. Each fold was then tested on the test set, and the classification probabilities were averaged across all 3 folds to obtain the final classification probabilities on the test data.</p><p>Due to the highly imbalanced datasets of many fluid markers, the Matthews correlation coefficient (MCC) was chosen as the primary metric to track and maximize. Alongside MCC, the <italic>F</italic><sub>1</sub>-score and the true positive rate (TPR) were measured. Additionally, the receiver operating characteristic (ROC) curve was computed together with the corresponding area under the curve (ROCAUC) of each model. 
Finally, the predictive power of the trained models was assessed by comparing them to a dummy classifier.</p></sec><sec id="s2-6"><title>Models</title><sec id="s2-6-1"><title>About PLS-DA</title><p>The partial least squares (PLS) algorithm is a regression-based method widely used in near-infrared spectroscopy and chemometrics [<xref ref-type="bibr" rid="ref17">17</xref>]. Unlike principal component regression, PLS presents the advantage of maximizing the covariance between the transformed input features and the target labels. If the target label is categorical, we speak of PLS-DA [<xref ref-type="bibr" rid="ref18">18</xref>]. In this study, PLS-DA models for classification were implemented using the following procedures. First, the feature matrix with a shape of (N, 3, 288) was transformed into a 2D matrix with a shape of (N, 864) by concatenating the 3 spectra across the wavelength dimension. In the second step, the whole training set was used to compute up to 50 PLS-DA models. In each iteration, the number of PLS components was increased by one, and the MCC score was measured. Therefore, by maximizing the MCC score, the optimal number of PLS components for a specific model was found. In the final step, a PLS-DA model trained with the optimal number of components identified previously was tested on the test set using 3-fold cross-validation. Additionally, the PLS coefficients of the models were examined to enhance explainability and determine which wavelengths are most crucial for the models&#x2019; classifications. All PLS-DA models were implemented using the Python (Python Software Foundation) library Scikit-learn (version 1.3.1).</p></sec><sec id="s2-6-2"><title>About RF</title><p>RF is a popular ML algorithm based on decision trees. With appropriate data, it can deliver robust, highly explainable models, and it is fast to train without needing much computational power [<xref ref-type="bibr" rid="ref19">19</xref>]. 
In the present study, RF models were trained in 3-fold cross-validation on N &#x00D7; 864 feature matrices of the different fluid markers, where N is the number of samples and 864 is the number of intensity values of the 3 spectra measured on each sample. Together with the metrics described in the section AI Classification Models, the models&#x2019; feature importances were tracked to understand which wavelengths contribute the most to the classification&#x2019;s decision. The implementation of the RF models has been conducted using the Python library Scikit-learn (version 1.3.1). The chosen hyperparameters are listed in Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-6-3"><title>About CNN</title><p>The third ML approach involves the training of a simple CNN. The architecture consisted of 2 2D convolutional layers and 2 fully connected layers. After each convolution operation, batch normalization and max pooling operations were performed. The 3-channel images obtained from the spectra (see the section Spectra to Image Conversion) were used as input for the CNN. To obtain a classification probability, the output of the fully connected layer was normalized to a value between 0 and 1 using the sigmoid function. The hyperparameters of the CNN (learning rate, batch size, and optimizer) were determined by means of a manual search guided by the mean MCC computed across all biomarker models trained and evaluated on the whole training set. Using this single set of hyperparameters, separate CNN models were trained for each fluid marker in 3-fold cross-validation, with the model&#x2019;s weights randomly reinitialized at the beginning of each fold. All CNN models were implemented using the Python library PyTorch (version 1.13.1). 
An overview of the implemented architecture, along with the training parameters, is provided in Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>In <xref ref-type="table" rid="table3">Table 3</xref>, an overview of the performance of the AI models trained on all drainage markers is presented. For each of the markers, hemoglobin, bilirubin, albumin, lactate dehydrogenase, total protein, and mononuclear cells, at least 1 model was trained for which the MCC score measured on the test dataset was higher than 0.5. For those markers, <italic>F</italic><sub>1</sub>-scores of 0.71 or higher were measured for at least 1 of the AI models. Except for mononuclear cell models, the ROCAUCs of the mentioned markers were consistently higher than 0.8 on all 3 AI approaches. To assess the overall performance of the 3 AI approaches, the average value of the MCC scores across all fluid markers was computed. The PLS-DA and RF methods yield comparable mean MCC scores of 0.37 (SD 0.28) and 0.37 (SD 0.26), respectively, whereas the CNN approach scores slightly higher at 0.4 (SD 0.3). 
A Friedman test showed no statistically significant difference in performance between the models (<italic>&#x03C7;</italic>&#x00B2;<sub>2</sub>=2.31, <italic>P</italic>=.315, Kendall <italic>W</italic>=0.08).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Overview of the results of the PLS-DA<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>, RF<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup>, and CNN<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> models trained and tested on the drainage markers datasets.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="3">MCC<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="bottom" colspan="3"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom" colspan="3">ROCAUC<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="bottom" colspan="3">TPR<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td></tr><tr><td align="left" valign="bottom">Drainage marker</td><td align="left" valign="bottom">PLS-DA</td><td align="left" valign="bottom">RF</td><td align="left" valign="bottom">CNN</td><td align="left" valign="bottom">PLS-DA</td><td align="left" valign="bottom">RF</td><td align="left" valign="bottom">CNN</td><td align="left" valign="bottom">PLS-DA</td><td align="left" valign="bottom">RF</td><td align="left" valign="bottom">CNN</td><td align="left" valign="bottom">PLS-DA</td><td align="left" valign="bottom">RF</td><td align="left" valign="bottom">CNN</td></tr></thead><tbody><tr><td align="left" valign="top">Hemoglobin</td><td align="left" valign="top">0.78</td><td align="left" valign="top">0.78</td><td align="left" valign="top">0.83</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.96</td><td align="left" 
valign="top">0.97</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.99</td></tr><tr><td align="left" valign="top">Bilirubin</td><td align="left" valign="top">0.81</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.81</td><td align="left" valign="top">0.86</td><td align="left" valign="top">0.79</td><td align="left" valign="top">0.85</td><td align="left" valign="top">0.94</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.83</td><td align="left" valign="top">0.65</td><td align="left" valign="top">0.74</td></tr><tr><td align="left" valign="top">Albumin</td><td align="left" valign="top">0.29</td><td align="left" valign="top">0.48</td><td align="left" valign="top">0.71</td><td align="left" valign="top">0.41</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.71</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.87</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.55</td><td align="left" valign="top">0.64</td><td align="left" valign="top">0.55</td></tr><tr><td align="left" valign="top">LDH<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td><td align="left" valign="top">0.47</td><td align="left" valign="top">0.7</td><td align="left" valign="top">0.65</td><td align="left" valign="top">0.81</td><td align="left" valign="top">0.91</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.79</td><td align="left" valign="top">0.91</td><td align="left" valign="top">0.89</td><td align="left" valign="top">0.79</td><td align="left" valign="top">0.94</td><td align="left" valign="top">0.85</td></tr><tr><td align="left" valign="top">Total protein</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.48</td><td align="left" 
valign="top">0.58</td><td align="left" valign="top">0.78</td><td align="left" valign="top">0.71</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.85</td><td align="left" valign="top">0.8</td><td align="left" valign="top">0.85</td><td align="left" valign="top">0.81</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.71</td></tr><tr><td align="left" valign="top">Mononuclear cells</td><td align="left" valign="top">0.46</td><td align="left" valign="top">0.42</td><td align="left" valign="top">0.51</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.57</td><td align="left" valign="top">0.76</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.98</td></tr><tr><td align="left" valign="top">Triglycerides</td><td align="left" valign="top">0.7</td><td align="left" valign="top">0.49</td><td align="left" valign="top">0.49</td><td align="left" valign="top">0.67</td><td align="left" valign="top">0.5</td><td align="left" valign="top">0.5</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.5</td><td align="left" valign="top">0.5</td><td align="left" valign="top">0.5</td></tr><tr><td align="left" valign="top">Lipase</td><td align="left" valign="top">0.3</td><td align="left" valign="top">0.19</td><td align="left" valign="top">0.36</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.51</td><td align="left" valign="top">0.42</td><td align="left" valign="top">0.71</td><td align="left" valign="top">0.66</td><td align="left" valign="top">0.62</td><td align="left" valign="top">0.48</td><td align="left" valign="top">0.45</td><td align="left" valign="top">0.27</td></tr><tr><td align="left" valign="top">Glucose</td><td 
align="left" valign="top">0.17</td><td align="left" valign="top">0.29</td><td align="left" valign="top">0.35</td><td align="left" valign="top">0.55</td><td align="left" valign="top">0.55</td><td align="left" valign="top">0.6</td><td align="left" valign="top">0.51</td><td align="left" valign="top">0.69</td><td align="left" valign="top">0.69</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.47</td><td align="left" valign="top">0.53</td></tr><tr><td align="left" valign="top">Erythrocytes count</td><td align="left" valign="top">0.14</td><td align="left" valign="top">0.1</td><td align="left" valign="top">0.22</td><td align="left" valign="top">0.93</td><td align="left" valign="top">0.94</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.77</td><td align="left" valign="top">0.89</td><td align="left" valign="top">0.81</td><td align="left" valign="top">0.92</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.96</td></tr><tr><td align="left" valign="top">Amylase</td><td align="left" valign="top">0.12</td><td align="left" valign="top">0.14</td><td align="left" valign="top">0.08</td><td align="left" valign="top">0.27</td><td align="left" valign="top">0.28</td><td align="left" valign="top">0.08</td><td align="left" valign="top">0.39</td><td align="left" valign="top">0.45</td><td align="left" valign="top">0.48</td><td align="left" valign="top">0.21</td><td align="left" valign="top">0.21</td><td align="left" valign="top">0.04</td></tr><tr><td align="left" valign="top">Uric acid</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0.57</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" 
valign="top">0</td></tr><tr><td align="left" valign="top">Polymorphonuclear cells</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0.99</td><td align="left" valign="top">0.99</td><td align="left" valign="top">0.99</td><td align="left" valign="top">0.15</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Leukocytes</td><td align="left" valign="top">0.34</td><td align="left" valign="top">0.4</td><td align="left" valign="top">0</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.99</td><td align="left" valign="top">1</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.93</td><td align="left" valign="top">0.95</td><td align="left" valign="top">1</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>PLS-DA: partial least squares discriminant analysis regression.</p></fn><fn id="table3fn2"><p><sup>b</sup>RF: random forest.</p></fn><fn id="table3fn3"><p><sup>c</sup>CNN: convolutional neural network.</p></fn><fn id="table3fn4"><p><sup>d</sup>MCC: Matthews correlation coefficient.</p></fn><fn id="table3fn5"><p><sup>e</sup>ROCAUC: area under the receiver operating characteristic curve.</p></fn><fn id="table3fn6"><p><sup>f</sup>TPR: true positive rate.</p></fn><fn id="table3fn7"><p><sup>g</sup>LDH: lactate dehydrogenase.</p></fn></table-wrap-foot></table-wrap><p>The models trained on hemoglobin and bilirubin data stand out from the remaining markers, showing the highest performance measured in this study. The CNN approach performed best for hemoglobin data with an MCC score of 0.83, <italic>F</italic><sub>1</sub>-score of 0.96, ROCAUC of 0.97, and a TPR of 0.99. 
The CNN approach delivered promising results also on bilirubin data: MCC score of 0.81, <italic>F</italic><sub>1</sub>-score of 0.85, ROCAUC of 0.95, and a TPR of 0.74.</p><p>In <xref ref-type="fig" rid="figure6">Figure 6</xref>, the confusion matrices for drainage bilirubin and hemoglobin classification on the test dataset are shown, along with the ROC curves calculated from the predicted classification probabilities. The confusion matrices and ROC curves of the other drainage and urine marker models can be inspected in Figures S2 and S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>The models trained on urine data exhibit overall lower performance when evaluated on the test datasets. The mean MCC scores measured across all markers amount to 0.29 (SD 0.26; PLS-DA), 0.28 (SD 0.29; RF), and 0.31 (SD 0.27; CNN). A Friedman test showed no statistically significant difference in performance between the models, with a small to moderate effect size (<italic>&#x03C7;</italic>&#x00B2;<sub>2</sub>=5.15, <italic>P</italic>=.08, Kendall <italic>W</italic>=0.23). Nevertheless, the markers bilirubin, erythrocytes (hemoglobin), protein, urobilinogen, and albumin were able to achieve MCC scores higher than 0.5. The highest MCC score recorded is achieved with the RF model on bilirubin data, reaching a value of 0.74. An overview of the urine models' performances is presented in <xref ref-type="table" rid="table4">Table 4</xref>. Overall, the models trained on urine data show clearly less predictive power than the drainage models.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Confusion matrices and ROC curves. Confusion matrices and ROC curves of the PLS-DA, RF, and CNN models trained on drainage hemoglobin and bilirubin data. 
AUC: area under the curve; CNN: convolutional neural network; PLS-DA: partial least squares discriminant analysis regression; RF: random forest; ROC: receiver operating characteristic.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80829_fig31.png"/></fig><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Overview of the results of the PLS-DA<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup>, RF<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup>, and CNN<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup> models trained and tested on the urine markers datasets.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="3">MCC<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="bottom" colspan="3"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom" colspan="3">ROCAUC<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="bottom" colspan="3">TPR<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td></tr><tr><td align="left" valign="bottom">Urine marker</td><td align="left" valign="bottom">PLS-DA</td><td align="left" valign="bottom">RF</td><td align="left" valign="bottom">CNN</td><td align="left" valign="bottom">PLS-DA</td><td align="left" valign="bottom">RF</td><td align="left" valign="bottom">CNN</td><td align="left" valign="bottom">PLS-DA</td><td align="left" valign="bottom">RF</td><td align="left" valign="bottom">CNN</td><td align="left" valign="bottom">PLS-DA</td><td align="left" valign="bottom">RF</td><td align="left" valign="bottom">CNN</td></tr></thead><tbody><tr><td align="left" valign="top">Bilirubin</td><td align="left" valign="top">0.57</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.63</td><td align="left" 
valign="top">0.57</td><td align="left" valign="top">0.77</td><td align="left" valign="top">0.67</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.43</td><td align="left" valign="top">0.71</td><td align="left" valign="top">0.57</td></tr><tr><td align="left" valign="top">Erythrocytes or hemoglobin</td><td align="left" valign="top">0.51</td><td align="left" valign="top">0.51</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.52</td><td align="left" valign="top">0.52</td><td align="left" valign="top">0.59</td><td align="left" valign="top">0.76</td><td align="left" valign="top">0.8</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.37</td><td align="left" valign="top">0.37</td><td align="left" valign="top">0.42</td></tr><tr><td align="left" valign="top">Protein</td><td align="left" valign="top">0.46</td><td align="left" valign="top">0.46</td><td align="left" valign="top">0.54</td><td align="left" valign="top">0.71</td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.81</td><td align="left" valign="top">0.8</td><td align="left" valign="top">0.82</td><td align="left" valign="top">0.7</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.7</td></tr><tr><td align="left" valign="top">Urobilinogen</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.45</td><td align="left" valign="top">0.53</td><td align="left" valign="top">0.67</td><td align="left" valign="top">0.47</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.84</td><td align="left" valign="top">0.84</td><td align="left" valign="top">0.64</td><td align="left" valign="top">0.36</td><td align="left" valign="top">0.45</td></tr><tr><td align="left" valign="top">Albumin</td><td align="left" 
valign="top">0.38</td><td align="left" valign="top">0.43</td><td align="left" valign="top">0.5</td><td align="left" valign="top">0.46</td><td align="left" valign="top">0.53</td><td align="left" valign="top">0.58</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.78</td><td align="left" valign="top">0.77</td><td align="left" valign="top">0.35</td><td align="left" valign="top">0.43</td><td align="left" valign="top">0.48</td></tr><tr><td align="left" valign="top">Leucocytes</td><td align="left" valign="top">0.3</td><td align="left" valign="top">0.36</td><td align="left" valign="top">0.4</td><td align="left" valign="top">0.42</td><td align="left" valign="top">0.59</td><td align="left" valign="top">0.44</td><td align="left" valign="top">0.66</td><td align="left" valign="top">0.7</td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.29</td><td align="left" valign="top">0.55</td><td align="left" valign="top">0.29</td></tr><tr><td align="left" valign="top">Nitrite</td><td align="left" valign="top">0.34</td><td align="left" valign="top">0.34</td><td align="left" valign="top">0.25</td><td align="left" valign="top">0.24</td><td align="left" valign="top">0.24</td><td align="left" valign="top">0.22</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.88</td><td align="left" valign="top">0.13</td><td align="left" valign="top">0.13</td><td align="left" valign="top">0.13</td></tr><tr><td align="left" valign="top">Glucose</td><td align="left" valign="top">0.23</td><td align="left" valign="top">&#x2212;0.12</td><td align="left" valign="top">0</td><td align="left" valign="top">0.27</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0.57</td><td align="left" valign="top">0.47</td><td align="left" valign="top">0.25</td><td align="left" valign="top">0.2</td><td align="left" valign="top">0</td><td align="left" 
valign="top">0</td></tr><tr><td align="left" valign="top">Glucose (stick test)</td><td align="left" valign="top">&#x2212;0.09</td><td align="left" valign="top">&#x2212;0.07</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0.42</td><td align="left" valign="top">0.39</td><td align="left" valign="top">0.43</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">Ketones</td><td align="left" valign="top">&#x2212;0.08</td><td align="left" valign="top">&#x2212;0.04</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0.52</td><td align="left" valign="top">0.13</td><td align="left" valign="top">0.24</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top">pH</td><td align="left" valign="top">&#x2212;0.04</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.81</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>PLS-DA: partial least squares discriminant analysis regression. 
</p></fn><fn id="table4fn2"><p><sup>b</sup>RF: random forest.</p></fn><fn id="table4fn3"><p><sup>c</sup>CNN: convolutional neural network.</p></fn><fn id="table4fn4"><p><sup>d</sup>MCC: Matthews correlation coefficient.</p></fn><fn id="table4fn5"><p><sup>e</sup>ROCAUC: area under the receiver operating characteristic curve.</p></fn><fn id="table4fn6"><p><sup>f</sup>TPR: true positive rate.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>PLS-DA Coefficients and RF Feature Importances</title><p>To improve explainability, the absolute values of the regression coefficients in the PLS-DA models were examined. For each fluid marker model, the values of each PLS coefficient were normalized to the sum of all of the 288 (number of wavelengths) &#x00D7; 3 (light paths DT, AT, and AR) coefficients. Afterward, for each wavelength, the contributions of the coefficients for the different light paths (DT, AT, and AR) were summed to obtain a single value for each wavelength and fluid marker. In <xref ref-type="fig" rid="figure7">Figure 7A</xref>, the percentage contribution of the normalized sum of the PLS-DA coefficients is displayed in the form of a heatmap.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>PLS-DA coefficients and RF feature importances. (A) Heatmap representation of the absolute value of the PLS-DA regression coefficients of each drainage and urine model. (B) Heatmap representation of the summed feature importances of each drainage and urine RF model. LDH: lactate dehydrogenase; PLS-DA: partial least squares discriminant analysis regression; RF: random forest.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80829_fig32.png"/></fig><p>In <xref ref-type="fig" rid="figure7">Figure 7B</xref>, the RF feature importance values for each fluid marker model are examined. 
Similarly to the PLS-DA coefficients, the feature importances of the 3 light paths (DT, AT, and AR) were summed wavelength-wise. As the sum of all feature importances of the trained models is already equal to 1, there was no need to further normalize the contribution of each wavelength. The summed feature importances were then arranged into a heatmap.</p><p>A one-to-one comparison of the PLS-DA and RF heatmaps is not advisable because the PLS coefficients and the RF feature importances represent different statistical concepts (regression vs decision trees). Nevertheless, similar responses are visible when examining the RF feature importances and PLS-DA coefficients of the studied markers. For instance, the summed RF feature importances of drainage bilirubin exhibit 2 distinct peaks, located in the proximity of 470 and 570 nm. These peaks are also clearly visible for the RF model trained on urine bilirubin data. To some extent, similar responses can be seen in the PLS-DA coefficients of the same marker. Another noticeable example is given by hemoglobin. The bright peak at approximately 400 nm, visible in the summed RF feature importances of drainage hemoglobin, can also be identified in the PLS-DA coefficients of the marker. Interestingly, the same peak is clearly visible for the marker erythrocyte count on drainage data. Most probably, this is a consequence of the correlation between the number of blood cells in the samples and the total amount of hemoglobin [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>].</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>In the present work, an ML-based approach for classifying urine and drainage samples is presented. Using a compact mini-spectrometer that integrates into ordinary catheter tubes, spectral data were acquired from the samples and used as input for training ML models. 
This approach plays a key role in the development of an early warning system that could enable real-time monitoring of the composition of the collected liquids.</p><p>The results of this study demonstrate that the adopted methodology has significant potential for addressing the investigated research question. Promising AI models were trained for several fluid marker datasets. In particular, MCC scores of at least 0.5 were measured for at least 6 of the 14 drainage markers and 5 of the 11 urine markers. Those models show clear predictive power when classifying the unseen test data. Therefore, it can be confidently stated that the inspected data contains pivotal information for the classification process. Especially relevant are the results obtained on hemoglobin and bilirubin data. Those fluid markers stand out from the rest, showing excellent performance with all 3 AI approaches.</p><p>Overall, all 3 approaches led to comparable performances, with the CNN method producing slightly higher scores. As shown in <xref ref-type="fig" rid="figure7">Figure 7</xref>, the PLS-DA and RF algorithms offer the benefit of being highly explainable, as they consider the regression coefficients (PLS-DA) and feature importances (RF), respectively. The same wavelength regions seem to play an important role in the model&#x2019;s decision for many of the markers, but in the case of the PLS-DA models, the responses are more widespread across multiple wavelengths. Put differently, the RF models are, in general, more selective than the PLS-DA ones. The implemented CNN architecture is relatively shallow and small in size. This offers the great advantage of being highly portable and can therefore be easily embedded in microcontrollers for a smart catheter device.</p><p>This study has some limitations. In general, the models trained with drainage data as input exhibited better performance than those trained on urine data. 
This discrepancy in performance is most probably the consequence of the higher spectral variability shown by the drainage spectra (see also Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) and could be mitigated by experimenting with liquid-specific spectrometer exposure times. Furthermore, as shown in <xref ref-type="table" rid="table1">Tables 1 and 2</xref>, the distribution between healthy and pathological samples is heavily imbalanced for many fluid markers. For this reason, the models trained for fluid markers in which the imbalance is particularly pronounced exhibit unsatisfactory performance when evaluated on the test data. Additionally, the performance of the models is influenced by the adopted dataset-splitting strategy. As described previously, a grouped train-test split and cross-validation are performed based on patient information to prevent data leakage. However, this process further worsens the ratio between the minority and majority label classes of many of the fluid marker datasets, leading to poorly representative training and testing datasets. For more information, consult Tables S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>For these reasons, the adoption of the MCC score is crucial for accurately interpreting the models&#x2019; performance. On the other hand, the <italic>F</italic><sub>1</sub>-score, the ROCAUC, and the TPR offer only a limited view on the real performance of the trained models and prove to be reliable metrics only on the datasets that show less pronounced imbalances. Models with high MCC scores tend to exhibit high ROCAUCs, <italic>F</italic><sub>1</sub>-scores, and TPRs. However, the opposite is not always true. 
The ROC curve, the <italic>F</italic><sub>1</sub>-score, and the TPR are biased toward positive samples (ie, pathological samples) [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]. Therefore, models trained on heavily imbalanced datasets with low MCC scores will tend to show high ROCAUCs, <italic>F</italic><sub>1</sub>-scores, and TPRs if the majority class of the dataset is represented by pathological samples. For those reasons, the MCC score is the most trustworthy of the considered performance metrics and should be prioritized, especially when examining the results of models trained on highly imbalanced datasets.</p><p>Further work is needed to solidify and confirm the findings presented in this work. The datasets should be improved by increasing the number of samples with the aim of reducing the imbalance between pathological and healthy samples. Additionally, increasing the total number of samples would be beneficial to ensure the generation of more representative training, validation, and test datasets. While the current results are promising, further experimentation with several elements of the classification pipeline may help to boost performance. In the first place, different spectral preprocessing techniques could be tested with the aim of enhancing spectral quality [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. Furthermore, the 3 AI algorithms could be further fine-tuned. In particular, a variable selection method could be applied to the PLS-DA algorithm. This method iteratively removes from the input feature matrix the intensity values of the wavelengths that contribute less to the classification&#x2019;s decision, to optimize the MCC score. Both RF and the CNN method would benefit from standard regularization techniques to reduce overfitting in the validation phase [<xref ref-type="bibr" rid="ref36">36</xref>]. 
These refinements could lead to higher performances, especially in fluid markers, where the balance between positive and negative samples is improved by the inclusion of new samples. Another architectural design change worth exploring would be the implementation of a multitask classification framework with the intent of simultaneously modeling multiple biomarkers by learning features from the same spectral input. However, at the present stage, given the limitations regarding sample size and class imbalances, the current, more conservative approach of training a separate classifier for each biomarker is more robust and offers greater interpretability.</p><p>Concluding, we value our work as a highly successful proof-of-concept with significant clinical implications. This study represents an important initial step in the development of an early warning system integrated into a smart catheter device. The successful implementation of such a vision has the potential to drastically improve everyday clinical practice by reducing the workload of health care professionals and offering an early detection of urinary and drainage-related complications.</p></sec></body><back><ack><p>During the preparation of this work, the authors used ChatGPT (OpenAI) and Grammarly (Superhuman Platform) in order to improve the paper's readability. 
After using these tools, the authors reviewed and edited the content as needed and take full responsibility for the content of this published paper.</p></ack><notes><sec><title>Funding</title><p>This research was supported by the Ministry of Economic Affairs, Industry, Climate Action, and Energy of the State of North Rhine-Westphalia through the funding program ZukunftBIO.NRW.</p></sec><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>MR, AM, SK, TS, JJ, and RS performed the spectral measurements on the urine and drainage samples. LP and RH designed and implemented the artificial intelligence pipeline and conducted the analysis of the experimental data. LP, MR, and RH co-wrote this paper and approved its final content. AM, SK, RS, MFB, FN, JK, BH, and JB-M critically revised this paper and approved the final content of this paper. RH, MR, MFB, and FN designed and coordinated this study.</p></fn><fn fn-type="conflict"><p>FN and MFB are co-founders and members of the advisory board of Elixion Medical. 
MR is co-founder of Elixion Medical GmbH.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AR</term><def><p>angular reflection</p></def></def-item><def-item><term id="abb3">AT</term><def><p>angular transmission</p></def></def-item><def-item><term id="abb4">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb5">DT</term><def><p>direct transmission</p></def></def-item><def-item><term id="abb6">MCC</term><def><p>Matthews correlation coefficient</p></def></def-item><def-item><term id="abb7">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb8">PLS</term><def><p>partial least squares</p></def></def-item><def-item><term id="abb9">PLS-DA</term><def><p>partial least squares discriminant analysis regression</p></def></def-item><def-item><term id="abb10">RF</term><def><p>random forest</p></def></def-item><def-item><term id="abb11">ROC</term><def><p>receiver operating characteristic</p></def></def-item><def-item><term id="abb12">ROCAUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb13">SNV</term><def><p>standard normal variate</p></def></def-item><def-item><term id="abb14">TPR</term><def><p>true positive rate</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Okada</surname><given-names>N</given-names> </name><name name-style="western"><surname>Narita</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Takada</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Early removal of drains and the incidence of seroma after breast surgery</article-title><source>Breast Cancer 
(Auckl)</source><year>2015</year><month>01</month><volume>22</volume><issue>1</issue><fpage>79</fpage><lpage>83</lpage><pub-id pub-id-type="doi">10.1007/s12282-013-0457-3</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Simerville</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Maxted</surname><given-names>WC</given-names> </name><name name-style="western"><surname>Pahira</surname><given-names>JJ</given-names> </name></person-group><article-title>Urinalysis: a comprehensive review</article-title><source>Am Fam Physician</source><year>2005</year><month>03</month><day>15</day><volume>71</volume><issue>6</issue><fpage>1153</fpage><lpage>1162</lpage><pub-id pub-id-type="medline">15791892</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sequeira-Antunes</surname><given-names>B</given-names> </name><name name-style="western"><surname>Ferreira</surname><given-names>HA</given-names> </name></person-group><article-title>Urinary biomarkers and point-of-care urinalysis devices for early diagnosis and management of disease: a review</article-title><source>Biomedicines</source><year>2023</year><month>03</month><day>29</day><volume>11</volume><issue>4</issue><fpage>1051</fpage><pub-id pub-id-type="doi">10.3390/biomedicines11041051</pub-id><pub-id pub-id-type="medline">37189669</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Park</surname><given-names>YI</given-names> </name><name name-style="western"><surname>Linsenmeyer</surname><given-names>TA</given-names> </name></person-group><article-title>A method to minimize indwelling catheter calcification and bladder stones in 
individuals with spinal cord injury</article-title><source>J Spinal Cord Med</source><year>2001</year><volume>24</volume><issue>2</issue><fpage>105</fpage><lpage>108</lpage><pub-id pub-id-type="doi">10.1080/10790268.2001.11753564</pub-id><pub-id pub-id-type="medline">11587416</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dellimore</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Helyer</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Franklin</surname><given-names>SE</given-names> </name></person-group><article-title>A scoping review of important urinary catheter induced complications</article-title><source>J Mater Sci Mater Med</source><year>2013</year><month>08</month><volume>24</volume><issue>8</issue><fpage>1825</fpage><lpage>1835</lpage><pub-id pub-id-type="doi">10.1007/s10856-013-4953-y</pub-id><pub-id pub-id-type="medline">23661258</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clayton</surname><given-names>JL</given-names> </name></person-group><article-title>Indwelling urinary catheters: a pathway to health care-associated infections</article-title><source>AORN J</source><year>2017</year><month>05</month><volume>105</volume><issue>5</issue><fpage>446</fpage><lpage>452</lpage><pub-id pub-id-type="doi">10.1016/j.aorn.2017.02.013</pub-id><pub-id pub-id-type="medline">28454610</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xue</surname><given-names>DQ</given-names> </name><name name-style="western"><surname>Qian</surname><given-names>C</given-names> </name><name 
name-style="western"><surname>Yang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>XF</given-names> </name></person-group><article-title>Risk factors for surgical site infections after breast surgery: a systematic review and meta-analysis</article-title><source>Eur J Surg Oncol</source><year>2012</year><month>05</month><volume>38</volume><issue>5</issue><fpage>375</fpage><lpage>381</lpage><pub-id pub-id-type="doi">10.1016/j.ejso.2012.02.179</pub-id><pub-id pub-id-type="medline">22421530</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Niedergethmann</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bludau</surname><given-names>F</given-names> </name><name name-style="western"><surname>Dusch</surname><given-names>N</given-names> </name><name name-style="western"><surname>Nowak</surname><given-names>K</given-names> </name><name name-style="western"><surname>Post</surname><given-names>S</given-names> </name></person-group><article-title>Significance of drains in surgery</article-title><source>Chirurg</source><year>2011</year><month>12</month><volume>82</volume><issue>12</issue><fpage>1079</fpage><lpage>1084</lpage><pub-id pub-id-type="doi">10.1007/s00104-011-2115-7</pub-id><pub-id pub-id-type="medline">22105796</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>CF</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>SF</given-names> </name><name name-style="western"><surname>Hung</surname><given-names>CF</given-names> </name><name name-style="western"><surname>Chou</surname><given-names>P</given-names> </name></person-group><article-title>Risk of infection is associated more 
with drain duration than daily drainage volume in prosthesis-based breast reconstruction</article-title><source>Medicine (Baltimore)</source><year>2016</year><volume>95</volume><issue>49</issue><fpage>e5605</fpage><pub-id pub-id-type="doi">10.1097/MD.0000000000005605</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hsu</surname><given-names>HE</given-names> </name><name name-style="western"><surname>Shenoy</surname><given-names>ES</given-names> </name><name name-style="western"><surname>Kelbaugh</surname><given-names>D</given-names> </name><etal/></person-group><article-title>An electronic surveillance tool for catheter-associated urinary tract infection in intensive care units</article-title><source>Am J Infect Control</source><year>2015</year><month>06</month><volume>43</volume><issue>6</issue><fpage>592</fpage><lpage>599</lpage><pub-id pub-id-type="doi">10.1016/j.ajic.2015.02.019</pub-id><pub-id pub-id-type="medline">25840717</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jang</surname><given-names>EC</given-names> </name><name name-style="western"><surname>Park</surname><given-names>YM</given-names> </name><name name-style="western"><surname>Han</surname><given-names>HW</given-names> </name><etal/></person-group><article-title>Machine-learning enhancement of urine dipstick tests for chronic kidney disease detection</article-title><source>J Am Med Inform Assoc</source><year>2023</year><month>05</month><day>19</day><volume>30</volume><issue>6</issue><fpage>1114</fpage><lpage>1124</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocad051</pub-id><pub-id pub-id-type="medline">37027837</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>van Duren</surname><given-names>BH</given-names> </name><name name-style="western"><surname>van Boxel</surname><given-names>GI</given-names> </name></person-group><article-title>A novel method for electronic measurement and recording of surgical drain output</article-title><source>J Med Eng Technol</source><year>2017</year><month>04</month><volume>41</volume><issue>3</issue><fpage>179</fpage><lpage>185</lpage><pub-id pub-id-type="doi">10.1080/03091902.2016.1271045</pub-id><pub-id pub-id-type="medline">28084110</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bocobo</surname><given-names>GA</given-names> </name><name name-style="western"><surname>Tharan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Choudhury</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Automation of postoperative surgical drain monitoring with novel biosensing technology: proof of concept in a peritoneal injury model</article-title><source>Surg Innov</source><year>2021</year><month>08</month><volume>28</volume><issue>4</issue><fpage>504</fpage><lpage>506</lpage><pub-id pub-id-type="doi">10.1177/1553350620979819</pub-id><pub-id pub-id-type="medline">33382350</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roser</surname><given-names>MV</given-names> </name><name name-style="western"><surname>Frank</surname><given-names>AHR</given-names> </name><name name-style="western"><surname>Henrichs</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Evaluation of an integrated smart sensor system for real-time characterization and digitalization of postoperative abdominal drain output: a pilot 
study</article-title><source>Surg Innov</source><year>2022</year><month>06</month><volume>29</volume><issue>3</issue><fpage>438</fpage><lpage>445</lpage><pub-id pub-id-type="doi">10.1177/15533506211031459</pub-id><pub-id pub-id-type="medline">34784819</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rabbani</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>GYE</given-names> </name><name name-style="western"><surname>Suarez</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>JH</given-names> </name></person-group><article-title>Applications of machine learning in routine laboratory medicine: current state and future directions</article-title><source>Clin Biochem</source><year>2022</year><month>05</month><volume>103</volume><fpage>1</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1016/j.clinbiochem.2022.02.011</pub-id><pub-id pub-id-type="medline">35227670</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Topol</surname><given-names>EJ</given-names> </name></person-group><article-title>High-performance medicine: the convergence of human and artificial intelligence</article-title><source>Nat Med</source><year>2019</year><month>01</month><volume>25</volume><issue>1</issue><fpage>44</fpage><lpage>56</lpage><pub-id pub-id-type="doi">10.1038/s41591-018-0300-7</pub-id><pub-id pub-id-type="medline">30617339</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mehmood</surname><given-names>T</given-names> </name><name 
name-style="western"><surname>S&#x00E6;b&#x00F8;</surname><given-names>S</given-names> </name><name name-style="western"><surname>Liland</surname><given-names>KH</given-names> </name></person-group><article-title>Comparison of variable selection methods in partial least squares regression</article-title><source>J Chemom</source><year>2020</year><month>06</month><volume>34</volume><issue>6</issue><fpage>e3226</fpage><pub-id pub-id-type="doi">10.1002/cem.3226</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barker</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rayens</surname><given-names>W</given-names> </name></person-group><article-title>Partial least squares for discrimination</article-title><source>J Chemom</source><year>2003</year><month>03</month><volume>17</volume><issue>3</issue><fpage>166</fpage><lpage>173</lpage><pub-id pub-id-type="doi">10.1002/cem.785</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Boulesteix</surname><given-names>A</given-names> </name><name name-style="western"><surname>Janitza</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kruppa</surname><given-names>J</given-names> </name><name name-style="western"><surname>K&#x00F6;nig</surname><given-names>IR</given-names> </name></person-group><article-title>Overview of random forest methodology and practical guidance with emphasis on computational biology and bioinformatics</article-title><source>WIREs Data Min Knowl</source><year>2012</year><month>11</month><volume>2</volume><issue>6</issue><fpage>493</fpage><lpage>507</lpage><pub-id pub-id-type="doi">10.1002/widm.1072</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ajit</surname><given-names>A</given-names> </name><name name-style="western"><surname>Acharya</surname><given-names>K</given-names> </name><name name-style="western"><surname>Samanta</surname><given-names>A</given-names> </name></person-group><article-title>A review of convolutional neural networks</article-title><source>2020 Int Conf Emerging Trends Inf Technol Eng (ic-ETITE)</source><year>2020</year><fpage>1</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1109/ic-ETITE47903.2020.049</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Han</surname><given-names>X</given-names> </name><name name-style="western"><surname>Deveci</surname><given-names>M</given-names> </name><name name-style="western"><surname>Parmar</surname><given-names>M</given-names> </name></person-group><article-title>A review of convolutional neural networks in computer vision</article-title><source>Artif Intell Rev</source><year>2024</year><month>03</month><day>23</day><volume>57</volume><issue>4</issue><fpage>99</fpage><pub-id pub-id-type="doi">10.1007/s10462-024-10721-6</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Deev</surname><given-names>V</given-names> </name><name name-style="western"><surname>Panchuk</surname><given-names>V</given-names> </name><name name-style="western"><surname>Boichenko</surname><given-names>E</given-names> </name><name 
name-style="western"><surname>Kirsanov</surname><given-names>D</given-names> </name></person-group><article-title>Spectrum is a picture: feasibility study of two-dimensional convolutional neural networks in spectral processing</article-title><source>Microchem J</source><year>2024</year><month>10</month><volume>205</volume><fpage>111329</fpage><pub-id pub-id-type="doi">10.1016/j.microc.2024.111329</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jin</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rao</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>An innovative approach for integrating two-dimensional conversion of Vis-NIR spectra with the Swin Transformer model to leverage deep learning for predicting soil properties</article-title><source>Geoderma</source><year>2023</year><month>08</month><volume>436</volume><fpage>116555</fpage><pub-id pub-id-type="doi">10.1016/j.geoderma.2023.116555</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shahid</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Ko</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kwon</surname><given-names>S</given-names> </name></person-group><article-title>Performance comparison of 1D and 2D convolutional neural networks for real-time classification of time series sensor data</article-title><source>2022 Int Conf Inf Networking (ICOIN)</source><year>2022</year><fpage>507</fpage><lpage>511</lpage><pub-id pub-id-type="doi">10.1109/ICOIN53446.2022.9687284</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miyauchi</surname><given-names>W</given-names> </name><name name-style="western"><surname>Fujii</surname><given-names>M</given-names> </name><name name-style="western"><surname>Saiki</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Evaluation of the hemoglobin concentration in drainage fluid after surgery by absorbance spectrophotometry using an optical sensor</article-title><source>Yonago Acta Med</source><year>2023</year><month>05</month><volume>66</volume><issue>2</issue><fpage>281</fpage><lpage>286</lpage><pub-id pub-id-type="doi">10.33160/yam.2023.05.020</pub-id><pub-id pub-id-type="medline">37234223</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Genkawa</surname><given-names>T</given-names> </name><name name-style="western"><surname>Shinzawa</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kato</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Baseline correction of diffuse reflection near-infrared spectra using Searching Region Standard Normal Variate (SRSNV)</article-title><source>Appl Spectrosc</source><year>2015</year><month>12</month><volume>69</volume><issue>12</issue><fpage>1432</fpage><lpage>1441</lpage><pub-id pub-id-type="doi">10.1366/15-07905</pub-id><pub-id pub-id-type="medline">26556507</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Bussola</surname><given-names>N</given-names> </name><name name-style="western"><surname>Marcolini</surname><given-names>A</given-names> </name><name name-style="western"><surname>Maggio</surname><given-names>V</given-names> </name><name 
name-style="western"><surname>Jurman</surname><given-names>G</given-names> </name><name name-style="western"><surname>Furlanello</surname><given-names>C</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Del Bimbo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cucchiara</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sclaroff</surname><given-names>S</given-names> </name><name name-style="western"><surname>Farinella</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Mei</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bertini</surname><given-names>M</given-names> </name><name name-style="western"><surname>Escalante</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Vezzani</surname><given-names>R</given-names> </name></person-group><article-title>AI slipping on tiles: data leakage in digital pathology</article-title><source>Pattern Recognit DAGM</source><year>2021</year><volume>12661</volume><publisher-name>Springer International Publishing</publisher-name><fpage>167</fpage><lpage>182</lpage><pub-id pub-id-type="doi">10.1007/978-3-030-68763-2_13</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ko&#x00E7;ak</surname><given-names>B</given-names> </name></person-group><article-title>Key concepts, common pitfalls, and best practices in artificial intelligence and machine learning: focus on radiomics</article-title><source>Diagn Interv Radiol</source><year>2022</year><month>09</month><volume>28</volume><issue>5</issue><fpage>450</fpage><lpage>462</lpage><pub-id pub-id-type="doi">10.5152/dir.2022.211297</pub-id><pub-id pub-id-type="medline">36218149</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Ponder</surname><given-names>E</given-names> </name></person-group><article-title>The relation between red blood cell density and corpuscular hemoglobin concentration</article-title><source>J Biol Chem</source><year>1942</year><month>07</month><volume>144</volume><issue>2</issue><fpage>333</fpage><lpage>338</lpage><pub-id pub-id-type="doi">10.1016/S0021-9258(18)72513-1</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Dean</surname><given-names>L</given-names> </name></person-group><article-title>Blood and the cells it contains</article-title><source>Blood Groups Red Cell Antigens</source><year>2005</year><access-date>2026-04-10</access-date><publisher-name>National Center for Biotechnology Information</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK2263">https://www.ncbi.nlm.nih.gov/books/NBK2263</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chicco</surname><given-names>D</given-names> </name><name name-style="western"><surname>Jurman</surname><given-names>G</given-names> </name></person-group><article-title>The advantages of the Matthews correlation coefficient (MCC) over F1 score and accuracy in binary classification evaluation</article-title><source>BMC Genomics</source><year>2020</year><month>01</month><day>2</day><volume>21</volume><issue>1</issue><fpage>6</fpage><pub-id pub-id-type="doi">10.1186/s12864-019-6413-7</pub-id><pub-id pub-id-type="medline">31898477</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jeni</surname><given-names>LA</given-names> 
</name><name name-style="western"><surname>Cohn</surname><given-names>JF</given-names> </name><name name-style="western"><surname>De La Torre</surname><given-names>F</given-names> </name></person-group><article-title>Facing imbalanced data&#x2014;recommendations for the use of performance metrics</article-title><source>Int Conf Affect Comput Intell Interact Workshops</source><year>2013</year><volume>2013</volume><fpage>245</fpage><lpage>251</lpage><pub-id pub-id-type="doi">10.1109/ACII.2013.47</pub-id><pub-id pub-id-type="medline">25574450</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saito</surname><given-names>T</given-names> </name><name name-style="western"><surname>Rehmsmeier</surname><given-names>M</given-names> </name></person-group><article-title>The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets</article-title><source>PLoS ONE</source><year>2015</year><volume>10</volume><issue>3</issue><fpage>e0118432</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0118432</pub-id><pub-id pub-id-type="medline">25738806</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yan</surname><given-names>C</given-names> </name></person-group><article-title>A review on spectral data preprocessing techniques for machine learning and quantitative analysis</article-title><source>iScience</source><year>2025</year><month>07</month><day>18</day><volume>28</volume><issue>7</issue><fpage>112759</fpage><pub-id pub-id-type="doi">10.1016/j.isci.2025.112759</pub-id><pub-id pub-id-type="medline">40606754</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Padhi</surname><given-names>SR</given-names> </name><name name-style="western"><surname>John</surname><given-names>R</given-names> </name><name name-style="western"><surname>Tripathi</surname><given-names>K</given-names> </name><etal/></person-group><article-title>A comparison of spectral preprocessing methods and their effects on nutritional traits in cowpea germplasm</article-title><source>Legume Sci</source><year>2024</year><month>06</month><volume>6</volume><issue>2</issue><fpage>e2977</fpage><pub-id pub-id-type="doi">10.1002/leg3.229</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tian</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name></person-group><article-title>A comprehensive survey on regularization strategies in machine learning</article-title><source>Inf Fusion</source><year>2022</year><month>04</month><volume>80</volume><fpage>146</fpage><lpage>166</lpage><pub-id pub-id-type="doi">10.1016/j.inffus.2021.11.005</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Additional tables and figures.</p><media xlink:href="medinform_v14i1e80829_app1.docx" xlink:title="DOCX File, 5253 KB"/></supplementary-material></app-group></back></article>