<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e66556</article-id><article-id pub-id-type="doi">10.2196/66556</article-id><article-categories><subj-group subj-group-type="heading"><subject>Letter to the Editor</subject></subj-group></article-categories><title-group><article-title>Code Error in &#x201C;Diagnostic Classification and Prognostic Prediction Using Common Genetic Variants in Autism Spectrum Disorder: Genotype-Based Deep Learning&#x201D;</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Miller</surname><given-names>Catriona</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Portlock</surname><given-names>Theo</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nyaga</surname><given-names>Denis M</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Gamble</surname><given-names>Greg 
D</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>O'Sullivan</surname><given-names>Justin M</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Liggins Institute, University of Auckland</institution><addr-line>85 Park Road, Private Bag 92019</addr-line><addr-line>Auckland</addr-line><country>New Zealand</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname/><given-names>JMIR Editorial Office</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Justin M O'Sullivan, PhD, Liggins Institute, University of Auckland, 85 Park Road, Private Bag 92019, Auckland, 1142, New Zealand, 64 099239868; <email>justin.osullivan@auckland.ac.nz</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>6</day><month>5</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e66556</elocation-id><history><date date-type="received"><day>21</day><month>09</month><year>2024</year></date><date date-type="accepted"><day>12</day><month>04</month><year>2025</year></date></history><copyright-statement>&#x00A9; Catriona Miller, Theo Portlock, Denis M Nyaga, Greg D Gamble, Justin M O'Sullivan. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 6.5.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e66556"/><related-article related-article-type="commentary article" ext-link-type="doi" xlink:href="10.2196/24754" xlink:title="Comment on" xlink:type="simple">https://medinform.jmir.org/2021/4/e24754/</related-article><related-article related-article-type="retraction forward" ext-link-type="doi" xlink:href="10.2196/76833" xlink:title="Retraction notice" xlink:type="simple">https://medinform.jmir.org/2025/1/e76833</related-article><kwd-group><kwd>autism prediction</kwd><kwd>machine learning</kwd><kwd>data leakage</kwd></kwd-group></article-meta></front><body><p>Wang and Avillach [<xref ref-type="bibr" rid="ref1">1</xref>] developed a convolutional neural network (CNN)&#x2013;based diagnostic classifier for autism spectrum disorder (ASD). After preprocessing the genomics data from the Simons Simplex Collection (SSC) [<xref ref-type="bibr" rid="ref2">2</xref>], common variants that may be protective or pathogenic for autism were extracted based on a <italic>&#x03C7;</italic><sup>2</sup> test. 
The authors then designed a CNN-based diagnostic classifier for ASD with an accuracy and area under the receiver operating characteristic curve of 88% and 0.955, respectively. The predictor in Wang and Avillach [<xref ref-type="bibr" rid="ref1">1</xref>] is currently considered the exemplar in the field, giving much more accurate predictions for autism than other studies [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>However, after inspecting the code and repeating the analyses, we contend that the method used is flawed and leads to an approximately 30% overestimation of predictive ability.</p><p>Wang and Avillach [<xref ref-type="bibr" rid="ref1">1</xref>] did not provide a GitHub link to the code that was used in their paper. However, code can be found in Dr Wang&#x2019;s GitHub repository [<xref ref-type="bibr" rid="ref4">4</xref>] that matches the results and figures in the manuscript.</p><p>An error occurred in the data split for training and test sets. The methods state &#x201C;...the SSC samples were partitioned into two sets based on random sampling of individuals into a training set (80%) and a hold-out test set (20%). There was no overlap of individuals across the two partitions&#x201D; [<xref ref-type="bibr" rid="ref1">1</xref>]. However, the code uses different indexing methods for the test and training sets (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Because there appears to be no random seed in Wang and Avillach [<xref ref-type="bibr" rid="ref1">1</xref>], we cannot reproduce the exact overlap that occurred in the analysis reported in the manuscript. 
However, simulations (N=100) using the code identified an average of 80% (SD 1%) of the test dataset being represented in the training dataset.</p><p>We corrected this error in the code [<xref ref-type="bibr" rid="ref5">5</xref>] and generated new models using the 100 features that were identified in Wang and Avillach [<xref ref-type="bibr" rid="ref1">1</xref>] and the genomics data from the SSC [<xref ref-type="bibr" rid="ref2">2</xref>]. Simulations (N=100) with these models resulted in an area under the receiver operating characteristic curve of 0.61 (SD 0.02) and an accuracy of 60% (SD 2%; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). These values are 0.34 and 28 percentage points lower, respectively, than the metrics reported in Wang and Avillach [<xref ref-type="bibr" rid="ref1">1</xref>].</p><p>The accuracy of the CNN-based diagnostic classifier for ASD presented in Wang and Avillach [<xref ref-type="bibr" rid="ref1">1</xref>] is overestimated by ~28 percentage points. We contend that Wang and Avillach [<xref ref-type="bibr" rid="ref1">1</xref>] should be retracted according to the Committee on Publication Ethics (COPE) guidelines.</p></body><back><ack><p>We acknowledge Simons Simplex Collection project number 15286.1.1.</p></ack><notes><sec><title>Data Availability</title><p>The code used for Wang and Avillach [<xref ref-type="bibr" rid="ref1">1</xref>] is available on GitHub [<xref ref-type="bibr" rid="ref4">4</xref>]. A clone of that code and the corrected code are also available on GitHub [<xref ref-type="bibr" rid="ref5">5</xref>]. 
The Simons Simplex Collection is accessible at [<xref ref-type="bibr" rid="ref6">6</xref>].</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn><fn fn-type="other"><p><bold>Editorial Notice</bold></p><p>The corresponding author of &#x201C;Diagnostic Classification and Prognostic Prediction Using Common Genetic Variants in Autism Spectrum Disorder: Genotype-Based Deep Learning&#x201D; did not submit a reply to this letter.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ASD</term><def><p>autism spectrum disorder</p></def></def-item><def-item><term id="abb2">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb3">COPE</term><def><p>Committee on Publication Ethics</p></def></def-item><def-item><term id="abb4">SSC</term><def><p>Simons Simplex Collection</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Avillach</surname><given-names>P</given-names> </name></person-group><article-title>Diagnostic classification and prognostic prediction using common genetic variants in autism spectrum disorder: genotype-based deep learning</article-title><source>JMIR Med Inform</source><year>2021</year><month>04</month><day>7</day><volume>9</volume><issue>4</issue><fpage>e24754</fpage><comment>Retracted in</comment><comment>JMIR Med Inform 2025;13:e76833</comment><pub-id pub-id-type="doi">10.2196/76833</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fischbach</surname><given-names>GD</given-names> </name><name name-style="western"><surname>Lord</surname><given-names>C</given-names> 
</name></person-group><article-title>The Simons Simplex Collection: a resource for identification of autism genetic risk factors</article-title><source>Neuron</source><year>2010</year><month>10</month><day>21</day><volume>68</volume><issue>2</issue><fpage>192</fpage><lpage>195</lpage><pub-id pub-id-type="doi">10.1016/j.neuron.2010.10.006</pub-id><pub-id pub-id-type="medline">20955926</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alowais</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Alghamdi</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Alsuhebany</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Revolutionizing healthcare: the role of artificial intelligence in clinical practice</article-title><source>BMC Med Educ</source><year>2023</year><month>09</month><day>22</day><volume>23</volume><issue>1</issue><fpage>689</fpage><pub-id pub-id-type="doi">10.1186/s12909-023-04698-z</pub-id><pub-id pub-id-type="medline">37740191</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name></person-group><article-title>Hms-dbmi/haishuai</article-title><source>GitHub</source><year>2020</year><access-date>2024-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/hms-dbmi/Haishuai">https://github.com/hms-dbmi/Haishuai</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Miller</surname><given-names>C</given-names> 
</name></person-group><article-title>Catriona-miller/sfari_paper_clone</article-title><source>GitHub</source><year>2024</year><access-date>2024-09-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/Catriona-Miller/SFARI_paper_clone">https://github.com/Catriona-Miller/SFARI_paper_clone</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><article-title>Simons Simplex Collection</article-title><source>Simons Foundation Autism Research Initiative</source><access-date>2025-04-24</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.sfari.org/resource/simons-simplex-collection/">https://www.sfari.org/resource/simons-simplex-collection/</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Walkthrough of the steps that were taken and the results of our attempt to reproduce the work of Wang and Avillach (2021).</p><media xlink:href="medinform_v13i1e66556_app1.pptx" xlink:title="PPTX File, 417 KB"/></supplementary-material></app-group></back></article>