% Park et al. (2021), JMIR Medical Informatics 9(3):e23328.
% Each field name appears once: url holds the publisher landing page, the DOI
% resolver link is derivable from the doi field, and the PubMed record is kept
% as pmid. The abstract is LaTeX-encoded (math-mode relational signs, escaped
% percent signs), so it is safe to typeset if a style prints it.
@article{info:doi/10.2196/23328,
  author   = {Park, Ho Young
              and Bae, Hyun-Jin
              and Hong, Gil-Sun
              and Kim, Minjee
              and Yun, JiHye
              and Park, Sungwon
              and Chung, Won Jung
              and Kim, NamKug},
  title    = {Realistic High-Resolution Body Computed Tomography Image Synthesis by Using Progressive Growing Generative Adversarial Network: Visual {Turing} Test},
  journal  = {JMIR Medical Informatics},
  year     = {2021},
  month    = mar,
  day      = {17},
  volume   = {9},
  number   = {3},
  pages    = {e23328},
  keywords = {generative adversarial network; unsupervised deep learning; computed tomography; synthetic body images; visual Turing test},
  abstract = {Background: Generative adversarial network (GAN)--based synthetic images can be viable solutions to current supervised deep learning challenges. However, generating highly realistic images is a prerequisite for these approaches. Objective: The aim of this study was to investigate and validate the unsupervised synthesis of highly realistic body computed tomography (CT) images by using a progressive growing GAN (PGGAN) trained to learn the probability distribution of normal data. Methods: We trained the PGGAN by using 11,755 body CT scans. Ten radiologists (4 radiologists with $<$5 years of experience [Group I], 4 radiologists with 5--10 years of experience [Group II], and 2 radiologists with $>$10 years of experience [Group III]) evaluated the results in a binary approach by using an independent validation set of 300 images (150 real and 150 synthetic) to judge the authenticity of each image. Results: The mean accuracy of the 10 readers in the entire image set was higher than random guessing (1781/3000, 59.4{\%} vs 1500/3000, 50.0{\%}, respectively; P$<$.001). However, in terms of identifying synthetic images as fake, there was no significant difference in the specificity between the visual Turing test and random guessing (779/1500, 51.9{\%} vs 750/1500, 50.0{\%}, respectively; P=.29). The accuracy between the 3 reader groups with different experience levels was not significantly different (Group I, 696/1200, 58.0{\%}; Group II, 726/1200, 60.5{\%}; and Group III, 359/600, 59.8{\%}; P=.36). Interreader agreements were poor ($\kappa$=0.11) for the entire image set. In subgroup analysis, the discrepancies between real and synthetic CT images occurred mainly in the thoracoabdominal junction and in the anatomical details. Conclusions: The GAN can synthesize highly realistic high-resolution body CT images that are indistinguishable from real images; however, it has limitations in generating body images of the thoracoabdominal junction and lacks accuracy in the anatomical details.},
  issn     = {2291-9694},
  doi      = {10.2196/23328},
  url      = {https://medinform.jmir.org/2021/3/e23328},
  pmid     = {33609339},
}