% Journal article introducing the ciFAIR dataset (duplicate-free CIFAR test sets).
% NOTE(review): the "19" in the citation key does not match year = 2020; the key
% is deliberately left unchanged so existing citations of it keep resolving.
% NOTE(review): the author name is stored as UTF-8; if this file is processed by
% classic 8-bit BibTeX rather than Biber, confirm the encoding is handled.
@article{Barz19:ciFAIR,
  author         = {Barz, Björn and Denzler, Joachim},
  title          = {Do We Train on Test Data? Purging {CIFAR} of Near-Duplicates},
  journal        = {Journal of Imaging},
  year           = {2020},
  volume         = {6},
  number         = {6},
  pages          = {41},
  article-number = {41},
  doi            = {10.3390/jimaging6060041},
  url            = {https://cvjena.github.io/cifair/},
  type           = {article},
  key            = {Barz19:ciFAIR},
  abstract       = {We find that 3.3\% and 10\% of the images from the CIFAR-10 and CIFAR-100 test sets, respectively, have duplicates in the training set. This may incur a bias on the comparison of image recognition techniques with respect to their generalization capability on these heavily benchmarked datasets. To eliminate this bias, we provide the "fair CIFAR" (ciFAIR) dataset, where we replaced all duplicates in the test sets with new images sampled from the same domain. The training set remains unchanged, in order not to invalidate pre-trained models. We then re-evaluate the classification performance of various popular state-of-the-art CNN architectures on these new test sets to investigate whether recent research has overfitted to memorizing data instead of learning abstract concepts. We find a significant drop in classification accuracy of between 9\% and 14\% relative to the original performance on the duplicate-free test set. The ciFAIR dataset and pre-trained models are available at https://cvjena.github.io/cifair/, where we also maintain a leaderboard.},
}