% Technical report TR-FSU-INF-CV-2014-01 (Freytag et al., 2014).
% The report number lives in `number` and is combined with `type` by the style.
% `key`, `booktitle`, `abstract`, `code`, `groups`, `owner` and `timestamp` are
% not printed by standard @techreport styles; they are kept as metadata
% (presumably for reference-manager or website tooling -- confirm before removing).
@techreport{freytag2014seeing2,
  author      = {Freytag, Alexander and Rühle, Johannes and Bodesheim, Paul and Rodner, Erik and Denzler, Joachim},
  title       = {Seeing through bag-of-visual-word glasses: towards understanding quantization effects in feature extraction methods},
  type        = {Technical Report},
  number      = {TR-FSU-INF-CV-2014-01},
  institution = {Computer Vision Group, Friedrich Schiller University Jena, Germany},
  year        = {2014},
  month       = mar,
  key         = {freytag2014seeing2},
  booktitle   = {ICPR Workshop on Features and Structures (FEAST)},
  abstract    = {The bag-of-visual-word (BoW) model is one of the most common concepts for image categorization and feature extraction. Although our community developed powerful BoW approaches for visual recognition and it serves as a great ad-hoc solution, unfortunately, there are several drawbacks that most researchers might be not aware of. In this paper, we aim at seeing behind the curtains and point to some of the negative aspects of these approaches which go usually unnoticed: (i) although BoW approaches are often motivated by relating clusters to meaningful object parts, this relation does not hold in practice with low-dimensional features such as HOG, and standard clustering method, (ii) clusters can be chosen randomly without loss in performance, (iii) BoW is often only collecting background statistics, and (iv) cluster assignments are not robust to small spatial shifts. Furthermore, we show the effect of BoW quantization and the related loss of visual information by a simple inversion method called HoggleBoW.},
  code        = {https://github.com/cvjena/bowInversion},
  groups      = {recognitionanalysis,visualrecognition},
  owner       = {paul},
  timestamp   = {2021.01.15},
}