@inproceedings{bjerge2025deep,
  title     = {Deep Image Clustering with Model-Agnostic Meta-Learning},
  booktitle = {International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications (VISAPP)},
  author    = {Kim Bjerge and Paul Bodesheim and Henrik Karstoft},
  year      = {2025},
  pages     = {286--297},
  doi       = {10.5220/0013114600003912},
  abstract  = {Deep clustering has proven successful in analyzing complex, high-dimensional real-world data. Typically, features are extracted from a deep neural network and then clustered. However, training the network to extract features that can be clustered efficiently in a semantically meaningful way is particularly challenging when data is sparse. In this paper, we present a semi-supervised method to fine-tune a deep learning network using Model-Agnostic Meta-Learning, commonly employed in Few-Shot Learning. We apply episodic training with a novel multivariate scatter loss, designed to enhance inter-class feature separation while minimizing intra-class variance, thereby improving overall clustering performance. Our approach works with state-of-the-art deep learning models, spanning convolutional neural networks and vision transformers, as well as different clustering algorithms like K-means and Spectral clustering. The effectiveness of our method is tested on several commonly used Few-Shot Learning datasets, where episodic fine-tuning with our multivariate scatter loss and a ConvNeXt backbone outperforms other models, achieving adjusted rand index scores of 89.7% on the EU moths dataset and 86.9% on the Caltech birds dataset, respectively. Hence, our proposed method can be applied across various practical domains, such as clustering images of animal species in biology.},
}
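
The abstract describes a loss that increases inter-class feature separation while minimizing intra-class variance, with clustering quality scored by the adjusted Rand index. Below is a minimal sketch of that idea, not the paper's exact multivariate scatter loss or its episodic MAML fine-tuning: the within/between scatter ratio, the toy embeddings, and the K-means evaluation step are assumptions made for illustration.

# Illustrative sketch only: a generic scatter-style loss on embedded features,
# followed by K-means clustering scored with the adjusted Rand index.
# The ratio of within-class to between-class scatter used here is an assumption,
# not the multivariate scatter loss defined in Bjerge et al. (2025).
import torch
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score


def scatter_loss(features: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    """Penalize within-class spread relative to between-class separation."""
    classes = labels.unique()
    global_mean = features.mean(dim=0)
    within, between = 0.0, 0.0
    for c in classes:
        class_feats = features[labels == c]
        class_mean = class_feats.mean(dim=0)
        # Within-class scatter: spread of samples around their class mean.
        within = within + ((class_feats - class_mean) ** 2).sum()
        # Between-class scatter: distance of the class mean from the global mean.
        between = between + class_feats.shape[0] * ((class_mean - global_mean) ** 2).sum()
    # Small epsilon avoids division by zero when all class means coincide.
    return within / (between + 1e-8)


if __name__ == "__main__":
    # Toy episode: 3 classes, 10 samples each, 64-dimensional embeddings.
    torch.manual_seed(0)
    labels = torch.arange(3).repeat_interleave(10)
    features = torch.randn(30, 64) + labels.float().unsqueeze(1) * 2.0

    loss = scatter_loss(features, labels)
    print(f"scatter loss: {loss.item():.4f}")

    # Cluster the embeddings and compare against the true labels with ARI,
    # mirroring the evaluation protocol mentioned in the abstract.
    preds = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(features.numpy())
    print(f"adjusted rand index: {adjusted_rand_score(labels.numpy(), preds):.3f}")

In the paper's setting, such a loss would be minimized during episodic fine-tuning of the backbone (e.g. ConvNeXt), after which the frozen features are clustered with K-means or Spectral clustering.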