@inproceedings{yu2025exploiting,
  author    = {Hui Yu and Joachim Denzler and Dennis Böttger and Gunnar Brehm and Paul Bodesheim},
  title     = {Exploiting Unlabeled Images via Pseudo-Labelling and Paste-In Augmentation for Insect Localisation in Automated Monitoring},
  booktitle = {International Workshop Series on Camera Traps, AI, \& Ecology (CamTrapAI)},
  year      = {2025},
  abstract  = {Insect monitoring with automated deep learning pipelines has become increasingly important for understanding the crisis of insect decline. Advanced model architectures trained on high-resolution images are essential to ensure the quality of insect localisation and species identification. Recent methods struggle with limited annotated data, since annotation requires time-consuming manual labelling of bounding boxes and domain-expert knowledge for insect categorisation. In this paper, we present a comprehensive benchmark of object detection models for this task, evaluating YOLOv9 and SSD architectures across three distinct datasets: EU-Moths, NID-Moths, and AMI-Traps. Our experiments reveal that high-resolution input is a dominant factor for accurate insect localisation, with performance improving substantially at larger image sizes. In addition, we perform cross-dataset validation to verify the generalisation capabilities of YOLOv9 across these datasets, justifying the choice of AMI-Traps as our pre-training dataset for obtaining a robust detector. Finally, to leverage large amounts of unlabeled data, we investigate a pseudo-labelling and paste-in data augmentation strategy. While this technique provides only modest improvements in overall detection metrics, qualitative analysis demonstrates that it enhances model robustness, enabling the detection of insects in challenging, low-contrast conditions where a strong baseline model would otherwise fail. In our experiments, YOLOv9 outperforms SSD on the one-class NID-Moths and AMI-Traps datasets, with average precisions of 0.951 and 0.742, respectively. On the binary-class AMI-Traps dataset, a larger YOLOv9 model with a 1280x1280 input resolution achieves an average precision of 0.972 for the moth category. These results underscore the importance of data-centric approaches and high-resolution imagery for building effective automated insect monitoring systems.},
}