@article{penzel2026model,
  author   = {Penzel, Niklas and Scheliga, Daniel and Oppermann, Hannes and Mäder, Patrick and Haueisen, Jens and Denzler, Joachim and Seeland, Marco},
  title    = {Model Utility and Explainability in Federated Learning -- A Case Study in Healthcare Using Fundus Oculi Datasets},
  journal  = {Journal of Biomedical Informatics},
  year     = {2026},
  volume   = {177},
  pages    = {105010},
  doi      = {10.1016/j.jbi.2026.105010},
  issn     = {1532-0464},
  url      = {https://www.sciencedirect.com/science/article/pii/S1532046426000341},
  keywords = {Federated learning, XAI, Feature attribution, Distributed scenario, Benchmark},
  groups   = {understanding-dl},
  abstract = {Objective: Introduce a case study for Federated Learning (FL) in healthcare, addressing challenges posed by patient privacy and limited large-scale datasets. Our goal is to assess the features learned by FL methods in a simulated, diverse setting that emphasizes realistic data heterogeneity, and to analyze the learned representations for their medical relevance using both local and global explainability techniques. Methods: Six fundus oculi datasets were combined to simulate a diverse federated learning environment, representing heterogeneous data conditions. We evaluated three established FL methods against centrally trained models, assessing both predictive performance and the learned representations. Specifically, explainability techniques were employed to examine the features learned by the models, and local explanations were evaluated against attention maps annotated by ophthalmologists. Robustness against common biases in fundus datasets was also assessed. Results: Our study found improvements in model utility (up to 9.97\%) with FL methods compared to isolated training. Analysis of learned representations revealed that federated models predominantly learn the vertical cup-to-disc ratio, a crucial feature for glaucoma diagnosis, and demonstrated robustness against common biases. High agreement was observed between local explanations and ophthalmologist-annotated attention maps. Conclusion: This study demonstrates the benefits of FL systems in a healthcare scenario, providing a case study for evaluating federated systems beyond idealized benchmarks. Our findings highlight the potential of FL to not only improve model utility in privacy-sensitive medical domains but also to learn medically relevant features instead of spurious correlations.},
}