Most satellite images have systematically missing pixels (i.e., data that are missing not at random (MNAR)) due to factors such as clouds. If not addressed, these missing pixels can introduce representation bias in automated feature extraction models. In this work, we show that, in methane plume detection, a spurious association between the label and the number of missing pixels can cause the model to associate coverage (i.e., the percentage of valid pixels in an image) with the label and, subsequently, under-detect plumes in low-coverage images. We evaluate multiple imputation approaches to remove the dependence between the coverage and the label. Additionally, we propose a weighted resampling scheme during training that removes the association between the label and the coverage by enforcing class balance in each coverage bin. Our results show that both resampling and imputation can significantly reduce the representation bias without hurting balanced accuracy, precision, or recall. Finally, we evaluate the models debiased with these techniques in an operational scenario and demonstrate that they have a higher chance of detecting plumes in low-coverage images.
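The resampling scheme can be sketched roughly as follows (a minimal illustration, not the authors' implementation): each training image receives a weight inversely proportional to the size of its (coverage bin, class) cell, so that a weighted sampler draws plume and no-plume images with equal probability within every coverage bin. The bin count, variable names (labels, coverage), and the simulated data below are assumptions made for illustration only.

# Illustrative sketch: sample weights enforcing class balance within each
# coverage bin, so resampling breaks the coverage-label association.
import numpy as np

def coverage_balanced_weights(labels, coverage, n_bins=10):
    """Per-sample weights proportional to 1 / |(coverage bin, class) cell|.

    labels   : array of binary plume labels (0 = no plume, 1 = plume)
    coverage : array of valid-pixel fractions in [0, 1]
    n_bins   : number of equal-width coverage bins (an assumed choice)
    """
    labels = np.asarray(labels)
    coverage = np.asarray(coverage)
    edges = np.linspace(0.0, 1.0, n_bins + 1)[1:-1]   # interior bin edges
    bins = np.digitize(coverage, edges)                # bin index 0..n_bins-1

    weights = np.zeros(len(labels), dtype=float)
    for b in range(n_bins):
        for c in (0, 1):
            cell = (bins == b) & (labels == c)
            n = cell.sum()
            if n > 0:
                # Each (bin, class) cell receives equal total weight, so the
                # two classes are balanced within every coverage bin.
                weights[cell] = 1.0 / n
    return weights / weights.sum()

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    coverage = rng.uniform(0.0, 1.0, 1000)
    # Simulated spurious association: plumes are labelled less often at low coverage.
    labels = (rng.uniform(0.0, 1.0, 1000) < 0.2 + 0.5 * coverage).astype(int)
    w = coverage_balanced_weights(labels, coverage)
    # Drawing with these weights (e.g. via torch.utils.data.WeightedRandomSampler)
    # yields roughly equal plume / no-plume counts in each coverage bin.
    idx = rng.choice(len(labels), size=len(labels), replace=True, p=w)
    print(np.bincount(labels[idx]))

With uniform weights the resampled set would inherit the coverage-label correlation; with the cell-balanced weights above, the class proportions are (approximately) constant across coverage bins, which is the property the resampling scheme in the paper is designed to enforce.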
@inproceedings{WasEtAl2025,
author = {Wąsala, J. and Maasakkers, J.D. and Aben, E.E.A. and Schneider, R. and Hoos, H.H. and Baratchi, M.},
title = {Mitigating Representation Bias Caused by Missing Pixels in Methane Plume Detection},
maintitle = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD)},
booktitle = {Workshop on Machine Learning for Earth Observation},
year = {2025},
}