% Mahecha et al. (2020), Earth System Dynamics 11(1), pp. 201--234: journal article on Earth system data cubes.
@article{mahecha2020earth,
  author    = {Mahecha, Miguel D. and Gans, Fabian and Brandt, Gunnar and Christiansen, Rune and Cornell, Sarah E. and Fomferra, Normann and Kraemer, Guido and Peters, Jonas and Bodesheim, Paul and Camps-Valls, Gustau and Donges, Jonathan F. and Dorigo, Wouter and Estupinan-Suarez, Lina M. and Gutierrez-Velez, Victor H. and Gutwin, Martin and Jung, Martin and Londono, Maria C. and Miralles, Diego and Papastefanou, Phillip and Reichstein, Markus},
  title     = {Earth system data cubes unravel global multivariate dynamics},
  journal   = {Earth System Dynamics},
  year      = {2020},
  volume    = {11},
  number    = {1},
  pages     = {201--234},
  doi       = {10.5194/esd-11-201-2020},
  url       = {https://dx.doi.org/10.5194/esd-11-201-2020},
  abstract  = {Understanding Earth system dynamics in light of ongoing human intervention and dependency remains a major scientific challenge. The unprecedented availability of data streams describing different facets of the Earth now offers fundamentally new avenues to address this quest. However, several practical hurdles, especially the lack of data interoperability, limit the joint potential of these data streams. Today, many initiatives within and beyond the Earth system sciences are exploring new approaches to overcome these hurdles and meet the growing interdisciplinary need for data-intensive research; using data cubes is one promising avenue. Here, we introduce the concept of Earth system data cubes and how to operate on them in a formal way. The idea is that treating multiple data dimensions, such as spatial, temporal, variable, frequency, and other grids alike, allows effective application of user-defined functions to co-interpret Earth observations and/or model--data integration. An implementation of this concept combines analysis-ready data cubes with a suitable analytic interface.
In three case studies, we demonstrate how the concept and its implementation facilitate the execution of complex workflows for research across multiple variables, and spatial and temporal scales: (1) summary statistics for ecosystem and climate dynamics; (2) intrinsic dimensionality analysis on multiple timescales; and (3) model--data integration. We discuss the emerging perspectives for investigating global interacting and coupled phenomena in observed or simulated data. In particular, we see many emerging perspectives of this approach for interpreting large-scale model ensembles. The latest developments in machine learning, causal inference, and model--data integration can be seamlessly implemented in the proposed framework, supporting rapid progress in data-intensive research across disciplinary boundaries.},
  type      = {article},
  key       = {mahecha2020earth},
  owner     = {paul},
  timestamp = {2021.01.07},
}