#!/usr/bin/env python from __future__ import absolute_import, division, print_function from warnings import warn import dask.array as dask_array import numpy as np import xarray as xr import xskillscore as xs from scipy import special from .common_utils import esmlab_xr_set_options def validate_weights(da, dim, weights): if not isinstance(weights, xr.DataArray): raise TypeError( f'You provided weights with type={type(weights)}.\n' 'Weights must be an xarray.DataArray with shape that is broadcastable \n' 'to shape= {da.data.shape} of da = {da}.' ) # if NaN are present, we need to use individual weights total_weights = weights.where(da.notnull()).sum(dim=dim) # Make sure weights add up to 1.0 rtol = 1e-6 if weights.dtype == np.float32 else 1e-7 np.testing.assert_allclose((weights / weights.sum(dim)).sum(dim), 1.0, rtol=rtol) return weights, total_weights def weighted_sum_da(da, dim=None, weights=None): """ Compute weighted mean for DataArray Parameters ---------- da : xarray.DataArray DataArray for which to compute `weighted mean` dim : str or sequence of str, optional Dimension(s) over which to apply mean. weights : xarray.DataArray or array-like weights to apply. Shape must be broadcastable to shape of da. Returns ------- reduced : xarray.DataArray New DataArray with mean applied to its data and the indicated dimension(s) removed. """ if dim is None: dim = list(da.dims) if weights is None: return da.sum(dim) else: weights, _ = validate_weights(da, dim, weights) return (da * weights).sum(dim) @esmlab_xr_set_options(arithmetic_join='exact', keep_attrs=True) def weighted_sum(data, dim=None, weights=None): """ Compute weighted sum for xarray objects Parameters ---------- data : xarray.Dataset or xarray.DataArray xarray object for which to compute `weighted sum` dim : str or sequence of str, optional Dimension(s) over which to apply sum. weights : xarray.DataArray or array-like weights to apply. Shape must be broadcastable to shape of data. Returns ------- reduced : xarray.Dataset or xarray.DataArray New xarray object with weighted sum applied to its data and the indicated dimension(s) removed. """ if isinstance(dim, str): dim = [dim] if isinstance(data, xr.DataArray): return weighted_sum_da(data, dim, weights) elif isinstance(data, xr.Dataset): return data.apply(weighted_sum_da, dim=dim, weights=weights) else: raise ValueError('Data must be an xarray Dataset or DataArray') def weighted_mean_da(da, dim=None, weights=None): """ Compute weighted mean for DataArray Parameters ---------- da : xarray.DataArray DataArray for which to compute `weighted mean` dim : str or sequence of str, optional Dimension(s) over which to apply mean. weights : xarray.DataArray or array-like weights to apply. Shape must be broadcastable to shape of da. Returns ------- reduced : xarray.DataArray New DataArray with mean applied to its data and the indicated dimension(s) removed. """ if dim is None: dim = list(da.dims) if weights is None: return da.mean(dim) elif all(d in da.dims for d in dim): weights, total_weights = validate_weights(da, dim, weights) return (da * weights).sum(dim) / total_weights else: return da @esmlab_xr_set_options(arithmetic_join='exact', keep_attrs=True) def weighted_mean(data, dim=None, weights=None): """ Compute weighted mean for xarray objects Parameters ---------- data : xarray.Dataset or xarray.DataArray xarray object for which to compute `weighted mean` dim : str or sequence of str, optional Dimension(s) over which to apply mean. weights : xarray.DataArray or array-like weights to apply. Shape must be broadcastable to shape of data. Returns ------- reduced : xarray.Dataset or xarray.DataArray New xarray object with weighted mean applied to its data and the indicated dimension(s) removed. """ if isinstance(dim, str): dim = [dim] if isinstance(data, xr.DataArray): return weighted_mean_da(data, dim, weights) elif isinstance(data, xr.Dataset): return data.apply(weighted_mean_da, dim=dim, weights=weights) else: raise ValueError('Data must be an xarray Dataset or DataArray') def weighted_std_da(da, dim=None, weights=None, ddof=0): """ Compute weighted standard deviation for DataArray Parameters ---------- da : xarray.DataArray DataArray for which to compute `weighted std` dim : str or sequence of str, optional Dimension(s) over which to apply standard deviation. weights : xarray.DataArray or array-like weights to apply. Shape must be broadcastable to shape of da. ddof : int, optional Delta Degrees of Freedom. By default ddof is zero. Returns ------- reduced : xarray.DataArray New DataArray with standard deviation applied to its data and the indicated dimension(s) removed. """ if dim is None: dim = list(da.dims) if weights is None: return da.std(dim) elif all(d in da.dims for d in dim): weights, total_weights = validate_weights(da, dim, weights) da_mean = weighted_mean_da(da, dim, weights) std = np.sqrt((weights * (da - da_mean) ** 2).sum(dim) / (total_weights - ddof)) return std else: return da @esmlab_xr_set_options(arithmetic_join='exact', keep_attrs=True) def weighted_std(data, dim=None, weights=None): """ Compute weighted standard deviation for xarray objects Parameters ---------- data : xarray.Dataset or xarray.DataArray xarray object for which to compute `weighted std` dim : str or sequence of str, optional Dimension(s) over which to apply standard deviation. weights : xarray.DataArray or array-like weights to apply. Shape must be broadcastable to shape of data. Returns ------- reduced : xarray.Dataset or xarray.DataArray New xarray object with weighted standard deviation applied to its data and the indicated dimension(s) removed. """ if isinstance(dim, str): dim = [dim] if isinstance(data, xr.DataArray): return weighted_std_da(data, dim, weights) elif isinstance(data, xr.Dataset): return data.apply(weighted_std_da, dim=dim, weights=weights) else: raise ValueError('Data must be an xarray Dataset or DataArray') @esmlab_xr_set_options(arithmetic_join='exact', keep_attrs=True) def weighted_rmsd(x, y, dim=None, weights=None): """ Compute weighted root mean square deviation between two xarray DataArrays Parameters ---------- x, y : xarray.DataArray xarray DataArray for which to compute `weighted_rmsd`. dim : str or sequence of str, optional Dimension(s) over which to apply rmsd. weights : xarray.DataArray or array-like weights to apply. Shape must be broadcastable to shape of data. Returns ------- reduced : xarray.DataArray New DataArray with root mean square deviation applied to x, y and the indicated dimension(s) removed. """ if isinstance(dim, str): dim = [dim] if not isinstance(x, xr.DataArray) or not isinstance(y, xr.DataArray): raise ValueError('x and y must be xarray DataArrays') dev = (x - y) ** 2 dev_mean = weighted_mean(dev, dim, weights) return np.sqrt(dev_mean) @esmlab_xr_set_options(arithmetic_join='exact', keep_attrs=True) def weighted_cov(x, y, dim=None, weights=None): """ Compute weighted covariance between two xarray DataArrays. Parameters ---------- x, y : xarray.DataArray xarray DataArrays for which to compute `weighted covariance`. dim : str or sequence of str, optional Dimension(s) over which to apply covariance. weights : xarray.DataArray or array-like weights to apply. Shape must be broadcastable to shape of data. Returns ------- reduced : DataArray New DataArray with covariance applied to x, y and the indicated dimension(s) removed. """ if isinstance(dim, str): dim = [dim] if not isinstance(x, xr.DataArray) or not isinstance(y, xr.DataArray): raise ValueError('x and y must be xarray DataArrays') mean_x = weighted_mean(x, dim, weights) mean_y = weighted_mean(y, dim, weights) dev_x = x - mean_x dev_y = y - mean_y dev_xy = dev_x * dev_y cov_xy = weighted_mean(dev_xy, dim, weights) return cov_xy @esmlab_xr_set_options(arithmetic_join='exact') def weighted_corr(x, y, dim=None, weights=None, return_p=True): """ Compute weighted correlation between two `xarray.DataArray`. Parameters ---------- x, y : xarray.DataArray xarray DataArrays for which to compute `weighted correlation`. dim : str or sequence of str, optional Dimension(s) over which to apply correlation. weights : xarray.DataArray or array-like weights to apply. Shape must be broadcastable to shape of data. return_p : bool, default: True If True, compute and return the p-value(s) associated with the correlation. Returns ------- reduced : xarray.DataArray New DataArray with correlation applied to x, y and the indicated dimension(s) removed. If `return_p` is True, appends the resulting p values to the returned Dataset. """ if isinstance(dim, str): dim = [dim] if not isinstance(x, xr.DataArray) or not isinstance(y, xr.DataArray): raise ValueError('x and y must be xarray DataArrays') valid_values = x.notnull() & y.notnull() x = x.where(valid_values) y = y.where(valid_values) numerator = weighted_cov(x, y, dim, weights) denominator = np.sqrt(weighted_cov(x, x, dim, weights) * weighted_cov(y, y, dim, weights)) corr_xy = numerator / denominator if return_p: p = compute_corr_significance(corr_xy, len(x)) corr_xy.name = 'r' p.name = 'p' return xr.merge([corr_xy, p]) else: return corr_xy @esmlab_xr_set_options(arithmetic_join='exact', keep_attrs=True) def compute_corr_significance(r, N): """ Compute statistical significance for a pearson correlation between two xarray objects. Parameters ---------- r : `xarray.DataArray` object correlation coefficient between two time series. N : int length of time series being correlated. Returns ------- pval : float p value for pearson correlation. """ df = N - 2 t_squared = r ** 2 * (df / ((1.0 - r) * (1.0 + r))) # method used in scipy, where `np.fmin` constrains values to be # below 1 due to errors in floating point arithmetic. pval = special.betainc(0.5 * df, 0.5, np.fmin(df / (df + t_squared), 1.0)) return xr.DataArray(pval, coords=t_squared.coords, dims=t_squared.dims) def rmse(x, y, dim): """ Compute Root Mean Squared Error. Parameters ---------- x : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars Mix of labeled and/or unlabeled arrays to which to apply the function. y : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars Mix of labeled and/or unlabeled arrays to which to apply the function. dim : str The dimension to apply the correlation along. Returns ------- Root Mean Squared Error Single value or tuple of Dataset, DataArray, Variable, dask.array.Array or numpy.ndarray, the first type on that list to appear on an input. """ return xs.rmse(x, y, dim) def mse(x, y, dim): """ Compute Mean Squared Error. Parameters ---------- x : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars Mix of labeled and/or unlabeled arrays to which to apply the function. y : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars Mix of labeled and/or unlabeled arrays to which to apply the function. dim : str The dimension to apply the correlation along. Returns ------- Mean Squared Error Single value or tuple of Dataset, DataArray, Variable, dask.array.Array or numpy.ndarray, the first type on that list to appear on an input. """ return xs.mse(x, y, dim) def mae(x, y, dim): """ Compute Mean Absolute Error. Parameters ---------- x : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars Mix of labeled and/or unlabeled arrays to which to apply the function. y : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars Mix of labeled and/or unlabeled arrays to which to apply the function. dim : str The dimension to apply the correlation along. Returns ------- Mean Absolute Error Single value or tuple of Dataset, DataArray, Variable, dask.array.Array or numpy.ndarray, the first type on that list to appear on an input. """ return xs.mae(x, y, dim)