Skip to content

Utility Functions

Helper functions and data processing utilities for statistical analysis.

Utility Functions for Statistics (Aero Protocol Compliant).

angular_difference(angle1, angle2, units='degrees')

Calculate the smallest angular difference between two angles (Aero Protocol).

Backend-agnostic (supports NumPy and Xarray/Dask).

Parameters

angle1 : ArrayLike First angle(s). angle2 : ArrayLike Second angle(s). units : str, optional Units of angles ('degrees' or 'radians'). Default is 'degrees'.

Returns

Any Smallest angular difference between the two angles.

Source code in src/monet_stats/utils_stats.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
def angular_difference(angle1: ArrayLike, angle2: ArrayLike, units: str = "degrees") -> Any:
    """
    Calculate the smallest angular difference between two angles (Aero Protocol).

    Backend-agnostic (supports NumPy and Xarray/Dask).

    Parameters
    ----------
    angle1 : ArrayLike
        First angle(s).
    angle2 : ArrayLike
        Second angle(s).
    units : str, optional
        Units of angles ('degrees' or 'radians'). Default is 'degrees'.

    Returns
    -------
    Any
        Smallest angular difference between the two angles.
    """
    if units == "degrees":
        max_val = 360.0
    elif units == "radians":
        max_val = 2 * np.pi
    else:
        raise ValueError("units must be 'degrees' or 'radians'")

    if isinstance(angle1, (xr.DataArray, xr.Dataset)) or isinstance(angle2, (xr.DataArray, xr.Dataset)):
        if isinstance(angle1, (xr.DataArray, xr.Dataset)) and isinstance(angle2, (xr.DataArray, xr.Dataset)):
            angle1, angle2 = xr.align(angle1, angle2, join="inner")

        diff = abs(angle1 - angle2)
        result = xr.where(diff > max_val / 2, max_val - diff, diff)
        return _update_history(result, "angular_difference")

    angle1_arr = np.asanyarray(angle1)
    angle2_arr = np.asanyarray(angle2)
    diff = np.abs(angle1_arr - angle2_arr)
    return np.minimum(diff, max_val - diff)

circlebias(b)

Circular bias (wrapped to [-180, 180] degrees) (Aero Protocol).

Handles both dense and masked arrays, as well as Xarray/Dask objects.

Parameters

b : ArrayLike Difference between two wind directions (degrees).

Returns

Any Circularly wrapped difference (degrees).

Examples

circlebias(190) -170 circlebias(-190) 170

Source code in src/monet_stats/utils_stats.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def circlebias(b: ArrayLike) -> Any:
    """
    Circular bias (wrapped to [-180, 180] degrees) (Aero Protocol).

    Handles both dense and masked arrays, as well as Xarray/Dask objects.

    Parameters
    ----------
    b : ArrayLike
        Difference between two wind directions (degrees).

    Returns
    -------
    Any
        Circularly wrapped difference (degrees).

    Examples
    --------
    >>> circlebias(190)
    -170
    >>> circlebias(-190)
    170
    """
    if isinstance(b, (xr.DataArray, xr.Dataset)):
        res = (b + 180) % 360 - 180
        return _update_history(res, "circlebias")

    # Handle both dense and masked numpy arrays
    b_arr = np.ma.masked_invalid(b)
    res_arr = (b_arr + 180) % 360 - 180

    if not np.ma.is_masked(res_arr) and not isinstance(b, np.ma.MaskedArray):
        if np.issubdtype(res_arr.dtype, np.floating):
            return res_arr.filled(np.nan)
        return np.ma.getdata(res_arr)

    return res_arr

circlebias_m(b)

Robust circular bias for wind direction (Alias for circlebias).

Parameters

b : ArrayLike Difference between two wind directions (degrees).

Returns

Any Circularly wrapped difference.

Source code in src/monet_stats/utils_stats.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
def circlebias_m(b: ArrayLike) -> Any:
    """
    Robust circular bias for wind direction (Alias for circlebias).

    Parameters
    ----------
    b : ArrayLike
        Difference between two wind directions (degrees).

    Returns
    -------
    Any
        Circularly wrapped difference.
    """
    return circlebias(b)

correlation(x, y, axis=None)

Calculate Pearson correlation coefficient (Alias for correlation_metrics.pearsonr).

Parameters

x : Union[np.ndarray, xr.DataArray] First variable. y : Union[np.ndarray, xr.DataArray] Second variable. axis : Union[int, str, Iterable], optional Axis along which to compute correlation.

Returns

Union[np.number, np.ndarray, xr.DataArray] Pearson correlation coefficient.

Raises

ValueError If input arrays are empty.

Source code in src/monet_stats/utils_stats.py
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
def correlation(
    x: Union[np.ndarray, xr.DataArray],
    y: Union[np.ndarray, xr.DataArray],
    axis: Optional[Union[int, str, Iterable[Union[int, str]]]] = None,
) -> Union[np.number, np.ndarray, xr.DataArray]:
    """
    Calculate Pearson correlation coefficient (Alias for correlation_metrics.pearsonr).

    Parameters
    ----------
    x : Union[np.ndarray, xr.DataArray]
        First variable.
    y : Union[np.ndarray, xr.DataArray]
        Second variable.
    axis : Union[int, str, Iterable], optional
        Axis along which to compute correlation.

    Returns
    -------
    Union[np.number, np.ndarray, xr.DataArray]
        Pearson correlation coefficient.

    Raises
    ------
    ValueError
        If input arrays are empty.
    """
    if hasattr(x, "size") and x.size == 0:
        raise ValueError("Input arrays cannot be empty")
    if hasattr(y, "size") and y.size == 0:
        raise ValueError("Input arrays cannot be empty")

    from .correlation_metrics import pearsonr

    res = pearsonr(x, y, axis=axis)
    if isinstance(res, (xr.DataArray, xr.Dataset)):
        return _update_history(res, "correlation")
    return res

mae(obs, mod, axis=None)

Calculate Mean Absolute Error (Alias for error_metrics.MAE).

Parameters

obs : Union[np.ndarray, xr.DataArray] Observed values. mod : Union[np.ndarray, xr.DataArray] Model or predicted values. axis : Union[int, str, Iterable], optional Axis along which to compute MAE.

Returns

Union[np.number, np.ndarray, xr.DataArray] Mean absolute error.

Source code in src/monet_stats/utils_stats.py
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
def mae(
    obs: Union[np.ndarray, xr.DataArray],
    mod: Union[np.ndarray, xr.DataArray],
    axis: Optional[Union[int, str, Iterable[Union[int, str]]]] = None,
) -> Union[np.number, np.ndarray, xr.DataArray]:
    """
    Calculate Mean Absolute Error (Alias for error_metrics.MAE).

    Parameters
    ----------
    obs : Union[np.ndarray, xr.DataArray]
        Observed values.
    mod : Union[np.ndarray, xr.DataArray]
        Model or predicted values.
    axis : Union[int, str, Iterable], optional
        Axis along which to compute MAE.

    Returns
    -------
    Union[np.number, np.ndarray, xr.DataArray]
        Mean absolute error.
    """
    from .error_metrics import MAE

    res = MAE(obs, mod, axis=axis)
    if isinstance(res, (xr.DataArray, xr.Dataset)):
        return _update_history(res, "mae")
    return res

matchedcompressed(a1, a2)

Return compressed (non-masked) values from two matched arrays.

Note: For Xarray DataArrays, this function will trigger a computation if the data is Dask-backed, as it returns NumPy ndarrays. For lazy operations, prefer using Xarray-native methods with skipna=True.

Parameters

a1 : ArrayLike First input array. a2 : ArrayLike Second input array.

Returns

Tuple[np.ndarray, np.ndarray] Tuple of (a1_compressed, a2_compressed), both 1D arrays of valid values.

Examples

import numpy as np a = np.array([1, np.nan, 3]) b = np.array([4, 5, 6]) matchedcompressed(a, b) (array([1., 3.]), array([4., 6.]))

Source code in src/monet_stats/utils_stats.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def matchedcompressed(a1: ArrayLike, a2: ArrayLike) -> Tuple[np.ndarray, np.ndarray]:
    """
    Return compressed (non-masked) values from two matched arrays.

    Note: For Xarray DataArrays, this function will trigger a computation if
    the data is Dask-backed, as it returns NumPy ndarrays. For lazy operations,
    prefer using Xarray-native methods with `skipna=True`.

    Parameters
    ----------
    a1 : ArrayLike
        First input array.
    a2 : ArrayLike
        Second input array.

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        Tuple of (a1_compressed, a2_compressed), both 1D arrays of valid values.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([1, np.nan, 3])
    >>> b = np.array([4, 5, 6])
    >>> matchedcompressed(a, b)
    (array([1., 3.]), array([4., 6.]))
    """
    # Handle Xarray objects by extracting values (triggers computation for Dask)
    if isinstance(a1, (xr.DataArray, xr.Dataset)):
        if hasattr(a1, "data") and hasattr(a1.data, "chunks"):
            warnings.warn(
                "matchedcompressed triggered computation on Dask-backed array. "
                "Consider using backend-agnostic xarray operations instead.",
                UserWarning,
                stacklevel=2,
            )
        a1 = a1.values
    if isinstance(a2, (xr.DataArray, xr.Dataset)):
        if hasattr(a2, "data") and hasattr(a2.data, "chunks"):
            warnings.warn(
                "matchedcompressed triggered computation on Dask-backed array. "
                "Consider using backend-agnostic xarray operations instead.",
                UserWarning,
                stacklevel=2,
            )
        a2 = a2.values

    # Convert to masked arrays to handle existing masks and NaNs
    a1_m = np.ma.masked_invalid(a1)
    a2_m = np.ma.masked_invalid(a2)

    # Handle mismatched shapes by truncating both to the minimum size
    if a1_m.shape != a2_m.shape:
        min_size = min(a1_m.size, a2_m.size)
        a1_m = a1_m.flat[:min_size]
        a2_m = a2_m.flat[:min_size]

    mask = np.ma.getmaskarray(a1_m) | np.ma.getmaskarray(a2_m)
    a1_matched = np.ma.masked_where(mask, a1_m)
    a2_matched = np.ma.masked_where(mask, a2_m)

    return a1_matched.compressed(), a2_matched.compressed()

matchmasks(a1, a2)

Match and combine masks from two arrays or align Xarray objects (Aero Protocol).

Parameters

a1 : Any First input array or DataArray. a2 : Any Second input array or DataArray.

Returns

Tuple[Any, Any] Tuple of (a1_matched, a2_matched).

Source code in src/monet_stats/utils_stats.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def matchmasks(a1: Any, a2: Any) -> Tuple[Any, Any]:
    """
    Match and combine masks from two arrays or align Xarray objects (Aero Protocol).

    Parameters
    ----------
    a1 : Any
        First input array or DataArray.
    a2 : Any
        Second input array or DataArray.

    Returns
    -------
    Tuple[Any, Any]
        Tuple of (a1_matched, a2_matched).
    """
    if isinstance(a1, (xr.DataArray, xr.Dataset)) and isinstance(a2, (xr.DataArray, xr.Dataset)):
        return xr.align(a1, a2, join="inner")

    a1_arr = np.asanyarray(a1)
    a2_arr = np.asanyarray(a2)

    # Unify mask handling for numpy
    a1_m = np.ma.masked_invalid(a1_arr)
    a2_m = np.ma.masked_invalid(a2_arr)

    common_mask = np.ma.getmaskarray(a1_m) | np.ma.getmaskarray(a2_m)
    return np.ma.masked_where(common_mask, a1_m), np.ma.masked_where(common_mask, a2_m)

rmse(obs, mod, axis=None)

Calculate Root Mean Square Error (Alias for error_metrics.RMSE).

Parameters

obs : Union[np.ndarray, xr.DataArray] Observed values. mod : Union[np.ndarray, xr.DataArray] Model or predicted values. axis : Union[int, str, Iterable], optional Axis along which to compute RMSE.

Returns

Union[np.number, np.ndarray, xr.DataArray] Root mean square error.

Source code in src/monet_stats/utils_stats.py
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
def rmse(
    obs: Union[np.ndarray, xr.DataArray],
    mod: Union[np.ndarray, xr.DataArray],
    axis: Optional[Union[int, str, Iterable[Union[int, str]]]] = None,
) -> Union[np.number, np.ndarray, xr.DataArray]:
    """
    Calculate Root Mean Square Error (Alias for error_metrics.RMSE).

    Parameters
    ----------
    obs : Union[np.ndarray, xr.DataArray]
        Observed values.
    mod : Union[np.ndarray, xr.DataArray]
        Model or predicted values.
    axis : Union[int, str, Iterable], optional
        Axis along which to compute RMSE.

    Returns
    -------
    Union[np.number, np.ndarray, xr.DataArray]
        Root mean square error.
    """
    from .error_metrics import RMSE

    res = RMSE(obs, mod, axis=axis)
    if isinstance(res, (xr.DataArray, xr.Dataset)):
        return _update_history(res, "rmse")
    return res