From 33921941b90898f0523777dbc98a215dc8091659 Mon Sep 17 00:00:00 2001 From: Ewan Short Date: Fri, 21 Nov 2025 13:17:51 +1100 Subject: [PATCH 1/8] add nunique GH9548 --- doc/api/dataarray.rst | 1 + doc/api/dataset.rst | 1 + doc/api/datatree.rst | 1 + doc/whats-new.rst | 4 + xarray/core/_aggregations.py | 2130 ++++++++------------------ xarray/core/duck_array_ops.py | 46 +- xarray/namedarray/_aggregations.py | 157 +- xarray/tests/test_dataarray.py | 39 + xarray/tests/test_dataset.py | 61 + xarray/tests/test_datatree.py | 12 + xarray/tests/test_duck_array_ops.py | 28 + xarray/util/generate_aggregations.py | 1 + 12 files changed, 909 insertions(+), 1572 deletions(-) diff --git a/doc/api/dataarray.rst b/doc/api/dataarray.rst index 9d4e81c8677..8e4c2e77e11 100644 --- a/doc/api/dataarray.rst +++ b/doc/api/dataarray.rst @@ -162,6 +162,7 @@ Aggregation DataArray.min DataArray.mean DataArray.median + DataArray.nunique DataArray.prod DataArray.sum DataArray.std diff --git a/doc/api/dataset.rst b/doc/api/dataset.rst index 733c9768d2f..0c8e1e49679 100644 --- a/doc/api/dataset.rst +++ b/doc/api/dataset.rst @@ -169,6 +169,7 @@ Aggregation Dataset.min Dataset.mean Dataset.median + Dataset.nunique Dataset.prod Dataset.sum Dataset.std diff --git a/doc/api/datatree.rst b/doc/api/datatree.rst index 8501440b7d7..487e47c5927 100644 --- a/doc/api/datatree.rst +++ b/doc/api/datatree.rst @@ -266,6 +266,7 @@ Aggregate data in all nodes in the subtree simultaneously. DataTree.min DataTree.mean DataTree.median + DataTree.nunique DataTree.prod DataTree.sum DataTree.std diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 255f88d241e..8062d65f1f3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -17,6 +17,10 @@ New Features - :py:func:`combine_nested` now support :py:class:`DataTree` objects (:pull:`10849`). By `Stephan Hoyer `_. +- Add :py:func:`nunique` reduction function (:issue:`9548`), which behaves like + :py:func:`pandas.DataFrame.nunique` applied along specific dimensions. + By `Ewan Short `_. + Breaking Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index adc064840de..7f4cc34a323 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -88,21 +88,8 @@ def count( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.count() - - Group: / - Dimensions: () - Data variables: - foo int64 8B 5 """ return self.reduce( duck_array_ops.count, @@ -171,21 +158,8 @@ def all( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.all() - - Group: / - Dimensions: () - Data variables: - foo bool 1B False """ return self.reduce( duck_array_ops.array_all, @@ -254,21 +228,8 @@ def any( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.any() - - Group: / - Dimensions: () - Data variables: - foo bool 1B True """ return self.reduce( duck_array_ops.array_any, @@ -338,30 +299,12 @@ def max( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.max() - - Group: / - Dimensions: () - Data variables: - foo float64 8B 3.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> dt.max(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan """ return self.reduce( duck_array_ops.max, @@ -432,30 +375,12 @@ def min( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.min() - - Group: / - Dimensions: () - Data variables: - foo float64 8B 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> dt.min(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan """ return self.reduce( duck_array_ops.min, @@ -513,7 +438,7 @@ def mean( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -530,30 +455,12 @@ def mean( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.mean() - - Group: / - Dimensions: () - Data variables: - foo float64 8B 1.6 Use ``skipna`` to control whether NaNs are ignored. >>> dt.mean(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan """ return self.reduce( duck_array_ops.mean, @@ -618,7 +525,7 @@ def prod( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -635,39 +542,16 @@ def prod( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.prod() - - Group: / - Dimensions: () - Data variables: - foo float64 8B 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> dt.prod(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> dt.prod(skipna=True, min_count=2) - - Group: / - Dimensions: () - Data variables: - foo float64 8B 0.0 """ return self.reduce( duck_array_ops.prod, @@ -733,7 +617,7 @@ def sum( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -750,39 +634,16 @@ def sum( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.sum() - - Group: / - Dimensions: () - Data variables: - foo float64 8B 8.0 Use ``skipna`` to control whether NaNs are ignored. >>> dt.sum(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> dt.sum(skipna=True, min_count=2) - - Group: / - Dimensions: () - Data variables: - foo float64 8B 8.0 """ return self.reduce( duck_array_ops.sum, @@ -845,7 +706,7 @@ def std( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -862,39 +723,16 @@ def std( ... ), ... 
) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.std() - - Group: / - Dimensions: () - Data variables: - foo float64 8B 1.02 Use ``skipna`` to control whether NaNs are ignored. >>> dt.std(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan Specify ``ddof=1`` for an unbiased estimate. >>> dt.std(skipna=True, ddof=1) - - Group: / - Dimensions: () - Data variables: - foo float64 8B 1.14 """ return self.reduce( duck_array_ops.std, @@ -957,7 +795,7 @@ def var( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -974,39 +812,16 @@ def var( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.var() - - Group: / - Dimensions: () - Data variables: - foo float64 8B 1.04 Use ``skipna`` to control whether NaNs are ignored. >>> dt.var(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan Specify ``ddof=1`` for an unbiased estimate. >>> dt.var(skipna=True, ddof=1) - - Group: / - Dimensions: () - Data variables: - foo float64 8B 1.3 """ return self.reduce( duck_array_ops.var, @@ -1065,7 +880,7 @@ def median( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -1082,30 +897,12 @@ def median( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.median() - - Group: / - Dimensions: () - Data variables: - foo float64 8B 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> dt.median(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan """ return self.reduce( duck_array_ops.median, @@ -1116,6 +913,81 @@ def median( **kwargs, ) + def nunique( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this DataTree's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
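        Note that the ``duck_array_ops.nunique`` helper this method dispatches to
        is not shown in this hunk. As a rough, hedged sketch only (illustrative
        helper name, not part of this patch), a NaN-aware unique count along one
        axis can be expressed in plain numpy::

            import numpy as np

            def nunique_sketch(values, axis=-1, skipna=True):
                # Collapse every slice along ``axis`` to its number of unique entries.
                moved = np.moveaxis(np.asarray(values), axis, -1)
                flat = moved.reshape(-1, moved.shape[-1])
                out = np.empty(flat.shape[0], dtype=np.intp)
                for i, row in enumerate(flat):
                    uniq = np.unique(row)  # recent numpy collapses repeated NaNs to one entry
                    if skipna and uniq.dtype.kind == "f":
                        uniq = uniq[~np.isnan(uniq)]
                    out[i] = uniq.size
                return out.reshape(moved.shape[:-1])

            nunique_sketch(np.array([1.0, 2.0, 2.0, np.nan]))  # -> array(2); 3 with skipna=False

        ``np.unique`` sorts each slice, so a hash-based counting strategy may be
        faster on large arrays; the sketch above only illustrates the semantics.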
+ + Returns + ------- + reduced : DataTree + New DataTree with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + Dataset.nunique + DataArray.nunique + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt + + >>> dt.nunique() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.nunique(skipna=False) + """ + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + def cumsum( self, dim: Dims = None, @@ -1164,7 +1036,7 @@ def cumsum( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -1185,32 +1057,12 @@ def cumsum( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.cumsum() - - Group: / - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - foo (time) float64 48B 1.0 3.0 6.0 6.0 8.0 8.0 Use ``skipna`` to control whether NaNs are ignored. >>> dt.cumsum(skipna=False) - - Group: / - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - foo (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -1269,7 +1121,7 @@ def cumprod( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -1290,32 +1142,12 @@ def cumprod( ... ), ... ) >>> dt - - Group: / - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> dt.cumprod() - - Group: / - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - foo (time) float64 48B 1.0 2.0 6.0 0.0 0.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> dt.cumprod(skipna=False) - - Group: / - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - foo (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -1392,19 +1224,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.count() - Size: 8B - Dimensions: () - Data variables: - da int64 8B 5 """ return self.reduce( duck_array_ops.count, @@ -1464,19 +1285,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.all() - Size: 1B - Dimensions: () - Data variables: - da bool 1B False """ return self.reduce( duck_array_ops.array_all, @@ -1536,19 +1346,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.any() - Size: 1B - Dimensions: () - Data variables: - da bool 1B True """ return self.reduce( duck_array_ops.array_any, @@ -1614,27 +1413,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.max() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.max(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan """ return self.reduce( duck_array_ops.max, @@ -1701,27 +1485,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.min() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.min(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan """ return self.reduce( duck_array_ops.min, @@ -1776,6 +1545,10 @@ def mean( :ref:`agg` User guide on reduction or aggregation operations. + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -1788,27 +1561,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.mean() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.6 Use ``skipna`` to control whether NaNs are ignored. >>> ds.mean(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan """ return self.reduce( duck_array_ops.mean, @@ -1872,7 +1630,7 @@ def prod( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -1886,35 +1644,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.prod() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.prod(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.prod(skipna=True, min_count=2) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 0.0 """ return self.reduce( duck_array_ops.prod, @@ -1979,7 +1718,7 @@ def sum( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -1993,35 +1732,16 @@ def sum( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.sum() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 8.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.sum(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.sum(skipna=True, min_count=2) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 8.0 """ return self.reduce( duck_array_ops.sum, @@ -2083,7 +1803,7 @@ def std( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -2097,35 +1817,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.std() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.02 Use ``skipna`` to control whether NaNs are ignored. >>> ds.std(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.std(skipna=True, ddof=1) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.14 """ return self.reduce( duck_array_ops.std, @@ -2187,7 +1888,7 @@ def var( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -2201,35 +1902,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.var() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.04 Use ``skipna`` to control whether NaNs are ignored. >>> ds.var(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.var(skipna=True, ddof=1) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.3 """ return self.reduce( duck_array_ops.var, @@ -2287,7 +1969,7 @@ def median( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -2301,27 +1983,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.median() - Size: 8B - Dimensions: () - Data variables: - da float64 8B 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.median(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan """ return self.reduce( duck_array_ops.median, @@ -2332,6 +1999,77 @@ def median( **kwargs, ) + def nunique( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this Dataset's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." 
or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + DataArray.nunique + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + >>> ds.nunique() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.nunique(skipna=False) + """ + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + def cumsum( self, dim: Dims = None, @@ -2379,7 +2117,7 @@ def cumsum( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -2397,29 +2135,12 @@ def cumsum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.cumsum() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 8.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.cumsum(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -2477,7 +2198,7 @@ def cumprod( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -2495,29 +2216,12 @@ def cumprod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.cumprod() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.cumprod(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -2593,15 +2297,8 @@ def count( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.count() - Size: 8B - array(5) """ return self.reduce( duck_array_ops.count, @@ -2659,15 +2356,8 @@ def all( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.all() - Size: 1B - array(False) """ return self.reduce( duck_array_ops.array_all, @@ -2725,15 +2415,8 @@ def any( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.any() - Size: 1B - array(True) """ return self.reduce( duck_array_ops.array_any, @@ -2797,21 +2480,12 @@ def max( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.max() - Size: 8B - array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> da.max(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.max, @@ -2876,21 +2550,12 @@ def min( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.min() - Size: 8B - array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> da.min(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.min, @@ -2944,6 +2609,10 @@ def mean( :ref:`agg` User guide on reduction or aggregation operations. + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -2955,21 +2624,12 @@ def mean( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.mean() - Size: 8B - array(1.6) Use ``skipna`` to control whether NaNs are ignored. >>> da.mean(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.mean, @@ -3032,7 +2692,7 @@ def prod( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -3045,27 +2705,16 @@ def prod( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.prod() - Size: 8B - array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> da.prod(skipna=False) - Size: 8B - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.prod(skipna=True, min_count=2) - Size: 8B - array(0.) 
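        The ``min_count`` behaviour demonstrated above reduces to a simple rule:
        compute the skipna reduction, then invalidate the result when fewer than
        ``min_count`` valid values contributed. A minimal sketch under that
        assumption, using plain numpy and an illustrative helper name rather than
        xarray's actual code path::

            import numpy as np

            def sum_with_min_count(values, min_count=None):
                # nansum skips NaNs; the gate below turns under-populated results into NaN.
                valid = np.count_nonzero(~np.isnan(values))
                result = np.nansum(values)
                if min_count is not None and valid < min_count:
                    return np.float64("nan")
                return result

            sum_with_min_count(np.array([1.0, np.nan]), min_count=2)  # -> nan
            sum_with_min_count(np.array([1.0, 2.0]), min_count=2)  # -> 3.0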
""" return self.reduce( duck_array_ops.prod, @@ -3129,7 +2778,7 @@ def sum( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -3142,27 +2791,16 @@ def sum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.sum() - Size: 8B - array(8.) Use ``skipna`` to control whether NaNs are ignored. >>> da.sum(skipna=False) - Size: 8B - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.sum(skipna=True, min_count=2) - Size: 8B - array(8.) """ return self.reduce( duck_array_ops.sum, @@ -3223,7 +2861,7 @@ def std( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -3236,27 +2874,16 @@ def std( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.std() - Size: 8B - array(1.0198039) Use ``skipna`` to control whether NaNs are ignored. >>> da.std(skipna=False) - Size: 8B - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.std(skipna=True, ddof=1) - Size: 8B - array(1.14017543) """ return self.reduce( duck_array_ops.std, @@ -3317,7 +2944,7 @@ def var( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -3330,27 +2957,16 @@ def var( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.var() - Size: 8B - array(1.04) Use ``skipna`` to control whether NaNs are ignored. >>> da.var(skipna=False) - Size: 8B - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.var(skipna=True, ddof=1) - Size: 8B - array(1.3) """ return self.reduce( duck_array_ops.var, @@ -3407,7 +3023,7 @@ def median( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -3420,21 +3036,12 @@ def median( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.median() - Size: 8B - array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> da.median(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.median, @@ -3444,6 +3051,75 @@ def median( **kwargs, ) + def nunique( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this DataArray's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. 
+ skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + Dataset.nunique + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + >>> da.nunique() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.nunique(skipna=False) + """ + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + def cumsum( self, dim: Dims = None, @@ -3491,7 +3167,7 @@ def cumsum( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -3508,27 +3184,12 @@ def cumsum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumsum() - Size: 48B - array([1., 3., 6., 6., 8., 8.]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumsum(skipna=False) - Size: 48B - array([ 1., 3., 6., 6., 8., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumprod() - Size: 48B - array([1., 2., 6., 0., 0., 0.]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumprod(skipna=False) - Size: 48B - array([ 1., 2., 6., 0., 0., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").count() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) int64 24B 1 2 2 """ if ( flox_available @@ -3809,21 +3442,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.groupby("labels").all() - Size: 27B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) bool 3B False True True """ if ( flox_available @@ -3905,21 +3525,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").any() - Size: 27B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) bool 3B True True True """ if ( flox_available @@ -4007,31 +3614,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").max() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").max(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 3.0 """ if ( flox_available @@ -4121,31 +3709,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").min() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 2.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").min(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 0.0 """ if ( flox_available @@ -4223,6 +3792,8 @@ def mean( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -4235,31 +3806,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").mean() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 2.0 1.5 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").mean(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 1.5 """ if ( flox_available @@ -4344,7 +3896,7 @@ def prod( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -4358,41 +3910,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.groupby("labels").prod() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 4.0 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").prod(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 0.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").prod(skipna=True, min_count=2) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 0.0 """ if ( flox_available @@ -4479,7 +4006,7 @@ def sum( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -4493,41 +4020,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").sum() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").sum(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").sum(skipna=True, min_count=2) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 3.0 """ if ( flox_available @@ -4611,7 +4113,7 @@ def std( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -4625,41 +4127,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").std() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 0.0 0.0 1.5 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 1.5 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").std(skipna=True, ddof=1) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 2.121 """ if ( flox_available @@ -4743,7 +4220,7 @@ def var( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. 
+ Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -4757,41 +4234,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").var() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 0.0 0.0 2.25 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").var(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 2.25 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").var(skipna=True, ddof=1) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 4.5 """ if ( flox_available @@ -4871,7 +4323,7 @@ def median( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -4885,31 +4337,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").median() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B 1.0 2.0 1.5 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").median(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 1.5 """ return self.reduce( duck_array_ops.median, @@ -4920,6 +4353,100 @@ def median( **kwargs, ) + def nunique( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Dataset: + """ + Reduce this Dataset's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
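        When flox is installed, the groupby path below dispatches to flox with
        ``func="nunique"``; otherwise it falls back to ``duck_array_ops.nunique``.
        As a sanity check on the expected semantics (illustrative only, computed
        with pandas rather than either code path), the grouped unique counts for
        the example data used throughout these docstrings can be reproduced
        directly::

            import numpy as np
            import pandas as pd

            values = np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan])
            labels = ["a", "b", "c", "c", "b", "a"]

            # dropna=True mirrors skipna=True: the NaN in group "a" is not counted.
            pd.Series(values).groupby(labels).nunique(dropna=True)
            # a -> 1, b -> 1, c -> 2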
+ + Returns + ------- + reduced : Dataset + New Dataset with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + Dataset.nunique + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + >>> ds.groupby("labels").nunique() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").nunique(skipna=False) + """ + if ( + flox_available + and OPTIONS["use_flox"] + and contains_only_chunked_or_numpy(self._obj) + ): + return self._flox_reduce( + func="nunique", + dim=dim, + skipna=skipna, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + def cumsum( self, dim: Dims = None, @@ -4973,7 +4500,7 @@ def cumsum( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -4991,29 +4518,12 @@ def cumsum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").cumsum() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").cumsum(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -5077,7 +4587,7 @@ def cumprod( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -5095,29 +4605,12 @@ def cumprod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.groupby("labels").cumprod() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").cumprod(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -5209,21 +4702,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").count() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) int64 24B 1 3 1 """ if ( flox_available @@ -5305,21 +4785,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").all() - Size: 27B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool 3B True True False """ if ( flox_available @@ -5401,21 +4868,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 78B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").any() - Size: 27B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool 3B True True True """ if ( flox_available @@ -5503,31 +4957,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").max() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 3.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").max(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 3.0 nan """ if ( flox_available @@ -5617,31 +5052,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").min() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 0.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").min(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 0.0 nan """ if ( flox_available @@ -5719,6 +5135,8 @@ def mean( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. 
+ Examples -------- >>> da = xr.DataArray( @@ -5731,31 +5149,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").mean() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 1.667 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").mean(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 1.667 nan """ if ( flox_available @@ -5840,7 +5239,7 @@ def prod( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -5854,41 +5253,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").prod() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 0.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").prod(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 0.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3ME").prod(skipna=True, min_count=2) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 0.0 nan """ if ( flox_available @@ -5975,7 +5349,7 @@ def sum( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -5989,41 +5363,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").sum() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 5.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").sum(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 5.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> ds.resample(time="3ME").sum(skipna=True, min_count=2) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 5.0 nan """ if ( flox_available @@ -6107,7 +5456,7 @@ def std( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -6121,41 +5470,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").std() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 0.0 1.247 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").std(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 0.0 1.247 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3ME").std(skipna=True, ddof=1) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 1.528 nan """ if ( flox_available @@ -6239,7 +5563,7 @@ def var( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -6253,41 +5577,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").var() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 0.0 1.556 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").var(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 0.0 1.556 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3ME").var(skipna=True, ddof=1) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 2.333 nan """ if ( flox_available @@ -6367,7 +5666,7 @@ def median( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -6381,31 +5680,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.resample(time="3ME").median() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").median(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 2.0 nan """ return self.reduce( duck_array_ops.median, @@ -6416,6 +5696,100 @@ def median( **kwargs, ) + def nunique( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Dataset: + """ + Reduce this Dataset's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + Dataset.nunique + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + >>> ds.resample(time="3ME").nunique() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3ME").nunique(skipna=False) + """ + if ( + flox_available + and OPTIONS["use_flox"] + and contains_only_chunked_or_numpy(self._obj) + ): + return self._flox_reduce( + func="nunique", + dim=dim, + skipna=skipna, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + def cumsum( self, dim: Dims = None, @@ -6469,7 +5843,7 @@ def cumsum( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. 
datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -6487,29 +5861,12 @@ def cumsum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").cumsum() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 5.0 5.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").cumsum(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 5.0 5.0 2.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -6573,7 +5930,7 @@ def cumprod( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -6591,29 +5948,12 @@ def cumprod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - Size: 120B - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3ME").cumprod() - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3ME").cumprod(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -6704,17 +6044,8 @@ def count( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").count() - Size: 24B - array([1, 2, 2]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6793,17 +6124,8 @@ def all( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").all() - Size: 3B - array([False, True, True]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6882,17 +6204,8 @@ def any( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").any() - Size: 3B - array([ True, True, True]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6977,25 +6290,12 @@ def max( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.groupby("labels").max() - Size: 24B - array([1., 2., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").max(skipna=False) - Size: 24B - array([nan, 2., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7082,25 +6382,12 @@ def min( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").min() - Size: 24B - array([1., 2., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").min(skipna=False) - Size: 24B - array([nan, 2., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7176,6 +6463,8 @@ def mean( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -7187,25 +6476,12 @@ def mean( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").mean() - Size: 24B - array([1. , 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").mean(skipna=False) - Size: 24B - array([nan, 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7288,7 +6564,7 @@ def prod( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -7301,33 +6577,16 @@ def prod( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").prod() - Size: 24B - array([1., 4., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").prod(skipna=False) - Size: 24B - array([nan, 4., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").prod(skipna=True, min_count=2) - Size: 24B - array([nan, 4., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7412,7 +6671,7 @@ def sum( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -7425,33 +6684,16 @@ def sum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.groupby("labels").sum() - Size: 24B - array([1., 4., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").sum(skipna=False) - Size: 24B - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").sum(skipna=True, min_count=2) - Size: 24B - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7533,7 +6775,7 @@ def std( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -7546,33 +6788,16 @@ def std( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").std() - Size: 24B - array([0. , 0. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) - Size: 24B - array([nan, 0. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) - Size: 24B - array([ nan, 0. , 2.12132034]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7654,7 +6879,7 @@ def var( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -7667,33 +6892,16 @@ def var( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").var() - Size: 24B - array([0. , 0. , 2.25]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) - Size: 24B - array([ nan, 0. , 2.25]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").var(skipna=True, ddof=1) - Size: 24B - array([nan, 0. , 4.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7771,7 +6979,7 @@ def median( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -7784,25 +6992,12 @@ def median( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").median() - Size: 24B - array([1. , 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
>>> da.groupby("labels").median(skipna=False) - Size: 24B - array([nan, 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' """ return self.reduce( duck_array_ops.median, @@ -7812,6 +7007,97 @@ def median( **kwargs, ) + def nunique( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> DataArray: + """ + Reduce this DataArray's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + DataArray.nunique + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + >>> da.groupby("labels").nunique() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").nunique(skipna=False) + """ + if ( + flox_available + and OPTIONS["use_flox"] + and contains_only_chunked_or_numpy(self._obj) + ): + return self._flox_reduce( + func="nunique", + dim=dim, + skipna=skipna, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + def cumsum( self, dim: Dims = None, @@ -7865,7 +7151,7 @@ def cumsum( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -7882,27 +7168,12 @@ def cumsum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.groupby("labels").cumsum() - Size: 48B - array([1., 2., 3., 3., 4., 1.]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").cumsum(skipna=False) - Size: 48B - array([ 1., 2., 3., 3., 4., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -7982,27 +7253,12 @@ def cumprod( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").cumprod() - Size: 48B - array([1., 2., 3., 0., 4., 1.]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").cumprod(skipna=False) - Size: 48B - array([ 1., 2., 3., 0., 4., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").count() - Size: 24B - array([1, 3, 1]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8181,17 +7428,8 @@ def all( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").all() - Size: 3B - array([ True, True, False]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8270,17 +7508,8 @@ def any( ... ), ... ) >>> da - Size: 6B - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").any() - Size: 3B - array([ True, True, True]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8365,25 +7594,12 @@ def max( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").max() - Size: 24B - array([1., 3., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").max(skipna=False) - Size: 24B - array([ 1., 3., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8470,25 +7686,12 @@ def min( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").min() - Size: 24B - array([1., 0., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. 
>>> da.resample(time="3ME").min(skipna=False) - Size: 24B - array([ 1., 0., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8564,6 +7767,8 @@ def mean( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -8575,25 +7780,12 @@ def mean( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").mean() - Size: 24B - array([1. , 1.66666667, 2. ]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").mean(skipna=False) - Size: 24B - array([1. , 1.66666667, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8676,7 +7868,7 @@ def prod( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -8689,33 +7881,16 @@ def prod( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").prod() - Size: 24B - array([1., 0., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").prod(skipna=False) - Size: 24B - array([ 1., 0., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3ME").prod(skipna=True, min_count=2) - Size: 24B - array([nan, 0., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8800,7 +7975,7 @@ def sum( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -8813,33 +7988,16 @@ def sum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").sum() - Size: 24B - array([1., 5., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").sum(skipna=False) - Size: 24B - array([ 1., 5., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> da.resample(time="3ME").sum(skipna=True, min_count=2) - Size: 24B - array([nan, 5., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8921,7 +8079,7 @@ def std( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -8934,33 +8092,16 @@ def std( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").std() - Size: 24B - array([0. , 1.24721913, 0. ]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").std(skipna=False) - Size: 24B - array([0. , 1.24721913, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3ME").std(skipna=True, ddof=1) - Size: 24B - array([ nan, 1.52752523, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9042,7 +8183,7 @@ def var( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -9055,33 +8196,16 @@ def var( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").var() - Size: 24B - array([0. , 1.55555556, 0. ]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3ME").var(skipna=False) - Size: 24B - array([0. , 1.55555556, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3ME").var(skipna=True, ddof=1) - Size: 24B - array([ nan, 2.33333333, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9159,7 +8283,7 @@ def median( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- @@ -9172,25 +8296,12 @@ def median( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").median() - Size: 24B - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. 
>>> da.resample(time="3ME").median(skipna=False) - Size: 24B - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.median, @@ -9200,6 +8311,97 @@ def median( **kwargs, ) + def nunique( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> DataArray: + """ + Reduce this DataArray's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + DataArray.nunique + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + >>> da.resample(time="3ME").nunique() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3ME").nunique(skipna=False) + """ + if ( + flox_available + and OPTIONS["use_flox"] + and contains_only_chunked_or_numpy(self._obj) + ): + return self._flox_reduce( + func="nunique", + dim=dim, + skipna=skipna, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + def cumsum( self, dim: Dims = None, @@ -9253,7 +8455,7 @@ def cumsum( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -9270,27 +8472,12 @@ def cumsum( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.resample(time="3ME").cumsum() - Size: 48B - array([1., 2., 5., 5., 2., 2.]) - Coordinates: - labels (time) >> da.resample(time="3ME").cumsum(skipna=False) - Size: 48B - array([ 1., 2., 5., 5., 2., nan]) - Coordinates: - labels (time) `_ for more. - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -9370,27 +8557,12 @@ def cumprod( ... ), ... ) >>> da - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3ME").cumprod() - Size: 48B - array([1., 2., 6., 0., 2., 2.]) - Coordinates: - labels (time) >> da.resample(time="3ME").cumprod(skipna=False) - Size: 48B - array([ 1., 2., 6., 0., 2., nan]) - Coordinates: - labels (time) >> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.count() - Size: 8B - array(5) """ return self.reduce( duck_array_ops.count, @@ -116,12 +111,8 @@ def all( ... "x", np.array([True, True, True, True, True, False], dtype=bool) ... ) >>> na - Size: 6B - array([ True, True, True, True, True, False]) >>> na.all() - Size: 1B - array(False) """ return self.reduce( duck_array_ops.array_all, @@ -169,12 +160,8 @@ def any( ... "x", np.array([True, True, True, True, True, False], dtype=bool) ... ) >>> na - Size: 6B - array([ True, True, True, True, True, False]) >>> na.any() - Size: 1B - array(True) """ return self.reduce( duck_array_ops.array_any, @@ -227,18 +214,12 @@ def max( >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.max() - Size: 8B - array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> na.max(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.max, @@ -292,18 +273,12 @@ def min( >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.min() - Size: 8B - array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> na.min(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.min, @@ -352,23 +327,21 @@ def mean( :ref:`agg` User guide on reduction or aggregation operations. + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.mean() - Size: 8B - array(1.6) Use ``skipna`` to control whether NaNs are ignored. >>> na.mean(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.mean, @@ -426,31 +399,23 @@ def prod( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. 
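The sentence just above recurs throughout this patch wherever ``numeric_only`` reductions are documented: because datetime64 and timedelta64 count as numeric here, a reduction such as ``mean`` keeps a datetime variable and returns its midpoint rather than dropping it the way a string variable would be dropped. A minimal sketch of that behavior, assuming only numpy and xarray; it is illustrative and not part of the patch:

import numpy as np
import xarray as xr

# The mean of two timestamps is their midpoint; the variable is not
# discarded as non-numeric.
times = xr.DataArray(
    np.array(["2001-01-01", "2001-01-03"], dtype="datetime64[ns]"),
    dims="t",
)
print(times.mean())  # 2001-01-02
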
Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.prod() - Size: 8B - array(0.) Use ``skipna`` to control whether NaNs are ignored. >>> na.prod(skipna=False) - Size: 8B - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> na.prod(skipna=True, min_count=2) - Size: 8B - array(0.) """ return self.reduce( duck_array_ops.prod, @@ -509,31 +474,23 @@ def sum( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.sum() - Size: 8B - array(8.) Use ``skipna`` to control whether NaNs are ignored. >>> na.sum(skipna=False) - Size: 8B - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> na.sum(skipna=True, min_count=2) - Size: 8B - array(8.) """ return self.reduce( duck_array_ops.sum, @@ -589,31 +546,23 @@ def std( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.std() - Size: 8B - array(1.0198039) Use ``skipna`` to control whether NaNs are ignored. >>> na.std(skipna=False) - Size: 8B - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> na.std(skipna=True, ddof=1) - Size: 8B - array(1.14017543) """ return self.reduce( duck_array_ops.std, @@ -669,31 +618,23 @@ def var( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.var() - Size: 8B - array(1.04) Use ``skipna`` to control whether NaNs are ignored. >>> na.var(skipna=False) - Size: 8B - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> na.var(skipna=True, ddof=1) - Size: 8B - array(1.3) """ return self.reduce( duck_array_ops.var, @@ -745,25 +686,19 @@ def median( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Examples -------- >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.median() - Size: 8B - array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> na.median(skipna=False) - Size: 8B - array(nan) """ return self.reduce( duck_array_ops.median, @@ -772,6 +707,64 @@ def median( **kwargs, ) + def nunique( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``nunique`` along some dimension(s). 
+ + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + Dataset.nunique + DataArray.nunique + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + + >>> na.nunique() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.nunique(skipna=False) + """ + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + **kwargs, + ) + def cumsum( self, dim: Dims = None, @@ -815,7 +808,7 @@ def cumsum( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -826,18 +819,12 @@ def cumsum( >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.cumsum() - Size: 48B - array([1., 3., 6., 6., 8., 8.]) Use ``skipna`` to control whether NaNs are ignored. >>> na.cumsum(skipna=False) - Size: 48B - array([ 1., 3., 6., 6., 8., nan]) """ return self.reduce( duck_array_ops.cumsum, @@ -889,7 +876,7 @@ def cumprod( Notes ----- - Non-numeric variables will be removed prior to reducing. + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated @@ -900,18 +887,12 @@ def cumprod( >>> from xarray.namedarray.core import NamedArray >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) >>> na - Size: 48B - array([ 1., 2., 3., 0., 2., nan]) >>> na.cumprod() - Size: 48B - array([1., 2., 6., 0., 0., 0.]) Use ``skipna`` to control whether NaNs are ignored. 
>>> na.cumprod(skipna=False) - Size: 48B - array([ 1., 2., 6., 0., 0., nan]) """ return self.reduce( duck_array_ops.cumprod, diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5eec7b8a2fd..d0bab53330d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4916,6 +4916,45 @@ def line(x, a, b): assert_allclose(fit.curvefit_coefficients, expected) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("dim", ["c", None, ("b", "c")]) + def test_nunique(self, skipna, dim): + x = np.array( + [ + [ + [np.nan, np.nan, 2.0, np.nan], + [np.nan, 5.0, 6.0, np.nan], + [8.0, 9.0, 10.0, np.nan], + ], + [ + [np.nan, 13.0, 14.0, 15.0], + [np.nan, 17.0, 18.0, np.nan], + [np.nan, 21.0, np.nan, np.nan], + ], + ] + ) + coords = { + "a": range(x.shape[0]), + "b": range(x.shape[1]), + "c": range(x.shape[2]), + } + da = DataArray(x, coords=coords) + + coords_1 = {"a": range(x.shape[0]), "b": range(x.shape[1])} + coords_3 = {"a": range(x.shape[0])} + + expected_results = { + (True, "c"): DataArray([[1, 2, 3], [3, 2, 1]], coords=coords_1), + (True, None): DataArray(12), + (True, ("b", "c")): DataArray([6, 6], coords=coords_3), + (False, "c"): DataArray([[2, 3, 4], [4, 3, 2]], coords=coords_1), + (False, None): DataArray(13), + (False, ("b", "c")): DataArray([7, 7], coords=coords_3), + } + + result = da.nunique(dim=dim, skipna=skipna) + assert_identical(result, expected_results[(skipna, dim)]) + class TestReduce: @pytest.fixture(autouse=True) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e677430dfbf..209920b8f0d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -7482,6 +7482,67 @@ def test_query(self, backend, engine, parser) -> None: # pytest tests — new tests should go here, rather than in the class. 
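For readers checking the expected values in the new tests below: ``nunique`` counts distinct values along the reduced dimension(s); ``skipna=True`` leaves NaN out of the count, while ``skipna=False`` folds every NaN into a single extra value (matching ``pandas.Series.nunique(dropna=False)``). A self-contained sketch of that rule for one 1-D float array, illustrative only and not the implementation under test:

import numpy as np

def nunique_1d(values, skipna=True):
    # Count distinct values in a 1-D float array.
    mask = np.isnan(values)
    n = len(np.unique(values[~mask]))
    if not skipna and mask.any():
        n += 1  # all NaNs together count as one extra value
    return n

x = np.array([1.0, 2.0, 2.0, np.nan])
assert nunique_1d(x) == 2  # {1.0, 2.0}
assert nunique_1d(x, skipna=False) == 3  # {1.0, 2.0, nan}

Applying this rule along each reduced axis reproduces the expected arrays asserted in the tests that follow, e.g. ``[[1, 2, 3], [3, 2, 1]]`` with ``skipna=True`` versus ``[[2, 3, 4], [4, 3, 2]]`` with ``skipna=False``.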
+@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("dim", [("c", "dim_0", "dim_1"), None, ("a", "b")]) +def test_nunique(skipna, dim): + # Create test data + x = np.array( + [ + [ + [np.nan, np.nan, 2.0, np.nan], + [np.nan, 5.0, 6.0, np.nan], + [8.0, 9.0, 10.0, np.nan], + ], + [ + [np.nan, 13.0, 14.0, 15.0], + [np.nan, 17.0, 18.0, np.nan], + [np.nan, 21.0, np.nan, np.nan], + ], + ] + ) + coords = {"a": range(x.shape[0]), "b": range(x.shape[1]), "c": range(x.shape[2])} + da_1 = DataArray(x, coords=coords) + da_2 = DataArray(x) + ds = Dataset({"da_1": da_1, "da_2": da_2}) + + # Specify the coordinates and arrays we expect for each test case + coords_1 = {"a": range(x.shape[0]), "b": range(x.shape[1])} + coords_3 = {"c": range(x.shape[2])} + arr_1 = np.array([[1, 2, 3], [3, 2, 1]]) + arr_3 = np.array([1, 5, 5, 1]) + expected_results = { + (True, ("c", "dim_0", "dim_1")): (arr_1, coords_1, arr_3, ["dim_2"]), + (True, None): (12, None, 12, None), + (True, ("a", "b")): (arr_3, coords_3, x, None), + (False, ("c", "dim_0", "dim_1")): (arr_1 + 1, coords_1, arr_3 + 1, ["dim_2"]), + (False, None): (13, None, 13, None), + (False, ("a", "b")): (arr_3 + 1, coords_3, x, None), + } + + # Get the expected result for the current parameters + expected_result = expected_results[(skipna, dim)] + expected_ds = Dataset( + { + "da_1": DataArray(expected_result[0], coords=expected_result[1]), + "da_2": DataArray(expected_result[2], dims=expected_result[3]), + } + ) + + # Get the actual result and compare + result = ds.nunique(dim=dim, skipna=skipna) + assert_identical(result, expected_ds) + + +@pytest.mark.parametrize("skipna", [True, False]) +def test_nunique_pandas(skipna): + get_col = lambda: np.random.randint(0, 100, size=100) + get_da = lambda: xr.DataArray(get_col(), coords={"x": np.arange(100)}) + ds = xr.Dataset({"a": get_da(), "b": get_da(), "c": get_da(), "d": get_da()}) + xr_result = ds.nunique(skipna=skipna).to_array().values + pd_result = ds.to_dataframe().nunique(dropna=skipna).values + assert_array_equal(xr_result, pd_result) + + @pytest.mark.parametrize("parser", ["pandas", "python"]) def test_eval(ds, parser) -> None: """Currently much more minimal testing that `query` above, and much of the setup diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 0cd888f5782..21e1a6e0435 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -2312,6 +2312,18 @@ def test_subtree(self) -> None: actual = tree.children["child"].mean() assert_identical(expected, actual) + def test_nunique(self) -> None: + arr = np.array([[1, 2, 2], [3, 3, 3]]) + da = xr.DataArray(arr, coords={"x": [0, 1], "y": [0, 1, 2]}) + ds = xr.Dataset({"a": da}) + dt = DataTree.from_dict({"root": ds, "root/child": 2 * ds}) + expected_da = xr.DataArray(np.array([2, 1]), coords={"x": [0, 1]}) + expected_ds = xr.Dataset({"a": expected_da}) + expected_dt = DataTree.from_dict( + {"root": expected_ds, "root/child": expected_ds} + ) + assert_identical(expected_dt, dt.nunique(dim="y")) + class TestOps: def test_unary_op(self) -> None: diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 83c7c2bb207..4f4feed51f0 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -24,6 +24,7 @@ least_squares, mean, np_timedelta64_to_float, + nunique, pd_timedelta_to_float, push, py_timedelta_to_float, @@ -165,6 +166,33 @@ def test_count(self): assert 1 == count(np.datetime64("2000-01-01")) + 
@pytest.mark.parametrize("mixed_type", [True, False]) + @pytest.mark.parametrize("string_array", [True, False]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("axis", [2, None, (1, 2)]) + def test_nunique(self, axis, skipna, string_array, mixed_type): + expected_results = { + (True, 2): np.array([[1, 2, 3], [3, 2, 1]]), + (True, None): np.array(12), + (True, (1, 2)): np.array([6, 6]), + (False, 2): np.array([[2, 3, 4], [4, 3, 2]]), + (False, None): np.array(13), + (False, (1, 2)): np.array([7, 7]), + } + x = self.x.copy() + if string_array: + # Convert to str + x = x.astype(str) + # Convert to object and put nans back in + x = x.astype(object) + x[x == "nan"] = np.nan + if mixed_type: + x = x.astype(object) + x[(x == 10.0) | (x == "10.0")] = True + x[(x == 2.0) | (x == "2.0")] = np.sum + result = nunique(x, axis=axis, skipna=skipna) + assert_array_equal(result, expected_results[(skipna, axis)]) + def test_where_type_promotion(self): result = where(np.array([True, False]), np.array([1, 2]), np.array(["a", "b"])) assert_array_equal(result, np.array([1, "b"], dtype=object)) diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index e386b96f63d..8602b3181c1 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -530,6 +530,7 @@ def generate_code(self, method, has_keep_attrs): Method( "median", extra_kwargs=(skipna,), numeric_only=True, min_flox_version="0.9.2" ), + Method("nunique", extra_kwargs=(skipna,), see_also_modules=("pandas.DataFrame",)), # Cumulatives: Method( "cumsum", From 250f6a5a18b902a2997a5273904be4b9e89690d8 Mon Sep 17 00:00:00 2001 From: Ewan Short Date: Fri, 21 Nov 2025 13:28:07 +1100 Subject: [PATCH 2/8] fix accidental noqa removal --- xarray/core/duck_array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 113434cf587..2bcc9a189f7 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -183,7 +183,7 @@ def isnull(data): dtype = xp.bool_ if hasattr(xp, "bool_") else xp.bool return full_like(data, dtype=dtype, fill_value=False) # at this point, array should have dtype=object - elif isinstance(data, np.ndarray) or pd.api.types.is_extension_array_dtype(data): + elif isinstance(data, np.ndarray) or pd.api.types.is_extension_array_dtype(data): # noqa: TID251 return pandas_isnull(data) else: # Not reachable yet, but intended for use with other duck array From 2c318b32030e0e9d607ced7c2835f1eb45d53f6f Mon Sep 17 00:00:00 2001 From: Ewan Short Date: Fri, 21 Nov 2025 13:38:49 +1100 Subject: [PATCH 3/8] fix accidental reformat --- xarray/core/duck_array_ops.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 2bcc9a189f7..22b54836b01 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -276,14 +276,10 @@ def as_shared_dtype(scalars_or_arrays, xp=None): isinstance(x, type(extension_array_types[0])) for x in extension_array_types ): return [ - ( - x - if not isna(x) - else PandasExtensionArray( - type(non_nans[0].array)._from_sequence( - [x], dtype=non_nans[0].dtype - ) - ) + x + if not isna(x) + else PandasExtensionArray( + type(non_nans[0].array)._from_sequence([x], dtype=non_nans[0].dtype) ) for x in scalars_or_arrays ] From 2ebb2abb6b2e78ea63886fc290c63ea37d25e7a4 Mon Sep 17 00:00:00 2001 From: Ewan Short Date: Fri, 21 Nov 2025 15:50:57 
+1100 Subject: [PATCH 4/8] fix doctests --- xarray/core/_aggregations.py | 13695 +++++++++++++++------------ xarray/namedarray/_aggregations.py | 1394 +-- 2 files changed, 8497 insertions(+), 6592 deletions(-) diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index 7f4cc34a323..9e2393cb2df 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -42,54 +42,72 @@ def count( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - DataArray.count - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.count() + Reduce this DataTree's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> dt.count() + + Group: / + Dimensions: () + Data variables: + foo int64 8B 5 """ return self.reduce( duck_array_ops.count, @@ -107,59 +125,78 @@ def all( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - DataArray.all - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=( - ... "time", - ... np.array([True, True, True, True, True, False], dtype=bool), - ... ) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.all() + Reduce this DataTree's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=( + ... "time", + ... np.array( + ... [True, True, True, True, True, False], + ... dtype=bool, + ... ), + ... ) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> dt.all() + + Group: / + Dimensions: () + Data variables: + foo bool 1B False """ return self.reduce( duck_array_ops.array_all, @@ -177,59 +214,78 @@ def any( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." 
or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - DataArray.any - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=( - ... "time", - ... np.array([True, True, True, True, True, False], dtype=bool), - ... ) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.any() + Reduce this DataTree's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=( + ... "time", + ... np.array( + ... [True, True, True, True, True, False], + ... dtype=bool, + ... ), + ... ) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> dt.any() + + Group: / + Dimensions: () + Data variables: + foo bool 1B True """ return self.reduce( duck_array_ops.array_any, @@ -248,63 +304,86 @@ def max( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - DataArray.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.max() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> dt.max(skipna=False) + Reduce this DataTree's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> dt.max() + + Group: / + Dimensions: () + Data variables: + foo float64 8B 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.max(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan """ return self.reduce( duck_array_ops.max, @@ -324,63 +403,86 @@ def min( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``min`` along some dimension(s). 
- - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - DataArray.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.min() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> dt.min(skipna=False) + Reduce this DataTree's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> dt.min() + + Group: / + Dimensions: () + Data variables: + foo float64 8B 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.min(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan """ return self.reduce( duck_array_ops.min, @@ -400,67 +502,90 @@ def mean( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - DataArray.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.mean() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> dt.mean(skipna=False) + Reduce this DataTree's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. 
+ + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> dt.mean() + + Group: / + Dimensions: () + Data variables: + foo float64 8B 1.6 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.mean(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan """ return self.reduce( duck_array_ops.mean, @@ -481,77 +606,105 @@ def prod( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - DataArray.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.prod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> dt.prod(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> dt.prod(skipna=True, min_count=2) + Reduce this DataTree's data by applying ``prod`` along some dimension(s). 
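+
+        In NumPy terms the reduction is roughly ``np.nanprod`` when ``skipna``
+        is in effect (a sketch only; the actual implementation also handles
+        ``min_count`` and dask-backed arrays)::
+
+            np.nanprod(np.array([1, 2, 3, 0, 2, np.nan]))  # 0.0, NaN skipped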
+ + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> dt.prod() + + Group: / + Dimensions: () + Data variables: + foo float64 8B 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.prod(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> dt.prod(skipna=True, min_count=2) + + Group: / + Dimensions: () + Data variables: + foo float64 8B 0.0 """ return self.reduce( duck_array_ops.prod, @@ -573,77 +726,105 @@ def sum( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. 
If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - DataArray.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.sum() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> dt.sum(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> dt.sum(skipna=True, min_count=2) + Reduce this DataTree's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... 
data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> dt.sum() + + Group: / + Dimensions: () + Data variables: + foo float64 8B 8.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.sum(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> dt.sum(skipna=True, min_count=2) + + Group: / + Dimensions: () + Data variables: + foo float64 8B 8.0 """ return self.reduce( duck_array_ops.sum, @@ -665,74 +846,102 @@ def std( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - DataArray.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.std() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> dt.std(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> dt.std(skipna=True, ddof=1) + Reduce this DataTree's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> dt.std() + + Group: / + Dimensions: () + Data variables: + foo float64 8B 1.02 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.std(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> dt.std(skipna=True, ddof=1) + + Group: / + Dimensions: () + Data variables: + foo float64 8B 1.14 """ return self.reduce( duck_array_ops.std, @@ -754,74 +963,102 @@ def var( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
- - Returns - ------- - reduced : DataTree - New DataTree with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - DataArray.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.var() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> dt.var(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> dt.var(skipna=True, ddof=1) + Reduce this DataTree's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> dt.var() + + Group: / + Dimensions: () + Data variables: + foo float64 8B 1.04 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.var(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan + + Specify ``ddof=1`` for an unbiased estimate. 
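+        With ``ddof=1`` the divisor becomes ``N - 1`` (Bessel's correction).
+        In NumPy terms, for a 1-D array ``x`` with NaNs already dropped, the
+        result is roughly (a sketch only)::
+
+            np.sum((x - x.mean()) ** 2) / (x.size - ddof)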
+ + >>> dt.var(skipna=True, ddof=1) + + Group: / + Dimensions: () + Data variables: + foo float64 8B 1.3 """ return self.reduce( duck_array_ops.var, @@ -842,67 +1079,90 @@ def median( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - DataArray.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> dt.median(skipna=False) + Reduce this DataTree's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
+
+        Examples
+        --------
+        >>> dt = xr.DataTree(
+        ...     xr.Dataset(
+        ...         data_vars=dict(
+        ...             foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))
+        ...         ),
+        ...         coords=dict(
+        ...             time=(
+        ...                 "time",
+        ...                 pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...             ),
+        ...             labels=(
+        ...                 "time",
+        ...                 np.array(["a", "b", "c", "c", "b", "a"]),
+        ...             ),
+        ...         ),
+        ...     ),
+        ... )
+        >>> dt
+        <xarray.DataTree>
+        Group: /
+            Dimensions:  (time: 6)
+            Coordinates:
+              * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+                labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+            Data variables:
+                foo      (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> dt.median()
+        <xarray.DataTree>
+        Group: /
+            Dimensions:  ()
+            Data variables:
+                foo      float64 8B 2.0
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> dt.median(skipna=False)
+        <xarray.DataTree>
+        Group: /
+            Dimensions:  ()
+            Data variables:
+                foo      float64 8B nan
         """
         return self.reduce(
             duck_array_ops.median,
@@ -922,62 +1182,85 @@ def nunique(
         **kwargs: Any,
     ) -> Self:
         """
-        Reduce this DataTree's data by applying ``nunique`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``nunique`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : DataTree
-            New DataTree with ``nunique`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        pandas.DataFrame.nunique
-        Dataset.nunique
-        DataArray.nunique
-        :ref:`agg`
-            User guide on reduction or aggregation operations.
-
-        Examples
-        --------
-        >>> dt = xr.DataTree(
-        ...     xr.Dataset(
-        ...         data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))),
-        ...         coords=dict(
-        ...             time=(
-        ...                 "time",
-        ...                 pd.date_range("2001-01-01", freq="ME", periods=6),
-        ...             ),
-        ...             labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...         ),
-        ...     ),
-        ... )
-        >>> dt
-
-        >>> dt.nunique()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> dt.nunique(skipna=False)
+        Reduce this DataTree's data by applying ``nunique`` along some dimension(s).
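+
+        ``nunique`` counts the distinct values along the given dimension(s),
+        analogously to ``pandas.DataFrame.nunique``. For a 1-D NumPy array ``x``
+        containing at most one NaN, the result is roughly (a sketch only; the
+        real reduction also works lazily on dask-backed arrays)::
+
+            len(np.unique(x[~np.isnan(x)]))  # default: NaN skipped
+            len(np.unique(x))  # skipna=False: NaN counts as a value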
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``nunique`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : DataTree
+            New DataTree with ``nunique`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        pandas.DataFrame.nunique
+        Dataset.nunique
+        DataArray.nunique
+        :ref:`agg`
+            User guide on reduction or aggregation operations.
+
+        Examples
+        --------
+        >>> dt = xr.DataTree(
+        ...     xr.Dataset(
+        ...         data_vars=dict(
+        ...             foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))
+        ...         ),
+        ...         coords=dict(
+        ...             time=(
+        ...                 "time",
+        ...                 pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...             ),
+        ...             labels=(
+        ...                 "time",
+        ...                 np.array(["a", "b", "c", "c", "b", "a"]),
+        ...             ),
+        ...         ),
+        ...     ),
+        ... )
+        >>> dt
+        <xarray.DataTree>
+        Group: /
+            Dimensions:  (time: 6)
+            Coordinates:
+              * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+                labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+            Data variables:
+                foo      (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> dt.nunique()
+        <xarray.DataTree>
+        Group: /
+            Dimensions:  ()
+            Data variables:
+                foo      int64 8B 4
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> dt.nunique(skipna=False)
+        <xarray.DataTree>
+        Group: /
+            Dimensions:  ()
+            Data variables:
+                foo      int64 8B 5
         """
         return self.reduce(
             duck_array_ops.nunique,
@@ -997,72 +1280,97 @@ def cumsum(
         **kwargs: Any,
     ) -> Self:
         """
-        Reduce this DataTree's data by applying ``cumsum`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``cumsum`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : DataTree
-            New DataTree with ``cumsum`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.cumsum
-        dask.array.cumsum
-        Dataset.cumsum
-        DataArray.cumsum
-        DataTree.cumulative
-        :ref:`agg`
-            User guide on reduction or aggregation operations.
-
-        Notes
-        -----
-        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
-        Note that the methods on the ``cumulative`` method are more performant (with numbagg installed)
-        and better supported. ``cumsum`` and ``cumprod`` may be deprecated
-        in the future.
-
-        Examples
-        --------
-        >>> dt = xr.DataTree(
-        ...     xr.Dataset(
-        ...         data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))),
-        ...         coords=dict(
-        ...             time=(
-        ...                 "time",
-        ...                 pd.date_range("2001-01-01", freq="ME", periods=6),
-        ...             ),
-        ...             labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...         ),
-        ...     ),
-        ... )
-        >>> dt
-
-        >>> dt.cumsum()
-
-        Use ``skipna`` to control whether NaNs are ignored.
- - >>> dt.cumsum(skipna=False) + Reduce this DataTree's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + DataArray.cumsum + DataTree.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> dt.cumsum() + + Group: / + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + foo (time) float64 48B 1.0 3.0 6.0 6.0 8.0 8.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.cumsum(skipna=False) + + Group: / + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + foo (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -1082,72 +1390,97 @@ def cumprod( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - DataArray.cumprod - DataTree.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ), - ... ) - >>> dt - - >>> dt.cumprod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> dt.cumprod(skipna=False) + Reduce this DataTree's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + DataArray.cumprod + DataTree.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=( + ... "time", + ... np.array(["a", "b", "c", "c", "b", "a"]), + ... ), + ... ), + ... ), + ... ) + >>> dt + + Group: / + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> dt.cumprod() + + Group: / + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + foo (time) float64 48B 1.0 2.0 6.0 0.0 0.0 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> dt.cumprod(skipna=False) + + Group: / + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + foo (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -1182,50 +1515,64 @@ def count( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - DataArray.count - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.count() + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.count()
+        <xarray.Dataset> Size: 8B
+        Dimensions:  ()
+        Data variables:
+            da       int64 8B 5
         """
         return self.reduce(
             duck_array_ops.count,
@@ -1243,50 +1590,64 @@ def all(
         **kwargs: Any,
     ) -> Self:
         """
-        Reduce this Dataset's data by applying ``all`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``all`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``all`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.all
-        dask.array.all
-        DataArray.all
-        :ref:`agg`
-            User guide on reduction or aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([True, True, True, True, True, False], dtype=bool),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.all()
+        Reduce this Dataset's data by applying ``all`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``all`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``all`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.all
+        dask.array.all
+        DataArray.all
+        :ref:`agg`
+            User guide on reduction or aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([True, True, True, True, True, False], dtype=bool),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 78B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) bool 6B True True True True True False
+
+        >>> ds.all()
+        <xarray.Dataset> Size: 1B
+        Dimensions:  ()
+        Data variables:
+            da       bool 1B False
         """
         return self.reduce(
             duck_array_ops.array_all,
@@ -1304,50 +1665,64 @@ def any(
         **kwargs: Any,
     ) -> Self:
         """
-        Reduce this Dataset's data by applying ``any`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
- keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.any() + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 78B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.any() + Size: 1B + Dimensions: () + Data variables: + da bool 1B True """ return self.reduce( duck_array_ops.array_any, @@ -1366,59 +1741,77 @@ def max( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.max() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.max(skipna=False) + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.max() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.max(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.max, @@ -1438,59 +1831,77 @@ def min( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.min() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.min(skipna=False) + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.min() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.min(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.min, @@ -1510,63 +1921,81 @@ def mean( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." 
or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.mean() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.mean(skipna=False) + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.mean() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.6 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.mean(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.mean, @@ -1587,73 +2016,95 @@ def prod( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.prod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.prod(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> ds.prod(skipna=True, min_count=2) + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). 
+ min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.prod() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.prod(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.prod(skipna=True, min_count=2) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 0.0 """ return self.reduce( duck_array_ops.prod, @@ -1675,73 +2126,95 @@ def sum( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. 
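The ``min_count`` rule above is easiest to see with data that has fewer valid values than the threshold; a small sketch with illustrative values:

    import numpy as np
    import xarray as xr

    arr = xr.DataArray([np.nan, np.nan, 1.0], dims="x")

    # One valid value: min_count=1 is satisfied, min_count=2 is not.
    assert float(arr.sum(skipna=True, min_count=1)) == 1.0
    assert np.isnan(float(arr.sum(skipna=True, min_count=2)))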
- These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.sum() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.sum(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> ds.sum(skipna=True, min_count=2) + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.sum() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 8.0 + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> ds.sum(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.sum(skipna=True, min_count=2) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 8.0 """ return self.reduce( duck_array_ops.sum, @@ -1763,70 +2236,92 @@ def std( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.std() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.std(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> ds.std(skipna=True, ddof=1) + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. 
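The ``ddof`` divisor described here can be verified against plain numpy on the non-NaN entries of the example data used throughout, which is where the doctest values (std 1.02/1.14, var 1.04/1.3) come from:

    import numpy as np

    valid = np.array([1.0, 2.0, 3.0, 0.0, 2.0])  # the non-NaN entries

    assert np.isclose(np.std(valid, ddof=0), 1.0198, atol=1e-3)  # ~1.02
    assert np.isclose(np.std(valid, ddof=1), 1.1402, atol=1e-3)  # ~1.14
    assert np.isclose(np.var(valid, ddof=0), 1.04)
    assert np.isclose(np.var(valid, ddof=1), 1.3)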
+ **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.std() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.02 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.std(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.std(skipna=True, ddof=1) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.14 """ return self.reduce( duck_array_ops.std, @@ -1848,70 +2343,92 @@ def var( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.var() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.var(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> ds.var(skipna=True, ddof=1) + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.var() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.04 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.var(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.var(skipna=True, ddof=1) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.3 """ return self.reduce( duck_array_ops.var, @@ -1932,63 +2449,81 @@ def median( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. 
If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.median(skipna=False) + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.median() + Size: 8B + Dimensions: () + Data variables: + da float64 8B 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.median(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.median, @@ -2008,58 +2543,76 @@ def nunique( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``nunique`` along some dimension(s). 
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``nunique`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``nunique`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        pandas.DataFrame.nunique
-        DataArray.nunique
-        :ref:`agg`
-            User guide on reduction or aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.nunique()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.nunique(skipna=False)
+        Reduce this Dataset's data by applying ``nunique`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``nunique`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``nunique`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        pandas.DataFrame.nunique
+        DataArray.nunique
+        :ref:`agg`
+            User guide on reduction or aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.nunique()
+        <xarray.Dataset> Size: 8B
+        Dimensions:  ()
+        Data variables:
+            da       int64 8B 4
+
+        Use ``skipna`` to control whether NaNs are ignored.
+ + >>> ds.nunique(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da int64 8B 5 """ return self.reduce( duck_array_ops.nunique, @@ -2079,68 +2632,88 @@ def cumsum( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``cumsum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumsum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``cumsum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumsum - dask.array.cumsum - DataArray.cumsum - Dataset.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.cumsum() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.cumsum(skipna=False) + Reduce this Dataset's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
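Stepping back to ``nunique``: the ``duck_array_ops.nunique`` reducer it delegates to is defined elsewhere in this patch (``xarray/core/duck_array_ops.py``) and is not shown here. A minimal numpy-only sketch of the documented semantics, assuming float input; ``nunique_sketch`` is a hypothetical stand-in, not the patch's implementation:

    import numpy as np

    def nunique_sketch(values, axis=None, skipna=True):
        # Hypothetical stand-in for duck_array_ops.nunique (float dtype only).
        def count_distinct(vec):
            mask = np.isnan(vec)
            n = np.unique(vec[~mask]).size
            # With skipna=False, NaN counts as one extra distinct value.
            return n + (0 if skipna else int(mask.any()))

        if axis is None:
            return count_distinct(values.ravel())
        return np.apply_along_axis(count_distinct, axis, values)

    values = np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan])
    assert nunique_sketch(values) == 4  # distinct non-NaN: {0, 1, 2, 3}
    assert nunique_sketch(values, skipna=False) == 5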
+ + Returns + ------- + reduced : Dataset + New Dataset with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + DataArray.cumsum + Dataset.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumsum() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 8.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.cumsum(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -2160,68 +2733,88 @@ def cumprod( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - DataArray.cumprod - Dataset.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.cumprod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.cumprod(skipna=False) + Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + DataArray.cumprod + Dataset.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumprod() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.cumprod(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -2256,49 +2849,59 @@ def count( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. 
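On the Notes for ``cumsum``/``cumprod`` above: the recommended ``cumulative`` accessor takes the dimension explicitly. A hedged sketch, assuming the accessor mentioned in those Notes:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"da": ("time", [1.0, 2.0, 3.0, 0.0, 2.0, np.nan])})

    # The better-supported spelling of ds.cumsum("time").
    ds.cumulative("time").sum()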
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : DataArray
-            New DataArray with ``count`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        pandas.DataFrame.count
-        dask.dataframe.DataFrame.count
-        Dataset.count
-        :ref:`agg`
-            User guide on reduction or aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> da
-
-        >>> da.count()
+        Reduce this DataArray's data by applying ``count`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``count`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : DataArray
+            New DataArray with ``count`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        pandas.DataFrame.count
+        dask.dataframe.DataFrame.count
+        Dataset.count
+        :ref:`agg`
+            User guide on reduction or aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> da
+        <xarray.DataArray (time: 6)> Size: 48B
+        array([ 1.,  2.,  3.,  0.,  2., nan])
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+
+        >>> da.count()
+        <xarray.DataArray ()> Size: 8B
+        array(5)
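Each of these docstrings fronts the same one-line implementation: the method body below just calls ``self.reduce`` with a ``duck_array_ops`` reducer. The same entry point accepts ad-hoc callables with a numpy-reduction signature; the reducer here is illustrative:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"da": ("time", [1.0, 2.0, 3.0, 0.0, 2.0, np.nan])})

    # .reduce() passes the array plus an `axis` keyword to the callable.
    assert float(ds.reduce(np.nanmax, dim="time")["da"]) == 3.0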
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.all() + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.all() + Size: 1B + array(False) """ return self.reduce( duck_array_ops.array_all, @@ -2374,49 +2987,59 @@ def any( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.any() + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. 
+ **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.any() + Size: 1B + array(True) """ return self.reduce( duck_array_ops.array_any, @@ -2434,58 +3057,70 @@ def max( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.max() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.max(skipna=False) + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.max() + Size: 8B + array(3.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.max(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.max, @@ -2504,58 +3139,70 @@ def min( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.min() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.min(skipna=False) + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). 
+ keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.min() + Size: 8B + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.min(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.min, @@ -2574,62 +3221,74 @@ def mean( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.mean() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.mean(skipna=False) + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. 
+ skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.mean() + Size: 8B + array(1.6) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.mean(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.mean, @@ -2649,72 +3308,86 @@ def prod( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. 
datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.prod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.prod(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> da.prod(skipna=True, min_count=2) + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.prod() + Size: 8B + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.prod(skipna=False) + Size: 8B + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.prod(skipna=True, min_count=2) + Size: 8B + array(0.) """ return self.reduce( duck_array_ops.prod, @@ -2735,72 +3408,86 @@ def sum( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." 
or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.sum() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.sum(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> da.sum(skipna=True, min_count=2) + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.sum() + Size: 8B + array(8.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.sum(skipna=False) + Size: 8B + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.sum(skipna=True, min_count=2) + Size: 8B + array(8.) """ return self.reduce( duck_array_ops.sum, @@ -2821,69 +3508,83 @@ def std( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.std() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.std(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.std(skipna=True, ddof=1) + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." 
or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.std() + Size: 8B + array(1.0198039) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.std(skipna=False) + Size: 8B + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.std(skipna=True, ddof=1) + Size: 8B + array(1.14017543) """ return self.reduce( duck_array_ops.std, @@ -2904,69 +3605,83 @@ def var( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
- - Returns - ------- - reduced : DataArray - New DataArray with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.var() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.var(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.var(skipna=True, ddof=1) + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.var() + Size: 8B + array(1.04) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.var(skipna=False) + Size: 8B + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.var(skipna=True, ddof=1) + Size: 8B + array(1.3) """ return self.reduce( duck_array_ops.var, @@ -2986,62 +3701,74 @@ def median( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. 
``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.median(skipna=False) + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+
+        >>> da.median()
+        <xarray.DataArray ()> Size: 8B
+        array(2.)
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> da.median(skipna=False)
+        <xarray.DataArray ()> Size: 8B
+        array(nan)
         """
         return self.reduce(
             duck_array_ops.median,
@@ -3060,57 +3787,69 @@ def nunique(
         **kwargs: Any,
     ) -> Self:
         """
-        Reduce this DataArray's data by applying ``nunique`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``nunique`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : DataArray
-            New DataArray with ``nunique`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        pandas.DataFrame.nunique
-        Dataset.nunique
-        :ref:`agg`
-            User guide on reduction or aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> da
-
-        >>> da.nunique()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> da.nunique(skipna=False)
+        Reduce this DataArray's data by applying ``nunique`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``nunique`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : DataArray
+            New DataArray with ``nunique`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        pandas.DataFrame.nunique
+        Dataset.nunique
+        :ref:`agg`
+            User guide on reduction or aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> da
+        <xarray.DataArray (time: 6)> Size: 48B
+        array([ 1.,  2.,  3.,  0.,  2., nan])
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+
+        >>> da.nunique()
+        <xarray.DataArray ()> Size: 8B
+        array(4)
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> da.nunique(skipna=False)
+        <xarray.DataArray ()> Size: 8B
+        array(5)
         """
         return self.reduce(
             duck_array_ops.nunique,
@@ -3129,67 +3868,85 @@ def cumsum(
         **kwargs: Any,
     ) -> Self:
         """
-        Reduce this DataArray's data by applying ``cumsum`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``cumsum`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : DataArray
-            New DataArray with ``cumsum`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.cumsum
-        dask.array.cumsum
-        Dataset.cumsum
-        DataArray.cumulative
-        :ref:`agg`
-            User guide on reduction or aggregation operations.
-
-        Notes
-        -----
-        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
-        Note that the methods on the ``cumulative`` method are more performant (with numbagg installed)
-        and better supported. ``cumsum`` and ``cumprod`` may be deprecated
-        in the future.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> da
-
-        >>> da.cumsum()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> da.cumsum(skipna=False)
+        Reduce this DataArray's data by applying ``cumsum`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``cumsum`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
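
Note that the hunk above only registers ``duck_array_ops.nunique`` as the
reduction kernel (via the ``self.reduce`` call); the kernel itself lives in
``xarray/core/duck_array_ops.py`` and is outside this excerpt. A minimal
sketch of what such a kernel could look like, assuming a plain-NumPy path
only; the function body, its recursion trick, and the single-int ``axis``
restriction are illustrative assumptions, not the patch's actual code:

    # Hypothetical sketch only -- not the implementation in this patch.
    # The real duck_array_ops.nunique must also dispatch to dask and other
    # duck arrays, and handle tuple axes; this handles a single int axis.
    import numpy as np

    def nunique(values, axis=None, skipna=True):
        """Count distinct values along ``axis``, optionally ignoring NaN."""
        values = np.asarray(values)
        if axis is None:
            flat = values.ravel()
            if skipna and flat.dtype.kind == "f":
                # Drop NaN before counting, mirroring pandas' dropna=True.
                flat = flat[~np.isnan(flat)]
            return np.unique(flat).size
        # Reduce along one axis by applying the flat case to each 1-D slice.
        return np.apply_along_axis(
            lambda v: nunique(v, axis=None, skipna=skipna), axis, values
        )

On the docstring's data, ``nunique(np.array([1., 2., 3., 0., 2., np.nan]))``
would give 4 under this sketch, and 5 with ``skipna=False``, matching the
pandas-like semantics the docstring describes.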
+ + Returns + ------- + reduced : DataArray + New DataArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + DataArray.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.cumsum() + Size: 48B + array([1., 3., 6., 6., 8., 8.]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.cumsum(skipna=False) + Size: 48B + array([ 1., 3., 6., 6., 8., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Self: """ - Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - DataArray.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.cumprod() - - Use ``skipna`` to control whether NaNs are ignored. 
- - >>> da.cumprod(skipna=False) + Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + DataArray.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.cumprod() + Size: 48B + array([1., 2., 6., 0., 0., 0.]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.cumprod(skipna=False) + Size: 48B + array([ 1., 2., 6., 0., 0., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Dataset: """ - Reduce this Dataset's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - :ref:`groupby` - User guide on groupby operations. 
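
The cumsum/cumprod Notes above steer users toward ``DataArray.cumulative``
as the better-supported API. For reference, a hedged usage sketch, assuming
the ``cumulative`` accessor as it exists in recent xarray releases (the
variable names are illustrative):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan]), dims="time")

    # Accessor form of da.cumsum("time"); with numbagg installed this can
    # dispatch to a faster kernel and keeps the dimension explicit.
    out = da.cumulative("time").sum()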
- - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").count() + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").count() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) int64 24B 1 2 2 """ if ( flox_available @@ -3392,58 +4183,74 @@ def all( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
- - Returns - ------- - reduced : Dataset - New Dataset with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").all() + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 78B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").all() + Size: 27B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) bool 3B False True True """ if ( flox_available @@ -3475,58 +4282,74 @@ def any( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").any() + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 78B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").any() + Size: 27B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) bool 3B True True True """ if ( flox_available @@ -3559,67 +4382,89 @@ def max( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. 
- skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").max() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").max(skipna=False) + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").max() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 2.0 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").max(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 3.0 """ if ( flox_available @@ -3654,67 +4499,89 @@ def min( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").min() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").min(skipna=False) + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").min() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 2.0 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").min(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 0.0 """ if ( flox_available @@ -3749,69 +4616,91 @@ def mean( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. 
- - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").mean() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").mean(skipna=False) + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").mean() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 2.0 1.5 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").mean(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 1.5 """ if ( flox_available @@ -3847,79 +4736,107 @@ def prod( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. 
- If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").prod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").prod(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> ds.groupby("labels").prod(skipna=True, min_count=2) + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").prod() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 4.0 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").prod(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 0.0 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.groupby("labels").prod(skipna=True, min_count=2) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 0.0 """ if ( flox_available @@ -3957,79 +4874,107 @@ def sum( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").sum() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").sum(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> ds.groupby("labels").sum(skipna=True, min_count=2) + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. 
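A quick standalone check of the ``min_count`` behaviour documented above may help reviewers; this is a minimal sketch using the same toy data as the doctests, not part of the generated file:

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1, 2, 3, 0, 2, np.nan]),
        dims="time",
        coords=dict(
            time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
            labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
        ),
    )
    ds = xr.Dataset(dict(da=da))
    # Group "a" holds [1.0, nan]: only one valid value, so with
    # min_count=2 its sum becomes NaN, while groups "b" and "c"
    # (two valid values each) are unaffected.
    print(ds.groupby("labels").sum(skipna=True, min_count=2).da.values)
    # -> [nan  4.  3.]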
+ + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").sum() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 4.0 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").sum(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 3.0 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.groupby("labels").sum(skipna=True, min_count=2) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 3.0 """ if ( flox_available @@ -4067,76 +5012,104 @@ def std( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").std() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").std(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> ds.groupby("labels").std(skipna=True, ddof=1) + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").std() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 0.0 0.0 1.5 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").std(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 1.5 + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.groupby("labels").std(skipna=True, ddof=1) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 2.121 """ if ( flox_available @@ -4174,76 +5147,104 @@ def var( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``var`` along some dimension(s). 
- - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").var() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").var(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> ds.groupby("labels").var(skipna=True, ddof=1) + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").var() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 0.0 0.0 2.25 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").var(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 2.25 + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.groupby("labels").var(skipna=True, ddof=1) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 4.5 """ if ( flox_available @@ -4280,69 +5281,91 @@ def median( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. 
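To make the ``ddof`` arithmetic above concrete, here is a hand check against the ``var``/``std`` doctest outputs; plain NumPy, nothing in this snippet is part of the patch:

    import numpy as np

    c = np.array([3.0, 0.0])  # the two values in group "c"
    # Population variance (ddof=0): mean((x - 1.5)**2) = 2.25
    print(np.var(c, ddof=0))  # 2.25
    # Unbiased variance (ddof=1): divisor N - ddof = 1, giving 4.5
    print(np.var(c, ddof=1))  # 4.5
    # Corresponding std: sqrt(4.5) ~= 2.121, as shown in the doctests
    print(np.std(c, ddof=1))  # 2.1213...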
- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").median(skipna=False) + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").median() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B 1.0 2.0 1.5 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").median(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 1.5 """ return self.reduce( duck_array_ops.median, @@ -4362,66 +5385,76 @@ def nunique( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``nunique`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." 
or None, default: None - Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``nunique`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``nunique`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.nunique - Dataset.nunique - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").nunique() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").nunique(skipna=False) + Reduce this Dataset's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + Dataset.nunique + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... 
np.array([1, 2, 3, 0, 2, np.nan]),
+ ... dims="time",
+ ... coords=dict(
+ ... time=(
+ ... "time",
+ ... pd.date_range("2001-01-01", freq="ME", periods=6),
+ ... ),
+ ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+ ... ),
+ ... )
+ >>> ds = xr.Dataset(dict(da=da))
+ >>> ds
+ <xarray.Dataset> Size: 120B
+ Dimensions: (time: 6)
+ Coordinates:
+ * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+ labels (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+ Data variables:
+ da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+ >>> ds.groupby("labels").nunique()
+ <xarray.Dataset> Size: 48B
+ Dimensions: (labels: 3)
+ Coordinates:
+ * labels (labels) object 24B 'a' 'b' 'c'
+ Data variables:
+ da (labels) int64 24B 1 1 2
+
+ Use ``skipna`` to control whether NaNs are ignored.
+
+ >>> ds.groupby("labels").nunique(skipna=False)
+ <xarray.Dataset> Size: 48B
+ Dimensions: (labels: 3)
+ Coordinates:
+ * labels (labels) object 24B 'a' 'b' 'c'
+ Data variables:
+ da (labels) int64 24B 2 1 2
 """
 if (
 flox_available
@@ -4456,74 +5489,94 @@ def cumsum(
 **kwargs: Any,
 ) -> Dataset:
 """
- Reduce this Dataset's data by applying ``cumsum`` along some dimension(s).
-
- Parameters
- ----------
- dim : str, Iterable of Hashable, "..." or None, default: None
- Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
- or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions.
- If "...", will reduce over all dimensions.
- skipna : bool or None, optional
- If True, skip missing values (as marked by NaN). By default, only
- skips missing values for float dtypes; other dtypes either do not
- have a sentinel missing value (int) or ``skipna=True`` has not been
- implemented (object, datetime64 or timedelta64).
- keep_attrs : bool or None, optional
- If True, ``attrs`` will be copied from the original
- object to the new one. If False, the new object will be
- returned without attributes.
- **kwargs : Any
- Additional keyword arguments passed on to the appropriate array
- function for calculating ``cumsum`` on this object's data.
- These could include dask-specific kwargs like ``split_every``.
-
- Returns
- -------
- reduced : Dataset
- New Dataset with ``cumsum`` applied to its data and the
- indicated dimension(s) removed
-
- See Also
- --------
- numpy.cumsum
- dask.array.cumsum
- Dataset.cumsum
- Dataset.cumulative
- :ref:`groupby`
- User guide on groupby operations.
-
- Notes
- -----
- Use the ``flox`` package to significantly speed up groupby computations,
- especially with dask arrays. Xarray will use flox by default if installed.
- Pass flox-specific keyword arguments in ``**kwargs``.
- See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
- Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
- Note that the methods on the ``cumulative`` method are more performant (with numbagg installed)
- and better supported. ``cumsum`` and ``cumprod`` may be deprecated
- in the future.
-
- Examples
- --------
- >>> da = xr.DataArray(
- ... np.array([1, 2, 3, 0, 2, np.nan]),
- ... dims="time",
- ... coords=dict(
- ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
- ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
- ... ),
- ... )
- >>> ds = xr.Dataset(dict(da=da))
- >>> ds
-
- >>> ds.groupby("labels").cumsum()
-
- Use ``skipna`` to control whether NaNs are ignored.
-
- >>> ds.groupby("labels").cumsum(skipna=False)
+ Reduce this Dataset's data by applying ``cumsum`` along some dimension(s).
+
+ Parameters
+ ----------
+ dim : str, Iterable of Hashable, "..." or None, default: None
+ Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
+ or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions.
+ If "...", will reduce over all dimensions.
+ skipna : bool or None, optional
+ If True, skip missing values (as marked by NaN).
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + Dataset.cumulative + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").cumsum() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").cumsum(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -4543,74 +5596,94 @@ def cumprod( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
- - Returns - ------- - reduced : Dataset - New Dataset with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - Dataset.cumulative - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.groupby("labels").cumprod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").cumprod(skipna=False) + Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + Dataset.cumulative + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").cumprod() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").cumprod(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -4652,58 +5725,74 @@ def count( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.resample(time="3ME").count() + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + :ref:`resampling` + User guide on resampling operations. 
+ + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").count() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 24B 1 3 1 """ if ( flox_available @@ -4735,58 +5824,74 @@ def all( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.resample(time="3ME").all() + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
@@ -4735,58 +5824,74 @@ def all(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``all`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``all`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``all`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.all
-        dask.array.all
-        Dataset.all
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([True, True, True, True, True, False], dtype=bool),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").all()
+        Reduce this Dataset's data by applying ``all`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``all`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``all`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.all
+        dask.array.all
+        Dataset.all
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([True, True, True, True, True, False], dtype=bool),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 78B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) bool 6B True True True True True False
+
+        >>> ds.resample(time="3ME").all()
+        <xarray.Dataset> Size: 27B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) bool 3B True True False
         """
         if (
             flox_available
@@ -4818,58 +5923,74 @@ def any(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``any`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``any`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``any`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.any
-        dask.array.any
-        Dataset.any
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([True, True, True, True, True, False], dtype=bool),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").any()
+        Reduce this Dataset's data by applying ``any`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``any`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``any`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.any
+        dask.array.any
+        Dataset.any
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([True, True, True, True, True, False], dtype=bool),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 78B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) bool 6B True True True True True False
+
+        >>> ds.resample(time="3ME").any()
+        <xarray.Dataset> Size: 27B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) bool 3B True True True
         """
         if (
             flox_available
@@ -4902,67 +6023,89 @@ def max(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``max`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``max`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``max`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.max
-        dask.array.max
-        Dataset.max
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").max()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").max(skipna=False)
+        Reduce this Dataset's data by applying ``max`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``max`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``max`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.max
+        dask.array.max
+        Dataset.max
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.resample(time="3ME").max()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 3.0 2.0
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").max(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 3.0 nan
         """
         if (
             flox_available
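At the array level, ``skipna=True`` for ``max`` follows NaN-skipping ``np.nanmax`` semantics, while ``skipna=False`` follows plain ``np.max``. A minimal sketch of the distinction, independent of the patch:

    import numpy as np

    x = np.array([1.0, 3.0, np.nan])
    print(np.nanmax(x))  # 3.0 -> what skipna=True computes for a bin
    print(np.max(x))     # nan -> what skipna=False computes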
@@ -4997,67 +6140,89 @@ def min(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``min`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``min`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``min`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.min
-        dask.array.min
-        Dataset.min
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").min()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").min(skipna=False)
+        Reduce this Dataset's data by applying ``min`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``min`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``min`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.min
+        dask.array.min
+        Dataset.min
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.resample(time="3ME").min()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 0.0 2.0
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").min(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 0.0 nan
         """
         if (
             flox_available
@@ -5092,69 +6257,91 @@ def mean(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``mean`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``mean`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``mean`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.mean
-        dask.array.mean
-        Dataset.mean
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").mean()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").mean(skipna=False)
+        Reduce this Dataset's data by applying ``mean`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``mean`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``mean`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.mean
+        dask.array.mean
+        Dataset.mean
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.resample(time="3ME").mean()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 1.667 2.0
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").mean(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 1.667 nan
         """
         if (
             flox_available
@@ -5190,79 +6377,107 @@ def prod(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``prod`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        min_count : int or None, optional
-            The required number of valid values to perform the operation. If
-            fewer than min_count non-NA values are present the result will be
-            NA. Only used if skipna is set to True or defaults to True for the
-            array's dtype. Changed in version 0.17.0: if specified on an integer
-            array and skipna=True, the result will be a float array.
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``prod`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``prod`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.prod
-        dask.array.prod
-        Dataset.prod
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").prod()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").prod(skipna=False)
-
-        Specify ``min_count`` for finer control over when NaNs are ignored.
-
-        >>> ds.resample(time="3ME").prod(skipna=True, min_count=2)
+        Reduce this Dataset's data by applying ``prod`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        min_count : int or None, optional
+            The required number of valid values to perform the operation. If
+            fewer than min_count non-NA values are present the result will be
+            NA. Only used if skipna is set to True or defaults to True for the
+            array's dtype. Changed in version 0.17.0: if specified on an integer
+            array and skipna=True, the result will be a float array.
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``prod`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``prod`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.prod
+        dask.array.prod
+        Dataset.prod
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.resample(time="3ME").prod()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 0.0 2.0
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").prod(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 0.0 nan
+
+        Specify ``min_count`` for finer control over when NaNs are ignored.
+
+        >>> ds.resample(time="3ME").prod(skipna=True, min_count=2)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B nan 0.0 nan
         """
         if (
             flox_available
@@ -5300,79 +6515,107 @@ def sum(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``sum`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        min_count : int or None, optional
-            The required number of valid values to perform the operation. If
-            fewer than min_count non-NA values are present the result will be
-            NA. Only used if skipna is set to True or defaults to True for the
-            array's dtype. Changed in version 0.17.0: if specified on an integer
-            array and skipna=True, the result will be a float array.
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``sum`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``sum`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.sum
-        dask.array.sum
-        Dataset.sum
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").sum()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").sum(skipna=False)
-
-        Specify ``min_count`` for finer control over when NaNs are ignored.
-
-        >>> ds.resample(time="3ME").sum(skipna=True, min_count=2)
+        Reduce this Dataset's data by applying ``sum`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        min_count : int or None, optional
+            The required number of valid values to perform the operation. If
+            fewer than min_count non-NA values are present the result will be
+            NA. Only used if skipna is set to True or defaults to True for the
+            array's dtype. Changed in version 0.17.0: if specified on an integer
+            array and skipna=True, the result will be a float array.
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``sum`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``sum`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.sum
+        dask.array.sum
+        Dataset.sum
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.resample(time="3ME").sum()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 5.0 2.0
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").sum(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 5.0 nan
+
+        Specify ``min_count`` for finer control over when NaNs are ignored.
+
+        >>> ds.resample(time="3ME").sum(skipna=True, min_count=2)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B nan 5.0 nan
         """
        if (
             flox_available
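The ``min_count`` rule documented above can be stated compactly: a bin yields a result only when it contains at least ``min_count`` valid (non-NaN) values, and NaN otherwise. A standalone sketch of that rule on the example's three bins (``sum_with_min_count`` is a hypothetical helper, not part of the patch):

    import numpy as np

    def sum_with_min_count(values, min_count):
        # Mirrors the documented rule: fewer than min_count valid
        # values in a bin -> the result for that bin is NaN.
        valid = values[~np.isnan(values)]
        if valid.size < min_count:
            return np.nan
        return valid.sum()

    print(sum_with_min_count(np.array([1.0]), 2))            # nan (only 1 valid value)
    print(sum_with_min_count(np.array([2.0, 3.0, 0.0]), 2))  # 5.0
    print(sum_with_min_count(np.array([2.0, np.nan]), 2))    # nan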
@@ -5410,76 +6653,104 @@ def std(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``std`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        ddof : int, default: 0
-            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
-            where ``N`` represents the number of elements.
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``std`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``std`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.std
-        dask.array.std
-        Dataset.std
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").std()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").std(skipna=False)
-
-        Specify ``ddof=1`` for an unbiased estimate.
-
-        >>> ds.resample(time="3ME").std(skipna=True, ddof=1)
+        Reduce this Dataset's data by applying ``std`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``std`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``std`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.std
+        dask.array.std
+        Dataset.std
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.resample(time="3ME").std()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 0.0 1.247 0.0
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").std(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 0.0 1.247 nan
+
+        Specify ``ddof=1`` for an unbiased estimate.
+
+        >>> ds.resample(time="3ME").std(skipna=True, ddof=1)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B nan 1.528 nan
         """
         if (
             flox_available
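The ``ddof`` parameter only changes the divisor: ``N`` for ``ddof=0`` versus ``N - 1`` for ``ddof=1``. Checking the docstring's middle bin ``[2, 3, 0]`` by hand reproduces the doctest values above:

    import numpy as np

    bin_values = np.array([2.0, 3.0, 0.0])  # the 2001-02..2001-04 bin
    print(np.std(bin_values, ddof=0))  # 1.2472... -> the 1.247 shown for std()
    print(np.std(bin_values, ddof=1))  # 1.5275... -> the 1.528 shown for std(ddof=1)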
@@ -5517,76 +6788,104 @@ def var(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``var`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        ddof : int, default: 0
-            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
-            where ``N`` represents the number of elements.
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``var`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``var`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.var
-        dask.array.var
-        Dataset.var
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").var()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").var(skipna=False)
-
-        Specify ``ddof=1`` for an unbiased estimate.
-
-        >>> ds.resample(time="3ME").var(skipna=True, ddof=1)
+        Reduce this Dataset's data by applying ``var`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        ddof : int, default: 0
+            “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``,
+            where ``N`` represents the number of elements.
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``var`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``var`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.var
+        dask.array.var
+        Dataset.var
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").var() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 0.0 1.556 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3ME").var(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 0.0 1.556 nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.resample(time="3ME").var(skipna=True, ddof=1) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B nan 2.333 nan """ if ( flox_available @@ -5623,69 +6922,91 @@ def median( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.resample(time="3ME").median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.resample(time="3ME").median(skipna=False) + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." 
-            Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``median`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``median`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.median
-        dask.array.median
-        Dataset.median
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").median()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").median(skipna=False)
+        Reduce this Dataset's data by applying ``median`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``median`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``median`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.median
+        dask.array.median
+        Dataset.median
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.resample(time="3ME").median()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 2.0 2.0
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").median(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 2.0 nan
         """
         return self.reduce(
             duck_array_ops.median,
@@ -5705,66 +7026,76 @@ def nunique(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``nunique`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``nunique`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``nunique`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        pandas.DataFrame.nunique
-        Dataset.nunique
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").nunique()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").nunique(skipna=False)
+        Reduce this Dataset's data by applying ``nunique`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``nunique`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``nunique`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        pandas.DataFrame.nunique
+        Dataset.nunique
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.resample(time="3ME").nunique()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) int64 24B 1 3 1
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").nunique(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) int64 24B 1 3 2
         """
         if (
             flox_available
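The ``duck_array_ops.nunique`` implementation added by this PR lives outside this hunk. Purely to illustrate the semantics the docstring describes, here is a numpy sketch for a single 1-D bin (``nunique_1d`` is a hypothetical name, not the PR's code):

    import numpy as np

    def nunique_1d(values, skipna=True):
        # Count distinct values in one bin; NaN is dropped when
        # skipna=True and counted as one extra value otherwise.
        mask = np.isnan(values)
        n = np.unique(values[~mask]).size
        if not skipna and mask.any():
            n += 1
        return n

    print(nunique_1d(np.array([2.0, np.nan])))                # 1
    print(nunique_1d(np.array([2.0, np.nan]), skipna=False))  # 2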
@@ -5799,74 +7130,94 @@ def cumsum(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``cumsum`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``cumsum`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``cumsum`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        numpy.cumsum
-        dask.array.cumsum
-        Dataset.cumsum
-        Dataset.cumulative
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
-        Note that the methods on the ``cumulative`` method are more performant (with numbagg installed)
-        and better supported. ``cumsum`` and ``cumprod`` may be deprecated
-        in the future.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
-
-        >>> ds.resample(time="3ME").cumsum()
-
-        Use ``skipna`` to control whether NaNs are ignored.
-
-        >>> ds.resample(time="3ME").cumsum(skipna=False)
+        Reduce this Dataset's data by applying ``cumsum`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``cumsum`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``cumsum`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        numpy.cumsum
+        dask.array.cumsum
+        Dataset.cumsum
+        Dataset.cumulative
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Use the ``flox`` package to significantly speed up resampling computations,
+        especially with dask arrays. Xarray will use flox by default if installed.
+        Pass flox-specific keyword arguments in ``**kwargs``.
+        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+        Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
+
+        Note that the methods on the ``cumulative`` method are more performant (with numbagg installed)
+        and better supported. ``cumsum`` and ``cumprod`` may be deprecated
+        in the future.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=(
+        ...             "time",
+        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
+        ...         ),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
+        <xarray.Dataset> Size: 120B
+        Dimensions:  (time: 6)
+        Coordinates:
+          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan
+
+        >>> ds.resample(time="3ME").cumsum()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 6)
+        Dimensions without coordinates: time
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 5.0 5.0 2.0 2.0
+
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").cumsum(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 6)
+        Dimensions without coordinates: time
+        Data variables:
+            da       (time) float64 48B 1.0 2.0 5.0 5.0 2.0 nan
         """
         return self.reduce(
             duck_array_ops.cumsum,
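The Notes above point to the ``cumulative`` method as the better-supported alternative. For reference, an equivalent NaN-skipping cumulative sum over a plain dimension (not per resample bin) would look roughly like this; the sketch assumes an xarray release recent enough to provide ``Dataset.cumulative``:

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1, 2, 3, 0, 2, np.nan]),
        dims="time",
        coords=dict(time=pd.date_range("2001-01-01", freq="ME", periods=6)),
    )
    ds = xr.Dataset(dict(da=da))

    # Cumulative sum over the whole "time" dimension, skipping NaN:
    print(ds.cumulative("time").sum()["da"].values)  # [1. 3. 6. 6. 8. 8.]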
- Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - >>> ds.resample(time="3ME").cumprod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.resample(time="3ME").cumprod(skipna=False) + Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + Dataset.cumulative + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + Size: 120B + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3ME").cumprod() + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3ME").cumprod(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -5995,57 +7366,69 @@ def count( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - DataArray.count - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").count() + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + DataArray.count + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. 
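A minimal sketch (an editor's illustration, not part of this diff) of the flox dispatch described in the Notes above: the generated methods take the flox path when flox is installed and the ``use_flox`` option is enabled, and otherwise fall back to a plain ``reduce``. Both paths should agree, and ``xr.set_options(use_flox=False)`` is the switch for forcing the fallback.

import numpy as np
import pandas as pd
import xarray as xr

# Same data as the docstring examples in this file.
da = xr.DataArray(
    np.array([1, 2, 3, 0, 2, np.nan]),
    dims="time",
    coords=dict(
        time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
        labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
    ),
)

with xr.set_options(use_flox=False):  # force the duck_array_ops fallback
    fallback = da.groupby("labels").count()

fast = da.groupby("labels").count()  # takes the flox path if flox is installed
assert fallback.equals(fast)  # both paths give [1, 2, 2]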
+ + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").count() + Size: 24B + array([1, 2, 2]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6075,57 +7458,69 @@ def all( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").all() + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. 
+ See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").all() + Size: 3B + array([False, True, True]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6155,57 +7550,69 @@ def any( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").any() + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. 
Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").any() + Size: 3B + array([ True, True, True]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6236,66 +7643,82 @@ def max( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").max() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").max(skipna=False) + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). 
+ keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").max() + Size: 24B + array([1., 2., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").max(skipna=False) + Size: 24B + array([nan, 2., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6328,66 +7751,82 @@ def min( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... 
time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").min() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").min(skipna=False) + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").min() + Size: 24B + array([1., 2., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").min(skipna=False) + Size: 24B + array([nan, 2., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6420,68 +7859,84 @@ def mean( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. 
If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").mean() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").mean(skipna=False) + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.groupby("labels").mean() + Size: 24B + array([1. , 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").mean(skipna=False) + Size: 24B + array([nan, 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6515,78 +7970,98 @@ def prod( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").prod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").prod(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> da.groupby("labels").prod(skipna=True, min_count=2) + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").prod() + Size: 24B + array([1., 4., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").prod(skipna=False) + Size: 24B + array([nan, 4., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.groupby("labels").prod(skipna=True, min_count=2) + Size: 24B + array([nan, 4., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6622,78 +8097,98 @@ def sum( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. 
If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").sum() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").sum(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> da.groupby("labels").sum(skipna=True, min_count=2) + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`groupby` + User guide on groupby operations. 
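A short sketch (an editor's illustration, not part of this diff) of how ``min_count`` interacts with ``skipna`` for the grouped ``sum`` documented here, using the same data as the docstring example. Group "a" contains one valid value and one NaN, so it fails a ``min_count=2`` requirement while groups "b" and "c" pass.

import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.array([1, 2, 3, 0, 2, np.nan]),
    dims="time",
    coords=dict(
        time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
        labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
    ),
)
grouped = da.groupby("labels")

print(grouped.sum().values)  # [1. 4. 3.] -- NaN skipped, no count requirement
print(grouped.sum(skipna=True, min_count=2).values)  # [nan 4. 3.]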
+ + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").sum() + Size: 24B + array([1., 4., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").sum(skipna=False) + Size: 24B + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.groupby("labels").sum(skipna=True, min_count=2) + Size: 24B + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6729,75 +8224,95 @@ def std( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... 
time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").std() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").std(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.groupby("labels").std(skipna=True, ddof=1) + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").std() + Size: 24B + array([0. , 0. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").std(skipna=False) + Size: 24B + array([nan, 0. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.groupby("labels").std(skipna=True, ddof=1) + Size: 24B + array([ nan, 0. , 2.12132034]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6833,75 +8348,95 @@ def var( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." 
or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").var() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").var(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.groupby("labels").var(skipna=True, ddof=1) + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
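A short sketch (an editor's illustration, not part of this diff) of the ``ddof`` parameter documented above: the divisor is ``N - ddof``, and ``std`` at a given ``ddof`` is the square root of ``var`` at the same ``ddof``. With ``ddof=1``, group "a" has a single valid value, so ``N - ddof`` is zero and the result is NaN (NumPy may emit a degrees-of-freedom warning for that group).

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.array([1, 2, 3, 0, 2, np.nan]),
    dims="time",
    coords=dict(labels=("time", np.array(["a", "b", "c", "c", "b", "a"]))),
)
grouped = da.groupby("labels")

biased = grouped.var()  # divisor N: [0. 0. 2.25]
unbiased = grouped.var(ddof=1)  # divisor N - 1: [nan 0. 4.5]
assert np.allclose(grouped.std(ddof=1) ** 2, unbiased, equal_nan=True)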
+ + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").var() + Size: 24B + array([0. , 0. , 2.25]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").var(skipna=False) + Size: 24B + array([ nan, 0. , 2.25]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.groupby("labels").var(skipna=True, ddof=1) + Size: 24B + array([nan, 0. , 4.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -6936,68 +8471,84 @@ def median( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... 
np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").median(skipna=False) + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").median() + Size: 24B + array([1. , 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").median(skipna=False) + Size: 24B + array([nan, 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ return self.reduce( duck_array_ops.median, @@ -7016,65 +8567,73 @@ def nunique( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``nunique`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``nunique`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``nunique`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.nunique - DataArray.nunique - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").nunique() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").nunique(skipna=False) + Reduce this DataArray's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + DataArray.nunique + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
)
+ >>> da
+ <xarray.DataArray (time: 6)> Size: 48B
+ array([ 1.,  2.,  3.,  0.,  2., nan])
+ Coordinates:
+   * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+     labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+
+ >>> da.groupby("labels").nunique()
+ <xarray.DataArray (labels: 3)> Size: 24B
+ array([1, 1, 2])
+ Coordinates:
+   * labels   (labels) object 24B 'a' 'b' 'c'
+
+ Use ``skipna`` to control whether NaNs are ignored.
+
+ >>> da.groupby("labels").nunique(skipna=False)
+ <xarray.DataArray (labels: 3)> Size: 24B
+ array([2, 1, 2])
+ Coordinates:
+   * labels   (labels) object 24B 'a' 'b' 'c'
"""
if (
    flox_available
@@ -7107,73 +8666,91 @@ def cumsum(
    **kwargs: Any,
) -> DataArray:
    """
- Reduce this DataArray's data by applying ``cumsum`` along some dimension(s).
-
- Parameters
- ----------
- dim : str, Iterable of Hashable, "..." or None, default: None
-     Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
-     or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions.
-     If "...", will reduce over all dimensions.
- skipna : bool or None, optional
-     If True, skip missing values (as marked by NaN). By default, only
-     skips missing values for float dtypes; other dtypes either do not
-     have a sentinel missing value (int) or ``skipna=True`` has not been
-     implemented (object, datetime64 or timedelta64).
- keep_attrs : bool or None, optional
-     If True, ``attrs`` will be copied from the original
-     object to the new one. If False, the new object will be
-     returned without attributes.
- **kwargs : Any
-     Additional keyword arguments passed on to the appropriate array
-     function for calculating ``cumsum`` on this object's data.
-     These could include dask-specific kwargs like ``split_every``.
-
- Returns
- -------
- reduced : DataArray
-     New DataArray with ``cumsum`` applied to its data and the
-     indicated dimension(s) removed
-
- See Also
- --------
- numpy.cumsum
- dask.array.cumsum
- DataArray.cumsum
- DataArray.cumulative
- :ref:`groupby`
-     User guide on groupby operations.
-
- Notes
- -----
- Use the ``flox`` package to significantly speed up groupby computations,
- especially with dask arrays. Xarray will use flox by default if installed.
- Pass flox-specific keyword arguments in ``**kwargs``.
- See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
- Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
- Note that the methods on the ``cumulative`` method are more performant (with numbagg installed)
- and better supported. ``cumsum`` and ``cumprod`` may be deprecated
- in the future.
-
- Examples
- --------
- >>> da = xr.DataArray(
- ...     np.array([1, 2, 3, 0, 2, np.nan]),
- ...     dims="time",
- ...     coords=dict(
- ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
- ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
- ...     ),
- ... )
- >>> da
-
- >>> da.groupby("labels").cumsum()
-
- Use ``skipna`` to control whether NaNs are ignored.
-
- >>> da.groupby("labels").cumsum(skipna=False)
+ Reduce this DataArray's data by applying ``cumsum`` along some dimension(s).
+
+ Parameters
+ ----------
+ dim : str, Iterable of Hashable, "..." or None, default: None
+     Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
+     or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions.
+     If "...", will reduce over all dimensions.
+ skipna : bool or None, optional
+     If True, skip missing values (as marked by NaN). By default, only
+     skips missing values for float dtypes; other dtypes either do not
+     have a sentinel missing value (int) or ``skipna=True`` has not been
+     implemented (object, datetime64 or timedelta64).
+ keep_attrs : bool or None, optional
+     If True, ``attrs`` will be copied from the original
+     object to the new one.
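
For the new ``nunique`` reduction, pandas provides a convenient reference implementation; a minimal cross-check sketch, not part of the patch:

    import numpy as np
    import pandas as pd

    s = pd.Series(
        [1, 2, 3, 0, 2, np.nan],
        index=["a", "b", "c", "c", "b", "a"],
    )
    # dropna=True mirrors xarray's default skipna=True:
    # "a" -> {1} -> 1, "b" -> {2} -> 1, "c" -> {3, 0} -> 2
    print(s.groupby(level=0).nunique())
    # dropna=False counts NaN as a distinct value, like skipna=False:
    # "a" -> {1, nan} -> 2
    print(s.groupby(level=0).nunique(dropna=False))
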
If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + DataArray.cumsum + DataArray.cumulative + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").cumsum() + Size: 48B + array([1., 2., 3., 3., 4., 1.]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").cumsum(skipna=False) + Size: 48B + array([ 1., 2., 3., 3., 4., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) DataArray: """ - Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - DataArray.cumprod - DataArray.cumulative - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. 
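
The note above recommends the ``cumulative`` accessor over ``cumsum``; on a plain DataArray the two spellings agree, as in this sketch (not part of the patch; assumes an xarray version that provides ``DataArray.cumulative``):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.array([1.0, 2.0, 3.0, 0.0]), dims="time")
    # a plain cumulative sum along a dimension ...
    print(da.cumsum("time").values)  # [1. 3. 6. 6.]
    # ... matches the recommended cumulative().sum() spelling
    print(da.cumulative("time").sum().values)  # [1. 3. 6. 6.]
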
- Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.groupby("labels").cumprod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").cumprod(skipna=False) + Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + DataArray.cumprod + DataArray.cumulative + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").cumprod() + Size: 48B + array([1., 2., 3., 0., 4., 1.]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.groupby("labels").cumprod(skipna=False) + Size: 48B + array([ 1., 2., 3., 0., 4., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) DataArray: """ - Reduce this DataArray's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - DataArray.count - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").count() + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + DataArray.count + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").count() + Size: 24B + array([1, 3, 1]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7379,57 +8986,69 @@ def all( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").all() + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... 
pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").all() + Size: 3B + array([ True, True, False]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7459,57 +9078,69 @@ def any( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").any() + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... 
np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 6B + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").any() + Size: 3B + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7540,66 +9171,82 @@ def max( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").max() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").max(skipna=False) + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. 
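
A standalone sketch of the boolean resampling shown in these examples, using the same three-month bins (not part of the patch; the ``ME``/``3ME`` aliases need pandas >= 2.2):

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([True, True, True, True, True, False]),
        dims="time",
        coords={"time": pd.date_range("2001-01-01", freq="ME", periods=6)},
    )
    # bins: [Jan], [Feb, Mar, Apr], [May, Jun]
    print(da.resample(time="3ME").all().values)  # [ True  True False]
    print(da.resample(time="3ME").any().values)  # [ True  True  True]
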
+ **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").max() + Size: 24B + array([1., 3., 2.]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3ME").max(skipna=False) + Size: 24B + array([ 1., 3., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7632,66 +9279,82 @@ def min( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> da - - >>> da.resample(time="3ME").min() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").min(skipna=False) + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").min() + Size: 24B + array([1., 0., 2.]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3ME").min(skipna=False) + Size: 24B + array([ 1., 0., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7724,68 +9387,84 @@ def mean( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").mean() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").mean(skipna=False) + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.resample(time="3ME").mean() + Size: 24B + array([1. , 1.66666667, 2. ]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3ME").mean(skipna=False) + Size: 24B + array([1. , 1.66666667, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7819,78 +9498,98 @@ def prod( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").prod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").prod(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> da.resample(time="3ME").prod(skipna=True, min_count=2) + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. 
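
``min_count``, documented just below, masks bins that contain too few valid values; a sketch of the behaviour the later ``prod`` example asserts (not part of the patch):

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1, 2, 3, 0, 2, np.nan]),
        dims="time",
        coords={"time": pd.date_range("2001-01-01", freq="ME", periods=6)},
    )
    # the first bin holds one valid value and the last one value plus NaN,
    # so min_count=2 turns both into NaN while the middle bin survives
    print(da.resample(time="3ME").prod(skipna=True, min_count=2).values)
    # [nan  0. nan]
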
+ skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").prod() + Size: 24B + array([1., 0., 2.]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3ME").prod(skipna=False) + Size: 24B + array([ 1., 0., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.resample(time="3ME").prod(skipna=True, min_count=2) + Size: 24B + array([nan, 0., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -7926,78 +9625,98 @@ def sum( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").sum() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").sum(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> da.resample(time="3ME").sum(skipna=True, min_count=2) + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. 
+ **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").sum() + Size: 24B + array([1., 5., 2.]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3ME").sum(skipna=False) + Size: 24B + array([ 1., 5., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.resample(time="3ME").sum(skipna=True, min_count=2) + Size: 24B + array([nan, 5., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8033,75 +9752,95 @@ def std( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`resampling` - User guide on resampling operations. 
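
The ``ddof`` parameter below switches the divisor from N to N - ddof; for the middle three-month bin of the example data this reproduces the numbers in the ``std`` examples (plain numpy sketch, not part of the patch):

    import numpy as np

    vals = np.array([2.0, 3.0, 0.0])  # the Feb-Apr bin of the example data
    print(vals.std(ddof=0))  # 1.2472..., population std, xarray's default
    print(vals.std(ddof=1))  # 1.5275..., sample std, as in std(ddof=1)
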
- - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").std() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").std(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.resample(time="3ME").std(skipna=True, ddof=1) + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").std() + Size: 24B + array([0. , 1.24721913, 0. ]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> da.resample(time="3ME").std(skipna=False) + Size: 24B + array([0. , 1.24721913, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.resample(time="3ME").std(skipna=True, ddof=1) + Size: 24B + array([ nan, 1.52752523, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8137,75 +9876,95 @@ def var( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").var() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").var(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> da.resample(time="3ME").var(skipna=True, ddof=1) + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").var() + Size: 24B + array([0. , 1.55555556, 0. ]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3ME").var(skipna=False) + Size: 24B + array([0. , 1.55555556, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.resample(time="3ME").var(skipna=True, ddof=1) + Size: 24B + array([ nan, 2.33333333, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8240,68 +9999,84 @@ def median( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
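
For any fixed ``ddof``, ``var`` is exactly the square of ``std``, which ties the two example blocks above together; a quick sketch (not part of the patch):

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1, 2, 3, 0, 2, np.nan]),
        dims="time",
        coords={"time": pd.date_range("2001-01-01", freq="ME", periods=6)},
    )
    r = da.resample(time="3ME")
    print(np.allclose(r.var().values, r.std().values ** 2))  # True
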
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").median(skipna=False) + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30
+ labels (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+
+ >>> da.resample(time="3ME").median()
+ <xarray.DataArray (time: 3)> Size: 24B
+ array([1., 2., 2.])
+ Coordinates:
+ * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+
+ Use ``skipna`` to control whether NaNs are ignored.
+
+ >>> da.resample(time="3ME").median(skipna=False)
+ <xarray.DataArray (time: 3)> Size: 24B
+ array([ 1., 2., nan])
+ Coordinates:
+ * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
"""
return self.reduce(
duck_array_ops.median,
@@ -8320,65 +10095,73 @@ def nunique(
**kwargs: Any,
) -> DataArray:
"""
- Reduce this DataArray's data by applying ``nunique`` along some dimension(s).
-
- Parameters
- ----------
- dim : str, Iterable of Hashable, "..." or None, default: None
- Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
- or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
- If "...", will reduce over all dimensions.
- skipna : bool or None, optional
- If True, skip missing values (as marked by NaN). By default, only
- skips missing values for float dtypes; other dtypes either do not
- have a sentinel missing value (int) or ``skipna=True`` has not been
- implemented (object, datetime64 or timedelta64).
- keep_attrs : bool or None, optional
- If True, ``attrs`` will be copied from the original
- object to the new one. If False, the new object will be
- returned without attributes.
- **kwargs : Any
- Additional keyword arguments passed on to the appropriate array
- function for calculating ``nunique`` on this object's data.
- These could include dask-specific kwargs like ``split_every``.
-
- Returns
- -------
- reduced : DataArray
- New DataArray with ``nunique`` applied to its data and the
- indicated dimension(s) removed
-
- See Also
- --------
- pandas.DataFrame.nunique
- DataArray.nunique
- :ref:`resampling`
- User guide on resampling operations.
-
- Notes
- -----
- Use the ``flox`` package to significantly speed up resampling computations,
- especially with dask arrays. Xarray will use flox by default if installed.
- Pass flox-specific keyword arguments in ``**kwargs``.
- See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
- Examples
- --------
- >>> da = xr.DataArray(
- ... np.array([1, 2, 3, 0, 2, np.nan]),
- ... dims="time",
- ... coords=dict(
- ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
- ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
- ... ),
- ... )
- >>> da
-
- >>> da.resample(time="3ME").nunique()
-
- Use ``skipna`` to control whether NaNs are ignored.
-
- >>> da.resample(time="3ME").nunique(skipna=False)
+ Reduce this DataArray's data by applying ``nunique`` along some dimension(s).
+
+ Parameters
+ ----------
+ dim : str, Iterable of Hashable, "..." or None, default: None
+ Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
+ or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+ If "...", will reduce over all dimensions.
+ skipna : bool or None, optional
+ If True, skip missing values (as marked by NaN). By default, only
+ skips missing values for float dtypes; other dtypes either do not
+ have a sentinel missing value (int) or ``skipna=True`` has not been
+ implemented (object, datetime64 or timedelta64).
+ keep_attrs : bool or None, optional
+ If True, ``attrs`` will be copied from the original
+ object to the new one. If False, the new object will be
+ returned without attributes.
+ **kwargs : Any
+ Additional keyword arguments passed on to the appropriate array
+ function for calculating ``nunique`` on this object's data.
+ These could include dask-specific kwargs like ``split_every``.
+
+ Returns
+ -------
+ reduced : DataArray
+ New DataArray with ``nunique`` applied to its data and the
+ indicated dimension(s) removed
+
+ See Also
+ --------
+ pandas.DataFrame.nunique
+ DataArray.nunique
+ :ref:`resampling`
+ User guide on resampling operations.
+
+ Notes
+ -----
+ Use the ``flox`` package to significantly speed up resampling computations,
+ especially with dask arrays. Xarray will use flox by default if installed.
+ Pass flox-specific keyword arguments in ``**kwargs``.
+ See the `flox documentation <https://flox.readthedocs.io>`_ for more.
+
+ Examples
+ --------
+ >>> da = xr.DataArray(
+ ... np.array([1, 2, 3, 0, 2, np.nan]),
+ ... dims="time",
+ ... coords=dict(
+ ... time=(
+ ... "time",
+ ... pd.date_range("2001-01-01", freq="ME", periods=6),
+ ... ),
+ ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+ ... ),
+ ... )
+ >>> da
+ <xarray.DataArray (time: 6)> Size: 48B
+ array([ 1., 2., 3., 0., 2., nan])
+ Coordinates:
+ * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
+ labels (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
+
+ >>> da.resample(time="3ME").nunique()
+ <xarray.DataArray (time: 3)> Size: 24B
+ array([1, 3, 1])
+ Coordinates:
+ * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+
+ Use ``skipna`` to control whether NaNs are ignored.
+
+ >>> da.resample(time="3ME").nunique(skipna=False)
+ <xarray.DataArray (time: 3)> Size: 24B
+ array([1, 3, 2])
+ Coordinates:
+ * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
"""
if (
flox_available
@@ -8411,73 +10194,91 @@ def cumsum(
**kwargs: Any,
) -> DataArray:
"""
- Reduce this DataArray's data by applying ``cumsum`` along some dimension(s).
-
- Parameters
- ----------
- dim : str, Iterable of Hashable, "..." or None, default: None
- Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
- or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
- If "...", will reduce over all dimensions.
- skipna : bool or None, optional
- If True, skip missing values (as marked by NaN). By default, only
- skips missing values for float dtypes; other dtypes either do not
- have a sentinel missing value (int) or ``skipna=True`` has not been
- implemented (object, datetime64 or timedelta64).
- keep_attrs : bool or None, optional
- If True, ``attrs`` will be copied from the original
- object to the new one. If False, the new object will be
- returned without attributes.
- **kwargs : Any
- Additional keyword arguments passed on to the appropriate array
- function for calculating ``cumsum`` on this object's data.
- These could include dask-specific kwargs like ``split_every``.
-
- Returns
- -------
- reduced : DataArray
- New DataArray with ``cumsum`` applied to its data and the
- indicated dimension(s) removed
-
- See Also
- --------
- numpy.cumsum
- dask.array.cumsum
- DataArray.cumsum
- DataArray.cumulative
- :ref:`resampling`
- User guide on resampling operations.
-
- Notes
- -----
- Use the ``flox`` package to significantly speed up resampling computations,
- especially with dask arrays. Xarray will use flox by default if installed.
- Pass flox-specific keyword arguments in ``**kwargs``.
- See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
- Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
- Note that the methods on the ``cumulative`` method are more performant (with numbagg installed)
- and better supported. ``cumsum`` and ``cumprod`` may be deprecated
- in the future.
-
- Examples
- --------
- >>> da = xr.DataArray(
- ... np.array([1, 2, 3, 0, 2, np.nan]),
- ... dims="time",
- ... coords=dict(
- ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
- ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
- ... ),
- ...
) - >>> da - - >>> da.resample(time="3ME").cumsum() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").cumsum(skipna=False) + Reduce this DataArray's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + DataArray.cumsum + DataArray.cumulative + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").cumsum() + Size: 48B + array([1., 2., 5., 5., 2., 2.]) + Coordinates: + labels (time) >> da.resample(time="3ME").cumsum(skipna=False) + Size: 48B + array([ 1., 2., 5., 5., 2., nan]) + Coordinates: + labels (time) DataArray: """ - Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). 
- keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - DataArray.cumprod - DataArray.cumulative - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - >>> da.resample(time="3ME").cumprod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3ME").cumprod(skipna=False) + Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + DataArray.cumprod + DataArray.cumulative + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. 
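To illustrate the note above about non-numeric variables (a minimal sketch with a hand-built Dataset; ``foo`` and ``bar`` are made-up names, not part of the generated docstrings):

import numpy as np
import xarray as xr

ds = xr.Dataset({"foo": ("x", np.array([1.0, 2.0])), "bar": ("x", np.array(["a", "b"]))})
print(ds.mean())  # "bar" is non-numeric, so it is dropped before reducing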
+ + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3ME").cumprod() + Size: 48B + array([1., 2., 6., 0., 2., 2.]) + Coordinates: + labels (time) >> da.resample(time="3ME").cumprod(skipna=False) + Size: 48B + array([ 1., 2., 6., 0., 2., nan]) + Coordinates: + labels (time) Self: """ - Reduce this NamedArray's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - DataArray.count - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.count() + Reduce this NamedArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.count() + Size: 8B + array(5) """ return self.reduce( duck_array_ops.count, @@ -77,42 +81,46 @@ def all( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - DataArray.all - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray( - ... "x", np.array([True, True, True, True, True, False], dtype=bool) - ... ) - >>> na - - >>> na.all() + Reduce this NamedArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", np.array([True, True, True, True, True, False], dtype=bool) + ... ) + >>> na + Size: 6B + array([ True, True, True, True, True, False]) + + >>> na.all() + Size: 1B + array(False) """ return self.reduce( duck_array_ops.array_all, @@ -126,42 +134,46 @@ def any( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - DataArray.any - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray( - ... "x", np.array([True, True, True, True, True, False], dtype=bool) - ... ) - >>> na - - >>> na.any() + Reduce this NamedArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
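For readers unfamiliar with ``split_every``: the kwarg is forwarded to dask's tree reduction and only reshapes the task graph, not the result. A minimal sketch using dask directly (assumes dask is installed):

import dask.array as dka
import numpy as np

arr = dka.from_array(np.arange(16.0), chunks=4)
print(arr.sum().compute())  # 120.0, default reduction tree
print(arr.sum(split_every=2).compute())  # 120.0, deeper tree with a fan-in of 2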
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", np.array([True, True, True, True, True, False], dtype=bool) + ... ) + >>> na + Size: 6B + array([ True, True, True, True, True, False]) + + >>> na.any() + Size: 1B + array(True) """ return self.reduce( duck_array_ops.array_any, @@ -177,49 +189,55 @@ def max( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - DataArray.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.max() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.max(skipna=False) + Reduce this NamedArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.max() + Size: 8B + array(3.) + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> na.max(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.max, @@ -236,49 +254,55 @@ def min( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - DataArray.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.min() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.min(skipna=False) + Reduce this NamedArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.min() + Size: 8B + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.min(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.min, @@ -295,53 +319,59 @@ def mean( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - DataArray.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.mean() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.mean(skipna=False) + Reduce this NamedArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.mean() + Size: 8B + array(1.6) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.mean(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.mean, @@ -359,63 +389,71 @@ def prod( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. 
If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - DataArray.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.prod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.prod(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> na.prod(skipna=True, min_count=2) + Reduce this NamedArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.prod() + Size: 8B + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.prod(skipna=False) + Size: 8B + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> na.prod(skipna=True, min_count=2) + Size: 8B + array(0.) 
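A further illustration of the ``min_count`` rule stated above (a sketch using ``DataArray`` for convenience; with only five valid values, requiring six yields NaN):

import numpy as np
import xarray as xr

arr = xr.DataArray(np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan]), dims="x")
print(arr.prod(skipna=True, min_count=5).item())  # 0.0: five valid values suffice
print(arr.prod(skipna=True, min_count=6).item())  # nan: fewer valid values than required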
""" return self.reduce( duck_array_ops.prod, @@ -434,63 +472,71 @@ def sum( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - DataArray.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.sum() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.sum(skipna=False) - - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> na.sum(skipna=True, min_count=2) + Reduce this NamedArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.sum() + Size: 8B + array(8.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.sum(skipna=False) + Size: 8B + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> na.sum(skipna=True, min_count=2) + Size: 8B + array(8.) """ return self.reduce( duck_array_ops.sum, @@ -509,60 +555,68 @@ def std( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - DataArray.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.std() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.std(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> na.std(skipna=True, ddof=1) + Reduce this NamedArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. 
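The ``N - ddof`` divisor can be checked against numpy, which follows the same convention (the values match the std examples shown below):

import numpy as np

valid = np.array([1.0, 2.0, 3.0, 0.0, 2.0])  # the five non-NaN values
print(np.std(valid, ddof=0))  # 1.0198..., divides by N
print(np.std(valid, ddof=1))  # 1.1402..., divides by N - 1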
+ **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.std() + Size: 8B + array(1.0198039) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.std(skipna=False) + Size: 8B + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> na.std(skipna=True, ddof=1) + Size: 8B + array(1.14017543) """ return self.reduce( duck_array_ops.std, @@ -581,60 +635,68 @@ def var( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - DataArray.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.var() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.var(skipna=False) - - Specify ``ddof=1`` for an unbiased estimate. - - >>> na.var(skipna=True, ddof=1) + Reduce this NamedArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.var() + Size: 8B + array(1.04) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.var(skipna=False) + Size: 8B + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> na.var(skipna=True, ddof=1) + Size: 8B + array(1.3) """ return self.reduce( duck_array_ops.var, @@ -652,53 +714,59 @@ def median( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - DataArray.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.median(skipna=False) + Reduce this NamedArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.median() + Size: 8B + array(2.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.median(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.median, @@ -715,48 +783,54 @@ def nunique( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``nunique`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``nunique`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``nunique`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.nunique - Dataset.nunique - DataArray.nunique - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.nunique() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.nunique(skipna=False) + Reduce this NamedArray's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
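A rough plain-numpy analogy for what ``nunique`` counts (illustrative only; the actual reduction is ``duck_array_ops.nunique``):

import numpy as np

x = np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan])
print(len(np.unique(x[~np.isnan(x)])))  # 4 distinct values when NaN is skipped
print(len(np.unique(x)))  # 5 once the single NaN counts as a value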
+
+ Returns
+ -------
+ reduced : NamedArray
+ New NamedArray with ``nunique`` applied to its data and the
+ indicated dimension(s) removed
+
+ See Also
+ --------
+ pandas.DataFrame.nunique
+ Dataset.nunique
+ DataArray.nunique
+ :ref:`agg`
+ User guide on reduction or aggregation operations.
+
+ Examples
+ --------
+ >>> from xarray.namedarray.core import NamedArray
+ >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan]))
+ >>> na
+ <xarray.NamedArray (x: 6)> Size: 48B
+ array([ 1., 2., 3., 0., 2., nan])
+
+ >>> na.nunique()
+ <xarray.NamedArray ()> Size: 8B
+ array(4)
+
+ Use ``skipna`` to control whether NaNs are ignored.
+
+ >>> na.nunique(skipna=False)
+ <xarray.NamedArray ()> Size: 8B
+ array(5)
"""
return self.reduce(
duck_array_ops.nunique,
@@ -773,58 +847,64 @@ def cumsum(
**kwargs: Any,
) -> Self:
"""
- Reduce this NamedArray's data by applying ``cumsum`` along some dimension(s).
-
- Parameters
- ----------
- dim : str, Iterable of Hashable, "..." or None, default: None
- Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
- or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
- skipna : bool or None, optional
- If True, skip missing values (as marked by NaN). By default, only
- skips missing values for float dtypes; other dtypes either do not
- have a sentinel missing value (int) or ``skipna=True`` has not been
- implemented (object, datetime64 or timedelta64).
- **kwargs : Any
- Additional keyword arguments passed on to the appropriate array
- function for calculating ``cumsum`` on this object's data.
- These could include dask-specific kwargs like ``split_every``.
-
- Returns
- -------
- reduced : NamedArray
- New NamedArray with ``cumsum`` applied to its data and the
- indicated dimension(s) removed
-
- See Also
- --------
- numpy.cumsum
- dask.array.cumsum
- Dataset.cumsum
- DataArray.cumsum
- NamedArray.cumulative
- :ref:`agg`
- User guide on reduction or aggregation operations.
-
- Notes
- -----
- Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations.
-
- Note that the methods on the ``cumulative`` method are more performant (with numbagg installed)
- and better supported. ``cumsum`` and ``cumprod`` may be deprecated
- in the future.
-
- Examples
- --------
- >>> from xarray.namedarray.core import NamedArray
- >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan]))
- >>> na
-
- >>> na.cumsum()
-
- Use ``skipna`` to control whether NaNs are ignored.
-
- >>> na.cumsum(skipna=False)
+ Reduce this NamedArray's data by applying ``cumsum`` along some dimension(s).
+
+ Parameters
+ ----------
+ dim : str, Iterable of Hashable, "..." or None, default: None
+ Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
+ or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
+ skipna : bool or None, optional
+ If True, skip missing values (as marked by NaN). By default, only
+ skips missing values for float dtypes; other dtypes either do not
+ have a sentinel missing value (int) or ``skipna=True`` has not been
+ implemented (object, datetime64 or timedelta64).
+ **kwargs : Any
+ Additional keyword arguments passed on to the appropriate array
+ function for calculating ``cumsum`` on this object's data.
+ These could include dask-specific kwargs like ``split_every``.
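The two ``skipna`` variants of ``cumsum`` line up with numpy's pair of cumulative sums (a plain-numpy analogy; compare the doctest outputs that follow):

import numpy as np

x = np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan])
print(np.nancumsum(x))  # [1. 3. 6. 6. 8. 8.], like cumsum()
print(np.cumsum(x))  # [ 1.  3.  6.  6.  8. nan], like cumsum(skipna=False)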
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + DataArray.cumsum + NamedArray.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.cumsum() + Size: 48B + array([1., 3., 6., 6., 8., 8.]) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.cumsum(skipna=False) + Size: 48B + array([ 1., 3., 6., 6., 8., nan]) """ return self.reduce( duck_array_ops.cumsum, @@ -841,58 +921,64 @@ def cumprod( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - DataArray.cumprod - NamedArray.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na - - >>> na.cumprod() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> na.cumprod(skipna=False) + Reduce this NamedArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + DataArray.cumprod + NamedArray.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na + Size: 48B + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.cumprod() + Size: 48B + array([1., 2., 6., 0., 0., 0.]) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.cumprod(skipna=False) + Size: 48B + array([ 1., 2., 6., 0., 0., nan]) """ return self.reduce( duck_array_ops.cumprod, From 82e481675edf003f84b36d116c9875c1b20b5039 Mon Sep 17 00:00:00 2001 From: Ewan Short Date: Fri, 21 Nov 2025 21:24:49 +1100 Subject: [PATCH 5/8] fix doctests --- xarray/core/_aggregations.py | 12690 ++++++++++++------------- xarray/namedarray/_aggregations.py | 1290 +-- xarray/util/generate_aggregations.py | 41 +- 3 files changed, 6901 insertions(+), 7120 deletions(-) diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index 9e2393cb2df..fc037be15ee 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -42,57 +42,52 @@ def count( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - DataArray.count - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... 
) - >>> dt + Reduce this DataTree's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -102,12 +97,13 @@ def count( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.count() - - Group: / - Dimensions: () - Data variables: - foo int64 8B 5 + + >>> dt.count() + + Group: / + Dimensions: () + Data variables: + foo int64 8B 5 """ return self.reduce( duck_array_ops.count, @@ -125,63 +121,57 @@ def all( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - DataArray.all - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=( - ... "time", - ... np.array( - ... [True, True, True, True, True, False], - ... dtype=bool, - ... ), - ... ) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. 
+ keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=( + ... "time", + ... np.array([True, True, True, True, True, False], dtype=bool), + ... ) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -191,12 +181,13 @@ def all( Data variables: foo (time) bool 6B True True True True True False - >>> dt.all() - - Group: / - Dimensions: () - Data variables: - foo bool 1B False + + >>> dt.all() + + Group: / + Dimensions: () + Data variables: + foo bool 1B False """ return self.reduce( duck_array_ops.array_all, @@ -214,63 +205,57 @@ def any( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - DataArray.any - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=( - ... "time", - ... np.array( - ... [True, True, True, True, True, False], - ... dtype=bool, - ... ), - ... ) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. 
+ **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict( + ... foo=( + ... "time", + ... np.array([True, True, True, True, True, False], dtype=bool), + ... ) + ... ), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -280,12 +265,13 @@ def any( Data variables: foo (time) bool 6B True True True True True False - >>> dt.any() - - Group: / - Dimensions: () - Data variables: - foo bool 1B True + + >>> dt.any() + + Group: / + Dimensions: () + Data variables: + foo bool 1B True """ return self.reduce( duck_array_ops.array_any, @@ -304,62 +290,57 @@ def max( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - DataArray.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -369,21 +350,22 @@ def max( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.max() + + >>> dt.max() Group: / Dimensions: () Data variables: foo float64 8B 3.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.max(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan + >>> dt.max(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan """ return self.reduce( duck_array_ops.max, @@ -403,62 +385,57 @@ def min( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - DataArray.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... 
) - >>> dt + Reduce this DataTree's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -468,21 +445,22 @@ def min( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.min() + + >>> dt.min() Group: / Dimensions: () Data variables: foo float64 8B 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.min(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan + >>> dt.min(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan """ return self.reduce( duck_array_ops.min, @@ -502,66 +480,61 @@ def mean( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - DataArray.mean - :ref:`agg` - User guide on reduction or aggregation operations. 
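The Notes that follow state that datetime64 and timedelta64 dtypes are treated as numeric. A quick standalone illustration of that rule, using plain xarray independently of this patch:

import numpy as np
import xarray as xr

times = xr.DataArray(
    np.array(["2001-01-01", "2001-01-03"], dtype="datetime64[ns]"), dims="x"
)
# datetime64 is reduced like a number: the mean is the midpoint in time,
# 2001-01-02 for these two dates.
print(times.mean().values)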
- - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -571,21 +544,22 @@ def mean( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.mean() + + >>> dt.mean() Group: / Dimensions: () Data variables: foo float64 8B 1.6 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.mean(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan + >>> dt.mean(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan """ return self.reduce( duck_array_ops.mean, @@ -606,72 +580,67 @@ def prod( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - DataArray.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -681,30 +650,31 @@ def prod( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.prod() + + >>> dt.prod() Group: / Dimensions: () Data variables: foo float64 8B 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.prod(skipna=False) + >>> dt.prod(skipna=False) Group: / Dimensions: () Data variables: foo float64 8B nan - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> dt.prod(skipna=True, min_count=2) - - Group: / - Dimensions: () - Data variables: - foo float64 8B 0.0 + >>> dt.prod(skipna=True, min_count=2) + + Group: / + Dimensions: () + Data variables: + foo float64 8B 0.0 """ return self.reduce( duck_array_ops.prod, @@ -726,72 +696,67 @@ def sum( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - DataArray.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... 
) - >>> dt + Reduce this DataTree's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -801,30 +766,31 @@ def sum( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.sum() + + >>> dt.sum() Group: / Dimensions: () Data variables: foo float64 8B 8.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.sum(skipna=False) + >>> dt.sum(skipna=False) Group: / Dimensions: () Data variables: foo float64 8B nan - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> dt.sum(skipna=True, min_count=2) - - Group: / - Dimensions: () - Data variables: - foo float64 8B 8.0 + >>> dt.sum(skipna=True, min_count=2) + + Group: / + Dimensions: () + Data variables: + foo float64 8B 8.0 """ return self.reduce( duck_array_ops.sum, @@ -846,69 +812,64 @@ def std( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - DataArray.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... 
) + >>> dt Group: / Dimensions: (time: 6) @@ -918,30 +879,31 @@ def std( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.std() + + >>> dt.std() Group: / Dimensions: () Data variables: foo float64 8B 1.02 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.std(skipna=False) + >>> dt.std(skipna=False) Group: / Dimensions: () Data variables: foo float64 8B nan - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> dt.std(skipna=True, ddof=1) - - Group: / - Dimensions: () - Data variables: - foo float64 8B 1.14 + >>> dt.std(skipna=True, ddof=1) + + Group: / + Dimensions: () + Data variables: + foo float64 8B 1.14 """ return self.reduce( duck_array_ops.std, @@ -963,69 +925,64 @@ def var( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - DataArray.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). 
+ ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -1035,30 +992,31 @@ def var( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.var() + + >>> dt.var() Group: / Dimensions: () Data variables: foo float64 8B 1.04 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.var(skipna=False) + >>> dt.var(skipna=False) Group: / Dimensions: () Data variables: foo float64 8B nan - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> dt.var(skipna=True, ddof=1) - - Group: / - Dimensions: () - Data variables: - foo float64 8B 1.3 + >>> dt.var(skipna=True, ddof=1) + + Group: / + Dimensions: () + Data variables: + foo float64 8B 1.3 """ return self.reduce( duck_array_ops.var, @@ -1079,66 +1037,61 @@ def median( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - DataArray.median - :ref:`agg` - User guide on reduction or aggregation operations. 
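All of these DataTree reductions apply node-by-node across the whole subtree. A brief sketch of that behaviour; the node layout here is invented for illustration:

import numpy as np
import xarray as xr

dt = xr.DataTree.from_dict(
    {
        "/": xr.Dataset({"foo": ("x", np.array([1.0, 3.0]))}),
        "/child": xr.Dataset({"bar": ("y", np.array([2.0, 4.0, np.nan]))}),
    }
)
# median() reduces every node independently: foo -> 2.0 at "/",
# bar -> 3.0 at "/child" (the NaN is skipped by default).
print(dt.median())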
- - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -1148,21 +1101,22 @@ def median( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.median() + + >>> dt.median() Group: / Dimensions: () Data variables: foo float64 8B 2.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.median(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo float64 8B nan + >>> dt.median(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo float64 8B nan """ return self.reduce( duck_array_ops.median, @@ -1182,61 +1136,56 @@ def nunique( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``nunique`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``nunique`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``nunique`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.nunique - Dataset.nunique - DataArray.nunique - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + Dataset.nunique + DataArray.nunique + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -1246,21 +1195,22 @@ def nunique( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.nunique() + + >>> dt.nunique() Group: / Dimensions: () Data variables: foo int64 8B 5 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. 
- >>> dt.nunique(skipna=False) - - Group: / - Dimensions: () - Data variables: - foo int64 8B 5 + >>> dt.nunique(skipna=False) + + Group: / + Dimensions: () + Data variables: + foo int64 8B 5 """ return self.reduce( duck_array_ops.nunique, @@ -1280,71 +1230,66 @@ def cumsum( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``cumsum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumsum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``cumsum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumsum - dask.array.cumsum - Dataset.cumsum - DataArray.cumsum - DataTree.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
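The Notes just below recommend the ``cumulative`` interface over ``cumsum``. A hedged sketch of the equivalent spelling on a plain DataArray, assuming the behaviour carries over to the variables in each DataTree node:

import numpy as np
import xarray as xr

da = xr.DataArray(np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan]), dims="time")

# The same running sum two ways; ``cumulative`` is the preferred spelling
# and can use numbagg when it is installed.
print(da.cumsum("time").values)
print(da.cumulative("time").sum().values)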
+ + Returns + ------- + reduced : DataTree + New DataTree with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + DataArray.cumsum + DataTree.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -1354,7 +1299,8 @@ def cumsum( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.cumsum() + + >>> dt.cumsum() Group: / Dimensions: (time: 6) @@ -1362,15 +1308,15 @@ def cumsum( Data variables: foo (time) float64 48B 1.0 3.0 6.0 6.0 8.0 8.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.cumsum(skipna=False) - - Group: / - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - foo (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan + >>> dt.cumsum(skipna=False) + + Group: / + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + foo (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -1390,71 +1336,66 @@ def cumprod( **kwargs: Any, ) -> Self: """ - Reduce this DataTree's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataTree - New DataTree with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - DataArray.cumprod - DataTree.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. 
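The ``cumulative`` method recommended here is xarray's expanding-window entry point. Assuming the DataTree method mirrors ``Dataset.cumulative``, as the See Also entry suggests, the preferred spelling of the cumulative sum shown in the previous method would be:

    # illustrative usage sketch, not from the patch: accumulate along
    # "time" via the better-supported cumulative API instead of cumsum()
    dt.cumulative("time").sum()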
``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> dt = xr.DataTree( - ... xr.Dataset( - ... data_vars=dict( - ... foo=("time", np.array([1, 2, 3, 0, 2, np.nan])) - ... ), - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=( - ... "time", - ... np.array(["a", "b", "c", "c", "b", "a"]), - ... ), - ... ), - ... ), - ... ) - >>> dt + Reduce this DataTree's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataTree + New DataTree with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + DataArray.cumprod + DataTree.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> dt = xr.DataTree( + ... xr.Dataset( + ... data_vars=dict(foo=("time", np.array([1, 2, 3, 0, 2, np.nan]))), + ... coords=dict( + ... time=( + ... "time", + ... pd.date_range("2001-01-01", freq="ME", periods=6), + ... ), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ), + ... ) + >>> dt Group: / Dimensions: (time: 6) @@ -1464,7 +1405,8 @@ def cumprod( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.cumprod() + + >>> dt.cumprod() Group: / Dimensions: (time: 6) @@ -1472,15 +1414,15 @@ def cumprod( Data variables: foo (time) float64 48B 1.0 2.0 6.0 0.0 0.0 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> dt.cumprod(skipna=False) - - Group: / - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - foo (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan + >>> dt.cumprod(skipna=False) + + Group: / + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + foo (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -1515,51 +1457,48 @@ def count( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." 
or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - DataArray.count - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -1568,11 +1507,12 @@ def count( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.count() - Size: 8B - Dimensions: () - Data variables: - da int64 8B 5 + + >>> ds.count() + Size: 8B + Dimensions: () + Data variables: + da int64 8B 5 """ return self.reduce( duck_array_ops.count, @@ -1590,51 +1530,48 @@ def all( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 78B Dimensions: (time: 6) Coordinates: @@ -1643,11 +1580,12 @@ def all( Data variables: da (time) bool 6B True True True True True False - >>> ds.all() - Size: 1B - Dimensions: () - Data variables: - da bool 1B False + + >>> ds.all() + Size: 1B + Dimensions: () + Data variables: + da bool 1B False """ return self.reduce( duck_array_ops.array_all, @@ -1665,51 +1603,48 @@ def any( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`agg` - User guide on reduction or aggregation operations. 
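All of these Dataset reducers share the ``dim`` signature described above; a minimal sketch of the three accepted forms, using a throwaway 2-D dataset (the names here are illustrative, not from the patch):

    import numpy as np
    import xarray as xr

    ds2d = xr.Dataset({"v": (("x", "y"), np.arange(6.0).reshape(2, 3))})
    ds2d.any(dim="x")          # reduce a single named dimension
    ds2d.any(dim=["x", "y"])   # reduce several dimensions at once
    ds2d.any(dim=...)          # "..." (or None) reduces over all dimensions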
- - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 78B Dimensions: (time: 6) Coordinates: @@ -1718,11 +1653,12 @@ def any( Data variables: da (time) bool 6B True True True True True False - >>> ds.any() - Size: 1B - Dimensions: () - Data variables: - da bool 1B True + + >>> ds.any() + Size: 1B + Dimensions: () + Data variables: + da bool 1B True """ return self.reduce( duck_array_ops.array_any, @@ -1741,56 +1677,53 @@ def max( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... 
pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -1799,19 +1732,20 @@ def max( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.max() + + >>> ds.max() Size: 8B Dimensions: () Data variables: da float64 8B 3.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.max(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan + >>> ds.max(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.max, @@ -1831,56 +1765,53 @@ def min( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
- - Returns - ------- - reduced : Dataset - New Dataset with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -1889,19 +1820,20 @@ def min( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.min() + + >>> ds.min() Size: 8B Dimensions: () Data variables: da float64 8B 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.min(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan + >>> ds.min(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.min, @@ -1921,60 +1853,57 @@ def mean( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). 
- keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -1983,19 +1912,20 @@ def mean( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.mean() + + >>> ds.mean() Size: 8B Dimensions: () Data variables: da float64 8B 1.6 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. 
- >>> ds.mean(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan + >>> ds.mean(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.mean, @@ -2016,66 +1946,63 @@ def prod( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -2084,27 +2011,28 @@ def prod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.prod() + + >>> ds.prod() Size: 8B Dimensions: () Data variables: da float64 8B 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.prod(skipna=False) + >>> ds.prod(skipna=False) Size: 8B Dimensions: () Data variables: da float64 8B nan - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> ds.prod(skipna=True, min_count=2) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 0.0 + >>> ds.prod(skipna=True, min_count=2) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 0.0 """ return self.reduce( duck_array_ops.prod, @@ -2126,66 +2054,63 @@ def sum( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
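To make the ``min_count`` rule above concrete: the example Dataset used throughout these docstrings has five non-NaN values, so the documented rule implies that requiring six valid values yields NA while requiring five does not (a sketch against that same ``ds``, with the implied results in comments):

    ds.sum(skipna=True, min_count=6)   # only 5 valid values, so da becomes nan
    ds.sum(skipna=True, min_count=5)   # threshold met, da stays 8.0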
- - Returns - ------- - reduced : Dataset - New Dataset with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -2194,27 +2119,28 @@ def sum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.sum() + + >>> ds.sum() Size: 8B Dimensions: () Data variables: da float64 8B 8.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.sum(skipna=False) + >>> ds.sum(skipna=False) Size: 8B Dimensions: () Data variables: da float64 8B nan - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. 
- >>> ds.sum(skipna=True, min_count=2) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 8.0 + >>> ds.sum(skipna=True, min_count=2) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 8.0 """ return self.reduce( duck_array_ops.sum, @@ -2236,63 +2162,60 @@ def std( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -2301,27 +2224,28 @@ def std( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.std() + + >>> ds.std() Size: 8B Dimensions: () Data variables: da float64 8B 1.02 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.std(skipna=False) + >>> ds.std(skipna=False) Size: 8B Dimensions: () Data variables: da float64 8B nan - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> ds.std(skipna=True, ddof=1) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.14 + >>> ds.std(skipna=True, ddof=1) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.14 """ return self.reduce( duck_array_ops.std, @@ -2343,63 +2267,60 @@ def var( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -2408,27 +2329,28 @@ def var( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.var() + + >>> ds.var() Size: 8B Dimensions: () Data variables: da float64 8B 1.04 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.var(skipna=False) + >>> ds.var(skipna=False) Size: 8B Dimensions: () Data variables: da float64 8B nan - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> ds.var(skipna=True, ddof=1) - Size: 8B - Dimensions: () - Data variables: - da float64 8B 1.3 + >>> ds.var(skipna=True, ddof=1) + Size: 8B + Dimensions: () + Data variables: + da float64 8B 1.3 """ return self.reduce( duck_array_ops.var, @@ -2449,60 +2371,57 @@ def median( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. 
If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -2511,19 +2430,20 @@ def median( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.median() + + >>> ds.median() Size: 8B Dimensions: () Data variables: da float64 8B 2.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. 
- >>> ds.median(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da float64 8B nan + >>> ds.median(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da float64 8B nan """ return self.reduce( duck_array_ops.median, @@ -2543,55 +2463,52 @@ def nunique( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``nunique`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``nunique`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``nunique`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.nunique - DataArray.nunique - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + DataArray.nunique + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -2600,19 +2517,20 @@ def nunique( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.nunique() + + >>> ds.nunique() Size: 8B Dimensions: () Data variables: da int64 8B 5 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.nunique(skipna=False) - Size: 8B - Dimensions: () - Data variables: - da int64 8B 5 + >>> ds.nunique(skipna=False) + Size: 8B + Dimensions: () + Data variables: + da int64 8B 5 """ return self.reduce( duck_array_ops.nunique, @@ -2632,65 +2550,62 @@ def cumsum( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``cumsum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumsum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``cumsum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumsum - dask.array.cumsum - DataArray.cumsum - Dataset.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. 
+ **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + DataArray.cumsum + Dataset.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -2699,21 +2614,22 @@ def cumsum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.cumsum() + + >>> ds.cumsum() Size: 48B Dimensions: (time: 6) Dimensions without coordinates: time Data variables: da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 8.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.cumsum(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan + >>> ds.cumsum(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 3.0 6.0 6.0 8.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -2733,65 +2649,62 @@ def cumprod( **kwargs: Any, ) -> Self: """ - Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - DataArray.cumprod - Dataset.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. 
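The ``cumulative`` note above can be sketched directly. This assumes the existing ``Dataset.cumulative`` builder and compares values only, since ``cumsum`` drops the ``time`` coordinate while ``cumulative`` keeps it:

import numpy as np
import xarray as xr

ds = xr.Dataset({"da": ("time", np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan]))})

# Direct cumulative sum, as documented in this hunk.
a = ds.cumsum(dim="time")

# The recommended spelling via the cumulative builder (faster with numbagg).
b = ds.cumulative("time").sum()

# Both should accumulate to [1, 3, 6, 6, 8, 8] under the skipna defaults.
print(np.allclose(a["da"].values, b["da"].values))  # expected: True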
- - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + DataArray.cumprod + Dataset.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -2800,21 +2713,22 @@ def cumprod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.cumprod() + + >>> ds.cumprod() Size: 48B Dimensions: (time: 6) Dimensions without coordinates: time Data variables: da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.cumprod(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan + >>> ds.cumprod(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 0.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -2849,59 +2763,57 @@ def count( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." 
or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.count() - Size: 8B - array(5) + + >>> da.count() + Size: 8B + array(5) """ return self.reduce( duck_array_ops.count, @@ -2918,59 +2830,57 @@ def all( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
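A per-dimension sketch of the ``count`` reduction documented in the hunk above (illustrative names and data; the doctests in this file reduce over every dimension at once):

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.array([[1.0, np.nan], [2.0, 3.0], [np.nan, np.nan]]),
    dims=("time", "station"),
)

# count() tallies non-NaN values; reducing only over "time" leaves one
# tally per station: two valid values in column 0, one in column 1.
print(da.count(dim="time").values)  # expected: [2 1]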
- - Returns - ------- - reduced : DataArray - New DataArray with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 6B array([ True, True, True, True, True, False]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.all() - Size: 1B - array(False) + + >>> da.all() + Size: 1B + array(False) """ return self.reduce( duck_array_ops.array_all, @@ -2987,59 +2897,57 @@ def any( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> da + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 6B array([ True, True, True, True, True, False]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.any() - Size: 1B - array(True) + + >>> da.any() + Size: 1B + array(True) """ return self.reduce( duck_array_ops.array_any, @@ -3057,70 +2965,68 @@ def max( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. 
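A side-by-side sketch of the two boolean reductions documented above; ``all`` requires every value along the reduced dimension to be True, ``any`` just one (names here are illustrative):

import numpy as np
import xarray as xr

flags = xr.DataArray(
    np.array([[True, True], [True, False]]), dims=("time", "sensor")
)

# Per sensor along "time": all() needs every reading True, any() needs one.
print(flags.all(dim="time").values)  # expected: [ True False]
print(flags.any(dim="time").values)  # expected: [ True  True]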
+ skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.max() + + >>> da.max() Size: 8B array(3.) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.max(skipna=False) - Size: 8B - array(nan) + >>> da.max(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.max, @@ -3139,70 +3045,68 @@ def min( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. 
``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.min() + + >>> da.min() Size: 8B array(0.) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.min(skipna=False) - Size: 8B - array(nan) + >>> da.min(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.min, @@ -3221,74 +3125,72 @@ def mean( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... 
), - ... ) - >>> da + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.mean() + + >>> da.mean() Size: 8B array(1.6) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.mean(skipna=False) - Size: 8B - array(nan) + >>> da.mean(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.mean, @@ -3308,86 +3210,84 @@ def prod( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.prod() + + >>> da.prod() Size: 8B array(0.) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.prod(skipna=False) + >>> da.prod(skipna=False) Size: 8B array(nan) - Specify ``min_count`` for finer control over when NaNs are ignored. 
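The ``min_count`` rule spelled out above is easiest to see where a single valid value survives the reduction; an illustrative sketch (``sum`` behaves the same way):

import numpy as np
import xarray as xr

da = xr.DataArray(np.array([2.0, np.nan, np.nan]), dims="x")

# With skipna=True alone the single valid value is enough.
print(float(da.prod(dim="x", skipna=True)))  # expected: 2.0

# Demanding at least two valid values turns the result into NaN.
print(float(da.prod(dim="x", skipna=True, min_count=2)))  # expected: nan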
+ Specify ``min_count`` for finer control over when NaNs are ignored. - >>> da.prod(skipna=True, min_count=2) - Size: 8B - array(0.) + >>> da.prod(skipna=True, min_count=2) + Size: 8B + array(0.) """ return self.reduce( duck_array_ops.prod, @@ -3408,86 +3308,84 @@ def sum( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.sum() + + >>> da.sum() Size: 8B array(8.) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.sum(skipna=False) + >>> da.sum(skipna=False) Size: 8B array(nan) - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> da.sum(skipna=True, min_count=2) - Size: 8B - array(8.) + >>> da.sum(skipna=True, min_count=2) + Size: 8B + array(8.) """ return self.reduce( duck_array_ops.sum, @@ -3508,83 +3406,81 @@ def std( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.std() + + >>> da.std() Size: 8B array(1.0198039) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.std(skipna=False) + >>> da.std(skipna=False) Size: 8B array(nan) - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> da.std(skipna=True, ddof=1) - Size: 8B - array(1.14017543) + >>> da.std(skipna=True, ddof=1) + Size: 8B + array(1.14017543) """ return self.reduce( duck_array_ops.std, @@ -3605,83 +3501,81 @@ def var( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. 
If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.var() + + >>> da.var() Size: 8B array(1.04) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.var(skipna=False) + >>> da.var(skipna=False) Size: 8B array(nan) - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. 
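As a cross-check on the ``ddof`` arithmetic in these examples: the five valid values [1, 2, 3, 0, 2] have mean 1.6 and squared deviations summing to 5.2, so ``ddof=0`` gives 5.2 / 5 = 1.04 and ``ddof=1`` gives 5.2 / 4 = 1.3:

import numpy as np
import xarray as xr

da = xr.DataArray(np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan]), dims="time")

# Population variance (ddof=0): sum of squared deviations / N.
print(float(da.var(skipna=True)))  # expected: 1.04

# Sample variance (ddof=1): the same numerator divided by N - 1.
print(float(da.var(skipna=True, ddof=1)))  # expected: 1.3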
- >>> da.var(skipna=True, ddof=1) - Size: 8B - array(1.3) + >>> da.var(skipna=True, ddof=1) + Size: 8B + array(1.3) """ return self.reduce( duck_array_ops.var, @@ -3701,74 +3595,72 @@ def median( **kwargs: Any, ) -> Self: """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... 
coords=dict(
+        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> da
        <xarray.DataArray (time: 6)> Size: 48B
        array([ 1.,  2.,  3.,  0.,  2., nan])
        Coordinates:
          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
-        >>> da.median()
+
+        >>> da.median()
        <xarray.DataArray ()> Size: 8B
        array(2.)
-        Use ``skipna`` to control whether NaNs are ignored.
+        Use ``skipna`` to control whether NaNs are ignored.
-        >>> da.median(skipna=False)
-        <xarray.DataArray ()> Size: 8B
-        array(nan)
+        >>> da.median(skipna=False)
+        <xarray.DataArray ()> Size: 8B
+        array(nan)
         """
         return self.reduce(
             duck_array_ops.median,
@@ -3787,69 +3679,67 @@ def nunique(
         **kwargs: Any,
     ) -> Self:
         """
-        Reduce this DataArray's data by applying ``nunique`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``nunique`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : DataArray
+            New DataArray with ``nunique`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        pandas.DataFrame.nunique
+        Dataset.nunique
+        :ref:`agg`
+            User guide on reduction or aggregation operations.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> da
        <xarray.DataArray (time: 6)> Size: 48B
        array([ 1.,  2.,  3.,  0.,  2., nan])
        Coordinates:
          * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
-        >>> da.nunique()
+
+        >>> da.nunique()
        <xarray.DataArray ()> Size: 8B
        array(5)
-        Use ``skipna`` to control whether NaNs are ignored.
+        Use ``skipna`` to control whether NaNs are ignored.
-        >>> da.nunique(skipna=False)
-        <xarray.DataArray ()> Size: 8B
-        array(5)
+        >>> da.nunique(skipna=False)
+        <xarray.DataArray ()> Size: 8B
+        array(5)
         """
         return self.reduce(
             duck_array_ops.nunique,
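To complement the 1-D doctest just above, a sketch of the new ``nunique`` with an explicit dimension on a 2-D DataArray (an illustrative construction, with expected values reasoned from the documented semantics rather than recorded output):

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.array([[1.0, 2.0, 2.0], [3.0, 3.0, 3.0]]), dims=("x", "y")
)

# Distinct values along "y" per row: {1, 2} then {3}.
print(da.nunique(dim="y").values)  # expected: [2 1]

# With no dim the whole array is reduced: {1, 2, 3}.
print(int(da.nunique()))  # expected: 3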
@@ -3868,85 +3758,83 @@ def cumsum(
         **kwargs: Any,
     ) -> Self:
         """
-        Reduce this DataArray's data by applying ``cumsum`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN).
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + DataArray.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.cumsum() + + >>> da.cumsum() Size: 48B array([1., 3., 6., 6., 8., 8.]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.cumsum(skipna=False) - Size: 48B - array([ 1., 3., 6., 6., 8., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumsum(skipna=False) + Size: 48B + array([ 1., 3., 6., 6., 8., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Self: """ - Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - DataArray.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. 
- - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + DataArray.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.cumprod() + + >>> da.cumprod() Size: 48B array([1., 2., 6., 0., 0., 0.]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.cumprod(skipna=False) - Size: 48B - array([ 1., 2., 6., 0., 0., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.cumprod(skipna=False) + Size: 48B + array([ 1., 2., 6., 0., 0., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Dataset: """ - Reduce this Dataset's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. 
``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -4145,13 +4028,14 @@ def count( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").count() - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) int64 24B 1 2 2 + + >>> ds.groupby("labels").count() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) int64 24B 1 2 2 """ if ( flox_available @@ -4183,59 +4067,56 @@ def all( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. 
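The flox fast path described in the Notes above is on by default and can be disabled globally, which is handy when checking that both branches agree. A sketch using the same toy data as the doctests (assumes ``flox`` is installed; ``use_flox`` is the xarray option consulted by the dispatch code in this file):

import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.array([True, True, True, True, True, False]),
    dims="time",
    coords=dict(
        time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
        labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
    ),
)
ds = xr.Dataset(dict(da=da))

# Default: the groupby reduction is dispatched to flox when available.
with_flox = ds.groupby("labels").all()

# Force the plain reduce() branch instead.
with xr.set_options(use_flox=False):
    without_flox = ds.groupby("labels").all()

assert with_flox.identical(without_flox)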
+ + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 78B Dimensions: (time: 6) Coordinates: @@ -4244,13 +4125,14 @@ def all( Data variables: da (time) bool 6B True True True True True False - >>> ds.groupby("labels").all() - Size: 27B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) bool 3B False True True + + >>> ds.groupby("labels").all() + Size: 27B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) bool 3B False True True """ if ( flox_available @@ -4282,59 +4164,56 @@ def any( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
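For the ``split_every`` mention in the ``**kwargs`` descriptions, a sketch of what forwarding it to a dask-backed reduction looks like. The array here is made up for illustration, and per the docstring the kwarg is handed on to the underlying ``dask.array`` reduction, where it sets the fan-in of the tree reduce (assumes ``dask`` is installed):

import numpy as np
import xarray as xr

# A chunked boolean array; any() reduces it lazily through dask.
flags = xr.DataArray(np.zeros(1_000, dtype=bool), dims="time").chunk(time=100)

# kwargs not consumed by xarray are forwarded to dask.array.any;
# split_every=4 merges four chunk-level results per tree level.
result = flags.any(dim="time", split_every=4)
print(result.compute().item())  # False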
+ + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 78B Dimensions: (time: 6) Coordinates: @@ -4343,13 +4222,14 @@ def any( Data variables: da (time) bool 6B True True True True True False - >>> ds.groupby("labels").any() - Size: 27B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) bool 3B True True True + + >>> ds.groupby("labels").any() + Size: 27B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) bool 3B True True True """ if ( flox_available @@ -4382,64 +4262,61 @@ def max( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." 
or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -4448,7 +4325,8 @@ def max( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").max() + + >>> ds.groupby("labels").max() Size: 48B Dimensions: (labels: 3) Coordinates: @@ -4456,15 +4334,15 @@ def max( Data variables: da (labels) float64 24B 1.0 2.0 3.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").max(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 3.0 + >>> ds.groupby("labels").max(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 3.0 """ if ( flox_available @@ -4499,64 +4377,61 @@ def min( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -4565,7 +4440,8 @@ def min( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").min() + + >>> ds.groupby("labels").min() Size: 48B Dimensions: (labels: 3) Coordinates: @@ -4573,15 +4449,15 @@ def min( Data variables: da (labels) float64 24B 1.0 2.0 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. 
- >>> ds.groupby("labels").min(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 0.0 + >>> ds.groupby("labels").min(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 0.0 """ if ( flox_available @@ -4616,66 +4492,63 @@ def mean( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -4684,7 +4557,8 @@ def mean( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").mean() + + >>> ds.groupby("labels").mean() Size: 48B Dimensions: (labels: 3) Coordinates: @@ -4692,15 +4566,15 @@ def mean( Data variables: da (labels) float64 24B 1.0 2.0 1.5 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").mean(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 1.5 + >>> ds.groupby("labels").mean(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 1.5 """ if ( flox_available @@ -4736,72 +4610,69 @@ def prod( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
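``min_count`` (documented above for ``prod`` and ``sum``) only changes the result when missing values are being skipped. A quick throwaway illustration, not part of the patch:

import numpy as np
import xarray as xr

da = xr.DataArray(np.array([1.0, np.nan, np.nan]), dims="x")

# skipna=True alone keeps the single valid value...
print(da.prod().item())             # 1.0

# ...but min_count=2 requires two valid values, so the result becomes NaN.
print(da.prod(min_count=2).item())  # nan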
- - Returns - ------- - reduced : Dataset - New Dataset with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -4810,7 +4681,8 @@ def prod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").prod() + + >>> ds.groupby("labels").prod() Size: 48B Dimensions: (labels: 3) Coordinates: @@ -4818,9 +4690,9 @@ def prod( Data variables: da (labels) float64 24B 1.0 4.0 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").prod(skipna=False) + >>> ds.groupby("labels").prod(skipna=False) Size: 48B Dimensions: (labels: 3) Coordinates: @@ -4828,15 +4700,15 @@ def prod( Data variables: da (labels) float64 24B nan 4.0 0.0 - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> ds.groupby("labels").prod(skipna=True, min_count=2) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 0.0 + >>> ds.groupby("labels").prod(skipna=True, min_count=2) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 0.0 """ if ( flox_available @@ -4874,72 +4746,69 @@ def sum( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... 
dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -4948,7 +4817,8 @@ def sum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").sum() + + >>> ds.groupby("labels").sum() Size: 48B Dimensions: (labels: 3) Coordinates: @@ -4956,9 +4826,9 @@ def sum( Data variables: da (labels) float64 24B 1.0 4.0 3.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").sum(skipna=False) + >>> ds.groupby("labels").sum(skipna=False) Size: 48B Dimensions: (labels: 3) Coordinates: @@ -4966,15 +4836,15 @@ def sum( Data variables: da (labels) float64 24B nan 4.0 3.0 - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. 
- >>> ds.groupby("labels").sum(skipna=True, min_count=2) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 4.0 3.0 + >>> ds.groupby("labels").sum(skipna=True, min_count=2) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 4.0 3.0 """ if ( flox_available @@ -5012,69 +4882,66 @@ def std( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. 
+ keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -5083,7 +4950,8 @@ def std( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").std() + + >>> ds.groupby("labels").std() Size: 48B Dimensions: (labels: 3) Coordinates: @@ -5091,9 +4959,9 @@ def std( Data variables: da (labels) float64 24B 0.0 0.0 1.5 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").std(skipna=False) + >>> ds.groupby("labels").std(skipna=False) Size: 48B Dimensions: (labels: 3) Coordinates: @@ -5101,15 +4969,15 @@ def std( Data variables: da (labels) float64 24B nan 0.0 1.5 - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> ds.groupby("labels").std(skipna=True, ddof=1) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 2.121 + >>> ds.groupby("labels").std(skipna=True, ddof=1) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 2.121 """ if ( flox_available @@ -5147,69 +5015,66 @@ def var( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. 
- keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -5218,7 +5083,8 @@ def var( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").var() + + >>> ds.groupby("labels").var() Size: 48B Dimensions: (labels: 3) Coordinates: @@ -5226,9 +5092,9 @@ def var( Data variables: da (labels) float64 24B 0.0 0.0 2.25 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").var(skipna=False) + >>> ds.groupby("labels").var(skipna=False) Size: 48B Dimensions: (labels: 3) Coordinates: @@ -5236,15 +5102,15 @@ def var( Data variables: da (labels) float64 24B nan 0.0 2.25 - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> ds.groupby("labels").var(skipna=True, ddof=1) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 0.0 4.5 + >>> ds.groupby("labels").var(skipna=True, ddof=1) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 0.0 4.5 """ if ( flox_available @@ -5281,66 +5147,63 @@ def median( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." 
or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -5349,7 +5212,8 @@ def median( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").median() + + >>> ds.groupby("labels").median() Size: 48B Dimensions: (labels: 3) Coordinates: @@ -5357,15 +5221,15 @@ def median( Data variables: da (labels) float64 24B 1.0 2.0 1.5 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").median(skipna=False) - Size: 48B - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' - Data variables: - da (labels) float64 24B nan 2.0 1.5 + >>> ds.groupby("labels").median(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) float64 24B nan 2.0 1.5 """ return self.reduce( duck_array_ops.median, @@ -5385,63 +5249,60 @@ def nunique( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``nunique`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). 
- keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``nunique`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``nunique`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.nunique - Dataset.nunique - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + Dataset.nunique + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -5450,35 +5311,33 @@ def nunique( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").nunique() - Use ``skipna`` to control whether NaNs are ignored. 
+ >>> ds.groupby("labels").nunique() + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) int64 24B 2 1 2 - >>> ds.groupby("labels").nunique(skipna=False) - """ - if ( - flox_available - and OPTIONS["use_flox"] - and contains_only_chunked_or_numpy(self._obj) - ): - return self._flox_reduce( - func="nunique", - dim=dim, - skipna=skipna, - numeric_only=False, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.nunique, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").nunique(skipna=False) + Size: 48B + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) int64 24B 2 1 2 + """ + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def cumsum( self, @@ -5489,71 +5348,68 @@ def cumsum( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``cumsum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumsum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``cumsum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumsum - dask.array.cumsum - Dataset.cumsum - Dataset.cumulative - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." 
or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + Dataset.cumulative + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -5562,21 +5418,22 @@ def cumsum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").cumsum() + + >>> ds.groupby("labels").cumsum() Size: 48B Dimensions: (time: 6) Dimensions without coordinates: time Data variables: da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 1.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").cumsum(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 nan + >>> ds.groupby("labels").cumsum(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 3.0 4.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -5596,71 +5453,68 @@ def cumprod( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - Dataset.cumulative - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + Dataset.cumulative + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. 
datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -5669,21 +5523,22 @@ def cumprod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").cumprod() + + >>> ds.groupby("labels").cumprod() Size: 48B Dimensions: (time: 6) Dimensions without coordinates: time Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 1.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").cumprod(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 nan + >>> ds.groupby("labels").cumprod(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 3.0 0.0 4.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -5725,59 +5580,56 @@ def count( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. 
+ If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -5786,13 +5638,14 @@ def count( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").count() - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) int64 24B 1 3 1 + + >>> ds.resample(time="3ME").count() + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 24B 1 3 1 """ if ( flox_available @@ -5824,59 +5677,56 @@ def all( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 78B Dimensions: (time: 6) Coordinates: @@ -5885,13 +5735,14 @@ def all( Data variables: da (time) bool 6B True True True True True False - >>> ds.resample(time="3ME").all() - Size: 27B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool 3B True True False + + >>> ds.resample(time="3ME").all() + Size: 27B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool 3B True True False """ if ( flox_available @@ -5923,59 +5774,56 @@ def any( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. 
Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 78B Dimensions: (time: 6) Coordinates: @@ -5984,13 +5832,14 @@ def any( Data variables: da (time) bool 6B True True True True True False - >>> ds.resample(time="3ME").any() - Size: 27B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool 3B True True True + + >>> ds.resample(time="3ME").any() + Size: 27B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool 3B True True True """ if ( flox_available @@ -6023,64 +5872,61 @@ def max( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). 
- keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -6089,7 +5935,8 @@ def max( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").max() + + >>> ds.resample(time="3ME").max() Size: 48B Dimensions: (time: 3) Coordinates: @@ -6097,15 +5944,15 @@ def max( Data variables: da (time) float64 24B 1.0 3.0 2.0 - Use ``skipna`` to control whether NaNs are ignored. 
+ Use ``skipna`` to control whether NaNs are ignored. - >>> ds.resample(time="3ME").max(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 3.0 nan + >>> ds.resample(time="3ME").max(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 3.0 nan """ if ( flox_available @@ -6140,64 +5987,61 @@ def min( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
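To make the ``**kwargs`` forwarding concrete: on the flox path these keywords reach flox, otherwise they fall through to the underlying array function. A minimal sketch, assuming flox and dask are installed and reusing the ``ds`` built in the Examples below; ``method`` is a flox option and ``split_every`` a dask reduction option, so each only has effect on its own code path.

    import xarray as xr

    chunked = ds.chunk({"time": 2})
    # With flox enabled (the default when it is installed), kwargs go to flox.
    chunked.resample(time="3ME").min(method="blockwise")
    # With flox disabled, kwargs are passed on to dask.array.min instead.
    with xr.set_options(use_flox=False):
        chunked.resample(time="3ME").min(split_every=2)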
+ + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -6206,7 +6050,8 @@ def min( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").min() + + >>> ds.resample(time="3ME").min() Size: 48B Dimensions: (time: 3) Coordinates: @@ -6214,15 +6059,15 @@ def min( Data variables: da (time) float64 24B 1.0 0.0 2.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.resample(time="3ME").min(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 0.0 nan + >>> ds.resample(time="3ME").min(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 0.0 nan """ if ( flox_available @@ -6257,66 +6102,63 @@ def mean( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. 
datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -6325,7 +6167,8 @@ def mean( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").mean() + + >>> ds.resample(time="3ME").mean() Size: 48B Dimensions: (time: 3) Coordinates: @@ -6333,15 +6176,15 @@ def mean( Data variables: da (time) float64 24B 1.0 1.667 2.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.resample(time="3ME").mean(skipna=False) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B 1.0 1.667 nan + >>> ds.resample(time="3ME").mean(skipna=False) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B 1.0 1.667 nan """ if ( flox_available @@ -6377,72 +6220,69 @@ def prod( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``prod`` along some dimension(s). 
- - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -6451,7 +6291,8 @@ def prod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").prod() + + >>> ds.resample(time="3ME").prod() Size: 48B Dimensions: (time: 3) Coordinates: @@ -6459,9 +6300,9 @@ def prod( Data variables: da (time) float64 24B 1.0 0.0 2.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.resample(time="3ME").prod(skipna=False) + >>> ds.resample(time="3ME").prod(skipna=False) Size: 48B Dimensions: (time: 3) Coordinates: @@ -6469,15 +6310,15 @@ def prod( Data variables: da (time) float64 24B 1.0 0.0 nan - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> ds.resample(time="3ME").prod(skipna=True, min_count=2) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 0.0 nan + >>> ds.resample(time="3ME").prod(skipna=True, min_count=2) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B nan 0.0 nan """ if ( flox_available @@ -6515,72 +6356,69 @@ def sum( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. 
Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. 
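A quick worked check of the ``min_count`` rule with plain NumPy, using the three ``3ME`` bin contents read off the example data below (restated here only for illustration):

    import numpy as np

    # A bin needs at least min_count=2 valid values for its sum to be kept;
    # otherwise the result is NaN.
    bins = [[1.0], [2.0, 3.0, 0.0], [2.0, np.nan]]
    sums = [
        np.nansum(b) if np.count_nonzero(~np.isnan(b)) >= 2 else np.nan
        for b in bins
    ]
    # sums == [nan, 5.0, nan] -- the same result as
    # ds.resample(time="3ME").sum(skipna=True, min_count=2) in the doctests.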
+ + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -6589,7 +6427,8 @@ def sum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").sum() + + >>> ds.resample(time="3ME").sum() Size: 48B Dimensions: (time: 3) Coordinates: @@ -6597,9 +6436,9 @@ def sum( Data variables: da (time) float64 24B 1.0 5.0 2.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.resample(time="3ME").sum(skipna=False) + >>> ds.resample(time="3ME").sum(skipna=False) Size: 48B Dimensions: (time: 3) Coordinates: @@ -6607,15 +6446,15 @@ def sum( Data variables: da (time) float64 24B 1.0 5.0 nan - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> ds.resample(time="3ME").sum(skipna=True, min_count=2) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 5.0 nan + >>> ds.resample(time="3ME").sum(skipna=True, min_count=2) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B nan 5.0 nan """ if ( flox_available @@ -6653,69 +6492,66 @@ def std( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. 
- See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -6724,7 +6560,8 @@ def std( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").std() + + >>> ds.resample(time="3ME").std() Size: 48B Dimensions: (time: 3) Coordinates: @@ -6732,9 +6569,9 @@ def std( Data variables: da (time) float64 24B 0.0 1.247 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.resample(time="3ME").std(skipna=False) + >>> ds.resample(time="3ME").std(skipna=False) Size: 48B Dimensions: (time: 3) Coordinates: @@ -6742,15 +6579,15 @@ def std( Data variables: da (time) float64 24B 0.0 1.247 nan - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. 
- >>> ds.resample(time="3ME").std(skipna=True, ddof=1) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 1.528 nan + >>> ds.resample(time="3ME").std(skipna=True, ddof=1) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B nan 1.528 nan """ if ( flox_available @@ -6788,69 +6625,66 @@ def var( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). 
+ ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -6859,7 +6693,8 @@ def var( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").var() + + >>> ds.resample(time="3ME").var() Size: 48B Dimensions: (time: 3) Coordinates: @@ -6867,9 +6702,9 @@ def var( Data variables: da (time) float64 24B 0.0 1.556 0.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.resample(time="3ME").var(skipna=False) + >>> ds.resample(time="3ME").var(skipna=False) Size: 48B Dimensions: (time: 3) Coordinates: @@ -6877,15 +6712,15 @@ def var( Data variables: da (time) float64 24B 0.0 1.556 nan - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> ds.resample(time="3ME").var(skipna=True, ddof=1) - Size: 48B - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 24B nan 2.333 nan + >>> ds.resample(time="3ME").var(skipna=True, ddof=1) + Size: 48B + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 24B nan 2.333 nan """ if ( flox_available @@ -6922,66 +6757,63 @@ def median( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). 
- keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
)
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
         <xarray.Dataset> Size: 120B
         Dimensions:  (time: 6)
         Coordinates:
@@ -6990,7 +6822,8 @@ def median(
         Data variables:
             da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan

-        >>> ds.resample(time="3ME").median()
+
+        >>> ds.resample(time="3ME").median()
         <xarray.Dataset> Size: 48B
         Dimensions:  (time: 3)
         Coordinates:
@@ -6998,15 +6831,15 @@ def median(
         Data variables:
             da       (time) float64 24B 1.0 2.0 2.0

-        Use ``skipna`` to control whether NaNs are ignored.
+        Use ``skipna`` to control whether NaNs are ignored.

-        >>> ds.resample(time="3ME").median(skipna=False)
-        <xarray.Dataset> Size: 48B
-        Dimensions:  (time: 3)
-        Coordinates:
-          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
-        Data variables:
-            da       (time) float64 24B 1.0 2.0 nan
+        >>> ds.resample(time="3ME").median(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) float64 24B 1.0 2.0 nan
         """
         return self.reduce(
             duck_array_ops.median,
@@ -7026,63 +6859,60 @@ def nunique(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``nunique`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
-        keep_attrs : bool or None, optional
-            If True, ``attrs`` will be copied from the original
-            object to the new one. If False, the new object will be
-            returned without attributes.
-        **kwargs : Any
-            Additional keyword arguments passed on to the appropriate array
-            function for calculating ``nunique`` on this object's data.
-            These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : Dataset
-            New Dataset with ``nunique`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        pandas.DataFrame.nunique
-        Dataset.nunique
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=(
-        ...             "time",
-        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
-        ...         ),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> ds = xr.Dataset(dict(da=da))
-        >>> ds
+        Reduce this Dataset's data by applying ``nunique`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``nunique`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : Dataset
+            New Dataset with ``nunique`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        pandas.DataFrame.nunique
+        Dataset.nunique
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        Unlike the other resampling reductions, ``nunique`` does not currently
+        dispatch to the ``flox`` package. It is always computed with
+        ``duck_array_ops.nunique``, regardless of the value of the
+        ``use_flox`` option.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> ds = xr.Dataset(dict(da=da))
+        >>> ds
         <xarray.Dataset> Size: 120B
         Dimensions:  (time: 6)
         Coordinates:
@@ -7091,35 +6921,33 @@ def nunique(
         Data variables:
             da       (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan

-        >>> ds.resample(time="3ME").nunique()
-        Use ``skipna`` to control whether NaNs are ignored.
+        >>> ds.resample(time="3ME").nunique()
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) int64 24B 1 3 1

-        >>> ds.resample(time="3ME").nunique(skipna=False)
+        Use ``skipna`` to control whether NaNs are ignored.
+
+        >>> ds.resample(time="3ME").nunique(skipna=False)
+        <xarray.Dataset> Size: 48B
+        Dimensions:  (time: 3)
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+        Data variables:
+            da       (time) int64 24B 1 3 2
         """
-        if (
-            flox_available
-            and OPTIONS["use_flox"]
-            and contains_only_chunked_or_numpy(self._obj)
-        ):
-            return self._flox_reduce(
-                func="nunique",
-                dim=dim,
-                skipna=skipna,
-                numeric_only=False,
-                # fill_value=fill_value,
-                keep_attrs=keep_attrs,
-                **kwargs,
-            )
-        else:
-            return self.reduce(
-                duck_array_ops.nunique,
-                dim=dim,
-                skipna=skipna,
-                numeric_only=False,
-                keep_attrs=keep_attrs,
-                **kwargs,
-            )
+        return self.reduce(
+            duck_array_ops.nunique,
+            dim=dim,
+            skipna=skipna,
+            numeric_only=False,
+            keep_attrs=keep_attrs,
+            **kwargs,
+        )

     def cumsum(
         self,
@@ -7130,71 +6958,68 @@ def cumsum(
         **kwargs: Any,
     ) -> Dataset:
         """
-        Reduce this Dataset's data by applying ``cumsum`` along some dimension(s).
-
-        Parameters
-        ----------
-        dim : str, Iterable of Hashable, "..." or None, default: None
-            Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"``
-            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
-            If "...", will reduce over all dimensions.
-        skipna : bool or None, optional
-            If True, skip missing values (as marked by NaN). By default, only
-            skips missing values for float dtypes; other dtypes either do not
-            have a sentinel missing value (int) or ``skipna=True`` has not been
-            implemented (object, datetime64 or timedelta64).
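
The ``duck_array_ops.nunique`` helper called above is added in
``xarray/core/duck_array_ops.py`` (see the diffstat at the top of this patch);
its definition is not visible in this hunk. A minimal numpy-only sketch of the
semantics the docstring describes (illustrative only, not the patch's actual
implementation, and handling just a single integer ``axis``) would be:

import numpy as np


def nunique_sketch(values, axis=None, skipna=True):
    """Count distinct values along ``axis``, pandas-style (illustrative)."""
    values = np.asarray(values)
    if axis is None:
        flat = values.ravel()
        if skipna and flat.dtype.kind == "f":
            # Drop NaN before counting, mirroring skipna=True for floats.
            flat = flat[~np.isnan(flat)]
        return np.unique(flat).size
    # Move the reduced axis last and count uniques row by row. np.unique
    # collapses repeated NaNs, so NaN counts as one extra value when kept.
    moved = np.moveaxis(values, axis, -1)
    out = np.empty(moved.shape[:-1], dtype=np.int64)
    for idx in np.ndindex(*moved.shape[:-1]):
        row = moved[idx]
        if skipna and row.dtype.kind == "f":
            row = row[~np.isnan(row)]
        out[idx] = np.unique(row).size
    return out


# E.g. the third "3ME" bin above holds [2.0, nan]:
# nunique_sketch(np.array([2.0, np.nan]))               -> 1
# nunique_sketch(np.array([2.0, np.nan]), skipna=False) -> 2
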
- keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumsum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``cumsum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumsum - dask.array.cumsum - Dataset.cumsum - Dataset.cumulative - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + Dataset.cumulative + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. 
``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -7203,21 +7028,22 @@ def cumsum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").cumsum() + + >>> ds.resample(time="3ME").cumsum() Size: 48B Dimensions: (time: 6) Dimensions without coordinates: time Data variables: da (time) float64 48B 1.0 2.0 5.0 5.0 2.0 2.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.resample(time="3ME").cumsum(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 5.0 5.0 2.0 nan + >>> ds.resample(time="3ME").cumsum(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 5.0 5.0 2.0 nan """ return self.reduce( duck_array_ops.cumsum, @@ -7237,71 +7063,68 @@ def cumprod( **kwargs: Any, ) -> Dataset: """ - Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : Dataset - New Dataset with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - Dataset.cumulative - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds + Reduce this Dataset's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + Dataset.cumulative + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds Size: 120B Dimensions: (time: 6) Coordinates: @@ -7310,21 +7133,22 @@ def cumprod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").cumprod() + + >>> ds.resample(time="3ME").cumprod() Size: 48B Dimensions: (time: 6) Dimensions without coordinates: time Data variables: da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 2.0 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> ds.resample(time="3ME").cumprod(skipna=False) - Size: 48B - Dimensions: (time: 6) - Dimensions without coordinates: time - Data variables: - da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 nan + >>> ds.resample(time="3ME").cumprod(skipna=False) + Size: 48B + Dimensions: (time: 6) + Dimensions without coordinates: time + Data variables: + da (time) float64 48B 1.0 2.0 6.0 0.0 2.0 nan """ return self.reduce( duck_array_ops.cumprod, @@ -7366,69 +7190,67 @@ def count( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." 
or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - DataArray.count - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + DataArray.count + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 labels (time) >> da.groupby("labels").count() - Size: 24B - array([1, 2, 2]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + + >>> da.groupby("labels").count() + Size: 24B + array([1, 2, 2]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7458,69 +7280,67 @@ def all( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... 
), + ... ) + >>> da Size: 6B array([ True, True, True, True, True, False]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").all() - Size: 3B - array([False, True, True]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + + >>> da.groupby("labels").all() + Size: 3B + array([False, True, True]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7550,69 +7370,67 @@ def any( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... 
coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 6B array([ True, True, True, True, True, False]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").any() - Size: 3B - array([ True, True, True]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + + >>> da.groupby("labels").any() + Size: 3B + array([ True, True, True]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7643,82 +7461,80 @@ def max( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").max() + + >>> da.groupby("labels").max() Size: 24B array([1., 2., 3.]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.groupby("labels").max(skipna=False) - Size: 24B - array([nan, 2., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + >>> da.groupby("labels").max(skipna=False) + Size: 24B + array([nan, 2., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7751,82 +7567,80 @@ def min( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> da + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").min() + + >>> da.groupby("labels").min() Size: 24B array([1., 2., 0.]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.groupby("labels").min(skipna=False) - Size: 24B - array([nan, 2., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + >>> da.groupby("labels").min(skipna=False) + Size: 24B + array([nan, 2., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7859,84 +7673,82 @@ def mean( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. 
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").mean() + + >>> da.groupby("labels").mean() Size: 24B array([1. , 2. , 1.5]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Use ``skipna`` to control whether NaNs are ignored. 
+ Use ``skipna`` to control whether NaNs are ignored. - >>> da.groupby("labels").mean(skipna=False) - Size: 24B - array([nan, 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + >>> da.groupby("labels").mean(skipna=False) + Size: 24B + array([nan, 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -7970,98 +7782,96 @@ def prod( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). 
+ min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").prod() + + >>> da.groupby("labels").prod() Size: 24B array([1., 4., 0.]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.groupby("labels").prod(skipna=False) + >>> da.groupby("labels").prod(skipna=False) Size: 24B array([nan, 4., 0.]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> da.groupby("labels").prod(skipna=True, min_count=2) - Size: 24B - array([nan, 4., 0.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + >>> da.groupby("labels").prod(skipna=True, min_count=2) + Size: 24B + array([nan, 4., 0.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -8097,98 +7907,96 @@ def sum( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). 
- min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. 
Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").sum() + + >>> da.groupby("labels").sum() Size: 24B array([1., 4., 3.]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.groupby("labels").sum(skipna=False) + >>> da.groupby("labels").sum(skipna=False) Size: 24B array([nan, 4., 3.]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> da.groupby("labels").sum(skipna=True, min_count=2) - Size: 24B - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + >>> da.groupby("labels").sum(skipna=True, min_count=2) + Size: 24B + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -8224,95 +8032,93 @@ def std( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. 
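The Notes above say flox-specific keyword arguments are forwarded through ``**kwargs``. A sketch of what that looks like in practice, assuming ``flox`` and ``dask`` are installed; ``method`` and ``engine`` are flox options, not names introduced by this patch::

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1, 2, 3, 0, 2, np.nan]),
        dims="time",
        coords=dict(
            time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
            labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
        ),
    )

    # Forwarded to flox's groupby_reduce; "map-reduce" is flox's default
    # strategy for chunked data.
    result = da.chunk({"time": 3}).groupby("labels").sum(method="map-reduce")
    print(result.values)  # array([1., 4., 3.]) once computed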
- - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").std() + + >>> da.groupby("labels").std() Size: 24B array([0. , 0. , 1.5]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.groupby("labels").std(skipna=False) + >>> da.groupby("labels").std(skipna=False) Size: 24B array([nan, 0. , 1.5]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> da.groupby("labels").std(skipna=True, ddof=1) - Size: 24B - array([ nan, 0. , 2.12132034]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + >>> da.groupby("labels").std(skipna=True, ddof=1) + Size: 24B + array([ nan, 0. 
, 2.12132034]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -8348,95 +8154,93 @@ def var( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
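The ``ddof`` arithmetic behind the ``std`` doctest values above can be checked by hand on the 'c' group (values 3.0 and 0.0); ``var`` follows the same divisor rule::

    import xarray as xr

    c = xr.DataArray([3.0, 0.0], dims="x")  # the 'c' group from the example above

    # ddof=0 (default): divisor N = 2 -> var = (1.5**2 + 1.5**2) / 2 = 2.25
    print(c.var("x").item(), c.std("x").item())  # 2.25 1.5

    # ddof=1: divisor N - 1 = 1 -> var = 4.5, std = sqrt(4.5) ~= 2.12132034
    print(c.var("x", ddof=1).item(), c.std("x", ddof=1).item())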
+ + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").var() + + >>> da.groupby("labels").var() Size: 24B array([0. , 0. , 2.25]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.groupby("labels").var(skipna=False) + >>> da.groupby("labels").var(skipna=False) Size: 24B array([ nan, 0. , 2.25]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> da.groupby("labels").var(skipna=True, ddof=1) - Size: 24B - array([nan, 0. , 4.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + >>> da.groupby("labels").var(skipna=True, ddof=1) + Size: 24B + array([nan, 0. , 4.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ if ( flox_available @@ -8471,84 +8275,82 @@ def median( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. 
- Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").median() + + >>> da.groupby("labels").median() Size: 24B array([1. , 2. , 1.5]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.groupby("labels").median(skipna=False) - Size: 24B - array([nan, 2. , 1.5]) - Coordinates: - * labels (labels) object 24B 'a' 'b' 'c' + >>> da.groupby("labels").median(skipna=False) + Size: 24B + array([nan, 2. , 1.5]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ return self.reduce( duck_array_ops.median, @@ -8567,95 +8369,87 @@ def nunique( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``nunique`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." 
or None, default: None - Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``nunique`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``nunique`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.nunique - DataArray.nunique - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + DataArray.nunique + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").nunique() - Use ``skipna`` to control whether NaNs are ignored. + >>> da.groupby("labels").nunique() + Size: 24B + array([2, 1, 2]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. - >>> da.groupby("labels").nunique(skipna=False) + >>> da.groupby("labels").nunique(skipna=False) + Size: 24B + array([2, 1, 2]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' """ - if ( - flox_available - and OPTIONS["use_flox"] - and contains_only_chunked_or_numpy(self._obj) - ): - return self._flox_reduce( - func="nunique", - dim=dim, - skipna=skipna, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.nunique, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def cumsum( self, @@ -8666,91 +8460,89 @@ def cumsum( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``cumsum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumsum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``cumsum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumsum - dask.array.cumsum - DataArray.cumsum - DataArray.cumulative - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
) - >>> da + Reduce this DataArray's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + DataArray.cumsum + DataArray.cumulative + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").cumsum() + + >>> da.groupby("labels").cumsum() Size: 48B array([1., 2., 3., 3., 4., 1.]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").cumsum(skipna=False) - Size: 48B - array([ 1., 2., 3., 3., 4., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").cumsum(skipna=False) + Size: 48B + array([ 1., 2., 3., 3., 4., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) DataArray: """ - Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). 
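The note above recommending the ``cumulative`` accessor over ``cumsum`` can be made concrete on a plain DataArray. A sketch, assuming both spellings skip NaN by default::

    import numpy as np
    import xarray as xr

    da = xr.DataArray([1.0, 2.0, np.nan], dims="time")

    a = da.cumsum("time")            # array([1., 3., 3.])
    b = da.cumulative("time").sum()  # the newer, better-supported spelling

    print(np.allclose(a, b))  # True -- both skip the trailing NaN here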
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - DataArray.cumprod - DataArray.cumulative - :ref:`groupby` - User guide on groupby operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up groupby computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the GroupBy dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + DataArray.cumprod + DataArray.cumulative + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up groupby computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. 
datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").cumprod() + + >>> da.groupby("labels").cumprod() Size: 48B array([1., 2., 3., 0., 4., 1.]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").cumprod(skipna=False) - Size: 48B - array([ 1., 2., 3., 0., 4., nan]) - Coordinates: - * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").cumprod(skipna=False) + Size: 48B + array([ 1., 2., 3., 0., 4., nan]) + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) DataArray: """ - Reduce this DataArray's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - DataArray.count - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. 
+ **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + DataArray.count + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").count() - Size: 24B - array([1, 3, 1]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + >>> da.resample(time="3ME").count() + Size: 24B + array([1, 3, 1]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -8986,69 +8774,67 @@ def all( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. 
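The ``count`` doctest result above can be cross-checked against pandas, which uses the same right-closed month-end bins (assuming pandas >= 2.2 for the ``3ME`` alias)::

    import numpy as np
    import pandas as pd

    s = pd.Series(
        [1, 2, 3, 0, 2, np.nan],
        index=pd.date_range("2001-01-01", freq="ME", periods=6),
    )

    # Bins: (..., Jan 31], (Jan 31, Apr 30], (Apr 30, Jul 31]
    print(s.resample("3ME").count().to_list())  # [1, 3, 1] -- NaN is not counted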
+ If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 6B array([ True, True, True, True, True, False]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").all() - Size: 3B - array([ True, True, False]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + >>> da.resample(time="3ME").all() + Size: 3B + array([ True, True, False]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9078,69 +8864,67 @@ def any( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``any`` along some dimension(s). 
+ + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 6B array([ True, True, True, True, True, False]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").any() - Size: 3B - array([ True, True, True]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + >>> da.resample(time="3ME").any() + Size: 3B + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9171,82 +8955,80 @@ def max( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. 
Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").max() + + >>> da.resample(time="3ME").max() Size: 24B array([1., 3., 2.]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.resample(time="3ME").max(skipna=False) - Size: 24B - array([ 1., 3., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3ME").max(skipna=False) + Size: 24B + array([ 1., 3., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9279,82 +9061,80 @@ def min( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. 
- skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 labels (time) >> da.resample(time="3ME").min() + + >>> da.resample(time="3ME").min() Size: 24B array([1., 0., 2.]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.resample(time="3ME").min(skipna=False) - Size: 24B - array([ 1., 0., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3ME").min(skipna=False) + Size: 24B + array([ 1., 0., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9387,84 +9167,82 @@ def mean( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").mean() + + >>> da.resample(time="3ME").mean() Size: 24B array([1. , 1.66666667, 2. ]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.resample(time="3ME").mean(skipna=False) - Size: 24B - array([1. , 1.66666667, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3ME").mean(skipna=False) + Size: 24B + array([1. , 1.66666667, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9498,98 +9276,96 @@ def prod( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
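The bin arithmetic behind the ``mean`` doctest above is worth spelling out once: the three 3ME bins hold [1], [2, 3, 0] and [2, nan], so with NaNs skipped the means are 1, 5/3 and 2. A minimal sketch:

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1, 2, 3, 0, 2, np.nan]),
        dims="time",
        coords={"time": pd.date_range("2001-01-01", freq="ME", periods=6)},
    )

    # Matches the doctest: [1. 1.66666667 2.]
    print(da.resample(time="3ME").mean().values)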
- - Returns - ------- - reduced : DataArray - New DataArray with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 labels (time) >> da.resample(time="3ME").prod() + + >>> da.resample(time="3ME").prod() Size: 24B array([1., 0., 2.]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.resample(time="3ME").prod(skipna=False) + >>> da.resample(time="3ME").prod(skipna=False) Size: 24B array([ 1., 0., nan]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> da.resample(time="3ME").prod(skipna=True, min_count=2) - Size: 24B - array([nan, 0., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3ME").prod(skipna=True, min_count=2) + Size: 24B + array([nan, 0., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9625,98 +9401,96 @@ def sum( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... 
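The ``min_count`` behaviour in the ``prod`` doctest can be isolated on a plain DataArray, away from resampling (a sketch):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.array([2.0, np.nan, np.nan]), dims="x")

    # skipna=True drops the NaNs, leaving one valid value.
    print(float(da.prod()))                          # 2.0
    # min_count=2 requires at least two valid values, so the result is NaN.
    print(float(da.prod(skipna=True, min_count=2)))  # nan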
) - >>> da + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").sum() + + >>> da.resample(time="3ME").sum() Size: 24B array([1., 5., 2.]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.resample(time="3ME").sum(skipna=False) + >>> da.resample(time="3ME").sum(skipna=False) Size: 24B array([ 1., 5., nan]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. 
- >>> da.resample(time="3ME").sum(skipna=True, min_count=2) - Size: 24B - array([nan, 5., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3ME").sum(skipna=True, min_count=2) + Size: 24B + array([nan, 5., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9752,95 +9526,93 @@ def std( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. 
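To see why ``min_count=2`` blanks exactly the first and last bins of the ``sum`` doctest above: those bins contain a single valid value each (January holds only 1, and May/June holds 2 plus a NaN). A sketch:

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1, 2, 3, 0, 2, np.nan]),
        dims="time",
        coords={"time": pd.date_range("2001-01-01", freq="ME", periods=6)},
    )

    # Matches the doctest: [nan  5. nan]
    print(da.resample(time="3ME").sum(skipna=True, min_count=2).values)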
+ keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").std() + + >>> da.resample(time="3ME").std() Size: 24B array([0. , 1.24721913, 0. ]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.resample(time="3ME").std(skipna=False) + >>> da.resample(time="3ME").std(skipna=False) Size: 24B array([0. , 1.24721913, nan]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> da.resample(time="3ME").std(skipna=True, ddof=1) - Size: 24B - array([ nan, 1.52752523, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3ME").std(skipna=True, ddof=1) + Size: 24B + array([ nan, 1.52752523, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9876,95 +9648,93 @@ def var( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. 
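The two ``ddof`` settings in the ``std`` doctest differ only in the divisor, sqrt(sum((x - mean)**2) / (N - ddof)). Checking the middle bin's values [2, 3, 0] directly:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.array([2.0, 3.0, 0.0]), dims="x")

    print(float(da.std()))        # 1.2472... (ddof=0, divide by N)
    print(float(da.std(ddof=1)))  # 1.5275... (unbiased, divide by N - 1)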
If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 labels (time) >> da.resample(time="3ME").var() + + >>> da.resample(time="3ME").var() Size: 24B array([0. , 1.55555556, 0. ]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.resample(time="3ME").var(skipna=False) + >>> da.resample(time="3ME").var(skipna=False) Size: 24B array([0. , 1.55555556, nan]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> da.resample(time="3ME").var(skipna=True, ddof=1) - Size: 24B - array([ nan, 2.33333333, nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3ME").var(skipna=True, ddof=1) + Size: 24B + array([ nan, 2.33333333, nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ if ( flox_available @@ -9999,84 +9769,82 @@ def median( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. 
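As the ``var`` doctest suggests, ``var`` is simply the square of ``std`` at the same ``ddof``; a one-assert sketch on the middle bin's values:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.array([2.0, 3.0, 0.0]), dims="x")

    # 1.52752523**2 == 2.33333333, matching the two doctests' middle bins.
    assert np.isclose(float(da.var(ddof=1)), float(da.std(ddof=1)) ** 2)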
+ skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").median() + + >>> da.resample(time="3ME").median() Size: 24B array([1., 2., 2.]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> da.resample(time="3ME").median(skipna=False) - Size: 24B - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3ME").median(skipna=False) + Size: 24B + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.median, @@ -10095,95 +9863,87 @@ def nunique( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``nunique`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``nunique`` on this object's data. 
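A short sketch contrasting the resampled ``median`` above with ``mean`` on the same data: the middle bin [2, 3, 0] has mean 5/3 but median 2, since the median only cares about rank order:

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1, 2, 3, 0, 2, np.nan]),
        dims="time",
        coords={"time": pd.date_range("2001-01-01", freq="ME", periods=6)},
    )

    print(da.resample(time="3ME").median().values)  # [1. 2. 2.]
    print(da.resample(time="3ME").mean().values)    # [1. 1.66666667 2.]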
-        These could include dask-specific kwargs like ``split_every``.
-
-        Returns
-        -------
-        reduced : DataArray
-            New DataArray with ``nunique`` applied to its data and the
-            indicated dimension(s) removed
-
-        See Also
-        --------
-        pandas.DataFrame.nunique
-        DataArray.nunique
-        :ref:`resampling`
-            User guide on resampling operations.
-
-        Notes
-        -----
-        Use the ``flox`` package to significantly speed up resampling computations,
-        especially with dask arrays. Xarray will use flox by default if installed.
-        Pass flox-specific keyword arguments in ``**kwargs``.
-        See the `flox documentation <https://flox.readthedocs.io>`_ for more.
-
-        Examples
-        --------
-        >>> da = xr.DataArray(
-        ...     np.array([1, 2, 3, 0, 2, np.nan]),
-        ...     dims="time",
-        ...     coords=dict(
-        ...         time=(
-        ...             "time",
-        ...             pd.date_range("2001-01-01", freq="ME", periods=6),
-        ...         ),
-        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
-        ...     ),
-        ... )
-        >>> da
+        Reduce this DataArray's data by applying ``nunique`` along some dimension(s).
+
+        Parameters
+        ----------
+        dim : str, Iterable of Hashable, "..." or None, default: None
+            Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"``
+            or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions.
+            If "...", will reduce over all dimensions.
+        skipna : bool or None, optional
+            If True, skip missing values (as marked by NaN). By default, only
+            skips missing values for float dtypes; other dtypes either do not
+            have a sentinel missing value (int) or ``skipna=True`` has not been
+            implemented (object, datetime64 or timedelta64).
+        keep_attrs : bool or None, optional
+            If True, ``attrs`` will be copied from the original
+            object to the new one. If False, the new object will be
+            returned without attributes.
+        **kwargs : Any
+            Additional keyword arguments passed on to the appropriate array
+            function for calculating ``nunique`` on this object's data.
+            These could include dask-specific kwargs like ``split_every``.
+
+        Returns
+        -------
+        reduced : DataArray
+            New DataArray with ``nunique`` applied to its data and the
+            indicated dimension(s) removed
+
+        See Also
+        --------
+        pandas.DataFrame.nunique
+        DataArray.nunique
+        :ref:`resampling`
+            User guide on resampling operations.
+
+        Notes
+        -----
+        ``flox`` is not currently used for ``nunique``: this reduction always
+        falls back to xarray's generic ``reduce`` machinery, even when ``flox``
+        is installed and the data are dask-backed.
+
+        Examples
+        --------
+        >>> da = xr.DataArray(
+        ...     np.array([1, 2, 3, 0, 2, np.nan]),
+        ...     dims="time",
+        ...     coords=dict(
+        ...         time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)),
+        ...         labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
+        ...     ),
+        ... )
+        >>> da
         <xarray.DataArray (time: 6)> Size: 48B
         array([ 1.,  2.,  3.,  0.,  2., nan])
         Coordinates:
           * time     (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30
            labels   (time) <U1 24B 'a' 'b' 'c' 'c' 'b' 'a'
-        >>> da.resample(time="3ME").nunique()
-        Use ``skipna`` to control whether NaNs are ignored.
+        >>> da.resample(time="3ME").nunique()
+        <xarray.DataArray (time: 3)> Size: 24B
+        array([1, 3, 2])
+        Coordinates:
+          * time     (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31
+
+        Use ``skipna`` to control whether NaNs are ignored.
- >>> da.resample(time="3ME").nunique(skipna=False) + >>> da.resample(time="3ME").nunique(skipna=False) + Size: 24B + array([1, 3, 2]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox_available - and OPTIONS["use_flox"] - and contains_only_chunked_or_numpy(self._obj) - ): - return self._flox_reduce( - func="nunique", - dim=dim, - skipna=skipna, - # fill_value=fill_value, - keep_attrs=keep_attrs, - **kwargs, - ) - else: - return self.reduce( - duck_array_ops.nunique, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.nunique, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def cumsum( self, @@ -10194,91 +9954,89 @@ def cumsum( **kwargs: Any, ) -> DataArray: """ - Reduce this DataArray's data by applying ``cumsum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumsum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : DataArray - New DataArray with ``cumsum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumsum - dask.array.cumsum - DataArray.cumsum - DataArray.cumulative - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
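The ``duck_array_ops.nunique`` function called above is defined elsewhere in this patch and is not shown in this hunk. As a rough mental model only (an assumption about the semantics, not the patch's actual implementation), a NumPy-only reduction that counts distinct values along an axis could look like:

    import numpy as np

    def nunique_along(values: np.ndarray, axis: int = -1, skipna: bool = True) -> np.ndarray:
        # Count distinct values along one axis; skipna=True drops NaNs before
        # counting, mirroring pandas.DataFrame.nunique's dropna=True default.
        def _count(vec: np.ndarray) -> int:
            if skipna:
                vec = vec[~np.isnan(vec)]
            return len(np.unique(vec))

        return np.apply_along_axis(_count, axis, values)

    # One row per resample bin: [1, 1, 2] has 2 distinct values, [2, 3, 0] has 3.
    bins = np.array([[1.0, 1.0, 2.0], [2.0, 3.0, 0.0]])
    print(nunique_along(bins, axis=-1))  # [2 3]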
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + DataArray.cumsum + DataArray.cumulative + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").cumsum() + + >>> da.resample(time="3ME").cumsum() Size: 48B array([1., 2., 5., 5., 2., 2.]) Coordinates: labels (time) >> da.resample(time="3ME").cumsum(skipna=False) - Size: 48B - array([ 1., 2., 5., 5., 2., nan]) - Coordinates: - labels (time) >> da.resample(time="3ME").cumsum(skipna=False) + Size: 48B + array([ 1., 2., 5., 5., 2., nan]) + Coordinates: + labels (time) DataArray: """ - Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. - If "...", will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool or None, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False, the new object will be - returned without attributes. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
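One point worth highlighting from the ``cumsum`` doctest above: the running sum restarts at each resample bin, and the result keeps the full time dimension instead of collapsing it. A sketch:

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1, 2, 3, 0, 2, np.nan]),
        dims="time",
        coords={"time": pd.date_range("2001-01-01", freq="ME", periods=6)},
    )

    # Bins [1], [2, 3, 0], [2, nan] accumulate independently: [1. 2. 5. 5. 2. 2.]
    print(da.resample(time="3ME").cumsum().values)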
- - Returns - ------- - reduced : DataArray - New DataArray with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - DataArray.cumprod - DataArray.cumulative - :ref:`resampling` - User guide on resampling operations. - - Notes - ----- - Use the ``flox`` package to significantly speed up resampling computations, - especially with dask arrays. Xarray will use flox by default if installed. - Pass flox-specific keyword arguments in ``**kwargs``. - See the `flox documentation `_ for more. - - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 0, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=( - ... "time", - ... pd.date_range("2001-01-01", freq="ME", periods=6), - ... ), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da + Reduce this DataArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over the Resample dimensions. + If "...", will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool or None, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False, the new object will be + returned without attributes. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + DataArray.cumprod + DataArray.cumulative + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Use the ``flox`` package to significantly speed up resampling computations, + especially with dask arrays. Xarray will use flox by default if installed. + Pass flox-specific keyword arguments in ``**kwargs``. + See the `flox documentation `_ for more. + + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="ME", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> da Size: 48B array([ 1., 2., 3., 0., 2., nan]) Coordinates: * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").cumprod() + + >>> da.resample(time="3ME").cumprod() Size: 48B array([1., 2., 6., 0., 2., 2.]) Coordinates: labels (time) >> da.resample(time="3ME").cumprod(skipna=False) - Size: 48B - array([ 1., 2., 6., 0., 2., nan]) - Coordinates: - labels (time) >> da.resample(time="3ME").cumprod(skipna=False) + Size: 48B + array([ 1., 2., 6., 0., 2., nan]) + Coordinates: + labels (time) Self: """ - Reduce this NamedArray's data by applying ``count`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``count`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``count`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.count - dask.dataframe.DataFrame.count - Dataset.count - DataArray.count - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.count() - Size: 8B - array(5) + + >>> na.count() + Size: 8B + array(5) """ return self.reduce( duck_array_ops.count, @@ -81,46 +82,47 @@ def all( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``all`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``all`` on this object's data. - These could include dask-specific kwargs like ``split_every``. 
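The ``NamedArray`` doctests in this file exercise the internal ``xarray.namedarray.core`` API; a minimal standalone ``count`` example (internal API, subject to change):

    import numpy as np
    from xarray.namedarray.core import NamedArray

    na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan]))

    # count ignores the NaN, so five of the six entries are counted.
    print(na.count().data)  # 5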
- - Returns - ------- - reduced : NamedArray - New NamedArray with ``all`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - DataArray.all - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray( - ... "x", np.array([True, True, True, True, True, False], dtype=bool) - ... ) - >>> na + Reduce this NamedArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", np.array([True, True, True, True, True, False], dtype=bool) + ... ) + >>> na Size: 6B array([ True, True, True, True, True, False]) - >>> na.all() - Size: 1B - array(False) + + >>> na.all() + Size: 1B + array(False) """ return self.reduce( duck_array_ops.array_all, @@ -134,46 +136,47 @@ def any( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``any`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``any`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``any`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - DataArray.any - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray( - ... "x", np.array([True, True, True, True, True, False], dtype=bool) - ... ) - >>> na + Reduce this NamedArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. 
+ + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", np.array([True, True, True, True, True, False], dtype=bool) + ... ) + >>> na Size: 6B array([ True, True, True, True, True, False]) - >>> na.any() - Size: 1B - array(True) + + >>> na.any() + Size: 1B + array(True) """ return self.reduce( duck_array_ops.array_any, @@ -189,55 +192,56 @@ def max( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``max`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``max`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``max`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - DataArray.max - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.max() + + >>> na.max() Size: 8B array(3.) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.max(skipna=False) - Size: 8B - array(nan) + >>> na.max(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.max, @@ -254,55 +258,56 @@ def min( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``min`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." 
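The same pattern applies to ``max`` with and without ``skipna`` (again via the internal NamedArray API):

    import numpy as np
    from xarray.namedarray.core import NamedArray

    na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan]))

    print(na.max().data)              # 3.0 (NaN skipped by default for floats)
    print(na.max(skipna=False).data)  # nan (NaN propagates)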
or None, default: None - Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``min`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``min`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - DataArray.min - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.min() + + >>> na.min() Size: 8B array(0.) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.min(skipna=False) - Size: 8B - array(nan) + >>> na.min(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.min, @@ -319,59 +324,60 @@ def mean( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``mean`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``mean`` on this object's data. 
- These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``mean`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - DataArray.mean - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.mean() + + >>> na.mean() Size: 8B array(1.6) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.mean(skipna=False) - Size: 8B - array(nan) + >>> na.mean(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.mean, @@ -389,71 +395,72 @@ def prod( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``prod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. 
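The ``min_count`` rule above reduces to a simple guard. A minimal numpy sketch of the documented semantics (a hypothetical helper, not xarray's actual code path):

    import numpy as np

    def prod_with_min_count(values, min_count=0):
        # skip NaNs, but return NaN when fewer than `min_count` valid
        # values remain after masking
        valid = values[~np.isnan(values)]
        if valid.size < min_count:
            return np.nan
        return valid.prod()

    prod_with_min_count(np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan]), min_count=2)  # -> 0.0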
- **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``prod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``prod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - DataArray.prod - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.prod() + + >>> na.prod() Size: 8B array(0.) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.prod(skipna=False) + >>> na.prod(skipna=False) Size: 8B array(nan) - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> na.prod(skipna=True, min_count=2) - Size: 8B - array(0.) + >>> na.prod(skipna=True, min_count=2) + Size: 8B + array(0.) """ return self.reduce( duck_array_ops.prod, @@ -472,71 +479,72 @@ def sum( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``sum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." 
or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - min_count : int or None, optional - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``sum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``sum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - DataArray.sum - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.sum() + + >>> na.sum() Size: 8B array(8.) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. 
- >>> na.sum(skipna=False) + >>> na.sum(skipna=False) Size: 8B array(nan) - Specify ``min_count`` for finer control over when NaNs are ignored. + Specify ``min_count`` for finer control over when NaNs are ignored. - >>> na.sum(skipna=True, min_count=2) - Size: 8B - array(8.) + >>> na.sum(skipna=True, min_count=2) + Size: 8B + array(8.) """ return self.reduce( duck_array_ops.sum, @@ -555,68 +563,69 @@ def std( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``std`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``std`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``std`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - DataArray.std - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. 
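The ``N - ddof`` divisor described above can be checked against the numbers in the ``std`` examples below. A quick numpy verification using the five non-NaN values from the example data:

    import numpy as np

    x = np.array([1.0, 2.0, 3.0, 0.0, 2.0])
    ss = ((x - x.mean()) ** 2).sum()
    np.sqrt(ss / x.size)        # ddof=0 (biased)   -> 1.0198039...
    np.sqrt(ss / (x.size - 1))  # ddof=1 (unbiased) -> 1.14017543...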
+ + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.std() + + >>> na.std() Size: 8B array(1.0198039) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.std(skipna=False) + >>> na.std(skipna=False) Size: 8B array(nan) - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> na.std(skipna=True, ddof=1) - Size: 8B - array(1.14017543) + >>> na.std(skipna=True, ddof=1) + Size: 8B + array(1.14017543) """ return self.reduce( duck_array_ops.std, @@ -635,68 +644,69 @@ def var( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``var`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - ddof : int, default: 0 - “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, - where ``N`` represents the number of elements. - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``var`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``var`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - DataArray.var - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
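For float data, the ``skipna`` behaviour documented above amounts to masking NaNs before reducing. A minimal sketch with a hypothetical helper (not the ``duck_array_ops`` code path):

    import numpy as np

    def var_skipna(values, ddof=0):
        # drop NaNs instead of letting them propagate, as np.var would
        valid = values[~np.isnan(values)]
        return valid.var(ddof=ddof)

    var_skipna(np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan]))  # -> 1.04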
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.var() + + >>> na.var() Size: 8B array(1.04) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.var(skipna=False) + >>> na.var(skipna=False) Size: 8B array(nan) - Specify ``ddof=1`` for an unbiased estimate. + Specify ``ddof=1`` for an unbiased estimate. - >>> na.var(skipna=True, ddof=1) - Size: 8B - array(1.3) + >>> na.var(skipna=True, ddof=1) + Size: 8B + array(1.3) """ return self.reduce( duck_array_ops.var, @@ -714,59 +724,60 @@ def median( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``median`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - DataArray.median - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.median() + + >>> na.median() Size: 8B array(2.) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.median(skipna=False) - Size: 8B - array(nan) + >>> na.median(skipna=False) + Size: 8B + array(nan) """ return self.reduce( duck_array_ops.median, @@ -783,54 +794,55 @@ def nunique( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``nunique`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``nunique`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``nunique`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - pandas.DataFrame.nunique - Dataset.nunique - DataArray.nunique - :ref:`agg` - User guide on reduction or aggregation operations. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``nunique`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``nunique``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``nunique`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``nunique`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.nunique + Dataset.nunique + DataArray.nunique + :ref:`agg` + User guide on reduction or aggregation operations. 
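For intuition, distinct values can be counted along an axis without a Python loop by sorting and then counting boundaries between runs of equal elements. The sketch below illustrates one possible approach and is not necessarily how ``duck_array_ops.nunique`` is implemented; because ``NaN != NaN``, every NaN counts as a distinct value here:

    import numpy as np

    def nunique_sketch(values, axis=-1):
        # sort along `axis`, then count positions where consecutive
        # elements differ; assumes the reduced axis is non-empty
        values = np.sort(values, axis=axis)
        changes = np.diff(values, axis=axis) != 0
        return 1 + changes.sum(axis=axis)

    nunique_sketch(np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan]))  # -> 5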
+ + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.nunique() + + >>> na.nunique() Size: 8B array(5) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.nunique(skipna=False) - Size: 8B - array(5) + >>> na.nunique(skipna=False) + Size: 8B + array(5) """ return self.reduce( duck_array_ops.nunique, @@ -847,64 +859,65 @@ def cumsum( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``cumsum`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumsum`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``cumsum`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumsum - dask.array.cumsum - Dataset.cumsum - DataArray.cumsum - NamedArray.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + DataArray.cumsum + NamedArray.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. 
+ + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.cumsum() + + >>> na.cumsum() Size: 48B array([1., 3., 6., 6., 8., 8.]) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.cumsum(skipna=False) - Size: 48B - array([ 1., 3., 6., 6., 8., nan]) + >>> na.cumsum(skipna=False) + Size: 48B + array([ 1., 3., 6., 6., 8., nan]) """ return self.reduce( duck_array_ops.cumsum, @@ -921,64 +934,65 @@ def cumprod( **kwargs: Any, ) -> Self: """ - Reduce this NamedArray's data by applying ``cumprod`` along some dimension(s). - - Parameters - ---------- - dim : str, Iterable of Hashable, "..." or None, default: None - Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. - skipna : bool or None, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or ``skipna=True`` has not been - implemented (object, datetime64 or timedelta64). - **kwargs : Any - Additional keyword arguments passed on to the appropriate array - function for calculating ``cumprod`` on this object's data. - These could include dask-specific kwargs like ``split_every``. - - Returns - ------- - reduced : NamedArray - New NamedArray with ``cumprod`` applied to its data and the - indicated dimension(s) removed - - See Also - -------- - numpy.cumprod - dask.array.cumprod - Dataset.cumprod - DataArray.cumprod - NamedArray.cumulative - :ref:`agg` - User guide on reduction or aggregation operations. - - Notes - ----- - Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. - - Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) - and better supported. ``cumsum`` and ``cumprod`` may be deprecated - in the future. - - Examples - -------- - >>> from xarray.namedarray.core import NamedArray - >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) - >>> na + Reduce this NamedArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
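For float inputs, the ``skipna=True`` outputs in the ``cumsum`` examples above and the ``cumprod`` examples below match numpy's NaN-aware cumulative functions, which substitute the operation's identity element for missing values:

    import numpy as np

    vals = np.array([1.0, 2.0, 3.0, 0.0, 2.0, np.nan])
    np.nancumsum(vals)   # -> [1., 3., 6., 6., 8., 8.]
    np.nancumprod(vals)  # -> [1., 2., 6., 0., 0., 0.]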
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + DataArray.cumprod + NamedArray.cumulative + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. datetime64 and timedelta64 dtypes are treated as numeric for aggregation operations. + + Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) + and better supported. ``cumsum`` and ``cumprod`` may be deprecated + in the future. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray("x", np.array([1, 2, 3, 0, 2, np.nan])) + >>> na Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.cumprod() + + >>> na.cumprod() Size: 48B array([1., 2., 6., 0., 0., 0.]) - Use ``skipna`` to control whether NaNs are ignored. + Use ``skipna`` to control whether NaNs are ignored. - >>> na.cumprod(skipna=False) - Size: 48B - array([ 1., 2., 6., 0., 0., nan]) + >>> na.cumprod(skipna=False) + Size: 48B + array([ 1., 2., 6., 0., 0., nan]) """ return self.reduce( duck_array_ops.cumprod, diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 8602b3181c1..05d5b40003e 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -239,28 +239,34 @@ class ExtraKwarg(NamedTuple): docs=_SKIPNA_DOCSTRING, kwarg="skipna: bool | None = None,", call="skipna=skipna,", - example="""\n - Use ``skipna`` to control whether NaNs are ignored. - - >>> {calculation}(skipna=False)""", + example=( + "\n \n" + " Use ``skipna`` to control whether NaNs are ignored.\n" + " \n" + " >>> {calculation}(skipna=False)" + ), ) min_count = ExtraKwarg( docs=_MINCOUNT_DOCSTRING, kwarg="min_count: int | None = None,", call="min_count=min_count,", - example="""\n - Specify ``min_count`` for finer control over when NaNs are ignored. - - >>> {calculation}(skipna=True, min_count=2)""", + example=( + "\n \n" + " Specify ``min_count`` for finer control over when NaNs are ignored.\n" + " \n" + " >>> {calculation}(skipna=True, min_count=2)" + ), ) ddof = ExtraKwarg( docs=_DDOF_DOCSTRING, kwarg="ddof: int = 0,", call="ddof=ddof,", - example="""\n - Specify ``ddof=1`` for an unbiased estimate. - - >>> {calculation}(skipna=True, ddof=1)""", + example=( + "\n \n" + " Specify ``ddof=1`` for an unbiased estimate.\n" + " \n" + " >>> {calculation}(skipna=True, ddof=1)" + ), ) @@ -424,11 +430,11 @@ def generate_example(self, method): else: extra_examples = "" + blank_line = 8 * " " return f""" Examples --------{created} - >>> {self.datastructure.example_var_name} - + >>> {self.datastructure.example_var_name}\n{blank_line}\n >>> {calculation}(){extra_examples}""" @@ -444,7 +450,12 @@ def generate_code(self, method, has_keep_attrs): # median isn't enabled yet, because it would break if a single group was present in multiple # chunks. 
The non-flox code path will just rechunk every group to a single chunk and execute the median - method_is_not_flox_supported = method.name in ("median", "cumsum", "cumprod") + method_is_not_flox_supported = method.name in ( + "median", + "cumsum", + "cumprod", + "nunique", + ) if method_is_not_flox_supported: indent = 12 else: From cf9f6a30ff332b52d14cdef7c20d5a8a9939344d Mon Sep 17 00:00:00 2001 From: Ewan Short Date: Fri, 21 Nov 2025 21:32:07 +1100 Subject: [PATCH 6/8] remove bad blank lines --- xarray/core/_aggregations.py | 98 ---------------------------- xarray/namedarray/_aggregations.py | 14 ---- xarray/util/generate_aggregations.py | 2 +- 3 files changed, 1 insertion(+), 113 deletions(-) diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index fc037be15ee..f8fa59532e8 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -97,7 +97,6 @@ def count( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.count() Group: / @@ -181,7 +180,6 @@ def all( Data variables: foo (time) bool 6B True True True True True False - >>> dt.all() Group: / @@ -265,7 +263,6 @@ def any( Data variables: foo (time) bool 6B True True True True True False - >>> dt.any() Group: / @@ -350,7 +347,6 @@ def max( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.max() Group: / @@ -445,7 +441,6 @@ def min( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.min() Group: / @@ -544,7 +539,6 @@ def mean( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.mean() Group: / @@ -650,7 +644,6 @@ def prod( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.prod() Group: / @@ -766,7 +759,6 @@ def sum( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.sum() Group: / @@ -879,7 +871,6 @@ def std( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.std() Group: / @@ -992,7 +983,6 @@ def var( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.var() Group: / @@ -1101,7 +1091,6 @@ def median( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.median() Group: / @@ -1195,7 +1184,6 @@ def nunique( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.nunique() Group: / @@ -1299,7 +1287,6 @@ def cumsum( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.cumsum() Group: / @@ -1405,7 +1392,6 @@ def cumprod( Data variables: foo (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> dt.cumprod() Group: / @@ -1507,7 +1493,6 @@ def count( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.count() Size: 8B Dimensions: () @@ -1580,7 +1565,6 @@ def all( Data variables: da (time) bool 6B True True True True True False - >>> ds.all() Size: 1B Dimensions: () @@ -1653,7 +1637,6 @@ def any( Data variables: da (time) bool 6B True True True True True False - >>> ds.any() Size: 1B Dimensions: () @@ -1732,7 +1715,6 @@ def max( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.max() Size: 8B Dimensions: () @@ -1820,7 +1802,6 @@ def min( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.min() Size: 8B Dimensions: () @@ -1912,7 +1893,6 @@ def mean( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.mean() Size: 8B Dimensions: () @@ -2011,7 +1991,6 @@ def prod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.prod() Size: 8B Dimensions: () @@ -2119,7 +2098,6 @@ def sum( Data variables: da (time) 
float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.sum() Size: 8B Dimensions: () @@ -2224,7 +2202,6 @@ def std( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.std() Size: 8B Dimensions: () @@ -2329,7 +2306,6 @@ def var( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.var() Size: 8B Dimensions: () @@ -2430,7 +2406,6 @@ def median( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.median() Size: 8B Dimensions: () @@ -2517,7 +2492,6 @@ def nunique( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.nunique() Size: 8B Dimensions: () @@ -2614,7 +2588,6 @@ def cumsum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.cumsum() Size: 48B Dimensions: (time: 6) @@ -2713,7 +2686,6 @@ def cumprod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.cumprod() Size: 48B Dimensions: (time: 6) @@ -2810,7 +2782,6 @@ def count( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.count() Size: 8B array(5) @@ -2877,7 +2848,6 @@ def all( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.all() Size: 1B array(False) @@ -2944,7 +2914,6 @@ def any( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.any() Size: 1B array(True) @@ -3017,7 +2986,6 @@ def max( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.max() Size: 8B array(3.) @@ -3097,7 +3065,6 @@ def min( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.min() Size: 8B array(0.) @@ -3181,7 +3148,6 @@ def mean( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.mean() Size: 8B array(1.6) @@ -3272,7 +3238,6 @@ def prod( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.prod() Size: 8B array(0.) @@ -3370,7 +3335,6 @@ def sum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.sum() Size: 8B array(8.) @@ -3465,7 +3429,6 @@ def std( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.std() Size: 8B array(1.0198039) @@ -3560,7 +3523,6 @@ def var( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.var() Size: 8B array(1.04) @@ -3651,7 +3613,6 @@ def median( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.median() Size: 8B array(2.) @@ -3730,7 +3691,6 @@ def nunique( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.nunique() Size: 8B array(5) @@ -3819,7 +3779,6 @@ def cumsum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.cumsum() Size: 48B array([1., 3., 6., 6., 8., 8.]) @@ -3914,7 +3873,6 @@ def cumprod( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 labels (time) >> da.cumprod() Size: 48B array([1., 2., 6., 0., 0., 0.]) @@ -4028,7 +3986,6 @@ def count( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").count() Size: 48B Dimensions: (labels: 3) @@ -4125,7 +4082,6 @@ def all( Data variables: da (time) bool 6B True True True True True False - >>> ds.groupby("labels").all() Size: 27B Dimensions: (labels: 3) @@ -4222,7 +4178,6 @@ def any( Data variables: da (time) bool 6B True True True True True False - >>> ds.groupby("labels").any() Size: 27B Dimensions: (labels: 3) @@ -4325,7 +4280,6 @@ def max( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").max() Size: 48B Dimensions: (labels: 3) @@ -4440,7 +4394,6 @@ def min( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").min() Size: 48B Dimensions: (labels: 3) @@ -4557,7 +4510,6 @@ def mean( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").mean() Size: 48B Dimensions: (labels: 3) @@ -4681,7 +4633,6 @@ def prod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").prod() Size: 48B Dimensions: (labels: 3) @@ -4817,7 +4768,6 @@ def sum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").sum() Size: 48B Dimensions: (labels: 3) @@ -4950,7 +4900,6 @@ def std( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").std() Size: 48B Dimensions: (labels: 3) @@ -5083,7 +5032,6 @@ def var( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").var() Size: 48B Dimensions: (labels: 3) @@ -5212,7 +5160,6 @@ def median( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").median() Size: 48B Dimensions: (labels: 3) @@ -5311,7 +5258,6 @@ def nunique( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").nunique() Size: 48B Dimensions: (labels: 3) @@ -5418,7 +5364,6 @@ def cumsum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").cumsum() Size: 48B Dimensions: (time: 6) @@ -5523,7 +5468,6 @@ def cumprod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.groupby("labels").cumprod() Size: 48B Dimensions: (time: 6) @@ -5638,7 +5582,6 @@ def count( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").count() Size: 48B Dimensions: (time: 3) @@ -5735,7 +5678,6 @@ def all( Data variables: da (time) bool 6B True True True True True False - >>> ds.resample(time="3ME").all() Size: 27B Dimensions: (time: 3) @@ -5832,7 +5774,6 @@ def any( Data variables: da (time) bool 6B True True True True True False - >>> ds.resample(time="3ME").any() Size: 27B Dimensions: (time: 3) @@ -5935,7 +5876,6 @@ def max( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").max() Size: 48B Dimensions: (time: 3) @@ -6050,7 +5990,6 @@ def min( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").min() Size: 48B Dimensions: (time: 3) @@ -6167,7 +6106,6 @@ def mean( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").mean() Size: 48B Dimensions: (time: 3) @@ -6291,7 +6229,6 @@ def prod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").prod() Size: 48B Dimensions: (time: 3) @@ -6427,7 +6364,6 @@ def sum( Data variables: da 
(time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").sum() Size: 48B Dimensions: (time: 3) @@ -6560,7 +6496,6 @@ def std( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").std() Size: 48B Dimensions: (time: 3) @@ -6693,7 +6628,6 @@ def var( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").var() Size: 48B Dimensions: (time: 3) @@ -6822,7 +6756,6 @@ def median( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").median() Size: 48B Dimensions: (time: 3) @@ -6921,7 +6854,6 @@ def nunique( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").nunique() Size: 48B Dimensions: (time: 3) @@ -7028,7 +6960,6 @@ def cumsum( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").cumsum() Size: 48B Dimensions: (time: 6) @@ -7133,7 +7064,6 @@ def cumprod( Data variables: da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan - >>> ds.resample(time="3ME").cumprod() Size: 48B Dimensions: (time: 6) @@ -7245,7 +7175,6 @@ def count( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").count() Size: 24B array([1, 2, 2]) @@ -7335,7 +7264,6 @@ def all( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").all() Size: 3B array([False, True, True]) @@ -7425,7 +7353,6 @@ def any( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").any() Size: 3B array([ True, True, True]) @@ -7521,7 +7448,6 @@ def max( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").max() Size: 24B array([1., 2., 3.]) @@ -7627,7 +7553,6 @@ def min( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").min() Size: 24B array([1., 2., 0.]) @@ -7735,7 +7660,6 @@ def mean( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").mean() Size: 24B array([1. , 2. , 1.5]) @@ -7850,7 +7774,6 @@ def prod( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").prod() Size: 24B array([1., 4., 0.]) @@ -7975,7 +7898,6 @@ def sum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").sum() Size: 24B array([1., 4., 3.]) @@ -8097,7 +8019,6 @@ def std( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").std() Size: 24B array([0. , 0. , 1.5]) @@ -8219,7 +8140,6 @@ def var( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").var() Size: 24B array([0. , 0. , 2.25]) @@ -8337,7 +8257,6 @@ def median( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").median() Size: 24B array([1. , 2. , 1.5]) @@ -8428,7 +8347,6 @@ def nunique( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").nunique() Size: 24B array([2, 1, 2]) @@ -8527,7 +8445,6 @@ def cumsum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.groupby("labels").cumsum() Size: 48B array([1., 2., 3., 3., 4., 1.]) @@ -8628,7 +8545,6 @@ def cumprod( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 labels (time) >> da.groupby("labels").cumprod() Size: 48B array([1., 2., 3., 0., 4., 1.]) @@ -8739,7 +8655,6 @@ def count( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").count() Size: 24B array([1, 3, 1]) @@ -8829,7 +8744,6 @@ def all( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").all() Size: 3B array([ True, True, False]) @@ -8919,7 +8833,6 @@ def any( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").any() Size: 3B array([ True, True, True]) @@ -9015,7 +8928,6 @@ def max( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").max() Size: 24B array([1., 3., 2.]) @@ -9121,7 +9033,6 @@ def min( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").min() Size: 24B array([1., 0., 2.]) @@ -9229,7 +9140,6 @@ def mean( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").mean() Size: 24B array([1. , 1.66666667, 2. ]) @@ -9344,7 +9254,6 @@ def prod( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").prod() Size: 24B array([1., 0., 2.]) @@ -9469,7 +9378,6 @@ def sum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").sum() Size: 24B array([1., 5., 2.]) @@ -9591,7 +9499,6 @@ def std( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").std() Size: 24B array([0. , 1.24721913, 0. ]) @@ -9713,7 +9620,6 @@ def var( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").var() Size: 24B array([0. , 1.55555556, 0. ]) @@ -9831,7 +9737,6 @@ def median( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").median() Size: 24B array([1., 2., 2.]) @@ -9922,7 +9827,6 @@ def nunique( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").nunique() Size: 24B array([1, 3, 2]) @@ -10021,7 +9925,6 @@ def cumsum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").cumsum() Size: 48B array([1., 2., 5., 5., 2., 2.]) @@ -10122,7 +10025,6 @@ def cumprod( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) >> da.resample(time="3ME").cumprod() Size: 48B array([1., 2., 6., 0., 2., 2.]) diff --git a/xarray/namedarray/_aggregations.py b/xarray/namedarray/_aggregations.py index ef83cb97c1a..0d81f61adf2 100644 --- a/xarray/namedarray/_aggregations.py +++ b/xarray/namedarray/_aggregations.py @@ -65,7 +65,6 @@ def count( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.count() Size: 8B array(5) @@ -119,7 +118,6 @@ def all( Size: 6B array([ True, True, True, True, True, False]) - >>> na.all() Size: 1B array(False) @@ -173,7 +171,6 @@ def any( Size: 6B array([ True, True, True, True, True, False]) - >>> na.any() Size: 1B array(True) @@ -232,7 +229,6 @@ def max( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.max() Size: 8B array(3.) @@ -298,7 +294,6 @@ def min( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.min() Size: 8B array(0.) 
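The stray blank lines removed throughout this patch trace back to the ``generate_example`` template in generate_aggregations.py, fixed below: the trailing ``\n`` after ``{blank_line}`` emitted a doubled break between the two doctest snippets. A simplified, hypothetical mini-version of the templating:

    blank_line = 8 * " "
    var_name, calculation = "na", "na.count"  # illustrative values
    old = f">>> {var_name}\n{blank_line}\n\n>>> {calculation}()"  # doubled break
    new = f">>> {var_name}\n{blank_line}\n>>> {calculation}()"    # single break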
@@ -368,7 +363,6 @@ def mean( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.mean() Size: 8B array(1.6) @@ -445,7 +439,6 @@ def prod( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.prod() Size: 8B array(0.) @@ -529,7 +522,6 @@ def sum( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.sum() Size: 8B array(8.) @@ -610,7 +602,6 @@ def std( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.std() Size: 8B array(1.0198039) @@ -691,7 +682,6 @@ def var( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.var() Size: 8B array(1.04) @@ -768,7 +758,6 @@ def median( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.median() Size: 8B array(2.) @@ -833,7 +822,6 @@ def nunique( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.nunique() Size: 8B array(5) @@ -908,7 +896,6 @@ def cumsum( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.cumsum() Size: 48B array([1., 3., 6., 6., 8., 8.]) @@ -983,7 +970,6 @@ def cumprod( Size: 48B array([ 1., 2., 3., 0., 2., nan]) - >>> na.cumprod() Size: 48B array([1., 2., 6., 0., 0., 0.]) diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 05d5b40003e..772a9a8a291 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -434,7 +434,7 @@ def generate_example(self, method): return f""" Examples --------{created} - >>> {self.datastructure.example_var_name}\n{blank_line}\n + >>> {self.datastructure.example_var_name}\n{blank_line} >>> {calculation}(){extra_examples}""" From 456f9536386692a22ced29aede4456cac02c1d8f Mon Sep 17 00:00:00 2001 From: Ewan Short Date: Mon, 24 Nov 2025 16:38:07 +1100 Subject: [PATCH 7/8] implement vectorized method and dask extension --- .pre-commit-config.yaml | 6 +- xarray/core/_aggregations.py | 163 +++++++++++++++++++++++ xarray/core/duck_array_ops.py | 185 ++++++++++++++++++++++++--- xarray/namedarray/_aggregations.py | 22 ++++ xarray/tests/test_duck_array_ops.py | 34 +++-- xarray/util/generate_aggregations.py | 30 ++++- 6 files changed, 406 insertions(+), 34 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index feaea311db8..d64523a4a9f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,13 +24,13 @@ repos: - id: rst-inline-touching-normal - id: text-unicode-replacement-char - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.3 + rev: v0.14.6 hooks: - id: ruff-check args: ["--fix", "--show-fixes"] - id: ruff-format - repo: https://github.com/keewis/blackdoc - rev: v0.4.5 + rev: v0.4.6 hooks: - id: blackdoc exclude: "generate_aggregations.py" @@ -76,6 +76,6 @@ repos: - id: validate-pyproject additional_dependencies: ["validate-pyproject-schema-store[all]"] - repo: https://github.com/adhtruong/mirrors-typos - rev: v1.39.0 + rev: v1.39.2 hooks: - id: typos diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index f8fa59532e8..a04bcfbb88e 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -1121,6 +1121,7 @@ def nunique( dim: Dims = None, *, skipna: bool | None = None, + equalna: bool | None = True, keep_attrs: bool | None = None, **kwargs: Any, ) -> Self: @@ -1137,6 +1138,11 @@ def nunique( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + equalna : bool or None, default: True + If ``skipna == False``, ``equalna`` determines whether null values + are counted as distinct values or not. 
Set ``equalna = True`` for + consistency with ``pandas.DataFrame.nunique``, or ``equalna = False`` + for consistency with the `Python array API `_. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -1160,6 +1166,15 @@ def nunique( :ref:`agg` User guide on reduction or aggregation operations. + Notes + ----- + Note that identifying unique values on very large + arrays is slow and memory intensive when there are many unique values. + For such arrays, consider lowering the precision, e.g. rounding floats + then converting them to integers, before searching for unique values. + For dask arrays, performance is improved when chunksizes are largest on + the dimension(s) being reduced. + Examples -------- >>> dt = xr.DataTree( @@ -1195,6 +1210,15 @@ def nunique( >>> dt.nunique(skipna=False) + Group: / + Dimensions: () + Data variables: + foo int64 8B 5 + + Use ``equalna`` to control whether NaNs are counted as distinct values. + + >>> dt.nunique(skipna=False, equalna=False) + Group: / Dimensions: () Data variables: @@ -1204,6 +1228,7 @@ def nunique( duck_array_ops.nunique, dim=dim, skipna=skipna, + equalna=equalna, numeric_only=False, keep_attrs=keep_attrs, **kwargs, @@ -2434,6 +2459,7 @@ def nunique( dim: Dims = None, *, skipna: bool | None = None, + equalna: bool | None = True, keep_attrs: bool | None = None, **kwargs: Any, ) -> Self: @@ -2450,6 +2476,11 @@ def nunique( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + equalna : bool or None, default: True + If ``skipna == False``, ``equalna`` determines whether null values + are counted as distinct values or not. Set ``equalna = True`` for + consistency with ``pandas.DataFrame.nunique``, or ``equalna = False`` + for consistency with the `Python array API `_. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -2472,6 +2503,15 @@ def nunique( :ref:`agg` User guide on reduction or aggregation operations. + Notes + ----- + Note that identifying unique values on very large + arrays is slow and memory intensive when there are many unique values. + For such arrays, consider lowering the precision, e.g. rounding floats + then converting them to integers, before searching for unique values. + For dask arrays, performance is improved when chunksizes are largest on + the dimension(s) being reduced. + Examples -------- >>> da = xr.DataArray( @@ -2503,6 +2543,14 @@ def nunique( >>> ds.nunique(skipna=False) Size: 8B Dimensions: () + Data variables: + da int64 8B 5 + + Use ``equalna`` to control whether NaNs are counted as distinct values. + + >>> ds.nunique(skipna=False, equalna=False) + Size: 8B + Dimensions: () Data variables: da int64 8B 5 """ @@ -2510,6 +2558,7 @@ def nunique( duck_array_ops.nunique, dim=dim, skipna=skipna, + equalna=equalna, numeric_only=False, keep_attrs=keep_attrs, **kwargs, @@ -3636,6 +3685,7 @@ def nunique( dim: Dims = None, *, skipna: bool | None = None, + equalna: bool | None = True, keep_attrs: bool | None = None, **kwargs: Any, ) -> Self: @@ -3652,6 +3702,11 @@ def nunique( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
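The performance notes given for ``nunique`` above are straightforward to act on from user code. A sketch under stated assumptions (dask is installed, and rounding to two decimals is an acceptable tolerance for the data at hand):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.random.default_rng(0).random((8, 10_000)), dims=("y", "x")
    ).chunk({"y": 4, "x": -1})  # keep the reduced dim "x" in one chunk

    # round and cast first so there are far fewer candidate unique values
    coarse = (da * 100).round().astype("int64")
    counts = coarse.nunique(dim="x")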
+ equalna : bool or None, default: True + If ``skipna == False``, ``equalna`` determines whether null values + are counted as distinct values or not. Set ``equalna = True`` for + consistency with ``pandas.DataFrame.nunique``, or ``equalna = False`` + for consistency with the `Python array API `_. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -3674,6 +3729,15 @@ def nunique( :ref:`agg` User guide on reduction or aggregation operations. + Notes + ----- + Note that identifying unique values on very large + arrays is slow and memory intensive when there are many unique values. + For such arrays, consider lowering the precision, e.g. rounding floats + then converting them to integers, before searching for unique values. + For dask arrays, performance is improved when chunksizes are largest on + the dimension(s) being reduced. + Examples -------- >>> da = xr.DataArray( @@ -3700,11 +3764,18 @@ def nunique( >>> da.nunique(skipna=False) Size: 8B array(5) + + Use ``equalna`` to control whether NaNs are counted as distinct values. + + >>> da.nunique(skipna=False, equalna=False) + Size: 8B + array(5) """ return self.reduce( duck_array_ops.nunique, dim=dim, skipna=skipna, + equalna=equalna, keep_attrs=keep_attrs, **kwargs, ) @@ -5192,6 +5263,7 @@ def nunique( dim: Dims = None, *, skipna: bool | None = None, + equalna: bool | None = True, keep_attrs: bool | None = None, **kwargs: Any, ) -> Dataset: @@ -5209,6 +5281,11 @@ def nunique( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + equalna : bool or None, default: True + If ``skipna == False``, ``equalna`` determines whether null values + are counted as distinct values or not. Set ``equalna = True`` for + consistency with ``pandas.DataFrame.nunique``, or ``equalna = False`` + for consistency with the `Python array API `_. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -5238,6 +5315,13 @@ def nunique( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. + Note that identifying unique values on very large + arrays is slow and memory intensive when there are many unique values. + For such arrays, consider lowering the precision, e.g. rounding floats + then converting them to integers, before searching for unique values. + For dask arrays, performance is improved when chunksizes are largest on + the dimension(s) being reduced. + Examples -------- >>> da = xr.DataArray( @@ -5271,6 +5355,16 @@ def nunique( >>> ds.groupby("labels").nunique(skipna=False) Size: 48B Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + Data variables: + da (labels) int64 24B 2 1 2 + + Use ``equalna`` to control whether NaNs are counted as distinct values. 
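In the doctest just below, ``equalna`` happens not to change the result, because each group contains at most one NaN. A plain-NumPy sketch (illustrative only, not part of the patch) of how the three NaN-counting modes diverge once two NaNs are present:

>>> import numpy as np
>>> x = np.array([1.0, 2.0, 2.0, np.nan, np.nan])
>>> len(np.unique(x[~np.isnan(x)]))  # skipna=True: drop NaNs entirely
2
>>> len(np.unique(x[~np.isnan(x)])) + 1  # skipna=False, equalna=True: all NaNs count as one value
3
>>> int(len(np.unique(x[~np.isnan(x)])) + np.isnan(x).sum())  # skipna=False, equalna=False: each NaN is distinct
4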
+ + >>> ds.groupby("labels").nunique(skipna=False, equalna=False) + Size: 48B + Dimensions: (labels: 3) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' Data variables: @@ -5280,6 +5374,7 @@ def nunique( duck_array_ops.nunique, dim=dim, skipna=skipna, + equalna=equalna, numeric_only=False, keep_attrs=keep_attrs, **kwargs, @@ -6788,6 +6883,7 @@ def nunique( dim: Dims = None, *, skipna: bool | None = None, + equalna: bool | None = True, keep_attrs: bool | None = None, **kwargs: Any, ) -> Dataset: @@ -6805,6 +6901,11 @@ def nunique( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + equalna : bool or None, default: True + If ``skipna == False``, ``equalna`` determines whether null values + are counted as distinct values or not. Set ``equalna = True`` for + consistency with ``pandas.DataFrame.nunique``, or ``equalna = False`` + for consistency with the `Python array API `_. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -6834,6 +6935,13 @@ def nunique( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. + Note that identifying unique values on very large + arrays is slow and memory intensive when there are many unique values. + For such arrays, consider lowering the precision, e.g. rounding floats + then converting them to integers, before searching for unique values. + For dask arrays, performance is improved when chunksizes are largest on + the dimension(s) being reduced. + Examples -------- >>> da = xr.DataArray( @@ -6867,6 +6975,16 @@ def nunique( >>> ds.resample(time="3ME").nunique(skipna=False) Size: 48B Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 24B 1 3 2 + + Use ``equalna`` to control whether NaNs are counted as distinct values. + + >>> ds.resample(time="3ME").nunique(skipna=False, equalna=False) + Size: 48B + Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 Data variables: @@ -6876,6 +6994,7 @@ def nunique( duck_array_ops.nunique, dim=dim, skipna=skipna, + equalna=equalna, numeric_only=False, keep_attrs=keep_attrs, **kwargs, @@ -8284,6 +8403,7 @@ def nunique( dim: Dims = None, *, skipna: bool | None = None, + equalna: bool | None = True, keep_attrs: bool | None = None, **kwargs: Any, ) -> DataArray: @@ -8301,6 +8421,11 @@ def nunique( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + equalna : bool or None, default: True + If ``skipna == False``, ``equalna`` determines whether null values + are counted as distinct values or not. Set ``equalna = True`` for + consistency with ``pandas.DataFrame.nunique``, or ``equalna = False`` + for consistency with the `Python array API `_. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -8330,6 +8455,13 @@ def nunique( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. + Note that identifying unique values on very large + arrays is slow and memory intensive when there are many unique values. + For such arrays, consider lowering the precision, e.g. 
rounding floats + then converting them to integers, before searching for unique values. + For dask arrays, performance is improved when chunksizes are largest on + the dimension(s) being reduced. + Examples -------- >>> da = xr.DataArray( @@ -8358,6 +8490,14 @@ def nunique( >>> da.groupby("labels").nunique(skipna=False) Size: 24B array([2, 1, 2]) + Coordinates: + * labels (labels) object 24B 'a' 'b' 'c' + + Use ``equalna`` to control whether NaNs are counted as distinct values. + + >>> da.groupby("labels").nunique(skipna=False, equalna=False) + Size: 24B + array([2, 1, 2]) Coordinates: * labels (labels) object 24B 'a' 'b' 'c' """ @@ -8365,6 +8505,7 @@ def nunique( duck_array_ops.nunique, dim=dim, skipna=skipna, + equalna=equalna, keep_attrs=keep_attrs, **kwargs, ) @@ -9764,6 +9905,7 @@ def nunique( dim: Dims = None, *, skipna: bool | None = None, + equalna: bool | None = True, keep_attrs: bool | None = None, **kwargs: Any, ) -> DataArray: @@ -9781,6 +9923,11 @@ def nunique( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + equalna : bool or None, default: True + If ``skipna == False``, ``equalna`` determines whether null values + are counted as distinct values or not. Set ``equalna = True`` for + consistency with ``pandas.DataFrame.nunique``, or ``equalna = False`` + for consistency with the `Python array API `_. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -9810,6 +9957,13 @@ def nunique( Pass flox-specific keyword arguments in ``**kwargs``. See the `flox documentation `_ for more. + Note that identifying unique values on very large + arrays is slow and memory intensive when there are many unique values. + For such arrays, consider lowering the precision, e.g. rounding floats + then converting them to integers, before searching for unique values. + For dask arrays, performance is improved when chunksizes are largest on + the dimension(s) being reduced. + Examples -------- >>> da = xr.DataArray( @@ -9838,6 +9992,14 @@ def nunique( >>> da.resample(time="3ME").nunique(skipna=False) Size: 24B array([1, 3, 2]) + Coordinates: + * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 + + Use ``equalna`` to control whether NaNs are counted as distinct values. 
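In the resample doctest below the counts are again unchanged by ``equalna``, since each bin holds at most one NaN. Separately, the Notes above advise lowering float precision before counting uniques on large arrays; a short sketch of that workaround (the variable names are hypothetical, and ``nunique`` is the DataArray method this patch adds):

>>> rng = np.random.default_rng(0)
>>> big = xr.DataArray(rng.random((500, 500)), dims=("x", "y"))
>>> coarse = (big * 100).round().astype("int64")  # at most 101 possible values
>>> counts = coarse.nunique(dim="y")  # far fewer uniques to sort and compare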
+ + >>> da.resample(time="3ME").nunique(skipna=False, equalna=False) + Size: 24B + array([1, 3, 2]) Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ @@ -9845,6 +10007,7 @@ def nunique( duck_array_ops.nunique, dim=dim, skipna=skipna, + equalna=equalna, keep_attrs=keep_attrs, **kwargs, ) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 22b54836b01..98cc20b1ccd 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -13,6 +13,7 @@ from collections.abc import Callable from functools import partial from importlib import import_module +from itertools import product from typing import Any import numpy as np @@ -276,10 +277,14 @@ def as_shared_dtype(scalars_or_arrays, xp=None): isinstance(x, type(extension_array_types[0])) for x in extension_array_types ): return [ - x - if not isna(x) - else PandasExtensionArray( - type(non_nans[0].array)._from_sequence([x], dtype=non_nans[0].dtype) + ( + x + if not isna(x) + else PandasExtensionArray( + type(non_nans[0].array)._from_sequence( + [x], dtype=non_nans[0].dtype + ) + ) ) for x in scalars_or_arrays ] @@ -386,36 +391,174 @@ def count(data, axis=None): return xp.sum(xp.logical_not(isnull(data)), axis=axis) -def nunique(data, axis=None, skipna=True): - """Count the number of unique values in this array along the given axis or axes""" +def _dask_nunique(data): + """Helper function to get nunique on dask arrays. Assumes reduction axis is -1.""" + import dask + + xp = get_array_namespace(data) + + # To track unique elements across chunks we will use an object array containing + # variable length xp arrays. The idea is that we collect the sorted uniques for each chunk + # as we go, speeding up subsequent concatenation and sorting. Another option might + # be to use a fixed length (masked) sparse array with an extra dimension, but such + # an array would likely use more memory, and the sort and concatenation steps + # would likely be slower. + def _build_storage_array(shape): + size = np.array(shape).prod() + storage_array = xp.empty(size, dtype=object) + # Assign empty arrays to each element + for i in range(size): + storage_array[i] = xp.array([], dtype=data.dtype) + # Reshape to the desired grid shape + return storage_array.reshape(shape) + + # We're going to use dask reduction, so define chunk, combine, aggregate functions. + def chunk_uniques(chunk, axis=None, keepdims=False): + """ + Get the unique values along the required axis for a chunk. Adapt the approach + described at https://stackoverflow.com/questions/46893369 + """ + if data.ndim == 1: + uniques = xp.empty([1], dtype=object) + uniques[0] = _get_uniques_1d(chunk) + return uniques + chunk = xp.sort(chunk, axis=-1) + if chunk.shape[-1] == 1: + uniques_bool = xp.ones_like(chunk, dtype=bool) + else: + uniques_bool = xp.not_equal(chunk[..., :-1], chunk[..., 1:]) + # Pad start with true as first element always unique + pad = xp.ones_like(uniques_bool[..., :1], dtype=bool) + uniques_bool = xp.concatenate([pad, uniques_bool], axis=-1) + # Store the uniques in an object array so we can use a dask reduction + uniques = _build_storage_array(chunk.shape[:-1]) + + for idx in product(*[range(s) for s in chunk.shape[:-1]]): + uniques[idx] = chunk[idx][uniques_bool[idx]] + return uniques + + def _get_uniques_1d(array): + # Use the same vectorized style to get uniques from 1d array. 
+ if len(array) < 2: + return array + array = xp.sort(array) + uniques_bool = xp.not_equal(array[:-1], array[1:]) + # Pad start with true as first element always unique + uniques_bool = xp.concatenate([xp.array([True]), uniques_bool]) + return array[uniques_bool] + + # Sometimes combine will return nested lists of arrays, so we need a flattener. + def _flatten_to_arrays(nested): + result = [] + + def append_arrays(x): + if isinstance(x, np.ndarray): + result.append(x) + elif isinstance(x, (list, tuple)): + for y in x: + append_arrays(y) + else: + raise ValueError(f"Unexpected type in nested structure: {type(x)}") + + append_arrays(nested) + return result + + def _merge_unique_arrays(arrays_input): + # Sometimes combine will return nested lists of arrays, so flatten first + arrays = _flatten_to_arrays(arrays_input) + # If single array, return it + if len(arrays) == 1: + return arrays[0] + # Merge multiple arrays + result = _build_storage_array(arrays[0].shape) + for idx in product(*[range(s) for s in result.shape]): + combined_vals = xp.concatenate([arr[idx] for arr in arrays], axis=0) + result[idx] = _get_uniques_1d(combined_vals) + return result + + def combine_uniques(uniques_list, axis=None, keepdims=False): + return _merge_unique_arrays(uniques_list) + + def aggregate_uniques(combined, axis=None, keepdims=False): + # First flatten and merge final list + combined = _flatten_to_arrays(combined) + combined = _merge_unique_arrays(combined) + unique_counts = _build_storage_array(combined.shape) + for idx in product(*[range(s) for s in combined.shape]): + unique_counts[idx] = len(combined[idx]) + return unique_counts + + meta_shape = (0,) * (data.ndim - 1) + meta_array = xp.empty(meta_shape, dtype=object) + + return dask.array.reduction( + data, + chunk=chunk_uniques, + combine=combine_uniques, + aggregate=aggregate_uniques, + dtype=object, + concatenate=False, + meta=meta_array, + axis=-1, + keepdims=False, + ) + + +def _factorize(data): + """Helper function for nunique to factorize mixed type arrays to float.""" + if not isinstance(data, np.ndarray): + message = "nunique with object dtype only implemented for np.ndarray." + raise NotImplementedError(message) + data = pd.factorize(data.reshape(-1))[0].reshape(data.shape) + data = data.astype(float) + data[data == -1] = np.nan + return data + + +def nunique(data, axis=None, skipna=True, equalna=True): + """ + Count the number of unique values in this array along the given dimensions + """ xp = get_array_namespace(data) if axis is None: axis = list(range(data.ndim)) elif isinstance(axis, (int, tuple)): axis = [axis] if isinstance(axis, int) else list(axis) - - # If axis empty, return unchanged data. if not axis: + # Return unchanged so downstream aggregation functions work as expected. 
return data + # Normalize negative axes + axis = [ax % data.ndim for ax in axis] + shape = data.shape + + # If mixed type array, convert to float first + if is_duck_array(data) and data.dtype == np.object_: + data = _factorize(data) # Move axes to be aggregated to the end and stack - shape = data.shape new_order = [i for i in range(len(shape)) if i not in axis] + axis new_shape = [s for i, s in enumerate(shape) if i not in axis] + [-1] - stacked = xp.reshape(xp.transpose(data, new_order), new_shape) - - # Check if data has type object; if so use pd.factorize for unique integers - factorize = bool(is_duck_array(data) and data.dtype == np.object_) + data = xp.reshape(xp.permute_dims(data, new_order), new_shape) - def _nunique_along_axis(array): - if skipna: - array = array[notnull(array)] - if factorize: - array = pd.factorize(array)[0] - return len(xp.unique(array)) - - return xp.apply_along_axis(_nunique_along_axis, -1, stacked) + if is_duck_dask_array(data): + unique_counts = _dask_nunique(data) + else: + # If not using dask, get counts using the approach described at + # https://stackoverflow.com/questions/46893369 + sorted_data = xp.sort(data, axis=-1) + unique_counts = xp.not_equal(sorted_data[..., :-1], sorted_data[..., 1:]) + unique_counts = xp.sum(unique_counts, axis=-1) + 1 + + # Subtract of na values as required + if skipna or (not skipna and equalna): + na_counts = isnull(data).astype(int) + na_counts = xp.sum(na_counts, axis=-1) + if not skipna and equalna: + na_counts = xp.clip(na_counts - 1, 0, None) + unique_counts = unique_counts - na_counts + + return unique_counts def sum_where(data, axis=None, dtype=None, where=None): diff --git a/xarray/namedarray/_aggregations.py b/xarray/namedarray/_aggregations.py index 0d81f61adf2..fa564ebded2 100644 --- a/xarray/namedarray/_aggregations.py +++ b/xarray/namedarray/_aggregations.py @@ -780,6 +780,7 @@ def nunique( dim: Dims = None, *, skipna: bool | None = None, + equalna: bool | None = True, **kwargs: Any, ) -> Self: """ @@ -795,6 +796,11 @@ def nunique( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + equalna : bool or None, default: True + If ``skipna == False``, ``equalna`` determines whether null values + are counted as distinct values or not. Set ``equalna = True`` for + consistency with ``pandas.DataFrame.nunique``, or ``equalna = False`` + for consistency with the `Python array API `_. **kwargs : Any Additional keyword arguments passed on to the appropriate array function for calculating ``nunique`` on this object's data. @@ -814,6 +820,15 @@ def nunique( :ref:`agg` User guide on reduction or aggregation operations. + Notes + ----- + Note that identifying unique values on very large + arrays is slow and memory intensive when there are many unique values. + For such arrays, consider lowering the precision, e.g. rounding floats + then converting them to integers, before searching for unique values. + For dask arrays, performance is improved when chunksizes are largest on + the dimension(s) being reduced. + Examples -------- >>> from xarray.namedarray.core import NamedArray @@ -831,11 +846,18 @@ def nunique( >>> na.nunique(skipna=False) Size: 8B array(5) + + Use ``equalna`` to control whether NaNs are counted as distinct values. 
+ + >>> na.nunique(skipna=False, equalna=False) + Size: 8B + array(5) """ return self.reduce( duck_array_ops.nunique, dim=dim, skipna=skipna, + equalna=equalna, **kwargs, ) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 4f4feed51f0..4390e5d17c8 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -166,18 +166,25 @@ def test_count(self): assert 1 == count(np.datetime64("2000-01-01")) + @pytest.mark.parametrize("equalna", [True, False]) @pytest.mark.parametrize("mixed_type", [True, False]) @pytest.mark.parametrize("string_array", [True, False]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("axis", [2, None, (1, 2)]) - def test_nunique(self, axis, skipna, string_array, mixed_type): + def test_nunique(self, axis, skipna, equalna, string_array, mixed_type): expected_results = { - (True, 2): np.array([[1, 2, 3], [3, 2, 1]]), - (True, None): np.array(12), - (True, (1, 2)): np.array([6, 6]), - (False, 2): np.array([[2, 3, 4], [4, 3, 2]]), - (False, None): np.array(13), - (False, (1, 2)): np.array([7, 7]), + (True, True, 2): np.array([[1, 2, 3], [3, 2, 1]]), + (True, True, None): np.array(12), + (True, True, (1, 2)): np.array([6, 6]), + (True, False, 2): np.array([[2, 3, 4], [4, 3, 2]]), + (True, False, None): np.array(13), + (True, False, (1, 2)): np.array([7, 7]), + (False, True, 2): np.array([[1, 2, 3], [3, 2, 1]]), + (False, True, None): np.array(12), + (False, True, (1, 2)): np.array([6, 6]), + (False, False, 2): np.array([[4, 4, 4], [4, 4, 4]]), + (False, False, None): np.array(24), + (False, False, (1, 2)): np.array([12, 12]), } x = self.x.copy() if string_array: @@ -190,8 +197,13 @@ def test_nunique(self, axis, skipna, string_array, mixed_type): x = x.astype(object) x[(x == 10.0) | (x == "10.0")] = True x[(x == 2.0) | (x == "2.0")] = np.sum - result = nunique(x, axis=axis, skipna=skipna) - assert_array_equal(result, expected_results[(skipna, axis)]) + # Object arrays currently only supported for np.ndarray + if (mixed_type or string_array) and not isinstance(x, np.ndarray): + with pytest.raises(NotImplementedError): + nunique(x, axis=axis, skipna=skipna, equalna=equalna) + return + result = nunique(x, axis=axis, skipna=skipna, equalna=equalna) + assert_array_equal(result, expected_results[(equalna, skipna, axis)]) def test_where_type_promotion(self): result = where(np.array([True, False]), np.array([1, 2]), np.array(["a", "b"])) @@ -291,6 +303,10 @@ def setUp(self): chunks=(2, 1, 2), ) + def test_nunique_dask_lazy(self): + with raise_if_dask_computes(): + nunique(self.x, axis=0) + def test_cumsum_1d(): inputs = np.array([0, 1, 2, 3]) diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 772a9a8a291..562c5826557 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -194,6 +194,12 @@ def {method}( have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64).""" +_EQUALNA_DOCSTRING = """equalna : bool or None, default: True + If ``skipna == False``, ``equalna`` determines whether null values + are counted as distinct values or not. Set ``equalna = True`` for + consistency with ``pandas.DataFrame.nunique``, or ``equalna = False`` + for consistency with the `Python array API `_.""" + _MINCOUNT_DOCSTRING = """min_count : int or None, optional The required number of valid values to perform the operation. 
If fewer than min_count non-NA values are present the result will be @@ -226,6 +232,12 @@ def {method}( _CUM_NOTES = """Note that the methods on the ``cumulative`` method are more performant (with numbagg installed) and better supported. ``cumsum`` and ``cumprod`` may be deprecated in the future.""" +_NUNIQUE_NOTES = """Note that identifying unique values on very large +arrays is slow and memory intensive when there are many unique values. +For such arrays, consider lowering the precision, e.g. rounding floats +then converting them to integers, before searching for unique values. +For dask arrays, performance is improved when chunksizes are largest on +the dimension(s) being reduced.""" class ExtraKwarg(NamedTuple): @@ -246,6 +258,17 @@ class ExtraKwarg(NamedTuple): " >>> {calculation}(skipna=False)" ), ) +equalna = ExtraKwarg( + docs=_EQUALNA_DOCSTRING, + kwarg="equalna: bool | None = True,", + call="equalna=equalna,", + example=( + "\n \n" + " Use ``equalna`` to control whether NaNs are counted as distinct values.\n" + " \n" + " >>> {calculation}(skipna=False, equalna=False)" + ), +) min_count = ExtraKwarg( docs=_MINCOUNT_DOCSTRING, kwarg="min_count: int | None = None,", @@ -541,7 +564,12 @@ def generate_code(self, method, has_keep_attrs): Method( "median", extra_kwargs=(skipna,), numeric_only=True, min_flox_version="0.9.2" ), - Method("nunique", extra_kwargs=(skipna,), see_also_modules=("pandas.DataFrame",)), + Method( + "nunique", + extra_kwargs=(skipna, equalna), + see_also_modules=("pandas.DataFrame",), + additional_notes=_NUNIQUE_NOTES, + ), # Cumulatives: Method( "cumsum", From 3ace1b9a85cc339b27aa181fd8e4df1972d3e48c Mon Sep 17 00:00:00 2001 From: Ewan Short Date: Mon, 24 Nov 2025 17:44:52 +1100 Subject: [PATCH 8/8] fix missing permute_dims in old numpy versions --- xarray/core/duck_array_ops.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 98cc20b1ccd..bf568dd84b6 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -515,6 +515,17 @@ def _factorize(data): return data +def _permute_dims(data, axes): + """Helper function to get a suitable permute dims function.""" + xp = get_array_namespace(data) + if hasattr(xp, "permute_dims"): + return xp.permute_dims(data, axes) + elif hasattr(xp, "transpose"): + return xp.transpose(data, axes) + else: + raise NotImplementedError(f"Unknown transpose method for namespace {xp}") + + def nunique(data, axis=None, skipna=True, equalna=True): """ Count the number of unique values in this array along the given dimensions @@ -539,7 +550,7 @@ def nunique(data, axis=None, skipna=True, equalna=True): # Move axes to be aggregated to the end and stack new_order = [i for i in range(len(shape)) if i not in axis] + axis new_shape = [s for i, s in enumerate(shape) if i not in axis] + [-1] - data = xp.reshape(xp.permute_dims(data, new_order), new_shape) + data = xp.reshape(_permute_dims(data, new_order), new_shape) if is_duck_dask_array(data): unique_counts = _dask_nunique(data)
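As a closing illustration, the ``chunk``/``combine``/``aggregate`` contract that ``_dask_nunique`` builds on can be exercised in miniature for the 1-D case. This is an independent toy, not the patch's helpers; the flattening step mirrors the nested-list behaviour the implementation comments mention, and it assumes ``dask.array.reduction`` accepts these callables as sketched:

import dask.array
import numpy as np


def _flatten(parts):
    # dask may hand combine/aggregate nested lists of chunk results
    if isinstance(parts, np.ndarray):
        return [parts]
    out = []
    for part in parts:
        out.extend(_flatten(part))
    return out


def chunk(x, axis=None, keepdims=False):
    return np.unique(x)  # sorted uniques of a single chunk


def combine(parts, axis=None, keepdims=False):
    # merge partial unique sets from neighbouring chunks
    return np.unique(np.concatenate(_flatten(parts)))


def aggregate(parts, axis=None, keepdims=False):
    return np.int64(combine(parts).size)


x = dask.array.from_array(np.array([1, 2, 2, 3, 1, 4]), chunks=2)
result = dask.array.reduction(
    x,
    chunk=chunk,
    combine=combine,
    aggregate=aggregate,
    dtype=np.int64,
    concatenate=False,  # pass lists of chunk results, not a stacked array
    meta=np.array(0, dtype=np.int64),
)
assert int(result.compute()) == 4  # uniques are {1, 2, 3, 4}

Nothing runs until ``compute()`` is called, which is the property ``test_nunique_dask_lazy`` checks above with ``raise_if_dask_computes``.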