diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8929f604088..d2f951dabe1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,9 @@ Deprecations Bug Fixes ~~~~~~~~~ +- When assigning an indexed coordinate to a data variable or coordinate, coerce it from + ``IndexVariable`` to ``Variable`` (:issue:`9859`, :issue:`10829`, :pull:`10909`) + By `Julia Signell <https://github.com/jsignell>`_ - The NetCDF4 backend will now claim to be able to read any URL except for one that contains the substring zarr. This restores backward compatibility after :pull:`10804` broke workflows that relied on ``xr.open_dataset("http://...")`` diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index ab5bf7408f3..9aa64a57ff2 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -1265,7 +1265,7 @@ def create_coords_with_default_indexes( variables.update(idx_vars) all_variables.update(idx_vars) else: - variables[name] = variable + variables[name] = variable.to_base_variable() new_coords = Coordinates._construct_direct(coords=variables, indexes=indexes) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index e5f3c0959bd..d398a37c8ef 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -22,7 +22,12 @@ emit_user_level_warning, equivalent, ) -from xarray.core.variable import Variable, as_variable, calculate_dimensions +from xarray.core.variable import ( + IndexVariable, + Variable, + as_variable, + calculate_dimensions, +) from xarray.structure.alignment import deep_align from xarray.util.deprecation_helpers import ( _COMPAT_DEFAULT, @@ -1206,7 +1211,11 @@ def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeRe if c not in value.dims and c in dataset.coords ] if coord_names: - other[key] = value.drop_vars(coord_names) + value = value.drop_vars(coord_names) + if isinstance(value.variable, IndexVariable): + variable = value.variable.to_base_variable() + value = value._replace(variable=variable) + 
other[key] = value return merge_core( [dataset, other], diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 39e6da6a83c..7a5c6523bdb 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -3,6 +3,7 @@ import functools import warnings from collections.abc import Hashable +from typing import Any import numpy as np import pandas as pd @@ -362,6 +363,17 @@ def _assert_indexes_invariants_checks( if isinstance(v, IndexVariable) } assert indexes.keys() <= index_vars, (set(indexes), index_vars) + assert all( + k in index_vars + for k, v in possible_coord_variables.items() + if v.dims == (k,) + ), {k: type(v) for k, v in possible_coord_variables.items()} + + assert not any( + isinstance(v, IndexVariable) + for k, v in possible_coord_variables.items() + if k not in indexes.keys() + ), {k: type(v) for k, v in possible_coord_variables.items()} # check pandas index wrappers vs. coordinate data adapters for k, index in indexes.items(): @@ -401,11 +413,17 @@ def _assert_indexes_invariants_checks( ) -def _assert_variable_invariants(var: Variable, name: Hashable = None): +def _assert_variable_invariants( + var: Variable | Any, + name: Hashable = None, +) -> None: if name is None: name_or_empty: tuple = () else: name_or_empty = (name,) + + assert isinstance(var, Variable), {name: type(var)} + assert isinstance(var._dims, tuple), name_or_empty + (var._dims,) assert len(var._dims) == len(var._data.shape), name_or_empty + ( var._dims, @@ -418,35 +436,28 @@ def _assert_variable_invariants(var: Variable, name: Hashable = None): def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool): - assert isinstance(da._variable, Variable), da._variable _assert_variable_invariants(da._variable) assert isinstance(da._coords, dict), da._coords - assert all(isinstance(v, Variable) for v in da._coords.values()), da._coords if check_default_indexes: assert all(set(v.dims) <= set(da.dims) for v in da._coords.values()), ( da.dims, {k: 
v.dims for k, v in da._coords.items()}, ) - assert all( - isinstance(v, IndexVariable) - for (k, v) in da._coords.items() - if v.dims == (k,) - ), {k: type(v) for k, v in da._coords.items()} for k, v in da._coords.items(): _assert_variable_invariants(v, k) - if da._indexes is not None: - _assert_indexes_invariants_checks( - da._indexes, da._coords, da.dims, check_default=check_default_indexes - ) + assert da._indexes is not None + _assert_indexes_invariants_checks( + da._indexes, da._coords, da.dims, check_default=check_default_indexes + ) def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool): assert isinstance(ds._variables, dict), type(ds._variables) - assert all(isinstance(v, Variable) for v in ds._variables.values()), ds._variables + for k, v in ds._variables.items(): _assert_variable_invariants(v, k) @@ -466,17 +477,10 @@ def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool): ds._dims[k] == v.sizes[k] for v in ds._variables.values() for k in v.sizes ), (ds._dims, {k: v.sizes for k, v in ds._variables.items()}) - if check_default_indexes: - assert all( - isinstance(v, IndexVariable) - for (k, v) in ds._variables.items() - if v.dims == (k,) - ), {k: type(v) for k, v in ds._variables.items() if v.dims == (k,)} - - if ds._indexes is not None: - _assert_indexes_invariants_checks( - ds._indexes, ds._variables, ds._dims, check_default=check_default_indexes - ) + assert ds._indexes is not None + _assert_indexes_invariants_checks( + ds._indexes, ds._variables, ds._dims, check_default=check_default_indexes + ) assert isinstance(ds._encoding, type(None) | dict) assert isinstance(ds._attrs, type(None) | dict) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5eec7b8a2fd..6b453d13646 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1702,6 +1702,20 @@ def should_add_coord_to_array(self, name, var, dims): assert_identical(actual, expected, check_default_indexes=False) 
assert "x_bnds" not in actual.dims + def test_assign_coords_uses_base_variable_class(self) -> None: + a = DataArray([0, 1, 3], dims=["x"], coords={"x": [0, 1, 2]}) + a = a.assign_coords(foo=a.x) + + # explicit check + assert isinstance(a["x"].variable, IndexVariable) + assert not isinstance(a["foo"].variable, IndexVariable) + + # test internal invariant checks when comparing the datasets + expected = DataArray( + [0, 1, 3], dims=["x"], coords={"x": [0, 1, 2], "foo": ("x", [0, 1, 2])} + ) + assert_identical(a, expected) + def test_coords_alignment(self) -> None: lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])]) rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index bf2e8f6cb8c..08474be8489 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4311,9 +4311,11 @@ def test_to_stacked_array_preserves_dtype(self) -> None: # coordinate created from variables names should be of string dtype data = np.array(["a", "a", "a", "b"], dtype=" None: @@ -4779,6 +4781,18 @@ def test_setitem_using_list_errors(self, var_list, data, error_regex) -> None: with pytest.raises(ValueError, match=error_regex): actual[var_list] = data + def test_setitem_uses_base_variable_class_even_for_index_variables(self) -> None: + ds = Dataset(coords={"x": [1, 2, 3]}) + ds["y"] = ds["x"] + + # explicit check + assert isinstance(ds["x"].variable, IndexVariable) + assert not isinstance(ds["y"].variable, IndexVariable) + + # test internal invariant checks when comparing the datasets + expected = Dataset(data_vars={"y": ("x", [1, 2, 3])}, coords={"x": [1, 2, 3]}) + assert_identical(ds, expected) + def test_assign(self) -> None: ds = Dataset() actual = ds.assign(x=[0, 1, 2], y=2)