Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue where the mask of Dask arrays was ignored and improve tests #6325

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions lib/iris/tests/unit/concatenate/test_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import pytest

from iris import _concatenate
from iris.tests.unit.util.test_array_equal import TEST_CASES
from iris.util import array_equal


@pytest.mark.parametrize(
Expand Down Expand Up @@ -75,6 +77,20 @@ def test_compute_hashes(a, b, eq):
assert eq == (hashes["a"] == hashes["b"])


@pytest.mark.parametrize(
    "a,b",
    [
        case[:2]
        for case in TEST_CASES
        if all(isinstance(operand, (np.ndarray, da.Array)) for operand in case[:2])
    ],
)
def test_compute_hashes_vs_array_equal(a, b):
    """Check that hash equality agrees with ``array_equal(withnans=True)``.

    Only the ``TEST_CASES`` entries whose two operands are both NumPy or
    Dask arrays are used; the remaining fields of each case are discarded.
    """
    hashes = _concatenate._compute_hashes({"a": a, "b": b})
    arrays_match = array_equal(a, b, withnans=True)
    hashes_match = hashes["a"] == hashes["b"]
    assert arrays_match == hashes_match


def test_arrayhash_equal_incompatible_chunks_raises():
hash1 = _concatenate._ArrayHash(1, chunks=((1, 1),))
hash2 = _concatenate._ArrayHash(1, chunks=((2,),))
Expand Down
298 changes: 173 additions & 125 deletions lib/iris/tests/unit/util/test_array_equal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,133 +4,181 @@
# See LICENSE in the root of the repository for full licensing details.
"""Test function :func:`iris.util.array_equal`."""

import dask.array as da
import numpy as np
import numpy.ma as ma
import pytest

from iris.util import array_equal


class Test:
    """Checks of :func:`iris.util.array_equal` on NumPy and masked inputs."""

    def test_0d(self):
        """Zero-dimensional arrays are compared by their single value."""
        reference = np.array(23)
        same_value = np.array(23)
        other_value = np.array(7)
        assert array_equal(reference, same_value)
        assert not array_equal(reference, other_value)

    def test_0d_and_scalar(self):
        """A zero-dimensional array can be compared with a plain scalar."""
        scalar_array = np.array(23)
        assert array_equal(scalar_array, 23)
        assert not array_equal(scalar_array, 45)

    def test_1d_and_sequences(self):
        """A 1-d array can be compared with list and tuple sequences."""
        for make_sequence in (list, tuple):
            sequence = make_sequence([1, 2, 3])
            array = np.array(sequence)
            assert array_equal(array, sequence)
            # A shorter sequence must not match.
            assert not array_equal(array, sequence[:-1])
            # Neither must a same-length sequence once a value differs.
            array[1] = 45
            assert not array_equal(array, sequence)

    def test_nd(self):
        """Multi-dimensional arrays differ when a single element differs."""
        original = np.array(np.arange(24).reshape(2, 3, 4))
        duplicate = np.array(np.arange(24).reshape(2, 3, 4))
        modified = np.array(np.arange(24).reshape(2, 3, 4))
        modified[0, 1, 2] = 100
        assert array_equal(original, duplicate)
        assert not array_equal(original, modified)

    def test_masked_is_not_ignored(self):
        """Same mask, different values under the mask: still equal."""
        shared_mask = [1, 0, 1]
        left = ma.masked_array([1, 2, 3], mask=shared_mask)
        right = ma.masked_array([2, 2, 2], mask=shared_mask)
        assert array_equal(left, right)

    def test_masked_is_different(self):
        """Same values, different masks: not equal."""
        values = [1, 2, 3]
        left = ma.masked_array(values, mask=[1, 0, 1])
        right = ma.masked_array(values, mask=[0, 0, 1])
        assert not array_equal(left, right)

    def test_masked_isnt_unmasked(self):
        """A plain array is not equal to one with a masked point."""
        plain = np.array([1, 2, 2])
        partly_masked = ma.masked_array([1, 2, 2], mask=[0, 0, 1])
        assert not array_equal(plain, partly_masked)

    def test_masked_unmasked_equivelance(self):
        """A masked array with nothing masked equals the plain array."""
        plain = np.array([1, 2, 2])
        no_mask_given = ma.masked_array([1, 2, 2])
        all_false_mask = ma.masked_array([1, 2, 2], mask=[0, 0, 0])
        for candidate in (no_mask_given, all_false_mask):
            assert array_equal(plain, candidate)

    def test_fully_masked_arrays(self):
        """Two fully masked n-d arrays are equal."""
        left = ma.masked_array(np.arange(24).reshape(2, 3, 4), mask=True)
        right = ma.masked_array(np.arange(24).reshape(2, 3, 4), mask=True)
        assert array_equal(left, right)

    def test_fully_masked_0d_arrays(self):
        """Two fully masked zero-dimensional arrays are equal."""
        left = ma.masked_array(3, mask=True)
        right = ma.masked_array(3, mask=True)
        assert array_equal(left, right)

    def test_fully_masked_string_arrays(self):
        """Fully masked string arrays are equal however the mask is spelt."""
        letters = ["a", "b", "c"]
        scalar_mask = ma.masked_array(letters, mask=True)
        list_mask = ma.masked_array(letters, mask=[1, 1, 1])
        assert array_equal(scalar_mask, list_mask)

    def test_partially_masked_string_arrays(self):
        """Identically masked string arrays are equal."""
        left = ma.masked_array(["a", "b", "c"], mask=[1, 0, 1])
        right = ma.masked_array(["a", "b", "c"], mask=[1, 0, 1])
        assert array_equal(left, right)

    def test_string_arrays_equal(self):
        """String arrays with the same contents are equal."""
        left = np.array(["abc", "def", "efg"])
        right = np.array(["abc", "def", "efg"])
        assert array_equal(left, right)

    def test_string_arrays_different_contents(self):
        """String arrays differing in one element are not equal."""
        left = np.array(["abc", "def", "efg"])
        right = np.array(["abc", "de", "efg"])
        assert not array_equal(left, right)

    def test_string_arrays_subset(self):
        """A string array is not equal to a shorter prefix of itself."""
        longer = np.array(["abc", "def", "efg"])
        shorter = np.array(["abc", "def"])
        assert not array_equal(longer, shorter)
        assert not array_equal(shorter, longer)

    def test_string_arrays_unequal_dimensionality(self):
        """String arrays of different dimensionality are never equal."""
        zero_d = np.array("abc")
        one_d = np.array(["abc"])
        two_d = np.array([["abc"]])
        pairs = (
            (zero_d, one_d),
            (one_d, zero_d),
            (zero_d, two_d),
            (one_d, two_d),
        )
        for left, right in pairs:
            assert not array_equal(left, right)

    def test_string_arrays_0d_and_scalar(self):
        """A zero-dimensional string array can be compared with a ``str``."""
        word = np.array("foobar")
        assert array_equal(word, "foobar")
        for mismatch in ("foo", "foobar."):
            assert not array_equal(word, mismatch)

    def test_nan_equality_nan_ne_nan(self):
        """By default NaN does not equal NaN, even for the same object."""
        with_nans = np.array([1.0, np.nan, 2.0, np.nan, 3.0])
        duplicate = with_nans.copy()
        assert not array_equal(with_nans, with_nans)
        assert not array_equal(with_nans, duplicate)

    def test_nan_equality_nan_naneq_nan(self):
        """With ``withnans=True`` NaNs in matching positions are equal."""
        with_nans = np.array([1.0, np.nan, 2.0, np.nan, 3.0])
        duplicate = np.array([1.0, np.nan, 2.0, np.nan, 3.0])
        assert array_equal(with_nans, with_nans, withnans=True)
        assert array_equal(with_nans, duplicate, withnans=True)

    def test_nan_equality_nan_nanne_a(self):
        """With ``withnans=True`` a NaN still differs from a number."""
        with_nans = np.array([1.0, np.nan, 2.0, np.nan, 3.0])
        nan_replaced = np.array([1.0, np.nan, 2.0, 0.0, 3.0])
        assert not array_equal(with_nans, nan_replaced, withnans=True)

    def test_nan_equality_a_nanne_b(self):
        """With ``withnans=True`` differing non-NaN values are unequal."""
        with_nans = np.array([1.0, np.nan, 2.0, np.nan, 3.0])
        value_changed = np.array([1.0, np.nan, 2.0, np.nan, 4.0])
        assert not array_equal(with_nans, value_changed, withnans=True)
# 3-d integer array that differs from ``np.arange(24).reshape(2, 3, 4)`` in
# exactly one element.
ARRAY1 = np.array(np.arange(24).reshape(2, 3, 4))
ARRAY1[0, 1, 2] = 100

# Float array containing NaNs.  It is deliberately a single shared object so
# that some cases below pass the *same* object as both operands.
ARRAY2 = np.array([1.0, np.nan, 2.0, np.nan, 3.0])

# Each entry is a tuple ``(array_a, array_b, withnans, eq)``:
#   * ``array_a``, ``array_b`` - the two operands handed to ``array_equal``;
#   * ``withnans`` - value passed as the ``withnans`` keyword;
#   * ``eq`` - the expected boolean result.
# NOTE: object identity between the two operands is significant - the
# consuming test checks ``array_a is array_b`` - so do not factor separate
# array literals into shared variables (or vice versa).
TEST_CASES = [
    # 0-d arrays
    (np.array(23), np.array(23), False, True),
    (np.array(23), np.array(7), False, False),
    # 0-d array against a plain scalar
    (np.array(23), 23, False, True),
    (np.array(23), 45, False, False),
    # 1-d array against list and tuple sequences
    (np.array([1, 2, 3]), [1, 2, 3], False, True),
    (np.array([1, 2, 3]), [1, 2], False, False),
    (np.array([1, 45, 3]), [1, 2, 3], False, False),
    (np.array([1, 2, 3]), (1, 2, 3), False, True),
    (np.array([1, 2, 3]), (1, 2), False, False),
    (np.array([1, 45, 3]), (1, 2, 3), False, False),
    # 3-d arrays
    (
        np.array(np.arange(24).reshape(2, 3, 4)),
        np.array(np.arange(24).reshape(2, 3, 4)),
        False,
        True,
    ),
    (
        np.array(np.arange(24).reshape(2, 3, 4)),
        ARRAY1,
        False,
        False,
    ),
    # identical masks, different values underneath the masked points:
    # expected to compare equal
    (
        ma.masked_array([1, 2, 3], mask=[1, 0, 1]),
        ma.masked_array([2, 2, 2], mask=[1, 0, 1]),
        False,
        True,
    ),
    # identical values, different masks: expected to compare unequal
    (
        ma.masked_array([1, 2, 3], mask=[1, 0, 1]),
        ma.masked_array([1, 2, 3], mask=[0, 0, 1]),
        False,
        False,
    ),
    # an unmasked array against an array with a masked point
    (
        np.array([1, 2, 2]),
        ma.masked_array([1, 2, 2], mask=[0, 0, 1]),
        False,
        False,
    ),
    (
        np.array([1, 2]),
        ma.masked_array([1, 3], mask=[0, 1]),
        False,
        False,
    ),
    # an unmasked array against a masked array with nothing masked
    (
        np.array([1, 2, 2]),
        ma.masked_array([1, 2, 2]),
        False,
        True,
    ),
    (
        np.array([1, 2, 2]),
        ma.masked_array([1, 2, 2], mask=[0, 0, 0]),
        False,
        True,
    ),
    # fully masked arrays
    (
        ma.masked_array(np.arange(24).reshape(2, 3, 4), mask=True),
        ma.masked_array(np.arange(24).reshape(2, 3, 4), mask=True),
        False,
        True,
    ),
    # fully masked 0-d arrays
    (
        ma.masked_array(3, mask=True),
        ma.masked_array(3, mask=True),
        False,
        True,
    ),
    # fully masked string arrays (scalar mask vs. per-element mask)
    (
        ma.masked_array(["a", "b", "c"], mask=True),
        ma.masked_array(["a", "b", "c"], mask=[1, 1, 1]),
        False,
        True,
    ),
    # partially masked string arrays
    (
        ma.masked_array(["a", "b", "c"], mask=[1, 0, 1]),
        ma.masked_array(["a", "b", "c"], mask=[1, 0, 1]),
        False,
        True,
    ),
    # equal string arrays
    (
        np.array(["abc", "def", "efg"]),
        np.array(["abc", "def", "efg"]),
        False,
        True,
    ),
    # string arrays with different contents
    (
        np.array(["abc", "def", "efg"]),
        np.array(["abc", "de", "efg"]),
        False,
        False,
    ),
    # string arrays where one is a prefix of the other (both operand orders)
    (
        np.array(["abc", "def", "efg"]),
        np.array(["abc", "def"]),
        False,
        False,
    ),
    (
        np.array(["abc", "def"]),
        np.array(["abc", "def", "efg"]),
        False,
        False,
    ),
    # string arrays of unequal dimensionality
    (np.array("abc"), np.array(["abc"]), False, False),
    (np.array(["abc"]), np.array("abc"), False, False),
    (np.array("abc"), np.array([["abc"]]), False, False),
    (np.array(["abc"]), np.array([["abc"]]), False, False),
    # 0-d string array against a plain string
    (np.array("foobar"), "foobar", False, True),
    (np.array("foobar"), "foo", False, False),
    (np.array("foobar"), "foobar.", False, False),
    # withnans=False: NaN != NaN, for the same object and for a copy
    (ARRAY2, ARRAY2, False, False),
    (ARRAY2, ARRAY2.copy(), False, False),
    # withnans=True: NaNs in matching positions compare equal
    (ARRAY2, ARRAY2, True, True),
    (ARRAY2, ARRAY2.copy(), True, True),
    # withnans=True: a NaN against a number is still unequal
    (
        np.array([1.0, np.nan, 2.0, np.nan, 3.0]),
        np.array([1.0, np.nan, 2.0, 0.0, 3.0]),
        True,
        False,
    ),
    # withnans=True: differing non-NaN values are still unequal
    (
        np.array([1.0, np.nan, 2.0, np.nan, 3.0]),
        np.array([1.0, np.nan, 2.0, np.nan, 4.0]),
        True,
        False,
    ),
]


@pytest.mark.parametrize("lazy", [False, True])
@pytest.mark.parametrize("array_a,array_b,withnans,eq", TEST_CASES)
def test_array_equal(array_a, array_b, withnans, eq, lazy):
    """Check ``array_equal`` against every case, on real and lazy inputs.

    When ``lazy`` is true, each operand that is a NumPy array is wrapped as
    a Dask array first; other operands are passed through unchanged.
    """

    def as_lazy(operand):
        # Only NumPy arrays (masked arrays included) are converted.
        if isinstance(operand, np.ndarray):
            return da.asarray(operand)
        return operand

    if lazy:
        # Remember whether both operands were one object, so the lazy
        # versions can be made one object too.
        same_object = array_a is array_b
        array_a = as_lazy(array_a)
        array_b = as_lazy(array_b)
        if same_object:
            array_b = array_a

    outcome = array_equal(array_a, array_b, withnans=withnans)
    assert eq == outcome
38 changes: 29 additions & 9 deletions lib/iris/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,16 +422,36 @@ def normalise_array(array):

eq = array1.shape == array2.shape
if eq:
array1_masked = ma.is_masked(array1)
eq = array1_masked == ma.is_masked(array2)
if eq and array1_masked:
eq = np.array_equal(ma.getmaskarray(array1), ma.getmaskarray(array2))
if eq:
eqs = array1 == array2
if is_lazy_data(array1) or is_lazy_data(array2):
data1 = da.ma.getdata(array1)
data2 = da.ma.getdata(array2)
mask1 = da.ma.getmaskarray(array1)
mask2 = da.ma.getmaskarray(array2)
else:
data1 = ma.getdata(array1)
data2 = ma.getdata(array2)
mask1 = ma.getmask(array1)
mask2 = ma.getmask(array2)

if mask1 is ma.nomask or mask2 is ma.nomask:
ignore = np.False_
else:
ignore = mask1 & mask2

if withnans and (array1.dtype.kind == "f" or array2.dtype.kind == "f"):
eqs = np.where(np.isnan(array1) & np.isnan(array2), True, eqs)
eq = np.all(eqs)
eq = bool(eq) or eq is ma.masked
nanmask = np.isnan(data1) & np.isnan(data2)
if ignore is np.False_:
ignore = nanmask
else:
ignore |= nanmask

data_eqs = data1 == data2
if ignore is not np.False_:
data_eqs = np.where(ignore, True, data_eqs)
data_eq = data_eqs.all()
mask_eq = (mask1 == mask2).all()

eq = bool(data_eq & mask_eq)

return eq

Expand Down
Loading