From 4a5e36118be805cca22d55f67072779ccfaa24f6 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 22:57:45 +0100 Subject: [PATCH 1/4] add test checking chunk grid shapes are consistent --- virtualizarr/tests/test_manifests/test_array.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 57535782..146f4c8f 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -48,6 +48,18 @@ def test_manifest_array_dict_v3_metadata(self, array_v3_metadata): assert marr.shape == shape assert marr.size == 5 * 2 * 20 assert marr.ndim == 3 + + def test_consistency_checks(self, array_v3_metadata): + chunks_dict = { + "0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100}, + } + manifest = ChunkManifest(entries=chunks_dict) + chunks = (1,) + shape = (2,) + metadata = array_v3_metadata(shape=shape, chunks=chunks) + + with pytest.raises(ValueError, match="do not match shape of chunk grid"): + ManifestArray(metadata=metadata, chunkmanifest=manifest) class TestResultType: From a38eef9ccf31aba2c676cc9304762c8991d0657c Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 22:58:00 +0100 Subject: [PATCH 2/4] pass the new test --- Note: the consistency check reads shape and chunks from `_metadata` (the same object the error message reports and that is stored on `self._metadata`), not from the raw `metadata` argument. virtualizarr/manifests/array.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py index f6d93819..e93bbd48 100644 --- a/virtualizarr/manifests/array.py +++ b/virtualizarr/manifests/array.py @@ -7,6 +7,7 @@ from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGrid import virtualizarr.manifests.utils as utils +from virtualizarr.utils import determine_chunk_grid_shape from virtualizarr.manifests.array_api import ( MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS, _isnan, @@ -64,7 +65,12 @@ def __init__( f"chunkmanifest arg must be of type ChunkManifest or dict, but got
type {type(chunkmanifest)}" ) - # TODO check that the metadata shape and chunkmanifest shape are consistent with one another + # check that the metadata shape and chunkmanifest shape are consistent with one another + metadata_chunk_grid_shape = determine_chunk_grid_shape(shape=_metadata.shape, chunks=_metadata.chunks) + if _chunkmanifest.shape_chunk_grid != metadata_chunk_grid_shape: + raise ValueError("Set of virtual chunk keys in manifest do not match shape of chunk grid implied by array metadata. \n" + f"Keys in chunkmanifest imply a chunk grid shape of {_chunkmanifest.shape_chunk_grid} but the metadata contains shape={_metadata.shape} and chunks={_metadata.chunks} which imply a chunk grid shape of {metadata_chunk_grid_shape}") + # TODO also cover the special case of scalar arrays self._metadata = _metadata From 151e4502ee9ff1701a4fbedb088a65be7490f06d Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 23 Jul 2025 23:01:06 +0100 Subject: [PATCH 3/4] add comments to test --- virtualizarr/tests/test_manifests/test_array.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 146f4c8f..1561f774 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -50,10 +50,13 @@ def test_manifest_array_dict_v3_metadata(self, array_v3_metadata): assert marr.ndim == 3 def test_consistency_checks(self, array_v3_metadata): + # create a manifest with only one chunk chunks_dict = { "0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100}, } manifest = ChunkManifest(entries=chunks_dict) + + # but array metadata implying there should be two chunks chunks = (1,) shape = (2,) metadata = array_v3_metadata(shape=shape, chunks=chunks) From 22c22e3211f63670da0b7424a8f54fa38c244286 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Jul 2025 22:02:01
+0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/manifests/array.py | 12 ++++++++---- virtualizarr/tests/test_manifests/test_array.py | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py index e93bbd48..6430dc64 100644 --- a/virtualizarr/manifests/array.py +++ b/virtualizarr/manifests/array.py @@ -7,13 +7,13 @@ from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGrid import virtualizarr.manifests.utils as utils -from virtualizarr.utils import determine_chunk_grid_shape from virtualizarr.manifests.array_api import ( MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS, _isnan, expand_dims, ) from virtualizarr.manifests.manifest import ChunkManifest +from virtualizarr.utils import determine_chunk_grid_shape class ManifestArray: @@ -66,10 +66,14 @@ def __init__( ) # check that the metadata shape and chunkmanifest shape are consistent with one another - metadata_chunk_grid_shape = determine_chunk_grid_shape(shape=_metadata.shape, chunks=_metadata.chunks) + metadata_chunk_grid_shape = determine_chunk_grid_shape( + shape=_metadata.shape, chunks=_metadata.chunks + ) if _chunkmanifest.shape_chunk_grid != metadata_chunk_grid_shape: - raise ValueError("Set of virtual chunk keys in manifest do not match shape of chunk grid implied by array metadata. \n" - f"Keys in chunkmanifest imply a chunk grid shape of {_chunkmanifest.shape_chunk_grid} but the metadata contains shape={_metadata.shape} and chunks={_metadata.chunks} which imply a chunk grid shape of {metadata_chunk_grid_shape}") + raise ValueError( + "Set of virtual chunk keys in manifest do not match shape of chunk grid implied by array metadata.
\n" + f"Keys in chunkmanifest imply a chunk grid shape of {_chunkmanifest.shape_chunk_grid} but the metadata contains shape={_metadata.shape} and chunks={_metadata.chunks} which imply a chunk grid shape of {metadata_chunk_grid_shape}" + ) # TODO also cover the special case of scalar arrays diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 1561f774..f8c948dd 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -48,14 +48,14 @@ def test_manifest_array_dict_v3_metadata(self, array_v3_metadata): assert marr.shape == shape assert marr.size == 5 * 2 * 20 assert marr.ndim == 3 - + def test_consistency_checks(self, array_v3_metadata): # create a manifest with only one chunk chunks_dict = { "0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100}, } manifest = ChunkManifest(entries=chunks_dict) - + # but array metadata implying there should be two chunks chunks = (1,) shape = (2,)