diff --git a/docs/user-guide/consolidated_metadata.rst b/docs/user-guide/consolidated_metadata.rst index 3c015dcfca..edd5bafc8d 100644 --- a/docs/user-guide/consolidated_metadata.rst +++ b/docs/user-guide/consolidated_metadata.rst @@ -114,3 +114,23 @@ removed, or modified, consolidated metadata may not be desirable. metadata. .. _Consolidated Metadata: https://github.com/zarr-developers/zarr-specs/pull/309 + +Stores Without Support for Consolidated Metadata +------------------------------------------------ + +Some stores may want to opt out of the consolidated metadata mechanism. This +may be for several reasons, such as: + +* They want to maintain read-write consistency, which is challenging with + consolidated metadata. +* They have their own consolidated metadata mechanism. +* They offer good enough performance without the need for consolidation. + +This type of store can declare it doesn't want consolidation by implementing +``Store.supports_consolidated_metadata`` and returning ``False``. For stores that don't support +consolidation, Zarr will: + +* Raise an error on ``consolidate_metadata`` calls, maintaining the store in + its unconsolidated state. +* Raise an error in ``AsyncGroup.open(..., use_consolidated=True)``. +* Not use consolidated metadata in ``AsyncGroup.open(..., use_consolidated=None)``. diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 96165f8ba0..db4dee8cdd 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -264,6 +264,18 @@ async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None: """ await gather(*starmap(self.set, values)) + @property + def supports_consolidated_metadata(self) -> bool: + """ + Does the store support consolidated metadata? + + If it doesn't, an error will be raised on requests to consolidate the metadata. + Returning `False` can be useful for stores which implement their own + consolidation mechanism outside of the zarr-python implementation. 
+ """ + + return True + @property @abstractmethod def supports_deletes(self) -> bool: diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index b296d21bd4..3ef9903c57 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -174,7 +174,8 @@ async def consolidate_metadata( Consolidate the metadata of all nodes in a hierarchy. Upon completion, the metadata of the root node in the Zarr hierarchy will be - updated to include all the metadata of child nodes. + updated to include all the metadata of child nodes. For Stores that do + not support consolidated metadata, this operation raises a ``TypeError``. Parameters ---------- @@ -194,10 +195,18 @@ async def consolidate_metadata( ------- group: AsyncGroup The group, with the ``consolidated_metadata`` field set to include - the metadata of each child node. + the metadata of each child node. If the Store doesn't support + consolidated metadata, this function raises a `TypeError`. + See ``Store.supports_consolidated_metadata``. """ store_path = await make_store_path(store, path=path) + if not store_path.store.supports_consolidated_metadata: + store_name = type(store_path.store).__name__ + raise TypeError( + f"The Zarr Store in use ({store_name}) doesn't support consolidated metadata", + ) + group = await AsyncGroup.open(store_path, zarr_format=zarr_format, use_consolidated=False) group.store_path.store._check_writable() diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index d4b652ad6e..6aa1cc4de7 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -81,7 +81,8 @@ def consolidate_metadata( Consolidate the metadata of all nodes in a hierarchy. Upon completion, the metadata of the root node in the Zarr hierarchy will be - updated to include all the metadata of child nodes. + updated to include all the metadata of child nodes. For Stores that do + not use consolidated metadata, this operation raises a `TypeError`. 
Parameters ---------- @@ -101,7 +102,10 @@ def consolidate_metadata( ------- group: Group The group, with the ``consolidated_metadata`` field set to include - the metadata of each child node. + the metadata of each child node. If the Store doesn't support + consolidated metadata, this function raises a `TypeError`. + See ``Store.supports_consolidated_metadata``. + """ return Group(sync(async_api.consolidate_metadata(store, path=path, zarr_format=zarr_format))) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 6c6f104605..3ce46ec97b 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -490,10 +490,11 @@ async def open( By default, consolidated metadata is used if it's present in the store (in the ``zarr.json`` for Zarr format 3 and in the ``.zmetadata`` file - for Zarr format 2). + for Zarr format 2) and the Store supports it. - To explicitly require consolidated metadata, set ``use_consolidated=True``, - which will raise an exception if consolidated metadata is not found. + To explicitly require consolidated metadata, set ``use_consolidated=True``. + In this case, if the Store doesn't support consolidation or consolidated metadata is + not found, a ``ValueError`` exception is raised. To explicitly *not* use consolidated metadata, set ``use_consolidated=False``, which will fall back to using the regular, non consolidated metadata. @@ -503,6 +504,16 @@ async def open( to load consolidated metadata from a non-default key. """ store_path = await make_store_path(store) + if not store_path.store.supports_consolidated_metadata: + # Fail if consolidated metadata was requested but the Store doesn't support it + if use_consolidated: + store_name = type(store_path.store).__name__ + raise ValueError( + f"The Zarr store in use ({store_name}) doesn't support consolidated metadata." 
+ ) + + # if use_consolidated was None (optional), the Store dictates it doesn't want consolidation + use_consolidated = False consolidated_key = ZMETADATA_V2_JSON diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index f71a946300..ff4fe6a780 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -651,3 +651,38 @@ async def test_consolidated_metadata_encodes_special_chars( elif zarr_format == 3: assert root_metadata["child"]["attributes"]["test"] == expected_fill_value assert root_metadata["time"]["fill_value"] == expected_fill_value + + +class NonConsolidatedStore(zarr.storage.MemoryStore): + """A store that doesn't support consolidated metadata""" + + @property + def supports_consolidated_metadata(self) -> bool: + return False + + +async def test_consolidate_metadata_raises_for_self_consolidating_stores(): + """Verify calling consolidate_metadata on a non-supporting store raises an error.""" + + memory_store = NonConsolidatedStore() + root = await zarr.api.asynchronous.create_group(store=memory_store) + await root.create_group("a/b") + + with pytest.raises(TypeError, match="doesn't support consolidated metadata"): + await zarr.api.asynchronous.consolidate_metadata(memory_store) + + +async def test_open_group_in_non_consolidating_stores(): + memory_store = NonConsolidatedStore() + root = await zarr.api.asynchronous.create_group(store=memory_store) + await root.create_group("a/b") + + # Opening a group without consolidation works as expected + await AsyncGroup.open(memory_store, use_consolidated=False) + + # let the Store opt out of consolidation + await AsyncGroup.open(memory_store, use_consolidated=None) + + # Opening a group with use_consolidated=True should fail + with pytest.raises(ValueError, match="doesn't support consolidated metadata"): + await AsyncGroup.open(memory_store, use_consolidated=True)