diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index df79252b6..20af05077 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -52,6 +52,7 @@ _VectorIndexConfigDynamicUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vector_index import ( @@ -1846,6 +1847,21 @@ def vector_index_type() -> str: VectorIndexConfigHNSW = _VectorIndexConfigHNSW +@dataclass +class _VectorIndexConfigHFresh(_VectorIndexConfig): + distance_metric: VectorDistances + max_posting_size_kb: int + replicas: int + search_probe: int + + @staticmethod + def vector_index_type() -> str: + return VectorIndexType.HFRESH.value + + +VectorIndexConfigHFresh = _VectorIndexConfigHFresh + + @dataclass class _VectorIndexConfigFlat(_VectorIndexConfig): distance_metric: VectorDistances @@ -1919,7 +1935,10 @@ def to_dict(self) -> Dict[str, Any]: class _NamedVectorConfig(_ConfigBase): vectorizer: _NamedVectorizerConfig vector_index_config: Union[ - VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic + VectorIndexConfigHNSW, + VectorIndexConfigFlat, + VectorIndexConfigDynamic, + VectorIndexConfigHFresh, ] def to_dict(self) -> Dict: @@ -1956,7 +1975,11 @@ class _CollectionConfig(_ConfigBase): reranker_config: Optional[RerankerConfig] sharding_config: Optional[ShardingConfig] vector_index_config: Union[ - VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic, None + VectorIndexConfigHNSW, + VectorIndexConfigFlat, + VectorIndexConfigDynamic, + VectorIndexConfigHFresh, + None, ] vector_index_type: Optional[VectorIndexType] vectorizer_config: Optional[VectorizerConfig] @@ -2610,6 +2633,25 @@ def dynamic( quantizer=quantizer, ) + @staticmethod + def hfresh( + max_posting_size_kb: Optional[int] = None, + search_probe: Optional[int] = None, + quantizer: Optional[_RQConfigUpdate] 
= None, + ) -> _VectorIndexConfigHFreshUpdate: + """Create a `_VectorIndexConfigHFreshUpdate` object to update the configuration of the HFresh vector index. + + Use this method when defining the `vector_index_config` argument in `collection.update()`. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hfresh) for a more detailed view! + """ # noqa: D417 (missing argument descriptions in the docstring) + return _VectorIndexConfigHFreshUpdate( + maxPostingSizeKB=max_posting_size_kb, + searchProbe=search_probe, + quantizer=quantizer, + ) + class Reconfigure: """Use this factory class to generate the correct `xxxConfig` object for use when using the `collection.update()` method. diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 8d4f0c4ae..2713994d7 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -41,6 +41,7 @@ _VectorIndexConfigDynamic, _VectorIndexConfigFlat, _VectorIndexConfigHNSW, + _VectorIndexConfigHFresh, _VectorizerConfig, ) @@ -213,6 +214,18 @@ def __get_hnsw_config(config: Dict[str, Any]) -> _VectorIndexConfigHNSW: ) +def __get_hfresh_config(config: Dict[str, Any]) -> _VectorIndexConfigHFresh: + quantizer = __get_quantizer_config(config) + return _VectorIndexConfigHFresh( + distance_metric=VectorDistances(config.get("distance")), + max_posting_size_kb=config["maxPostingSizeKB"], + replicas=config["replicas"], + search_probe=config["searchProbe"], + quantizer=quantizer, + multi_vector=None, + ) + + def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: quantizer = __get_quantizer_config(config) return _VectorIndexConfigFlat( @@ -225,7 +238,13 @@ def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat: def __get_vector_index_config( schema: Dict[str, Any], -) -> Union[_VectorIndexConfigHNSW, _VectorIndexConfigFlat, 
_VectorIndexConfigDynamic, None]: +) -> Union[ + _VectorIndexConfigHNSW, + _VectorIndexConfigFlat, + _VectorIndexConfigDynamic, + _VectorIndexConfigHFresh, + None, +]: if "vectorIndexConfig" not in schema: return None if schema["vectorIndexType"] == "hnsw": @@ -239,6 +258,8 @@ def __get_vector_index_config( hnsw=__get_hnsw_config(schema["vectorIndexConfig"]["hnsw"]), flat=__get_flat_config(schema["vectorIndexConfig"]["flat"]), ) + elif schema["vectorIndexType"] == "hfresh": + return __get_hfresh_config(schema["vectorIndexConfig"]) else: return None diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index de2f5577d..e7b600325 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -15,6 +15,7 @@ _VectorIndexConfigDynamicUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vectorizers import ( @@ -1340,6 +1341,7 @@ def update( *, vector_index_config: Union[ _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, ], diff --git a/weaviate/collections/classes/config_vector_index.py b/weaviate/collections/classes/config_vector_index.py index ce70c9a0f..4de0ab419 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -34,11 +34,14 @@ class VectorIndexType(str, Enum): Attributes: HNSW: Hierarchical Navigable Small World (HNSW) index. FLAT: Flat index. + DYNAMIC: Dynamic index. + HFRESH: HFRESH index. 
""" HNSW = "hnsw" FLAT = "flat" DYNAMIC = "dynamic" + HFRESH = "hfresh" class _MultiVectorConfigCreateBase(_ConfigCreateModel): @@ -127,6 +130,16 @@ def vector_index_type() -> VectorIndexType: return VectorIndexType.HNSW +class _VectorIndexConfigHFreshCreate(_VectorIndexConfigCreate): + maxPostingSizeKB: Optional[int] + replicas: Optional[int] + searchProbe: Optional[int] + + @staticmethod + def vector_index_type() -> VectorIndexType: + return VectorIndexType.HFRESH + + class _VectorIndexConfigFlatCreate(_VectorIndexConfigCreate): vectorCacheMaxObjects: Optional[int] @@ -149,6 +162,15 @@ def vector_index_type() -> VectorIndexType: return VectorIndexType.HNSW +class _VectorIndexConfigHFreshUpdate(_VectorIndexConfigUpdate): + maxPostingSizeKB: Optional[int] + searchProbe: Optional[int] + + @staticmethod + def vector_index_type() -> VectorIndexType: + return VectorIndexType.HFRESH + + class _VectorIndexConfigFlatUpdate(_VectorIndexConfigUpdate): vectorCacheMaxObjects: Optional[int] @@ -564,6 +586,31 @@ def hnsw( multivector=multi_vector, ) + @staticmethod + def hfresh( + distance_metric: Optional[VectorDistances] = None, + max_posting_size_kb: Optional[int] = None, + replicas: Optional[int] = None, + search_probe: Optional[int] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + multi_vector: Optional[_MultiVectorConfigCreate] = None, + ) -> _VectorIndexConfigHFreshCreate: + """Create a `_VectorIndexConfigHFreshCreate` object to be used when defining the HFresh vector index configuration of Weaviate. + + Use this method when defining the `vector_index_config` argument in `collections.create()`. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hfresh) for a more detailed view! 
+ """ # noqa: D417 (missing argument descriptions in the docstring) + return _VectorIndexConfigHFreshCreate( + distance=distance_metric, + maxPostingSizeKB=max_posting_size_kb, + replicas=replicas, + searchProbe=search_probe, + quantizer=quantizer, + multivector=multi_vector, + ) + @staticmethod def flat( distance_metric: Optional[VectorDistances] = None, diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py index cbfe5c8cd..7f9d0d492 100644 --- a/weaviate/collections/classes/config_vectors.py +++ b/weaviate/collections/classes/config_vectors.py @@ -20,6 +20,8 @@ _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWCreate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshCreate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vectorizers import ( @@ -128,6 +130,17 @@ def __hnsw( multivector=multivector, ) + @staticmethod + def __hfresh(*, quantizer: Optional[_QuantizerConfigCreate]) -> _VectorIndexConfigHFreshCreate: + return _VectorIndexConfigHFreshCreate( + maxPostingSizeKB=None, + replicas=None, + searchProbe=None, + quantizer=quantizer, + multivector=None, + distance=None, + ) + @staticmethod def __flat(*, quantizer: Optional[_QuantizerConfigCreate]) -> _VectorIndexConfigFlatCreate: return _VectorIndexConfigFlatCreate( @@ -1804,6 +1817,7 @@ def update( name: Optional[str] = None, vector_index_config: Union[ _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, ], diff --git a/weaviate/collections/config/async_.pyi b/weaviate/collections/config/async_.pyi index 3b07f55c6..6c631191b 100644 --- a/weaviate/collections/config/async_.pyi +++ b/weaviate/collections/config/async_.pyi @@ -21,6 +21,7 @@ from weaviate.collections.classes.config import ( _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, ) from 
weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate @@ -47,13 +48,18 @@ class _ConfigCollectionAsync(_ConfigCollectionExecutor[ConnectionAsync]): object_ttl_config: Optional[_ObjectTTLConfigUpdate] = None, replication_config: Optional[_ReplicationConfigUpdate] = None, vector_index_config: Optional[ - Union[_VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate] + Union[ + _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigFlatUpdate, + _VectorIndexConfigHFreshUpdate, + ] ] = None, vectorizer_config: Optional[ Union[ _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, List[_NamedVectorConfigUpdate], ] ] = None, diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index bb1f33859..42c2082e9 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -38,6 +38,7 @@ _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, ) from weaviate.collections.classes.config_methods import ( _collection_config_from_json, @@ -136,6 +137,7 @@ def update( Union[ _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, + _VectorIndexConfigHFreshUpdate, ] ] = None, vectorizer_config: Optional[ @@ -143,6 +145,7 @@ def update( _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, List[_NamedVectorConfigUpdate], ] ] = None, @@ -187,6 +190,7 @@ def update( _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, ), ): _Warnings.vectorizer_config_in_config_update() diff --git a/weaviate/collections/config/sync.pyi b/weaviate/collections/config/sync.pyi index 21fd705ac..44e08545c 100644 --- 
a/weaviate/collections/config/sync.pyi +++ b/weaviate/collections/config/sync.pyi @@ -21,6 +21,7 @@ from weaviate.collections.classes.config import ( _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigHFreshUpdate, ) from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate @@ -45,13 +46,18 @@ class _ConfigCollection(_ConfigCollectionExecutor[ConnectionSync]): object_ttl_config: Optional[_ObjectTTLConfigUpdate] = None, replication_config: Optional[_ReplicationConfigUpdate] = None, vector_index_config: Optional[ - Union[_VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate] + Union[ + _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigFlatUpdate, + _VectorIndexConfigHFreshUpdate, + ] ] = None, vectorizer_config: Optional[ Union[ _VectorIndexConfigHNSWUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigHFreshUpdate, List[_NamedVectorConfigUpdate], ] ] = None, diff --git a/weaviate/outputs/config.py b/weaviate/outputs/config.py index d6c8ed230..d6c3b4965 100644 --- a/weaviate/outputs/config.py +++ b/weaviate/outputs/config.py @@ -23,6 +23,7 @@ VectorDistances, VectorIndexConfigFlat, VectorIndexConfigHNSW, + VectorIndexConfigHFresh, VectorIndexType, VectorizerConfig, Vectorizers, @@ -52,6 +53,7 @@ "ShardTypes", "VectorDistances", "VectorIndexConfigHNSW", + "VectorIndexConfigHFresh", "VectorIndexConfigFlat", "VectorIndexType", "Vectorizers",