Skip to content

Commit 61318ae

Browse files
junqiu-leiluyunchengvibrantvarunjmazanec15
authored
Support distance type radius search for Lucene engine (#1498)
* Optimize Faiss Query With Filters: Reduce iteration and memory for id filter (#1402) * Optimize Faiss Query With Filters. Reduce iteration copy for docid set iterator Signed-off-by: luyuncheng <[email protected]> * Optimize Faiss Query With Filters. Reduce iteration copy for docid set iterator. Use Bitmap And Batch to do id filter. and you sparse or fixed bitset do exact ANN search Signed-off-by: luyuncheng <[email protected]> * Using int64_t instead of long type for GetLongArrayElements Signed-off-by: luyuncheng <[email protected]> * Add IDSelectorJlongBitmap Signed-off-by: luyuncheng <[email protected]> * 1. Add IDSelectorJlongBitmap and UT for it 2. Move FilterIdsSelectorType to a util class Signed-off-by: luyuncheng <[email protected]> * 1. Add IDSelectorJlongBitmap and UT for it 2. Move FilterIdsSelectorType to a util class 3. Spotless apply Signed-off-by: luyuncheng <[email protected]> * Rebase remote-tracking branch 'origin/main' into Filter Signed-off-by: luyuncheng <[email protected]> * tidy Signed-off-by: luyuncheng <[email protected]> * Add Changelog Signed-off-by: luyuncheng <[email protected]> * fix javadoc tasks Signed-off-by: luyuncheng <[email protected]> * fix bwc javadoc Signed-off-by: luyuncheng <[email protected]> * UpdatedFilterIdsSelector Signed-off-by: luyuncheng <[email protected]> * UpdatedFilterIdsSelector Signed-off-by: luyuncheng <[email protected]> * Rebase faiss_wrapper.cpp Signed-off-by: luyuncheng <[email protected]> * UpdatedFilterIdsSelector For description Select different FilterIdsSelectorType Signed-off-by: luyuncheng <[email protected]> * UpdatedFilterIdsSelector For description Select different FilterIdsSelectorType Signed-off-by: luyuncheng <[email protected]> * UpdatedFilterIdsSelector as Byte.SIZE Signed-off-by: luyuncheng <[email protected]> * UpdatedFilterIdsSelector For comments Signed-off-by: luyuncheng <[email protected]> --------- Signed-off-by: luyuncheng <[email protected]> * Increment 2.12.0-SNAPSHOT to 
2.13.0-SNAPSHOT in BWC workflow (#1505) Signed-off-by: Varun Jain <[email protected]> * Manually install zlib for win CI (#1513) Signed-off-by: John Mazanec <[email protected]> * Upgrade faiss to 12b92e9 (#1509) Upgrades faiss to facebookresearch/faiss@12b92e9. Cleanup outdated patches. Signed-off-by: John Mazanec <[email protected]> * Disable sdc table for HNSWPQ read-only indices (#1518) Passes flag to disable sdc table for the HNSWPQ indices. This table is only used by HNSWPQ during graph creation to compare nodes already present in graph. When we call load index, the graph is read only. Hence, we wont be doing any ingestion and so the table can be disabled to save some memory. Along with this, added a unit test and a couple test helper methods for generating random data. Signed-off-by: John Mazanec <[email protected]> * Support distance type radius search for Lucene engine Signed-off-by: Junqiu Lei <[email protected]> * Resolve feedback Signed-off-by: Junqiu Lei <[email protected]> * Resolve feedback Signed-off-by: Junqiu Lei <[email protected]> * Resolve comments Signed-off-by: Junqiu Lei <[email protected]> * Resolve comments Signed-off-by: Junqiu Lei <[email protected]> * Add RNNQueryFactory class Signed-off-by: Junqiu Lei <[email protected]> * Add javadoc Signed-off-by: Junqiu Lei <[email protected]> * Resolve feedback Signed-off-by: Junqiu Lei <[email protected]> * Resolve feedback Signed-off-by: Junqiu Lei <[email protected]> * Resolve feedback Signed-off-by: Junqiu Lei <[email protected]> --------- Signed-off-by: luyuncheng <[email protected]> Signed-off-by: Varun Jain <[email protected]> Signed-off-by: John Mazanec <[email protected]> Signed-off-by: Junqiu Lei <[email protected]> Co-authored-by: luyuncheng <[email protected]> Co-authored-by: Varun Jain <[email protected]> Co-authored-by: John Mazanec <[email protected]>
1 parent f54cf37 commit 61318ae

20 files changed

+867
-138
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1414

1515
## [Unreleased 2.x](https://github.com/opensearch-project/k-NN/compare/2.12...2.x)
1616
### Features
17+
* Support distance type radius search for Lucene engine [#1498](https://github.com/opensearch-project/k-NN/pull/1498)
1718
### Enhancements
1819
* Optimize Faiss Query With Filters: Reduce iteration and memory for id filter [#1402](https://github.com/opensearch-project/k-NN/pull/1402)
1920
### Bug Fixes

src/main/java/org/opensearch/knn/common/KNNConstants.java

+2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ public class KNNConstants {
6666
public static final String VECTOR_DATA_TYPE_FIELD = "data_type";
6767
public static final VectorDataType DEFAULT_VECTOR_DATA_TYPE_FIELD = VectorDataType.FLOAT;
6868

69+
public static final String RADIAL_SEARCH_KEY = "radial_search";
70+
6971
// Lucene specific constants
7072
public static final String LUCENE_NAME = "lucene";
7173

src/main/java/org/opensearch/knn/index/IndexUtil.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,14 @@
3737
public class IndexUtil {
3838

3939
public static final String MODEL_NODE_ASSIGNMENT_KEY = KNNConstants.MODEL_NODE_ASSIGNMENT;
40-
4140
private static final Version MINIMAL_SUPPORTED_VERSION_FOR_IGNORE_UNMAPPED = Version.V_2_11_0;
4241
private static final Version MINIMAL_SUPPORTED_VERSION_FOR_MODEL_NODE_ASSIGNMENT = Version.V_2_12_0;
42+
private static final Version MINIMAL_SUPPORTED_VERSION_FOR_RADIAL_SEARCH = Version.V_2_13_0;
4343
private static final Map<String, Version> minimalRequiredVersionMap = new HashMap<String, Version>() {
4444
{
4545
put("ignore_unmapped", MINIMAL_SUPPORTED_VERSION_FOR_IGNORE_UNMAPPED);
4646
put(MODEL_NODE_ASSIGNMENT_KEY, MINIMAL_SUPPORTED_VERSION_FOR_MODEL_NODE_ASSIGNMENT);
47+
put(KNNConstants.RADIAL_SEARCH_KEY, MINIMAL_SUPPORTED_VERSION_FOR_RADIAL_SEARCH);
4748
}
4849
};
4950

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.knn.index.query;
7+
8+
import lombok.AllArgsConstructor;
9+
import lombok.Builder;
10+
import lombok.Getter;
11+
import lombok.NonNull;
12+
import lombok.extern.log4j.Log4j2;
13+
import org.apache.lucene.search.Query;
14+
import org.apache.lucene.search.join.BitSetProducer;
15+
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
16+
import org.opensearch.index.query.QueryBuilder;
17+
import org.opensearch.index.query.QueryShardContext;
18+
import org.opensearch.index.search.NestedHelper;
19+
import org.opensearch.knn.index.VectorDataType;
20+
import org.opensearch.knn.index.util.KNNEngine;
21+
22+
import java.io.IOException;
23+
import java.util.Optional;
24+
25+
/**
26+
* Base class for creating vector search queries.
27+
*/
28+
@Log4j2
29+
public abstract class BaseQueryFactory {
30+
/**
31+
* DTO object to hold data required to create a Query instance.
32+
*/
33+
@AllArgsConstructor
34+
@Builder
35+
@Getter
36+
public static class CreateQueryRequest {
37+
@NonNull
38+
private KNNEngine knnEngine;
39+
@NonNull
40+
private String indexName;
41+
private String fieldName;
42+
private float[] vector;
43+
private byte[] byteVector;
44+
private VectorDataType vectorDataType;
45+
private Integer k;
46+
private Float radius;
47+
private QueryBuilder filter;
48+
private QueryShardContext context;
49+
50+
public Optional<QueryBuilder> getFilter() {
51+
return Optional.ofNullable(filter);
52+
}
53+
54+
public Optional<QueryShardContext> getContext() {
55+
return Optional.ofNullable(context);
56+
}
57+
}
58+
59+
/**
60+
* Creates a query filter.
61+
*
62+
* @param createQueryRequest request object that has all required fields to construct the query
63+
* @return Lucene Query
64+
*/
65+
protected static Query getFilterQuery(BaseQueryFactory.CreateQueryRequest createQueryRequest) {
66+
if (!createQueryRequest.getFilter().isPresent()) {
67+
return null;
68+
}
69+
70+
final QueryShardContext queryShardContext = createQueryRequest.getContext()
71+
.orElseThrow(() -> new RuntimeException("Shard context cannot be null"));
72+
log.debug(
73+
String.format(
74+
"Creating query with filter for index [%s], field [%s]",
75+
createQueryRequest.getIndexName(),
76+
createQueryRequest.getFieldName()
77+
)
78+
);
79+
final Query filterQuery;
80+
try {
81+
filterQuery = createQueryRequest.getFilter().get().toQuery(queryShardContext);
82+
} catch (IOException e) {
83+
throw new RuntimeException("Cannot create query with filter", e);
84+
}
85+
BitSetProducer parentFilter = queryShardContext.getParentFilter();
86+
if (parentFilter != null) {
87+
boolean mightMatch = new NestedHelper(queryShardContext.getMapperService()).mightMatchNestedDocs(filterQuery);
88+
if (mightMatch) {
89+
return filterQuery;
90+
}
91+
return new ToChildBlockJoinQuery(filterQuery, parentFilter);
92+
}
93+
return filterQuery;
94+
}
95+
}

0 commit comments

Comments
 (0)