Skip to content

Commit 68cdbc9

Browse files
authored
Support k-NN radial search parameters in neural search (#697)
* Support k-NN radial search parameters in neural search Signed-off-by: Junqiu Lei <[email protected]>
1 parent 86f6d4c commit 68cdbc9

File tree

15 files changed

+558
-27
lines changed

15 files changed

+558
-27
lines changed

Diff for: CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1919

2020
## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.13...2.x)
2121
### Features
22+
- Support k-NN radial search parameters in neural search ([#697](https://github.com/opensearch-project/neural-search/pull/697))
2223
### Enhancements
2324
- BWC tests for text chunking processor ([#661](https://github.com/opensearch-project/neural-search/pull/661))
2425
- Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))

Diff for: CONTRIBUTING.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ To send us a pull request, please:
3131

3232
1. Fork the repository.
3333
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34-
3. Include tests that check your new feature or bug fix. Ideally, we're looking for unit, integration, and BWC tests, but that depends on how big and critical your change is.
35-
If you're adding an integration test and it is using local ML models, please make sure that the number of model deployments is limited, and you're using the smallest possible model.
34+
3. Include tests that check your new feature or bug fix. Ideally, we're looking for unit, integration, and BWC tests, but that depends on how big and critical your change is.
35+
If you're adding an integration test and it is using local ML models, please make sure that the number of model deployments is limited, and you're using the smallest possible model.
3636
Each model deployment consumes resources, and having too many models may cause unexpected test failures.
3737
4. Ensure local tests pass.
3838
5. Commit to your fork using clear commit messages.

Diff for: qa/restart-upgrade/build.gradle

+14
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
9090
}
9191
}
9292

93+
// Exclude the k-NN radial search tests when the BWC version predates 2.14, the release that introduced this feature
94+
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
95+
filter {
96+
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
97+
}
98+
}
99+
93100
nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
94101
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
95102
systemProperty 'tests.security.manager', 'false'
@@ -139,6 +146,13 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
139146
}
140147
}
141148

149+
// Exclude the k-NN radial search tests when the BWC version predates 2.14, the release that introduced this feature
150+
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
151+
filter {
152+
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
153+
}
154+
}
155+
142156
nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
143157
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
144158
systemProperty 'tests.security.manager', 'false'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
package org.opensearch.neuralsearch.bwc;
6+
7+
import java.nio.file.Files;
8+
import java.nio.file.Path;
9+
import java.util.Map;
10+
import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
11+
import static org.opensearch.neuralsearch.util.TestUtils.TEXT_IMAGE_EMBEDDING_PROCESSOR;
12+
import static org.opensearch.neuralsearch.util.TestUtils.getModelId;
13+
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;
14+
15+
public class KnnRadialSearchIT extends AbstractRestartUpgradeRestTestCase {
16+
private static final String PIPELINE_NAME = "radial-search-pipeline";
17+
private static final String TEST_FIELD = "passage_text";
18+
private static final String TEST_IMAGE_FIELD = "passage_image";
19+
private static final String TEXT = "Hello world";
20+
private static final String TEXT_1 = "Hello world a";
21+
private static final String TEST_IMAGE_TEXT = "/9j/4AAQSkZJRgABAQAASABIAAD";
22+
private static final String TEST_IMAGE_TEXT_1 = "/9j/4AAQSkZJRgbdwoeicfhoid";
23+
24+
// Test restart-upgrade with k-NN radial search
25+
// Create Text Image Embedding Processor, Ingestion Pipeline and add document
26+
// Validate radial search queries (min score and max distance) in restart-upgrade scenario
27+
public void testKnnRadialSearch_E2EFlow() throws Exception {
28+
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
29+
30+
if (isRunningAgainstOldCluster()) {
31+
String modelId = uploadTextEmbeddingModel();
32+
loadModel(modelId);
33+
createPipelineForTextImageProcessor(modelId, PIPELINE_NAME);
34+
createIndexWithConfiguration(
35+
getIndexNameForTest(),
36+
Files.readString(Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())),
37+
PIPELINE_NAME
38+
);
39+
addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT);
40+
} else {
41+
String modelId = null;
42+
try {
43+
modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_IMAGE_EMBEDDING_PROCESSOR);
44+
loadModel(modelId);
45+
addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_1, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT_1);
46+
validateIndexQuery(modelId);
47+
} finally {
48+
wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, null);
49+
}
50+
}
51+
}
52+
53+
private void validateIndexQuery(final String modelId) {
54+
NeuralQueryBuilder neuralQueryBuilderWithMinScoreQuery = new NeuralQueryBuilder(
55+
"passage_embedding",
56+
TEXT,
57+
TEST_IMAGE_TEXT,
58+
modelId,
59+
null,
60+
null,
61+
0.01f,
62+
null,
63+
null
64+
);
65+
Map<String, Object> responseWithMinScoreQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMinScoreQuery, 1);
66+
assertNotNull(responseWithMinScoreQuery);
67+
68+
NeuralQueryBuilder neuralQueryBuilderWithMaxDistanceQuery = new NeuralQueryBuilder(
69+
"passage_embedding",
70+
TEXT,
71+
TEST_IMAGE_TEXT,
72+
modelId,
73+
null,
74+
100000f,
75+
null,
76+
null,
77+
null
78+
);
79+
Map<String, Object> responseWithMaxDistanceQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMaxDistanceQuery, 1);
80+
assertNotNull(responseWithMaxDistanceQuery);
81+
}
82+
}

Diff for: qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java

+11-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,17 @@ public void testTextImageEmbeddingProcessor_E2EFlow() throws Exception {
5353
private void validateTestIndex(final String modelId) throws Exception {
5454
int docCount = getDocCount(getIndexNameForTest());
5555
assertEquals(2, docCount);
56-
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder("passage_embedding", TEXT, TEST_IMAGE_TEXT, modelId, 1, null, null);
56+
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
57+
"passage_embedding",
58+
TEXT,
59+
TEST_IMAGE_TEXT,
60+
modelId,
61+
1,
62+
null,
63+
null,
64+
null,
65+
null
66+
);
5767
Map<String, Object> response = search(getIndexNameForTest(), neuralQueryBuilder, 1);
5868
assertNotNull(response);
5969
}

Diff for: qa/rolling-upgrade/build.gradle

+14
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
9090
}
9191
}
9292

93+
// Exclude the k-NN radial search tests when the BWC version predates 2.14, the release that introduced this feature
94+
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
95+
filter {
96+
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
97+
}
98+
}
99+
93100
nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
94101
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
95102
systemProperty 'tests.security.manager', 'false'
@@ -140,6 +147,13 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) {
140147
}
141148
}
142149

150+
// Exclude the k-NN radial search tests when the BWC version predates 2.14, the release that introduced this feature
151+
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
152+
filter {
153+
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
154+
}
155+
}
156+
143157
nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
144158
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
145159
systemProperty 'tests.security.manager', 'false'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
package org.opensearch.neuralsearch.bwc;
6+
7+
import java.nio.file.Files;
8+
import java.nio.file.Path;
9+
import java.util.Map;
10+
import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
11+
import static org.opensearch.neuralsearch.util.TestUtils.TEXT_IMAGE_EMBEDDING_PROCESSOR;
12+
import static org.opensearch.neuralsearch.util.TestUtils.getModelId;
13+
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;
14+
15+
public class KnnRadialSearchIT extends AbstractRollingUpgradeTestCase {
16+
private static final String PIPELINE_NAME = "radial-search-pipeline";
17+
private static final String TEST_FIELD = "passage_text";
18+
private static final String TEST_IMAGE_FIELD = "passage_image";
19+
private static final String TEXT = "Hello world";
20+
private static final String TEXT_MIXED = "Hello world mixed";
21+
private static final String TEXT_UPGRADED = "Hello world upgraded";
22+
private static final String TEST_IMAGE_TEXT = "/9j/4AAQSkZJRgABAQAASABIAAD";
23+
private static final String TEST_IMAGE_TEXT_MIXED = "/9j/4AAQSkZJRgbdwoeicfhoid";
24+
private static final String TEST_IMAGE_TEXT_UPGRADED = "/9j/4AAQSkZJR8eydhgfwceocvlk";
25+
26+
private static final int NUM_DOCS_PER_ROUND = 1;
27+
private static String modelId = "";
28+
29+
// Test rolling-upgrade with kNN radial search
30+
// Create Text Image Embedding Processor, Ingestion Pipeline and add document
31+
// Validate radial query, pipeline and document count in rolling-upgrade scenario
32+
public void testKnnRadialSearch_E2EFlow() throws Exception {
33+
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
34+
switch (getClusterType()) {
35+
case OLD:
36+
modelId = uploadTextImageEmbeddingModel();
37+
loadModel(modelId);
38+
createPipelineForTextImageProcessor(modelId, PIPELINE_NAME);
39+
createIndexWithConfiguration(
40+
getIndexNameForTest(),
41+
Files.readString(Path.of(classLoader.getResource("processor/IndexMappings.json").toURI())),
42+
PIPELINE_NAME
43+
);
44+
addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT);
45+
break;
46+
case MIXED:
47+
modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_IMAGE_EMBEDDING_PROCESSOR);
48+
int totalDocsCountMixed;
49+
if (isFirstMixedRound()) {
50+
totalDocsCountMixed = NUM_DOCS_PER_ROUND;
51+
validateIndexQueryOnUpgrade(totalDocsCountMixed, modelId, TEXT, TEST_IMAGE_TEXT);
52+
addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_MIXED, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT_MIXED);
53+
} else {
54+
totalDocsCountMixed = 2 * NUM_DOCS_PER_ROUND;
55+
validateIndexQueryOnUpgrade(totalDocsCountMixed, modelId, TEXT_MIXED, TEST_IMAGE_TEXT_MIXED);
56+
}
57+
break;
58+
case UPGRADED:
59+
try {
60+
modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_IMAGE_EMBEDDING_PROCESSOR);
61+
int totalDocsCountUpgraded = 3 * NUM_DOCS_PER_ROUND;
62+
loadModel(modelId);
63+
addDocument(getIndexNameForTest(), "2", TEST_FIELD, TEXT_UPGRADED, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT_UPGRADED);
64+
validateIndexQueryOnUpgrade(totalDocsCountUpgraded, modelId, TEXT_UPGRADED, TEST_IMAGE_TEXT_UPGRADED);
65+
} finally {
66+
wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, null);
67+
}
68+
break;
69+
default:
70+
throw new IllegalStateException("Unexpected value: " + getClusterType());
71+
}
72+
}
73+
74+
private void validateIndexQueryOnUpgrade(final int numberOfDocs, final String modelId, final String text, final String imageText)
75+
throws Exception {
76+
int docCount = getDocCount(getIndexNameForTest());
77+
assertEquals(numberOfDocs, docCount);
78+
loadModel(modelId);
79+
80+
NeuralQueryBuilder neuralQueryBuilderWithMinScoreQuery = new NeuralQueryBuilder(
81+
"passage_embedding",
82+
text,
83+
imageText,
84+
modelId,
85+
null,
86+
null,
87+
0.01f,
88+
null,
89+
null
90+
);
91+
Map<String, Object> responseWithMinScore = search(getIndexNameForTest(), neuralQueryBuilderWithMinScoreQuery, 1);
92+
assertNotNull(responseWithMinScore);
93+
94+
NeuralQueryBuilder neuralQueryBuilderWithMaxDistanceQuery = new NeuralQueryBuilder(
95+
"passage_embedding",
96+
text,
97+
imageText,
98+
modelId,
99+
null,
100+
100000f,
101+
null,
102+
null,
103+
null
104+
);
105+
Map<String, Object> responseWithMaxScore = search(getIndexNameForTest(), neuralQueryBuilderWithMaxDistanceQuery, 1);
106+
assertNotNull(responseWithMaxScore);
107+
}
108+
}

Diff for: qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java

+13-3
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,18 @@ private void validateTestIndexOnUpgrade(final int numberOfDocs, final String mod
7676
int docCount = getDocCount(getIndexNameForTest());
7777
assertEquals(numberOfDocs, docCount);
7878
loadModel(modelId);
79-
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder("passage_embedding", text, imageText, modelId, 1, null, null);
80-
Map<String, Object> response = search(getIndexNameForTest(), neuralQueryBuilder, 1);
81-
assertNotNull(response);
79+
NeuralQueryBuilder neuralQueryBuilderWithKQuery = new NeuralQueryBuilder(
80+
"passage_embedding",
81+
text,
82+
imageText,
83+
modelId,
84+
1,
85+
null,
86+
null,
87+
null,
88+
null
89+
);
90+
Map<String, Object> responseWithKQuery = search(getIndexNameForTest(), neuralQueryBuilderWithKQuery, 1);
91+
assertNotNull(responseWithKQuery);
8292
}
8393
}

0 commit comments

Comments
 (0)