Skip to content

Commit f9c1120

Browse files
committed
introduce jVector to the supported KNN engines
Signed-off-by: Samuel Herman <[email protected]>
1 parent 8374b8f commit f9c1120

39 files changed

+2582
-90
lines changed

build.gradle

+15-3
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,10 @@ dependencies {
316316
}
317317
testFixturesImplementation "org.opensearch:common-utils:${version}"
318318
implementation 'com.github.oshi:oshi-core:6.4.13'
319+
320+
implementation 'io.github.jbellis:jvector:4.0.0-beta.2-SNAPSHOT'
321+
implementation 'org.agrona:agrona:1.20.0'
322+
319323
api "net.java.dev.jna:jna:5.13.0"
320324
api "net.java.dev.jna:jna-platform:5.13.0"
321325
// OpenSearch core is using slf4j 1.7.36. Therefore, we cannot change the version here.
@@ -331,7 +335,7 @@ task windowsPatches(type:Exec) {
331335
task cmakeJniLib(type:Exec) {
332336
workingDir 'jni'
333337
def args = []
334-
args.add("cmake")
338+
args.add("/opt/homebrew/bin/cmake")
335339
args.add(".")
336340
args.add("-DKNN_PLUGIN_VERSION=${opensearch_version}")
337341
args.add("-DAVX2_ENABLED=${avx2_enabled}")
@@ -364,6 +368,8 @@ test {
364368
dependsOn buildJniLib
365369
systemProperty 'tests.security.manager', 'false'
366370
systemProperty "java.library.path", "$rootDir/jni/release"
371+
systemProperty 'log4j.configurationFile', "$rootDir/src/test/resources/log4j2.properties"
372+
367373
// Enables mockito-inline, which supports mocking of static classes/calls
368374
systemProperty "jdk.attach.allowAttachSelf", true
369375
if (Os.isFamily(Os.FAMILY_WINDOWS)) {
@@ -378,6 +384,11 @@ integTest {
378384
dependsOn buildJniLib
379385
}
380386
systemProperty 'tests.security.manager', 'false'
387+
println "Project root directory: ${project.rootDir}"
388+
systemProperty "java.security.policy", "file://${project.rootDir}/src/main/plugin-metadata/plugin-security.policy"
389+
systemProperty 'log4j.configurationFile', "${project.rootDir}/src/test/resources/log4j2.properties"
390+
testLogging.showStandardStreams = true
391+
systemProperty 'tests.output', 'true'
381392
systemProperty 'java.io.tmpdir', opensearch_tmp_dir.absolutePath
382393
systemProperty "java.library.path", "$rootDir/jni/release"
383394
// allows integration test classes to access test resource from project root path
@@ -421,7 +432,8 @@ integTest {
421432

422433
testClusters.integTest {
423434
testDistribution = "ARCHIVE"
424-
435+
systemProperty "java.security.policy", "file://${project.rootDir}/src/main/plugin-metadata/plugin-security.policy"
436+
systemProperty 'log4j.configurationFile', "${project.rootDir}/src/test/resources/log4j2.properties"
425437
// Optionally install security
426438
if (System.getProperty("security.enabled") != null) {
427439
configureSecurityPlugin(testClusters.integTest)
@@ -460,7 +472,7 @@ task integTestRemote(type: RestIntegTestTask) {
460472
systemProperty 'cluster.number_of_nodes', "${_numNodes}"
461473

462474
systemProperty 'tests.security.manager', 'false'
463-
475+
systemProperty 'tests.output', 'true'
464476
// Run tests with remote cluster only if rest case is defined
465477
if (System.getProperty("tests.rest.cluster") != null) {
466478
filter {

demo.sh

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/bin/bash
2+
3+
./gradlew run -PcustomDistributionUrl=file://${HOME}/projects/OpenSearch/distribution/archives/darwin-tar/build/distributions/opensearch-min-3.0.0-SNAPSHOT-darwin-x64.tar.gz
4+
5+
# Ping local cluster
6+
curl localhost:9200
7+
8+
# Check test cluster status
9+
curl -X GET "http://localhost:9200/_cluster/health?pretty"
10+
11+
# Create new knn index with 1 shard and 0 replicas
12+
curl -X PUT "localhost:9200/my_knn_index?pretty" -H 'Content-Type: application/json' -d'
13+
{
14+
"settings": {
15+
"index.knn": true,
16+
"index.number_of_shards": 1,
17+
"index.number_of_replicas": 0,
18+
"index.use_compound_file": false
19+
}
20+
}'
21+
22+
# Check index settings
23+
curl -X GET "localhost:9200/my_knn_index/_settings?pretty"
24+
25+
# Add mapping for knn_vector field with jVector engine
26+
curl -X PUT "localhost:9200/my_knn_index/_mapping?pretty" -H 'Content-Type: application/json' -d'
27+
{
28+
"properties": {
29+
"my_vector": {
30+
"type": "knn_vector",
31+
"dimension": 3,
32+
"method": {
33+
"name": "disk_ann",
34+
"space_type": "l2",
35+
"engine": "jvector"
36+
}
37+
}
38+
}
39+
}'
40+
41+
42+
# Check index mapping
43+
curl -X GET "localhost:9200/my_knn_index/_mapping?pretty"
44+
45+
# Add documents with knn_vector field (bulk request)
46+
curl -X POST "localhost:9200/_bulk?pretty" -H 'Content-Type: application/json' -d'
47+
{"index": {"_index": "my_knn_index"}}
48+
{"my_vector": [1, 2, 3]}
49+
{"index": {"_index": "my_knn_index"}}
50+
{"my_vector": [4, 5, 6]}
51+
{"index": {"_index": "my_knn_index"}}
52+
{"my_vector": [7, 8, 9]}
53+
'
54+
55+
# Refresh index
56+
curl -X POST "localhost:9200/my_knn_index/_refresh?pretty"
57+
58+
59+
# Search for nearest neighbors
60+
curl -X GET "localhost:9200/my_knn_index/_search?pretty" -H 'Content-Type: application/json' -d'
61+
{
62+
"query": {
63+
"knn": {
64+
"my_vector": {
65+
"vector": [1, 2, 3],
66+
"k": 3
67+
}
68+
}
69+
}
70+
}'
71+
72+
# Delete index
73+
curl -X DELETE "localhost:9200/my_knn_index?pretty"
74+
75+
76+
# List the test cluster's index directories on disk
77+
ls -lah build/testclusters/integTest-0/data/nodes/0/indices

gradle/wrapper/gradle-wrapper.jar

-19.7 KB
Binary file not shown.
+2-7
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
1-
#
2-
# Copyright OpenSearch Contributors
3-
# SPDX-License-Identifier: Apache-2.0
4-
#
5-
61
distributionBase=GRADLE_USER_HOME
72
distributionPath=wrapper/dists
8-
distributionSha256Sum=f2b9ed0faf8472cbe469255ae6c86eddb77076c75191741b4a462f33128dd419
9-
distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-all.zip
3+
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
104
networkTimeout=10000
5+
validateDistributionUrl=true
116
zipStoreBase=GRADLE_USER_HOME
127
zipStorePath=wrapper/dists

gradlew

+5-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
11
#!/bin/sh
2-
#
3-
# Copyright OpenSearch Contributors
4-
# SPDX-License-Identifier: Apache-2.0
5-
#
62

73
#
84
# Copyright © 2015-2021 the original authors.
@@ -19,6 +15,8 @@
1915
# See the License for the specific language governing permissions and
2016
# limitations under the License.
2117
#
18+
# SPDX-License-Identifier: Apache-2.0
19+
#
2220

2321
##############################################################################
2422
#
@@ -59,7 +57,7 @@
5957
# Darwin, MinGW, and NonStop.
6058
#
6159
# (3) This script is generated from the Groovy template
62-
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
60+
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
6361
# within the Gradle project.
6462
#
6563
# You can find Gradle at https://github.com/gradle/gradle/.
@@ -88,7 +86,8 @@ done
8886
# shellcheck disable=SC2034
8987
APP_BASE_NAME=${0##*/}
9088
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
91-
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
89+
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
90+
' "$PWD" ) || exit
9291

9392
# Use the maximum available, or set MAX_FD != -1 to use that value.
9493
MAX_FD=maximum

gradlew.bat

+12-14
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
11
@rem
2-
@rem Copyright OpenSearch Contributors
3-
@rem SPDX-License-Identifier: Apache-2.0
4-
@rem
5-
@rem
62
@rem Copyright 2015 the original author or authors.
73
@rem
84
@rem Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,6 +13,8 @@
1713
@rem See the License for the specific language governing permissions and
1814
@rem limitations under the License.
1915
@rem
16+
@rem SPDX-License-Identifier: Apache-2.0
17+
@rem
2018

2119
@if "%DEBUG%"=="" @echo off
2220
@rem ##########################################################################
@@ -47,11 +45,11 @@ set JAVA_EXE=java.exe
4745
%JAVA_EXE% -version >NUL 2>&1
4846
if %ERRORLEVEL% equ 0 goto execute
4947

50-
echo.
51-
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
52-
echo.
53-
echo Please set the JAVA_HOME variable in your environment to match the
54-
echo location of your Java installation.
48+
echo. 1>&2
49+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
50+
echo. 1>&2
51+
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
52+
echo location of your Java installation. 1>&2
5553

5654
goto fail
5755

@@ -61,11 +59,11 @@ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
6159

6260
if exist "%JAVA_EXE%" goto execute
6361

64-
echo.
65-
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
66-
echo.
67-
echo Please set the JAVA_HOME variable in your environment to match the
68-
echo location of your Java installation.
62+
echo. 1>&2
63+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
64+
echo. 1>&2
65+
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
66+
echo location of your Java installation. 1>&2
6967

7068
goto fail
7169

src/main/java/org/opensearch/knn/common/KNNConstants.java

+4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ public class KNNConstants {
1717
public static final String NAME = "name";
1818
public static final String PARAMETERS = "parameters";
1919
public static final String METHOD_HNSW = "hnsw";
20+
public static final String DISK_ANN = "disk_ann";
2021
public static final String TYPE = "type";
2122
public static final String TYPE_NESTED = "nested";
2223
public static final String PATH = "path";
@@ -121,6 +122,9 @@ public class KNNConstants {
121122
public static final String FAISS_SIGNED_BYTE_SQ = "SQ8_direct_signed";
122123
public static final String FAISS_SQ_CLIP = "clip";
123124

125+
// JVector specific constants
126+
public static final String JVECTOR_NAME = "jvector";
127+
124128
// Parameter defaults/limits
125129
public static final Integer ENCODER_PARAMETER_PQ_CODE_COUNT_DEFAULT = 1;
126130
public static final Integer ENCODER_PARAMETER_PQ_CODE_COUNT_LIMIT = 1024;

src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java

+44-40
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.opensearch.index.mapper.MapperService;
1616
import org.opensearch.knn.index.KNNSettings;
1717
import org.opensearch.knn.index.codec.KNN990Codec.NativeEngines990KnnVectorsFormat;
18+
import org.opensearch.knn.index.codec.jvector.JVectorFormat;
1819
import org.opensearch.knn.index.codec.params.KNNScalarQuantizedVectorsFormatParams;
1920
import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams;
2021
import org.opensearch.knn.index.engine.KNNEngine;
@@ -24,6 +25,7 @@
2425

2526
import java.util.Map;
2627
import java.util.Optional;
28+
import java.util.function.BiFunction;
2729
import java.util.function.Function;
2830
import java.util.function.Supplier;
2931

@@ -42,7 +44,7 @@ public abstract class BasePerFieldKnnVectorsFormat extends PerFieldKnnVectorsFor
4244
private final int defaultMaxConnections;
4345
private final int defaultBeamWidth;
4446
private final Supplier<KnnVectorsFormat> defaultFormatSupplier;
45-
private final Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier;
47+
private final BiFunction<KNNEngine, KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier;
4648
private Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier;
4749
private static final String MAX_CONNECTIONS = "max_connections";
4850
private static final String BEAM_WIDTH = "beam_width";
@@ -52,7 +54,7 @@ public BasePerFieldKnnVectorsFormat(
5254
int defaultMaxConnections,
5355
int defaultBeamWidth,
5456
Supplier<KnnVectorsFormat> defaultFormatSupplier,
55-
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier
57+
BiFunction<KNNEngine,KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier
5658
) {
5759
this.mapperService = mapperService;
5860
this.defaultMaxConnections = defaultMaxConnections;
@@ -89,50 +91,52 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) {
8991
.orElseThrow(() -> new IllegalArgumentException("KNN method context cannot be empty"));
9092
final KNNEngine engine = knnMethodContext.getKnnEngine();
9193
final Map<String, Object> params = knnMethodContext.getMethodComponentContext().getParameters();
94+
switch (engine) {
95+
// All Java engines to use Lucene extensions directly
96+
case JVECTOR:
97+
case LUCENE:
98+
if (params != null && params.containsKey(METHOD_ENCODER_PARAMETER)) {
99+
KNNScalarQuantizedVectorsFormatParams knnScalarQuantizedVectorsFormatParams = new KNNScalarQuantizedVectorsFormatParams(
100+
params,
101+
defaultMaxConnections,
102+
defaultBeamWidth
103+
);
104+
if (knnScalarQuantizedVectorsFormatParams.validate(params)) {
105+
log.debug(
106+
"Initialize KNN vector format for field [{}] with params [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\"",
107+
field,
108+
MAX_CONNECTIONS,
109+
knnScalarQuantizedVectorsFormatParams.getMaxConnections(),
110+
BEAM_WIDTH,
111+
knnScalarQuantizedVectorsFormatParams.getBeamWidth(),
112+
LUCENE_SQ_CONFIDENCE_INTERVAL,
113+
knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(),
114+
LUCENE_SQ_BITS,
115+
knnScalarQuantizedVectorsFormatParams.getBits()
116+
);
117+
return scalarQuantizedVectorsFormatSupplier.apply(knnScalarQuantizedVectorsFormatParams);
118+
}
119+
}
92120

93-
if (engine == KNNEngine.LUCENE) {
94-
if (params != null && params.containsKey(METHOD_ENCODER_PARAMETER)) {
95-
KNNScalarQuantizedVectorsFormatParams knnScalarQuantizedVectorsFormatParams = new KNNScalarQuantizedVectorsFormatParams(
96-
params,
97-
defaultMaxConnections,
98-
defaultBeamWidth
121+
KNNVectorsFormatParams knnVectorsFormatParams = new KNNVectorsFormatParams(
122+
params,
123+
defaultMaxConnections,
124+
defaultBeamWidth,
125+
knnMethodContext.getSpaceType()
99126
);
100-
if (knnScalarQuantizedVectorsFormatParams.validate(params)) {
101-
log.debug(
102-
"Initialize KNN vector format for field [{}] with params [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\"",
127+
log.debug(
128+
"Initialize KNN vector format for field [{}] with params [{}] = \"{}\" and [{}] = \"{}\"",
103129
field,
104130
MAX_CONNECTIONS,
105-
knnScalarQuantizedVectorsFormatParams.getMaxConnections(),
131+
knnVectorsFormatParams.getMaxConnections(),
106132
BEAM_WIDTH,
107-
knnScalarQuantizedVectorsFormatParams.getBeamWidth(),
108-
LUCENE_SQ_CONFIDENCE_INTERVAL,
109-
knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(),
110-
LUCENE_SQ_BITS,
111-
knnScalarQuantizedVectorsFormatParams.getBits()
112-
);
113-
return scalarQuantizedVectorsFormatSupplier.apply(knnScalarQuantizedVectorsFormatParams);
114-
}
115-
}
116-
117-
KNNVectorsFormatParams knnVectorsFormatParams = new KNNVectorsFormatParams(
118-
params,
119-
defaultMaxConnections,
120-
defaultBeamWidth,
121-
knnMethodContext.getSpaceType()
122-
);
123-
log.debug(
124-
"Initialize KNN vector format for field [{}] with params [{}] = \"{}\" and [{}] = \"{}\"",
125-
field,
126-
MAX_CONNECTIONS,
127-
knnVectorsFormatParams.getMaxConnections(),
128-
BEAM_WIDTH,
129-
knnVectorsFormatParams.getBeamWidth()
130-
);
131-
return vectorsFormatSupplier.apply(knnVectorsFormatParams);
133+
knnVectorsFormatParams.getBeamWidth()
134+
);
135+
return vectorsFormatSupplier.apply(engine, knnVectorsFormatParams);
136+
default:
137+
// All native engines to use NativeEngines990KnnVectorsFormat
138+
return nativeEngineVectorsFormat();
132139
}
133-
134-
// All native engines to use NativeEngines990KnnVectorsFormat
135-
return nativeEngineVectorsFormat();
136140
}
137141

138142
private NativeEngines990KnnVectorsFormat nativeEngineVectorsFormat() {

src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120Codec.java

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.apache.lucene.codecs.FilterCodec;
1313
import org.apache.lucene.codecs.KnnVectorsFormat;
1414
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
15+
import org.opensearch.index.mapper.MapperService;
1516
import org.opensearch.knn.index.codec.KNNCodecVersion;
1617
import org.opensearch.knn.index.codec.KNNFormatFacade;
1718

0 commit comments

Comments
 (0)