Skip to content

Commit 1f9d215

Browse files
committed
adding reindex
Signed-off-by: Amit Galitzky <[email protected]>
1 parent 0901ba1 commit 1f9d215

8 files changed

+194
-8
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ List<Provider<RegularFile>> plugins = [
494494
return new RegularFile() {
495495
@Override
496496
File getAsFile() {
497-
return configurations.zipArchive.asFileTree.getSingleFile()
497+
return configurations.zipArchive.asFileTree.getFiles()
498498
}
499499
}
500500
}

src/main/java/org/opensearch/flowframework/common/CommonValue.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,4 +225,6 @@ private CommonValue() {}
225225
public static final String CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN = "create_connector.credential.session_token";
226226
/** The field name for ingest pipeline model ID substitution */
227227
public static final String CREATE_INGEST_PIPELINE_MODEL_ID = "create_ingest_pipeline.model_id";
228+
/** The field name for reindex source index substitution */
229+
public static final String REINDEX_SOURCE_INDEX = "reindex.source_index";
228230
}

src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SECRET_KEY;
2323
import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN;
2424
import static org.opensearch.flowframework.common.CommonValue.CREATE_INGEST_PIPELINE_MODEL_ID;
25+
import static org.opensearch.flowframework.common.CommonValue.REINDEX_SOURCE_INDEX;
2526

2627
/**
2728
* Enum encapsulating the different default use cases and templates we have stored
@@ -147,6 +148,13 @@ public enum DefaultUseCases {
147148
"defaults/hybrid-search-with-local-model-defaults.json",
148149
"substitutionTemplates/hybrid-search-with-local-model-template.json",
149150
Collections.emptyList()
151+
),
152+
/** defaults file and substitution-ready template for semantic search with reindex command */
153+
SEMANTIC_SEARCH_WITH_REINDEX(
154+
"semantic_search_with_reindex",
155+
"defaults/semantic-search-with-reindex-defaults.json",
156+
"substitutionTemplates/semantic-search-with-reindex-template.json",
157+
List.of(CREATE_CONNECTOR_CREDENTIAL_KEY, REINDEX_SOURCE_INDEX)
150158
);
151159

152160
private final String useCaseName;

src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,20 @@ public PlainActionFuture<WorkflowData> execute(
9595
Float requestsPerSecond = inputs.containsKey(REQUESTS_PER_SECOND)
9696
? Float.parseFloat(inputs.get(REQUESTS_PER_SECOND).toString())
9797
: null;
98+
if (requestsPerSecond != null && requestsPerSecond < 0) { requestsPerSecond = Float.POSITIVE_INFINITY; } // negative input is mapped to positive infinity; null (key absent) is left as-is, since unboxing it in a comparison or mixed float/Float ternary would throw NullPointerException
9899
Boolean requireAlias = inputs.containsKey(REQUIRE_ALIAS) ? Booleans.parseBoolean(inputs.get(REQUIRE_ALIAS).toString()) : null;
99-
Integer slices = (Integer) inputs.get(SLICES);
100-
Integer maxDocs = (Integer) inputs.get(MAX_DOCS);
101-
100+
Integer slices;
101+
Integer maxDocs;
102+
if (inputs.get(SLICES) != null) {
103+
slices = Integer.parseInt(String.valueOf(inputs.get(SLICES)));
104+
} else {
105+
slices = (Integer) inputs.get(SLICES);
106+
}
107+
if (inputs.get(MAX_DOCS) != null) {
108+
maxDocs = Integer.parseInt(String.valueOf(inputs.get(MAX_DOCS)));
109+
} else {
110+
maxDocs = (Integer) inputs.get(MAX_DOCS);
111+
}
102112
ReindexRequest reindexRequest = new ReindexRequest().setSourceIndices(Strings.splitStringByCommaToArray(sourceIndices))
103113
.setDestIndex(destinationIndex);
104114

src/main/resources/defaults/hybrid-search-with-local-model-defaults.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
{
22
"template.name": "hybrid-search",
33
"template.description": "Setting up hybrid search, ingest pipeline and index",
4-
"register_local_pretrained_model.name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
4+
"register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
55
"register_local_pretrained_model.description": "This is a sentence transformer model",
66
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
77
"register_local_pretrained_model.deploy": "true",
8-
"register_local_pretrained_model.version": "1.0.2",
8+
"register_local_pretrained_model.version": "1.0.1",
99
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
1010
"create_ingest_pipeline.description": "A text embedding pipeline",
1111
"create_ingest_pipeline.model_id": "123",

src/main/resources/defaults/semantic-search-with-local-model-defaults.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
{
22
"template.name": "semantic search with local pretrained model",
33
"template.description": "Setting up semantic search, with a local pretrained embedding model",
4-
"register_local_pretrained_model.name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
4+
"register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
55
"register_local_pretrained_model.description": "This is a sentence transformer model",
66
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
77
"register_local_pretrained_model.deploy": "true",
8-
"register_local_pretrained_model.version": "1.0.2",
8+
"register_local_pretrained_model.version": "1.0.1",
99
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
1010
"create_ingest_pipeline.description": "A text embedding pipeline",
1111
"text_embedding.field_map.input": "passage_text",
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"template.name": "semantic search with cohere embedding",
3+
"template.description": "Setting up semantic search, with a Cohere embedding model",
4+
"create_connector.name": "cohere-embedding-connector",
5+
"create_connector.description": "The connector to Cohere's public embed API",
6+
"create_connector.protocol": "http",
7+
"create_connector.model": "embed-english-v3.0",
8+
"create_connector.input_type": "search_document",
9+
"create_connector.truncate": "end",
10+
"create_connector.credential.key": "123",
11+
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
12+
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
13+
"create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding",
14+
"create_connector.actions.post_process_function": "connector.post_process.cohere.embedding",
15+
"register_remote_model.name": "Cohere english embed model",
16+
"register_remote_model.description": "cohere-embedding-model",
17+
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
18+
"create_ingest_pipeline.description": "A text embedding pipeline",
19+
"text_embedding.field_map.input": "passage_text",
20+
"text_embedding.field_map.output": "passage_embedding",
21+
"create_index.name": "my-nlp-index",
22+
"create_index.settings.number_of_shards": "2",
23+
"create_index.mappings.method.engine": "lucene",
24+
"create_index.mappings.method.space_type": "l2",
25+
"create_index.mappings.method.name": "hnsw",
26+
"text_embedding.field_map.output.dimension": "1024",
27+
"create_search_pipeline.pipeline_id": "default_model_pipeline",
28+
"reindex.source_index": "",
29+
"reindex.requests_per_second": "-1",
30+
"reindex.slices": "1"
31+
}
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
{
2+
"name": "${{template.name}}",
3+
"description": "${{template.description}}",
4+
"use_case": "SEMANTIC_SEARCH",
5+
"version": {
6+
"template": "1.0.0",
7+
"compatibility": [
8+
"2.12.0",
9+
"3.0.0"
10+
]
11+
},
12+
"workflows": {
13+
"provision": {
14+
"nodes": [
15+
{
16+
"id": "create_connector",
17+
"type": "create_connector",
18+
"user_inputs": {
19+
"name": "${{create_connector.name}}",
20+
"description": "${{create_connector.description}}",
21+
"version": "1",
22+
"protocol": "${{create_connector.protocol}}",
23+
"parameters": {
24+
"endpoint": "${{create_connector.endpoint}}",
25+
"model": "${{create_connector.model}}",
26+
"input_type": "search_document",
27+
"truncate": "END"
28+
},
29+
"credential": {
30+
"key": "${{create_connector.credential.key}}"
31+
},
32+
"actions": [
33+
{
34+
"action_type": "predict",
35+
"method": "POST",
36+
"url": "${{create_connector.actions.url}}",
37+
"headers": {
38+
"Authorization": "Bearer ${credential.key}",
39+
"Request-Source": "unspecified:opensearch"
40+
},
41+
"request_body": "${{create_connector.actions.request_body}}",
42+
"pre_process_function": "${{create_connector.actions.pre_process_function}}",
43+
"post_process_function": "${{create_connector.actions.post_process_function}}"
44+
}
45+
]
46+
}
47+
},
48+
{
49+
"id": "register_model",
50+
"type": "register_remote_model",
51+
"previous_node_inputs": {
52+
"create_connector": "connector_id"
53+
},
54+
"user_inputs": {
55+
"name": "${{register_remote_model.name}}",
56+
"function_name": "remote",
57+
"description": "${{register_remote_model.description}}",
58+
"deploy": true
59+
}
60+
},
61+
{
62+
"id": "create_ingest_pipeline",
63+
"type": "create_ingest_pipeline",
64+
"previous_node_inputs": {
65+
"register_model": "model_id"
66+
},
67+
"user_inputs": {
68+
"pipeline_id": "${{create_ingest_pipeline.pipeline_id}}",
69+
"configurations": {
70+
"description": "${{create_ingest_pipeline.description}}",
71+
"processors": [
72+
{
73+
"text_embedding": {
74+
"model_id": "${{register_model.model_id}}",
75+
"field_map": {
76+
"${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}"
77+
}
78+
}
79+
}
80+
]
81+
}
82+
}
83+
},
84+
{
85+
"id": "create_index",
86+
"type": "create_index",
87+
"previous_node_inputs": {
88+
"create_ingest_pipeline": "pipeline_id"
89+
},
90+
"user_inputs": {
91+
"index_name": "${{create_index.name}}",
92+
"configurations": {
93+
"settings": {
94+
"index.knn": true,
95+
"default_pipeline": "${{create_ingest_pipeline.pipeline_id}}",
96+
"number_of_shards": "${{create_index.settings.number_of_shards}}"
97+
},
98+
"mappings": {
99+
"properties": {
100+
"${{text_embedding.field_map.output}}": {
101+
"type": "knn_vector",
102+
"dimension": "${{text_embedding.field_map.output.dimension}}",
103+
"method": {
104+
"engine": "${{create_index.mappings.method.engine}}",
105+
"space_type": "${{create_index.mappings.method.space_type}}",
106+
"name": "${{create_index.mappings.method.name}}",
107+
"parameters": {}
108+
}
109+
},
110+
"${{text_embedding.field_map.input}}": {
111+
"type": "text"
112+
}
113+
}
114+
}
115+
}
116+
}
117+
},
118+
{
119+
"id": "reindex",
120+
"type": "reindex",
121+
"previous_node_inputs": {
122+
"create_index": "index_name"
123+
},
124+
"user_inputs": {
125+
"source_index": "${{reindex.source_index}}",
126+
"destination_index": "${{create_index.name}}",
127+
"refresh": false,
128+
"requests_per_second": "${{reindex.requests_per_second}}",
129+
"slices": "${{reindex.slices}}"
130+
}
131+
}
132+
]
133+
}
134+
}
135+
}

0 commit comments

Comments
 (0)