Skip to content

Commit c4eefdd

Browse files
authored
[Backport 2.x] Adding additional default use cases (#731) (#734)
Adding additional default use cases (#731) * adding pretrained model templates * adding reindex * changing file structure for bwc --------- Signed-off-by: Amit Galitzky <[email protected]>
1 parent 9e0fc98 commit c4eefdd

24 files changed

+631
-45
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
1717
### Enhancements
1818
- Add Workflow Step for Reindex from source index to destination ([#718](https://github.com/opensearch-project/flow-framework/pull/718))
1919
- Add param to delete workflow API to clear status even if resources exist ([#719](https://github.com/opensearch-project/flow-framework/pull/719))
20+
- Add additional default use cases ([#731](https://github.com/opensearch-project/flow-framework/pull/731))
2021
### Bug Fixes
2122
- Add user mapping to Workflow State index ([#705](https://github.com/opensearch-project/flow-framework/pull/705))
2223

build.gradle

+25-1
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,8 @@ dependencies {
180180

181181
// ZipArchive dependencies used for integration tests
182182
zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}"
183+
zipArchive group: 'org.opensearch.plugin', name:'opensearch-knn', version: "${opensearch_build}"
184+
zipArchive group: 'org.opensearch.plugin', name:'neural-search', version: "${opensearch_build}"
183185
secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"
184186

185187
configurations.all {
@@ -491,7 +493,29 @@ List<Provider<RegularFile>> plugins = [
491493
return new RegularFile() {
492494
@Override
493495
File getAsFile() {
494-
return configurations.zipArchive.asFileTree.getSingleFile()
496+
return configurations.zipArchive.asFileTree.matching{include "**/opensearch-ml-plugin-${opensearch_build}.zip"}.getSingleFile()
497+
}
498+
}
499+
}
500+
}),
501+
provider(new Callable<RegularFile>(){
502+
@Override
503+
RegularFile call() throws Exception {
504+
return new RegularFile() {
505+
@Override
506+
File getAsFile() {
507+
return configurations.zipArchive.asFileTree.matching{include "**/opensearch-knn-${opensearch_build}.zip"}.getSingleFile()
508+
}
509+
}
510+
}
511+
}),
512+
provider(new Callable<RegularFile>(){
513+
@Override
514+
RegularFile call() throws Exception {
515+
return new RegularFile() {
516+
@Override
517+
File getAsFile() {
518+
return configurations.zipArchive.asFileTree.matching{include "**/neural-search-${opensearch_build}.zip"}.getSingleFile()
495519
}
496520
}
497521
}

src/main/java/org/opensearch/flowframework/common/CommonValue.java

+2
Original file line numberDiff line numberDiff line change
@@ -225,4 +225,6 @@ private CommonValue() {}
225225
public static final String CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN = "create_connector.credential.session_token";
226226
/** The field name for ingest pipeline model ID substitution */
227227
public static final String CREATE_INGEST_PIPELINE_MODEL_ID = "create_ingest_pipeline.model_id";
228+
/** The field name for reindex source index substitution */
229+
public static final String REINDEX_SOURCE_INDEX = "reindex.source_index";
228230
}

src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java

+23
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SECRET_KEY;
2323
import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN;
2424
import static org.opensearch.flowframework.common.CommonValue.CREATE_INGEST_PIPELINE_MODEL_ID;
25+
import static org.opensearch.flowframework.common.CommonValue.REINDEX_SOURCE_INDEX;
2526

2627
/**
2728
* Enum encapsulating the different default use cases and templates we have stored
@@ -132,6 +133,28 @@ public enum DefaultUseCases {
132133
"defaults/conversational-search-defaults.json",
133134
"substitutionTemplates/conversational-search-with-cohere-model-template.json",
134135
List.of(CREATE_CONNECTOR_CREDENTIAL_KEY)
136+
),
137+
/** defaults file and substitution-ready template for semantic search with a local pretrained model */
138+
SEMANTIC_SEARCH_WITH_LOCAL_MODEL(
139+
"semantic_search_with_local_model",
140+
"defaults/semantic-search-with-local-model-defaults.json",
141+
"substitutionTemplates/semantic-search-with-local-model-template.json",
142+
Collections.emptyList()
143+
144+
),
145+
/** defaults file and substitution-ready template for hybrid search with a local pretrained model */
146+
HYBRID_SEARCH_WITH_LOCAL_MODEL(
147+
"hybrid_search_with_local_model",
148+
"defaults/hybrid-search-with-local-model-defaults.json",
149+
"substitutionTemplates/hybrid-search-with-local-model-template.json",
150+
Collections.emptyList()
151+
),
152+
/** defaults file and substitution-ready template for semantic search with reindex command */
153+
SEMANTIC_SEARCH_WITH_REINDEX(
154+
"semantic_search_with_reindex",
155+
"defaults/semantic-search-with-reindex-defaults.json",
156+
"substitutionTemplates/semantic-search-with-reindex-template.json",
157+
List.of(CREATE_CONNECTOR_CREDENTIAL_KEY, REINDEX_SOURCE_INDEX)
135158
);
136159

137160
private final String useCaseName;

src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java

+13-3
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,20 @@ public PlainActionFuture<WorkflowData> execute(
9595
Float requestsPerSecond = inputs.containsKey(REQUESTS_PER_SECOND)
9696
? Float.parseFloat(inputs.get(REQUESTS_PER_SECOND).toString())
9797
: null;
98+
// requestsPerSecond is null when the REQUESTS_PER_SECOND input is absent, so check for null
// before the comparison unboxes it. Negative values are replaced with Float.POSITIVE_INFINITY.
if (requestsPerSecond != null && requestsPerSecond < 0) {
    requestsPerSecond = Float.POSITIVE_INFINITY;
}
9899
Boolean requireAlias = inputs.containsKey(REQUIRE_ALIAS) ? Booleans.parseBoolean(inputs.get(REQUIRE_ALIAS).toString()) : null;
99-
Integer slices = (Integer) inputs.get(SLICES);
100-
Integer maxDocs = (Integer) inputs.get(MAX_DOCS);
101-
100+
Integer slices;
101+
Integer maxDocs;
102+
if (inputs.get(SLICES) != null) {
103+
slices = Integer.parseInt(String.valueOf(inputs.get(SLICES)));
104+
} else {
105+
slices = (Integer) inputs.get(SLICES);
106+
}
107+
if (inputs.get(MAX_DOCS) != null) {
108+
maxDocs = Integer.parseInt(String.valueOf(inputs.get(MAX_DOCS)));
109+
} else {
110+
maxDocs = (Integer) inputs.get(MAX_DOCS);
111+
}
102112
ReindexRequest reindexRequest = new ReindexRequest().setSourceIndices(Strings.splitStringByCommaToArray(sourceIndices))
103113
.setDestIndex(destinationIndex);
104114

src/main/resources/defaults/hybrid-search-defaults.json

+1-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,5 @@
1414
"text_embedding.field_map.output.dimension": "1024",
1515
"create_search_pipeline.pipeline_id": "nlp-search-pipeline",
1616
"normalization-processor.normalization.technique": "min_max",
17-
"normalization-processor.combination.technique": "arithmetic_mean",
18-
"normalization-processor.combination.parameters.weights": "[0.3, 0.7]"
17+
"normalization-processor.combination.technique": "arithmetic_mean"
1918
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"template.name": "hybrid-search",
3+
"template.description": "Setting up hybrid search, ingest pipeline and index",
4+
"register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
5+
"register_local_pretrained_model.description": "This is a sentence transformer model",
6+
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
7+
"register_local_pretrained_model.deploy": "true",
8+
"register_local_pretrained_model.version": "1.0.1",
9+
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
10+
"create_ingest_pipeline.description": "A text embedding pipeline",
11+
"create_ingest_pipeline.model_id": "123",
12+
"text_embedding.field_map.input": "passage_text",
13+
"text_embedding.field_map.output": "passage_embedding",
14+
"create_index.name": "my-nlp-index",
15+
"create_index.settings.number_of_shards": "2",
16+
"create_index.mappings.method.engine": "lucene",
17+
"create_index.mappings.method.space_type": "l2",
18+
"create_index.mappings.method.name": "hnsw",
19+
"text_embedding.field_map.output.dimension": "768",
20+
"create_search_pipeline.pipeline_id": "nlp-search-pipeline",
21+
"normalization-processor.normalization.technique": "min_max",
22+
"normalization-processor.combination.technique": "arithmetic_mean"
23+
}

src/main/resources/defaults/multi-modal-search-defaults.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,7 @@
1111
"create_index.settings.number_of_shards": "2",
1212
"text_image_embedding.field_map.output.dimension": "1024",
1313
"create_index.mappings.method.engine": "lucene",
14-
"create_index.mappings.method.name": "hnsw"
14+
"create_index.mappings.method.name": "hnsw",
15+
"text_image_embedding.field_map.image.type": "text",
16+
"text_image_embedding.field_map.text.type": "text"
1517
}

src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,7 @@
2424
"create_index.settings.number_of_shards": "2",
2525
"text_image_embedding.field_map.output.dimension": "1024",
2626
"create_index.mappings.method.engine": "lucene",
27-
"create_index.mappings.method.name": "hnsw"
27+
"create_index.mappings.method.name": "hnsw",
28+
"text_image_embedding.field_map.image.type": "text",
29+
"text_image_embedding.field_map.text.type": "text"
2830
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"template.name": "semantic search with local pretrained model",
3+
"template.description": "Setting up semantic search, with a local pretrained embedding model",
4+
"register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
5+
"register_local_pretrained_model.description": "This is a sentence transformer model",
6+
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
7+
"register_local_pretrained_model.deploy": "true",
8+
"register_local_pretrained_model.version": "1.0.1",
9+
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
10+
"create_ingest_pipeline.description": "A text embedding pipeline",
11+
"text_embedding.field_map.input": "passage_text",
12+
"text_embedding.field_map.output": "passage_embedding",
13+
"create_index.name": "my-nlp-index",
14+
"create_index.settings.number_of_shards": "2",
15+
"create_index.mappings.method.engine": "lucene",
16+
"create_index.mappings.method.space_type": "l2",
17+
"create_index.mappings.method.name": "hnsw",
18+
"text_embedding.field_map.output.dimension": "768",
19+
"create_search_pipeline.pipeline_id": "default_model_pipeline"
20+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"template.name": "semantic search with cohere embedding",
3+
"template.description": "Setting up semantic search, with a Cohere embedding model",
4+
"create_connector.name": "cohere-embedding-connector",
5+
"create_connector.description": "The connector to Cohere's public embed API",
6+
"create_connector.protocol": "http",
7+
"create_connector.model": "embed-english-v3.0",
8+
"create_connector.input_type": "search_document",
9+
"create_connector.truncate": "end",
10+
"create_connector.credential.key": "123",
11+
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
12+
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
13+
"create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding",
14+
"create_connector.actions.post_process_function": "connector.post_process.cohere.embedding",
15+
"register_remote_model.name": "Cohere english embed model",
16+
"register_remote_model.description": "cohere-embedding-model",
17+
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
18+
"create_ingest_pipeline.description": "A text embedding pipeline",
19+
"text_embedding.field_map.input": "passage_text",
20+
"text_embedding.field_map.output": "passage_embedding",
21+
"create_index.name": "my-nlp-index",
22+
"create_index.settings.number_of_shards": "2",
23+
"create_index.mappings.method.engine": "lucene",
24+
"create_index.mappings.method.space_type": "l2",
25+
"create_index.mappings.method.name": "hnsw",
26+
"text_embedding.field_map.output.dimension": "1024",
27+
"create_search_pipeline.pipeline_id": "default_model_pipeline",
28+
"reindex.source_index": "",
29+
"reindex.requests_per_second": "-1",
30+
"reindex.slices": "1"
31+
}

src/main/resources/substitutionTemplates/hybrid-search-template.json

+1-7
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,6 @@
4949
},
5050
"mappings": {
5151
"properties": {
52-
"id": {
53-
"type": "text"
54-
},
5552
"${{text_embedding.field_map.output}}": {
5653
"type": "knn_vector",
5754
"dimension": "${{text_embedding.field_map.output.dimension}}",
@@ -84,10 +81,7 @@
8481
"technique": "${{normalization-processor.normalization.technique}}"
8582
},
8683
"combination": {
87-
"technique": "${{normalization-processor.combination.technique}}",
88-
"parameters": {
89-
"weights": "${{normalization-processor.combination.parameters.weights}}"
90-
}
84+
"technique": "${{normalization-processor.combination.technique}}"
9185
}
9286
}
9387
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
{
2+
"name": "${{template.name}}",
3+
"description": "${{template.description}}",
4+
"use_case": "HYBRID_SEARCH",
5+
"version": {
6+
"template": "1.0.0",
7+
"compatibility": [
8+
"2.12.0",
9+
"3.0.0"
10+
]
11+
},
12+
"workflows": {
13+
"provision": {
14+
"nodes": [
15+
{
16+
"id": "register_local_pretrained_model",
17+
"type": "register_local_pretrained_model",
18+
"user_inputs": {
19+
"name": "${{register_local_pretrained_model.name}}",
20+
"version": "${{register_local_pretrained_model.version}}",
21+
"description": "${{register_local_pretrained_model.description}}",
22+
"model_format": "${{register_local_pretrained_model.model_format}}",
23+
"deploy": true
24+
}
25+
},
26+
{
27+
"id": "create_ingest_pipeline",
28+
"type": "create_ingest_pipeline",
29+
"previous_node_inputs": {
30+
"register_local_pretrained_model": "model_id"
31+
},
32+
"user_inputs": {
33+
"pipeline_id": "${{create_ingest_pipeline.pipeline_id}}",
34+
"configurations": {
35+
"description": "${{create_ingest_pipeline.description}}",
36+
"processors": [
37+
{
38+
"text_embedding": {
39+
"model_id": "${{register_local_pretrained_model.model_id}}",
40+
"field_map": {
41+
"${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}"
42+
}
43+
}
44+
}
45+
]
46+
}
47+
}
48+
},
49+
{
50+
"id": "create_index",
51+
"type": "create_index",
52+
"previous_node_inputs": {
53+
"create_ingest_pipeline": "pipeline_id"
54+
},
55+
"user_inputs": {
56+
"index_name": "${{create_index.name}}",
57+
"configurations": {
58+
"settings": {
59+
"index.knn": true,
60+
"default_pipeline": "${{create_ingest_pipeline.pipeline_id}}",
61+
"number_of_shards": "${{create_index.settings.number_of_shards}}",
62+
"index.search.default_pipeline": "${{create_search_pipeline.pipeline_id}}"
63+
},
64+
"mappings": {
65+
"properties": {
66+
"${{text_embedding.field_map.output}}": {
67+
"type": "knn_vector",
68+
"dimension": "${{text_embedding.field_map.output.dimension}}",
69+
"method": {
70+
"engine": "${{create_index.mappings.method.engine}}",
71+
"space_type": "${{create_index.mappings.method.space_type}}",
72+
"name": "${{create_index.mappings.method.name}}",
73+
"parameters": {}
74+
}
75+
},
76+
"${{text_embedding.field_map.input}}": {
77+
"type": "text"
78+
}
79+
}
80+
}
81+
}
82+
}
83+
},
84+
{
85+
"id": "create_search_pipeline",
86+
"type": "create_search_pipeline",
87+
"user_inputs": {
88+
"pipeline_id": "${{create_search_pipeline.pipeline_id}}",
89+
"configurations": {
90+
"description": "Post processor for hybrid search",
91+
"phase_results_processors": [
92+
{
93+
"normalization-processor": {
94+
"normalization": {
95+
"technique": "${{normalization-processor.normalization.technique}}"
96+
},
97+
"combination": {
98+
"technique": "${{normalization-processor.combination.technique}}"
99+
}
100+
}
101+
}
102+
]
103+
}
104+
}
105+
}
106+
]
107+
}
108+
}
109+
}

src/main/resources/substitutionTemplates/multi-modal-search-template.json

+2-5
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,6 @@
5050
},
5151
"mappings": {
5252
"properties": {
53-
"id": {
54-
"type": "text"
55-
},
5653
"${{text_image_embedding.embedding}}": {
5754
"type": "knn_vector",
5855
"dimension": "${{text_image_embedding.field_map.output.dimension}}",
@@ -63,10 +60,10 @@
6360
}
6461
},
6562
"${{text_image_embedding.field_map.text}}": {
66-
"type": "text"
63+
"type": "${{text_image_embedding.field_map.text.type}}"
6764
},
6865
"${{text_image_embedding.field_map.image}}": {
69-
"type": "binary"
66+
"type": "${{text_image_embedding.field_map.image.type}}"
7067
}
7168
}
7269
}

src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json

+2-5
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,6 @@
100100
},
101101
"mappings": {
102102
"properties": {
103-
"id": {
104-
"type": "text"
105-
},
106103
"${{text_image_embedding.embedding}}": {
107104
"type": "knn_vector",
108105
"dimension": "${{text_image_embedding.field_map.output.dimension}}",
@@ -113,10 +110,10 @@
113110
}
114111
},
115112
"${{text_image_embedding.field_map.text}}": {
116-
"type": "text"
113+
"type": "${{text_image_embedding.field_map.text.type}}"
117114
},
118115
"${{text_image_embedding.field_map.image}}": {
119-
"type": "binary"
116+
"type": "${{text_image_embedding.field_map.image.type}}"
120117
}
121118
}
122119
}

0 commit comments

Comments
 (0)