diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b02020f..848b1ccf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) ## [Unreleased 3.0](https://github.com/opensearch-project/anomaly-detection/compare/2.x...HEAD) ### Features ### Enhancements +#### Added workflow preset for Semantic Search using Sparse Encoders (https://github.com/opensearch-project/dashboards-flow-framework/pull/742) ### Bug Fixes ### Infrastructure ### Documentation diff --git a/common/constants.ts b/common/constants.ts index 6e7ef1c0..e86e36f4 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -155,6 +155,38 @@ export const OPENAI_CONFIGS = { } as RemoteEmbeddingModelConfig, }; +// Neural Sparse +export const NEURAL_SPARSE_CONFIGS = { + [`opensearch-neural-sparse-encoding-v2-distill`]: { + dimension: 30522, + fieldName: 'passage_embedding', + } as RemoteEmbeddingModelConfig, + [`opensearch-neural-sparse-encoding-v1`]: { + dimension: 30522, + fieldName: 'passage_embedding', + } as RemoteEmbeddingModelConfig, + [`opensearch-neural-sparse-encoding-multilingual-v1`]: { + dimension: 105879, + fieldName: 'passage_embedding', + } as RemoteEmbeddingModelConfig, + [`opensearch-neural-sparse-encoding-doc-v2-mini`]: { + dimension: 30522, + fieldName: 'passage_embedding', + } as RemoteEmbeddingModelConfig, + [`opensearch-neural-sparse-encoding-doc-v3-distill`]: { + dimension: 30522, + fieldName: 'passage_embedding', + } as RemoteEmbeddingModelConfig, + [`opensearch-neural-sparse-encoding-doc-v1`]: { + dimension: 30522, + fieldName: 'passage_embedding', + } as RemoteEmbeddingModelConfig, + [`opensearch-neural-sparse-encoding-doc-v2-distill`]: { + dimension: 30522, + fieldName: 'passage_embedding', + } as RemoteEmbeddingModelConfig, +}; + /** * Various constants pertaining to Workflow configs */ @@ -173,6 +205,7 @@ export enum WORKFLOW_TYPE { HYBRID_SEARCH = 'Hybrid Search', VECTOR_SEARCH_WITH_RAG = 'RAG with Vector Retrieval', HYBRID_SEARCH_WITH_RAG = 'RAG with Hybrid Search', + SEMANTIC_SEARCH_USING_SPARSE_ENCODERS = 'Semantic Search using Sparse Encoders', CUSTOM = 'Custom Search', UNKNOWN = 'Unknown', } @@ -211,6 +244,7 @@ export enum MODEL_TYPE { export enum MODEL_CATEGORY { EMBEDDING = 'EMBEDDING', LLM = 'LLM', + SPARSE_ENCODER = 'SPARSE_ENCODER', } /** @@ -293,6 +327,14 @@ export const COHERE_EMBEDDING_MODEL_DOCS_LINK = export const BEDROCK_TITAN_EMBEDDING_DOCS_LINK = 'https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#amazon-bedrock-titan-text-embedding'; +// Sparse Encoder Models Documentation Links +export const OPENSEARCH_NEURAL_SPARSE_DOCS_LINK = + 'https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v2-distill'; + +// TODO: Update this with the official OpenSearch documentation URL when it's available +export const SAGEMAKER_SPARSE_DEPLOY_LINK = + 'https://github.com/zhichao-aws/opensearch-neural-sparse-sample/tree/main/examples/deploy_on_sagemaker'; + // ML Models setup Documentation Link export const ML_MODELS_SETUP_DOCS_LINK = 'https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md'; @@ -595,6 +637,18 @@ export const HYBRID_SEARCH_QUERY_MATCH_TERM = { }, }, }; +export const NEURAL_SPARSE_SEARCH_QUERY = { + _source: { + excludes: [VECTOR_FIELD_PATTERN], + }, + query: { + neural_sparse: { + [VECTOR_FIELD_PATTERN]: { + query_tokens: VECTOR_PATTERN, + }, + }, + }, +}; export const QUERY_PRESETS = [ { @@ -649,6 +703,10 
@@ export const QUERY_PRESETS = [ name: WORKFLOW_TYPE.MULTIMODAL_SEARCH, query: customStringify(MULTIMODAL_SEARCH_QUERY_BOOL), }, + { + name: 'Neural Sparse Search Query', + query: customStringify(NEURAL_SPARSE_SEARCH_QUERY), + }, { name: 'Semantic search (neural query)', query: customStringify(SEMANTIC_SEARCH_QUERY_NEURAL), diff --git a/common/utils.ts b/common/utils.ts index 8e71edf8..edaa3bd3 100644 --- a/common/utils.ts +++ b/common/utils.ts @@ -53,6 +53,7 @@ export function isVectorSearchUseCase(workflowType?: WORKFLOW_TYPE): boolean { WORKFLOW_TYPE.HYBRID_SEARCH, WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG, WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG, + WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS, ].includes(workflowType) ); } diff --git a/documentation/models.md b/documentation/models.md index bb867587..835cb43d 100644 --- a/documentation/models.md +++ b/documentation/models.md @@ -473,6 +473,123 @@ POST /_plugins/_ml/models/_register } ``` +### Neural Sparse Encoding + +Deploy a sparse encoding model from the Hugging Face Model Hub to a SageMaker real-time inference endpoint using this [guide](https://github.com/zhichao-aws/opensearch-neural-sparse-sample/tree/main/examples/deploy_on_sagemaker). + +Connector: + +``` +POST /_plugins/_ml/connectors/_create +{ + "name": "Neural Sparse Encoding", + "description": "Test connector for Sagemaker model", + "version": 1, + "protocol": "aws_sigv4", + "credential": { + "access_key": "", + "secret_key": "", + "session_token": "" + }, + "parameters": { + "region": "us-east-1", + "service_name": "sagemaker", + "model": "opensearch-neural-sparse-encoding-v2-distill" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "headers": { + "content-type": "application/json" + }, + "url": "https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/xxxx/invocations", + "request_body": "[\"${parameters.text_doc}\"]", + "post_process_function": "String escape(def input) { if (input instanceof String) { if (input.contains('\\\\')) { input = input.replace('\\\\', '\\\\\\\\'); } if (input.contains('\"')) { input = input.replace('\"', '\\\\\"'); } if (input.contains('\r')) { input = input.replace('\r', '\\\\r'); } if (input.contains('\t')) { input = input.replace('\t', '\\\\t'); } if (input.contains('\n')) { input = input.replace('\n', '\\\\n'); } if (input.contains('\b')) { input = input.replace('\b', '\\\\b'); } if (input.contains('\f')) { input = input.replace('\f', '\\\\f'); } return input; } return input.toString(); } if (params.result == null || params.result.length == 0) { return '{\"dataAsMap\":{\"error\":\"no response error\"}}'; } String response = params.result[0].toString(); response = response.substring(1, response.length() - 1).replace('=', '\":').replace(', ', ',\"'); return '{\"dataAsMap\":{\"response\":{\"' + response + '}}}';" + } + ] +} +``` + +Model: + +``` +POST /_plugins/_ml/models/_register +{ "name": "Neural Sparse Encoding Model", + "function_name": "remote", + "version": "1.0.0", + "connector_id": "", + "description": "Test connector for Sagemaker model", + "interface": { + "input": { + "type": "object", + "properties": { + "parameters": { + "type": "object", + "properties": { + "text_doc": { + "type": "string" + } + }, + "additionalProperties": true, + "required": [ + "text_doc" + ] + } + } + }, + "output": { + "type": "object", + "properties": { + "inference_results": { + "type": "array", + "items": { + "type": "object", + "properties": { + "output": { + "type": "array", + "items": { + "type": "object", + 
"properties": { + "dataAsMap": { + "type": "object", + "properties": { + "response": { + "type": "object", + "additionalProperties": { + "type": "number" + } + } + }, + "required": [ + "response" + ] + } + }, + "required": [ + "dataAsMap" + ] + } + }, + "status_code": { + "type": "integer" + } + }, + "required": [ + "output", + "status_code" + ] + } + } + }, + "required": [ + "inference_results" + ] + } + } +} +``` + ## Generative models ### Claude 3 Sonnet (hosted on Amazon Bedrock) diff --git a/documentation/tutorial.md b/documentation/tutorial.md deleted file mode 100644 index a6ff414c..00000000 --- a/documentation/tutorial.md +++ /dev/null @@ -1,493 +0,0 @@ -The following tutorial is an accurate representation of the OpenSearch Flow OSD Plugin as of 12/30/2024, based on OSD 2.18. - -Changelog: - -- Initially created 11/18/2024 -- Updated on 11/27/2024 after input/output transform design was overhauled. See PR #504 - -# Tutorial - -For an overview of the plugin, please see [README](../README.md) - -## 1. Provision ML resources - -This plugin is not responsible for connector/model creation, this should be done separately. For several examples that cover a variety of vector search & RAG use cases, see the preset connector blueprints [here](https://opensearch.org/docs/latest/ml-commons-plugin/api/model-apis/register-model/#connector-model-interfaces). For more general information on the ML plugin, connectors, and models, see the [ML Commons plugin documentation](https://opensearch.org/docs/latest/ml-commons-plugin/). You should have deployed remote models with sufficient model interfaces available. These will be the models used for stitching together your ingest and search flows for different use cases. - -## 2. Navigate to the plugin on OSD - -The "OpenSearch Flow" plugin will be under "Search" in the side navigation on OSD. Click to enter the plugin home page. - -![sidenav](./images/sidenav.png) - -## 3. Select your use case - -Start by selecting a preset template for your particular use case. If you want to first test out some basic use cases, you may choose one of the preset templates. You can fill out some initial information about your use case, such as the model, and some of the different input fields. It is all optional, but will help auto-populate some of the configuration if provided. If you anticipate a more advanced/custom use case, you can choose "Custom", which will provide a blank slate, letting you build out all of your configuration from scratch. - -The below screenshots will illustrate a basic semantic search use case starting from scratch. - -![presets-page](./images/presets-page.png) - -## 4. Get familiar with the Workflow Details page - -After selecting, you will enter the Workflow Details page. This page is broken down into 3 main sections: - -1. The form. This is where you will spend most of your time, configuring your ingest and search pipelines. It is split into 2 main steps - first configuring your ingest flow, and secondly, configuring your search flow. We will go into more detail on these later. - -![form](./images/form.png) - -2. The preview workspace. This is a read-only workspace, provided as a visual helper to see how your data flows & is transformed across ingest & search. You can toggle to the JSON view to get more details on the underlying resource configurations as you build your flows out. - -![workspace](./images/workspace.png) - -3. The inspector. 
You can think of this similar to an IDE - it provides different information as you build out your flows, including the responses after running ingest / search, any errors seen while testing, and the list of underlying created resources for this particular workflow. - -![inspector](./images/inspector.png) - -4. Header buttons - -These allow you to undo current changes, save your current form, export your workflow, or exit and return to the homepage. NOTE: depending on the OSD configuration `useNewHomePage` feature flag), these buttons may look different. - -![buttons](./images/buttons.png) - -## 5. Provide some sample data - -Now we can begin building the use case! Let's start by providing some sample data. The data should be in a JSON array format. 3 options are provided for your convenience: manual input, importing from a file, or taking some sample data from an existing index. _Note if you already have sample data and are only interested in adding search functionality, you can skip this step entirely by un-checking the "Enabled" checkbox. This will let you navigate directly to the search flow_. - -For this example, we will manually input some sample data containing various clothing items. - -![import-data](./images/import-data.png) - -![import-data-populated](./images/import-data-populated.png) - -## 6. Enrich your data - -You can now enrich your data by building out an ingest pipeline & chaining together different ingest processors. The current list of supported processors is visible in the dropdown when clicking "Add processor". - -![enrich-data](./images/enrich-data.png) - -Continuing with the semantic search example, you can now select and configure an ML inference processor to embed the input text. This cluster has a deployed Amazon Bedrock Titan text embedding model. The model has a defined interface, and expects a single input called `inputText`, and returns a single output called `embedding`. - -This is where you can now map data to and from the model inputs and outputs, respectively. "Inputs" allows selecting and transforming the data to conform to the expected model inputs. "Outputs" allows selecting and transforming the model outputs to new fields. There are different types of transformations you can do, including field-level mapping (extracting out a document field value), expressions (more complex transformations using [JSONPath](https://en.wikipedia.org/wiki/JSONPath)), and others. For this example, you can just select the `item_text` field to map to the `inputText` model input, and map the output `embedding` field to a new document field called `my_embedding`. _(Behind the scenes, this is configuring the "input_map" and "output_map" configuration settings for [ML inference ingest processors](https://opensearch.org/docs/latest/ingest-pipelines/processors/ml-inference/))_ - -![ml-config-ingest](./images/ml-config-ingest.png) - -Click "Save" to return to the form. - -### Aside: advanced data transformations - -Continuing with the above example, let's suppose the input data (the document) is more complex, and a simple field-level mapping is not sufficient. Maybe you need to parse out some nested field's value. This can be done by changing the transformation type to `Expression` - -![expression-ingest](./images/expression-ingest.png) - -From there, click "Configure" to open the "Configure JSONPath expression" modal. On the right-hand side, you can click "Run preview" to fetch the input data to this processor. 
- -![expression-modal-ingest](./images/expression-modal-ingest.png) - -On the left-hand side, you can define a [JSONPath](https://en.wikipedia.org/wiki/JSONPath) transform to parse out the data you want. Suppose it is the description, "red shoes". You can write some JSONPath to pull out this data - `$.item.description`. The transformed value will appear under the "Extracted data" box in the lower-right-hand corner as you define your JSONPath. For models with defined [JSON Schema](https://json-schema.org/) interfaces, you will see a marker indicating whether the transform is valid or invalid for that particular model field (in this case, `inputText`): - -![expression-modal-ingest-validated](./images/expression-modal-ingest-validated.png) - -## 7. Ingest data - -Ensure your index configurations are up-to-date, and optionally enter an index name. For vector search use cases like in this example, ensure any vector fields are mapped as such, and with appropriate vector dimensions. Additionally, the index settings should ensure this is labeled as a knn index. Note that for preset use cases (non-"Custom" use cases), many of this will be automatically populated for your convenience. - -![index-settings-updated](./images/index-settings-updated.png) - -After configuring, click "Build and run ingestion". This will build out your index, ingest pipeline, and finally bulk ingest your sample documents. The OpenSearch response will be visible under the Inspector panel, as well as any errors if they should occur. - -![build-and-run-ingestion-response](./images/build-and-run-ingestion-response.png) - -You have now completed your ingest flow! Let's move on to configuring search by clicking the "Search pipeline >" button. - -## 8. Configure query - -The query is the starting point for your search flow. Note the index is already set to the one you've configured from the ingest flow _(NOTE: if you skipped ingest, a dropdown will be available to select from an existing index)_. You can select from some preset options as a starting point, and fully configure your query. Continuing with this semantic search example, we will follow this standard vector search pattern: - -1. Provide a query containing the data you need to generate embeddings for - -2. (Step 9 - see below) Configure an ML inference processor to parse the input data, generate vector embedding(s), and create a knn query using the generated vector(s) - -So, we will provide a basic term query with the input data to be vectorized here: - -![edit-query-term](./images/edit-query-term.png) - -## 9. Enrich query request - -Similar to Step 6 - Enrich data, this allows you to enrich the query request by configuring a series of processors - in this case, [search request processors](https://opensearch.org/docs/latest/search-plugins/search-pipelines/search-processors/#search-request-processors). Currently, only the ML inference processor is supported. Continuing with the semantic search example, we will configure an ML processor using the same Titan text embedding model. First, configure the input and output mappings to generate the vector, similar to what was done on the ingest side. Specifically, here we select the query value containing the text we want to embed, "shoes". And, we map the embedding to some field called "vector". - -![enrich-query-request](./images/enrich-query-request.png) - -Next, we need to update our query to use this generated vector embedding. Click "Override query" to open the modal. We can select a knn query preset to start. 
- -![override-query-with-placeholders](./images/override-query-with-placeholders.png) - -From there, populate any placeholder values, such as "${vector_field}" with the associated vector field you have in your index. In this case, "my_embedding" that we configured on ingest. To use the produced vector in the model output, we can see the list of available model outputs under "Model outputs". There is a utility copy button on the right-hand side to copy the template variable. Inject/paste this variable anywhere in the query to dynamically inject it into the query at runtime. In this example, it has already populated "${vector}" as the "vector" value for the knn query, so there is nothing left to do. The final query should have no placeholders, besides any model output dynamic variables that will be populated at runtime. - -![override-query](./images/override-query.png) - -## 10. Enrich query results - -Similar to Step 9 - Enrich query request, we can configure a series of [search response processors](https://opensearch.org/docs/latest/search-plugins/search-pipelines/search-processors/#search-response-processors) to enrich/transform the returned matching documents. For this particular example, this is not needed. _For more examples using search response processors, see "More examples" below, including RAG & reranking use cases which involve processing & manipulating the search response._ - -![enrich-query-results](./images/enrich-query-results.png) - -## 11. Execute search - -We are finished configuring! Now click "Build and run query" to build out the search pipeline and execute the search request against the index. The final results will pop up in the "Inspector" panel. For this example, we see the top results pertaining to shoes. - -![search-response](./images/search-response.png) - -## 12. Export workflow - -If you are satisfied with the final workflow and the results it is producing, you can click the "Export" button in the header. This will open a modal, showing you the end-to-end [workflow template](https://opensearch.org/docs/latest/automating-configurations/workflow-templates/) containing all of the configuration details for your index, ingest pipeline, and search pipeline, as well as associated UI metadata (for example, certain things like the search request are not concrete resources - we persist them here for ease-of-use if importing this template on the UI). It can be copied in JSON or YAML format. Note: any cluster-specific IDs, such as model IDs, will need to be updated, if importing into a different cluster. - -![export-modal](./images/export-modal.png) - -And that's it! If you have followed all of these steps, you now have a successful semantic search use case, with all of the required resources bundled up into a single template. You can import this template on the UI and rebuild for different clusters, or execute directly using the [Flow Framework Provision API](https://opensearch.org/docs/latest/automating-configurations/api/provision-workflow/). - -# More examples - -## 1. Semantic search - -### ML resources - -Create and deploy a [Bedrock Titan Text Embedding model](https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#amazon-bedrock-titan-text-embedding). - -### Index - -Ensure the index settings include `index.knn: true`, & mappings have a `knn_vector` field - something like the following: - -``` -"": { - "type": "knn_vector", - "dimension": -} -``` - -### Ingest pipeline - -Single ML inference processor. 
Map your input text to the `inputText` model input field. Optionally map the output `embedding` to a new document field. - -### Search pipeline - -Single ML inference **search request** processor. Map the query field containing the input text to the `inputText` model input field. Optionally map the output `embedding` to a new field. Override the query to a knn query. For example: - -``` -{ - "_source": { - "excludes": [ - "" - ] - }, - "query": { - "knn": { - "": { - "vector": ${embedding}, - "k": 10 - } - } - } -} -``` - ---- - -## 2. Hybrid search (BM25 + k-NN) - -### ML resources - -Create and deploy a [Bedrock Titan Text Embedding model](https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#amazon-bedrock-titan-text-embedding). - -### Index - -Ensure the index settings include `index.knn: true`, & mappings have a `knn_vector` field - something like the following: - -``` -"": { - "type": "knn_vector", - "dimension": -} -``` - -### Ingest pipeline - -Single ML inference processor. Map your input text to the `inputText` model input field. Optionally map the output `embedding` to a new document field. - -### Search pipeline - -An ML inference **search request** processor & normalization processor. - -**For the ML inference processor:** - -Map the query field containing the input text to the `inputText` model input field. Optionally map the output `embedding` to a new field. Override the query to a hybrid query. See example below. Ensure to set the `embedding_field`, `text_field`, & `text_field_input`: - -``` -{ - "_source": { - "excludes": [ - "" - ] - }, - "query": { - "hybrid": { - "queries": [ - { - "match": { - "": { - "query": "" - } - } - }, - { - "knn": { - "": { - "vector": ${embedding}, - "k": 10 - } - } - } - ] - } - } -} -``` - -**For the normalization processor:** - -Configure weights for each sub-query. You may refer to the [hybrid search normalization processor example](https://opensearch.org/docs/latest/search-plugins/hybrid-search/#step-4-configure-a-search-pipeline) for reference. - ---- - -## 3. Basic RAG (document summarization) - -NOTE: the below connector blueprint & model interface may change over time. The following example uses a connector blueprint that abstracts a lot of the complexity around the [Claude v1 messages API](https://docs.anthropic.com/en/api/getting-started#examples), exposing only a single `prompt` field as input in the model. An example input may look like the following, with placeholders containing dynamically-fetched results: - -``` -{ - "prompt": "Human: You are a professional data analyst. You are given a list of document results. You will analyze the data and generate a human-readable summary of the results. If you don't know the answer, just say I don't know.\n\n Results: ${parameters.results.toString()}\n\n Human: Please summarize the results.\n\n Assistant:" -} -``` - -### ML resources - -Create and deploy a [Claude 3 Sonnet model](https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#claude-3-sonnet-hosted-on-amazon-bedrock). - -### Index - -Nothing special needs to be configured. - -### Ingest pipeline - -Nothing special needs to be configured. - -### Search pipeline - -Single ML inference **search response** processor. Choose `Template` as the transformation type for the `prompt` input field. Open up the template configuration by clicking "Configure". Select a preset to start with for your convenience. 
Then, create an input variable that parses out the list of reviews, something like `review`. Inject the variable into the prompt by copying and pasting it. Click "Run preview" to test that the final transformed prompt with sample dynamic data looks as expected. Click "Save" to save and exit. - ---- - -## 4. Multimodal search (text & image) - -### ML resources - -Create and deploy an [Amazon Titan Multimodal Embedding model](https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#amazon-bedrock-titan-multimodal-embedding). - -### Index - -Ensure the index settings include `index.knn: true`, & mappings have a `binary` field and `knn_vector` field to persist the image binaries and generated image embeddings, respectively. For example: - -``` -"image_base64": { - "type": "binary" -}, -"image_embedding": { - "type": "knn_vector", - "dimension": -}, -``` - -### Ingest pipeline - -Single ML inference processor. Map your input text field and input image field to the `inputText` and `inputImage` model input fields, respectively. This is assuming both inputs are desired to generate a single embedding; alternatively, only one input (text or image) is technically required. - -Optionally map the output `embedding` to a new document field. - -### Search pipeline - -Single ML inference **search request** processor. Map the input text field and input image field in the query, to the `inputText`and `inputImage` model input fields, respectively (again, assuming both inputs are desired, while only one or the other is required). - -Override the query to a knn query, including the embedding output. For example: - -``` -{ - "_source": { - "excludes": [ - "" - ] - }, - "query": { - "knn": { - "": { - "vector": ${embedding}, - "k": 10 - } - } - } -} -``` - ---- - -## 5. Named entity recognition (NER) - -### ML resources - -Create and deploy an [Amazon Comprehend Entity Detection model](https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#amazon-comprehend---entity-detection). - -### Index - -Nothing special needs to be configured. - -### Ingest pipeline - -Single ML inference processor. Map your input text field to the `text` model input fields. Suppose you want to persist any found entities with each document. In this case, you can transform the output (an array of Entities), and persist them under an `entities_found` field. You may use the following output_map configuration as a reference. - -``` -"output_map": [ - { - "entities_found": "$.response.Entities[*].Type" - } -], -``` - -### Search pipeline - -Nothing special needs to be configured. - ---- - -## 6. Language detection / classification - -### ML resources - -Create and deploy an [Amazon Comprehend Language Detection model](https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#amazon-comprehend---language-detection). - -### Index - -Nothing special needs to be configured. - -### Ingest pipeline - -Single ML inference processor. Map your input text field to the `text` model input fields. Suppose you want to persist the most relevant / most likely language for each document. In this case, you can transform the output (an array of Languages), and persist them under a `detected_dominant_language` field. You may use the following output_map configuration as a reference. 
- -``` -"output_map": [ - { - "detected_dominant_language": "response.Languages[0].LanguageCode" - } -], -``` - -### Search pipeline - -Nothing special needs to be configured. - ---- - -## 7. Reranking results - -Reranking can be achieved in many different ways. Typically, the models will take in at least 2 inputs; one with the original query/input, and one with the data to assign a relevance score to. Some models support batching, and take in a set of results to assign scores to, and return the reranked set of results in one inference call. When implementing in OpenSearch to rerank search results, this gives us 2 common patterns: - -1. Batching enabled: collect all results => pass to single ML processor with batched results => return top n ranked results - -2. Batching disabled: collect all results => { for each result: pass to ML processor and get a new relevancy score } => pass all results with newly-assigned relevancy scores to the rerank processor to handle re-sorting results => return top n ranked results. - -To highlight the rerank processor, the below example follows Pattern 2 (batching disabled). _Note the model used (Cohere Rerank) DOES support batching if you want to follow Pattern 1 using this model._ - -### ML resources - -Create and deploy a [Cohere Rerank model](https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#cohere-rerank). - -### Index - -Nothing special needs to be configured. - -### Ingest pipeline - -Nothing special needs to be configured. - -### Search pipeline - -ML inference **search response** processor, followed by a rerank **search response** processor. As described by Pattern 2, we will use the ML processor for processing the returned results and generating new scores, and then using the reranker to rerank the results based on these new scores. - -ML processor config: map the document field containing the data you want to use for comparison, and map to the model's `documents` field. Map the original query that you want to use for comparison, and map to the model's `query` field. _Note: for accessing the query JSON, use JSONPath, and prefix with `_request.query`. The UI will be iteratively improved to make this more intuitive_. The final input map should look something like this: - -``` -"input_map": [ - { - "documents": "description", - "query": "$._request.query.term.value" - } -], -``` - -Optionally store the rescored result in the model output under a new field. You can even parse out the score, and only persist that. For example: - -``` -"input_map": [ - { - "new_score": "results[0].relevance_score" - } -], -``` - -Rerank processor config: under target_field, select the model score field - continuing with this example, we set it to `new_score`. - ---- - -## 8. Multimodal search (text or image) with custom CLIP model - -The following example uses a custom CLIP model hosted on Sagemaker, that dynamically takes in text OR image URLs as input, and returns a vector embedding. - -### ML resources - -Create and deploy a [Custom CLIP Multimodal model](https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#custom-clip-multimodal-embedding). - -### Index - -Ensure the index settings include `index.knn: true`, & mappings have a `knn_vector` field - something like the following: - -``` -"": { - "type": "knn_vector", - "dimension": -} -``` - -### Ingest pipeline - -Single ML inference processor. 
Map your image field to the `image_url` model input field, OR, your text field to the `text` model input field. This depends on what type of data you are ingesting/persisting in your index. _For example, if building an application that returns relevant images based on text or image input, you would likely persist images, and as such, should map to the `image_url` field accordingly_. - -### Search pipeline - -Single ML inference **search request** processor. Map the input image field OR the input text field in the query, to the `image_url` OR `text` model input fields, respectively. The CLIP model flexibly handles one or the other, so it just depends on the application your are trying to build, and what the expected initial user query looks like. - -Override the query to a knn query, including the embedding output. For example: - -``` -{ - "_source": { - "excludes": [ - "" - ] - }, - "query": { - "knn": { - "": { - "vector": ${embedding}, - "k": 10 - } - } - } -} -``` diff --git a/public/pages/workflow_detail/component_input/component_input.tsx b/public/pages/workflow_detail/component_input/component_input.tsx index 2be8571a..a8e4eb49 100644 --- a/public/pages/workflow_detail/component_input/component_input.tsx +++ b/public/pages/workflow_detail/component_input/component_input.tsx @@ -319,7 +319,7 @@ export function ComponentInput(props: ComponentInputProps) { ) : props.selectedComponentId === COMPONENT_ID.INGEST_DATA ? ( - + ) : props.selectedComponentId === COMPONENT_ID.SEARCH_REQUEST ? ( ) : props.selectedComponentId === COMPONENT_ID.RUN_QUERY ? ( diff --git a/public/pages/workflow_detail/component_input/ingest_inputs/advanced_settings.tsx b/public/pages/workflow_detail/component_input/ingest_inputs/advanced_settings.tsx index 96f164f3..e6e963e0 100644 --- a/public/pages/workflow_detail/component_input/ingest_inputs/advanced_settings.tsx +++ b/public/pages/workflow_detail/component_input/ingest_inputs/advanced_settings.tsx @@ -14,7 +14,7 @@ import { } from '@elastic/eui'; import { JsonField } from '../input_fields'; import { getIn, useFormikContext } from 'formik'; -import { WorkflowFormValues } from '../../../../../common'; +import { WorkflowFormValues, WORKFLOW_TYPE } from '../../../../../common'; import { AppState } from '../../../../store'; import { getEmbeddingField, @@ -28,6 +28,7 @@ import { interface AdvancedSettingsProps { setHasInvalidDimensions: (hasInvalidDimensions: boolean) => void; + workflowType: WORKFLOW_TYPE | undefined; disabled: boolean; } @@ -65,7 +66,7 @@ export function AdvancedSettings(props: AdvancedSettingsProps) { // If a dimension is found, it is a known embedding model. // Ensure the index is configured to be knn-enabled. 
- if (dimension !== undefined) { + if (dimension !== undefined && props.workflowType !== WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS) { if (!isKnnIndex(curSettings)) { setFieldValue( indexSettingsPath, diff --git a/public/pages/workflow_detail/component_input/ingest_inputs/ingest_data.tsx b/public/pages/workflow_detail/component_input/ingest_inputs/ingest_data.tsx index 7bd449fa..7670307f 100644 --- a/public/pages/workflow_detail/component_input/ingest_inputs/ingest_data.tsx +++ b/public/pages/workflow_detail/component_input/ingest_inputs/ingest_data.tsx @@ -13,10 +13,11 @@ import { } from '@elastic/eui'; import { TextField } from '../input_fields'; import { AdvancedSettings } from './advanced_settings'; -import { KNN_VECTOR_DOCS_LINK } from '../../../../../common'; +import { KNN_VECTOR_DOCS_LINK, WORKFLOW_TYPE } from '../../../../../common'; interface IngestDataProps { disabled: boolean; + workflowType: WORKFLOW_TYPE | undefined; } /** @@ -57,6 +58,7 @@ export function IngestData(props: IngestDataProps) { diff --git a/public/pages/workflow_detail/component_input/input_fields/models_info_popover.tsx b/public/pages/workflow_detail/component_input/input_fields/models_info_popover.tsx index fac5d130..822e0f4b 100644 --- a/public/pages/workflow_detail/component_input/input_fields/models_info_popover.tsx +++ b/public/pages/workflow_detail/component_input/input_fields/models_info_popover.tsx @@ -17,6 +17,8 @@ import { BEDROCK_CLAUDE_3_SONNET_DOCS_LINK, OPENAI_GPT35_DOCS_LINK, DEEPSEEK_CHAT_DOCS_LINK, + OPENSEARCH_NEURAL_SPARSE_DOCS_LINK, + SAGEMAKER_SPARSE_DEPLOY_LINK, } from '../../../../../common'; interface ModelInfoPopoverProps { @@ -55,6 +57,19 @@ export function ModelInfoPopover({ modelCategory }: ModelInfoPopoverProps) { ); + } else if (modelCategory === MODEL_CATEGORY.SPARSE_ENCODER) { + return ( + <> + + OpenSearch Neural Sparse Encoder + + {' (deployable using '} + + SageMaker Connector + + {')'} + + ); } return null; }; @@ -64,6 +79,8 @@ export function ModelInfoPopover({ modelCategory }: ModelInfoPopoverProps) { return 'n embedding'; } else if (modelCategory === MODEL_CATEGORY.LLM) { return ' large language'; + } else if (modelCategory === MODEL_CATEGORY.SPARSE_ENCODER) { + return ' sparse encoder'; } return ''; }; @@ -85,7 +102,7 @@ export function ModelInfoPopover({ modelCategory }: ModelInfoPopoverProps) { >

-          To create this workflow, you must select a{getModelTypeText()} model.
+          To create this workflow, you must select a{getModelTypeText()} model. {getModelLinks() && <> For example: {getModelLinks()}.</>}

@@ -97,5 +114,5 @@ export function ModelInfoPopover({ modelCategory }: ModelInfoPopoverProps) {

); - + } diff --git a/public/pages/workflows/new_workflow/quick_configure_modal.tsx b/public/pages/workflows/new_workflow/quick_configure_modal.tsx index 6ad1f6b3..0f43db5a 100644 --- a/public/pages/workflows/new_workflow/quick_configure_modal.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_modal.tsx @@ -330,31 +330,56 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) { )} - - setQuickConfigureFields({ - ...quickConfigureFields, - embeddingModelId: modelId, - }) - } - /> - - - )} + label="Sparse encoder" + helpText="The model to generate sparse embeddings." + fullWidth={true} + showError={true} + onModelChange={(modelId) => + setQuickConfigureFields({ + ...quickConfigureFields, + embeddingModelId: modelId, + }) + } + /> + ) : ( + + setQuickConfigureFields({ + ...quickConfigureFields, + embeddingModelId: modelId, + }) + } + /> + )} + + + )} {props.workflow?.ui_metadata?.type !== WORKFLOW_TYPE.CUSTOM && ( <> @@ -446,6 +471,7 @@ function injectQuickConfigureFields( switch (workflow.ui_metadata?.type) { case WORKFLOW_TYPE.SEMANTIC_SEARCH: case WORKFLOW_TYPE.HYBRID_SEARCH: + case WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS: case WORKFLOW_TYPE.MULTIMODAL_SEARCH: { if (!isEmpty(quickConfigureFields) && workflow.ui_metadata?.config) { workflow.ui_metadata.config = updateIngestProcessors( @@ -456,6 +482,7 @@ function injectQuickConfigureFields( ); workflow.ui_metadata.config = updateIndexConfig( workflow.ui_metadata.config, + workflow.ui_metadata?.type, quickConfigureFields ); workflow.ui_metadata.config.search.request.value = injectPlaceholderValues( @@ -482,6 +509,7 @@ function injectQuickConfigureFields( ); workflow.ui_metadata.config = updateIndexConfig( workflow.ui_metadata.config, + workflow.ui_metadata?.type, quickConfigureFields ); workflow.ui_metadata.config.search.request.value = injectPlaceholderValues( @@ -807,6 +835,7 @@ function updateRAGSearchResponseProcessors( // prefill index mappings/settings, if applicable function updateIndexConfig( config: WorkflowConfig, + workflow_type: WORKFLOW_TYPE, fields: QuickConfigureFields ): WorkflowConfig { if ( @@ -835,10 +864,10 @@ function updateIndexConfig( }; } if (fields.vectorField) { - properties[fields.vectorField] = { - type: 'knn_vector', - dimension: fields.embeddingLength || '', - }; + properties[fields.vectorField] = + workflow_type !== WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS + ? 
{ type: 'knn_vector', dimension: fields.embeddingLength || '' } + : { type: 'rank_features' }; } if (fields.labelField) { properties[fields.labelField] = { diff --git a/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx b/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx index e9d3ec63..e702cfc0 100644 --- a/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx @@ -67,6 +67,7 @@ export function QuickConfigureOptionalFields( let defaultFieldValues = {} as QuickConfigureFields; switch (props.workflowType) { case WORKFLOW_TYPE.SEMANTIC_SEARCH: + case WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS: case WORKFLOW_TYPE.HYBRID_SEARCH: { defaultFieldValues = { textField: DEFAULT_TEXT_FIELD, @@ -193,7 +194,9 @@ export function QuickConfigureOptionalFields( <> @@ -208,7 +211,7 @@ export function QuickConfigureOptionalFields( }} /> - {unknownEmbeddingLength && ( + {unknownEmbeddingLength && props.workflowType !== WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS && ( <> + ( + + {children} + + ), + }} + > + {props.workflow?.description || ''} + + + } footer={ diff --git a/public/pages/workflows/new_workflow/utils.ts b/public/pages/workflows/new_workflow/utils.ts index f0db6030..79f9bc58 100644 --- a/public/pages/workflows/new_workflow/utils.ts +++ b/public/pages/workflows/new_workflow/utils.ts @@ -29,6 +29,7 @@ import { MULTIMODAL_SEARCH_QUERY_NEURAL, HYBRID_SEARCH_QUERY_MATCH_NEURAL, MATCH_QUERY_TEXT, + NEURAL_SPARSE_SEARCH_QUERY, } from '../../../../common'; import { generateId } from '../../../utils'; import semver from 'semver'; @@ -64,6 +65,10 @@ export function enrichPresetWorkflowWithUiMetadata( uiMetadata = fetchHybridSearchWithRAGMetadata(workflowVersion); break; } + case WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS: { + uiMetadata = fetchNeuralSparseSearchMetadata(workflowVersion); + break; + } default: { uiMetadata = fetchEmptyMetadata(); break; @@ -179,6 +184,26 @@ export function fetchSemanticSearchMetadata(version: string): UIState { return baseState; } +export function fetchNeuralSparseSearchMetadata(version: string): UIState { + let baseState = fetchEmptyMetadata(); + baseState.type = WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS; + + baseState.config.ingest.enrich.processors = [new MLIngestProcessor().toObj()]; + + baseState.config.ingest.index.name.value = generateId('neural_sparse_index', 6); + baseState.config.ingest.index.settings.value = customStringify({}); + + baseState.config.search.request.value = customStringify(MATCH_QUERY_TEXT); + baseState.config.search.enrichRequest.processors = [ + injectQueryTemplateInProcessor( + new MLSearchRequestProcessor().toObj(), + NEURAL_SPARSE_SEARCH_QUERY + ), + ]; + + return baseState; +} + export function fetchMultimodalSearchMetadata(version: string): UIState { const isPreV219 = semver.lt(version, MINIMUM_FULL_SUPPORTED_VERSION); let baseState = fetchEmptyMetadata(); diff --git a/public/utils/utils.tsx b/public/utils/utils.tsx index f131ce0a..f2b00945 100644 --- a/public/utils/utils.tsx +++ b/public/utils/utils.tsx @@ -32,6 +32,7 @@ import { BEDROCK_CONFIGS, COHERE_CONFIGS, OPENAI_CONFIGS, + NEURAL_SPARSE_CONFIGS, customStringify, NO_TRANSFORMATION, TRANSFORM_TYPE, @@ -779,7 +780,9 @@ export function getEmbeddingModelDimensions( // @ts-ignore OPENAI_CONFIGS[connector.parameters?.model]?.dimension || // @ts-ignore - BEDROCK_CONFIGS[connector.parameters?.model]?.dimension + 
BEDROCK_CONFIGS[connector.parameters?.model]?.dimension || + // @ts-ignore + NEURAL_SPARSE_CONFIGS[connector.parameters?.model]?.dimension ); } else { return undefined; @@ -841,7 +844,9 @@ function getEmbeddingFieldFromConnector( // @ts-ignore OPENAI_CONFIGS[connector?.parameters?.model]?.fieldName || // @ts-ignore - BEDROCK_CONFIGS[connector?.parameters?.model]?.fieldName + BEDROCK_CONFIGS[connector?.parameters?.model]?.fieldName || + // @ts-ignore + NEURAL_SPARSE_CONFIGS[connector?.parameters?.model]?.fieldName ); } else { return undefined; diff --git a/server/resources/templates/semantic_search_using_sparse_encoders.json b/server/resources/templates/semantic_search_using_sparse_encoders.json new file mode 100644 index 00000000..e5e6da9d --- /dev/null +++ b/server/resources/templates/semantic_search_using_sparse_encoders.json @@ -0,0 +1,14 @@ +{ + "name": "Semantic Search using Sparse Encoders", + "description": "Build a flow that allows you to search by text and rank results by semantic similarity, to improve search quality. This template uses [Neural Sparse](https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v2-distill), a sparse encoder, to convert text into sparse vectors. This implementation is potentially more cost efficient than the dense (k-NN) vectors for smaller indexes (< 10M documents).", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.19.0", + "3.0.0" + ] + }, + "ui_metadata": { + "type": "Semantic Search using Sparse Encoders" + } +} \ No newline at end of file diff --git a/yarn.lock b/yarn.lock index 6fca7a37..8972f3bf 100644 --- a/yarn.lock +++ b/yarn.lock @@ -68,11 +68,6 @@ classcat "^5.0.3" zustand "^4.4.1" -"@testing-library/user-event@^14.5.2": - version "14.5.2" - resolved "https://registry.yarnpkg.com/@testing-library/user-event/-/user-event-14.5.2.tgz#db7257d727c891905947bd1c1a99da20e03c2ebd" - integrity sha512-YAh82Wh4TIrxYLmfGcixwD18oIjyC1pFQC2Y01F2lzV2HTMiYrI0nze0FD0ocB//CKS/7jIUgae+adPqxK5yCQ== - "@types/d3-array@*": version "3.0.8" resolved "https://registry.yarnpkg.com/@types/d3-array/-/d3-array-3.0.8.tgz#a5d0687a12b48142c6f124d5e3796054e91bcea5"