1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -6,6 +6,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
## [Unreleased 3.0](https://github.com/opensearch-project/anomaly-detection/compare/2.x...HEAD)
### Features
### Enhancements
- Added workflow preset for Semantic Search using Sparse Encoders (https://github.com/opensearch-project/dashboards-flow-framework/pull/742)
### Bug Fixes
### Infrastructure
### Documentation
57 changes: 57 additions & 0 deletions common/constants.ts
@@ -155,6 +155,38 @@ export const OPENAI_CONFIGS = {
} as RemoteEmbeddingModelConfig,
};

// Neural sparse encoding model configs. For sparse encoders, `dimension` corresponds to the token vocabulary size.
export const NEURAL_SPARSE_CONFIGS = {
[`opensearch-neural-sparse-encoding-v2-distill`]: {
dimension: 30522,
fieldName: 'passage_embedding',
} as RemoteEmbeddingModelConfig,
[`opensearch-neural-sparse-encoding-v1`]: {
dimension: 30522,
fieldName: 'passage_embedding',
} as RemoteEmbeddingModelConfig,
[`opensearch-neural-sparse-encoding-multilingual-v1`]: {
dimension: 105879,
fieldName: 'passage_embedding',
} as RemoteEmbeddingModelConfig,
[`opensearch-neural-sparse-encoding-doc-v2-mini`]: {
dimension: 30522,
fieldName: 'passage_embedding',
} as RemoteEmbeddingModelConfig,
[`opensearch-neural-sparse-encoding-doc-v3-distill`]: {
dimension: 30522,
fieldName: 'passage_embedding',
} as RemoteEmbeddingModelConfig,
[`opensearch-neural-sparse-encoding-doc-v1`]: {
dimension: 30522,
fieldName: 'passage_embedding',
} as RemoteEmbeddingModelConfig,
[`opensearch-neural-sparse-encoding-doc-v2-distill`]: {
dimension: 30522,
fieldName: 'passage_embedding',
} as RemoteEmbeddingModelConfig,
};

/**
* Various constants pertaining to Workflow configs
*/
@@ -173,6 +205,7 @@ export enum WORKFLOW_TYPE {
HYBRID_SEARCH = 'Hybrid Search',
VECTOR_SEARCH_WITH_RAG = 'RAG with Vector Retrieval',
HYBRID_SEARCH_WITH_RAG = 'RAG with Hybrid Search',
SEMANTIC_SEARCH_USING_SPARSE_ENCODERS = 'Semantic Search using Sparse Encoders',
CUSTOM = 'Custom Search',
UNKNOWN = 'Unknown',
}
@@ -211,6 +244,7 @@ export enum MODEL_TYPE {
export enum MODEL_CATEGORY {
EMBEDDING = 'EMBEDDING',
LLM = 'LLM',
SPARSE_ENCODER = 'SPARSE_ENCODER',
}

/**
@@ -293,6 +327,13 @@ export const COHERE_EMBEDDING_MODEL_DOCS_LINK =
export const BEDROCK_TITAN_EMBEDDING_DOCS_LINK =
'https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#amazon-bedrock-titan-text-embedding';

// Sparse Encoder Models Documentation Links
export const OPENSEARCH_NEURAL_SPARSE_DOCS_LINK =
'https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v2-distill';

export const SAGEMAKER_SPARSE_DEPLOY_LINK =
'https://github.com/zhichao-aws/opensearch-neural-sparse-sample/tree/main/examples/deploy_on_sagemaker';

// ML Models setup Documentation Link
export const ML_MODELS_SETUP_DOCS_LINK =
'https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md';
@@ -595,6 +636,18 @@ export const HYBRID_SEARCH_QUERY_MATCH_TERM = {
},
},
};
export const NEURAL_SPARSE_SEARCH_QUERY = {
_source: {
excludes: [VECTOR_FIELD_PATTERN],
},
query: {
neural_sparse: {
[VECTOR_FIELD_PATTERN]: {
query_tokens: VECTOR_PATTERN,
},
},
},
};

export const QUERY_PRESETS = [
{
@@ -649,6 +702,10 @@ export const QUERY_PRESETS = [
name: WORKFLOW_TYPE.MULTIMODAL_SEARCH,
query: customStringify(MULTIMODAL_SEARCH_QUERY_BOOL),
},
{
name: 'Neural Sparse Search Query',
query: customStringify(NEURAL_SPARSE_SEARCH_QUERY),
},
{
name: 'Semantic search (neural query)',
query: customStringify(SEMANTIC_SEARCH_QUERY_NEURAL),
1 change: 1 addition & 0 deletions common/utils.ts
@@ -53,6 +53,7 @@ export function isVectorSearchUseCase(workflowType?: WORKFLOW_TYPE): boolean {
WORKFLOW_TYPE.HYBRID_SEARCH,
WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG,
WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG,
WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS,
].includes(workflowType)
);
}
117 changes: 117 additions & 0 deletions documentation/models.md
@@ -473,6 +473,123 @@ POST /_plugins/_ml/models/_register
}
```

### Neural Sparse Encoding

Deploy a sparse encoding model from the Hugging Face Model Hub to a SageMaker real-time inference endpoint using this [guide](https://github.com/zhichao-aws/opensearch-neural-sparse-sample/tree/main/examples/deploy_on_sagemaker).

Connector:

```
POST /_plugins/_ml/connectors/_create
{
"name": "Neural Sparse Encoding",
"description": "Test connector for Sagemaker model",
"version": 1,
"protocol": "aws_sigv4",
"credential": {
"access_key": "",
"secret_key": "",
"session_token": ""
},
"parameters": {
"region": "us-east-1",
"service_name": "sagemaker",
"model": "opensearch-neural-sparse-encoding-v2-distill"
},
"actions": [
{
"action_type": "predict",
"method": "POST",
"headers": {
"content-type": "application/json"
},
"url": "https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/xxxx/invocations",
"request_body": "[\"${parameters.text_doc}\"]",
"post_process_function": "String escape(def input) { if (input instanceof String) { if (input.contains('\\\\')) { input = input.replace('\\\\', '\\\\\\\\'); } if (input.contains('\"')) { input = input.replace('\"', '\\\\\"'); } if (input.contains('\r')) { input = input.replace('\r', '\\\\r'); } if (input.contains('\t')) { input = input.replace('\t', '\\\\t'); } if (input.contains('\n')) { input = input.replace('\n', '\\\\n'); } if (input.contains('\b')) { input = input.replace('\b', '\\\\b'); } if (input.contains('\f')) { input = input.replace('\f', '\\\\f'); } return input; } return input.toString(); } if (params.result == null || params.result.length == 0) { return '{\"dataAsMap\":{\"error\":\"no response error\"}}'; } String response = params.result[0].toString(); response = response.substring(1, response.length() - 1).replace('=', '\":').replace(', ', ',\"'); return '{\"dataAsMap\":{\"response\":{\"' + response + '}}}';"
}
]
}
```

Model:

```
POST /_plugins/_ml/models/_register
{ "name": "Neural Sparse Encoding Model",
"function_name": "remote",
"version": "1.0.0",
"connector_id": "<connector-id>",
"description": "Test connector for Sagemaker model",
"interface": {
"input": {
"type": "object",
"properties": {
"parameters": {
"type": "object",
"properties": {
"text_doc": {
"type": "string"
}
},
"additionalProperties": true,
"required": [
"text_doc"
]
}
}
},
"output": {
"type": "object",
"properties": {
"inference_results": {
"type": "array",
"items": {
"type": "object",
"properties": {
"output": {
"type": "array",
"items": {
"type": "object",
"properties": {
"dataAsMap": {
"type": "object",
"properties": {
"response": {
"type": "object",
"additionalProperties": {
"type": "number"
}
}
},
"required": [
"response"
]
}
},
"required": [
"dataAsMap"
]
}
},
"status_code": {
"type": "integer"
}
},
"required": [
"output",
"status_code"
]
}
}
},
"required": [
"inference_results"
]
}
}
}
```
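
After registering the model, you can deploy it and run a quick prediction to verify the connector end to end. This is a minimal smoke test; the model ID is a placeholder and the returned token weights depend on your SageMaker endpoint:

```
POST /_plugins/_ml/models/<model-id>/_deploy

POST /_plugins/_ml/models/<model-id>/_predict
{
  "parameters": {
    "text_doc": "hello world"
  }
}
```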

## Generative models

### Claude 3 Sonnet (hosted on Amazon Bedrock)
43 changes: 43 additions & 0 deletions documentation/tutorial.md
@@ -491,3 +491,46 @@ Override the query to a knn query, including the embedding output. For example:
}
}
```

---

## 9. Neural sparse search

### ML resources
Create and deploy a [Neural Sparse Encoding model](https://github.com/opensearch-project/dashboards-flow-framework/blob/main/documentation/models.md#neural-sparse-encoding).

### Index

Ensure the index mappings include a `rank_features` field, for example:

```
"<embedding_field_name>": {
"type": "rank_features"
}
```
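
For reference, a minimal index containing both the source text field and the sparse embedding field might look like the following; the index and field names are placeholders:

```
PUT /<index-name>
{
  "mappings": {
    "properties": {
      "<text_field_name>": {
        "type": "text"
      },
      "<embedding_field_name>": {
        "type": "rank_features"
      }
    }
  }
}
```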

### Ingest pipeline

Single ML inference processor. Map your input text field to the `text_doc` model input field. Optionally map the output `response` to a new document field. Transform the response if needed using a JSONPath expression. A rough REST equivalent is sketched below.
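
The pipeline produced by this configuration is roughly equivalent to the following sketch. The pipeline name, model ID, and field names are placeholders, and the exact `input_map`/`output_map` entries depend on your own field mappings:

```
PUT /_ingest/pipeline/<pipeline-name>
{
  "processors": [
    {
      "ml_inference": {
        "model_id": "<model-id>",
        "input_map": [
          {
            "text_doc": "<text_field_name>"
          }
        ],
        "output_map": [
          {
            "<embedding_field_name>": "response"
          }
        ]
      }
    }
  ]
}
```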


### Search pipeline

Single ML inference **search request** processor. Map the query field containing the input text to the `text_doc` model input field. Optionally map the output `response` to a new field. Transform the response if needed using a JSONPath expression. Override the query to a neural sparse query. For example:

```
{
"_source": {
"excludes": [
"<embedding_field>"
]
},
"query": {
"neural_sparse": {
"<embedding_field>": {
"query_tokens": ${response},
}
}
}
}
```
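
Behind the scenes, a search pipeline configured this way corresponds roughly to the sketch below. It assumes the incoming request is a `term` query on the text field; the pipeline name, model ID, field names, and the JSONPath used in `input_map` are placeholders that must match your own query shape:

```
PUT /_search/pipeline/<pipeline-name>
{
  "request_processors": [
    {
      "ml_inference": {
        "model_id": "<model-id>",
        "input_map": [
          {
            "text_doc": "query.term.<text_field_name>.value"
          }
        ],
        "output_map": [
          {
            "response": "response"
          }
        ],
        "query_template": "{\"_source\":{\"excludes\":[\"<embedding_field>\"]},\"query\":{\"neural_sparse\":{\"<embedding_field>\":{\"query_tokens\":${response}}}}}"
      }
    }
  ]
}
```
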
@@ -319,7 +319,7 @@ export function ComponentInput(props: ComponentInputProps) {
</EuiFlexItem>
</EuiFlexGroup>
) : props.selectedComponentId === COMPONENT_ID.INGEST_DATA ? (
<IngestData disabled={props.readonly} />
<IngestData disabled={props.readonly} workflowType={props.workflow?.ui_metadata?.type} />
) : props.selectedComponentId === COMPONENT_ID.SEARCH_REQUEST ? (
<ConfigureSearchRequest disabled={props.readonly} />
) : props.selectedComponentId === COMPONENT_ID.RUN_QUERY ? (
@@ -14,7 +14,7 @@ import {
} from '@elastic/eui';
import { JsonField } from '../input_fields';
import { getIn, useFormikContext } from 'formik';
import { WorkflowFormValues } from '../../../../../common';
import { WorkflowFormValues, WORKFLOW_TYPE } from '../../../../../common';
import { AppState } from '../../../../store';
import {
getEmbeddingField,
@@ -28,6 +28,7 @@ import {

interface AdvancedSettingsProps {
setHasInvalidDimensions: (hasInvalidDimensions: boolean) => void;
workflowType: WORKFLOW_TYPE | undefined;
disabled: boolean;
}

@@ -65,7 +66,7 @@ export function AdvancedSettings(props: AdvancedSettingsProps) {

// If a dimension is found, it is a known embedding model.
// Ensure the index is configured to be knn-enabled.
if (dimension !== undefined) {
    if (
      dimension !== undefined &&
      props.workflowType !== WORKFLOW_TYPE.SEMANTIC_SEARCH_USING_SPARSE_ENCODERS
    ) {
if (!isKnnIndex(curSettings)) {
setFieldValue(
indexSettingsPath,
@@ -13,10 +13,11 @@ import {
} from '@elastic/eui';
import { TextField } from '../input_fields';
import { AdvancedSettings } from './advanced_settings';
import { KNN_VECTOR_DOCS_LINK } from '../../../../../common';
import { KNN_VECTOR_DOCS_LINK, WORKFLOW_TYPE } from '../../../../../common';

interface IngestDataProps {
disabled: boolean;
workflowType: WORKFLOW_TYPE | undefined;
}

/**
@@ -57,6 +58,7 @@ export function IngestData(props: IngestDataProps) {
<EuiFlexItem>
<AdvancedSettings
setHasInvalidDimensions={setHasInvalidDimensions}
workflowType={props.workflowType}
disabled={props.disabled}
/>
</EuiFlexItem>
@@ -17,6 +17,8 @@ import {
BEDROCK_CLAUDE_3_SONNET_DOCS_LINK,
OPENAI_GPT35_DOCS_LINK,
DEEPSEEK_CHAT_DOCS_LINK,
OPENSEARCH_NEURAL_SPARSE_DOCS_LINK,
SAGEMAKER_SPARSE_DEPLOY_LINK,
} from '../../../../../common';

interface ModelInfoPopoverProps {
@@ -55,6 +57,19 @@ export function ModelInfoPopover({ modelCategory }: ModelInfoPopoverProps) {
</EuiLink>
</>
);
} else if (modelCategory === MODEL_CATEGORY.SPARSE_ENCODER) {
return (
<>
<EuiLink external href={OPENSEARCH_NEURAL_SPARSE_DOCS_LINK} target="_blank">
OpenSearch Neural Sparse Encoder
</EuiLink>
{' (deployable using '}
<EuiLink external href={SAGEMAKER_SPARSE_DEPLOY_LINK} target="_blank">
SageMaker Connector
</EuiLink>
{')'}
</>
);
}
return null;
};
@@ -64,6 +79,8 @@ export function ModelInfoPopover({ modelCategory }: ModelInfoPopoverProps) {
return 'n embedding';
} else if (modelCategory === MODEL_CATEGORY.LLM) {
return ' large language';
} else if (modelCategory === MODEL_CATEGORY.SPARSE_ENCODER) {
return ' sparse encoder';
}
return '';
};
@@ -85,7 +102,7 @@ export function ModelInfoPopover({ modelCategory }: ModelInfoPopoverProps) {
>
<div style={{ padding: '12px', width: '400px' }}>
<p style={{ margin: '0', lineHeight: '1.5' }}>
To create this workflow, you must select a{getModelTypeText()} model.
To create this workflow, you must select a{getModelTypeText()} model.
{getModelLinks() && <> For example: {getModelLinks()}.</>}
</p>
<p style={{ margin: '24px 0 0 0', lineHeight: '1.5' }}>
@@ -97,5 +114,5 @@ export function ModelInfoPopover({ modelCategory }: ModelInfoPopoverProps) {
</div>
</EuiPopover>
);

}