Skip to content

Commit c0f4287

Browse files
committed
Remove validation on text and image field for text_image_embedding processor (#1230)
* Remove validation on text and image field for text_image_embedding processor

  Signed-off-by: Weijia Zhao <[email protected]>

* Add Changelog

  Signed-off-by: Weijia Zhao <[email protected]>

---------

Signed-off-by: Weijia Zhao <[email protected]>
(cherry picked from commit 8506daa)
1 parent f73cf85 commit c0f4287

File tree

4 files changed

+20
-27
lines changed

4 files changed

+20
-27
lines changed

CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1616
## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.19...2.x)
1717
### Features
1818
### Enhancements
19-
### Bug Fixes
19+
- Remove validations for unmapped fields (text and image) in TextImageEmbeddingProcessor ([#1230](https://github.com/opensearch-project/neural-search/pull/1230))
2020
### Infrastructure
2121
### Documentation
2222
### Maintenance

src/main/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessor.java

-17
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,13 @@
1717
import org.opensearch.cluster.service.ClusterService;
1818
import org.opensearch.core.action.ActionListener;
1919
import org.opensearch.env.Environment;
20-
import org.opensearch.index.mapper.IndexFieldMapper;
2120
import org.opensearch.ingest.AbstractProcessor;
2221
import org.opensearch.ingest.IngestDocument;
2322
import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor;
2423

2524
import com.google.common.annotations.VisibleForTesting;
2625

2726
import lombok.extern.log4j.Log4j2;
28-
import org.opensearch.neuralsearch.util.ProcessorDocumentUtils;
2927

3028
/**
3129
* This processor is used for user input data text and image embedding processing, model_id can be used to indicate which model user use,
@@ -107,7 +105,6 @@ public IngestDocument execute(IngestDocument ingestDocument) {
107105
@Override
108106
public void execute(final IngestDocument ingestDocument, final BiConsumer<IngestDocument, Exception> handler) {
109107
try {
110-
validateEmbeddingFieldsValue(ingestDocument);
111108
Map<String, String> knnMap = buildMapWithKnnKeyAndOriginalValue(ingestDocument);
112109
Map<String, String> inferenceMap = createInferences(knnMap);
113110
if (inferenceMap.isEmpty()) {
@@ -170,20 +167,6 @@ Map<String, Object> buildTextEmbeddingResult(final String knnKey, List<Float> mo
170167
return result;
171168
}
172169

173-
private void validateEmbeddingFieldsValue(final IngestDocument ingestDocument) {
174-
Map<String, Object> sourceAndMetadataMap = ingestDocument.getSourceAndMetadata();
175-
String indexName = sourceAndMetadataMap.get(IndexFieldMapper.NAME).toString();
176-
ProcessorDocumentUtils.validateMapTypeValue(
177-
FIELD_MAP_FIELD,
178-
sourceAndMetadataMap,
179-
fieldMap,
180-
indexName,
181-
clusterService,
182-
environment,
183-
false
184-
);
185-
}
186-
187170
@Override
188171
public String getType() {
189172
return TYPE;

src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorIT.java

+16-8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.nio.file.Path;
99

1010
import org.junit.Before;
11+
import org.opensearch.client.ResponseException;
1112
import org.opensearch.neuralsearch.BaseNeuralSearchIT;
1213

1314
/**
@@ -21,19 +22,15 @@ public class TextImageEmbeddingProcessorIT extends BaseNeuralSearchIT {
2122
private static final String INGEST_DOCUMENT = "{\n"
2223
+ " \"title\": \"This is a good day\",\n"
2324
+ " \"description\": \"daily logging\",\n"
24-
+ " \"passage_text\": \"A very nice day today\",\n"
25+
+ " \"passage_text\": \"passage_text_value\",\n"
26+
+ " \"text\": \"\",\n"
27+
+ " \"image\": null,\n"
2528
+ " \"favorites\": {\n"
2629
+ " \"game\": \"overwatch\",\n"
2730
+ " \"movie\": null\n"
2831
+ " }\n"
2932
+ "}\n";
3033

31-
private static final String INGEST_DOCUMENT_UNMAPPED_FIELDS = "{\n"
32-
+ " \"title\": \"This is a good day\",\n"
33-
+ " \"description\": \"daily logging\",\n"
34-
+ " \"some_random_field\": \"Today is a sunny weather\"\n"
35-
+ "}\n";
36-
3734
@Before
3835
public void setUp() throws Exception {
3936
super.setUp();
@@ -51,13 +48,24 @@ public void testEmbeddingProcessor_whenIngestingDocumentWithOrWithoutSourceMatch
5148
ingestDocument(INDEX_NAME, INGEST_DOCUMENT);
5249
assertEquals(1, getDocCount(INDEX_NAME));
5350
// verify doc without mapping
54-
ingestDocument(INDEX_NAME, INGEST_DOCUMENT_UNMAPPED_FIELDS);
51+
String documentWithUnmappedFields;
52+
documentWithUnmappedFields = INGEST_DOCUMENT.replace("passage_text", "random_field_1");
53+
ingestDocument(INDEX_NAME, documentWithUnmappedFields);
5554
assertEquals(2, getDocCount(INDEX_NAME));
5655
} finally {
5756
wipeOfTestResources(INDEX_NAME, PIPELINE_NAME, modelId, null);
5857
}
5958
}
6059

60+
public void testEmbeddingProcessor_whenIngestingDocumentWithNullMappingValue_thenThrowException() throws Exception {
61+
String modelId = uploadModel();
62+
loadModel(modelId);
63+
createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.TEXT_IMAGE_EMBEDDING);
64+
createIndexWithPipeline(INDEX_NAME, "IndexMappings.json", PIPELINE_NAME);
65+
66+
expectThrows(ResponseException.class, () -> ingestDocument(INDEX_NAME, INGEST_DOCUMENT.replace("\"passage_text_value\"", "null")));
67+
}
68+
6169
private String uploadModel() throws Exception {
6270
String requestBody = Files.readString(Path.of(classLoader.getResource("processor/UploadModelRequestBody.json").toURI()));
6371
return registerModelGroupAndUploadModel(requestBody);

src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorTests.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,9 @@ public void testExecute_successful() {
185185
sourceAndMetadata.put(IndexFieldMapper.NAME, "my_index");
186186
sourceAndMetadata.put("key1", "value1");
187187
sourceAndMetadata.put("my_text_field", "value2");
188-
sourceAndMetadata.put("key3", "value3");
188+
sourceAndMetadata.put("text", "");
189+
sourceAndMetadata.put("image", null);
190+
sourceAndMetadata.put("key5", Map.of("inner_field", "innerValue1"));
189191
sourceAndMetadata.put("image_field", "base64_of_image_1234567890");
190192
IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>());
191193
TextImageEmbeddingProcessor processor = createInstance();

0 commit comments

Comments
 (0)