diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d3242a23..174a5dbec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Enhancements ### Bug Fixes +- Remove validations for unmapped fields (text and image) in TextImageEmbeddingProcessor ([#1230](https://github.com/opensearch-project/neural-search/pull/1230)) ### Infrastructure diff --git a/src/main/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessor.java index d675f6f04..9923ff1d4 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessor.java @@ -17,7 +17,6 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.core.action.ActionListener; import org.opensearch.env.Environment; -import org.opensearch.index.mapper.IndexFieldMapper; import org.opensearch.ingest.AbstractProcessor; import org.opensearch.ingest.IngestDocument; import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor; @@ -25,7 +24,6 @@ import com.google.common.annotations.VisibleForTesting; import lombok.extern.log4j.Log4j2; -import org.opensearch.neuralsearch.util.ProcessorDocumentUtils; /** * This processor is used for user input data text and image embedding processing, model_id can be used to indicate which model user use, @@ -107,7 +105,6 @@ public IngestDocument execute(IngestDocument ingestDocument) { @Override public void execute(final IngestDocument ingestDocument, final BiConsumer handler) { try { - validateEmbeddingFieldsValue(ingestDocument); Map knnMap = buildMapWithKnnKeyAndOriginalValue(ingestDocument); Map inferenceMap = createInferences(knnMap); if (inferenceMap.isEmpty()) { @@ -173,20 +170,6 @@ Map buildTextEmbeddingResult(final String knnKey, List m return result; } - private void validateEmbeddingFieldsValue(final IngestDocument ingestDocument) { - Map sourceAndMetadataMap = ingestDocument.getSourceAndMetadata(); - String indexName = sourceAndMetadataMap.get(IndexFieldMapper.NAME).toString(); - ProcessorDocumentUtils.validateMapTypeValue( - FIELD_MAP_FIELD, - sourceAndMetadataMap, - fieldMap, - indexName, - clusterService, - environment, - false - ); - } - @Override public String getType() { return TYPE; diff --git a/src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorIT.java b/src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorIT.java index 667591789..313c5cb07 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorIT.java @@ -8,6 +8,7 @@ import java.nio.file.Path; import org.junit.Before; +import org.opensearch.client.ResponseException; import org.opensearch.neuralsearch.BaseNeuralSearchIT; /** @@ -21,19 +22,15 @@ public class TextImageEmbeddingProcessorIT extends BaseNeuralSearchIT { private static final String INGEST_DOCUMENT = "{\n" + " \"title\": \"This is a good day\",\n" + " \"description\": \"daily logging\",\n" - + " \"passage_text\": \"A very nice day today\",\n" + + " \"passage_text\": \"passage_text_value\",\n" + + " \"text\": \"\",\n" + + " \"image\": null,\n" + " \"favorites\": {\n" + " \"game\": \"overwatch\",\n" + " \"movie\": null\n" + " }\n" + "}\n"; - private static final String INGEST_DOCUMENT_UNMAPPED_FIELDS = "{\n" - + " \"title\": \"This is a good day\",\n" - + " \"description\": \"daily logging\",\n" - + " \"some_random_field\": \"Today is a sunny weather\"\n" - + "}\n"; - @Before public void setUp() throws Exception { super.setUp(); @@ -49,10 +46,21 @@ public void testEmbeddingProcessor_whenIngestingDocumentWithOrWithoutSourceMatch ingestDocument(INDEX_NAME, INGEST_DOCUMENT); assertEquals(1, getDocCount(INDEX_NAME)); // verify doc without mapping - ingestDocument(INDEX_NAME, INGEST_DOCUMENT_UNMAPPED_FIELDS); + String documentWithUnmappedFields; + documentWithUnmappedFields = INGEST_DOCUMENT.replace("passage_text", "random_field_1"); + ingestDocument(INDEX_NAME, documentWithUnmappedFields); assertEquals(2, getDocCount(INDEX_NAME)); } + public void testEmbeddingProcessor_whenIngestingDocumentWithNullMappingValue_thenThrowException() throws Exception { + String modelId = uploadModel(); + loadModel(modelId); + createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.TEXT_IMAGE_EMBEDDING); + createIndexWithPipeline(INDEX_NAME, "IndexMappings.json", PIPELINE_NAME); + + expectThrows(ResponseException.class, () -> ingestDocument(INDEX_NAME, INGEST_DOCUMENT.replace("\"passage_text_value\"", "null"))); + } + private String uploadModel() throws Exception { String requestBody = Files.readString(Path.of(classLoader.getResource("processor/UploadModelRequestBody.json").toURI())); return registerModelGroupAndUploadModel(requestBody); diff --git a/src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorTests.java b/src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorTests.java index 90cc2e8fc..e6306523e 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorTests.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessorTests.java @@ -185,7 +185,9 @@ public void testExecute_successful() { sourceAndMetadata.put(IndexFieldMapper.NAME, "my_index"); sourceAndMetadata.put("key1", "value1"); sourceAndMetadata.put("my_text_field", "value2"); - sourceAndMetadata.put("key3", "value3"); + sourceAndMetadata.put("text", ""); + sourceAndMetadata.put("image", null); + sourceAndMetadata.put("key5", Map.of("inner_field", "innerValue1")); sourceAndMetadata.put("image_field", "base64_of_image_1234567890"); IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>()); TextImageEmbeddingProcessor processor = createInstance();