Remove validation on text and image field for text_image_embedding processor #1230

Merged: 3 commits merged on Mar 20, 2025 (changes shown from 2 commits).
CHANGELOG.md (1 addition, 0 deletions)
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Lower bound for min-max normalization technique in hybrid query ([#1195](https://github.com/opensearch-project/neural-search/pull/1195))
### Enhancements
### Bug Fixes
- Remove validations for unmapped fields (text and image) in TextImageEmbeddingProcessor ([#1230](https://github.com/opensearch-project/neural-search/pull/1230))
### Infrastructure
### Documentation
### Maintenance
TextImageEmbeddingProcessor.java
@@ -17,15 +17,13 @@
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.core.action.ActionListener;
import org.opensearch.env.Environment;
import org.opensearch.index.mapper.IndexFieldMapper;
import org.opensearch.ingest.AbstractProcessor;
import org.opensearch.ingest.IngestDocument;
import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor;

import com.google.common.annotations.VisibleForTesting;

import lombok.extern.log4j.Log4j2;
import org.opensearch.neuralsearch.util.ProcessorDocumentUtils;

/**
* This processor is used for user input data text and image embedding processing, model_id can be used to indicate which model user use,
@@ -107,7 +105,6 @@ public IngestDocument execute(IngestDocument ingestDocument) {
@Override
public void execute(final IngestDocument ingestDocument, final BiConsumer<IngestDocument, Exception> handler) {
try {
validateEmbeddingFieldsValue(ingestDocument);
Map<String, String> knnMap = buildMapWithKnnKeyAndOriginalValue(ingestDocument);
Map<String, String> inferenceMap = createInferences(knnMap);
if (inferenceMap.isEmpty()) {
@@ -173,20 +170,6 @@ Map<String, Object> buildTextEmbeddingResult(final String knnKey, List<Number> m
return result;
}

private void validateEmbeddingFieldsValue(final IngestDocument ingestDocument) {
Contributor: We should not remove this method entirely, but pass in true for the allowEmpty field here.

Contributor Author: This method simply validates the values in field_map; however, this field_map contains:

{
  text: "input_text_field",
  image: "input_image_field",
}

It's correct to validate it during processor creation, but not during document ingestion. We can accept whatever value the text or image field has in a document, so there's no point in keeping the validation method.
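The mismatch being discussed can be sketched in plain Java (hypothetical helper names, not code from the repository): the removed validation effectively looked up the field_map keys ("text", "image"), which never appear in documents, while the field_map values are the names documents actually contain.

```java
import java.util.Map;

public class FieldMapLookupSketch {
    // True if every field_map KEY (the reserved keywords "text"/"image") exists in the doc.
    static boolean reservedKeysPresent(Map<String, String> fieldMap, Map<String, Object> doc) {
        return fieldMap.keySet().stream().allMatch(doc::containsKey);
    }

    // True if every field_map VALUE (the actual source fields) exists in the doc.
    static boolean mappedFieldsPresent(Map<String, String> fieldMap, Map<String, Object> doc) {
        return fieldMap.values().stream().allMatch(doc::containsKey);
    }
}
```

With field_map {text: input_text_field, image: input_image_field} and a typical document, reservedKeysPresent is false while mappedFieldsPresent is true, which is why validating against the keys rejected perfectly valid documents.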

Contributor: We have the same validation for the text chunking processor here.

I would propose setting allowEmpty to true and not removing this method.

Contributor Author: The text chunking processor is different; in that processor, the field map is:

"field_map": {
      "<input_field>": "<output_field>"
    }

where input_field is "the name of the field from which to obtain text for generating chunked passages" (quoting the docs), so it's correct to have that validation method (it is also the only validation for that processor).

In the text_image_embedding processor, by contrast, the field map is:

{
  text: "input_text_field",
  image: "input_image_field",
}

where input_text_field and input_image_field are the fields from which to obtain values for embedding.

We should validate input_text_field and input_image_field instead of text and image (which is what that validation method checks). Since we accept any value for the text and image fields, no validation is needed.

Contributor: I think we are using this function incorrectly for the TextImageEmbeddingProcessor. But I think we still want to validate:

  1. The depth of the map should not exceed the limits.
  2. The input_text_field and input_image_field should have string values.

The actual field map we should validate is:
{
"input_text_field": "vector_field",
"input_image_field": "vector_field"
}

And we should unflatten it before the validation to handle the "." properly.
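The unflattening step proposed here can be sketched as follows (a hypothetical standalone helper, assuming dotted keys should expand into nested maps before depth and type validation):

```java
import java.util.HashMap;
import java.util.Map;

public class UnflattenSketch {
    // Expand dotted keys, e.g. {"a.b": v} -> {"a": {"b": v}}, so that depth limits
    // and per-leaf type checks see the real nested structure.
    @SuppressWarnings("unchecked")
    static Map<String, Object> unflatten(Map<String, Object> flat) {
        Map<String, Object> root = new HashMap<>();
        for (Map.Entry<String, Object> entry : flat.entrySet()) {
            String[] parts = entry.getKey().split("\\.");
            Map<String, Object> cursor = root;
            // Walk/create intermediate maps for every segment except the last.
            for (int i = 0; i < parts.length - 1; i++) {
                cursor = (Map<String, Object>) cursor.computeIfAbsent(parts[i], k -> new HashMap<String, Object>());
            }
            cursor.put(parts[parts.length - 1], entry.getValue());
        }
        return root;
    }
}
```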

Contributor Author: On the point that, if there is a value for text or image, it should be a string rather than an object or array:

For the unmapped fields text and image, why would we want to restrict the data type? The value can be anything, including a string or any other object.

If you are talking about the mapped fields input_text_field and input_image_field, we do need to validate the data type, but that's already handled in the buildMapWithKnnKeyAndOriginalValue method.
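The data-type handling referred to for the mapped fields can be sketched like this (hypothetical helper name; in the actual processor this is handled inside buildMapWithKnnKeyAndOriginalValue):

```java
import java.util.Map;

public class MappedFieldTypeSketch {
    // Reject non-string values for a mapped source field; null is tolerated here
    // and left to later null/empty handling.
    static String requireStringValue(Map<String, Object> doc, String field) {
        Object value = doc.get(field);
        if (value != null && !(value instanceof String)) {
            throw new IllegalArgumentException("field [" + field + "] must be a string");
        }
        return (String) value;
    }
}
```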

Collaborator: @weijia-aws For 1, you're correct: it's already defined as Map<String, String>, so we don't need any additional validation for this.
For 2, we do allow ingesting documents whose mapped fields contain empty strings. In fact, allowEmpty was introduced for TextChunkingProcessor because the chunking field might be empty during ingestion, but the value defaults to false in InferenceProcessor, which is extended by TextEmbeddingProcessor and SparseEmbeddingProcessor. The reason we don't allow the field to be empty there is that passing an empty string to generate a dense/sparse embedding doesn't make sense.

For now, TextImageEmbeddingProcessor's validation is not working because it validates the reserved keywords instead of the actual keys, so I'm open to either merging this PR (as it does fix the bug) or changing it to add proper validation.
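The allowEmpty behavior described above can be sketched as follows (hypothetical names, not the actual InferenceProcessor code):

```java
public class AllowEmptySketch {
    // Chunking-style processors pass allowEmpty = true; embedding processors keep
    // the default false, since embedding an empty string is meaningless.
    static void validateFieldValue(String fieldName, Object value, boolean allowEmpty) {
        if (value instanceof String s && s.isEmpty() && !allowEmpty) {
            throw new IllegalArgumentException("field [" + fieldName + "] has empty string value, cannot process it");
        }
    }
}
```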

Contributor Author: For now, I will merge as is. We can add the validation in the future.

Collaborator: @weijia-aws Could you create a GitHub issue for those two cases above?

Contributor Author: Created one GitHub issue for the second case (#1239); there's no issue with the first one.

        Map<String, Object> sourceAndMetadataMap = ingestDocument.getSourceAndMetadata();
        String indexName = sourceAndMetadataMap.get(IndexFieldMapper.NAME).toString();
        ProcessorDocumentUtils.validateMapTypeValue(
            FIELD_MAP_FIELD,
            sourceAndMetadataMap,
            fieldMap,
            indexName,
            clusterService,
            environment,
            false
        );
    }

@Override
public String getType() {
return TYPE;
TextImageEmbeddingProcessorIT.java
@@ -8,6 +8,7 @@
import java.nio.file.Path;

import org.junit.Before;
import org.opensearch.client.ResponseException;
import org.opensearch.neuralsearch.BaseNeuralSearchIT;

/**
@@ -21,19 +22,15 @@ public class TextImageEmbeddingProcessorIT extends BaseNeuralSearchIT {
private static final String INGEST_DOCUMENT = "{\n"
+ " \"title\": \"This is a good day\",\n"
+ " \"description\": \"daily logging\",\n"
+ " \"passage_text\": \"A very nice day today\",\n"
+ " \"passage_text\": \"passage_text_value\",\n"
+ " \"text\": \"\",\n"
+ " \"image\": null,\n"
+ " \"favorites\": {\n"
+ " \"game\": \"overwatch\",\n"
+ " \"movie\": null\n"
+ " }\n"
+ "}\n";

private static final String INGEST_DOCUMENT_UNMAPPED_FIELDS = "{\n"
+ " \"title\": \"This is a good day\",\n"
+ " \"description\": \"daily logging\",\n"
+ " \"some_random_field\": \"Today is a sunny weather\"\n"
+ "}\n";

@Before
public void setUp() throws Exception {
super.setUp();
@@ -49,10 +46,21 @@ public void testEmbeddingProcessor_whenIngestingDocumentWithOrWithoutSourceMatch
ingestDocument(INDEX_NAME, INGEST_DOCUMENT);
assertEquals(1, getDocCount(INDEX_NAME));
// verify doc without mapping
ingestDocument(INDEX_NAME, INGEST_DOCUMENT_UNMAPPED_FIELDS);
String documentWithUnmappedFields;
documentWithUnmappedFields = INGEST_DOCUMENT.replace("passage_text", "random_field_1");
ingestDocument(INDEX_NAME, documentWithUnmappedFields);
assertEquals(2, getDocCount(INDEX_NAME));
}

public void testEmbeddingProcessor_whenIngestingDocumentWithNullMappingValue_thenThrowException() throws Exception {
String modelId = uploadModel();
loadModel(modelId);
createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.TEXT_IMAGE_EMBEDDING);
createIndexWithPipeline(INDEX_NAME, "IndexMappings.json", PIPELINE_NAME);

expectThrows(ResponseException.class, () -> ingestDocument(INDEX_NAME, INGEST_DOCUMENT.replace("\"passage_text_value\"", "null")));
}

private String uploadModel() throws Exception {
String requestBody = Files.readString(Path.of(classLoader.getResource("processor/UploadModelRequestBody.json").toURI()));
return registerModelGroupAndUploadModel(requestBody);
TextImageEmbeddingProcessorTests.java
@@ -185,7 +185,9 @@ public void testExecute_successful() {
sourceAndMetadata.put(IndexFieldMapper.NAME, "my_index");
sourceAndMetadata.put("key1", "value1");
sourceAndMetadata.put("my_text_field", "value2");
sourceAndMetadata.put("key3", "value3");
sourceAndMetadata.put("text", "");
sourceAndMetadata.put("image", null);
sourceAndMetadata.put("key5", Map.of("inner_field", "innerValue1"));
sourceAndMetadata.put("image_field", "base64_of_image_1234567890");
IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>());
TextImageEmbeddingProcessor processor = createInstance();