
Commit 129123c

Merge branch 'opensearch-project:main' into main

2 parents: fa247b4 + 972acb6

File tree

6 files changed: +225, -183 lines

CHANGELOG.md (+1)

@@ -32,4 +32,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 ### Infrastructure
 ### Documentation
 ### Maintenance
+- Add reindex integration tests for ingest processors ([#1075](https://github.com/opensearch-project/neural-search/pull/1075))
 ### Refactoring

src/test/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessIT.java (+35 -50)

@@ -4,20 +4,11 @@
  */
 package org.opensearch.neuralsearch.processor;
 
-import java.nio.file.Files;
-import java.nio.file.Path;
 import java.util.Map;
 
-import org.apache.hc.core5.http.HttpHeaders;
-import org.apache.hc.core5.http.io.entity.EntityUtils;
-import org.apache.hc.core5.http.message.BasicHeader;
 import org.junit.Before;
-import org.opensearch.client.Response;
-import org.opensearch.common.xcontent.XContentHelper;
-import org.opensearch.common.xcontent.XContentType;
 import org.opensearch.neuralsearch.BaseNeuralSearchIT;
 
-import com.google.common.collect.ImmutableList;
 import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;
 
 public class SparseEncodingProcessIT extends BaseNeuralSearchIT {
@@ -26,6 +17,20 @@ public class SparseEncodingProcessIT extends BaseNeuralSearchIT {
 
     private static final String PIPELINE_NAME = "pipeline-sparse-encoding";
 
+    private static final String INGEST_DOCUMENT = "{\n"
+        + "  \"title\": \"This is a good day\",\n"
+        + "  \"description\": \"daily logging\",\n"
+        + "  \"favor_list\": [\n"
+        + "    \"test\",\n"
+        + "    \"hello\",\n"
+        + "    \"mock\"\n"
+        + "  ],\n"
+        + "  \"favorites\": {\n"
+        + "    \"game\": \"overwatch\",\n"
+        + "    \"movie\": null\n"
+        + "  }\n"
+        + "}\n";
+
     @Before
     public void setUp() throws Exception {
         super.setUp();
@@ -37,8 +42,8 @@ public void testSparseEncodingProcessor() throws Exception {
         try {
             modelId = prepareSparseEncodingModel();
             createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.SPARSE_ENCODING);
-            createSparseEncodingIndex();
-            ingestDocument();
+            createIndexWithPipeline(INDEX_NAME, "SparseEncodingIndexMappings.json", PIPELINE_NAME);
+            ingestDocument(INDEX_NAME, INGEST_DOCUMENT);
             assertEquals(1, getDocCount(INDEX_NAME));
 
             NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder();
@@ -58,8 +63,8 @@ public void testSparseEncodingProcessorWithPrune() throws Exception {
         try {
             modelId = prepareSparseEncodingModel();
             createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.SPARSE_ENCODING_PRUNE);
-            createSparseEncodingIndex();
-            ingestDocument();
+            createIndexWithPipeline(INDEX_NAME, "SparseEncodingIndexMappings.json", PIPELINE_NAME);
+            ingestDocument(INDEX_NAME, INGEST_DOCUMENT);
             assertEquals(1, getDocCount(INDEX_NAME));
 
             NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder();
@@ -74,42 +79,22 @@ public void testSparseEncodingProcessorWithPrune() throws Exception {
         }
     }
 
-    private void createSparseEncodingIndex() throws Exception {
-        createIndexWithConfiguration(
-            INDEX_NAME,
-            Files.readString(Path.of(classLoader.getResource("processor/SparseEncodingIndexMappings.json").toURI())),
-            PIPELINE_NAME
-        );
-    }
-
-    private void ingestDocument() throws Exception {
-        String ingestDocument = "{\n"
-            + "  \"title\": \"This is a good day\",\n"
-            + "  \"description\": \"daily logging\",\n"
-            + "  \"favor_list\": [\n"
-            + "    \"test\",\n"
-            + "    \"hello\",\n"
-            + "    \"mock\"\n"
-            + "  ],\n"
-            + "  \"favorites\": {\n"
-            + "    \"game\": \"overwatch\",\n"
-            + "    \"movie\": null\n"
-            + "  }\n"
-            + "}\n";
-        Response response = makeRequest(
-            client(),
-            "POST",
-            INDEX_NAME + "/_doc?refresh",
-            null,
-            toHttpEntity(ingestDocument),
-            ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana"))
-        );
-        Map<String, Object> map = XContentHelper.convertToMap(
-            XContentType.JSON.xContent(),
-            EntityUtils.toString(response.getEntity()),
-            false
-        );
-        assertEquals("created", map.get("result"));
+    public void testSparseEncodingProcessorWithReindex() throws Exception {
+        // create a simple source index and ingest data into it
+        String fromIndexName = "test-reindex-from";
+        createIndexWithConfiguration(fromIndexName, "{ \"settings\": { \"number_of_shards\": 1, \"number_of_replicas\": 0 } }", null);
+        ingestDocument(fromIndexName, "{ \"text\": \"hello world\" }");
+        // create a sparse encoding index as the reindex destination
+        String modelId = null;
+        try {
+            modelId = prepareSparseEncodingModel();
+            String toIndexName = "test-reindex-to";
+            createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.SPARSE_ENCODING);
+            createIndexWithPipeline(toIndexName, "SparseEncodingIndexMappings.json", PIPELINE_NAME);
+            reindex(fromIndexName, toIndexName);
+            assertEquals(1, getDocCount(toIndexName));
+        } finally {
+            wipeOfTestResources(fromIndexName, PIPELINE_NAME, modelId, null);
+        }
     }
-
 }
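
Note: both tests now delegate to createIndexWithPipeline and a parameterized ingestDocument inherited from BaseNeuralSearchIT, whose implementations are not part of this diff. A minimal sketch of what those helpers might look like, assuming they simply generalize the private methods removed above (signatures inferred from the call sites):

    // Hypothetical sketch only: these helpers live in BaseNeuralSearchIT and are
    // not shown in this commit; bodies are adapted from the removed test code.
    private void ingestDocument(String indexName, String document) throws Exception {
        // POST with ?refresh so the document is immediately visible to getDocCount()
        Response response = makeRequest(
            client(),
            "POST",
            indexName + "/_doc?refresh",
            null,
            toHttpEntity(document),
            ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana"))
        );
        Map<String, Object> map = XContentHelper.convertToMap(
            XContentType.JSON.xContent(),
            EntityUtils.toString(response.getEntity()),
            false
        );
        assertEquals("created", map.get("result"));
    }

    private void createIndexWithPipeline(String indexName, String mappingFileName, String pipelineName) throws Exception {
        // Load the index mapping from test resources and attach the ingest
        // pipeline as the index default, as createSparseEncodingIndex() did
        createIndexWithConfiguration(
            indexName,
            Files.readString(Path.of(classLoader.getResource("processor/" + mappingFileName).toURI())),
            pipelineName
        );
    }

Moving these into the base class removes the duplicated HTTP plumbing from each processor IT and lets the document body vary per test.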

src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorIT.java (+39 -30)

@@ -4,11 +4,7 @@
  */
 package org.opensearch.neuralsearch.processor;
 
-import com.google.common.collect.ImmutableList;
 import lombok.SneakyThrows;
-import org.apache.hc.core5.http.HttpHeaders;
-import org.apache.hc.core5.http.io.entity.EntityUtils;
-import org.apache.hc.core5.http.message.BasicHeader;
 import org.junit.Before;
 
 import java.net.URL;
@@ -19,9 +15,6 @@
 import java.util.Map;
 import java.util.Objects;
 
-import org.opensearch.client.Response;
-import org.opensearch.common.xcontent.XContentHelper;
-import org.opensearch.common.xcontent.XContentType;
 import org.opensearch.index.query.MatchAllQueryBuilder;
 import org.opensearch.neuralsearch.BaseNeuralSearchIT;
 
@@ -73,7 +66,9 @@ public void testTextChunkingProcessor_withFixedTokenLengthAlgorithmStandardToken
         try {
             createPipelineProcessor(FIXED_TOKEN_LENGTH_PIPELINE_WITH_STANDARD_TOKENIZER_NAME);
             createTextChunkingIndex(INDEX_NAME, FIXED_TOKEN_LENGTH_PIPELINE_WITH_STANDARD_TOKENIZER_NAME);
-            ingestDocument(TEST_DOCUMENT);
+
+            String document = getDocumentFromFilePath(TEST_DOCUMENT);
+            ingestDocument(INDEX_NAME, document);
 
             List<String> expectedPassages = new ArrayList<>();
             expectedPassages.add("This is an example document to be chunked. The document ");
@@ -90,7 +85,9 @@ public void testTextChunkingProcessor_withFixedTokenLengthAlgorithmLetterTokeniz
         try {
             createPipelineProcessor(FIXED_TOKEN_LENGTH_PIPELINE_WITH_LETTER_TOKENIZER_NAME);
             createTextChunkingIndex(INDEX_NAME, FIXED_TOKEN_LENGTH_PIPELINE_WITH_LETTER_TOKENIZER_NAME);
-            ingestDocument(TEST_DOCUMENT);
+
+            String document = getDocumentFromFilePath(TEST_DOCUMENT);
+            ingestDocument(INDEX_NAME, document);
 
             List<String> expectedPassages = new ArrayList<>();
             expectedPassages.add("This is an example document to be chunked. The document ");
@@ -107,7 +104,9 @@ public void testTextChunkingProcessor_withFixedTokenLengthAlgorithmLowercaseToke
         try {
             createPipelineProcessor(FIXED_TOKEN_LENGTH_PIPELINE_WITH_LOWERCASE_TOKENIZER_NAME);
             createTextChunkingIndex(INDEX_NAME, FIXED_TOKEN_LENGTH_PIPELINE_WITH_LOWERCASE_TOKENIZER_NAME);
-            ingestDocument(TEST_DOCUMENT);
+
+            String document = getDocumentFromFilePath(TEST_DOCUMENT);
+            ingestDocument(INDEX_NAME, document);
 
             List<String> expectedPassages = new ArrayList<>();
             expectedPassages.add("This is an example document to be chunked. The document ");
@@ -124,7 +123,10 @@ public void testTextChunkingProcessor_withFixedTokenLengthAlgorithmStandardToken
         try {
             createPipelineProcessor(FIXED_TOKEN_LENGTH_PIPELINE_WITH_STANDARD_TOKENIZER_NAME);
             createTextChunkingIndex(INDEX_NAME, FIXED_TOKEN_LENGTH_PIPELINE_WITH_STANDARD_TOKENIZER_NAME);
-            Exception exception = assertThrows(Exception.class, () -> ingestDocument(TEST_LONG_DOCUMENT));
+            Exception exception = assertThrows(Exception.class, () -> {
+                String document = getDocumentFromFilePath(TEST_LONG_DOCUMENT);
+                ingestDocument(INDEX_NAME, document);
+            });
             // max_token_count is 100 by index settings
             assert (exception.getMessage()
                 .contains("The number of tokens produced by calling _analyze has exceeded the allowed maximum of [100]."));
@@ -139,7 +141,9 @@ public void testTextChunkingProcessor_withDelimiterAlgorithm_successful() {
         try {
             createPipelineProcessor(DELIMITER_PIPELINE_NAME);
             createTextChunkingIndex(INDEX_NAME, DELIMITER_PIPELINE_NAME);
-            ingestDocument(TEST_DOCUMENT);
+
+            String document = getDocumentFromFilePath(TEST_DOCUMENT);
+            ingestDocument(INDEX_NAME, document);
 
             List<String> expectedPassages = new ArrayList<>();
             expectedPassages.add("This is an example document to be chunked.");
@@ -157,7 +161,9 @@ public void testTextChunkingProcessor_withCascadePipeline_successful() {
         try {
             createPipelineProcessor(CASCADE_PIPELINE_NAME);
             createTextChunkingIndex(INDEX_NAME, CASCADE_PIPELINE_NAME);
-            ingestDocument(TEST_DOCUMENT);
+
+            String document = getDocumentFromFilePath(TEST_DOCUMENT);
+            ingestDocument(INDEX_NAME, document);
 
             List<String> expectedPassages = new ArrayList<>();
             expectedPassages.add("This is an example document to be chunked.");
@@ -176,6 +182,23 @@ public void testTextChunkingProcessor_withCascadePipeline_successful() {
         }
     }
 
+    public void testTextChunkingProcessor_withFixedTokenLengthAlgorithmStandardTokenizer_whenReindexingDocument_thenSuccessful()
+        throws Exception {
+        try {
+            String fromIndexName = "test-reindex-from";
+            createIndexWithConfiguration(fromIndexName, "{ \"settings\": { \"number_of_shards\": 1, \"number_of_replicas\": 0 } }", null);
+            String document = getDocumentFromFilePath(TEST_DOCUMENT);
+            ingestDocument(fromIndexName, document);
+
+            createPipelineProcessor(FIXED_TOKEN_LENGTH_PIPELINE_WITH_STANDARD_TOKENIZER_NAME);
+            createTextChunkingIndex(INDEX_NAME, FIXED_TOKEN_LENGTH_PIPELINE_WITH_STANDARD_TOKENIZER_NAME);
+            reindex(fromIndexName, INDEX_NAME);
+            assertEquals(1, getDocCount(INDEX_NAME));
+        } finally {
+            wipeOfTestResources(INDEX_NAME, FIXED_TOKEN_LENGTH_PIPELINE_WITH_STANDARD_TOKENIZER_NAME, null, null);
+        }
+    }
+
     private void validateIndexIngestResults(String indexName, String fieldName, Object expected) {
         assertEquals(1, getDocCount(indexName));
         MatchAllQueryBuilder query = new MatchAllQueryBuilder();
@@ -205,23 +228,9 @@ private void createTextChunkingIndex(String indexName, String pipelineName) thro
         createIndexWithConfiguration(indexName, Files.readString(Path.of(indexSettingsURLPath.toURI())), pipelineName);
     }
 
-    private void ingestDocument(String documentPath) throws Exception {
-        URL documentURLPath = classLoader.getResource(documentPath);
+    private String getDocumentFromFilePath(String filePath) throws Exception {
+        URL documentURLPath = classLoader.getResource(filePath);
         Objects.requireNonNull(documentURLPath);
-        String document = Files.readString(Path.of(documentURLPath.toURI()));
-        Response response = makeRequest(
-            client(),
-            "POST",
-            INDEX_NAME + "/_doc?refresh",
-            null,
-            toHttpEntity(document),
-            ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana"))
-        );
-        Map<String, Object> map = XContentHelper.convertToMap(
-            XContentType.JSON.xContent(),
-            EntityUtils.toString(response.getEntity()),
-            false
-        );
-        assertEquals("created", map.get("result"));
+        return Files.readString(Path.of(documentURLPath.toURI()));
     }
 }
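
Note: the reindex(fromIndexName, toIndexName) helper used by the new tests in both files is likewise inherited from BaseNeuralSearchIT and not shown in this diff. A minimal sketch, assuming it wraps the standard _reindex API, which runs the destination index's default ingest pipeline over each copied document; that pipeline re-execution is exactly what these reindex tests exercise:

    // Hypothetical sketch only: the real BaseNeuralSearchIT helper is not part
    // of this commit. It would issue a standard _reindex request.
    private void reindex(String fromIndexName, String toIndexName) throws Exception {
        String body = "{ \"source\": { \"index\": \"" + fromIndexName + "\" }, "
            + "\"dest\": { \"index\": \"" + toIndexName + "\" } }";
        Response response = makeRequest(
            client(),
            "POST",
            "_reindex?refresh",
            null,
            toHttpEntity(body),
            ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana"))
        );
        // On completion the destination's default pipeline has already produced
        // the sparse-encoded or chunked fields, so getDocCount() can assert on it
        assertEquals(200, response.getStatusLine().getStatusCode());
    }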
