Skip to content

Commit 032f409

Browse files
authored
Pass in order terms as sorted to TermInSetQuery() (#17714)
* pass in order terms as sorted to TermInSetQuery() Signed-off-by: Mikhail Khludnev <[email protected]> * slightly more elegant solution Signed-off-by: Mikhail Khludnev <[email protected]> * Attempting mocking TermInSetQ constructor. Signed-off-by: Mikhail Khludnev <[email protected]> * Handle ids as well. Signed-off-by: Mikhail Khludnev <[email protected]> * forbidden api Signed-off-by: Mikhail Khludnev <[email protected]> * make unnecessary method slow but correct. Signed-off-by: Mikhail Khludnev <[email protected]> * make unnecessary method slow but correct. Signed-off-by: Mikhail Khludnev <[email protected]> * Polish test coverage Signed-off-by: Mikhail Khludnev <[email protected]> * CHANGELOG.md Signed-off-by: Mikhail Khludnev <[email protected]> * assertThrows Signed-off-by: Mikhail Khludnev <[email protected]> * spotlessApply Signed-off-by: Mikhail Khludnev <[email protected]> * coverage tests and refactoring Signed-off-by: Mikhail Khludnev <[email protected]> * javadoc Signed-off-by: Mikhail Khludnev <[email protected]> * javadoc Signed-off-by: Mikhail Khludnev <[email protected]> * mark nocommit Signed-off-by: Mikhail Khludnev <[email protected]> * one more nocommit test Signed-off-by: Mikhail Khludnev <[email protected]> * forbidden api Signed-off-by: Mikhail Khludnev <[email protected]> * no commit for out of line tests Signed-off-by: Mikhail Khludnev <[email protected]> * Review Signed-off-by: Mikhail Khludnev <[email protected]> --------- Signed-off-by: Mikhail Khludnev <[email protected]> Signed-off-by: Mikhail Khludnev <[email protected]>
1 parent cd8fa4f commit 032f409

File tree

7 files changed

+367
-21
lines changed

7 files changed

+367
-21
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1616
- Add FilterFieldType for developers who want to wrap MappedFieldType ([#17627](https://github.com/opensearch-project/OpenSearch/pull/17627))
1717
- [Rule Based Auto-tagging] Add in-memory rule processing service ([#17365](https://github.com/opensearch-project/OpenSearch/pull/17365))
1818
- [Security Manager Replacement] Create initial Java Agent to intercept Socket::connect calls ([#17724](https://github.com/opensearch-project/OpenSearch/pull/17724))
19+
- Faster `terms_query` with already sorted terms ([#17714](https://github.com/opensearch-project/OpenSearch/pull/17714))
1920
- Add ingestion management APIs for pause, resume and get ingestion state ([#17631](https://github.com/opensearch-project/OpenSearch/pull/17631))
2021
- [Security Manager Replacement] Enhance Java Agent to intercept System::exit ([#17746](https://github.com/opensearch-project/OpenSearch/pull/17746))
2122
- [Security Manager Replacement] Add a policy parser for Java agent security policies ([#17753](https://github.com/opensearch-project/OpenSearch/pull/17753))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.mapper;
10+
11+
import org.apache.lucene.search.TermInSetQuery;
12+
import org.apache.lucene.util.BytesRef;
13+
14+
import java.util.AbstractSet;
15+
import java.util.ArrayList;
16+
import java.util.Collection;
17+
import java.util.Comparator;
18+
import java.util.Iterator;
19+
import java.util.List;
20+
import java.util.SortedSet;
21+
import java.util.function.Consumer;
22+
import java.util.function.Function;
23+
import java.util.function.Supplier;
24+
25+
/**
26+
* Purposed for passing terms into {@link TermInSetQuery}.
27+
* If the given terms are sorted already, it wrap it with a SortedSet stub.
28+
* Otherwise, it passes terms as list.
29+
*/
30+
public class BytesRefsCollectionBuilder implements Consumer<BytesRef>, Supplier<Collection<BytesRef>> {
31+
32+
/**
33+
* Strategy for building BytesRef collection.
34+
* */
35+
protected interface ConsumerStrategy extends Function<BytesRef, ConsumerStrategy>, Supplier<Collection<BytesRef>> {}
36+
37+
public BytesRefsCollectionBuilder(int sizeExpected) {
38+
terms = new ArrayList<>(sizeExpected);
39+
}
40+
41+
protected final List<BytesRef> terms;
42+
protected ConsumerStrategy delegate = createStartStrategy();
43+
44+
@Override
45+
public void accept(BytesRef bytesRef) {
46+
delegate = delegate.apply(bytesRef);
47+
}
48+
49+
@Override
50+
public Collection<BytesRef> get() {
51+
Collection<BytesRef> result = delegate.get();
52+
delegate = createFrozenStrategy(result);
53+
return result;
54+
}
55+
56+
protected ConsumerStrategy createStartStrategy() {
57+
return new ConsumerStrategy() {
58+
@Override
59+
public ConsumerStrategy apply(BytesRef firstBytes) {
60+
terms.add(firstBytes); // firstly, just store
61+
return createSortedStrategy(firstBytes);
62+
}
63+
64+
@Override
65+
public Collection<BytesRef> get() {
66+
return terms; // empty list
67+
}
68+
};
69+
}
70+
71+
protected ConsumerStrategy createSortedStrategy(BytesRef firstBytes) {
72+
return new ConsumerStrategy() {
73+
BytesRef prev = firstBytes;
74+
75+
@Override
76+
public ConsumerStrategy apply(BytesRef bytesRef) {
77+
terms.add(bytesRef);
78+
if (bytesRef.compareTo(prev) >= 0) { // keep checking sorted
79+
prev = bytesRef;
80+
return this;
81+
} else { // isn't sorted
82+
return createUnsortedStrategy();
83+
}
84+
}
85+
86+
@Override
87+
public Collection<BytesRef> get() {
88+
return new SortedBytesSet(terms);
89+
}
90+
};
91+
}
92+
93+
protected ConsumerStrategy createUnsortedStrategy() {
94+
return new ConsumerStrategy() {
95+
@Override
96+
public ConsumerStrategy apply(BytesRef bytesRef) { // just storing
97+
terms.add(bytesRef);
98+
return this;
99+
}
100+
101+
@Override
102+
public Collection<BytesRef> get() {
103+
return terms;
104+
}
105+
};
106+
}
107+
108+
protected ConsumerStrategy createFrozenStrategy(Collection<BytesRef> result) {
109+
return new ConsumerStrategy() {
110+
111+
@Override
112+
public ConsumerStrategy apply(BytesRef bytesRef) {
113+
throw new IllegalStateException("already build");
114+
}
115+
116+
@Override
117+
public Collection<BytesRef> get() {
118+
return result;
119+
}
120+
};
121+
}
122+
123+
/**
124+
* {@link SortedSet<BytesRef>} for passing into TermInSetQuery()
125+
* */
126+
protected static class SortedBytesSet extends AbstractSet<BytesRef> implements SortedSet<BytesRef> {
127+
128+
private final List<BytesRef> bytesRefs;
129+
130+
public SortedBytesSet(List<BytesRef> bytesRefs) {
131+
this.bytesRefs = bytesRefs;
132+
}
133+
134+
@Override
135+
public Iterator<BytesRef> iterator() {
136+
return bytesRefs.iterator();
137+
}
138+
139+
@Override
140+
public int size() {
141+
return bytesRefs.size();
142+
}
143+
144+
@Override
145+
public Comparator<? super BytesRef> comparator() {
146+
return null;
147+
}
148+
149+
@Override
150+
public SortedSet<BytesRef> subSet(BytesRef fromElement, BytesRef toElement) {
151+
throw new UnsupportedOperationException();
152+
}
153+
154+
@Override
155+
public SortedSet<BytesRef> headSet(BytesRef toElement) {
156+
throw new UnsupportedOperationException();
157+
}
158+
159+
@Override
160+
public SortedSet<BytesRef> tailSet(BytesRef fromElement) {
161+
throw new UnsupportedOperationException();
162+
}
163+
164+
@Override
165+
public BytesRef first() {
166+
throw new UnsupportedOperationException();
167+
}
168+
169+
@Override
170+
public BytesRef last() {
171+
throw new UnsupportedOperationException();
172+
}
173+
174+
/**
175+
* Dedicated for {@link TermInSetQuery#TermInSetQuery(String, Collection)}.
176+
*/
177+
@Override
178+
public <T> T[] toArray(T[] a) {
179+
return bytesRefs.toArray(a);
180+
}
181+
}
182+
}

server/src/main/java/org/opensearch/index/mapper/IdFieldMapper.java

+3-5
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,7 @@
6666
import org.opensearch.search.sort.SortOrder;
6767

6868
import java.io.IOException;
69-
import java.util.ArrayList;
7069
import java.util.Arrays;
71-
import java.util.Collection;
7270
import java.util.Collections;
7371
import java.util.List;
7472
import java.util.function.Supplier;
@@ -166,15 +164,15 @@ public Query existsQuery(QueryShardContext context) {
166164
@Override
167165
public Query termsQuery(List<?> values, QueryShardContext context) {
168166
failIfNotIndexed();
169-
Collection<BytesRef> bytesRefs = new ArrayList<>(values.size());
167+
BytesRefsCollectionBuilder bytesRefs = new BytesRefsCollectionBuilder(values.size());
170168
for (int i = 0; i < values.size(); i++) {
171169
Object idObject = values.get(i);
172170
if (idObject instanceof BytesRef) {
173171
idObject = ((BytesRef) idObject).utf8ToString();
174172
}
175-
bytesRefs.add(Uid.encodeId(idObject.toString()));
173+
bytesRefs.accept(Uid.encodeId(idObject.toString()));
176174
}
177-
return new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), bytesRefs);
175+
return new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), bytesRefs.get());
178176
}
179177

180178
@Override

server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java

+12-11
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,7 @@
7070

7171
import java.io.IOException;
7272
import java.io.UncheckedIOException;
73-
import java.util.ArrayList;
7473
import java.util.Arrays;
75-
import java.util.Collection;
7674
import java.util.Collections;
7775
import java.util.List;
7876
import java.util.Map;
@@ -449,23 +447,26 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
449447
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
450448
return super.termsQuery(values, context);
451449
}
452-
Collection<BytesRef> iBytesRefs = new ArrayList<>(values.size());
453-
Collection<BytesRef> dVByteRefs = new ArrayList<>(values.size());
450+
BytesRefsCollectionBuilder iBytesRefs = new BytesRefsCollectionBuilder(values.size());
451+
BytesRefsCollectionBuilder dVByteRefs = new BytesRefsCollectionBuilder(values.size());
454452
for (int i = 0; i < values.size(); i++) {
455-
iBytesRefs.add(indexedValueForSearch(values.get(i)));
456-
dVByteRefs.add(indexedValueForSearch(rewriteForDocValue(values.get(i))));
453+
BytesRef idxBytes = indexedValueForSearch(values.get(i));
454+
iBytesRefs.accept(idxBytes);
455+
BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i)));
456+
dVByteRefs.accept(dvBytes);
457457
}
458-
Query indexQuery = new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), iBytesRefs);
459-
Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs);
458+
Query indexQuery = new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), iBytesRefs.get());
459+
Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), dVByteRefs.get());
460460
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
461461
}
462462
// if we only have doc_values enabled, we construct a new query with doc_values re-written
463463
if (hasDocValues()) {
464-
Collection<BytesRef> bytesRefs = new ArrayList<>(values.size());
464+
BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size());
465465
for (int i = 0; i < values.size(); i++) {
466-
bytesRefs.add(indexedValueForSearch(rewriteForDocValue(values.get(i))));
466+
BytesRef dvBytes = indexedValueForSearch(rewriteForDocValue(values.get(i)));
467+
bytesCollector.accept(dvBytes);
467468
}
468-
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs);
469+
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesCollector.get());
469470
}
470471
// has index enabled, we're going to return the query as is
471472
return super.termsQuery(values, context);

server/src/main/java/org/opensearch/index/mapper/TermBasedFieldType.java

+4-5
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,6 @@
4343
import org.opensearch.common.lucene.search.AutomatonQueries;
4444
import org.opensearch.index.query.QueryShardContext;
4545

46-
import java.util.ArrayList;
47-
import java.util.Collection;
4846
import java.util.List;
4947
import java.util.Map;
5048

@@ -96,11 +94,12 @@ public Query termQuery(Object value, QueryShardContext context) {
9694
@Override
9795
public Query termsQuery(List<?> values, QueryShardContext context) {
9896
failIfNotIndexed();
99-
Collection<BytesRef> bytesRefs = new ArrayList<>(values.size());
97+
BytesRefsCollectionBuilder bytesCollector = new BytesRefsCollectionBuilder(values.size());
10098
for (int i = 0; i < values.size(); i++) {
101-
bytesRefs.add(indexedValueForSearch(values.get(i)));
99+
BytesRef elem = indexedValueForSearch(values.get(i));
100+
bytesCollector.accept(elem);
102101
}
103-
return new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), bytesRefs);
102+
return new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, name(), bytesCollector.get());
104103
}
105104

106105
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.mapper;
10+
11+
import org.apache.lucene.util.BytesRef;
12+
import org.opensearch.test.OpenSearchTestCase;
13+
14+
import java.util.Arrays;
15+
import java.util.Collection;
16+
import java.util.Iterator;
17+
import java.util.List;
18+
import java.util.SortedSet;
19+
import java.util.stream.Stream;
20+
21+
public class BytesRefsCollectionBuilderTests extends OpenSearchTestCase {
22+
23+
public void testBuildSortedNotSorted() {
24+
String[] seedStrings = generateRandomStringArray(10, 10, false, true);
25+
List<BytesRef> bytesRefList = Arrays.stream(seedStrings).map(BytesRef::new).toList();
26+
List<BytesRef> sortedBytesRefs = bytesRefList.stream().sorted().toList();
27+
28+
Collection<BytesRef> sortedSet = assertCollectionBuilt(sortedBytesRefs);
29+
assertCollectionBuilt(bytesRefList);
30+
31+
assertTrue(sortedSet instanceof SortedSet<BytesRef>);
32+
assertNull(((SortedSet<BytesRef>) sortedSet).comparator());
33+
}
34+
35+
public void testBuildFooBar() {
36+
String[] reverseOrderStrings = new String[] { "foo", "bar" };
37+
List<BytesRef> bytesRefList = Arrays.stream(reverseOrderStrings).map(BytesRef::new).toList();
38+
List<BytesRef> sortedBytesRefs = bytesRefList.stream().sorted().toList();
39+
40+
Collection<BytesRef> sortedSet = assertCollectionBuilt(sortedBytesRefs);
41+
Collection<BytesRef> reverseList = assertCollectionBuilt(bytesRefList);
42+
43+
assertTrue(sortedSet instanceof SortedSet<BytesRef>);
44+
assertNull(((SortedSet<BytesRef>) sortedSet).comparator());
45+
46+
assertTrue(reverseList instanceof List<BytesRef>);
47+
}
48+
49+
public void testFrozen() {
50+
BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1);
51+
String[] seedStrings = generateRandomStringArray(5, 10, false, true);
52+
Arrays.stream(seedStrings).map(BytesRef::new).forEachOrdered(builder);
53+
Collection<BytesRef> bytesRefCollection = builder.get();
54+
assertNotNull(bytesRefCollection);
55+
assertEquals(seedStrings.length, bytesRefCollection.size());
56+
assertThrows(IllegalStateException.class, () -> builder.accept(new BytesRef("illegal state")));
57+
assertSame(bytesRefCollection, builder.get());
58+
}
59+
60+
private static Collection<BytesRef> assertCollectionBuilt(List<BytesRef> sortedBytesRefs) {
61+
BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1);
62+
sortedBytesRefs.stream().forEachOrdered(builder);
63+
Collection<BytesRef> bytesRefCollection = builder.get();
64+
assertEquals(bytesRefCollection.size(), sortedBytesRefs.size());
65+
for (Iterator<BytesRef> iterator = bytesRefCollection.iterator(), iterator2 = sortedBytesRefs.iterator(); iterator.hasNext()
66+
|| iterator2.hasNext();) {
67+
assertTrue(iterator.next().bytesEquals(iterator2.next()));
68+
}
69+
return bytesRefCollection;
70+
}
71+
72+
public void testCoverUnsupported() {
73+
BytesRefsCollectionBuilder builder = new BytesRefsCollectionBuilder(1);
74+
Stream.of("in", "order").map(BytesRef::new).forEachOrdered(builder);
75+
SortedSet<BytesRef> bytesRefCollection = (SortedSet<BytesRef>) builder.get();
76+
assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.subSet(new BytesRef("a"), new BytesRef("z")));
77+
assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.headSet(new BytesRef("a")));
78+
assertThrows(UnsupportedOperationException.class, () -> bytesRefCollection.tailSet(new BytesRef("a")));
79+
assertThrows(UnsupportedOperationException.class, bytesRefCollection::first);
80+
assertThrows(UnsupportedOperationException.class, bytesRefCollection::last);
81+
}
82+
}

0 commit comments

Comments
 (0)