Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BooleanQuery rewrite for must_not RangeQuery clauses #17655

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Rule Based Auto-tagging] Add rule schema for auto tagging ([#17238](https://github.com/opensearch-project/OpenSearch/pull/17238))
- Renaming the node role search to warm ([#17573](https://github.com/opensearch-project/OpenSearch/pull/17573))
- Introduce a new search node role to hold search only shards ([#17620](https://github.com/opensearch-project/OpenSearch/pull/17620))
- Fix systemd integTest on deb regarding path ownership check ([#17641](https://github.com/opensearch-project/OpenSearch/pull/17641))
- Fix systemd integTest on deb regarding path ownership check ([#17641](https://github.com/opensearch-project/OpenSearch/pull/17641))
- Add dfs transformation function in XContentMapValues ([#17612](https://github.com/opensearch-project/OpenSearch/pull/17612))
- Add BooleanQuery rewrite for must_not RangeQuery clauses ([#17655](https://github.com/opensearch-project/OpenSearch/pull/17655))

### Changed
- Migrate BC libs to their FIPS counterparts ([#14912](https://github.com/opensearch-project/OpenSearch/pull/14912))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.query;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.opensearch.common.settings.Settings;
import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase;

import java.util.Arrays;
import java.util.Collection;

import static org.opensearch.index.query.QueryBuilders.boolQuery;
import static org.opensearch.index.query.QueryBuilders.rangeQuery;
import static org.opensearch.index.query.QueryBuilders.termQuery;
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;

public class BooleanQueryIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase {

public BooleanQueryIT(Settings staticSettings) {
super(staticSettings);
}

@ParametersFactory
public static Collection<Object[]> parameters() {
return Arrays.asList(
new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() },
new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() }
);
}

public void testMustNotRangeRewrite() throws Exception {
String intField = "int_field";
String termField1 = "term_field_1";
String termField2 = "term_field_2";
// If we don't explicitly set this mapping, term_field_2 is not correctly mapped as a keyword field and queries don't work as
// expected
createIndex(
"test",
Settings.EMPTY,
"{\"properties\":{\"int_field\":{\"type\": \"integer\"},\"term_field_1\":{\"type\": \"keyword\"},\"term_field_2\":{\"type\": \"keyword\"}}}}"
);
int numDocs = 100;

for (int i = 0; i < numDocs; i++) {
String termValue1 = "odd";
String termValue2 = "A";
if (i % 2 == 0) {
termValue1 = "even";
}
if (i >= numDocs / 2) {
termValue2 = "B";
}
client().prepareIndex("test")
.setId(Integer.toString(i))
.setSource(intField, i, termField1, termValue1, termField2, termValue2)
.get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int lt = 80;
int gte = 20;
int expectedHitCount = numDocs - (lt - gte);

assertHitCount(
client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(lt).gte(gte))).get(),
expectedHitCount
);

assertHitCount(
client().prepareSearch()
.setQuery(boolQuery().must(termQuery(termField1, "even")).mustNot(rangeQuery(intField).lt(lt).gte(gte)))
.get(),
expectedHitCount / 2
);

// Test with preexisting should fields, to ensure the original default minimumShouldMatch is respected
// once we add a must clause during rewrite, even if minimumShouldMatch is not explicitly set
assertHitCount(
client().prepareSearch().setQuery(boolQuery().should(termQuery(termField1, "even")).should(termQuery(termField2, "B"))).get(),
3 * numDocs / 4
);
assertHitCount(
client().prepareSearch()
.setQuery(
boolQuery().should(termQuery(termField1, "even"))
.should(termQuery(termField2, "A"))
.mustNot(rangeQuery(intField).lt(lt).gte(gte))
)
.get(),
3 * expectedHitCount / 4
);

assertHitCount(client().prepareSearch().setQuery(boolQuery().mustNot(rangeQuery(intField).lt(numDocs * 2))).get(), 0);
}

public void testMustNotDateRangeRewrite() throws Exception {
int minDay = 10;
int maxDay = 31;
String dateField = "date_field";
createIndex("test");
for (int day = minDay; day <= maxDay; day++) {
client().prepareIndex("test").setSource(dateField, getDate(day, 0)).get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int minExcludedDay = 15;
int maxExcludedDay = 25;
int expectedHitCount = maxDay + 1 - minDay - (maxExcludedDay - minExcludedDay + 1);

assertHitCount(
client().prepareSearch("test")
.setQuery(boolQuery().mustNot(rangeQuery(dateField).gte(getDate(minExcludedDay, 0)).lte(getDate(maxExcludedDay, 0))))
.get(),
expectedHitCount
);
}

public void testMustNotDateRangeWithFormatAndTimeZoneRewrite() throws Exception {
// Index a document for each hour of 1/1 in UTC. Then, do a boolean query excluding 1/1 for UTC+3.
// If the time zone is respected by the rewrite, we should see 3 matching documents,
// as there will be 3 hours that are 1/1 in UTC but not UTC+3.

String dateField = "date_field";
createIndex("test");
String format = "strict_date_hour_minute_second_fraction";

for (int hour = 0; hour < 24; hour++) {
client().prepareIndex("test").setSource(dateField, getDate(1, hour)).get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int zoneOffset = 3;
assertHitCount(
client().prepareSearch("test")
.setQuery(
boolQuery().mustNot(
rangeQuery(dateField).gte(getDate(1, 0)).lte(getDate(1, 23)).timeZone("+" + zoneOffset).format(format)
)
)
.get(),
zoneOffset
);
}

private String padZeros(int value, int length) {
// String.format() not allowed
String ret = Integer.toString(value);
if (ret.length() < length) {
ret = "0".repeat(length - ret.length()) + ret;
}
return ret;
}

private String getDate(int day, int hour) {
return "2016-01-" + padZeros(day, 2) + "T" + padZeros(hour, 2) + ":00:00.000";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,13 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Stream;

Expand Down Expand Up @@ -393,9 +396,13 @@
return any.get();
}

changed |= rewriteMustNotRangeClausesToShould(newBuilder);

if (changed) {
newBuilder.adjustPureNegative = adjustPureNegative;
newBuilder.minimumShouldMatch = minimumShouldMatch;
if (minimumShouldMatch != null) {
newBuilder.minimumShouldMatch = minimumShouldMatch;
}
newBuilder.boost(boost());
newBuilder.queryName(queryName());
return newBuilder;
Expand Down Expand Up @@ -460,4 +467,47 @@

}

private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder) {
// If there is a range query on a given field in a must_not clause, it's more performant to execute it as
// multiple should clauses representing everything outside the target range.

boolean changed = false;
// For now, only handle the case where there's exactly 1 range query for this field.
Map<String, Integer> fieldCounts = new HashMap<>();
Set<RangeQueryBuilder> rangeQueries = new HashSet<>();
for (QueryBuilder clause : mustNotClauses) {
if (clause instanceof RangeQueryBuilder rq) {
fieldCounts.merge(rq.fieldName(), 1, Integer::sum);
rangeQueries.add(rq);
}
}

for (RangeQueryBuilder rq : rangeQueries) {
String fieldName = rq.fieldName();
if (fieldCounts.getOrDefault(fieldName, 0) == 1) {
List<RangeQueryBuilder> complement = rq.getComplement();
if (complement != null) {
BoolQueryBuilder nestedBoolQuery = new BoolQueryBuilder();
nestedBoolQuery.minimumShouldMatch(1);
for (RangeQueryBuilder complementComponent : complement) {
nestedBoolQuery.should(complementComponent);
}
newBuilder.must(nestedBoolQuery);
newBuilder.mustNotClauses.remove(rq);
changed = true;
}
}
}

if (minimumShouldMatch == null && changed) {
if ((!shouldClauses.isEmpty()) && mustClauses.isEmpty() && filterClauses.isEmpty()) {
// If there were originally should clauses and no must/filter clauses, null minimumShouldMatch is set to a default of 1
// within Lucene.
// But if there was originally a must or filter clause, the default is 0.
// If we added a must clause due to this rewrite, we should respect what the original default would have been.
newBuilder.minimumShouldMatch(1);

Check warning on line 508 in server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java#L508

Added line #L508 was not covered by tests
}
}
return changed;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
import java.io.IOException;
import java.time.DateTimeException;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

/**
Expand Down Expand Up @@ -542,4 +544,42 @@
&& Objects.equals(includeUpper, other.includeUpper)
&& Objects.equals(format, other.format);
}

/**
* Returns a list of RangeQueryBuilder whose elements, when combined, form the complement of this range query.
* May be null.
* @return the complement
*/
public List<RangeQueryBuilder> getComplement() {
if (relation != null && relation != ShapeRelation.INTERSECTS) {
return null;

Check warning on line 555 in server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java#L555

Added line #L555 was not covered by tests
}
List<RangeQueryBuilder> complement = new ArrayList<>();
if (from != null) {
RangeQueryBuilder belowRange = new RangeQueryBuilder(fieldName);
belowRange.to(from);
belowRange.includeUpper(!includeLower);
complement.add(belowRange);
}

if (to != null) {
RangeQueryBuilder aboveRange = new RangeQueryBuilder(fieldName);
aboveRange.from(to);
aboveRange.includeLower(!includeUpper);
complement.add(aboveRange);
}

if (format != null) {
for (RangeQueryBuilder rq : complement) {
rq.format(format);
}

Check warning on line 575 in server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java#L574-L575

Added lines #L574 - L575 were not covered by tests
}
if (timeZone != null) {
for (RangeQueryBuilder rq : complement) {
rq.timeZone = timeZone;
}

Check warning on line 580 in server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java#L579-L580

Added lines #L579 - L580 were not covered by tests
}

return complement;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -497,4 +497,79 @@ public void testVisit() {
assertEquals(0, set.size());

}

public void testOneMustNotRangeRewritten() throws Exception {
int from = 10;
int to = 20;
for (boolean includeLower : new boolean[] { true, false }) {
for (boolean includeUpper : new boolean[] { true, false }) {
BoolQueryBuilder qb = new BoolQueryBuilder();
QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, to, includeLower, includeUpper);
qb.mustNot(rq);

BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext());
assertFalse(rewritten.mustNot().contains(rq));

QueryBuilder expectedLowerQuery = getRangeQueryBuilder(INT_FIELD_NAME, null, from, false, !includeLower);
QueryBuilder expectedUpperQuery = getRangeQueryBuilder(INT_FIELD_NAME, to, null, !includeUpper, true);
assertEquals(1, rewritten.must().size());

BoolQueryBuilder nestedBoolQuery = (BoolQueryBuilder) rewritten.must().get(0);
assertEquals(2, nestedBoolQuery.should().size());
assertEquals("1", nestedBoolQuery.minimumShouldMatch());
assertTrue(nestedBoolQuery.should().contains(expectedLowerQuery));
assertTrue(nestedBoolQuery.should().contains(expectedUpperQuery));
}
}
}

public void testOneSingleEndedMustNotRangeRewritten() throws Exception {
// Test a must_not range query with only one endpoint is rewritten correctly
int from = 10;
BoolQueryBuilder qb = new BoolQueryBuilder();
QueryBuilder rq = getRangeQueryBuilder(INT_FIELD_NAME, from, null, false, false);
qb.mustNot(rq);
BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext());
assertFalse(rewritten.mustNot().contains(rq));

QueryBuilder expectedQuery = getRangeQueryBuilder(INT_FIELD_NAME, null, from, false, true);
assertEquals(1, rewritten.must().size());
BoolQueryBuilder nestedBoolQuery = (BoolQueryBuilder) rewritten.must().get(0);
assertEquals(1, nestedBoolQuery.should().size());
assertTrue(nestedBoolQuery.should().contains(expectedQuery));
assertEquals("1", nestedBoolQuery.minimumShouldMatch());
}

public void testMultipleMustNotRangesNotRewritten() throws Exception {
BoolQueryBuilder qb = new BoolQueryBuilder();
// Test a field with two ranges is not rewritten
QueryBuilder rq1of2 = new RangeQueryBuilder(INT_RANGE_FIELD_NAME).gt(10).lt(20);
QueryBuilder rq2of2 = new RangeQueryBuilder(INT_RANGE_FIELD_NAME).gt(30).lt(40);
qb.mustNot(rq1of2);
qb.mustNot(rq2of2);
BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext());

assertTrue(rewritten.mustNot().contains(rq1of2));
assertTrue(rewritten.mustNot().contains(rq2of2));
assertEquals(0, rewritten.should().size());
}

private QueryBuilder getRangeQueryBuilder(String fieldName, Integer lower, Integer upper, boolean includeLower, boolean includeUpper) {
RangeQueryBuilder rq = new RangeQueryBuilder(fieldName);
if (lower != null) {
if (includeLower) {
rq.gte(lower);
} else {
rq.gt(lower);
}
}
if (upper != null) {
if (includeUpper) {
rq.lte(upper);
} else {
rq.lt(upper);
}
}
return rq;
}
}
Loading