
Commit 6db810b

[Backport 2.x] Fix auto date histogram rounding assertion bug (#17175)

* Fix auto date histogram rounding assertion bug (#17023)
  * Add explanatory comments for auto date histo increaseRoundingIfNeeded.
  * Add testFilterRewriteWithTZRoundingRangeAssert() to reproduce auto date histo assertion bug per #16932
  * Fix #16932. Ensure optimized path can only increase preparedRounding of agg.
  * Spotless apply
  * Fast fail filter rewrite opt in date histo aggs for non UTC timezones
  * Remove redundant UTC check from getInterval().
  * Save a call to prepareRounding if roundingIdx is unchanged.
  * Spotless apply
  * Changelog
  * Add ZoneId getter for date histo filter rewrite canOptimize check.
  * Spotless apply
  * Disable ff optimization for composite agg in canOptimize.
  * Spotless apply
  * Handle utc timezone check
  * Remove redundant timeZone getter.
  * Simplify ff prepared rounding check.

  Signed-off-by: Finn Carroll <[email protected]>
  Signed-off-by: bowenlan-amzn <[email protected]>
  Co-authored-by: bowenlan-amzn <[email protected]>
  (cherry picked from commit de59264)
  Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>

* Remove breaking abstract isUTC() getter from Rounding.java.
* Remove unused ZoneId getter.

---------

Signed-off-by: Finn Carroll <[email protected]>
Signed-off-by: bowenlan-amzn <[email protected]>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: bowenlan-amzn <[email protected]>

(cherry picked from commit a79c6e8)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 56b5726 commit 6db810b

8 files changed (+161 -27 lines)


CHANGELOG.md  (+2)

@@ -117,6 +117,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - [WLM] Fix the QueryGroupTask logging bug ([#17169](https://github.com/opensearch-project/OpenSearch/pull/17169))
 - Use OpenSearch version to deserialize remote custom metadata([#16494](https://github.com/opensearch-project/OpenSearch/pull/16494))
 - Fix the failing CI's with `Failed to load eclipse jdt formatter` error ([#17172](https://github.com/opensearch-project/OpenSearch/pull/17172))
+- Fix AutoDateHistogramAggregator rounding assertion failure ([#17023](https://github.com/opensearch-project/OpenSearch/pull/17023))
+
 ### Security
 
 [Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.18...2.x

server/src/main/java/org/opensearch/common/Rounding.java  (+23 -16)

@@ -275,6 +275,14 @@ public DateTimeUnit unit() {
         return null;
     }
 
+    /**
+     * Helper function for checking if the time zone requested for date histogram
+     * aggregation is utc or not
+     */
+    public boolean isUTC() {
+        throw new UnsupportedOperationException();
+    }
+
     /**
      * A strategy for rounding milliseconds since epoch.
      *
@@ -676,6 +684,11 @@ public String toString() {
             return "Rounding[" + unit + " in " + timeZone + "]";
         }
 
+        @Override
+        public boolean isUTC() {
+            return "Z".equals(timeZone.getDisplayName(TextStyle.FULL, Locale.ENGLISH));
+        }
+
         private abstract class TimeUnitPreparedRounding extends PreparedRounding {
             @Override
             public double roundingSize(long utcMillis, DateTimeUnit timeUnit) {
@@ -1057,6 +1070,11 @@ public String toString() {
             return "Rounding[" + interval + " in " + timeZone + "]";
         }
 
+        @Override
+        public boolean isUTC() {
+            return "Z".equals(timeZone.getDisplayName(TextStyle.FULL, Locale.ENGLISH));
+        }
+
         private long roundKey(long value, long interval) {
             if (value < 0) {
                 return (value - interval + 1) / interval;
@@ -1379,6 +1397,11 @@ public boolean equals(Object obj) {
         public String toString() {
             return delegate + " offset by " + offset;
         }
+
+        @Override
+        public boolean isUTC() {
+            return delegate.isUTC();
+        }
     }
 
     public static Rounding read(StreamInput in) throws IOException {
@@ -1406,28 +1429,12 @@ public static OptionalLong getInterval(Rounding rounding) {
 
         if (rounding instanceof TimeUnitRounding) {
             interval = (((TimeUnitRounding) rounding).unit).extraLocalOffsetLookup();
-            if (!isUTCTimeZone(((TimeUnitRounding) rounding).timeZone)) {
-                // Fast filter aggregation cannot be used if it needs time zone rounding
-                return OptionalLong.empty();
-            }
         } else if (rounding instanceof TimeIntervalRounding) {
             interval = ((TimeIntervalRounding) rounding).interval;
-            if (!isUTCTimeZone(((TimeIntervalRounding) rounding).timeZone)) {
-                // Fast filter aggregation cannot be used if it needs time zone rounding
-                return OptionalLong.empty();
-            }
         } else {
             return OptionalLong.empty();
         }
 
         return OptionalLong.of(interval);
     }
-
-    /**
-     * Helper function for checking if the time zone requested for date histogram
-     * aggregation is utc or not
-     */
-    private static boolean isUTCTimeZone(final ZoneId zoneId) {
-        return "Z".equals(zoneId.getDisplayName(TextStyle.FULL, Locale.ENGLISH));
-    }
 }
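
The display-name comparison used by the new isUTC() overrides is worth a quick illustration. Below is a minimal, standalone sketch (the class name IsUtcCheckDemo is hypothetical, not OpenSearch code) showing what the "Z" check keys on for an offset-style UTC zone versus a region-style zone:

import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.format.TextStyle;
import java.util.Locale;

public class IsUtcCheckDemo {
    public static void main(String[] args) {
        // Same comparison the isUTC() implementations above perform: take the full English
        // display name of the zone and compare it against the literal "Z".
        for (ZoneId zone : new ZoneId[] { ZoneOffset.UTC, ZoneId.of("America/New_York") }) {
            String name = zone.getDisplayName(TextStyle.FULL, Locale.ENGLISH);
            // ZoneOffset.UTC has no localized name and falls back to its id "Z", so it passes;
            // a region id such as America/New_York prints a longer English name and fails.
            System.out.println(zone + " -> displayName=" + name + ", treated as UTC=" + "Z".equals(name));
        }
    }
}

Note that region-style ids such as ZoneId.of("UTC") may render a longer display name than "Z" depending on the JDK's locale data, so only offset-style UTC zones are guaranteed to pass this check.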

server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java  (+8)

@@ -182,6 +182,14 @@ protected boolean canOptimize() {
             });
         }
 
+        /**
+         * The filter rewrite optimized path does not support bucket intervals which are not fixed.
+         * For this reason we exclude non UTC timezones.
+         */
+        if (valuesSource.getRounding().isUTC() == false) {
+            return false;
+        }
+
         // bucketOrds is used for saving the date histogram results got from the optimization path
         bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), CardinalityUpperBound.ONE);
         return true;
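
The comment above ("bucket intervals which are not fixed") is the crux of why non-UTC zones are excluded. A small self-contained example, purely illustrative and independent of OpenSearch, showing that a calendar day is not a fixed number of milliseconds once DST is involved:

import java.time.Duration;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;

public class DstDayLengthDemo {
    public static void main(String[] args) {
        ZoneId newYork = ZoneId.of("America/New_York");

        // 2023-11-05 is the US DST fall-back date: that local day lasts 25 hours.
        ZonedDateTime dstDayStart = LocalDate.of(2023, 11, 5).atStartOfDay(newYork);
        ZonedDateTime dstDayEnd = LocalDate.of(2023, 11, 6).atStartOfDay(newYork);
        System.out.println("America/New_York day: " + Duration.between(dstDayStart, dstDayEnd)); // PT25H

        // The same calendar day in UTC is always exactly 24 hours.
        ZonedDateTime utcDayStart = LocalDate.of(2023, 11, 5).atStartOfDay(ZoneOffset.UTC);
        ZonedDateTime utcDayEnd = LocalDate.of(2023, 11, 6).atStartOfDay(ZoneOffset.UTC);
        System.out.println("UTC day: " + Duration.between(utcDayStart, utcDayEnd)); // PT24H
    }
}

Since (per the comment in the diff) the filter rewrite path relies on fixed bucket intervals, a zone whose "day" can be 23, 24, or 25 hours long cannot take that path, hence the early return false.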

server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java  (+9 -1)

@@ -34,7 +34,15 @@ public abstract class DateHistogramAggregatorBridge extends AggregatorBridge {
 
     int maxRewriteFilters;
 
-    protected boolean canOptimize(ValuesSourceConfig config) {
+    protected boolean canOptimize(ValuesSourceConfig config, Rounding rounding) {
+        /**
+         * The filter rewrite optimized path does not support bucket intervals which are not fixed.
+         * For this reason we exclude non UTC timezones.
+         */
+        if (rounding.isUTC() == false) {
+            return false;
+        }
+
         if (config.script() == null && config.missing() == null) {
             MappedFieldType fieldType = config.fieldType();
             if (fieldType instanceof DateFieldMapper.DateFieldType) {

server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java  (+46 -4)

@@ -149,7 +149,6 @@ private AutoDateHistogramAggregator(
         Aggregator parent,
         Map<String, Object> metadata
     ) throws IOException {
-
         super(name, factories, aggregationContext, parent, metadata);
         this.targetBuckets = targetBuckets;
         // TODO: Remove null usage here, by using a different aggregator for create
@@ -162,22 +161,34 @@ private AutoDateHistogramAggregator(
         DateHistogramAggregatorBridge bridge = new DateHistogramAggregatorBridge() {
             @Override
             protected boolean canOptimize() {
-                return canOptimize(valuesSourceConfig);
+                return canOptimize(valuesSourceConfig, roundingInfos[0].rounding);
             }
 
             @Override
             protected void prepare() throws IOException {
                 buildRanges(context);
             }
 
+            /**
+             * The filter rewrite optimization uses this method to pre-emptively update the preparedRounding
+             * when considering the optimized path for a single segment. This is necessary since the optimized path
+             * skips doc collection entirely which is where the preparedRounding is normally updated.
+             *
+             * @param low lower bound of rounding to prepare
+             * @param high upper bound of rounding to prepare
+             * @return select a prepared rounding which satisfies the conditions:
+             *         1. Is at least as large as our previously prepared rounding
+             *         2. Must span a range of [low, high] with buckets <= targetBuckets
+             */
             @Override
             protected Rounding getRounding(final long low, final long high) {
                 // max - min / targetBuckets = bestDuration
                 // find the right innerInterval this bestDuration belongs to
                 // since we cannot exceed targetBuckets, bestDuration should go up,
                 // so the right innerInterval should be an upper bound
                 long bestDuration = (high - low) / targetBuckets;
-                // reset so this function is idempotent
+
+                int prevRoundingIdx = roundingIdx;
                 roundingIdx = 0;
                 while (roundingIdx < roundingInfos.length - 1) {
                     final RoundingInfo curRoundingInfo = roundingInfos[roundingIdx];
@@ -190,7 +201,11 @@ protected Rounding getRounding(final long low, final long high) {
                     roundingIdx++;
                 }
 
-                preparedRounding = prepareRounding(roundingIdx);
+                // Ensure preparedRounding never shrinks
+                if (roundingIdx > prevRoundingIdx) {
+                    preparedRounding = prepareRounding(roundingIdx);
+                }
+
                 return roundingInfos[roundingIdx].rounding;
             }
 
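To make the guard above concrete: per the test comment further down, the filter rewrite path could re-prepare the rounding from one segment's [low, high], producing a prepared rounding whose strict input range did not cover documents already collected from another segment; the fix only re-prepares when the rounding index actually increases. A minimal, runnable sketch of that ratchet behaviour (class and method names are hypothetical, not the OpenSearch implementation):

public class PreparedRoundingRatchetDemo {
    private static int preparedIdx = 2; // suppose an earlier segment already required rounding index 2

    private static void onSegment(int candidateIdx) {
        // Only re-prepare when the candidate index is strictly larger; a smaller candidate
        // (from a narrow, segment-local range) must not shrink the prepared rounding.
        if (candidateIdx > preparedIdx) {
            preparedIdx = candidateIdx;
        }
        System.out.println("candidate=" + candidateIdx + " -> prepared=" + preparedIdx);
    }

    public static void main(String[] args) {
        onSegment(1); // narrow segment: prepared index stays at 2
        onSegment(3); // wider segment: prepared index advances to 3
    }
}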
@@ -403,12 +418,39 @@ private void collectValue(int doc, long rounded) throws IOException {
             increaseRoundingIfNeeded(rounded);
         }
 
+        /**
+         * Examine our current bucket count and the most recently added bucket to determine if an update to
+         * preparedRounding is required to keep total bucket count in compliance with targetBuckets.
+         *
+         * @param rounded the most recently collected value rounded
+         */
         private void increaseRoundingIfNeeded(long rounded) {
+            // If we are already using the rounding with the largest interval nothing can be done
             if (roundingIdx >= roundingInfos.length - 1) {
                 return;
             }
+
+            // Re calculate the max and min values we expect to bucket according to most recently rounded val
             min = Math.min(min, rounded);
             max = Math.max(max, rounded);
+
+            /**
+             * Quick explanation of the two below conditions:
+             *
+             * 1. [targetBuckets * roundingInfos[roundingIdx].getMaximumInnerInterval()]
+             * Represents the total bucket count possible before we will exceed targetBuckets
+             * even if we use the maximum inner interval of our current rounding. For example, consider the
+             * DAYS_OF_MONTH rounding where the maximum inner interval is 7 days (i.e. 1 week buckets).
+             * targetBuckets * roundingInfos[roundingIdx].getMaximumInnerInterval() would then be the number of
+             * 1 day buckets possible such that if we re-bucket to 1 week buckets we will have more 1 week buckets
+             * than our targetBuckets limit. If the current count of buckets exceeds this limit we must update
+             * our rounding.
+             *
+             * 2. [targetBuckets * roundingInfos[roundingIdx].getMaximumRoughEstimateDurationMillis()]
+             * The total duration of ms covered by our current rounding. In the case of MINUTES_OF_HOUR rounding
+             * getMaximumRoughEstimateDurationMillis is 60000. If our current total range in millis (max - min)
+             * exceeds this range we must update our rounding.
+             */
             if (bucketOrds.size() <= targetBuckets * roundingInfos[roundingIdx].getMaximumInnerInterval()
                 && max - min <= targetBuckets * roundingInfos[roundingIdx].getMaximumRoughEstimateDurationMillis()) {
                 return;
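
The two thresholds described in the comment block are easy to sanity-check numerically. A small worked example; the constants 7 and 60000 come from the comment above, while targetBuckets = 10 is an arbitrary illustrative value:

public class RoundingThresholdDemo {
    public static void main(String[] args) {
        int targetBuckets = 10; // illustrative only

        // Condition 1: DAYS_OF_MONTH with a maximum inner interval of 7 (one-week buckets).
        // Up to 10 * 7 = 70 one-day buckets can still be re-bucketed into <= 10 one-week buckets.
        int maxInnerInterval = 7;
        System.out.println("bucket-count threshold = " + (targetBuckets * maxInnerInterval)); // 70

        // Condition 2: MINUTES_OF_HOUR with a rough estimate of 60000 ms per bucket.
        // Once max - min exceeds 10 * 60000 = 600000 ms (10 minutes), a coarser rounding is needed.
        long roughEstimateMillis = 60_000L;
        System.out.println("range threshold (ms) = " + (targetBuckets * roughEstimateMillis)); // 600000
    }
}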

server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java  (+1 -1)

@@ -144,7 +144,7 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg
         DateHistogramAggregatorBridge bridge = new DateHistogramAggregatorBridge() {
             @Override
             protected boolean canOptimize() {
-                return canOptimize(valuesSourceConfig);
+                return canOptimize(valuesSourceConfig, rounding);
             }
 
             @Override

server/src/test/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregatorTests.java  (+55)

@@ -38,7 +38,9 @@
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
@@ -72,6 +74,7 @@
 import java.time.Instant;
 import java.time.LocalDate;
 import java.time.YearMonth;
+import java.time.ZoneId;
 import java.time.ZoneOffset;
 import java.time.ZonedDateTime;
 import java.util.ArrayList;
@@ -912,6 +915,58 @@ public void testWithPipelineReductions() throws IOException {
         );
     }
 
+    // Bugfix: https://github.com/opensearch-project/OpenSearch/issues/16932
+    public void testFilterRewriteWithTZRoundingRangeAssert() throws IOException {
+        /*
+         multiBucketIndexData must overlap with DST to produce a 'LinkedListLookup' prepared rounding.
+         This lookup rounding style maintains a strict max/min input range and will assert each value is in range.
+         */
+        final List<ZonedDateTime> multiBucketIndexData = Arrays.asList(
+            ZonedDateTime.of(2023, 10, 10, 0, 0, 0, 0, ZoneOffset.UTC),
+            ZonedDateTime.of(2023, 11, 11, 0, 0, 0, 0, ZoneOffset.UTC)
+        );
+
+        final List<ZonedDateTime> singleBucketIndexData = Arrays.asList(ZonedDateTime.of(2023, 12, 27, 0, 0, 0, 0, ZoneOffset.UTC));
+
+        try (Directory directory = newDirectory()) {
+            /*
+             Ensure we produce two segments on one shard such that the documents in seg 1 will be out of range of the
+             prepared rounding produced by the filter rewrite optimization considering seg 2 for optimized path.
+             */
+            IndexWriterConfig c = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+            try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, c)) {
+                indexSampleData(multiBucketIndexData, indexWriter);
+                indexWriter.flush();
+                indexSampleData(singleBucketIndexData, indexWriter);
+            }
+
+            try (IndexReader indexReader = DirectoryReader.open(directory)) {
+                final IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+
+                // Force agg to update rounding when it begins collecting from the second segment.
+                final AutoDateHistogramAggregationBuilder aggregationBuilder = new AutoDateHistogramAggregationBuilder("_name");
+                aggregationBuilder.setNumBuckets(3).field(DATE_FIELD).timeZone(ZoneId.of("America/New_York"));
+
+                Map<String, Integer> expectedDocCount = new TreeMap<>();
+                expectedDocCount.put("2023-10-01T00:00:00.000-04:00", 1);
+                expectedDocCount.put("2023-11-01T00:00:00.000-04:00", 1);
+                expectedDocCount.put("2023-12-01T00:00:00.000-05:00", 1);
+
+                final InternalAutoDateHistogram histogram = searchAndReduce(
+                    indexSearcher,
+                    DEFAULT_QUERY,
+                    aggregationBuilder,
+                    false,
+                    new DateFieldMapper.DateFieldType(aggregationBuilder.field()),
+                    new NumberFieldMapper.NumberFieldType(INSTANT_FIELD, NumberFieldMapper.NumberType.LONG),
+                    new NumberFieldMapper.NumberFieldType(NUMERIC_FIELD, NumberFieldMapper.NumberType.LONG)
+                );
+
+                assertThat(bucketCountsAsMap(histogram), equalTo(expectedDocCount));
+            }
+        }
+    }
+
     @Override
     protected IndexSettings createIndexSettings() {
         final Settings nodeSettings = Settings.builder().put("search.max_buckets", 25000).build();

test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java  (+17 -5)

@@ -607,9 +607,19 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         IndexSearcher searcher,
         Query query,
         AggregationBuilder builder,
+        boolean shardFanOut,
         MappedFieldType... fieldTypes
     ) throws IOException {
-        return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, fieldTypes);
+        return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, shardFanOut, fieldTypes);
+    }
+
+    protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(
+        IndexSearcher searcher,
+        Query query,
+        AggregationBuilder builder,
+        MappedFieldType... fieldTypes
+    ) throws IOException {
+        return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, randomBoolean(), fieldTypes);
     }
 
     protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(
@@ -619,7 +629,7 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         AggregationBuilder builder,
         MappedFieldType... fieldTypes
     ) throws IOException {
-        return searchAndReduce(indexSettings, searcher, query, builder, DEFAULT_MAX_BUCKETS, fieldTypes);
+        return searchAndReduce(indexSettings, searcher, query, builder, DEFAULT_MAX_BUCKETS, randomBoolean(), fieldTypes);
     }
 
     protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(
@@ -629,7 +639,7 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         int maxBucket,
         MappedFieldType... fieldTypes
     ) throws IOException {
-        return searchAndReduce(createIndexSettings(), searcher, query, builder, maxBucket, fieldTypes);
+        return searchAndReduce(createIndexSettings(), searcher, query, builder, maxBucket, randomBoolean(), fieldTypes);
     }
 
     protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(
@@ -638,9 +648,10 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         Query query,
         AggregationBuilder builder,
         int maxBucket,
+        boolean shardFanOut,
         MappedFieldType... fieldTypes
     ) throws IOException {
-        return searchAndReduce(indexSettings, searcher, query, builder, maxBucket, false, fieldTypes);
+        return searchAndReduce(indexSettings, searcher, query, builder, maxBucket, false, shardFanOut, fieldTypes);
     }
 
     /**
@@ -658,6 +669,7 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         AggregationBuilder builder,
         int maxBucket,
         boolean hasNested,
+        boolean shardFanOut,
         MappedFieldType... fieldTypes
     ) throws IOException {
         final IndexReaderContext ctx = searcher.getTopReaderContext();
@@ -673,7 +685,7 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         );
         C root = createAggregator(query, builder, searcher, bucketConsumer, fieldTypes);
 
-        if (randomBoolean() && searcher.getIndexReader().leaves().size() > 0) {
+        if (shardFanOut && searcher.getIndexReader().leaves().size() > 0) {
             assertThat(ctx, instanceOf(CompositeReaderContext.class));
             final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
             final int size = compCTX.leaves().size();
