From 71579741701db4dd07672bfb6fa5b0653c4d11a3 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 22 Nov 2022 09:58:37 +0100 Subject: [PATCH 1/2] Speedup time_series agg by caching current tsid ordinal, parent bucket ordinal and buck ordinal. This avoids needlessly adding the same parent bucket ordinal or TSIDs to `BytesKeyedBucketOrds`. Relates to #74660 --- .../bucket/timeseries/TimeSeriesAggregator.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java index 2d1e451839865..340fcb568a53f 100644 --- a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java +++ b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java @@ -92,8 +92,18 @@ protected void doClose() { protected LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx, LeafBucketCollector sub) throws IOException { return new LeafBucketCollectorBase(sub, null) { + // This helps significantly reducing time spent attempting to add bucket + tsid combos that already were added. + long currentTsidOrd = -1; + long currentBucket = -1; + long currentBucketOrdinal; + @Override public void collect(int doc, long bucket) throws IOException { + if (currentBucket == bucket && currentTsidOrd == aggCtx.getTsidOrd()) { + collectExistingBucket(sub, doc, currentBucketOrdinal); + return; + } + long bucketOrdinal = bucketOrds.add(bucket, aggCtx.getTsid()); if (bucketOrdinal < 0) { // already seen bucketOrdinal = -1 - bucketOrdinal; @@ -101,6 +111,10 @@ public void collect(int doc, long bucket) throws IOException { } else { collectBucket(sub, doc, bucketOrdinal); } + + currentBucketOrdinal = bucketOrdinal; + currentTsidOrd = aggCtx.getTsidOrd(); + currentBucket = bucket; } }; } From f23de4976776c7f7de2b481f59b8f7170ee6630f Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 23 Nov 2022 13:44:37 +0100 Subject: [PATCH 2/2] added comment --- .../bucket/timeseries/TimeSeriesAggregator.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java index 340fcb568a53f..d30a825264d01 100644 --- a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java +++ b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java @@ -92,13 +92,18 @@ protected void doClose() { protected LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx, LeafBucketCollector sub) throws IOException { return new LeafBucketCollectorBase(sub, null) { - // This helps significantly reducing time spent attempting to add bucket + tsid combos that already were added. + // Keeping track of these fields helps to reduce time spent attempting to add bucket + tsid combos that already were added. long currentTsidOrd = -1; long currentBucket = -1; long currentBucketOrdinal; @Override public void collect(int doc, long bucket) throws IOException { + // Naively comparing bucket against currentBucket and tsid ord to currentBucket can work really well. + // TimeSeriesIndexSearcher ensures that docs are emitted in tsid and timestamp order, so if tsid ordinal + // changes to what is stored in currentTsidOrd then that ordinal well never occur again. Same applies + // currentBucket if there is no parent aggregation or the immediate parent aggregation creates buckets + // based on @timestamp field or dimension fields (fields that make up the tsid). if (currentBucket == bucket && currentTsidOrd == aggCtx.getTsidOrd()) { collectExistingBucket(sub, doc, currentBucketOrdinal); return;