Skip to content

Commit e3a6cca

Browse files
asimmahmood1, maitreya2954, cwperks
authored
Introduce execution_hint for Cardinality aggregation (#17312)
--------- Signed-off-by: Siddharth Rayabharam <[email protected]> Signed-off-by: Asim Mahmood <[email protected]> Signed-off-by: Asim M <[email protected]> Co-authored-by: Siddharth Rayabharam <[email protected]> Co-authored-by: Craig Perkins <[email protected]>
1 parent 43e589a commit e3a6cca

File tree

7 files changed

+222
-22
lines changed

7 files changed

+222
-22
lines changed

CHANGELOG-3.0.md

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1616
- Views, simplify data access and manipulation by providing a virtual layer over one or more indices ([#11957](https://github.com/opensearch-project/OpenSearch/pull/11957))
1717
- Added pull-based Ingestion (APIs, for ingestion source, a Kafka plugin, and IngestionEngine that pulls data from the ingestion source) ([#16958](https://github.com/opensearch-project/OpenSearch/pull/16958))
1818
- Added ConfigurationUtils to core for the ease of configuration parsing ([#17223](https://github.com/opensearch-project/OpenSearch/pull/17223))
19+
- Add execution_hint to cardinality aggregator request ([#17312](https://github.com/opensearch-project/OpenSearch/pull/17312))
1920

2021
### Dependencies
2122
- Update Apache Lucene to 10.1.0 ([#16366](https://github.com/opensearch-project/OpenSearch/pull/16366))

server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregationBuilder.java

+29-10
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ public final class CardinalityAggregationBuilder extends ValuesSourceAggregation
6868

6969
private static final ParseField REHASH = new ParseField("rehash").withAllDeprecated("no replacement - values will always be rehashed");
7070
public static final ParseField PRECISION_THRESHOLD_FIELD = new ParseField("precision_threshold");
71+
public static final ParseField EXECUTION_HINT_FIELD = new ParseField("execution_hint");
7172

7273
public static final ObjectParser<CardinalityAggregationBuilder, String> PARSER = ObjectParser.fromBuilder(
7374
NAME,
@@ -76,6 +77,7 @@ public final class CardinalityAggregationBuilder extends ValuesSourceAggregation
7677
static {
7778
ValuesSourceAggregationBuilder.declareFields(PARSER, true, false, false);
7879
PARSER.declareLong(CardinalityAggregationBuilder::precisionThreshold, CardinalityAggregationBuilder.PRECISION_THRESHOLD_FIELD);
80+
PARSER.declareString(CardinalityAggregationBuilder::executionHint, CardinalityAggregationBuilder.EXECUTION_HINT_FIELD);
7981
PARSER.declareLong((b, v) -> {/*ignore*/}, REHASH);
8082
}
8183

@@ -85,6 +87,8 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) {
8587

8688
private Long precisionThreshold = null;
8789

90+
private String executionHint = null;
91+
8892
public CardinalityAggregationBuilder(String name) {
8993
super(name);
9094
}
@@ -96,6 +100,7 @@ public CardinalityAggregationBuilder(
96100
) {
97101
super(clone, factoriesBuilder, metadata);
98102
this.precisionThreshold = clone.precisionThreshold;
103+
this.executionHint = clone.executionHint;
99104
}
100105

101106
@Override
@@ -111,6 +116,9 @@ public CardinalityAggregationBuilder(StreamInput in) throws IOException {
111116
if (in.readBoolean()) {
112117
precisionThreshold = in.readLong();
113118
}
119+
if (in.getVersion().onOrAfter(Version.V_3_0_0)) {
120+
executionHint = in.readOptionalString();
121+
}
114122
}
115123

116124
@Override
@@ -125,6 +133,9 @@ protected void innerWriteTo(StreamOutput out) throws IOException {
125133
if (hasPrecisionThreshold) {
126134
out.writeLong(precisionThreshold);
127135
}
136+
if (out.getVersion().onOrAfter(Version.V_3_0_0)) {
137+
out.writeOptionalString(executionHint);
138+
}
128139
}
129140

130141
@Override
@@ -146,13 +157,9 @@ public CardinalityAggregationBuilder precisionThreshold(long precisionThreshold)
146157
return this;
147158
}
148159

149-
/**
150-
* Get the precision threshold. Higher values improve accuracy but also
151-
* increase memory usage. Will return <code>null</code> if the
152-
* precisionThreshold has not been set yet.
153-
*/
154-
public Long precisionThreshold() {
155-
return precisionThreshold;
160+
public CardinalityAggregationBuilder executionHint(String executionHint) {
161+
this.executionHint = executionHint;
162+
return this;
156163
}
157164

158165
@Override
@@ -162,20 +169,32 @@ protected CardinalityAggregatorFactory innerBuild(
162169
AggregatorFactory parent,
163170
AggregatorFactories.Builder subFactoriesBuilder
164171
) throws IOException {
165-
return new CardinalityAggregatorFactory(name, config, precisionThreshold, queryShardContext, parent, subFactoriesBuilder, metadata);
172+
return new CardinalityAggregatorFactory(
173+
name,
174+
config,
175+
precisionThreshold,
176+
queryShardContext,
177+
parent,
178+
subFactoriesBuilder,
179+
metadata,
180+
executionHint
181+
);
166182
}
167183

168184
@Override
169185
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
170186
if (precisionThreshold != null) {
171187
builder.field(PRECISION_THRESHOLD_FIELD.getPreferredName(), precisionThreshold);
172188
}
189+
if (executionHint != null) {
190+
builder.field(EXECUTION_HINT_FIELD.getPreferredName(), executionHint);
191+
}
173192
return builder;
174193
}
175194

176195
@Override
177196
public int hashCode() {
178-
return Objects.hash(super.hashCode(), precisionThreshold);
197+
return Objects.hash(super.hashCode(), precisionThreshold, executionHint);
179198
}
180199

181200
@Override
@@ -184,7 +203,7 @@ public boolean equals(Object obj) {
184203
if (obj == null || getClass() != obj.getClass()) return false;
185204
if (super.equals(obj) == false) return false;
186205
CardinalityAggregationBuilder other = (CardinalityAggregationBuilder) obj;
187-
return Objects.equals(precisionThreshold, other.precisionThreshold);
206+
return Objects.equals(precisionThreshold, other.precisionThreshold) && Objects.equals(executionHint, other.executionHint);
188207
}
189208

190209
@Override

server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregator.java

+13-7
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ public class CardinalityAggregator extends NumericMetricsAggregator.SingleValue
8989

9090
private static final Logger logger = LogManager.getLogger(CardinalityAggregator.class);
9191

92+
private final CardinalityAggregatorFactory.ExecutionMode executionMode;
9293
private final int precision;
9394
private final ValuesSource valuesSource;
9495

@@ -113,14 +114,16 @@ public CardinalityAggregator(
113114
int precision,
114115
SearchContext context,
115116
Aggregator parent,
116-
Map<String, Object> metadata
117+
Map<String, Object> metadata,
118+
CardinalityAggregatorFactory.ExecutionMode executionMode
117119
) throws IOException {
118120
super(name, context, parent, metadata);
119121
// TODO: Stop using nulls here
120122
this.valuesSource = valuesSourceConfig.hasValues() ? valuesSourceConfig.getValuesSource() : null;
121123
this.precision = precision;
122124
this.counts = valuesSource == null ? null : new HyperLogLogPlusPlus(precision, context.bigArrays(), 1);
123125
this.valuesSourceConfig = valuesSourceConfig;
126+
this.executionMode = executionMode;
124127
}
125128

126129
@Override
@@ -144,14 +147,17 @@ private Collector pickCollector(LeafReaderContext ctx) throws IOException {
144147
}
145148

146149
Collector collector = null;
147-
if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals) {
148-
ValuesSource.Bytes.WithOrdinals source = (ValuesSource.Bytes.WithOrdinals) valuesSource;
150+
if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals source) {
149151
final SortedSetDocValues ordinalValues = source.ordinalsValues(ctx);
150152
final long maxOrd = ordinalValues.getValueCount();
151153
if (maxOrd == 0) {
152154
emptyCollectorsUsed++;
153155
return new EmptyCollector();
154-
} else {
156+
} else if (executionMode == CardinalityAggregatorFactory.ExecutionMode.ORDINALS) { // Force OrdinalsCollector
157+
ordinalsCollectorsUsed++;
158+
collector = new OrdinalsCollector(counts, ordinalValues, context.bigArrays());
159+
} else if (executionMode == null) {
160+
// no hint provided, fall back to heuristics
155161
final long ordinalsMemoryUsage = OrdinalsCollector.memoryOverhead(maxOrd);
156162
final long countsMemoryUsage = HyperLogLogPlusPlus.memoryUsage(precision);
157163
// only use ordinals if they don't increase memory usage by more than 25%
@@ -164,7 +170,7 @@ private Collector pickCollector(LeafReaderContext ctx) throws IOException {
164170
}
165171
}
166172

167-
if (collector == null) { // not able to build an OrdinalsCollector
173+
if (collector == null) { // not able to build an OrdinalsCollector, or hint is direct
168174
stringHashingCollectorsUsed++;
169175
collector = new DirectCollector(counts, MurmurHash3Values.hash(valuesSource.bytesValues(ctx)));
170176
}
@@ -480,7 +486,7 @@ public void close() {
480486
*
481487
* @opensearch.internal
482488
*/
483-
private static class DirectCollector extends Collector {
489+
static class DirectCollector extends Collector {
484490

485491
private final MurmurHash3Values hashes;
486492
private final HyperLogLogPlusPlus counts;
@@ -517,7 +523,7 @@ public void close() {
517523
*
518524
* @opensearch.internal
519525
*/
520-
private static class OrdinalsCollector extends Collector {
526+
static class OrdinalsCollector extends Collector {
521527

522528
private static final long SHALLOW_FIXEDBITSET_SIZE = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);
523529

server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregatorFactory.java

+33-3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.opensearch.search.internal.SearchContext;
4545

4646
import java.io.IOException;
47+
import java.util.Locale;
4748
import java.util.Map;
4849

4950
/**
@@ -53,6 +54,33 @@
5354
*/
5455
class CardinalityAggregatorFactory extends ValuesSourceAggregatorFactory {
5556

57+
/**
 * Execution mode for cardinality agg, parsed from the user-supplied
 * {@code execution_hint} string.
 *
 * @opensearch.internal
 */
public enum ExecutionMode {
    DIRECT,
    ORDINALS;

    ExecutionMode() {}

    /**
     * Resolves an execution hint to its mode. Matching is case-insensitive
     * and locale-independent (the value is upper-cased with {@link Locale#ROOT}).
     *
     * @param value the hint text, e.g. {@code "direct"} or {@code "ordinals"}; must not be {@code null}
     * @return the matching mode
     * @throws IllegalArgumentException if {@code value} does not name a known mode;
     *         the lookup failure is attached as the cause
     */
    public static ExecutionMode fromString(String value) {
        try {
            return ExecutionMode.valueOf(value.toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException e) {
            // Keep the original failure as the cause so the stack trace is not lost.
            throw new IllegalArgumentException("Unknown execution_hint: [" + value + "], expected any of [direct, ordinals]", e);
        }
    }

    /**
     * Returns the lower-case constant name, i.e. the form accepted by {@link #fromString(String)}.
     */
    @Override
    public String toString() {
        return this.name().toLowerCase(Locale.ROOT);
    }
}
81+
82+
private final ExecutionMode executionMode;
83+
5684
private final Long precisionThreshold;
5785

5886
CardinalityAggregatorFactory(
@@ -62,10 +90,12 @@ class CardinalityAggregatorFactory extends ValuesSourceAggregatorFactory {
6290
QueryShardContext queryShardContext,
6391
AggregatorFactory parent,
6492
AggregatorFactories.Builder subFactoriesBuilder,
65-
Map<String, Object> metadata
93+
Map<String, Object> metadata,
94+
String executionHint
6695
) throws IOException {
6796
super(name, config, queryShardContext, parent, subFactoriesBuilder, metadata);
6897
this.precisionThreshold = precisionThreshold;
98+
this.executionMode = executionHint == null ? null : ExecutionMode.fromString(executionHint);
6999
}
70100

71101
public static void registerAggregators(ValuesSourceRegistry.Builder builder) {
@@ -74,7 +104,7 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) {
74104

75105
@Override
76106
protected Aggregator createUnmapped(SearchContext searchContext, Aggregator parent, Map<String, Object> metadata) throws IOException {
77-
return new CardinalityAggregator(name, config, precision(), searchContext, parent, metadata);
107+
return new CardinalityAggregator(name, config, precision(), searchContext, parent, metadata, executionMode);
78108
}
79109

80110
@Override
@@ -86,7 +116,7 @@ protected Aggregator doCreateInternal(
86116
) throws IOException {
87117
return queryShardContext.getValuesSourceRegistry()
88118
.getAggregator(CardinalityAggregationBuilder.REGISTRY_KEY, config)
89-
.build(name, config, precision(), searchContext, parent, metadata);
119+
.build(name, config, precision(), searchContext, parent, metadata, executionMode);
90120
}
91121

92122
@Override

server/src/main/java/org/opensearch/search/aggregations/metrics/CardinalityAggregatorSupplier.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ Aggregator build(
5151
int precision,
5252
SearchContext context,
5353
Aggregator parent,
54-
Map<String, Object> metadata
54+
Map<String, Object> metadata,
55+
CardinalityAggregatorFactory.ExecutionMode executionMode
5556
) throws IOException;
5657
}

0 commit comments

Comments
 (0)