Skip to content

Commit a5f7214

Browse files
committed
doc tests + initial unit tests
Signed-off-by: Kenrick Yap <[email protected]>
1 parent 77f1f6e commit a5f7214

File tree

16 files changed

+244
-139
lines changed

16 files changed

+244
-139
lines changed

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

+25-11
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
import java.util.stream.Collectors;
3535
import org.apache.commons.lang3.tuple.ImmutablePair;
3636
import org.apache.commons.lang3.tuple.Pair;
37-
import org.apache.commons.math3.analysis.function.Exp;
3837
import org.opensearch.sql.DataSourceSchemaName;
3938
import org.opensearch.sql.analysis.symbol.Namespace;
4039
import org.opensearch.sql.analysis.symbol.Symbol;
@@ -84,9 +83,7 @@
8483
import org.opensearch.sql.expression.LiteralExpression;
8584
import org.opensearch.sql.expression.NamedExpression;
8685
import org.opensearch.sql.expression.ReferenceExpression;
87-
import org.opensearch.sql.expression.aggregation.AggregationState;
8886
import org.opensearch.sql.expression.aggregation.Aggregator;
89-
import org.opensearch.sql.expression.aggregation.AvgAggregator;
9087
import org.opensearch.sql.expression.aggregation.NamedAggregator;
9188
import org.opensearch.sql.expression.function.BuiltinFunctionName;
9289
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
@@ -350,26 +347,42 @@ public LogicalPlan visitFieldSummary(FieldSummary node, AnalysisContext context)
350347
TypeEnvironment env = context.peek();
351348
Map<String, ExprType> fieldsMap = env.lookupAllFields(Namespace.FIELD_NAME);
352349

350+
if (node.getIncludeFields() != null) {
351+
List<String> includeFields =
352+
node.getIncludeFields().stream()
353+
.map(expr -> ((Field) expr).getField().toString())
354+
.toList();
355+
356+
Map<String, ExprType> filteredFields = new HashMap<>();
357+
for (String field : includeFields) {
358+
if (fieldsMap.containsKey(field)) {
359+
filteredFields.put(field, fieldsMap.get(field));
360+
}
361+
}
362+
fieldsMap = filteredFields;
363+
}
364+
353365
ImmutableList.Builder<NamedAggregator> aggregatorBuilder = new ImmutableList.Builder<>();
354-
Map<String, String> aggregatorToFieldNameMap = new HashMap<String, String>();
366+
Map<String, Map.Entry<String, ExprType>> aggregatorToFieldNameMap = new HashMap<>();
355367

356368
for (Map.Entry<String, ExprType> entry : fieldsMap.entrySet()) {
357369
ExprType fieldType = entry.getValue();
358370
String fieldName = entry.getKey();
359371
ReferenceExpression fieldExpression = DSL.ref(fieldName, fieldType);
360372

361373
aggregatorBuilder.add(new NamedAggregator("Count" + fieldName, DSL.count(fieldExpression)));
362-
aggregatorToFieldNameMap.put("Count" + fieldName, fieldName);
363-
aggregatorBuilder.add(new NamedAggregator("Distinct" + fieldName, DSL.distinctCount(fieldExpression)));
364-
aggregatorToFieldNameMap.put("Distinct" + fieldName, fieldName);
374+
aggregatorToFieldNameMap.put("Count" + fieldName, entry);
375+
aggregatorBuilder.add(
376+
new NamedAggregator("Distinct" + fieldName, DSL.distinctCount(fieldExpression)));
377+
aggregatorToFieldNameMap.put("Distinct" + fieldName, entry);
365378

366379
if (ExprCoreType.numberTypes().contains(fieldType)) {
367-
aggregatorBuilder.add(new NamedAggregator("Avg" + fieldName, DSL.avg(fieldExpression)));
368-
aggregatorToFieldNameMap.put("Avg" + fieldName, fieldName);
369380
aggregatorBuilder.add(new NamedAggregator("Max" + fieldName, DSL.max(fieldExpression)));
370-
aggregatorToFieldNameMap.put("Max" + fieldName, fieldName);
381+
aggregatorToFieldNameMap.put("Max" + fieldName, entry);
371382
aggregatorBuilder.add(new NamedAggregator("Min" + fieldName, DSL.min(fieldExpression)));
372-
aggregatorToFieldNameMap.put("Min" + fieldName, fieldName);
383+
aggregatorToFieldNameMap.put("Min" + fieldName, entry);
384+
aggregatorBuilder.add(new NamedAggregator("Avg" + fieldName, DSL.avg(fieldExpression)));
385+
aggregatorToFieldNameMap.put("Avg" + fieldName, entry);
373386
}
374387
}
375388

@@ -386,6 +399,7 @@ public LogicalPlan visitFieldSummary(FieldSummary node, AnalysisContext context)
386399
newEnv.define(new Symbol(Namespace.FIELD_NAME, "Avg"), ExprCoreType.DOUBLE);
387400
newEnv.define(new Symbol(Namespace.FIELD_NAME, "Max"), ExprCoreType.DOUBLE);
388401
newEnv.define(new Symbol(Namespace.FIELD_NAME, "Min"), ExprCoreType.DOUBLE);
402+
newEnv.define(new Symbol(Namespace.FIELD_NAME, "Type"), ExprCoreType.STRING);
389403

390404
return new LogicalFieldSummary(child, aggregators, groupBys, aggregatorToFieldNameMap);
391405
}

core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -509,9 +509,7 @@ public static FillNull fillNull(
509509
}
510510

511511
public static FieldSummary fieldSummary(
512-
UnresolvedPlan input,
513-
List<UnresolvedExpression> includeFields
514-
) {
512+
UnresolvedPlan input, List<UnresolvedExpression> includeFields) {
515513
return new FieldSummary(includeFields).attach(input);
516514
}
517515
}

core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java

+11-12
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,28 @@
66
package org.opensearch.sql.ast.expression;
77

88
import com.google.common.collect.ImmutableList;
9+
import java.util.List;
910
import lombok.AllArgsConstructor;
1011
import lombok.EqualsAndHashCode;
1112
import lombok.Getter;
1213
import lombok.ToString;
1314
import org.opensearch.sql.ast.AbstractNodeVisitor;
1415

15-
import java.util.List;
16-
1716
/** Expression node that includes a list of fields nodes. */
1817
@Getter
1918
@ToString
2019
@EqualsAndHashCode(callSuper = false)
2120
@AllArgsConstructor
2221
public class FieldList extends UnresolvedExpression {
23-
private final List<Field> fieldList;
22+
private final List<Field> fieldList;
2423

25-
@Override
26-
public List<UnresolvedExpression> getChild() {
27-
return ImmutableList.copyOf(fieldList);
28-
}
24+
@Override
25+
public List<UnresolvedExpression> getChild() {
26+
return ImmutableList.copyOf(fieldList);
27+
}
2928

30-
@Override
31-
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
32-
return nodeVisitor.visitFieldList(this, context);
33-
}
34-
}
29+
@Override
30+
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
31+
return nodeVisitor.visitFieldList(this, context);
32+
}
33+
}

core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java

+27-28
Original file line numberDiff line numberDiff line change
@@ -5,45 +5,44 @@
55

66
package org.opensearch.sql.ast.tree;
77

8+
import java.util.List;
89
import lombok.EqualsAndHashCode;
910
import lombok.Getter;
1011
import lombok.ToString;
1112
import org.opensearch.sql.ast.AbstractNodeVisitor;
1213
import org.opensearch.sql.ast.Node;
13-
import org.opensearch.sql.ast.expression.Argument;
1414
import org.opensearch.sql.ast.expression.AttributeList;
1515
import org.opensearch.sql.ast.expression.UnresolvedExpression;
1616

17-
import java.util.List;
18-
1917
@Getter
2018
@ToString
2119
@EqualsAndHashCode(callSuper = false)
2220
public class FieldSummary extends UnresolvedPlan {
23-
private List<UnresolvedExpression> includeFields;
24-
private UnresolvedPlan child;
25-
26-
public FieldSummary(List<UnresolvedExpression> collect) {
27-
collect.forEach(exp -> {
28-
if (exp instanceof AttributeList) {
29-
this.includeFields = ((AttributeList)exp).getAttrList();
30-
}
21+
private UnresolvedPlan child;
22+
private List<UnresolvedExpression> includeFields;
23+
24+
public FieldSummary(List<UnresolvedExpression> collect) {
25+
collect.forEach(
26+
exp -> {
27+
if (exp instanceof AttributeList) {
28+
this.includeFields = ((AttributeList) exp).getAttrList();
29+
}
3130
});
32-
}
33-
34-
@Override
35-
public List<? extends Node> getChild() {
36-
return child == null ? List.of() : List.of(child);
37-
}
38-
39-
@Override
40-
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
41-
return nodeVisitor.visitFieldSummary(this, context);
42-
}
43-
44-
@Override
45-
public FieldSummary attach(UnresolvedPlan child) {
46-
this.child = child;
47-
return this;
48-
}
31+
}
32+
33+
@Override
34+
public List<? extends Node> getChild() {
35+
return child == null ? List.of() : List.of(child);
36+
}
37+
38+
@Override
39+
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
40+
return nodeVisitor.visitFieldSummary(this, context);
41+
}
42+
43+
@Override
44+
public FieldSummary attach(UnresolvedPlan child) {
45+
this.child = child;
46+
return this;
47+
}
4948
}
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,28 @@
11
package org.opensearch.sql.planner.logical;
22

3+
import java.util.List;
4+
import java.util.Map;
35
import lombok.EqualsAndHashCode;
46
import lombok.Getter;
57
import lombok.ToString;
8+
import org.opensearch.sql.data.type.ExprType;
69
import org.opensearch.sql.expression.NamedExpression;
710
import org.opensearch.sql.expression.aggregation.NamedAggregator;
811

9-
import java.util.Collections;
10-
import java.util.HashMap;
11-
import java.util.List;
12-
import java.util.Map;
13-
1412
/** Logical Field Summary. */
1513
@Getter
1614
@ToString
1715
@EqualsAndHashCode(callSuper = true)
1816
public class LogicalFieldSummary extends LogicalAggregation {
1917

20-
Map<String, String> aggregationToFieldNameMap;
18+
Map<String, Map.Entry<String, ExprType>> aggregationToFieldNameMap;
2119

22-
public LogicalFieldSummary(
23-
LogicalPlan child, List<NamedAggregator> aggregatorList, List<NamedExpression> groupByList, Map<String, String> aggregationToFieldNameMap) {
24-
super(child, aggregatorList, groupByList);
25-
this.aggregationToFieldNameMap = aggregationToFieldNameMap;
26-
}
27-
}
20+
public LogicalFieldSummary(
21+
LogicalPlan child,
22+
List<NamedAggregator> aggregatorList,
23+
List<NamedExpression> groupByList,
24+
Map<String, Map.Entry<String, ExprType>> aggregationToFieldNameMap) {
25+
super(child, aggregatorList, groupByList);
26+
this.aggregationToFieldNameMap = aggregationToFieldNameMap;
27+
}
28+
}

core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java

+7
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
import org.opensearch.sql.ast.tree.AD;
8686
import org.opensearch.sql.ast.tree.CloseCursor;
8787
import org.opensearch.sql.ast.tree.FetchCursor;
88+
import org.opensearch.sql.ast.tree.FieldSummary;
8889
import org.opensearch.sql.ast.tree.FillNull;
8990
import org.opensearch.sql.ast.tree.Kmeans;
9091
import org.opensearch.sql.ast.tree.ML;
@@ -1871,4 +1872,10 @@ public void visit_close_cursor() {
18711872
() ->
18721873
assertEquals("pewpew", ((LogicalFetchCursor) analyzed.getChild().get(0)).getCursor()));
18731874
}
1875+
1876+
@Test
1877+
public void visit_fieldsummary() {
1878+
LogicalPlan actual = analyze(new FieldSummary(List.of()));
1879+
assertEquals()
1880+
}
18741881
}

docs/user/ppl/cmd/fields.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ fields
1111

1212
Description
1313
============
14-
| Using ``field`` command to keep or remove fields from the search result.
14+
| Use ``field`` command to keep or remove fields from the search result.
1515
1616

1717
Syntax

docs/user/ppl/cmd/fieldsummary.rst

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
=============
2+
fieldsummary
3+
=============
4+
5+
.. rubric:: Table of contents
6+
7+
.. contents::
8+
:local:
9+
:depth: 2
10+
11+
12+
Description
13+
============
14+
| Use ``fieldsummary`` command to provide summary statistics for all fields in the current result set.
15+
16+
This command will:
17+
- Calculate basic statistics for each field (count, distinct count and min, max, avg for numeric fields)
18+
- Determine the data type of each field
19+
20+
Syntax
21+
============
22+
fieldsummary [includefields="<field_name>[,<field_name>]"]
23+
24+
* includefields: optional. specify which fields to include in the summary.
25+
26+
Example 1: Perform fieldsummary without additional fields
27+
==============================================
28+
29+
PPL query::
30+
31+
os> source=nyc_taxi | fieldsummary;
32+
fetched rows / total rows = 4/4
33+
+--------------+-------+----------+--------------------+---------+--------+--------+
34+
| Field | Count | Distinct | Avg | Max | Min | Type |
35+
|--------------+-------+----------|--------------------+---------+--------+--------|
36+
| anomaly_type | 973 | 1 | | | | string |
37+
| category | 973 | 2 | | | | string |
38+
| value | 973 | 953 | 14679.110996916752 | 29985.0 | 1769.0 | float |
39+
| timestamp | 973 | 973 | | | | date |
40+
+--------------+-------+----------+--------------------+---------+--------+--------+
41+
42+
Example 2: Perform fieldsummary with includefields
43+
==============================================
44+
45+
PPL query::
46+
47+
os> source=accounts | fieldsummary includefields="category,value" ;
48+
fetched rows / total rows = 2/2
49+
+--------------+-------+----------+--------------------+---------+--------+--------+
50+
| Field | Count | Distinct | Avg | Max | Min | Type |
51+
|--------------+-------+----------|--------------------+---------+--------+--------|
52+
| category | 973 | 2 | | | | string |
53+
| value | 973 | 953 | 14679.110996916752 | 29985.0 | 1769.0 | float |
54+
+--------------+-------+----------+--------------------+---------+--------+--------+
55+
56+

opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java

+16-10
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import java.util.ArrayList;
1616
import java.util.Arrays;
1717
import java.util.Collection;
18-
import java.util.HashMap;
1918
import java.util.List;
2019
import java.util.Map;
2120
import java.util.Set;
@@ -291,15 +290,22 @@ public void pushTypeMapping(Map<String, OpenSearchDataType> typeMapping) {
291290
}
292291

293292
public void pushFieldSummaryTypeMapping() {
294-
Map<String, OpenSearchDataType> typeMapping = Map.of(
295-
"Field", OpenSearchDataType.of(OpenSearchDataType.MappingType.Text),
296-
"Count", OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer),
297-
"Distinct", OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer),
298-
"Avg", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double),
299-
"Min", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double),
300-
"Max", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double),
301-
"Sum", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double)
302-
);
293+
Map<String, OpenSearchDataType> typeMapping =
294+
Map.of(
295+
"Field",
296+
OpenSearchDataType.of(OpenSearchDataType.MappingType.Text),
297+
"Count",
298+
OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer),
299+
"Distinct",
300+
OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer),
301+
"Avg",
302+
OpenSearchDataType.of(OpenSearchDataType.MappingType.Double),
303+
"Min",
304+
OpenSearchDataType.of(OpenSearchDataType.MappingType.Double),
305+
"Max",
306+
OpenSearchDataType.of(OpenSearchDataType.MappingType.Double),
307+
"Type",
308+
OpenSearchDataType.of(OpenSearchDataType.MappingType.Text));
303309
exprValueFactory.extendTypeMapping(typeMapping);
304310
}
305311

0 commit comments

Comments
 (0)