Skip to content

Commit 748a715

Browse files
alambcrepererum
authored andcommitted
(Re)Support old syntax for approx_percentile_cont and approx_percentile_cont_with_weight (apache#16999)
* Add sqllogictests * Allow both new and old sytanx for approx_percentile_cont and approx_percentile_cont_with_weight * Update docs * Add documentation and more tests
1 parent 9a8d14a commit 748a715

File tree

5 files changed

+99
-7
lines changed

5 files changed

+99
-7
lines changed

datafusion/functions-aggregate/src/approx_percentile_cont.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,24 @@ pub fn approx_percentile_cont(
9191
+-----------------------------------------------------------------------+
9292
| 65.0 |
9393
+-----------------------------------------------------------------------+
94-
```"#,
94+
```
95+
An alternate syntax is also supported:
96+
```sql
97+
> SELECT approx_percentile_cont(column_name, 0.75) FROM table_name;
98+
+-----------------------------------------------+
99+
| approx_percentile_cont(column_name, 0.75) |
100+
+-----------------------------------------------+
101+
| 65.0 |
102+
+-----------------------------------------------+
103+
104+
> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
105+
+----------------------------------------------------------+
106+
| approx_percentile_cont(column_name, 0.75, 100) |
107+
+----------------------------------------------------------+
108+
| 65.0 |
109+
+----------------------------------------------------------+
110+
```
111+
"#,
95112
standard_argument(name = "expression",),
96113
argument(
97114
name = "percentile",

datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,16 @@ pub fn approx_percentile_cont_with_weight(
8585
+--------------------------------------------------------------------------------------------------+
8686
| 78.5 |
8787
+--------------------------------------------------------------------------------------------------+
88+
```
89+
An alternative syntax is also supported:
90+
91+
```sql
92+
> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
93+
+--------------------------------------------------+
94+
| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
95+
+--------------------------------------------------+
96+
| 78.5 |
97+
+--------------------------------------------------+
8898
```"#,
8999
standard_argument(name = "expression", prefix = "The"),
90100
argument(

datafusion/sql/src/expr/function.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -380,10 +380,6 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
380380
} else {
381381
// User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function
382382
if let Some(fm) = self.context_provider.get_aggregate_meta(&name) {
383-
if fm.is_ordered_set_aggregate() && within_group.is_empty() {
384-
return plan_err!("WITHIN GROUP clause is required when calling ordered set aggregate function({})", fm.name());
385-
}
386-
387383
if null_treatment.is_some() && !fm.supports_null_handling_clause() {
388384
return plan_err!(
389385
"[IGNORE | RESPECT] NULLS are not permitted for {}",
@@ -403,7 +399,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
403399
None,
404400
)?;
405401

406-
// add target column expression in within group clause to function arguments
402+
// Add the WITHIN GROUP ordering expressions to the front of the argument list
403+
// So function(arg) WITHIN GROUP (ORDER BY x) becomes function(x, arg)
407404
if !within_group.is_empty() {
408405
args = within_group
409406
.iter()

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1287,7 +1287,7 @@ SELECT approx_distinct(c9) AS a, approx_distinct(c9) AS b FROM aggregate_test_10
12871287
## Column `c12` is omitted due to a large relative error (~10%) due to the small
12881288
## float values.
12891289

1290-
#csv_query_approx_percentile_cont (c2)
1290+
# csv_query_approx_percentile_cont (c2)
12911291
query B
12921292
SELECT (ABS(1 - CAST(approx_percentile_cont(0.1) WITHIN GROUP (ORDER BY c2) AS DOUBLE) / 1.0) < 0.05) AS q FROM aggregate_test_100
12931293
----
@@ -1303,6 +1303,23 @@ SELECT (ABS(1 - CAST(approx_percentile_cont(0.9) WITHIN GROUP (ORDER BY c2) AS D
13031303
----
13041304
true
13051305

1306+
1307+
# csv_query_approx_percentile_cont (c2, alternate syntax, should be the same as above)
1308+
query B
1309+
SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.1) AS DOUBLE) / 1.0) < 0.05) AS q FROM aggregate_test_100
1310+
----
1311+
true
1312+
1313+
query B
1314+
SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.5) AS DOUBLE) / 3.0) < 0.05) AS q FROM aggregate_test_100
1315+
----
1316+
true
1317+
1318+
query B
1319+
SELECT (ABS(1 - CAST(approx_percentile_cont(c2, 0.9) AS DOUBLE) / 5.0) < 0.05) AS q FROM aggregate_test_100
1320+
----
1321+
true
1322+
13061323
# csv_query_approx_percentile_cont (c3)
13071324
query B
13081325
SELECT (ABS(1 - CAST(approx_percentile_cont(0.1) WITHIN GROUP (ORDER BY c3) AS DOUBLE) / -95.3) < 0.05) AS q FROM aggregate_test_100
@@ -1743,6 +1760,17 @@ c 122
17431760
d 124
17441761
e 115
17451762

1763+
1764+
# csv_query_approx_percentile_cont_with_weight (should be the same as above)
1765+
query TI
1766+
SELECT c1, approx_percentile_cont(c3, 0.95) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
1767+
----
1768+
a 73
1769+
b 68
1770+
c 122
1771+
d 124
1772+
e 115
1773+
17461774
query TI
17471775
SELECT c1, approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY c3 DESC) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
17481776
----
@@ -1762,6 +1790,17 @@ c 122
17621790
d 124
17631791
e 115
17641792

1793+
# csv_query_approx_percentile_cont_with_weight alternate syntax
1794+
query TI
1795+
SELECT c1, approx_percentile_cont_with_weight(c3, 1, 0.95) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
1796+
----
1797+
a 73
1798+
b 68
1799+
c 122
1800+
d 124
1801+
e 115
1802+
1803+
17651804
query TI
17661805
SELECT c1, approx_percentile_cont_with_weight(1, 0.95) WITHIN GROUP (ORDER BY c3 DESC) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1
17671806
----

docs/source/user-guide/sql/aggregate_functions.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,24 @@ approx_percentile_cont(percentile [, centroids]) WITHIN GROUP (ORDER BY expressi
860860
+-----------------------------------------------------------------------+
861861
```
862862

863+
An alternate syntax is also supported:
864+
865+
```sql
866+
> SELECT approx_percentile_cont(column_name, 0.75) FROM table_name;
867+
+-----------------------------------------------+
868+
| approx_percentile_cont(column_name, 0.75) |
869+
+-----------------------------------------------+
870+
| 65.0 |
871+
+-----------------------------------------------+
872+
873+
> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name;
874+
+----------------------------------------------------------+
875+
| approx_percentile_cont(column_name, 0.75, 100) |
876+
+----------------------------------------------------------+
877+
| 65.0 |
878+
+----------------------------------------------------------+
879+
```
880+
863881
### `approx_percentile_cont_with_weight`
864882

865883
Returns the weighted approximate percentile of input values using the t-digest algorithm.
@@ -891,3 +909,14 @@ approx_percentile_cont_with_weight(weight, percentile [, centroids]) WITHIN GROU
891909
| 78.5 |
892910
+--------------------------------------------------------------------------------------------------+
893911
```
912+
913+
An alternative syntax is also supported:
914+
915+
```sql
916+
> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name;
917+
+--------------------------------------------------+
918+
| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) |
919+
+--------------------------------------------------+
920+
| 78.5 |
921+
+--------------------------------------------------+
922+
```

0 commit comments

Comments
 (0)