Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions datafusion/sql/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use crate::query::to_order_by_exprs_with_select;
use crate::utils::{
CheckColumnsMustReferenceAggregatePurpose, CheckColumnsSatisfyExprsPurpose,
check_columns_satisfy_exprs, extract_aliases, rebase_expr, resolve_aliases_to_exprs,
resolve_columns, resolve_positions_to_exprs, rewrite_recursive_unnests_bottom_up,
check_columns_satisfy_exprs, deduplicate_select_expr_names, extract_aliases,
rebase_expr, resolve_aliases_to_exprs, resolve_columns, resolve_positions_to_exprs,
rewrite_recursive_unnests_bottom_up,
};

use datafusion_common::error::DataFusionErrorBuilder;
Expand Down Expand Up @@ -109,6 +110,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
planner_context,
)?;

// Auto-suffix duplicate expression names (e.g. cov, cov → cov, cov:1)
// before projection so that the unique-name constraint is satisfied.
let select_exprs = deduplicate_select_expr_names(select_exprs);

// Having and group by clause may reference aliases defined in select projection
let projected_plan = self.project(base_plan.clone(), select_exprs)?;
let select_exprs = projected_plan.expressions();
Expand Down
37 changes: 37 additions & 0 deletions datafusion/sql/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use datafusion_expr::expr::{
Alias, GroupingSet, Unnest, WindowFunction, WindowFunctionParams,
};
use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs};
use datafusion_expr::select_expr::SelectExpr;
use datafusion_expr::{
ColumnUnnestList, Expr, ExprSchemable, LogicalPlan, col, expr_vec_fmt,
};
Expand Down Expand Up @@ -633,6 +634,42 @@ fn push_projection_dedupl(projection: &mut Vec<Expr>, expr: Expr) {
projection.push(expr);
}
}

/// Auto-suffix duplicate SELECT expression names with `:{count}`.
///
/// The first occurrence keeps its original name so that ORDER BY / HAVING
/// references resolve correctly. Wildcards are left untouched because they
/// are expanded later in `project_with_validation`.
///
/// Duplicates are detected by the schema name of each expression, which
/// identifies logically identical expressions before column normalization.
pub(crate) fn deduplicate_select_expr_names(
exprs: Vec<SelectExpr>,
) -> Vec<SelectExpr> {
let mut seen: HashMap<String, usize> = HashMap::new();
exprs
.into_iter()
.map(|select_expr| match select_expr {
SelectExpr::Expression(expr) => {
let name = expr.schema_name().to_string();
let count = seen.entry(name.clone()).or_insert(0);
let result = if *count > 0 {
let (_qualifier, field_name) = expr.qualified_name();
SelectExpr::Expression(
expr.alias(format!("{field_name}:{count}")),
)
} else {
SelectExpr::Expression(expr)
};
*count += 1;
result
}
// Leave wildcards alone — they are expanded later
other => other,
})
.collect()
}

/// The context is we want to rewrite unnest() into InnerProjection->Unnest->OuterProjection
/// Given an expression which contains unnest expr as one of its children,
/// Try transform depends on unnest type
Expand Down
54 changes: 34 additions & 20 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -790,11 +790,13 @@ fn select_column_does_not_exist() {
#[test]
fn select_repeated_column() {
let sql = "SELECT age, age FROM person";
let err = logical_plan(sql).expect_err("query should have failed");

let plan = logical_plan(sql).unwrap();
assert_snapshot!(
err.strip_backtrace(),
@r#"Error during planning: Projections require unique expression names but the expression "person.age" at position 0 and "person.age" at position 1 have the same name. Consider aliasing ("AS") one of them."#
plan,
@r"
Projection: person.age, person.age AS age:1
TableScan: person
"
);
}

Expand Down Expand Up @@ -1532,11 +1534,14 @@ fn select_simple_aggregate_column_does_not_exist() {
#[test]
fn select_simple_aggregate_repeated_aggregate() {
let sql = "SELECT MIN(age), MIN(age) FROM person";
let err = logical_plan(sql).expect_err("query should have failed");

let plan = logical_plan(sql).unwrap();
assert_snapshot!(
err.strip_backtrace(),
@r#"Error during planning: Projections require unique expression names but the expression "min(person.age)" at position 0 and "min(person.age)" at position 1 have the same name. Consider aliasing ("AS") one of them."#
plan,
@r"
Projection: min(person.age), min(person.age) AS min(person.age):1
Aggregate: groupBy=[[]], aggr=[[min(person.age)]]
TableScan: person
"
);
}

Expand Down Expand Up @@ -1585,11 +1590,14 @@ fn select_from_typed_string_values() {
#[test]
fn select_simple_aggregate_repeated_aggregate_with_repeated_aliases() {
let sql = "SELECT MIN(age) AS a, MIN(age) AS a FROM person";
let err = logical_plan(sql).expect_err("query should have failed");

let plan = logical_plan(sql).unwrap();
assert_snapshot!(
err.strip_backtrace(),
@r#"Error during planning: Projections require unique expression names but the expression "min(person.age) AS a" at position 0 and "min(person.age) AS a" at position 1 have the same name. Consider aliasing ("AS") one of them."#
plan,
@r"
Projection: min(person.age) AS a, min(person.age) AS a AS a:1
Aggregate: groupBy=[[]], aggr=[[min(person.age)]]
TableScan: person
"
);
}

Expand Down Expand Up @@ -1626,11 +1634,14 @@ fn select_simple_aggregate_with_groupby_with_aliases() {
#[test]
fn select_simple_aggregate_with_groupby_with_aliases_repeated() {
let sql = "SELECT state AS a, MIN(age) AS a FROM person GROUP BY state";
let err = logical_plan(sql).expect_err("query should have failed");

let plan = logical_plan(sql).unwrap();
assert_snapshot!(
err.strip_backtrace(),
@r#"Error during planning: Projections require unique expression names but the expression "person.state AS a" at position 0 and "min(person.age) AS a" at position 1 have the same name. Consider aliasing ("AS") one of them."#
plan,
@r"
Projection: person.state AS a, min(person.age) AS a AS a:1
Aggregate: groupBy=[[person.state]], aggr=[[min(person.age)]]
TableScan: person
"
);
}

Expand Down Expand Up @@ -1751,11 +1762,14 @@ fn select_simple_aggregate_with_groupby_can_use_alias() {
#[test]
fn select_simple_aggregate_with_groupby_aggregate_repeated() {
let sql = "SELECT state, MIN(age), MIN(age) FROM person GROUP BY state";
let err = logical_plan(sql).expect_err("query should have failed");

let plan = logical_plan(sql).unwrap();
assert_snapshot!(
err.strip_backtrace(),
@r#"Error during planning: Projections require unique expression names but the expression "min(person.age)" at position 1 and "min(person.age)" at position 2 have the same name. Consider aliasing ("AS") one of them."#
plan,
@r"
Projection: person.state, min(person.age), min(person.age) AS min(person.age):1
Aggregate: groupBy=[[person.state]], aggr=[[min(person.age)]]
TableScan: person
"
);
}

Expand Down
19 changes: 7 additions & 12 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -7941,21 +7941,16 @@ select count(), count() * count() from t;
----
2 4

# DataFusion error: Error during planning: Projections require unique expression names but the expression "count\(Int64\(1\)\)" at position 0 and "count\(Int64\(1\)\)" at position 1 have the same name\. Consider aliasing \("AS"\) one of them\.
query error
# Duplicate aggregate expressions are now auto-suffixed
query II
select count(1), count(1) from t;
----
2 2

# DataFusion error: Error during planning: Projections require unique expression names but the expression "count\(Int64\(1\)\)" at position 0 and "count\(Int64\(1\)\)" at position 1 have the same name\. Consider aliasing \("AS"\) one of them\.
query error
explain select count(1), count(1) from t;

# DataFusion error: Error during planning: Projections require unique expression names but the expression "count\(Int64\(1\) AS \)" at position 0 and "count\(Int64\(1\) AS \)" at position 1 have the same name\. Consider aliasing \("AS"\) one of them\.
query error
query II
select count(), count() from t;

# DataFusion error: Error during planning: Projections require unique expression names but the expression "count\(Int64\(1\) AS \)" at position 0 and "count\(Int64\(1\) AS \)" at position 1 have the same name\. Consider aliasing \("AS"\) one of them\.
query error
explain select count(), count() from t;
----
2 2

query II
select count(1), count(2) from t;
Expand Down
175 changes: 175 additions & 0 deletions datafusion/sqllogictest/test_files/duplicate_column_alias.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for duplicate column names/aliases in projections.
# DataFusion auto-suffixes duplicates with :{count} (e.g. cov, cov:1).

# Setup
statement ok
CREATE TABLE t(x INT, y INT) AS VALUES (1, 2), (3, 4);

#
# Basic duplicate alias
#
query II
SELECT x AS c1, y AS c1 FROM t;
----
1 2
3 4

#
# Duplicate literal expressions
#
query II
SELECT 1, 1;
----
1 1

#
# Same column selected twice
#
query II
SELECT x, x FROM t;
----
1 1
3 3

#
# Subquery with duplicate column names
#
query II
SELECT * FROM (SELECT x AS c1, y AS c1 FROM t);
----
1 2
3 4

#
# ORDER BY referencing a duplicated alias resolves to first occurrence
#
query II
SELECT x AS c1, y AS c1 FROM t ORDER BY c1;
----
1 2
3 4

#
# CTE join producing duplicate column names (TPC-DS Q39 pattern)
#
statement ok
CREATE TABLE inv(warehouse_sk INT, item_sk INT, moy INT, cov DOUBLE) AS VALUES
(1, 10, 1, 1.5),
(1, 10, 2, 2.0),
(2, 20, 1, 0.8),
(2, 20, 2, 1.2);

query IIIRIIIR
WITH inv1 AS (
SELECT warehouse_sk, item_sk, moy, cov FROM inv WHERE moy = 1
),
inv2 AS (
SELECT warehouse_sk, item_sk, moy, cov FROM inv WHERE moy = 2
)
SELECT inv1.warehouse_sk, inv1.item_sk, inv1.moy, inv1.cov,
inv2.warehouse_sk, inv2.item_sk, inv2.moy, inv2.cov
FROM inv1 JOIN inv2
ON inv1.item_sk = inv2.item_sk AND inv1.warehouse_sk = inv2.warehouse_sk
ORDER BY inv1.warehouse_sk, inv1.item_sk;
----
1 10 1 1.5 1 10 2 2
2 20 1 0.8 2 20 2 1.2

#
# Three-way duplicate
#
query III
SELECT x AS a, y AS a, x + y AS a FROM t;
----
1 2 3
3 4 7

#
# CAST produces same schema_name as original column (TPC-DS Q39 pattern).
# CAST is transparent to schema_name, so CAST(x AS DOUBLE) and x
# both have schema_name "x" — this must be deduped.
#
query RI
SELECT CAST(x AS DOUBLE), x FROM t;
----
1 1
3 3

#
# GROUP BY with duplicate expressions in SELECT
#
query II
SELECT x, x FROM t GROUP BY x;
----
1 1
3 3

#
# Aggregate with GROUP BY producing duplicate column names
#
query III
SELECT x, SUM(y) AS total, SUM(y) AS total FROM t GROUP BY x ORDER BY x;
----
1 2 2
3 4 4

#
# ORDER BY referencing the second (renamed) column by position
#
query II
SELECT y AS c1, x AS c1 FROM t ORDER BY 2;
----
2 1
4 3

#
# Function calls that produce the same schema_name after argument
# normalization (reported in issue #6543 for iszero).
#
query BB
SELECT iszero(0.0), iszero(-0.0);
----
true true

#
# Duplicate expressions inside a UNION subquery
#
query II
SELECT * FROM (SELECT x AS a, y AS a FROM t UNION ALL SELECT y AS a, x AS a FROM t) ORDER BY 1, 2;
----
1 2
2 1
3 4
4 3

#
# Known limitation: wildcard expansion happens after dedup, so
# SELECT *, col FROM t still errors when col overlaps with *.
# This will be addressed in a follow-up PR.
#
query error DataFusion error: Error during planning: Projections require unique expression names but the expression "t\.x" at position 0 and "t\.x" at position 2 have the same name\. Consider aliasing \("AS"\) one of them\.
SELECT *, x FROM t;

# Cleanup
statement ok
DROP TABLE t;

statement ok
DROP TABLE inv;
10 changes: 9 additions & 1 deletion datafusion/sqllogictest/test_files/unnest.slt
Original file line number Diff line number Diff line change
Expand Up @@ -547,8 +547,16 @@ select unnest(column1) from (select * from (values([1,2,3]), ([4,5,6])) limit 1
5
6

query error DataFusion error: Error during planning: Projections require unique expression names but the expression "UNNEST\(unnest_table.column1\)" at position 0 and "UNNEST\(unnest_table.column1\)" at position 1 have the same name. Consider aliasing \("AS"\) one of them.
query II
select unnest(column1), unnest(column1) from unnest_table;
----
1 1
2 2
3 3
4 4
5 5
6 6
12 12

query II
select unnest(column1), unnest(column1) u1 from unnest_table;
Expand Down
Loading