Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
use arrow::compute::can_cast_types;
use datafusion_expr::binary::BinaryTypeCoercer;
use itertools::{Itertools as _, izip};
use std::sync::Arc;
use std::sync::{Arc, LazyLock};

use crate::analyzer::AnalyzerRule;
use crate::utils::NamePreserver;
Expand Down Expand Up @@ -91,11 +91,11 @@ impl AnalyzerRule for TypeCoercion {
}

fn analyze(&self, plan: LogicalPlan, config: &ConfigOptions) -> Result<LogicalPlan> {
let empty_schema = DFSchema::empty();
static EMPTY_SCHEMA: LazyLock<DFSchema> = LazyLock::new(DFSchema::empty);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Constructing an empty `DFSchema` isn't free, so hoist it into a `LazyLock` static — similar to #20534.


// recurse
let transformed_plan = plan
.transform_up_with_subqueries(|plan| analyze_internal(&empty_schema, plan))?
.transform_up_with_subqueries(|plan| analyze_internal(&EMPTY_SCHEMA, plan))?
.data;

// finish
Expand Down
6 changes: 1 addition & 5 deletions datafusion/optimizer/src/common_subexpr_eliminate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,11 +325,7 @@ impl CommonSubexprEliminate {
.map(|expr| Some(name_preserver.save(expr)))
.collect::<Vec<_>>()
} else {
new_aggr_expr
.clone()
.into_iter()
.map(|_| None)
.collect::<Vec<_>>()
(0..new_aggr_expr.len()).map(|_| None).collect()
};

let mut agg_exprs = common_exprs
Expand Down
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/optimize_unions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ impl OptimizerRule for OptimizeUnions {
let inputs = inputs
.into_iter()
.flat_map(extract_plans_from_union)
.map(|plan| coerce_plan_expr_for_schema(plan, &schema))
.map(|plan| Ok(Arc::new(coerce_plan_expr_for_schema(plan, &schema)?)))
.collect::<Result<Vec<_>>>()?;

Ok(Transformed::yes(LogicalPlan::Union(Union {
inputs: inputs.into_iter().map(Arc::new).collect_vec(),
inputs,
schema,
})))
}
Expand Down
36 changes: 25 additions & 11 deletions datafusion/optimizer/src/push_down_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,8 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result<bool> {
/// * do nothing.
fn extract_or_clauses_for_join<'a>(
filters: &'a [Expr],
schema: &'a DFSchema,
schema_cols: &'a HashSet<Column>,
) -> impl Iterator<Item = Expr> + 'a {
let schema_columns = schema_columns(schema);

// new formed OR clauses and their column references
filters.iter().filter_map(move |expr| {
if let Expr::BinaryExpr(BinaryExpr {
Expand All @@ -332,8 +330,8 @@ fn extract_or_clauses_for_join<'a>(
right,
}) = expr
{
let left_expr = extract_or_clause(left.as_ref(), &schema_columns);
let right_expr = extract_or_clause(right.as_ref(), &schema_columns);
let left_expr = extract_or_clause(left.as_ref(), schema_cols);
let right_expr = extract_or_clause(right.as_ref(), schema_cols);

// If nothing can be extracted from any sub clauses, do nothing for this OR clause.
if let (Some(left_expr), Some(right_expr)) = (left_expr, right_expr) {
Expand Down Expand Up @@ -421,6 +419,10 @@ fn push_down_all_join(
// 3) should be kept as filter conditions
let left_schema = join.left.schema();
let right_schema = join.right.schema();

let left_schema_columns = schema_columns(left_schema.as_ref());
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there might be an even better approach here, but I need to do another pass.

let right_schema_columns = schema_columns(right_schema.as_ref());

let mut left_push = vec![];
let mut right_push = vec![];
let mut keep_predicates = vec![];
Expand Down Expand Up @@ -467,26 +469,38 @@ fn push_down_all_join(
// Extract from OR clause, generate new predicates for both side of join if possible.
// We only track the unpushable predicates above.
if left_preserved {
left_push.extend(extract_or_clauses_for_join(&keep_predicates, left_schema));
left_push.extend(extract_or_clauses_for_join(&join_conditions, left_schema));
left_push.extend(extract_or_clauses_for_join(
&keep_predicates,
&left_schema_columns,
));
left_push.extend(extract_or_clauses_for_join(
&join_conditions,
&left_schema_columns,
));
}
if right_preserved {
right_push.extend(extract_or_clauses_for_join(&keep_predicates, right_schema));
right_push.extend(extract_or_clauses_for_join(&join_conditions, right_schema));
right_push.extend(extract_or_clauses_for_join(
&keep_predicates,
&right_schema_columns,
));
right_push.extend(extract_or_clauses_for_join(
&join_conditions,
&right_schema_columns,
));
}

// For predicates from join filter, we should check with if a join side is preserved
// in term of join filtering.
if on_left_preserved {
left_push.extend(extract_or_clauses_for_join(
&on_filter_join_conditions,
left_schema,
&left_schema_columns,
));
}
if on_right_preserved {
right_push.extend(extract_or_clauses_for_join(
&on_filter_join_conditions,
right_schema,
&right_schema_columns,
));
}

Expand Down
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/push_down_limit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ impl OptimizerRule for PushDownLimit {
true
}

#[expect(clippy::only_used_in_recursion)]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Passing `config` only to satisfy recursion is wasteful; the `clippy::only_used_in_recursion` lint already flags it, hence the `#[expect(...)]`.

fn rewrite(
&self,
plan: LogicalPlan,
config: &dyn OptimizerConfig,
) -> Result<Transformed<LogicalPlan>> {
let _ = config.options();
let LogicalPlan::Limit(mut limit) = plan else {
return Ok(Transformed::no(plan));
};
Expand Down
53 changes: 25 additions & 28 deletions datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use std::borrow::Cow;
use std::collections::HashSet;
use std::ops::Not;
use std::sync::Arc;
use std::sync::LazyLock;

use datafusion_common::config::ConfigOptions;
use datafusion_common::nested_struct::has_one_of_more_common_fields;
Expand Down Expand Up @@ -498,8 +499,6 @@ struct ConstEvaluator {
/// The `config_options` are passed from the session to allow scalar functions
/// to access configuration like timezone.
execution_props: ExecutionProps,
input_schema: DFSchema,
input_batch: RecordBatch,
}

/// The simplify result of ConstEvaluator
Expand Down Expand Up @@ -575,6 +574,18 @@ impl TreeNodeRewriter for ConstEvaluator {
}
}

static DUMMY_SCHEMA: LazyLock<Arc<Schema>> =
LazyLock::new(|| Arc::new(Schema::new(vec![Field::new(".", DataType::Null, true)])));

static DUMMY_DF_SCHEMA: LazyLock<DFSchema> =
LazyLock::new(|| DFSchema::try_from(Arc::clone(&*DUMMY_SCHEMA)).unwrap());

static DUMMY_BATCH: LazyLock<RecordBatch> = LazyLock::new(|| {
// Need a single "input" row to produce a single output row
let col = new_null_array(&DataType::Null, 1);
RecordBatch::try_new(DUMMY_SCHEMA.clone(), vec![col]).unwrap()
});

impl ConstEvaluator {
/// Create a new `ConstantEvaluator`.
///
Expand All @@ -588,25 +599,13 @@ impl ConstEvaluator {
pub fn try_new(config_options: Option<Arc<ConfigOptions>>) -> Result<Self> {
// The dummy column name is unused and doesn't matter as only
// expressions without column references can be evaluated
static DUMMY_COL_NAME: &str = ".";
let schema = Arc::new(Schema::new(vec![Field::new(
DUMMY_COL_NAME,
DataType::Null,
true,
)]));
let input_schema = DFSchema::try_from(Arc::clone(&schema))?;
// Need a single "input" row to produce a single output row
let col = new_null_array(&DataType::Null, 1);
let input_batch = RecordBatch::try_new(schema, vec![col])?;

let mut execution_props = ExecutionProps::new();
execution_props.config_options = config_options;

Ok(Self {
can_evaluate: vec![],
execution_props,
input_schema,
input_batch,
})
}

Expand Down Expand Up @@ -702,16 +701,13 @@ impl ConstEvaluator {
return ConstSimplifyResult::NotSimplified(s, m);
}

let phys_expr = match create_physical_expr(
&expr,
&self.input_schema,
&self.execution_props,
) {
Ok(e) => e,
Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
};
let phys_expr =
match create_physical_expr(&expr, &DUMMY_DF_SCHEMA, &self.execution_props) {
Ok(e) => e,
Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
};
let metadata = phys_expr
.return_field(self.input_batch.schema_ref())
.return_field(DUMMY_BATCH.schema_ref())
.ok()
.and_then(|f| {
let m = f.metadata();
Expand All @@ -720,7 +716,7 @@ impl ConstEvaluator {
false => Some(FieldMetadata::from(m)),
}
});
let col_val = match phys_expr.evaluate(&self.input_batch) {
let col_val = match phys_expr.evaluate(&DUMMY_BATCH) {
Ok(v) => v,
Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr),
};
Expand Down Expand Up @@ -1698,10 +1694,11 @@ impl TreeNodeRewriter for Simplifier<'_> {
{
// Repeated occurrences of wildcard are redundant so remove them
// exp LIKE '%%' --> exp LIKE '%'
let simplified_pattern = Regex::new("%%+")
.unwrap()
.replace_all(pattern_str, "%")
.to_string();

static LIKE_REGEX: LazyLock<Regex> =
LazyLock::new(|| Regex::new("%%+").unwrap());
let simplified_pattern =
LIKE_REGEX.replace_all(pattern_str, "%").to_string();
Transformed::yes(Expr::Like(Like {
pattern: Box::new(
string_scalar.to_expr(&simplified_pattern),
Expand Down
Loading