From 0b20a9bbd8818dba7dffce1f388397ea8907356e Mon Sep 17 00:00:00 2001
From: Colm McHugh
Date: Thu, 20 Feb 2025 12:31:45 +0000
Subject: [PATCH] Fix #7782, #7783 - catch when Postgres planner removes Citus
 tables

DESCRIPTION: fix a planning error caused by a redundant WHERE clause (of the
form WHERE true OR <expression referencing a Citus table>)

Fix a Citus planning glitch where the WHERE clause of the query is of the
form:
 ` WHERE true OR <expression with 1 or more citus tables> `
and this is the only place in the query referencing a citus table.
Postgres' standard planner transforms the WHERE clause to:
 ` WHERE true `
So the query now has no citus tables, confusing the Citus planner as
described in issues #7782 and #7783. The fix is to check, after the Postgres
standard planner has run, whether the Query has been transformed as shown,
and if so to re-run the check of whether or not the query needs distributed
planning.
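To illustrate the failing shape, a minimal sketch (the table names t_local
and t_dist are hypothetical, not taken from the reported issues; t_dist is
assumed to have been distributed with create_distributed_table beforehand):

    -- t_local is a plain Postgres table, t_dist a Citus distributed table
    UPDATE t_local
       SET b = 1
     WHERE true OR t_local.a IN (SELECT a FROM t_dist);
    -- Postgres' constant folding reduces the qual to "WHERE true", removing
    -- the only reference to t_dist from the planned query; without this fix,
    -- Citus still attempted distributed planning based on the original query
    -- tree, leading to the planning errors reported in #7782 and #7783.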
---
 .../distributed/planner/distributed_planner.c |  48 +++++++-
 .../regress/expected/subquery_in_where.out    | 110 ++++++++++++++++++
 src/test/regress/sql/subquery_in_where.sql    |  84 +++++++++++++
 3 files changed, 241 insertions(+), 1 deletion(-)

diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c
index a388c767c82..36da10e8cf6 100644
--- a/src/backend/distributed/planner/distributed_planner.c
+++ b/src/backend/distributed/planner/distributed_planner.c
@@ -151,7 +151,10 @@ static RouterPlanType GetRouterPlanType(Query *query,
 										bool hasUnresolvedParams);
 static void ConcatenateRTablesAndPerminfos(PlannedStmt *mainPlan,
 										   PlannedStmt *concatPlan);
-
+static bool CheckPostPlanDistribution(bool isDistributedQuery,
+									  Query *origQuery,
+									  List *rangeTableList,
+									  Query *plannedQuery);
 
 /* Distributed planner hook */
 PlannedStmt *
@@ -272,6 +275,11 @@ distributed_planner(Query *parse,
 		planContext.plan = standard_planner(planContext.query, NULL,
 											planContext.cursorOptions,
 											planContext.boundParams);
+		needsDistributedPlanning = CheckPostPlanDistribution(needsDistributedPlanning,
+															 planContext.originalQuery,
+															 rangeTableList,
+															 planContext.query);
+
 		if (needsDistributedPlanning)
 		{
 			result = PlanDistributedStmt(&planContext, rteIdCounter);
@@ -2729,3 +2737,41 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)
 		}
 	}
 }
+
+
+static bool
+CheckPostPlanDistribution(bool isDistributedQuery,
+						  Query *origQuery, List *rangeTableList,
+						  Query *plannedQuery)
+{
+	if (isDistributedQuery)
+	{
+		Node *origQuals = origQuery->jointree->quals;
+		Node *plannedQuals = plannedQuery->jointree->quals;
+
+	#if PG_VERSION_NUM >= PG_VERSION_17
+		if (IsMergeQuery(origQuery))
+		{
+			origQuals = origQuery->mergeJoinCondition;
+			plannedQuals = plannedQuery->mergeJoinCondition;
+		}
+	#endif
+
+		/*
+		 * If the Postgres planner eliminated the WHERE quals, possibly by
+		 * simplifying an OR clause to TRUE, check whether the planned query
+		 * still requires distributed planning.
+		 */
+		if (origQuals != NULL && plannedQuals == NULL)
+		{
+			List *rtesPostPlan = ExtractRangeTableEntryList(plannedQuery);
+			if (list_length(rtesPostPlan) < list_length(rangeTableList))
+			{
+				isDistributedQuery = ListContainsDistributedTableRTE(
+					rtesPostPlan, NULL);
+			}
+		}
+	}
+
+	return isDistributedQuery;
+}
diff --git a/src/test/regress/expected/subquery_in_where.out b/src/test/regress/expected/subquery_in_where.out
index 990c29084e7..901954265b2 100644
--- a/src/test/regress/expected/subquery_in_where.out
+++ b/src/test/regress/expected/subquery_in_where.out
@@ -1146,7 +1146,117 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)
 (1 row)
 
+-- Test redundant WHERE clause (fix #7782, #7783)
+CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
+CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
+CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
+CREATE TABLE t7 (vkey int4, pkey int4);
+-- DEBUG messages not needed for these tests
 SET client_min_messages TO DEFAULT;
+INSERT INTO t0 (vkey, pkey, c0) values
+(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
+INSERT INTO t7 (vkey, pkey) values
+(3, 59525);
+SELECT create_reference_table('t1');
+ create_reference_table
+---------------------------------------------------------------------
+
+(1 row)
+
+SELECT create_distributed_table('t3', 'c9');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+UPDATE t0 set vkey = 117
+where (((t0.pkey) in (select t7.vkey from t7 where false
+                      union all
+                      select t3.pkey from t3 where false
+                      )))
+   or TRUE;
+-- Local table t0 is updated
+SELECT vkey, pkey, c0 FROM t0;
+ vkey | pkey  |            c0
+---------------------------------------------------------------------
+  117 | 13000 | Sat Sep 04 13:38:00 2032
+(1 row)
+
+-- MERGE command with redundant join can be planned locally
+EXPLAIN (costs off, timing off)
+MERGE INTO t0 USING t7 ON
+    (((t0.pkey) in (select t7.vkey from t7 where false
+                    union all
+                    select t1.pkey from t1 where false
+                    )))
+    or TRUE
+WHEN MATCHED THEN
+    UPDATE SET vkey = 113;
+           QUERY PLAN
+---------------------------------------------------------------------
+ Merge on t0
+   ->  Nested Loop
+         ->  Seq Scan on t7
+         ->  Materialize
+               ->  Seq Scan on t0
+(5 rows)
+
+-- UPDATE via MERGE with redundant join clause:
+MERGE INTO t0 USING t7 ON
+    (((t0.pkey) in (select t7.vkey from t7 where false
+                    union all
+                    select t1.pkey from t1 where false
+                    )))
+    or TRUE
+WHEN MATCHED THEN
+    UPDATE SET vkey = 113;
+-- Local table t0 is updated
+SELECT vkey, pkey, c0 FROM t0;
+ vkey | pkey  |            c0
+---------------------------------------------------------------------
+  113 | 13000 | Sat Sep 04 13:38:00 2032
+(1 row)
+
+DELETE FROM t0
+where TRUE or (((t0.vkey) >= (select
+    pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
+    from t1 as ref_0 where true)));
+-- Local table t0 is now empty (0 rows)
+SELECT vkey, pkey, c0 FROM t0;
+ vkey | pkey | c0
+---------------------------------------------------------------------
+(0 rows)
+
+INSERT INTO t3 (vkey, pkey, c9) values
+(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
+-- Distributed table update with redundant WHERE
+UPDATE t3 set vkey = 117
+where (((t3.pkey) in (select t1.vkey from t1 where false
+                      union all
+                      select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
+                      )))
+   or TRUE;
+SELECT vkey, pkey FROM t3;
+ vkey | pkey
+---------------------------------------------------------------------
+  117 | 13000
+(1 row)
+
+-- Distributed table delete with redundant WHERE
+DELETE FROM t3
+where TRUE or (((t3.vkey) >= (select
+    pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
+    from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);
+-- Distributed table t3 is now empty
+SELECT vkey, pkey FROM t3;
+ vkey | pkey
+---------------------------------------------------------------------
+(0 rows)
+
 DROP TABLE local_table;
+DROP TABLE t0;
+DROP TABLE t1;
+DROP TABLE t3;
+DROP TABLE t7;
 DROP SCHEMA subquery_in_where CASCADE;
 SET search_path TO public;
diff --git a/src/test/regress/sql/subquery_in_where.sql b/src/test/regress/sql/subquery_in_where.sql
index 8316508b78b..ebdb60890a1 100644
--- a/src/test/regress/sql/subquery_in_where.sql
+++ b/src/test/regress/sql/subquery_in_where.sql
@@ -847,8 +847,92 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)
 		WHERE e1.user_id = u1.user_id
 	) > 115 AND false;
 
+-- Test redundant WHERE clause (fix #7782, #7783)
+CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
+CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
+CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
+CREATE TABLE t7 (vkey int4, pkey int4);
+
+-- DEBUG messages not needed for these tests
 SET client_min_messages TO DEFAULT;
 
+INSERT INTO t0 (vkey, pkey, c0) values
+(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
+
+INSERT INTO t7 (vkey, pkey) values
+(3, 59525);
+
+SELECT create_reference_table('t1');
+SELECT create_distributed_table('t3', 'c9');
+
+UPDATE t0 set vkey = 117
+where (((t0.pkey) in (select t7.vkey from t7 where false
+                      union all
+                      select t3.pkey from t3 where false
+                      )))
+   or TRUE;
+
+-- Local table t0 is updated
+SELECT vkey, pkey, c0 FROM t0;
+
+-- MERGE command with redundant join can be planned locally
+EXPLAIN (costs off, timing off)
+MERGE INTO t0 USING t7 ON
+    (((t0.pkey) in (select t7.vkey from t7 where false
+                    union all
+                    select t1.pkey from t1 where false
+                    )))
+    or TRUE
+WHEN MATCHED THEN
+    UPDATE SET vkey = 113;
+
+-- UPDATE via MERGE with redundant join clause:
+MERGE INTO t0 USING t7 ON
+    (((t0.pkey) in (select t7.vkey from t7 where false
+                    union all
+                    select t1.pkey from t1 where false
+                    )))
+    or TRUE
+WHEN MATCHED THEN
+    UPDATE SET vkey = 113;
+
+-- Local table t0 is updated
+SELECT vkey, pkey, c0 FROM t0;
+
+DELETE FROM t0
+where TRUE or (((t0.vkey) >= (select
+    pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
+    from t1 as ref_0 where true)));
+
+-- Local table t0 is now empty (0 rows)
+SELECT vkey, pkey, c0 FROM t0;
+
+INSERT INTO t3 (vkey, pkey, c9) values
+(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
+
+-- Distributed table update with redundant WHERE
+UPDATE t3 set vkey = 117
+where (((t3.pkey) in (select t1.vkey from t1 where false
+                      union all
+                      select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
+                      )))
+   or TRUE;
+
+SELECT vkey, pkey FROM t3;
+
+-- Distributed table delete with redundant WHERE
+DELETE FROM t3
+where TRUE or (((t3.vkey) >= (select
+    pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
+    from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);
+
+-- Distributed table t3 is now empty
+SELECT vkey, pkey FROM t3;
+
 DROP TABLE local_table;
+DROP TABLE t0;
+DROP TABLE t1;
+DROP TABLE t3;
+DROP TABLE t7;
 DROP SCHEMA subquery_in_where CASCADE;
 SET search_path TO public;