Skip to content

Commit

Permalink
#7782 - catch when Postgres planning removes all Citus tables (#7907)
Browse files Browse the repository at this point in the history
DESCRIPTION: fix a planning error caused by a redundant WHERE clause

Fix a Citus planning glitch that occurs in a DML query when the WHERE
clause of the query is of the form:
    ` WHERE true OR <expression with 1 or more citus tables> `
and this is the only place in the query referencing a citus table.
Postgres' standard planner transforms the WHERE clause to:
    ` WHERE true `
So the query now has no citus tables, confusing the Citus planner as
described in issues #7782 and #7783. The fix is to check, after Postgres
standard planner, if the Query has been transformed as shown, and re-run
the check of whether or not the query needs distributed planning.
  • Loading branch information
colm-mchugh authored Feb 27, 2025
1 parent 2b96422 commit 86107ca
Show file tree
Hide file tree
Showing 3 changed files with 241 additions and 1 deletion.
48 changes: 47 additions & 1 deletion src/backend/distributed/planner/distributed_planner.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,10 @@ static RouterPlanType GetRouterPlanType(Query *query,
bool hasUnresolvedParams);
static void ConcatenateRTablesAndPerminfos(PlannedStmt *mainPlan,
PlannedStmt *concatPlan);

static bool CheckPostPlanDistribution(bool isDistributedQuery,
Query *origQuery,
List *rangeTableList,
Query *plannedQuery);

/* Distributed planner hook */
PlannedStmt *
Expand Down Expand Up @@ -272,6 +275,11 @@ distributed_planner(Query *parse,
planContext.plan = standard_planner(planContext.query, NULL,
planContext.cursorOptions,
planContext.boundParams);
needsDistributedPlanning = CheckPostPlanDistribution(needsDistributedPlanning,
planContext.originalQuery,
rangeTableList,
planContext.query);

if (needsDistributedPlanning)
{
result = PlanDistributedStmt(&planContext, rteIdCounter);
Expand Down Expand Up @@ -2729,3 +2737,41 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)
}
}
}


static bool
CheckPostPlanDistribution(bool isDistributedQuery,
Query *origQuery, List *rangeTableList,
Query *plannedQuery)
{
if (isDistributedQuery)
{
Node *origQuals = origQuery->jointree->quals;
Node *plannedQuals = plannedQuery->jointree->quals;

#if PG_VERSION_NUM >= PG_VERSION_17
if (IsMergeQuery(origQuery))
{
origQuals = origQuery->mergeJoinCondition;
plannedQuals = plannedQuery->mergeJoinCondition;
}
#endif

/*
* The WHERE quals have been eliminated by the Postgres planner, possibly by
* an OR clause that was simplified to TRUE. In such cases, we need to check
* if the planned query still requires distributed planning.
*/
if (origQuals != NULL && plannedQuals == NULL)
{
List *rtesPostPlan = ExtractRangeTableEntryList(plannedQuery);
if (list_length(rtesPostPlan) < list_length(rangeTableList))
{
isDistributedQuery = ListContainsDistributedTableRTE(
rtesPostPlan, NULL);
}
}
}

return isDistributedQuery;
}
110 changes: 110 additions & 0 deletions src/test/regress/expected/subquery_in_where.out
Original file line number Diff line number Diff line change
Expand Up @@ -1146,7 +1146,117 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)

(1 row)

-- Test redundant WHERE clause (fix #7782, #7783)
CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
CREATE TABLE t7 (vkey int4, pkey int4);
-- DEBUG messages not needed for these tests
SET client_min_messages TO DEFAULT;
INSERT INTO t0 (vkey, pkey, c0) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
INSERT INTO t7 (vkey, pkey) values
(3, 59525);
SELECT create_reference_table('t1');
create_reference_table
---------------------------------------------------------------------

(1 row)

SELECT create_distributed_table('t3', 'c9');
create_distributed_table
---------------------------------------------------------------------

(1 row)

UPDATE t0 set vkey = 117
where (((t0.pkey) in (select t7.vkey from t7 where false
union all
select t3.pkey from t3 where false
)))
or TRUE;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
117 | 13000 | Sat Sep 04 13:38:00 2032
(1 row)

-- MERGE command with redundant join can be planned locally
EXPLAIN (costs off, timing off)
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
QUERY PLAN
---------------------------------------------------------------------
Merge on t0
-> Nested Loop
-> Seq Scan on t7
-> Materialize
-> Seq Scan on t0
(5 rows)

-- UPDATE via MERGE with redundant join clause:
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
113 | 13000 | Sat Sep 04 13:38:00 2032
(1 row)

DELETE FROM t0
where TRUE or (((t0.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)));
-- Local table t0 is now empty (0 rows)
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
(0 rows)

INSERT INTO t3 (vkey, pkey, c9) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
-- Distributed table update with redundant WHERE
UPDATE t3 set vkey = 117
where (((t3.pkey) in (select t1.vkey from t1 where false
union all
select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
)))
or TRUE;
SELECT vkey, pkey FROM t3;
vkey | pkey
---------------------------------------------------------------------
117 | 13000
(1 row)

-- Distributed table delete with redundant WHERE
DELETE FROM t3
where TRUE or (((t3.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);
-- Distributed table t3 is now empty
SELECT vkey, pkey FROM t3;
vkey | pkey
---------------------------------------------------------------------
(0 rows)

DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;
84 changes: 84 additions & 0 deletions src/test/regress/sql/subquery_in_where.sql
Original file line number Diff line number Diff line change
Expand Up @@ -847,8 +847,92 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)
WHERE e1.user_id = u1.user_id
) > 115 AND false;

-- Test redundant WHERE clause (fix #7782, #7783)
CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
CREATE TABLE t7 (vkey int4, pkey int4);

-- DEBUG messages not needed for these tests
SET client_min_messages TO DEFAULT;

INSERT INTO t0 (vkey, pkey, c0) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));

INSERT INTO t7 (vkey, pkey) values
(3, 59525);

SELECT create_reference_table('t1');
SELECT create_distributed_table('t3', 'c9');

UPDATE t0 set vkey = 117
where (((t0.pkey) in (select t7.vkey from t7 where false
union all
select t3.pkey from t3 where false
)))
or TRUE;

-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;

-- MERGE command with redundant join can be planned locally
EXPLAIN (costs off, timing off)
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;

-- UPDATE via MERGE with redundant join clause:
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;

-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;

DELETE FROM t0
where TRUE or (((t0.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)));

-- Local table t0 is now empty (0 rows)
SELECT vkey, pkey, c0 FROM t0;

INSERT INTO t3 (vkey, pkey, c9) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));

-- Distributed table update with redundant WHERE
UPDATE t3 set vkey = 117
where (((t3.pkey) in (select t1.vkey from t1 where false
union all
select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
)))
or TRUE;

SELECT vkey, pkey FROM t3;

-- Distributed table delete with redundant WHERE
DELETE FROM t3
where TRUE or (((t3.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);

-- Distributed table t3 is now empty
SELECT vkey, pkey FROM t3;

DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;

0 comments on commit 86107ca

Please sign in to comment.