Skip to content

Commit

Permalink
Fix #7782, #7783 - catch when Postgres planner removes Citus tables
Browse files Browse the repository at this point in the history
DESCRIPTION: fix a planning error caused by a redundant WHERE clause
(of the form WHERE true OR <expression>)

Fix a Citus planning glitch, where the WHERE clause of
the query is of the form:
` WHERE true OR <expression with 1 or more Citus tables> `
and this is the only place in the query referencing a Citus
table. Postgres' standard planner transforms the WHERE clause to:
` WHERE true `
So the query now has no Citus tables, confusing the Citus planner
as described in issues #7782 and #7783. The fix is to check, after
the Postgres standard planner runs, if the Query has been transformed as
shown, and re-run the check of whether or not the query needs
distributed planning.
  • Loading branch information
colm-mchugh committed Feb 26, 2025
1 parent 2b96422 commit 0b20a9b
Show file tree
Hide file tree
Showing 3 changed files with 241 additions and 1 deletion.
48 changes: 47 additions & 1 deletion src/backend/distributed/planner/distributed_planner.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,10 @@ static RouterPlanType GetRouterPlanType(Query *query,
bool hasUnresolvedParams);
static void ConcatenateRTablesAndPerminfos(PlannedStmt *mainPlan,
PlannedStmt *concatPlan);

static bool CheckPostPlanDistribution(bool isDistributedQuery,
Query *origQuery,
List *rangeTableList,
Query *plannedQuery);

/* Distributed planner hook */
PlannedStmt *
Expand Down Expand Up @@ -272,6 +275,11 @@ distributed_planner(Query *parse,
planContext.plan = standard_planner(planContext.query, NULL,
planContext.cursorOptions,
planContext.boundParams);
needsDistributedPlanning = CheckPostPlanDistribution(needsDistributedPlanning,
planContext.originalQuery,
rangeTableList,
planContext.query);

if (needsDistributedPlanning)
{
result = PlanDistributedStmt(&planContext, rteIdCounter);
Expand Down Expand Up @@ -2729,3 +2737,41 @@ WarnIfListHasForeignDistributedTable(List *rangeTableList)
}
}
}


/*
 * CheckPostPlanDistribution returns whether the query still requires
 * distributed planning after the Postgres standard planner has run.
 *
 * The standard planner may simplify the WHERE clause away entirely; in
 * particular, "WHERE true OR <expression>" is reduced to "WHERE true".
 * If the eliminated expression held the query's only references to Citus
 * tables, the planned query no longer involves any distributed table and
 * must not be handed to the distributed planner (issues #7782, #7783).
 *
 * isDistributedQuery: the decision made before standard_planner() ran.
 * origQuery: the query as it looked before standard_planner().
 * rangeTableList: range table entries extracted from the original query.
 * plannedQuery: the (possibly simplified) query after standard_planner().
 */
static bool
CheckPostPlanDistribution(bool isDistributedQuery,
						  Query *origQuery, List *rangeTableList,
						  Query *plannedQuery)
{
	/* Only a query already deemed distributed can be "demoted" here */
	if (isDistributedQuery)
	{
		Node *origQuals = origQuery->jointree->quals;
		Node *plannedQuals = plannedQuery->jointree->quals;

#if PG_VERSION_NUM >= PG_VERSION_17
		/*
		 * For MERGE (exposed on the Query in PG 17+), the join condition
		 * plays the role of the WHERE quals.
		 */
		if (IsMergeQuery(origQuery))
		{
			origQuals = origQuery->mergeJoinCondition;
			plannedQuals = plannedQuery->mergeJoinCondition;
		}
#endif

		/*
		 * The WHERE quals have been eliminated by the Postgres planner, possibly by
		 * an OR clause that was simplified to TRUE. In such cases, we need to check
		 * if the planned query still requires distributed planning.
		 */
		if (origQuals != NULL && plannedQuals == NULL)
		{
			List *rtesPostPlan = ExtractRangeTableEntryList(plannedQuery);

			/*
			 * Range table entries were dropped along with the quals; re-check
			 * whether any surviving RTE is a distributed table.
			 */
			if (list_length(rtesPostPlan) < list_length(rangeTableList))
			{
				isDistributedQuery = ListContainsDistributedTableRTE(
					rtesPostPlan, NULL);
			}
		}
	}

	return isDistributedQuery;
}
110 changes: 110 additions & 0 deletions src/test/regress/expected/subquery_in_where.out
Original file line number Diff line number Diff line change
Expand Up @@ -1146,7 +1146,117 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)

(1 row)

-- Test redundant WHERE clause (fix #7782, #7783)
CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
CREATE TABLE t7 (vkey int4, pkey int4);
-- DEBUG messages not needed for these tests
SET client_min_messages TO DEFAULT;
INSERT INTO t0 (vkey, pkey, c0) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
INSERT INTO t7 (vkey, pkey) values
(3, 59525);
SELECT create_reference_table('t1');
create_reference_table
---------------------------------------------------------------------

(1 row)

SELECT create_distributed_table('t3', 'c9');
create_distributed_table
---------------------------------------------------------------------

(1 row)

UPDATE t0 set vkey = 117
where (((t0.pkey) in (select t7.vkey from t7 where false
union all
select t3.pkey from t3 where false
)))
or TRUE;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
117 | 13000 | Sat Sep 04 13:38:00 2032
(1 row)

-- MERGE command with redundant join can be planned locally
EXPLAIN (costs off, timing off)
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
QUERY PLAN
---------------------------------------------------------------------
Merge on t0
-> Nested Loop
-> Seq Scan on t7
-> Materialize
-> Seq Scan on t0
(5 rows)

-- UPDATE via MERGE with redundant join clause:
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
113 | 13000 | Sat Sep 04 13:38:00 2032
(1 row)

DELETE FROM t0
where TRUE or (((t0.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)));
-- Local table t0 is now empty (0 rows)
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
(0 rows)

INSERT INTO t3 (vkey, pkey, c9) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
-- Distributed table update with redundant WHERE
UPDATE t3 set vkey = 117
where (((t3.pkey) in (select t1.vkey from t1 where false
union all
select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
)))
or TRUE;
SELECT vkey, pkey FROM t3;
vkey | pkey
---------------------------------------------------------------------
117 | 13000
(1 row)

-- Distributed table delete with redundant WHERE
DELETE FROM t3
where TRUE or (((t3.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);
-- Distributed table t3 is now empty
SELECT vkey, pkey FROM t3;
vkey | pkey
---------------------------------------------------------------------
(0 rows)

DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;
84 changes: 84 additions & 0 deletions src/test/regress/sql/subquery_in_where.sql
Original file line number Diff line number Diff line change
Expand Up @@ -847,8 +847,92 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)
WHERE e1.user_id = u1.user_id
) > 115 AND false;

-- Test redundant WHERE clause (fix #7782, #7783)
-- A WHERE/ON clause of the form "<subquery referencing Citus tables> OR TRUE"
-- is simplified by the Postgres planner to plain TRUE, which can remove the
-- query's only Citus table references. These tests exercise that path for
-- UPDATE, DELETE and MERGE against local, reference and distributed tables.
CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
CREATE TABLE t7 (vkey int4, pkey int4);

-- DEBUG messages not needed for these tests
SET client_min_messages TO DEFAULT;

INSERT INTO t0 (vkey, pkey, c0) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));

INSERT INTO t7 (vkey, pkey) values
(3, 59525);

-- t1 is a reference table, t3 is distributed; t0 and t7 stay local
SELECT create_reference_table('t1');
SELECT create_distributed_table('t3', 'c9');

-- Local-table UPDATE whose only Citus reference (t3) is inside the
-- redundant "... OR TRUE" clause
UPDATE t0 set vkey = 117
where (((t0.pkey) in (select t7.vkey from t7 where false
union all
select t3.pkey from t3 where false
)))
or TRUE;

-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;

-- MERGE command with redundant join can be planned locally
EXPLAIN (costs off, timing off)
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;

-- UPDATE via MERGE with redundant join clause:
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;

-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;

-- Local-table DELETE; the reference-table subquery is short-circuited away
DELETE FROM t0
where TRUE or (((t0.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)));

-- Local table t0 is now empty (0 rows)
SELECT vkey, pkey, c0 FROM t0;

INSERT INTO t3 (vkey, pkey, c9) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));

-- Distributed table update with redundant WHERE
UPDATE t3 set vkey = 117
where (((t3.pkey) in (select t1.vkey from t1 where false
union all
select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
)))
or TRUE;

SELECT vkey, pkey FROM t3;

-- Distributed table delete with redundant WHERE
DELETE FROM t3
where TRUE or (((t3.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);

-- Distributed table t3 is now empty
SELECT vkey, pkey FROM t3;

DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;

0 comments on commit 0b20a9b

Please sign in to comment.