#7782 - catch when Postgres planning removes all Citus tables #7907

Merged
merged 1 commit on Feb 27, 2025
48 changes: 47 additions & 1 deletion src/backend/distributed/planner/distributed_planner.c
@@ -151,7 +151,10 @@
                                       bool hasUnresolvedParams);
static void ConcatenateRTablesAndPerminfos(PlannedStmt *mainPlan,
                                           PlannedStmt *concatPlan);

static bool CheckPostPlanDistribution(bool isDistributedQuery,
                                      Query *origQuery,
                                      List *rangeTableList,
                                      Query *plannedQuery);

/* Distributed planner hook */
PlannedStmt *
@@ -272,6 +275,11 @@
    planContext.plan = standard_planner(planContext.query, NULL,
                                        planContext.cursorOptions,
                                        planContext.boundParams);
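
    /*
     * standard_planner() may have simplified away every Citus table, for
     * example when a redundant qual such as "<subquery> OR true" is folded
     * to true; re-check before committing to distributed planning.
     */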
    needsDistributedPlanning = CheckPostPlanDistribution(needsDistributedPlanning,
                                                         planContext.originalQuery,
                                                         rangeTableList,
                                                         planContext.query);

    if (needsDistributedPlanning)
    {
        result = PlanDistributedStmt(&planContext, rteIdCounter);
@@ -2729,3 +2737,41 @@
}
}
}


static bool
CheckPostPlanDistribution(bool isDistributedQuery,
                          Query *origQuery, List *rangeTableList,
                          Query *plannedQuery)
{
    if (isDistributedQuery)
[Review comment from @naisila (Member), Feb 26, 2025]
Sorry, I am a bit confused. Wasn't the original issue that we skipped
distributed planning because isDistributedQuery was false when it should have
been true? It looks like the original issue was the other way around, since
you are checking whether isDistributedQuery is true here...

EDIT: I now understand the issue. I had to re-read the PR description
carefully.

    {
        Node *origQuals = origQuery->jointree->quals;
        Node *plannedQuals = plannedQuery->jointree->quals;
[Review comment on lines +2749 to +2750, from @naisila (Member), Feb 26, 2025]
Is there any way we can run into this in a MERGE query, given that in PG17 we
have a separate mergeJoinCondition?

[Reply from the PR author (Contributor)]
Possibly... if the target is a local table... I will verify, thanks for
noticing.

[Reply from the PR author (Contributor)]
So a PG17+ MERGE query was not covered by the fix; I have updated the PR
accordingly.

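        /*
         * On PG17+, a MERGE statement keeps its join condition in
         * mergeJoinCondition rather than in the WHERE quals, so read the
         * condition from there instead.
         */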
#if PG_VERSION_NUM >= PG_VERSION_17
        if (IsMergeQuery(origQuery))
        {
            origQuals = origQuery->mergeJoinCondition;
            plannedQuals = plannedQuery->mergeJoinCondition;
        }
#endif

        /*
         * The WHERE quals have been eliminated by the Postgres planner, possibly by
         * an OR clause that was simplified to TRUE. In such cases, we need to check
         * if the planned query still requires distributed planning.
         */
        if (origQuals != NULL && plannedQuals == NULL)
        {
            List *rtesPostPlan = ExtractRangeTableEntryList(plannedQuery);

            if (list_length(rtesPostPlan) < list_length(rangeTableList))
            {
                isDistributedQuery = ListContainsDistributedTableRTE(
                    rtesPostPlan, NULL);
            }
        }
    }

    return isDistributedQuery;
}
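
For context, here is a minimal sketch of the pattern the new check targets,
using hypothetical table names that are not part of this PR (local_t is a
plain Postgres table, dist_t is a Citus distributed table). The subquery
referencing dist_t sits under WHERE false, so the Postgres planner folds the
entire qual to true and drops it; the planned query then references no Citus
table and can fall through to local planning:

-- hypothetical illustration; the regression tests below use real tables
UPDATE local_t SET a = 1
WHERE (local_t.a IN (SELECT b FROM dist_t WHERE false))
   OR true;

On PG17+ the same situation can arise with MERGE, whose join condition lives
in mergeJoinCondition rather than in the WHERE quals, which is why the
function reads the condition from there for merge queries.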
110 changes: 110 additions & 0 deletions src/test/regress/expected/subquery_in_where.out
@@ -1146,7 +1146,117 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)

(1 row)

-- Test redundant WHERE clause (fix #7782, #7783)
CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
CREATE TABLE t7 (vkey int4, pkey int4);
-- DEBUG messages not needed for these tests
SET client_min_messages TO DEFAULT;
INSERT INTO t0 (vkey, pkey, c0) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
INSERT INTO t7 (vkey, pkey) values
(3, 59525);
SELECT create_reference_table('t1');
create_reference_table
---------------------------------------------------------------------

(1 row)

SELECT create_distributed_table('t3', 'c9');
create_distributed_table
---------------------------------------------------------------------

(1 row)

UPDATE t0 set vkey = 117
where (((t0.pkey) in (select t7.vkey from t7 where false
union all
select t3.pkey from t3 where false
)))
or TRUE;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
117 | 13000 | Sat Sep 04 13:38:00 2032
(1 row)

-- MERGE command with redundant join can be planned locally
EXPLAIN (costs off, timing off)
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
QUERY PLAN
---------------------------------------------------------------------
Merge on t0
-> Nested Loop
-> Seq Scan on t7
-> Materialize
-> Seq Scan on t0
(5 rows)

-- UPDATE via MERGE with redundant join clause:
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;
-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
113 | 13000 | Sat Sep 04 13:38:00 2032
(1 row)

DELETE FROM t0
where TRUE or (((t0.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)));
-- Local table t0 is now empty (0 rows)
SELECT vkey, pkey, c0 FROM t0;
vkey | pkey | c0
---------------------------------------------------------------------
(0 rows)

INSERT INTO t3 (vkey, pkey, c9) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));
-- Distributed table update with redundant WHERE
UPDATE t3 set vkey = 117
where (((t3.pkey) in (select t1.vkey from t1 where false
union all
select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
)))
or TRUE;
SELECT vkey, pkey FROM t3;
vkey | pkey
---------------------------------------------------------------------
117 | 13000
(1 row)

-- Distributed table delete with redundant WHERE
DELETE FROM t3
where TRUE or (((t3.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);
-- Distributed table t3 is now empty
SELECT vkey, pkey FROM t3;
vkey | pkey
---------------------------------------------------------------------
(0 rows)

DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;
84 changes: 84 additions & 0 deletions src/test/regress/sql/subquery_in_where.sql
@@ -847,8 +847,92 @@ WHERE (SELECT COUNT(DISTINCT e1.value_2)
WHERE e1.user_id = u1.user_id
) > 115 AND false;

-- Test redundant WHERE clause (fix #7782, #7783)
CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp);
CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text);
CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp);
CREATE TABLE t7 (vkey int4, pkey int4);

-- DEBUG messages not needed for these tests
SET client_min_messages TO DEFAULT;

INSERT INTO t0 (vkey, pkey, c0) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));

INSERT INTO t7 (vkey, pkey) values
(3, 59525);

SELECT create_reference_table('t1');
SELECT create_distributed_table('t3', 'c9');

UPDATE t0 set vkey = 117
where (((t0.pkey) in (select t7.vkey from t7 where false
union all
select t3.pkey from t3 where false
)))
or TRUE;

-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;

-- MERGE command with redundant join can be planned locally
EXPLAIN (costs off, timing off)
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;

-- UPDATE via MERGE with redundant join clause:
MERGE INTO t0 USING t7 ON
(((t0.pkey) in (select t7.vkey from t7 where false
union all
select t1.pkey from t1 where false
)))
or TRUE
WHEN MATCHED THEN
UPDATE SET vkey = 113;

-- Local table t0 is updated
SELECT vkey, pkey, c0 FROM t0;

DELETE FROM t0
where TRUE or (((t0.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)));

-- Local table t0 is now empty (0 rows)
SELECT vkey, pkey, c0 FROM t0;

INSERT INTO t3 (vkey, pkey, c9) values
(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0));

-- Distributed table update with redundant WHERE
UPDATE t3 set vkey = 117
where (((t3.pkey) in (select t1.vkey from t1 where false
union all
select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false
)))
or TRUE;

SELECT vkey, pkey FROM t3;

-- Distributed table delete with redundant WHERE
DELETE FROM t3
where TRUE or (((t3.vkey) >= (select
pg_catalog.regexp_count(ref_0.c5, ref_0.c6)
from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0);

-- Distributed table t3 is now empty
SELECT vkey, pkey FROM t3;

DROP TABLE local_table;
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t3;
DROP TABLE t7;
DROP SCHEMA subquery_in_where CASCADE;
SET search_path TO public;