From 9ae7666d6ce369583fb49988ae8b54c8c26cca91 Mon Sep 17 00:00:00 2001 From: Colm McHugh Date: Mon, 16 Dec 2024 15:30:26 +0000 Subject: [PATCH] [Bug Fix] SEGV on query with Left Outer Join (#7787) Fix the SEGV seen in #7787; it occurs because a column in the targetlist of a worker subquery can contain a non-empty varnullingrels field if the column is from the inner side of a left outer join. The issue can also occur with the columns in the HAVING clause, and this is also tested in the fix. The issue was triggered by the introduction of the varnullingrels to Vars in Postgres 16 (2489d76c) There is a related issue, #7705, where a non-empty varnullingrels was incorrectly copied into the query tree for the combine query. Here, a non-empty varnullingrels field of a var is incorrectly copied into the query tree for a worker subquery. The regress file from #7705 is used (and renamed) to also test this (#7787). An alternative test output file is required for Postgres 15 because of an optimization to DISTINCT in Postgres 16 (1349d2790bf). --- .../planner/query_pushdown_planning.c | 10 + .../expected/multi_outer_join_columns.out | 426 ++++++++++++++++++ ...705.out => multi_outer_join_columns_1.out} | 220 +++++++-- src/test/regress/multi_schedule | 4 +- ..._7705.sql => multi_outer_join_columns.sql} | 51 ++- 5 files changed, 677 insertions(+), 34 deletions(-) create mode 100644 src/test/regress/expected/multi_outer_join_columns.out rename src/test/regress/expected/{issue_7705.out => multi_outer_join_columns_1.out} (51%) rename src/test/regress/sql/{issue_7705.sql => multi_outer_join_columns.sql} (62%) diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 20175eac330..5317e578d82 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -2097,6 +2097,16 @@ CreateSubqueryTargetListAndAdjustVars(List *columnList) */ column->varno = 1; column->varattno = resNo; + + /* + * 1 subquery means there is one range table entry so with Postgres 16+ we need + * to ensure that column's varnullingrels - the set of join rels that can null + * the var - is empty. Otherwise, when given the query, the Postgres planner + * may attempt to access a non-existent range table and segfault, as in #7787. + */ +#if PG_VERSION_NUM >= PG_VERSION_16 + column->varnullingrels = NULL; +#endif } return subqueryTargetEntryList; diff --git a/src/test/regress/expected/multi_outer_join_columns.out b/src/test/regress/expected/multi_outer_join_columns.out new file mode 100644 index 00000000000..79527f7c65f --- /dev/null +++ b/src/test/regress/expected/multi_outer_join_columns.out @@ -0,0 +1,426 @@ +--- Test for verifying that column references (var nodes) in targets that cannot be pushed down +--- do not cause issues for the postgres planner, in particular postgres versions 16+, where the +--- varnullingrels field of a VAR node may contain relids of join relations that can make the var +--- NULL; in a rewritten distributed query without a join such relids do not have a meaning. 
+-- This test has an alternative goldfile because of the following feature in Postgres 16: +-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e +-- +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16; + server_version_ge_16 +--------------------------------------------------------------------- + t +(1 row) + +CREATE SCHEMA outer_join_columns_testing; +SET search_path to 'outer_join_columns_testing'; +SET citus.next_shard_id TO 30070000; +SET citus.shard_replication_factor TO 1; +SET citus.enable_local_execution TO ON; +CREATE TABLE t1 (id INT PRIMARY KEY); +INSERT INTO t1 VALUES (1), (2); +CREATE TABLE t2 (id INT, account_id INT, a2 INT, PRIMARY KEY(id, account_id)); +INSERT INTO t2 VALUES (3, 1, 10), (4, 2, 20), (5, 1, NULL); +SELECT create_distributed_table('t1', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$outer_join_columns_testing.t1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t2', 'account_id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$outer_join_columns_testing.t2$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Test the issue seen in #7705; a target expression with +-- a window function that cannot be pushed down because the +-- partion by is not on the distribution column also includes +-- a column from the inner side of a left outer join, which +-- produces a non-empty varnullingrels set in PG 16 (and higher) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + id | max +--------------------------------------------------------------------- + 1 | 10 + 2 | 20 + 1 | +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + WindowAgg + Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 + -> Sort + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Sort Key: remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: 
t1.id +(22 rows) + +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; + id | max +--------------------------------------------------------------------- + 1 | 10 + 2 | 20 + 1 | +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + WindowAgg + Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 + -> Sort + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Sort Key: remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t2_30070004 t2 RIGHT JOIN outer_join_columns_testing.t1_30070000 t1 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(22 rows) + +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + id | max +--------------------------------------------------------------------- + 1 | + 1 | 10 + 2 | 20 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate + Output: remote_scan.id, (max(remote_scan.max) OVER (?)), remote_scan.worker_column_3 + Group Key: remote_scan.id, max(remote_scan.max) OVER (?) 
+ -> WindowAgg + Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 + -> Sort + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Sort Key: remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(25 rows) + +CREATE SEQUENCE test_seq START 101; +CREATE OR REPLACE FUNCTION TEST_F(int) returns INT language sql stable as $$ select $1 + 42; $$ ; +-- Issue #7705 also occurs if a target expression includes a column +-- of a distributed table that is on the inner side of a left outer +-- join and a call to nextval(), because nextval() cannot be pushed +-- down, and must be run on the coordinator +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + id | test_f +--------------------------------------------------------------------- + 1 | 153 + 1 | + 2 | 165 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + QUERY PLAN +--------------------------------------------------------------------- + Result + Output: remote_scan.id, ((remote_scan.test_f + (nextval('test_seq'::regclass))::integer) + 42) + -> Sort + Output: remote_scan.id, remote_scan.test_f + Sort Key: remote_scan.id + -> Custom Scan (Citus Adaptive) + Output: remote_scan.id, remote_scan.test_f + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS test_f FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(22 rows) + +SELECT t1.id, CASE nextval('test_seq') % 2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + id | case +--------------------------------------------------------------------- + 1 | 10 + 1 | 1 + 2 | 20 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + QUERY PLAN +--------------------------------------------------------------------- + Result + Output: remote_scan.id, CASE 
((nextval('test_seq'::regclass) % '2'::bigint) = 0) WHEN CASE_TEST_EXPR THEN remote_scan."case" ELSE 1 END + -> Sort + Output: remote_scan.id, remote_scan."case" + Sort Key: remote_scan.id + -> Custom Scan (Citus Adaptive) + Output: remote_scan.id, remote_scan."case" + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS "case" FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(22 rows) + +-- Issue #7787: count distinct of a column from the inner side of a +-- left outer join will have a non-empty varnullingrels in the query +-- tree returned by Postgres 16+, so ensure this is not reflected in +-- the worker subquery constructed by Citus; it has just one relation, +-- for the pushed down subquery. +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + count +--------------------------------------------------------------------- + 2 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + Output: count(DISTINCT remote_scan.count) + -> Sort + Output: remote_scan.count + Sort Key: remote_scan.count + -> Custom Scan (Citus Adaptive) + Output: remote_scan.count + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS count FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1 + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: t2.a2 + Group Key: t2.a2 + -> Hash Right Join + Output: t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(25 rows) + +-- Issue #7787 also occurs with a HAVING clause +SELECT 1 +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT a2) > 1; + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT 1 +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT a2) > 1; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + Output: remote_scan."?column?" 
+ Filter: (count(DISTINCT remote_scan.worker_column_2) > 1) + -> Sort + Output: remote_scan."?column?", remote_scan.worker_column_2 + Sort Key: remote_scan.worker_column_2 + -> Custom Scan (Citus Adaptive) + Output: remote_scan."?column?", remote_scan.worker_column_2 + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT 1, worker_column_1 AS worker_column_2 FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1 + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: 1, t2.a2 + Group Key: t2.a2 + -> Hash Right Join + Output: t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(26 rows) + +-- Check right outer join +SELECT COUNT(DISTINCT a2) +FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id; + count +--------------------------------------------------------------------- + 2 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT COUNT(DISTINCT a2) +FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + Output: count(DISTINCT remote_scan.count) + -> Sort + Output: remote_scan.count + Sort Key: remote_scan.count + -> Custom Scan (Citus Adaptive) + Output: remote_scan.count + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS count FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t2_30070004 t2 RIGHT JOIN outer_join_columns_testing.t1_30070000 t1 ON ((t2.account_id OPERATOR(pg_catalog.=) t1.id)))) worker_subquery GROUP BY worker_column_1 + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: t2.a2 + Group Key: t2.a2 + -> Hash Right Join + Output: t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(25 rows) + +-- Check both count distinct and having clause +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT t2.id) > 1; + count +--------------------------------------------------------------------- + 2 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT t2.id) > 1; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + Output: count(DISTINCT remote_scan.count) + Filter: (count(DISTINCT remote_scan.worker_column_2) > 1) + -> Sort + Output: remote_scan.count, remote_scan.worker_column_2 + Sort Key: remote_scan.count + -> Custom Scan (Citus Adaptive) + Output: remote_scan.count, remote_scan.worker_column_2 + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS count, worker_column_2 FROM (SELECT t2.a2 AS worker_column_1, t2.id AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1, worker_column_2 + Node: 
host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: t2.a2, t2.id + Group Key: t2.a2, t2.id + -> Hash Right Join + Output: t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(26 rows) + +--- cleanup +\set VERBOSITY TERSE +DROP SCHEMA outer_join_columns_testing CASCADE; +NOTICE: drop cascades to 4 other objects +RESET all; diff --git a/src/test/regress/expected/issue_7705.out b/src/test/regress/expected/multi_outer_join_columns_1.out similarity index 51% rename from src/test/regress/expected/issue_7705.out rename to src/test/regress/expected/multi_outer_join_columns_1.out index 20b078226ad..10dc6773d10 100644 --- a/src/test/regress/expected/issue_7705.out +++ b/src/test/regress/expected/multi_outer_join_columns_1.out @@ -2,10 +2,18 @@ --- do not cause issues for the postgres planner, in particular postgres versions 16+, where the --- varnullingrels field of a VAR node may contain relids of join relations that can make the var --- NULL; in a rewritten distributed query without a join such relids do not have a meaning. ---- Issue #7705: [SEGFAULT] Querying distributed tables with window partition causes segmentation fault ---- https://github.com/citusdata/citus/issues/7705 -CREATE SCHEMA issue_7705; -SET search_path to 'issue_7705'; +-- This test has an alternative goldfile because of the following feature in Postgres 16: +-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e +-- +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16; + server_version_ge_16 +--------------------------------------------------------------------- + f +(1 row) + +CREATE SCHEMA outer_join_columns_testing; +SET search_path to 'outer_join_columns_testing'; SET citus.next_shard_id TO 30070000; SET citus.shard_replication_factor TO 1; SET citus.enable_local_execution TO ON; @@ -17,7 +25,7 @@ SELECT create_distributed_table('t1', 'id'); NOTICE: Copying data from local table... NOTICE: copying the data has completed DETAIL: The local data in the table is no longer visible, but is still on disk. -HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$issue_7705.t1$$) +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$outer_join_columns_testing.t1$$) create_distributed_table --------------------------------------------------------------------- @@ -27,7 +35,7 @@ SELECT create_distributed_table('t2', 'account_id'); NOTICE: Copying data from local table... NOTICE: copying the data has completed DETAIL: The local data in the table is no longer visible, but is still on disk. 
-HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$issue_7705.t2$$) +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$outer_join_columns_testing.t2$$) create_distributed_table --------------------------------------------------------------------- @@ -50,7 +58,7 @@ FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- WindowAgg Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 @@ -62,17 +70,17 @@ FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; Task Count: 4 Tasks Shown: One of 4 -> Task - Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Node: host=localhost port=xxxxx dbname=regression -> Hash Right Join Output: t1.id, t2.a2, t2.id Inner Unique: true Hash Cond: (t2.account_id = t1.id) - -> Seq Scan on issue_7705.t2_30070004 t2 + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 Output: t2.id, t2.account_id, t2.a2 -> Hash Output: t1.id - -> Seq Scan on issue_7705.t1_30070000 t1 + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 Output: t1.id (22 rows) @@ -88,7 +96,7 @@ FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- WindowAgg Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 @@ -100,17 +108,17 @@ FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; Task Count: 4 Tasks Shown: One of 4 -> Task - Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t2_30070004 t2 RIGHT JOIN issue_7705.t1_30070000 t1 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t2_30070004 t2 RIGHT JOIN outer_join_columns_testing.t1_30070000 t1 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Node: host=localhost port=xxxxx dbname=regression -> Hash Right Join Output: t1.id, t2.a2, t2.id Inner Unique: true Hash Cond: (t2.account_id = t1.id) - -> Seq Scan on issue_7705.t2_30070004 t2 + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 Output: t2.id, t2.account_id, t2.a2 -> Hash Output: t1.id - -> Seq Scan on issue_7705.t1_30070000 t1 + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 Output: t1.id (22 rows) @@ -126,7 +134,7 
@@ FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- HashAggregate Output: remote_scan.id, (max(remote_scan.max) OVER (?)), remote_scan.worker_column_3 @@ -141,17 +149,17 @@ FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; Task Count: 4 Tasks Shown: One of 4 -> Task - Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Node: host=localhost port=xxxxx dbname=regression -> Hash Right Join Output: t1.id, t2.a2, t2.id Inner Unique: true Hash Cond: (t2.account_id = t1.id) - -> Seq Scan on issue_7705.t2_30070004 t2 + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 Output: t2.id, t2.account_id, t2.a2 -> Hash Output: t1.id - -> Seq Scan on issue_7705.t1_30070000 t1 + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 Output: t1.id (25 rows) @@ -175,7 +183,7 @@ EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id ORDER BY t1.id; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Result Output: remote_scan.id, ((remote_scan.test_f + (nextval('test_seq'::regclass))::integer) + 42) @@ -187,17 +195,17 @@ ORDER BY t1.id; Task Count: 4 Tasks Shown: One of 4 -> Task - Query: SELECT worker_column_1 AS id, worker_column_2 AS test_f FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Query: SELECT worker_column_1 AS id, worker_column_2 AS test_f FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Node: host=localhost port=xxxxx dbname=regression -> Hash Right Join Output: t1.id, t2.a2 Inner Unique: true Hash Cond: (t2.account_id = t1.id) - -> Seq Scan on issue_7705.t2_30070004 t2 + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 Output: t2.id, t2.account_id, t2.a2 -> Hash Output: t1.id - -> Seq Scan on issue_7705.t1_30070000 t1 + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 Output: t1.id (22 rows) @@ -215,7 +223,7 @@ EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id ORDER BY t1.id; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Result Output: remote_scan.id, CASE ((nextval('test_seq'::regclass) % '2'::bigint) = 0) WHEN CASE_TEST_EXPR THEN remote_scan."case" ELSE 1 END @@ -227,22 +235,180 @@ ORDER BY t1.id; Task 
Count: 4 Tasks Shown: One of 4 -> Task - Query: SELECT worker_column_1 AS id, worker_column_2 AS "case" FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Query: SELECT worker_column_1 AS id, worker_column_2 AS "case" FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Node: host=localhost port=xxxxx dbname=regression -> Hash Right Join Output: t1.id, t2.a2 Inner Unique: true Hash Cond: (t2.account_id = t1.id) - -> Seq Scan on issue_7705.t2_30070004 t2 + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(22 rows) + +-- Issue #7787: count distinct of a column from the inner side of a +-- left outer join will have a non-empty varnullingrels in the query +-- tree returned by Postgres 16+, so ensure this is not reflected in +-- the worker subquery constructed by Citus; it has just one relation, +-- for the pushed down subquery. +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + count +--------------------------------------------------------------------- + 2 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + Output: count(DISTINCT remote_scan.count) + -> Custom Scan (Citus Adaptive) + Output: remote_scan.count + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS count FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1 + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: t2.a2 + Group Key: t2.a2 + -> Hash Right Join + Output: t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(22 rows) + +-- Issue #7787 also occurs with a HAVING clause +SELECT 1 +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT a2) > 1; + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT 1 +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT a2) > 1; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + Output: remote_scan."?column?" 
+ Filter: (count(DISTINCT remote_scan.worker_column_2) > 1) + -> Custom Scan (Citus Adaptive) + Output: remote_scan."?column?", remote_scan.worker_column_2 + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT 1, worker_column_1 AS worker_column_2 FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1 + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: 1, t2.a2 + Group Key: t2.a2 + -> Hash Right Join + Output: t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 Output: t2.id, t2.account_id, t2.a2 -> Hash Output: t1.id - -> Seq Scan on issue_7705.t1_30070000 t1 + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(23 rows) + +-- Check right outer join +SELECT COUNT(DISTINCT a2) +FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id; + count +--------------------------------------------------------------------- + 2 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT COUNT(DISTINCT a2) +FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + Output: count(DISTINCT remote_scan.count) + -> Custom Scan (Citus Adaptive) + Output: remote_scan.count + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS count FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t2_30070004 t2 RIGHT JOIN outer_join_columns_testing.t1_30070000 t1 ON ((t2.account_id OPERATOR(pg_catalog.=) t1.id)))) worker_subquery GROUP BY worker_column_1 + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: t2.a2 + Group Key: t2.a2 + -> Hash Right Join + Output: t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 Output: t1.id (22 rows) +-- Check both count distinct and having clause +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT t2.id) > 1; + count +--------------------------------------------------------------------- + 2 +(1 row) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT t2.id) > 1; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + Output: count(DISTINCT remote_scan.count) + Filter: (count(DISTINCT remote_scan.worker_column_2) > 1) + -> Custom Scan (Citus Adaptive) + Output: remote_scan.count, remote_scan.worker_column_2 + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS count, worker_column_2 FROM (SELECT t2.a2 AS worker_column_1, t2.id AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1, worker_column_2 + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Output: t2.a2, t2.id + Group Key: t2.a2, t2.id + -> Hash Right Join + Output: t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on 
outer_join_columns_testing.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on outer_join_columns_testing.t1_30070000 t1 + Output: t1.id +(23 rows) + --- cleanup \set VERBOSITY TERSE -DROP SCHEMA issue_7705 CASCADE; +DROP SCHEMA outer_join_columns_testing CASCADE; NOTICE: drop cascades to 4 other objects RESET all; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index ae9d9ac6ccd..db27941712f 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -84,7 +84,7 @@ test: forcedelegation_functions system_queries # this should be run alone as it gets too many clients test: join_pushdown test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc statement_cancel_error_message -test: multi_agg_distinct multi_limit_clause_approximate multi_outer_join_reference multi_single_relation_subquery multi_prepare_plsql set_role_in_transaction +test: multi_agg_distinct multi_limit_clause_approximate multi_outer_join_reference multi_outer_join_columns multi_single_relation_subquery multi_prepare_plsql set_role_in_transaction test: multi_reference_table multi_select_for_update relation_access_tracking pg13_with_ties test: custom_aggregate_support aggregate_support tdigest_aggregate_support test: multi_average_expression multi_working_columns multi_having_pushdown having_subquery @@ -103,7 +103,7 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement test: binary_protocol test: alter_table_set_access_method test: alter_distributed_table -test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477 issue_7705 +test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477 test: object_propagation_debug test: undistribute_table test: run_command_on_all_nodes diff --git a/src/test/regress/sql/issue_7705.sql b/src/test/regress/sql/multi_outer_join_columns.sql similarity index 62% rename from src/test/regress/sql/issue_7705.sql rename to src/test/regress/sql/multi_outer_join_columns.sql index 950933017ad..eec111cb547 100644 --- a/src/test/regress/sql/issue_7705.sql +++ b/src/test/regress/sql/multi_outer_join_columns.sql @@ -2,11 +2,16 @@ --- do not cause issues for the postgres planner, in particular postgres versions 16+, where the --- varnullingrels field of a VAR node may contain relids of join relations that can make the var --- NULL; in a rewritten distributed query without a join such relids do not have a meaning. 
---- Issue #7705: [SEGFAULT] Querying distributed tables with window partition causes segmentation fault ---- https://github.com/citusdata/citus/issues/7705 -CREATE SCHEMA issue_7705; -SET search_path to 'issue_7705'; +-- This test has an alternative goldfile because of the following feature in Postgres 16: +-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e +-- + +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16; + +CREATE SCHEMA outer_join_columns_testing; +SET search_path to 'outer_join_columns_testing'; SET citus.next_shard_id TO 30070000; SET citus.shard_replication_factor TO 1; SET citus.enable_local_execution TO ON; @@ -66,7 +71,43 @@ SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id ORDER BY t1.id; +-- Issue #7787: count distinct of a column from the inner side of a +-- left outer join will have a non-empty varnullingrels in the query +-- tree returned by Postgres 16+, so ensure this is not reflected in +-- the worker subquery constructed by Citus; it has just one relation, +-- for the pushed down subquery. +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + +-- Issue #7787 also occurs with a HAVING clause +SELECT 1 +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT a2) > 1; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT 1 +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT a2) > 1; + +-- Check right outer join +SELECT COUNT(DISTINCT a2) +FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT COUNT(DISTINCT a2) +FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id; + +-- Check both count distinct and having clause +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT t2.id) > 1; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT COUNT(DISTINCT a2) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +HAVING COUNT(DISTINCT t2.id) > 1; + --- cleanup \set VERBOSITY TERSE -DROP SCHEMA issue_7705 CASCADE; +DROP SCHEMA outer_join_columns_testing CASCADE; RESET all;
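
For reviewers who want a quick local check, the shortest reproduction of the #7787 crash covered by this patch is a COUNT(DISTINCT ...) over a column from the nullable side of a left outer join. The sketch below is condensed from the regression test added above and is illustrative only: it assumes a Citus cluster (coordinator plus workers) running Postgres 16+, and the schema name repro_7787 is hypothetical. Per the commit message, without the fix the final two queries could segfault during planning of the worker subquery; with the fix they return 2 and 1 respectively.

    -- Minimal reproduction of #7787, condensed from multi_outer_join_columns.sql.
    -- Assumes a Citus cluster on Postgres 16+; schema name is illustrative.
    CREATE SCHEMA repro_7787;
    SET search_path TO repro_7787;

    CREATE TABLE t1 (id INT PRIMARY KEY);
    CREATE TABLE t2 (id INT, account_id INT, a2 INT, PRIMARY KEY(id, account_id));
    SELECT create_distributed_table('t1', 'id');
    SELECT create_distributed_table('t2', 'account_id');

    INSERT INTO t1 VALUES (1), (2);
    INSERT INTO t2 VALUES (3, 1, 10), (4, 2, 20), (5, 1, NULL);

    -- a2 comes from the inner (nullable) side of the join, so on PG16+ its Var
    -- carries a non-empty varnullingrels; the fix clears this field when the
    -- column is moved into the single-RTE worker subquery.
    SELECT COUNT(DISTINCT a2)
    FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;

    -- The HAVING variant exercises the same code path.
    SELECT 1
    FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
    HAVING COUNT(DISTINCT a2) > 1;

    DROP SCHEMA repro_7787 CASCADE;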