diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 33bba98d5c5..9c0b011f0d3 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -68,7 +68,7 @@ USER citus # build postgres versions separately for effective parrallelism and caching of already built versions when changing only certain versions FROM base AS pg14 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 14.12 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 14.15 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -80,7 +80,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ RUN rm .pgenv-staging/config/default.conf FROM base AS pg15 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.7 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 15.10 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -92,7 +92,7 @@ RUN cp -r .pgenv/src .pgenv/pgsql-* .pgenv/config .pgenv-staging/ RUN rm .pgenv-staging/config/default.conf FROM base AS pg16 -RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.3 +RUN MAKEFLAGS="-j $(nproc)" pgenv build 16.6 RUN rm .pgenv/src/*.tar* RUN make -C .pgenv/src/postgresql-*/ clean RUN make -C .pgenv/src/postgresql-*/src/include install @@ -211,7 +211,7 @@ COPY --chown=citus:citus .psqlrc . RUN sudo chown --from=root:root citus:citus -R ~ # sets default pg version -RUN pgenv switch 16.3 +RUN pgenv switch 16.6 # make connecting to the coordinator easy ENV PGPORT=9700 diff --git a/.github/actions/save_logs_and_results/action.yml b/.github/actions/save_logs_and_results/action.yml index 0f238835d19..b344c68f2ef 100644 --- a/.github/actions/save_logs_and_results/action.yml +++ b/.github/actions/save_logs_and_results/action.yml @@ -6,7 +6,7 @@ inputs: runs: using: composite steps: - - uses: actions/upload-artifact@v3.1.1 + - uses: actions/upload-artifact@v4.6.0 name: Upload logs with: name: ${{ inputs.folder }} diff --git a/.github/actions/setup_extension/action.yml b/.github/actions/setup_extension/action.yml index 96b408e7e43..33129f17de6 100644 --- a/.github/actions/setup_extension/action.yml +++ b/.github/actions/setup_extension/action.yml @@ -17,7 +17,7 @@ runs: echo "PG_MAJOR=${{ inputs.pg_major }}" >> $GITHUB_ENV fi shell: bash - - uses: actions/download-artifact@v3.0.1 + - uses: actions/download-artifact@v4.1.8 with: name: build-${{ env.PG_MAJOR }} - name: Install Extension diff --git a/.github/actions/upload_coverage/action.yml b/.github/actions/upload_coverage/action.yml index 0b5f581a6a4..ba80ba63afa 100644 --- a/.github/actions/upload_coverage/action.yml +++ b/.github/actions/upload_coverage/action.yml @@ -21,7 +21,7 @@ runs: mkdir -p /tmp/codeclimate cc-test-reporter format-coverage -t lcov -o /tmp/codeclimate/${{ inputs.flags }}.json lcov.info shell: bash - - uses: actions/upload-artifact@v3.1.1 + - uses: actions/upload-artifact@v4.6.0 with: path: "/tmp/codeclimate/*.json" - name: codeclimate + name: codeclimate-${{ inputs.flags }} diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 70bc0bcb9a6..32c761766d5 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -31,14 +31,14 @@ jobs: pgupgrade_image_name: "ghcr.io/citusdata/pgupgradetester" style_checker_image_name: "ghcr.io/citusdata/stylechecker" style_checker_tools_version: "0.8.18" - sql_snapshot_pg_version: "16.3" - image_suffix: "-v13fd57c" - pg14_version: '{ "major": "14", "full": "14.12" }' - pg15_version: '{ 
"major": "15", "full": "15.7" }' - pg16_version: '{ "major": "16", "full": "16.3" }' - upgrade_pg_versions: "14.12-15.7-16.3" + sql_snapshot_pg_version: "16.6" + image_suffix: "-v5779674" + pg14_version: '{ "major": "14", "full": "14.15" }' + pg15_version: '{ "major": "15", "full": "15.10" }' + pg16_version: '{ "major": "16", "full": "16.6" }' + upgrade_pg_versions: "14.15-15.10-16.6" steps: - # Since GHA jobs needs at least one step we use a noop step here. + # Since GHA jobs need at least one step we use a noop step here. - name: Set up parameters run: echo 'noop' check-sql-snapshots: @@ -48,7 +48,7 @@ jobs: image: ${{ needs.params.outputs.build_image_name }}:${{ needs.params.outputs.sql_snapshot_pg_version }}${{ needs.params.outputs.image_suffix }} options: --user root steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@v4 - name: Check Snapshots run: | git config --global --add safe.directory ${GITHUB_WORKSPACE} @@ -125,7 +125,7 @@ jobs: - name: Build run: "./ci/build-citus.sh" shell: bash - - uses: actions/upload-artifact@v3.1.1 + - uses: actions/upload-artifact@v4.6.0 with: name: build-${{ env.PG_MAJOR }} path: |- @@ -284,10 +284,12 @@ jobs: check-arbitrary-configs parallel=4 CONFIGS=$TESTS - uses: "./.github/actions/save_logs_and_results" if: always() + with: + folder: ${{ env.PG_MAJOR }}_arbitrary_configs_${{ matrix.parallel }} - uses: "./.github/actions/upload_coverage" if: always() with: - flags: ${{ env.pg_major }}_upgrade + flags: ${{ env.PG_MAJOR }}_arbitrary_configs_${{ matrix.parallel }} codecov_token: ${{ secrets.CODECOV_TOKEN }} test-pg-upgrade: name: PG${{ matrix.old_pg_major }}-PG${{ matrix.new_pg_major }} - check-pg-upgrade @@ -335,6 +337,8 @@ jobs: if: failure() - uses: "./.github/actions/save_logs_and_results" if: always() + with: + folder: ${{ env.old_pg_major }}_${{ env.new_pg_major }}_upgrade - uses: "./.github/actions/upload_coverage" if: always() with: @@ -380,10 +384,12 @@ jobs: done; - uses: "./.github/actions/save_logs_and_results" if: always() + with: + folder: ${{ env.PG_MAJOR }}_citus_upgrade - uses: "./.github/actions/upload_coverage" if: always() with: - flags: ${{ env.pg_major }}_upgrade + flags: ${{ env.PG_MAJOR }}_citus_upgrade codecov_token: ${{ secrets.CODECOV_TOKEN }} upload-coverage: if: always() @@ -399,10 +405,11 @@ jobs: - test-citus-upgrade - test-pg-upgrade steps: - - uses: actions/download-artifact@v3.0.1 + - uses: actions/download-artifact@v4.1.8 with: - name: "codeclimate" - path: "codeclimate" + pattern: codeclimate* + path: codeclimate + merge-multiple: true - name: Upload coverage results to Code Climate run: |- cc-test-reporter sum-coverage codeclimate/*.json -o total.json @@ -516,6 +523,7 @@ jobs: matrix: ${{ fromJson(needs.prepare_parallelization_matrix_32.outputs.json) }} steps: - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4.1.8 - uses: "./.github/actions/setup_extension" - name: Run minimal tests run: |- diff --git a/.github/workflows/flaky_test_debugging.yml b/.github/workflows/flaky_test_debugging.yml index 7135f99fa02..812fbe2418a 100644 --- a/.github/workflows/flaky_test_debugging.yml +++ b/.github/workflows/flaky_test_debugging.yml @@ -34,7 +34,7 @@ jobs: echo "PG_MAJOR=${PG_MAJOR}" >> $GITHUB_ENV ./ci/build-citus.sh shell: bash - - uses: actions/upload-artifact@v3.1.1 + - uses: actions/upload-artifact@v4.6.0 with: name: build-${{ env.PG_MAJOR }} path: |- diff --git a/.github/workflows/packaging-test-pipelines.yml b/.github/workflows/packaging-test-pipelines.yml index 26b5cfc9593..db0fd08efd0 
100644 --- a/.github/workflows/packaging-test-pipelines.yml +++ b/.github/workflows/packaging-test-pipelines.yml @@ -116,7 +116,6 @@ jobs: # for each deb based image and we use POSTGRES_VERSION to set # PG_CONFIG variable in each of those runs. packaging_docker_image: - - debian-buster-all - debian-bookworm-all - debian-bullseye-all - ubuntu-focal-all @@ -130,7 +129,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set pg_config path and python parameters for deb based distros run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 78d1d2a7c24..ee3f2d0a203 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,51 @@ +### citus v13.0.1 (February 4th, 2025) ### + +* Drops support for PostgreSQL 14 (#7753) + +### citus v13.0.0 (January 17, 2025) ### + +* Adds support for PostgreSQL 17 (#7699, #7661) + +* Adds `JSON_TABLE()` support in distributed queries (#7816) + +* Propagates `MERGE ... WHEN NOT MATCHED BY SOURCE` (#7807) + +* Propagates `MEMORY` and `SERIALIZE` options of `EXPLAIN` (#7802) + +* Adds support for identity columns in distributed partitioned tables (#7785) + +* Allows specifying an access method for distributed partitioned tables (#7818) + +* Allows exclusion constraints on distributed partitioned tables (#7733) + +* Allows configuring sslnegotiation using `citus.node_conn_info` (#7821) + +* Avoids wal receiver timeouts during large shard splits (#7229) + +* Fixes a bug causing incorrect writing of data to target `MERGE` repartition + command (#7659) + +* Fixes a crash that happens because of unsafe catalog access when re-assigning + the global pid after `application_name` changes (#7791) + +* Fixes incorrect `VALID UNTIL` setting assumption made for roles when syncing + them to new nodes (#7534) + +* Fixes segfault when calling distributed procedure with a parameterized + distribution argument (#7242) + +* Fixes server crash when trying to execute `activate_node_snapshot()` on a + single-node cluster (#7552) + +* Improves `citus_move_shard_placement()` to fail early if there is a new node + without reference tables yet (#7467) + +### citus v12.1.6 (Nov 14, 2024) ### + +* Propagates `SECURITY LABEL .. ON ROLE` statements (#7304) + +* Fixes crash caused by running queries with window partition (#7718) + ### citus v12.1.5 (July 17, 2024) ### * Adds support for MERGE commands with single shard distributed target tables diff --git a/README.md b/README.md index 2cf17098ffc..44fd65f50a2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -| **
The Citus database is 100% open source.<br/><br/>Learn what's new in the [Citus 12.1 release blog](https://www.citusdata.com/blog/2023/09/22/adding-postgres-16-support-to-citus-12-1/) and the [Citus Updates page](https://www.citusdata.com/updates/).<br/><br/>**| +| **<br/>The Citus database is 100% open source.<br/><br/>Learn what's new in the [Citus 13.0 release blog](https://www.citusdata.com/blog/2025/02/06/distribute-postgresql-17-with-citus-13/) and the [Citus Updates page](https://www.citusdata.com/updates/).<br/><br/>**| |---|
@@ -95,14 +95,14 @@ Install packages on Ubuntu / Debian: ```bash curl https://install.citusdata.com/community/deb.sh > add-citus-repo.sh sudo bash add-citus-repo.sh -sudo apt-get -y install postgresql-16-citus-12.1 +sudo apt-get -y install postgresql-17-citus-13.0 ``` -Install packages on CentOS / Red Hat: +Install packages on Red Hat: ```bash curl https://install.citusdata.com/community/rpm.sh > add-citus-repo.sh sudo bash add-citus-repo.sh -sudo yum install -y citus121_16 +sudo yum install -y citus130_17 ``` To add Citus to your local PostgreSQL database, add the following to `postgresql.conf`: diff --git a/citus-tools b/citus-tools deleted file mode 160000 index 3376bd6845f..00000000000 --- a/citus-tools +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index ca3a5f4c4aa..fd3d171c6f5 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -3021,6 +3021,8 @@ AvailableExtensionVersionColumnar(void) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("citus extension is not found"))); + + return NULL; /* keep compiler happy */ } diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c index 402dedb8a91..4f1b942a085 100644 --- a/src/backend/distributed/metadata/metadata_cache.c +++ b/src/backend/distributed/metadata/metadata_cache.c @@ -2522,6 +2522,8 @@ AvailableExtensionVersion(void) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("citus extension is not found"))); + + return NULL; /* keep compiler happy */ } diff --git a/src/backend/distributed/planner/insert_select_planner.c b/src/backend/distributed/planner/insert_select_planner.c index 60d6ce466ca..15588025396 100644 --- a/src/backend/distributed/planner/insert_select_planner.c +++ b/src/backend/distributed/planner/insert_select_planner.c @@ -1810,6 +1810,8 @@ CastExpr(Expr *expr, Oid sourceType, Oid targetType, Oid targetCollation, ereport(ERROR, (errmsg("could not find a conversion path from type %d to %d", sourceType, targetType))); } + + return NULL; /* keep compiler happy */ } diff --git a/src/backend/distributed/planner/multi_explain.c b/src/backend/distributed/planner/multi_explain.c index 4584e774024..db30f4b6045 100644 --- a/src/backend/distributed/planner/multi_explain.c +++ b/src/backend/distributed/planner/multi_explain.c @@ -190,6 +190,14 @@ PG_FUNCTION_INFO_V1(worker_save_query_explain_analyze); void CitusExplainScan(CustomScanState *node, List *ancestors, struct ExplainState *es) { +#if PG_VERSION_NUM >= PG_VERSION_16 + if (es->generic) + { + ereport(ERROR, (errmsg( + "EXPLAIN GENERIC_PLAN is currently not supported for Citus tables"))); + } +#endif + CitusScanState *scanState = (CitusScanState *) node; DistributedPlan *distributedPlan = scanState->distributedPlan; EState *executorState = ScanStateGetExecutorState(scanState); @@ -992,18 +1000,12 @@ BuildRemoteExplainQuery(char *queryString, ExplainState *es) appendStringInfo(explainQuery, "EXPLAIN (ANALYZE %s, VERBOSE %s, " "COSTS %s, BUFFERS %s, WAL %s, " -#if PG_VERSION_NUM >= PG_VERSION_16 - "GENERIC_PLAN %s, " -#endif "TIMING %s, SUMMARY %s, FORMAT %s) %s", es->analyze ? "TRUE" : "FALSE", es->verbose ? "TRUE" : "FALSE", es->costs ? "TRUE" : "FALSE", es->buffers ? "TRUE" : "FALSE", es->wal ? "TRUE" : "FALSE", -#if PG_VERSION_NUM >= PG_VERSION_16 - es->generic ? 
"TRUE" : "FALSE", -#endif es->timing ? "TRUE" : "FALSE", es->summary ? "TRUE" : "FALSE", formatStr, diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 371ba54e650..28680deb036 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -1557,9 +1557,10 @@ MasterAggregateMutator(Node *originalNode, MasterAggregateWalkerContext *walkerC } else if (IsA(originalNode, Var)) { - Var *newColumn = copyObject((Var *) originalNode); - newColumn->varno = masterTableId; - newColumn->varattno = walkerContext->columnId; + Var *origColumn = (Var *) originalNode; + Var *newColumn = makeVar(masterTableId, walkerContext->columnId, + origColumn->vartype, origColumn->vartypmod, + origColumn->varcollid, origColumn->varlevelsup); walkerContext->columnId++; newNode = (Node *) newColumn; diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index bd65fa60c01..6d26b802f64 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -2890,14 +2890,27 @@ ApplicationNameAssignHook(const char *newval, void *extra) DetermineCitusBackendType(newval); /* - * AssignGlobalPID might read from catalog tables to get the the local - * nodeid. But ApplicationNameAssignHook might be called before catalog - * access is available to the backend (such as in early stages of - * authentication). We use StartupCitusBackend to initialize the global pid - * after catalogs are available. After that happens this hook becomes - * responsible to update the global pid on later application_name changes. - * So we set the FinishedStartupCitusBackend flag in StartupCitusBackend to - * indicate when this responsibility handoff has happened. + * We use StartupCitusBackend to initialize the global pid after catalogs + * are available. After that happens this hook becomes responsible to update + * the global pid on later application_name changes. So we set the + * FinishedStartupCitusBackend flag in StartupCitusBackend to indicate when + * this responsibility handoff has happened. + * + * Also note that when application_name changes, we don't actually need to + * try re-assigning the global pid for external client backends and + * background workers because application_name doesn't affect the global + * pid for such backends - note that !IsExternalClientBackend() check covers + * both types of backends. Plus, + * trying to re-assign the global pid for such backends would unnecessarily + * cause performing a catalog access when the cached local node id is + * invalidated. However, accessing to the catalog tables is dangerous in + * certain situations like when we're not in a transaction block. And for + * the other types of backends, i.e., the Citus internal backends, we need + * to re-assign the global pid when the application_name changes because for + * such backends we simply extract the global pid inherited from the + * originating backend from the application_name -that's specified by + * originating backend when openning that connection- and this doesn't require + * catalog access. * * Another solution to the catalog table acccess problem would be to update * global pid lazily, like we do for HideShards. 
But that's not possible @@ -2907,7 +2920,7 @@ ApplicationNameAssignHook(const char *newval, void *extra) * as reasonably possible, which is also why we extract global pids in the * AuthHook already (extracting doesn't require catalog access). */ - if (FinishedStartupCitusBackend) + if (FinishedStartupCitusBackend && !IsExternalClientBackend()) { AssignGlobalPID(newval); } diff --git a/src/backend/distributed/test/run_from_same_connection.c b/src/backend/distributed/test/run_from_same_connection.c index 52b2e0b181b..d22ee442834 100644 --- a/src/backend/distributed/test/run_from_same_connection.c +++ b/src/backend/distributed/test/run_from_same_connection.c @@ -190,6 +190,9 @@ run_commands_on_session_level_connection_to_node(PG_FUNCTION_ARGS) /* * override_backend_data_gpid is a wrapper around SetBackendDataGpid(). + * Also sets distributedCommandOriginator to true since the only caller of + * this function actually wants this backend to + * be treated as a distributed command originator with the given global pid. */ Datum override_backend_data_gpid(PG_FUNCTION_ARGS) @@ -199,6 +202,7 @@ override_backend_data_gpid(PG_FUNCTION_ARGS) uint64 gpid = PG_GETARG_INT64(0); SetBackendDataGlobalPID(gpid); + SetBackendDataDistributedCommandOriginator(true); PG_RETURN_VOID(); } diff --git a/src/backend/distributed/transaction/backend_data.c b/src/backend/distributed/transaction/backend_data.c index 67acadd2940..9b6e7d122ba 100644 --- a/src/backend/distributed/transaction/backend_data.c +++ b/src/backend/distributed/transaction/backend_data.c @@ -855,6 +855,16 @@ GetCurrentDistributedTransactionId(void) void AssignDistributedTransactionId(void) { + /* + * MyBackendData should always be available. However, we observed some + * crashes where certain hooks were not executed. + * Bug 3697586: Server crashes when assigning distributed transaction + */ + if (!MyBackendData) + { + ereport(ERROR, (errmsg("backend is not ready for distributed transactions"))); + } + pg_atomic_uint64 *transactionNumberSequence = &backendManagementShmemData->nextTransactionNumber; @@ -964,6 +974,23 @@ SetBackendDataGlobalPID(uint64 gpid) } + +/* + * SetBackendDataDistributedCommandOriginator sets the distributedCommandOriginator + * field on MyBackendData. + */ +void +SetBackendDataDistributedCommandOriginator(bool distributedCommandOriginator) +{ + if (!MyBackendData) + { + return; + } + SpinLockAcquire(&MyBackendData->mutex); + MyBackendData->distributedCommandOriginator = distributedCommandOriginator; + SpinLockRelease(&MyBackendData->mutex); +} + + /* * GetGlobalPID returns the global process id of the current backend. 
*/ diff --git a/src/include/distributed/backend_data.h b/src/include/distributed/backend_data.h index 8014fe5a6a9..5b3fcf2ac22 100644 --- a/src/include/distributed/backend_data.h +++ b/src/include/distributed/backend_data.h @@ -61,6 +61,7 @@ extern void AssignGlobalPID(const char *applicationName); extern uint64 GetGlobalPID(void); extern void SetBackendDataDatabaseId(void); extern void SetBackendDataGlobalPID(uint64 gpid); +extern void SetBackendDataDistributedCommandOriginator(bool distributedCommandOriginator); extern uint64 ExtractGlobalPID(const char *applicationName); extern int ExtractNodeIdFromGlobalPID(uint64 globalPID, bool missingOk); extern int ExtractProcessIdFromGlobalPID(uint64 globalPID); diff --git a/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py b/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py index 1ab44803117..c25a3448238 100755 --- a/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py +++ b/src/test/regress/citus_tests/upgrade/citus_upgrade_test.py @@ -62,10 +62,16 @@ def run_citus_upgrade_tests(config, before_upgrade_schedule, after_upgrade_sched install_citus(config.post_tar_path) + # disable 2pc recovery for all nodes to work around https://github.com/citusdata/citus/issues/7875 + disable_2pc_recovery_for_all_nodes(config.bindir, config) + restart_databases(config.bindir, config.datadir, config.mixed_mode, config) run_alter_citus(config.bindir, config.mixed_mode, config) verify_upgrade(config, config.mixed_mode, config.node_name_to_ports.values()) + # re-enable 2pc recovery for all nodes + enable_2pc_recovery_for_all_nodes(config.bindir, config) + run_test_on_coordinator(config, after_upgrade_schedule) remove_citus(config.post_tar_path) @@ -146,6 +152,18 @@ def restart_database(pg_path, abs_data_path, node_name, node_ports, logfile_pref subprocess.run(command, check=True) +def disable_2pc_recovery_for_all_nodes(pg_path, config): + for port in config.node_name_to_ports.values(): + utils.psql(pg_path, port, "ALTER SYSTEM SET citus.recover_2pc_interval TO -1;") + utils.psql(pg_path, port, "SELECT pg_reload_conf();") + + +def enable_2pc_recovery_for_all_nodes(pg_path, config): + for port in config.node_name_to_ports.values(): + utils.psql(pg_path, port, "ALTER SYSTEM RESET citus.recover_2pc_interval;") + utils.psql(pg_path, port, "SELECT pg_reload_conf();") + + def run_alter_citus(pg_path, mixed_mode, config): for port in config.node_name_to_ports.values(): if mixed_mode and port in ( diff --git a/src/test/regress/expected/issue_7705.out b/src/test/regress/expected/issue_7705.out new file mode 100644 index 00000000000..20b078226ad --- /dev/null +++ b/src/test/regress/expected/issue_7705.out @@ -0,0 +1,248 @@ +--- Test for verifying that column references (var nodes) in targets that cannot be pushed down +--- do not cause issues for the postgres planner, in particular postgres versions 16+, where the +--- varnullingrels field of a VAR node may contain relids of join relations that can make the var +--- NULL; in a rewritten distributed query without a join such relids do not have a meaning. 
+--- Issue #7705: [SEGFAULT] Querying distributed tables with window partition causes segmentation fault +--- https://github.com/citusdata/citus/issues/7705 +CREATE SCHEMA issue_7705; +SET search_path to 'issue_7705'; +SET citus.next_shard_id TO 30070000; +SET citus.shard_replication_factor TO 1; +SET citus.enable_local_execution TO ON; +CREATE TABLE t1 (id INT PRIMARY KEY); +INSERT INTO t1 VALUES (1), (2); +CREATE TABLE t2 (id INT, account_id INT, a2 INT, PRIMARY KEY(id, account_id)); +INSERT INTO t2 VALUES (3, 1, 10), (4, 2, 20), (5, 1, NULL); +SELECT create_distributed_table('t1', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$issue_7705.t1$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t2', 'account_id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$issue_7705.t2$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Test the issue seen in #7705; a target expression with +-- a window function that cannot be pushed down because the +-- partition by is not on the distribution column also includes +-- a column from the inner side of a left outer join, which +-- produces a non-empty varnullingrels set in PG 16 (and higher) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + id | max +--------------------------------------------------------------------- + 1 | 10 + 2 | 20 + 1 | +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + WindowAgg + Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 + -> Sort + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Sort Key: remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(22 rows) + +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; + id | max +--------------------------------------------------------------------- + 1 | 10 + 2 | 20 + 1 | +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON 
t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + WindowAgg + Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 + -> Sort + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Sort Key: remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t2_30070004 t2 RIGHT JOIN issue_7705.t1_30070000 t1 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(22 rows) + +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + id | max +--------------------------------------------------------------------- + 1 | + 1 | 10 + 2 | 20 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate + Output: remote_scan.id, (max(remote_scan.max) OVER (?)), remote_scan.worker_column_3 + Group Key: remote_scan.id, max(remote_scan.max) OVER (?) + -> WindowAgg + Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 + -> Sort + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Sort Key: remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2, t2.id + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(25 rows) + +CREATE SEQUENCE test_seq START 101; +CREATE OR REPLACE FUNCTION TEST_F(int) returns INT language sql stable as $$ select $1 + 42; $$ ; +-- Issue #7705 also occurs if a target expression includes a column +-- of a distributed table that is on the inner side of a left outer +-- join and a call to nextval(), because nextval() cannot be pushed +-- down, and must be run on the coordinator +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + id | test_f +--------------------------------------------------------------------- + 1 | 153 + 1 | + 2 | 165 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON 
t1.id = t2.account_id +ORDER BY t1.id; + QUERY PLAN +--------------------------------------------------------------------- + Result + Output: remote_scan.id, ((remote_scan.test_f + (nextval('test_seq'::regclass))::integer) + 42) + -> Sort + Output: remote_scan.id, remote_scan.test_f + Sort Key: remote_scan.id + -> Custom Scan (Citus Adaptive) + Output: remote_scan.id, remote_scan.test_f + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS test_f FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(22 rows) + +SELECT t1.id, CASE nextval('test_seq') % 2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + id | case +--------------------------------------------------------------------- + 1 | 10 + 1 | 1 + 2 | 20 +(3 rows) + +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + QUERY PLAN +--------------------------------------------------------------------- + Result + Output: remote_scan.id, CASE ((nextval('test_seq'::regclass) % '2'::bigint) = 0) WHEN CASE_TEST_EXPR THEN remote_scan."case" ELSE 1 END + -> Sort + Output: remote_scan.id, remote_scan."case" + Sort Key: remote_scan.id + -> Custom Scan (Citus Adaptive) + Output: remote_scan.id, remote_scan."case" + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Query: SELECT worker_column_1 AS id, worker_column_2 AS "case" FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery + Node: host=localhost port=xxxxx dbname=regression + -> Hash Right Join + Output: t1.id, t2.a2 + Inner Unique: true + Hash Cond: (t2.account_id = t1.id) + -> Seq Scan on issue_7705.t2_30070004 t2 + Output: t2.id, t2.account_id, t2.a2 + -> Hash + Output: t1.id + -> Seq Scan on issue_7705.t1_30070000 t1 + Output: t1.id +(22 rows) + +--- cleanup +\set VERBOSITY TERSE +DROP SCHEMA issue_7705 CASCADE; +NOTICE: drop cascades to 4 other objects +RESET all; diff --git a/src/test/regress/expected/pg16.out b/src/test/regress/expected/pg16.out index a035fcfc4a2..546c0a832de 100644 --- a/src/test/regress/expected/pg16.out +++ b/src/test/regress/expected/pg16.out @@ -81,29 +81,9 @@ SELECT create_distributed_table('tenk1', 'unique1'); (1 row) SET citus.log_remote_commands TO on; -EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = 1000; -NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx'); -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing SAVEPOINT citus_explain_savepoint -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing EXPLAIN (ANALYZE FALSE, VERBOSE FALSE, COSTS TRUE, BUFFERS FALSE, WAL FALSE, GENERIC_PLAN TRUE, TIMING FALSE, SUMMARY FALSE, FORMAT TEXT) SELECT unique1 FROM 
pg16.tenk1_950001 tenk1 WHERE (thousand OPERATOR(pg_catalog.=) 1000) -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing ROLLBACK TO SAVEPOINT citus_explain_savepoint -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx -NOTICE: issuing COMMIT -DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx - QUERY PLAN ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) - Task Count: 1 - Tasks Shown: All - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Seq Scan on tenk1_950001 tenk1 (cost=0.00..35.50 rows=10 width=4) - Filter: (thousand = 1000) -(7 rows) - -EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = 1000; +EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = $1; +ERROR: EXPLAIN GENERIC_PLAN is currently not supported for Citus tables +EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = $1; ERROR: EXPLAIN options ANALYZE and GENERIC_PLAN cannot be used together SET citus.log_remote_commands TO off; -- Proper error when creating statistics without a name on a Citus table diff --git a/src/test/regress/expected/remove_coordinator.out b/src/test/regress/expected/remove_coordinator.out index 0226a7cd073..e2fd5df027b 100644 --- a/src/test/regress/expected/remove_coordinator.out +++ b/src/test/regress/expected/remove_coordinator.out @@ -5,6 +5,37 @@ SELECT master_remove_node('localhost', :master_port); (1 row) +-- to silence -potentially flaky- "could not establish connection after" warnings in the test below +SET client_min_messages TO ERROR; +-- to fail fast when the hostname is not resolvable, as will be the case below +SET citus.node_connection_timeout to '1s'; +BEGIN; + SET application_name TO 'new_app_name'; + -- that should fail because of bad hostname & port + SELECT citus_add_node('200.200.200.200', 1, 200); +ERROR: connection to the remote node postgres@200.200.200.200:1 failed + -- Since the above command failed, now Postgres will need to revert the + -- application_name change made in this transaction and this will + -- happen within the abort-transaction callback, so we won't be in a + -- transaction block while Postgres does that. + -- + -- And when the application_name changes, Citus tries to re-assign + -- the global pid but it does so only for Citus internal backends, + -- and doing so for Citus internal backends doesn't require being + -- in a transaction block and is safe. + -- + -- However, for the client external backends (like us here), Citus + -- doesn't re-assign the global pid because it's not needed and it's + -- not safe to do so outside of a transaction block. This is because + -- it would require performing a catalog access to retrieve the local + -- node id when the cached local node id is invalidated, like what just + -- happened here because of the failed citus_add_node() call made + -- above. + -- + -- So by failing here (rather than crashing), we ensure this behavior. 
+ROLLBACK; +RESET client_min_messages; +RESET citus.node_connection_timeout; -- restore coordinator for the rest of the tests SELECT citus_set_coordinator_host('localhost', :master_port); citus_set_coordinator_host diff --git a/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out b/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out index d71fad887c9..168c64ccaa3 100644 --- a/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out +++ b/src/test/regress/expected/upgrade_pg_dist_cleanup_after_0.out @@ -28,3 +28,12 @@ SELECT * FROM pg_dist_cleanup; CALL citus_cleanup_orphaned_resources(); NOTICE: cleaned up 1 orphaned resources DROP TABLE table_with_orphaned_shards; +-- Re-enable automatic shard cleanup by maintenance daemon as +-- we have disabled it in upgrade_pg_dist_cleanup_before.sql +ALTER SYSTEM RESET citus.defer_shard_delete_interval; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + diff --git a/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out b/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out index a0cf9ceb1ea..dd6c8868e32 100644 --- a/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out +++ b/src/test/regress/expected/upgrade_pg_dist_cleanup_before_0.out @@ -30,6 +30,23 @@ SELECT COUNT(*) FROM pg_dist_placement WHERE shardstate = 1 AND shardid IN (SELE (1 row) -- create an orphaned placement based on an existing one +-- +-- But before doing that, first disable automatic shard cleanup +-- by maintenance daemon so that we can reliably test the cleanup +-- in upgrade_pg_dist_cleanup_after.sql. +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT pg_sleep(0.1); + pg_sleep +--------------------------------------------------------------------- + +(1 row) + INSERT INTO pg_dist_placement(placementid, shardid, shardstate, shardlength, groupid) SELECT nextval('pg_dist_placement_placementid_seq'::regclass), shardid, 4, shardlength, 3-groupid FROM pg_dist_placement diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 220ce1964f1..bbb4047a950 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -103,7 +103,7 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement test: binary_protocol test: alter_table_set_access_method test: alter_distributed_table -test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477 +test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477 issue_7705 test: object_propagation_debug test: undistribute_table test: run_command_on_all_nodes diff --git a/src/test/regress/sql/issue_7705.sql b/src/test/regress/sql/issue_7705.sql new file mode 100644 index 00000000000..950933017ad --- /dev/null +++ b/src/test/regress/sql/issue_7705.sql @@ -0,0 +1,72 @@ +--- Test for verifying that column references (var nodes) in targets that cannot be pushed down +--- do not cause issues for the postgres planner, in particular postgres versions 16+, where the +--- varnullingrels field of a VAR node may contain relids of join relations that can make the var +--- NULL; in a rewritten distributed query without a join such relids do not have a meaning. 
+--- Issue #7705: [SEGFAULT] Querying distributed tables with window partition causes segmentation fault +--- https://github.com/citusdata/citus/issues/7705 + +CREATE SCHEMA issue_7705; +SET search_path to 'issue_7705'; +SET citus.next_shard_id TO 30070000; +SET citus.shard_replication_factor TO 1; +SET citus.enable_local_execution TO ON; + +CREATE TABLE t1 (id INT PRIMARY KEY); +INSERT INTO t1 VALUES (1), (2); + +CREATE TABLE t2 (id INT, account_id INT, a2 INT, PRIMARY KEY(id, account_id)); +INSERT INTO t2 VALUES (3, 1, 10), (4, 2, 20), (5, 1, NULL); + +SELECT create_distributed_table('t1', 'id'); +SELECT create_distributed_table('t2', 'account_id'); + +-- Test the issue seen in #7705; a target expression with +-- a window function that cannot be pushed down because the +-- partition by is not on the distribution column also includes +-- a column from the inner side of a left outer join, which +-- produces a non-empty varnullingrels set in PG 16 (and higher) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; + +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; + +CREATE SEQUENCE test_seq START 101; +CREATE OR REPLACE FUNCTION TEST_F(int) returns INT language sql stable as $$ select $1 + 42; $$ ; + +-- Issue #7705 also occurs if a target expression includes a column +-- of a distributed table that is on the inner side of a left outer +-- join and a call to nextval(), because nextval() cannot be pushed +-- down, and must be run on the coordinator +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + +SELECT t1.id, CASE nextval('test_seq') % 2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; +EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) +SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END +FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id +ORDER BY t1.id; + +--- cleanup +\set VERBOSITY TERSE +DROP SCHEMA issue_7705 CASCADE; +RESET all; diff --git a/src/test/regress/sql/pg16.sql b/src/test/regress/sql/pg16.sql index 99024edcba8..0312fcdff93 100644 --- a/src/test/regress/sql/pg16.sql +++ b/src/test/regress/sql/pg16.sql @@ -58,8 +58,8 @@ CREATE TABLE tenk1 ( SELECT create_distributed_table('tenk1', 'unique1'); SET citus.log_remote_commands TO on; -EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = 1000; -EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = 1000; +EXPLAIN (GENERIC_PLAN) SELECT unique1 FROM tenk1 WHERE thousand = $1; +EXPLAIN (GENERIC_PLAN, ANALYZE) SELECT unique1 FROM tenk1 WHERE thousand = $1; SET citus.log_remote_commands TO off; -- Proper error when 
creating statistics without a name on a Citus table diff --git a/src/test/regress/sql/remove_coordinator.sql b/src/test/regress/sql/remove_coordinator.sql index b0df327d15f..35a8a57189b 100644 --- a/src/test/regress/sql/remove_coordinator.sql +++ b/src/test/regress/sql/remove_coordinator.sql @@ -1,5 +1,41 @@ -- removing coordinator from pg_dist_node should update pg_dist_colocation SELECT master_remove_node('localhost', :master_port); +-- to silence -potentially flaky- "could not establish connection after" warnings in the test below +SET client_min_messages TO ERROR; + +-- to fail fast when the hostname is not resolvable, as will be the case below +SET citus.node_connection_timeout to '1s'; + +BEGIN; + SET application_name TO 'new_app_name'; + + -- that should fail because of bad hostname & port + SELECT citus_add_node('200.200.200.200', 1, 200); + + -- Since the above command failed, now Postgres will need to revert the + -- application_name change made in this transaction and this will + -- happen within the abort-transaction callback, so we won't be in a + -- transaction block while Postgres does that. + -- + -- And when the application_name changes, Citus tries to re-assign + -- the global pid but it does so only for Citus internal backends, + -- and doing so for Citus internal backends doesn't require being + -- in a transaction block and is safe. + -- + -- However, for the client external backends (like us here), Citus + -- doesn't re-assign the global pid because it's not needed and it's + -- not safe to do so outside of a transaction block. This is because + -- it would require performing a catalog access to retrieve the local + -- node id when the cached local node id is invalidated, like what just + -- happened here because of the failed citus_add_node() call made + -- above. + -- + -- So by failing here (rather than crashing), we ensure this behavior. 
+ROLLBACK; + +RESET client_min_messages; +RESET citus.node_connection_timeout; + -- restore coordinator for the rest of the tests SELECT citus_set_coordinator_host('localhost', :master_port); diff --git a/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql b/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql index e84c35b608c..333ac60ca93 100644 --- a/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql +++ b/src/test/regress/sql/upgrade_pg_dist_cleanup_after.sql @@ -13,3 +13,8 @@ SELECT COUNT(*) FROM pg_dist_placement WHERE shardid IN (SELECT shardid FROM pg_ SELECT * FROM pg_dist_cleanup; CALL citus_cleanup_orphaned_resources(); DROP TABLE table_with_orphaned_shards; + +-- Re-enable automatic shard cleanup by maintenance daemon as +-- we have disabled it in upgrade_pg_dist_cleanup_before.sql +ALTER SYSTEM RESET citus.defer_shard_delete_interval; +SELECT pg_reload_conf(); diff --git a/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql b/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql index 62ec8a1fb46..ec0eef353dc 100644 --- a/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql +++ b/src/test/regress/sql/upgrade_pg_dist_cleanup_before.sql @@ -16,6 +16,16 @@ SELECT create_distributed_table('table_with_orphaned_shards', 'a'); -- show all 32 placements are active SELECT COUNT(*) FROM pg_dist_placement WHERE shardstate = 1 AND shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='table_with_orphaned_shards'::regclass); -- create an orphaned placement based on an existing one +-- +-- But before doing that, first disable automatic shard cleanup +-- by maintenance daemon so that we can reliably test the cleanup +-- in upgrade_pg_dist_cleanup_after.sql. + +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + +SELECT pg_sleep(0.1); + INSERT INTO pg_dist_placement(placementid, shardid, shardstate, shardlength, groupid) SELECT nextval('pg_dist_placement_placementid_seq'::regclass), shardid, 4, shardlength, 3-groupid FROM pg_dist_placement