From 0aeafce8a0c258f5731c6a5aeab5a5c94755e6fc Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 20 Dec 2024 10:09:27 +0300 Subject: [PATCH] reland the alternative approach - works this time --- .../distributed/metadata/metadata_cache.c | 10 ----- src/backend/distributed/shared_library_init.c | 43 +++++++++---------- .../test/run_from_same_connection.c | 4 ++ .../distributed/transaction/backend_data.c | 17 ++++++++ src/include/distributed/backend_data.h | 1 + src/include/distributed/metadata_cache.h | 1 - .../regress/expected/remove_coordinator.out | 16 ++++--- src/test/regress/sql/remove_coordinator.sql | 16 ++++--- 8 files changed, 61 insertions(+), 47 deletions(-) diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c index 3bf525dd8a6..4f1b942a085 100644 --- a/src/backend/distributed/metadata/metadata_cache.c +++ b/src/backend/distributed/metadata/metadata_cache.c @@ -4545,16 +4545,6 @@ GetLocalNodeId(void) } -/* - * CachedLocalNodeIdIsValid return true if the cached local node id is valid. - */ -bool -CachedLocalNodeIdIsValid(void) -{ - return LocalNodeId != -1; -} - - /* * RegisterLocalGroupIdCacheCallbacks registers the callbacks required to * maintain LocalGroupId at a consistent value. It's separate from diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 9553600d701..6d26b802f64 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -2890,26 +2890,27 @@ ApplicationNameAssignHook(const char *newval, void *extra) DetermineCitusBackendType(newval); /* - * AssignGlobalPID might read from catalog tables to get the the local - * nodeid. But ApplicationNameAssignHook might be called before catalog - * access is available to the backend (such as in early stages of - * authentication). We use StartupCitusBackend to initialize the global pid - * after catalogs are available. After that happens this hook becomes - * responsible to update the global pid on later application_name changes. - * So we set the FinishedStartupCitusBackend flag in StartupCitusBackend to - * indicate when this responsibility handoff has happened. + * We use StartupCitusBackend to initialize the global pid after catalogs + * are available. After that happens this hook becomes responsible to update + * the global pid on later application_name changes. So we set the + * FinishedStartupCitusBackend flag in StartupCitusBackend to indicate when + * this responsibility handoff has happened. * - * On the other hand, even if now it's this hook's responsibility to update - * the global pid, we cannot do so if we might need to read from catalog - * but it's unsafe to do so. For Citus internal backends, this cannot be the - * case because in that case AssignGlobalPID() just extracts its global pid - * from the application_name and extracting doesn't require catalog access. - * But for external client backends, we either need to guarantee that we won't - * read from catalog tables or that it's safe to do so. The only case where - * AssignGlobalPID() could read from catalog tables is when the cached local - * node id is invalidated. So for this reason, for external client backends, - * we require that either the cached local node id is valid or that we are in - * a transaction block -because in that case it's safe to read from catalog. + * Also note that when application_name changes, we don't actually need to + * try re-assigning the global pid for external client backends and + * background workers because application_name doesn't affect the global + * pid for such backends - note that !IsExternalClientBackend() check covers + * both types of backends. Plus, + * trying to re-assign the global pid for such backends would unnecessarily + * cause performing a catalog access when the cached local node id is + * invalidated. However, accessing to the catalog tables is dangerous in + * certain situations like when we're not in a transaction block. And for + * the other types of backends, i.e., the Citus internal backends, we need + * to re-assign the global pid when the application_name changes because for + * such backends we simply extract the global pid inherited from the + * originating backend from the application_name -that's specified by + * originating backend when openning that connection- and this doesn't require + * catalog access. * * Another solution to the catalog table acccess problem would be to update * global pid lazily, like we do for HideShards. But that's not possible @@ -2919,9 +2920,7 @@ ApplicationNameAssignHook(const char *newval, void *extra) * as reasonably possible, which is also why we extract global pids in the * AuthHook already (extracting doesn't require catalog access). */ - if (FinishedStartupCitusBackend && - (!IsExternalClientBackend() || CachedLocalNodeIdIsValid() || - IsTransactionState())) + if (FinishedStartupCitusBackend && !IsExternalClientBackend()) { AssignGlobalPID(newval); } diff --git a/src/backend/distributed/test/run_from_same_connection.c b/src/backend/distributed/test/run_from_same_connection.c index 52b2e0b181b..d22ee442834 100644 --- a/src/backend/distributed/test/run_from_same_connection.c +++ b/src/backend/distributed/test/run_from_same_connection.c @@ -190,6 +190,9 @@ run_commands_on_session_level_connection_to_node(PG_FUNCTION_ARGS) /* * override_backend_data_gpid is a wrapper around SetBackendDataGpid(). + * Also sets distributedCommandOriginator to true since the only caller of + * this method calls this function actually wants this backend to + * be treated as a distributed command originator with the given global pid. */ Datum override_backend_data_gpid(PG_FUNCTION_ARGS) @@ -199,6 +202,7 @@ override_backend_data_gpid(PG_FUNCTION_ARGS) uint64 gpid = PG_GETARG_INT64(0); SetBackendDataGlobalPID(gpid); + SetBackendDataDistributedCommandOriginator(true); PG_RETURN_VOID(); } diff --git a/src/backend/distributed/transaction/backend_data.c b/src/backend/distributed/transaction/backend_data.c index 67acadd2940..85fb0f6cfb9 100644 --- a/src/backend/distributed/transaction/backend_data.c +++ b/src/backend/distributed/transaction/backend_data.c @@ -964,6 +964,23 @@ SetBackendDataGlobalPID(uint64 gpid) } +/* + * SetBackendDataDistributedCommandOriginator sets the distributedCommandOriginator + * field on MyBackendData. + */ +void +SetBackendDataDistributedCommandOriginator(bool distributedCommandOriginator) +{ + if (!MyBackendData) + { + return; + } + SpinLockAcquire(&MyBackendData->mutex); + MyBackendData->distributedCommandOriginator = distributedCommandOriginator; + SpinLockRelease(&MyBackendData->mutex); +} + + /* * GetGlobalPID returns the global process id of the current backend. */ diff --git a/src/include/distributed/backend_data.h b/src/include/distributed/backend_data.h index 8014fe5a6a9..5b3fcf2ac22 100644 --- a/src/include/distributed/backend_data.h +++ b/src/include/distributed/backend_data.h @@ -61,6 +61,7 @@ extern void AssignGlobalPID(const char *applicationName); extern uint64 GetGlobalPID(void); extern void SetBackendDataDatabaseId(void); extern void SetBackendDataGlobalPID(uint64 gpid); +extern void SetBackendDataDistributedCommandOriginator(bool distributedCommandOriginator); extern uint64 ExtractGlobalPID(const char *applicationName); extern int ExtractNodeIdFromGlobalPID(uint64 globalPID, bool missingOk); extern int ExtractProcessIdFromGlobalPID(uint64 globalPID); diff --git a/src/include/distributed/metadata_cache.h b/src/include/distributed/metadata_cache.h index d4779a98aa6..f1120497b72 100644 --- a/src/include/distributed/metadata_cache.h +++ b/src/include/distributed/metadata_cache.h @@ -181,7 +181,6 @@ extern CitusTableCacheEntry * LookupCitusTableCacheEntry(Oid relationId); extern DistObjectCacheEntry * LookupDistObjectCacheEntry(Oid classid, Oid objid, int32 objsubid); extern int32 GetLocalGroupId(void); -extern bool CachedLocalNodeIdIsValid(void); extern int32 GetLocalNodeId(void); extern void CitusTableCacheFlushInvalidatedEntries(void); extern Oid LookupShardRelationFromCatalog(int64 shardId, bool missing_ok); diff --git a/src/test/regress/expected/remove_coordinator.out b/src/test/regress/expected/remove_coordinator.out index cf3711a1926..e2fd5df027b 100644 --- a/src/test/regress/expected/remove_coordinator.out +++ b/src/test/regress/expected/remove_coordinator.out @@ -20,15 +20,17 @@ ERROR: connection to the remote node postgres@200.200.200.200:1 failed -- transaction block while Postgres does that. -- -- And when the application_name changes, Citus tries to re-assign - -- the global pid and doing so for Citus internal backends doesn't - -- require being in a transaction block and is safe. + -- the global pid but it does so only for Citus internal backends, + -- and doing so for Citus internal backends doesn't require being + -- in a transaction block and is safe. -- -- However, for the client external backends (like us here), Citus - -- doesn't try to re-assign the global pid if doing so requires catalog - -- access and we're outside of a transaction block. Note that in that - -- case the catalog access may only be needed to retrive the local node - -- id when the cached local node is invalidated like what just happened - -- here because of the failed citus_add_node() call made above. + -- doesn't re-assign the global pid because it's not needed and it's + -- not safe to do so outside of a transaction block. This is because, + -- it would require performing a catalog access to retrive the local + -- node id when the cached local node is invalidated like what just + -- happened here because of the failed citus_add_node() call made + -- above. -- -- So by failing here (rather than crashing), we ensure this behavior. ROLLBACK; diff --git a/src/test/regress/sql/remove_coordinator.sql b/src/test/regress/sql/remove_coordinator.sql index 10d703aba33..35a8a57189b 100644 --- a/src/test/regress/sql/remove_coordinator.sql +++ b/src/test/regress/sql/remove_coordinator.sql @@ -19,15 +19,17 @@ BEGIN; -- transaction block while Postgres does that. -- -- And when the application_name changes, Citus tries to re-assign - -- the global pid and doing so for Citus internal backends doesn't - -- require being in a transaction block and is safe. + -- the global pid but it does so only for Citus internal backends, + -- and doing so for Citus internal backends doesn't require being + -- in a transaction block and is safe. -- -- However, for the client external backends (like us here), Citus - -- doesn't try to re-assign the global pid if doing so requires catalog - -- access and we're outside of a transaction block. Note that in that - -- case the catalog access may only be needed to retrive the local node - -- id when the cached local node is invalidated like what just happened - -- here because of the failed citus_add_node() call made above. + -- doesn't re-assign the global pid because it's not needed and it's + -- not safe to do so outside of a transaction block. This is because, + -- it would require performing a catalog access to retrive the local + -- node id when the cached local node is invalidated like what just + -- happened here because of the failed citus_add_node() call made + -- above. -- -- So by failing here (rather than crashing), we ensure this behavior. ROLLBACK;