Skip to content

Commit 1d55deb

Browse files
authored
Support CREATE / DROP database commands from any node (#7359)
DESCRIPTION: Adds support for issuing `CREATE`/`DROP` DATABASE commands from worker nodes. With this commit, we allow issuing CREATE / DROP DATABASE commands from worker nodes too. As in #7278, this is not allowed when the coordinator is not added to metadata because we don't ever sync metadata changes to coordinator when adding coordinator to the metadata via `SELECT citus_set_coordinator_host('<hostname>')`, or equivalently, via `SELECT citus_add_node(<coordinator_node_name>, <coordinator_node_port>, 0)`. We serialize database management commands by acquiring a Citus-specific advisory lock on the first primary worker node if there are any workers in the cluster. As opposed to what we've done in #7278 for role management commands, we try to avoid running into distributed deadlocks as much as possible. This is because, while distributed deadlocks that can happen around role management commands can be detected by Citus, this is not the case for database management commands because most of them cannot be run inside a transaction block. In that case, Citus cannot even detect the distributed deadlock because the command is not part of a distributed transaction at all, and the command execution might not return control to the user for an indefinite amount of time.
1 parent 20dc58c commit 1d55deb

33 files changed

+1438
-85
lines changed

src/backend/distributed/commands/database.c

Lines changed: 76 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "distributed/metadata_utility.h"
4242
#include "distributed/multi_executor.h"
4343
#include "distributed/relation_access_tracking.h"
44+
#include "distributed/serialize_distributed_ddls.h"
4445
#include "distributed/worker_protocol.h"
4546
#include "distributed/worker_transaction.h"
4647

@@ -248,6 +249,9 @@ IsSetTablespaceStatement(AlterDatabaseStmt *stmt)
248249
*
249250
* In this stage we can prepare the commands that need to be run on all workers to
250251
* alter the database.
252+
*
253+
* We also serialize database commands globally by acquiring a Citus specific advisory
254+
* lock based on OCLASS_DATABASE on the first primary worker node.
251255
*/
252256
List *
253257
PreprocessAlterDatabaseStmt(Node *node, const char *queryString,
@@ -264,6 +268,7 @@ PreprocessAlterDatabaseStmt(Node *node, const char *queryString,
264268
}
265269

266270
EnsureCoordinator();
271+
SerializeDistributedDDLsOnObjectClassObject(OCLASS_DATABASE, stmt->dbname);
267272

268273
char *sql = DeparseTreeNode((Node *) stmt);
269274

@@ -291,11 +296,14 @@ PreprocessAlterDatabaseStmt(Node *node, const char *queryString,
291296
#if PG_VERSION_NUM >= PG_VERSION_15
292297

293298
/*
294-
* PreprocessAlterDatabaseSetStmt is executed before the statement is applied to the local
295-
* postgres instance.
299+
* PreprocessAlterDatabaseRefreshCollStmt is executed before the statement is applied to
300+
* the local postgres instance.
296301
*
297302
* In this stage we can prepare the commands that need to be run on all workers to
298303
* refresh the collation version of the database.
304+
*
305+
* We also serialize database commands globally by acquiring a Citus specific advisory
306+
* lock based on OCLASS_DATABASE on the first primary worker node.
299307
*/
300308
List *
301309
PreprocessAlterDatabaseRefreshCollStmt(Node *node, const char *queryString,
@@ -312,6 +320,7 @@ PreprocessAlterDatabaseRefreshCollStmt(Node *node, const char *queryString,
312320
}
313321

314322
EnsureCoordinator();
323+
SerializeDistributedDDLsOnObjectClassObject(OCLASS_DATABASE, stmt->dbname);
315324

316325
char *sql = DeparseTreeNode((Node *) stmt);
317326

@@ -325,8 +334,51 @@ PreprocessAlterDatabaseRefreshCollStmt(Node *node, const char *queryString,
325334

326335
#endif
327336

337+
328338
/*
329-
* PreprocessAlterDatabaseRenameStmt is executed before the statement is applied to the local
339+
* PreprocessAlterDatabaseRenameStmt is executed before the statement is applied to
340+
* the local postgres instance.
341+
*
342+
* We also serialize database commands globally by acquiring a Citus specific advisory
343+
* lock based on OCLASS_DATABASE on the first primary worker node.
344+
*
345+
* We acquire this lock here instead of PostprocessAlterDatabaseRenameStmt because the
346+
* command renames the database and SerializeDistributedDDLsOnObjectClassObject resolves the
347+
* object on workers based on database name. For this reason, we need to acquire the lock
348+
* before the command is applied to the local postgres instance.
349+
*/
350+
List *
351+
PreprocessAlterDatabaseRenameStmt(Node *node, const char *queryString,
352+
ProcessUtilityContext processUtilityContext)
353+
{
354+
bool missingOk = true;
355+
RenameStmt *stmt = castNode(RenameStmt, node);
356+
ObjectAddress *dbAddress = GetDatabaseAddressFromDatabaseName(stmt->subname,
357+
missingOk);
358+
359+
if (!ShouldPropagate() || !IsAnyObjectDistributed(list_make1(dbAddress)))
360+
{
361+
return NIL;
362+
}
363+
364+
EnsureCoordinator();
365+
366+
/*
367+
* Different than other ALTER DATABASE commands, we first acquire a lock
368+
* by providing InvalidOid because we want ALTER DATABASE .. RENAME TO ..
369+
* commands to block not only with ALTER DATABASE operations but also
370+
* with CREATE DATABASE operations because they might cause name conflicts
371+
* and that could cause deadlocks too.
372+
*/
373+
SerializeDistributedDDLsOnObjectClass(OCLASS_DATABASE);
374+
SerializeDistributedDDLsOnObjectClassObject(OCLASS_DATABASE, stmt->subname);
375+
376+
return NIL;
377+
}
378+
379+
380+
/*
381+
* PostprocessAlterDatabaseRenameStmt is executed after the statement is applied to the local
330382
* postgres instance. In this stage we prepare ALTER DATABASE RENAME statement to be run on
331383
* all workers.
332384
*/
@@ -361,6 +413,9 @@ PostprocessAlterDatabaseRenameStmt(Node *node, const char *queryString)
361413
*
362414
* In this stage we can prepare the commands that need to be run on all workers to
363415
* apply the ALTER DATABASE .. SET command on the database.
416+
*
417+
* We also serialize database commands globally by acquiring a Citus specific advisory
418+
* lock based on OCLASS_DATABASE on the first primary worker node.
364419
*/
365420
List *
366421
PreprocessAlterDatabaseSetStmt(Node *node, const char *queryString,
@@ -377,6 +432,7 @@ PreprocessAlterDatabaseSetStmt(Node *node, const char *queryString,
377432
}
378433

379434
EnsureCoordinator();
435+
SerializeDistributedDDLsOnObjectClassObject(OCLASS_DATABASE, stmt->dbname);
380436

381437
char *sql = DeparseTreeNode((Node *) stmt);
382438

@@ -389,12 +445,15 @@ PreprocessAlterDatabaseSetStmt(Node *node, const char *queryString,
389445

390446

391447
/*
392-
* PostprocessAlterDatabaseStmt is executed before the statement is applied to the local
448+
* PreprocessCreateDatabaseStmt is executed before the statement is applied to the local
393449
* Postgres instance.
394450
*
395451
* In this stage, we perform validations that we want to ensure before delegating to
396452
* previous utility hooks because it might not be convenient to throw an error in an
397453
* implicit transaction that creates a database.
454+
*
455+
* We also serialize database commands globally by acquiring a Citus specific advisory
456+
* lock based on OCLASS_DATABASE on the first primary worker node.
398457
*/
399458
List *
400459
PreprocessCreateDatabaseStmt(Node *node, const char *queryString,
@@ -405,11 +464,13 @@ PreprocessCreateDatabaseStmt(Node *node, const char *queryString,
405464
return NIL;
406465
}
407466

408-
EnsureCoordinator();
467+
EnsurePropagationToCoordinator();
409468

410469
CreatedbStmt *stmt = castNode(CreatedbStmt, node);
411470
EnsureSupportedCreateDatabaseCommand(stmt);
412471

472+
SerializeDistributedDDLsOnObjectClass(OCLASS_DATABASE);
473+
413474
return NIL;
414475
}
415476

@@ -430,7 +491,7 @@ PostprocessCreateDatabaseStmt(Node *node, const char *queryString)
430491
return NIL;
431492
}
432493

433-
EnsureCoordinator();
494+
EnsurePropagationToCoordinator();
434495

435496
/*
436497
* Given that CREATE DATABASE doesn't support "IF NOT EXISTS" and we're
@@ -448,16 +509,19 @@ PostprocessCreateDatabaseStmt(Node *node, const char *queryString)
448509
(void *) createDatabaseCommand,
449510
ENABLE_DDL_PROPAGATION);
450511

451-
return NontransactionalNodeDDLTaskList(NON_COORDINATOR_NODES, commands);
512+
return NontransactionalNodeDDLTaskList(REMOTE_NODES, commands);
452513
}
453514

454515

455516
/*
456-
* PreprocessDropDatabaseStmt is executed after the statement is applied to the local
517+
* PreprocessDropDatabaseStmt is executed before the statement is applied to the local
457518
* postgres instance. In this stage we can prepare the commands that need to be run on
458519
* all workers to drop the database. Since the DROP DATABASE statement gives error in
459520
* transaction context, we need to use NontransactionalNodeDDLTaskList to send the
460521
* DROP DATABASE statement to the workers.
522+
*
523+
* We also serialize database commands globally by acquiring a Citus specific advisory
524+
* lock based on OCLASS_DATABASE on the first primary worker node.
461525
*/
462526
List *
463527
PreprocessDropDatabaseStmt(Node *node, const char *queryString,
@@ -468,7 +532,7 @@ PreprocessDropDatabaseStmt(Node *node, const char *queryString,
468532
return NIL;
469533
}
470534

471-
EnsureCoordinator();
535+
EnsurePropagationToCoordinator();
472536

473537
DropdbStmt *stmt = (DropdbStmt *) node;
474538

@@ -488,13 +552,15 @@ PreprocessDropDatabaseStmt(Node *node, const char *queryString,
488552
return NIL;
489553
}
490554

555+
SerializeDistributedDDLsOnObjectClassObject(OCLASS_DATABASE, stmt->dbname);
556+
491557
char *dropDatabaseCommand = DeparseTreeNode(node);
492558

493559
List *commands = list_make3(DISABLE_DDL_PROPAGATION,
494560
(void *) dropDatabaseCommand,
495561
ENABLE_DDL_PROPAGATION);
496562

497-
return NontransactionalNodeDDLTaskList(NON_COORDINATOR_NODES, commands);
563+
return NontransactionalNodeDDLTaskList(REMOTE_NODES, commands);
498564
}
499565

500566

src/backend/distributed/commands/distribute_object_ops.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ static DistributeObjectOps Database_Set = {
536536
static DistributeObjectOps Database_Rename = {
537537
.deparse = DeparseAlterDatabaseRenameStmt,
538538
.qualify = NULL,
539-
.preprocess = NULL,
539+
.preprocess = PreprocessAlterDatabaseRenameStmt,
540540
.postprocess = PostprocessAlterDatabaseRenameStmt,
541541
.objectType = OBJECT_DATABASE,
542542
.operationType = DIST_OPS_ALTER,

src/backend/distributed/commands/utility_hook.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -726,9 +726,9 @@ citus_ProcessUtilityInternal(PlannedStmt *pstmt,
726726
ereport(NOTICE, (errmsg("Citus partially supports CREATE DATABASE for "
727727
"distributed databases"),
728728
errdetail("Citus does not propagate CREATE DATABASE "
729-
"command to workers"),
729+
"command to other nodes"),
730730
errhint("You can manually create a database and its "
731-
"extensions on workers.")));
731+
"extensions on other nodes.")));
732732
}
733733
}
734734
else if (IsA(parsetree, CreateRoleStmt) && !EnableCreateRolePropagation)

src/backend/distributed/metadata/node_metadata.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2771,12 +2771,24 @@ EnsureCoordinatorIsInMetadata(void)
27712771
{
27722772
bool isCoordinatorInMetadata = false;
27732773
PrimaryNodeForGroup(COORDINATOR_GROUP_ID, &isCoordinatorInMetadata);
2774-
if (!isCoordinatorInMetadata)
2774+
if (isCoordinatorInMetadata)
2775+
{
2776+
return;
2777+
}
2778+
2779+
/* be more descriptive when we're not on coordinator */
2780+
if (IsCoordinator())
27752781
{
27762782
ereport(ERROR, (errmsg("coordinator is not added to the metadata"),
27772783
errhint("Use SELECT citus_set_coordinator_host('<hostname>') "
27782784
"to configure the coordinator hostname")));
27792785
}
2786+
else
2787+
{
2788+
ereport(ERROR, (errmsg("coordinator is not added to the metadata"),
2789+
errhint("Use SELECT citus_set_coordinator_host('<hostname>') "
2790+
"on coordinator to configure the coordinator hostname")));
2791+
}
27802792
}
27812793

27822794

0 commit comments

Comments
 (0)