From 7cc9d5bb903fa0eb125f2c834c446034479159a3 Mon Sep 17 00:00:00 2001 From: Oleg Tselebrovskiy Date: Tue, 18 Mar 2025 12:57:31 +0700 Subject: [PATCH 1/2] Add profile_extended and history_extended views with additional dimensions Sometimes it can be useful to have additional info collected with wait_events, so we add two new views/functions that include more information. The structure of those views could be changed in new versions of pg_wait_sampling extension --- Makefile | 2 +- collector.c | 119 ++++- expected/queries.out | 45 ++ meson.build | 1 + pg_wait_sampling--1.1--1.2.sql | 79 ++++ pg_wait_sampling.c | 765 +++++++++++++++++++++++++++++++++ pg_wait_sampling.control | 2 +- pg_wait_sampling.h | 63 ++- sql/queries.sql | 14 + 9 files changed, 1067 insertions(+), 23 deletions(-) create mode 100644 pg_wait_sampling--1.1--1.2.sql diff --git a/Makefile b/Makefile index 32711a3..f9de6d9 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ MODULE_big = pg_wait_sampling OBJS = pg_wait_sampling.o collector.o EXTENSION = pg_wait_sampling -DATA = pg_wait_sampling--1.1.sql pg_wait_sampling--1.0--1.1.sql +DATA = pg_wait_sampling--1.1.sql pg_wait_sampling--1.0--1.1.sql pg_wait_sampling--1.1--1.2.sql REGRESS = load queries diff --git a/collector.c b/collector.c index 721299f..e073062 100644 --- a/collector.c +++ b/collector.c @@ -10,6 +10,7 @@ #include "postgres.h" #include +#include #include "compat.h" #include "miscadmin.h" @@ -30,6 +31,13 @@ #include "utils/resowner.h" #include "utils/timestamp.h" +#define check_bestatus_dimensions(dimensions) \ + (dimensions & (PGWS_DIMENSIONS_BE_TYPE |\ + PGWS_DIMENSIONS_BE_STATE |\ + PGWS_DIMENSIONS_BE_START_TIME |\ + PGWS_DIMENSIONS_CLIENT_ADDR |\ + PGWS_DIMENSIONS_CLIENT_HOSTNAME |\ + PGWS_DIMENSIONS_APPNAME)) static volatile sig_atomic_t shutdown_requested = false; static void handle_sigterm(SIGNAL_ARGS); @@ -162,25 +170,103 @@ probe_waits(History *observations, HTAB *profile_hash, LWLockAcquire(ProcArrayLock, LW_SHARED); for (i = 
0; i < ProcGlobal->allProcCount; i++) { - HistoryItem item, + HistoryItem item_history, *observation; + ProfileItem item_profile; PGPROC *proc = &ProcGlobal->allProcs[i]; + int pid; + uint32 wait_event_info; - if (!pgws_should_sample_proc(proc, &item.pid, &item.wait_event_info)) + /* Check if we need to sample this process */ + if (!pgws_should_sample_proc(proc, &pid, &wait_event_info)) continue; + /* We zero whole HistoryItem to avoid doing it field-by-field */ + memset(&item_history, 0, sizeof(HistoryItem)); + memset(&item_profile, 0, sizeof(ProfileItem)); + + item_history.pid = pid; + item_profile.pid = pid; + + item_history.wait_event_info = wait_event_info; + item_profile.wait_event_info = wait_event_info; + if (pgws_profileQueries) - item.queryId = pgws_proc_queryids[i]; - else - item.queryId = 0; + { + item_history.queryId = pgws_proc_queryids[i]; + item_profile.queryId = pgws_proc_queryids[i]; + } - item.ts = ts; + item_history.ts = ts; + + /* Copy everything we need from PGPROC */ + if (pgws_history_dimensions & PGWS_DIMENSIONS_ROLE_ID) + item_history.role_id = proc->roleId; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_ROLE_ID) + item_profile.role_id = proc->roleId; + + if (pgws_history_dimensions & PGWS_DIMENSIONS_DB_ID) + item_history.database_id = proc->databaseId; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_DB_ID) + item_profile.database_id = proc->databaseId; + + if (pgws_history_dimensions & PGWS_DIMENSIONS_PARALLEL_LEADER_PID) + item_history.parallel_leader_pid = (proc->lockGroupLeader ? + proc->lockGroupLeader->pid : + 0); + if (pgws_profile_dimensions & PGWS_DIMENSIONS_PARALLEL_LEADER_PID) + item_profile.parallel_leader_pid = (proc->lockGroupLeader ? 
+ proc->lockGroupLeader->pid : + 0); + /* Look into BackendStatus only if necessary */ + if (check_bestatus_dimensions(pgws_history_dimensions) || + check_bestatus_dimensions(pgws_profile_dimensions)) + { +#if PG_VERSION_NUM >= 170000 + PgBackendStatus *bestatus = pgstat_get_beentry_by_proc_number(GetNumberFromPGProc(proc)); +#else + PgBackendStatus *bestatus = get_beentry_by_procpid(proc->pid); +#endif + /* Copy everything we need from BackendStatus */ + if (bestatus) + { + if (pgws_history_dimensions & PGWS_DIMENSIONS_BE_TYPE) + item_history.backend_type = bestatus->st_backendType; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_BE_TYPE) + item_profile.backend_type = bestatus->st_backendType; + + if (pgws_history_dimensions & PGWS_DIMENSIONS_BE_STATE) + item_history.backend_state = bestatus->st_state; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_BE_STATE) + item_profile.backend_state = bestatus->st_state; + + if (pgws_history_dimensions & PGWS_DIMENSIONS_BE_START_TIME) + item_history.proc_start = bestatus->st_proc_start_timestamp; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_BE_START_TIME) + item_profile.proc_start = bestatus->st_proc_start_timestamp; + + if (pgws_history_dimensions & PGWS_DIMENSIONS_CLIENT_ADDR) + item_history.client_addr = bestatus->st_clientaddr; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_CLIENT_ADDR) + item_profile.client_addr = bestatus->st_clientaddr; + + if (pgws_history_dimensions & PGWS_DIMENSIONS_CLIENT_HOSTNAME) + strcpy(item_history.client_hostname, bestatus->st_clienthostname); + if (pgws_profile_dimensions & PGWS_DIMENSIONS_CLIENT_HOSTNAME) + strcpy(item_profile.client_hostname, bestatus->st_clienthostname); + + if (pgws_history_dimensions & PGWS_DIMENSIONS_APPNAME) + strcpy(item_history.appname, bestatus->st_appname); + if (pgws_profile_dimensions & PGWS_DIMENSIONS_APPNAME) + strcpy(item_profile.appname, bestatus->st_appname); + } + } /* Write to the history if needed */ if (write_history) { observation = 
get_next_observation(observations); - *observation = item; + *observation = item_history; } /* Write to the profile if needed */ @@ -190,9 +276,9 @@ probe_waits(History *observations, HTAB *profile_hash, bool found; if (!profile_pid) - item.pid = 0; + item_profile.pid = 0; - profileItem = (ProfileItem *) hash_search(profile_hash, &item, HASH_ENTER, &found); + profileItem = (ProfileItem *) hash_search(profile_hash, &item_profile, HASH_ENTER, &found); if (found) profileItem->count++; else @@ -200,6 +286,11 @@ probe_waits(History *observations, HTAB *profile_hash, } } LWLockRelease(ProcArrayLock); +#if PG_VERSION_NUM >= 140000 + pgstat_clear_backend_activity_snapshot(); +#else + pgstat_clear_snapshot(); +#endif } /* @@ -287,10 +378,12 @@ make_profile_hash() { HASHCTL hash_ctl; - if (pgws_profileQueries) - hash_ctl.keysize = offsetof(ProfileItem, count); - else - hash_ctl.keysize = offsetof(ProfileItem, queryId); + /* + * Since adding additional dimensions we include everyting except count + * into hashtable key. 
This is fine for cases when some fields are 0 since + * it doesn't impede our ability to search the hash table for entries + */ + hash_ctl.keysize = offsetof(ProfileItem, count); hash_ctl.entrysize = sizeof(ProfileItem); return hash_create("Waits profile hash", 1024, &hash_ctl, diff --git a/expected/queries.out b/expected/queries.out index 722df5f..6718c14 100644 --- a/expected/queries.out +++ b/expected/queries.out @@ -20,6 +20,27 @@ WITH t as (SELECT sum(0) FROM pg_wait_sampling_profile) 0 (1 row) +WITH t as (SELECT sum(0) FROM pg_wait_sampling_current_extended) + SELECT sum(0) FROM generate_series(1, 2), t; + sum +----- + 0 +(1 row) + +WITH t as (SELECT sum(0) FROM pg_wait_sampling_history_extended) + SELECT sum(0) FROM generate_series(1, 2), t; + sum +----- + 0 +(1 row) + +WITH t as (SELECT sum(0) FROM pg_wait_sampling_profile_extended) + SELECT sum(0) FROM generate_series(1, 2), t; + sum +----- + 0 +(1 row) + -- Some dummy checks just to be sure that all our functions work and return something. 
SELECT count(*) = 1 as test FROM pg_wait_sampling_get_current(pg_backend_pid()); test @@ -45,4 +66,28 @@ SELECT pg_wait_sampling_reset_profile(); (1 row) +SELECT count(*) = 1 as test FROM pg_wait_sampling_get_current_extended(pg_backend_pid()); + test +------ + t +(1 row) + +SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_profile_extended(); + test +------ + t +(1 row) + +SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_history_extended(); + test +------ + t +(1 row) + +SELECT pg_wait_sampling_reset_profile(); + pg_wait_sampling_reset_profile +-------------------------------- + +(1 row) + DROP EXTENSION pg_wait_sampling; diff --git a/meson.build b/meson.build index c3c3dc9..162bb0e 100644 --- a/meson.build +++ b/meson.build @@ -24,6 +24,7 @@ install_data( 'pg_wait_sampling.control', 'pg_wait_sampling--1.0--1.1.sql', 'pg_wait_sampling--1.1.sql', + 'pg_wait_sampling--1.1--1.2.sql', kwargs: contrib_data_args, ) diff --git a/pg_wait_sampling--1.1--1.2.sql b/pg_wait_sampling--1.1--1.2.sql new file mode 100644 index 0000000..df95826 --- /dev/null +++ b/pg_wait_sampling--1.1--1.2.sql @@ -0,0 +1,79 @@ +/* contrib/pg_wait_sampling/pg_wait_sampling--1.1--1.2.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_wait_sampling UPDATE TO 1.2" to load this file. 
\quit + +CREATE FUNCTION pg_wait_sampling_get_current_extended ( + pid int4, + OUT pid int4, + OUT event_type text, + OUT event text, + OUT queryid int8, + OUT role_id int8, + OUT database_id int8, + OUT parallel_leader_pid int4, + OUT backend_type text, + OUT backend_state text, + OUT proc_start timestamptz, + OUT client_addr text, + OUT client_hostname text, + OUT appname text +) +RETURNS SETOF record +AS 'MODULE_PATHNAME' +LANGUAGE C VOLATILE CALLED ON NULL INPUT; + +CREATE VIEW pg_wait_sampling_current_extended AS + SELECT * FROM pg_wait_sampling_get_current_extended(NULL::integer); + +GRANT SELECT ON pg_wait_sampling_current TO PUBLIC; + +CREATE FUNCTION pg_wait_sampling_get_history_extended ( + OUT pid int4, + OUT ts timestamptz, + OUT event_type text, + OUT event text, + OUT queryid int8, + OUT role_id int8, + OUT database_id int8, + OUT parallel_leader_pid int4, + OUT backend_type text, + OUT backend_state text, + OUT proc_start timestamptz, + OUT client_addr text, + OUT client_hostname text, + OUT appname text +) +RETURNS SETOF record +AS 'MODULE_PATHNAME' +LANGUAGE C VOLATILE STRICT; + +CREATE VIEW pg_wait_sampling_history_extended AS + SELECT * FROM pg_wait_sampling_get_history_extended(); + +GRANT SELECT ON pg_wait_sampling_history_extended TO PUBLIC; + +CREATE FUNCTION pg_wait_sampling_get_profile_extended ( + OUT pid int4, + OUT event_type text, + OUT event text, + OUT queryid int8, + OUT role_id int8, + OUT database_id int8, + OUT parallel_leader_pid int4, + OUT backend_type text, + OUT backend_state text, + OUT proc_start timestamptz, + OUT client_addr text, + OUT client_hostname text, + OUT appname text, + OUT count int8 +) +RETURNS SETOF record +AS 'MODULE_PATHNAME' +LANGUAGE C VOLATILE STRICT; + +CREATE VIEW pg_wait_sampling_profile_extended AS + SELECT * FROM pg_wait_sampling_get_profile_extended(); + +GRANT SELECT ON pg_wait_sampling_profile_extended TO PUBLIC; diff --git a/pg_wait_sampling.c b/pg_wait_sampling.c index a35fb94..6006b8e 100644 
--- a/pg_wait_sampling.c +++ b/pg_wait_sampling.c @@ -13,6 +13,7 @@ #include "access/htup_details.h" #include "catalog/pg_type_d.h" +#include "common/ip.h" #include "executor/executor.h" #include "funcapi.h" #include "miscadmin.h" @@ -32,6 +33,7 @@ #include "utils/guc.h" #include "utils/memutils.h" #include "utils/timestamp.h" +#include "utils/varlena.h" #if PG_VERSION_NUM < 150000 #include "postmaster/autovacuum.h" @@ -139,6 +141,10 @@ int pgws_profilePeriod = 10; bool pgws_profilePid = true; int pgws_profileQueries = PGWS_PROFILE_QUERIES_TOP; bool pgws_sampleCpu = true; +static char *pgws_history_dimensions_string = NULL; +static char *pgws_profile_dimensions_string = NULL; +int pgws_history_dimensions; /* bit mask that is derived from GUC */ +int pgws_profile_dimensions; /* bit mask that is derived from GUC */ #define pgws_enabled(level) \ ((pgws_profileQueries == PGWS_PROFILE_QUERIES_ALL) || \ @@ -307,6 +313,109 @@ pgws_cleanup_callback(int code, Datum arg) LockRelease(&queueTag, ExclusiveLock, false); } +/* + * Check tokens of string and fill bitmask accordingly + * Mostly copied from plpgsql_extra_checks_check_hook + */ +static bool +pgws_general_dimensions_check_hook (char **newvalue, void **extra, GucSource source) +{ + char *rawstring; + List *elemlist; + ListCell *l; + int extrachecks = 0; + int *myextra; + + /* Check special cases when we turn all or none dimensions */ + if (pg_strcasecmp(*newvalue, "all") == 0) + extrachecks = PGWS_DIMENSIONS_ALL; + else if (pg_strcasecmp(*newvalue, "none") == 0) + extrachecks = PGWS_DIMENSIONS_NONE; + else + { + /* Need a modifiable copy of string */ + rawstring = pstrdup(*newvalue); + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) + { + /* syntax error in list */ + GUC_check_errdetail("List syntax is invalid."); + pfree(rawstring); + list_free(elemlist); + return false; + } + + /* Loop over all recieved options */ + foreach(l, elemlist) + { + char *tok = (char *) 
lfirst(l); + + /* Process all allowed values */ + if (pg_strcasecmp(tok, "role_id") == 0) + extrachecks |= PGWS_DIMENSIONS_ROLE_ID; + else if (pg_strcasecmp(tok, "database_id") == 0) + extrachecks |= PGWS_DIMENSIONS_DB_ID; + else if (pg_strcasecmp(tok, "parallel_leader_pid") == 0) + extrachecks |= PGWS_DIMENSIONS_PARALLEL_LEADER_PID; + else if (pg_strcasecmp(tok, "backend_type") == 0) + extrachecks |= PGWS_DIMENSIONS_BE_TYPE; + else if (pg_strcasecmp(tok, "backend_state") == 0) + extrachecks |= PGWS_DIMENSIONS_BE_STATE; + else if (pg_strcasecmp(tok, "backend_start_time") == 0) + extrachecks |= PGWS_DIMENSIONS_BE_START_TIME; + else if (pg_strcasecmp(tok, "client_addr") == 0) + extrachecks |= PGWS_DIMENSIONS_CLIENT_ADDR; + else if (pg_strcasecmp(tok, "client_hostname") == 0) + extrachecks |= PGWS_DIMENSIONS_CLIENT_HOSTNAME; + else if (pg_strcasecmp(tok, "appname") == 0) + extrachecks |= PGWS_DIMENSIONS_APPNAME; + else if (pg_strcasecmp(tok, "all") == 0 || pg_strcasecmp(tok, "none") == 0) + { + GUC_check_errdetail("Key word \"%s\" cannot be combined with other key words.", tok); + pfree(rawstring); + list_free(elemlist); + return false; + } + else + { + GUC_check_errdetail("Unrecognized key word: \"%s\".", tok); + pfree(rawstring); + list_free(elemlist); + return false; + } + } + + pfree(rawstring); + list_free(elemlist); + } +#if PG_VERSION_NUM >= 160000 + myextra = (int *) guc_malloc(LOG, sizeof(int)); +#else + myextra = (int *) malloc(sizeof(int)); +#endif + if (!myextra) + return false; + *myextra = extrachecks; + *extra = myextra; + + return true; +} + +/* Assign actual value to dimension bitmask */ +static void +pgws_history_dimensions_assign_hook (const char *newvalue, void *extra) +{ + pgws_history_dimensions = *((int *) extra); +} + +/* Assign actual value to dimension bitmask */ +static void +pgws_profile_dimensions_assign_hook (const char *newvalue, void *extra) +{ + pgws_profile_dimensions = *((int *) extra); +} + /* * Module load callback */ @@ -427,6 
+536,28 @@ _PG_init(void) NULL, NULL); + DefineCustomStringVariable("pg_wait_sampling.history_dimensions", + "Sets sampling dimensions for history", + NULL, + &pgws_history_dimensions_string, + "none", + PGC_SIGHUP, + GUC_LIST_INPUT, + pgws_general_dimensions_check_hook, + pgws_history_dimensions_assign_hook, + NULL); + + DefineCustomStringVariable("pg_wait_sampling.profile_dimensions", + "Sets sampling dimensions for profile", + NULL, + &pgws_profile_dimensions_string, + "none", + PGC_SIGHUP, + GUC_LIST_INPUT, + pgws_general_dimensions_check_hook, + pgws_profile_dimensions_assign_hook, + NULL); + #if PG_VERSION_NUM >= 150000 MarkGUCPrefixReserved("pg_wait_sampling"); #endif @@ -615,6 +746,332 @@ pg_wait_sampling_get_current(PG_FUNCTION_ARGS) } } +static Datum +GetBackendState(BackendState state, bool *is_null) +{ + switch (state) + { +#if PG_VERSION_NUM >= 180000 + case STATE_STARTING: + return CStringGetTextDatum("starting"); +#endif + case STATE_IDLE: + return CStringGetTextDatum("idle"); + case STATE_RUNNING: + return CStringGetTextDatum("active"); + case STATE_IDLEINTRANSACTION: + return CStringGetTextDatum("idle in transaction"); + case STATE_FASTPATH: + return CStringGetTextDatum("fastpath function call"); + case STATE_IDLEINTRANSACTION_ABORTED: + return CStringGetTextDatum("idle in transaction (aborted)"); + case STATE_DISABLED: + return CStringGetTextDatum("disabled"); + case STATE_UNDEFINED: + *is_null = true; + } + return (Datum) 0; +} + +/* Copied from pg_stat_get_backend_client_addr */ +static Datum +get_backend_client_addr(SockAddr client_addr, bool *is_null) +{ + char remote_host[NI_MAXHOST]; + int ret; + + /* A zeroed client addr means we don't know */ +#if PG_VERSION_NUM >= 180000 + if (pg_memory_is_all_zeros(&client_addr, + sizeof(client_addr))) +#else + SockAddr zero_clientaddr; + + memset(&zero_clientaddr, 0, sizeof(zero_clientaddr)); + if (memcmp(&client_addr, &zero_clientaddr, + sizeof(zero_clientaddr)) == 0) +#endif + { + *is_null = true; + 
return (Datum) 0; + } + + switch (client_addr.addr.ss_family) + { + case AF_INET: + case AF_INET6: + break; + default: + *is_null = true; + return (Datum) 0; + } + + remote_host[0] = '\0'; + ret = pg_getnameinfo_all(&client_addr.addr, + client_addr.salen, + remote_host, sizeof(remote_host), + NULL, 0, + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) + { + *is_null = true; + return (Datum) 0; + } + + clean_ipv6_addr(client_addr.addr.ss_family, remote_host); + + return (DirectFunctionCall1(inet_in, CStringGetDatum(remote_host))); +} + +/* + * Needed for PostgreSQL 16 and earlier since there is no good way to get + * PgBackendStatus when having only PGPROC structure. + * + * pgstat_fetch_stat_beentry (13-15) works with indices of localBackendStatusTable + * pgstat_get_beentry_by_backend_id (16) works with "backend_ids", but we still + * cannot get them without looking into LocalPgBackendStatus, so work with indices + * + * This function is very inefficient + * + * Maybe we should just iterate over localBackendStatusTable and somehow get + * PGPROC entries from there but it is up for discussion + */ +PgBackendStatus * +get_beentry_by_procpid(int pid) +{ + int backend_num = pgstat_fetch_stat_numbackends(), cur_be_idx; + + for (cur_be_idx = 1; cur_be_idx <= backend_num; cur_be_idx++) + { + LocalPgBackendStatus *local_beentry; + +#if PG_VERSION_NUM >= 160000 + local_beentry = pgstat_get_local_beentry_by_index(cur_be_idx); +#else + /* Here beid is just index in localBackendStatusTable */ + local_beentry = pgstat_fetch_stat_local_beentry(cur_be_idx); +#endif + if (local_beentry->backendStatus.st_procpid == pid) + return &local_beentry->backendStatus; + } + return NULL; +} + +PG_FUNCTION_INFO_V1(pg_wait_sampling_get_current_extended); +Datum +pg_wait_sampling_get_current_extended(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + WaitCurrentContext *params; + + check_shmem(); + + /* Initialization, done only on the first call */ + if (SRF_IS_FIRSTCALL()) + { + 
MemoryContext oldcontext; + TupleDesc tupdesc; + + funcctx = SRF_FIRSTCALL_INIT(); + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + params = (WaitCurrentContext *) palloc0(sizeof(WaitCurrentContext)); + params->ts = GetCurrentTimestamp(); + + funcctx->user_fctx = params; + /* Setup tuple desc */ + tupdesc = CreateTemplateTupleDesc(13); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "type", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "event", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "queryid", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "role_id", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "database_id", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "parallel_leader_pid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "backend_type", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "backend_state", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "proc_start", + TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 11, "client_addr", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 12, "client_hostname", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 13, "appname", + TEXTOID, -1, 0); + + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + LWLockAcquire(ProcArrayLock, LW_SHARED); + + if (!PG_ARGISNULL(0)) + { + /* pg_wait_sampling_get_current_extended(pid int4) function */ + HistoryItem *item; + PGPROC *proc; + PgBackendStatus *bestatus; + + proc = search_proc(PG_GETARG_UINT32(0)); +#if PG_VERSION_NUM >= 170000 + bestatus = pgstat_get_beentry_by_proc_number(GetNumberFromPGProc(proc)); +#else + bestatus = get_beentry_by_procpid(proc->pid); +#endif + params->items = (HistoryItem *) palloc0(sizeof(HistoryItem)); + item = ¶ms->items[0]; + /* Show all fields without looking at GUC 
variables */ + item->pid = proc->pid; + item->wait_event_info = proc->wait_event_info; + item->queryId = pgws_proc_queryids[proc - ProcGlobal->allProcs]; + item->role_id = proc->roleId; + item->database_id = proc->databaseId; + item->parallel_leader_pid = (proc->lockGroupLeader ? + proc->lockGroupLeader->pid : + 0); + if (bestatus) + { + item->backend_type = bestatus->st_backendType; + item->backend_state = bestatus->st_state; + item->proc_start = bestatus->st_proc_start_timestamp; + item->client_addr = bestatus->st_clientaddr; + strcpy(item->client_hostname, bestatus->st_clienthostname); + strcpy(item->appname, bestatus->st_appname); + } + funcctx->max_calls = 1; + } + else + { + /* pg_wait_sampling_current view */ + int procCount = ProcGlobal->allProcCount, + i, + j = 0; + + params->items = (HistoryItem *) palloc0(sizeof(HistoryItem) * procCount); + for (i = 0; i < procCount; i++) + { + PGPROC *proc = &ProcGlobal->allProcs[i]; +#if PG_VERSION_NUM >= 170000 + PgBackendStatus *bestatus = pgstat_get_beentry_by_proc_number(GetNumberFromPGProc(proc)); +#else + PgBackendStatus *bestatus = get_beentry_by_procpid(proc->pid); +#endif + + if (!pgws_should_sample_proc(proc, + ¶ms->items[j].pid, + ¶ms->items[j].wait_event_info)) + continue; + + /* Show all fields without looking at GUC variables */ + params->items[j].pid = proc->pid; + params->items[j].wait_event_info = proc->wait_event_info; + params->items[j].queryId = pgws_proc_queryids[i]; + params->items[j].role_id = proc->roleId; + params->items[j].database_id = proc->databaseId; + params->items[j].parallel_leader_pid = (proc->lockGroupLeader ? 
+ proc->lockGroupLeader->pid : + 0); + if (bestatus) + { + params->items[j].backend_type = bestatus->st_backendType; + params->items[j].backend_state = bestatus->st_state; + params->items[j].proc_start = bestatus->st_proc_start_timestamp; + params->items[j].client_addr = bestatus->st_clientaddr; + strcpy(params->items[j].client_hostname, bestatus->st_clienthostname); + strcpy(params->items[j].appname, bestatus->st_appname); + } + j++; + } + funcctx->max_calls = j; + } + + LWLockRelease(ProcArrayLock); +#if PG_VERSION_NUM >= 140000 + pgstat_clear_backend_activity_snapshot(); +#else + pgstat_clear_snapshot(); +#endif + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + params = (WaitCurrentContext *) funcctx->user_fctx; + + if (funcctx->call_cntr < funcctx->max_calls) + { + HeapTuple tuple; + Datum values[13]; + bool nulls[13]; + const char *event_type, + *event, + *backend_type; + Datum backend_state, proc_start, client_addr; + bool is_null_be_state = false, + is_null_client_addr = false; + HistoryItem *item; + + item = ¶ms->items[funcctx->call_cntr]; + + /* Make and return next tuple to caller */ + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + event_type = pgstat_get_wait_event_type(item->wait_event_info); + event = pgstat_get_wait_event(item->wait_event_info); + backend_type = GetBackendTypeDesc(item->backend_type); + backend_state = GetBackendState(item->backend_state, &is_null_be_state); + proc_start = TimestampTzGetDatum(item->proc_start); + client_addr = get_backend_client_addr(item->client_addr, &is_null_client_addr); + + values[0] = Int32GetDatum(item->pid); + if (event_type) + values[1] = PointerGetDatum(cstring_to_text(event_type)); + else + nulls[1] = true; + if (event) + values[2] = PointerGetDatum(cstring_to_text(event)); + else + nulls[2] = true; + values[3] = UInt64GetDatum(item->queryId); + values[4] = ObjectIdGetDatum(item->role_id); + values[5] = 
ObjectIdGetDatum(item->database_id); + values[6] = Int32GetDatum(item->parallel_leader_pid); + if (backend_type) + values[7] = PointerGetDatum(cstring_to_text(backend_type)); + else + nulls[7] = true; + if (!is_null_be_state) + values[8] = backend_state; + else + nulls[8] = true; + values[9] = proc_start; + if (!is_null_client_addr) + values[10] = client_addr; + else + nulls[10] = true; + values[11] = PointerGetDatum(cstring_to_text(item->client_hostname)); + values[12] = PointerGetDatum(cstring_to_text(item->appname)); + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + else + { + /* nothing left */ + SRF_RETURN_DONE(funcctx); + } +} + typedef struct { Size count; @@ -808,6 +1265,161 @@ pg_wait_sampling_get_profile(PG_FUNCTION_ARGS) } } +PG_FUNCTION_INFO_V1(pg_wait_sampling_get_profile_extended); +Datum +pg_wait_sampling_get_profile_extended(PG_FUNCTION_ARGS) +{ + Profile *profile; + FuncCallContext *funcctx; + + check_shmem(); + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + TupleDesc tupdesc; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* Receive profile from shmq */ + profile = (Profile *) palloc0(sizeof(Profile)); + profile->items = (ProfileItem *) receive_array(PROFILE_REQUEST, + sizeof(ProfileItem), &profile->count); + + funcctx->user_fctx = profile; + funcctx->max_calls = profile->count; + + /* Make tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(14); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "type", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "event", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "queryid", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "role_id", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "database_id", + INT8OID, -1, 0); + 
TupleDescInitEntry(tupdesc, (AttrNumber) 7, "parallel_leader_pid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "backend_type", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "backend_state", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "proc_start", + TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 11, "client_addr", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 12, "client_hostname", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 13, "appname", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 14, "count", + INT8OID, -1, 0); + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + profile = (Profile *) funcctx->user_fctx; + + if (funcctx->call_cntr < funcctx->max_calls) + { + /* for each row */ + Datum values[14]; + bool nulls[14]; + HeapTuple tuple; + ProfileItem *item; + const char *event_type, + *event, + *backend_type; + Datum backend_state, proc_start, client_addr; + bool is_null_be_state = false, + is_null_client_addr = false; + + item = &profile->items[funcctx->call_cntr]; + + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + /* Make and return next tuple to caller */ + event_type = pgstat_get_wait_event_type(item->wait_event_info); + event = pgstat_get_wait_event(item->wait_event_info); + backend_type = GetBackendTypeDesc(item->backend_type); + backend_state = GetBackendState(item->backend_state, &is_null_be_state); + proc_start = TimestampTzGetDatum(item->proc_start); + client_addr = get_backend_client_addr(item->client_addr, &is_null_client_addr); + + values[0] = Int32GetDatum(item->pid); + if (event_type) + values[1] = PointerGetDatum(cstring_to_text(event_type)); + else + nulls[1] = true; + if (event) + values[2] = PointerGetDatum(cstring_to_text(event)); + else + nulls[2] = true; + if 
(pgws_profileQueries) + values[3] = UInt64GetDatum(item->queryId); + else + values[3] = (Datum) 0; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_ROLE_ID) + values[4] = ObjectIdGetDatum(item->role_id); + else + nulls[4] = true; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_DB_ID) + values[5] = ObjectIdGetDatum(item->database_id); + else + nulls[5] = true; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_PARALLEL_LEADER_PID) + values[6] = Int32GetDatum(item->parallel_leader_pid); + else + nulls[6] = true; + if (backend_type && (pgws_profile_dimensions & PGWS_DIMENSIONS_BE_TYPE)) + values[7] = PointerGetDatum(cstring_to_text(backend_type)); + else + nulls[7] = true; + if (!is_null_be_state && (pgws_profile_dimensions & PGWS_DIMENSIONS_BE_STATE)) + values[8] = backend_state; + else + nulls[8] = true; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_BE_START_TIME) + values[9] = proc_start; + else + nulls[9] = true; + if (!is_null_client_addr && pgws_profile_dimensions & PGWS_DIMENSIONS_CLIENT_ADDR) + values[10] = client_addr; + else + nulls[10] = true; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_CLIENT_HOSTNAME) + values[11] = PointerGetDatum(cstring_to_text(item->client_hostname)); + else + nulls[11] = true; + if (pgws_profile_dimensions & PGWS_DIMENSIONS_APPNAME) + values[12] = PointerGetDatum(cstring_to_text(item->appname)); + else + nulls[12] = true; + + values[13] = UInt64GetDatum(item->count); + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + else + { + /* nothing left */ + SRF_RETURN_DONE(funcctx); + } +} + PG_FUNCTION_INFO_V1(pg_wait_sampling_reset_profile); Datum pg_wait_sampling_reset_profile(PG_FUNCTION_ARGS) @@ -922,6 +1534,159 @@ pg_wait_sampling_get_history(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +PG_FUNCTION_INFO_V1(pg_wait_sampling_get_history_extended); +Datum +pg_wait_sampling_get_history_extended(PG_FUNCTION_ARGS) +{ + History *history; + FuncCallContext *funcctx; + + 
check_shmem(); + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + TupleDesc tupdesc; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* Receive history from shmq */ + history = (History *) palloc0(sizeof(History)); + history->items = (HistoryItem *) receive_array(HISTORY_REQUEST, + sizeof(HistoryItem), &history->count); + + funcctx->user_fctx = history; + funcctx->max_calls = history->count; + + /* Make tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(14); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "sample_ts", + TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "type", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "event", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "queryid", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "role_id", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "database_id", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "parallel_leader_pid", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "backend_type", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 10, "backend_state", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 11, "proc_start", + TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 12, "client_addr", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 13, "client_hostname", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 14, "appname", + TEXTOID, -1, 0); + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + history = (History *) funcctx->user_fctx; + + if (history->index < history->count) + { + HeapTuple tuple; + HistoryItem *item; + Datum values[14]; + bool 
nulls[14]; + const char *event_type, + *event, + *backend_type; + Datum backend_state, proc_start, client_addr; + bool is_null_be_state = false, + is_null_client_addr = false; + + item = &history->items[history->index]; + + /* Make and return next tuple to caller */ + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + event_type = pgstat_get_wait_event_type(item->wait_event_info); + event = pgstat_get_wait_event(item->wait_event_info); + backend_type = GetBackendTypeDesc(item->backend_type); + backend_state = GetBackendState(item->backend_state, &is_null_be_state); + proc_start = TimestampTzGetDatum(item->proc_start); + client_addr = get_backend_client_addr(item->client_addr, &is_null_client_addr); + + values[0] = Int32GetDatum(item->pid); + values[1] = TimestampTzGetDatum(item->ts); + if (event_type) + values[2] = PointerGetDatum(cstring_to_text(event_type)); + else + nulls[2] = true; + if (event) + values[3] = PointerGetDatum(cstring_to_text(event)); + else + nulls[3] = true; + values[4] = UInt64GetDatum(item->queryId); + if (pgws_history_dimensions & PGWS_DIMENSIONS_ROLE_ID) + values[5] = ObjectIdGetDatum(item->role_id); + else + nulls[5] = true; + if (pgws_history_dimensions & PGWS_DIMENSIONS_DB_ID) + values[6] = ObjectIdGetDatum(item->database_id); + else + nulls[6] = true; + if (pgws_history_dimensions & PGWS_DIMENSIONS_PARALLEL_LEADER_PID) + values[7] = Int32GetDatum(item->parallel_leader_pid); + else + nulls[7] = true; + if (backend_type && (pgws_history_dimensions & PGWS_DIMENSIONS_BE_TYPE)) + values[8] = PointerGetDatum(cstring_to_text(backend_type)); + else + nulls[8] = true; + if (!is_null_be_state && (pgws_history_dimensions & PGWS_DIMENSIONS_BE_STATE)) + values[9] = backend_state; + else + nulls[9] = true; + if (pgws_history_dimensions & PGWS_DIMENSIONS_BE_START_TIME) + values[10] = proc_start; + else + nulls[10] = true; + if (!is_null_client_addr && pgws_history_dimensions & PGWS_DIMENSIONS_CLIENT_ADDR) + values[11] =
client_addr; + else + nulls[11] = true; + if (pgws_history_dimensions & PGWS_DIMENSIONS_CLIENT_HOSTNAME) + values[12] = PointerGetDatum(cstring_to_text(item->client_hostname)); + else + nulls[12] = true; + if (pgws_history_dimensions & PGWS_DIMENSIONS_APPNAME) + values[13] = PointerGetDatum(cstring_to_text(item->appname)); + else + nulls[13] = true; + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + history->index++; + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + else + { + /* nothing left */ + SRF_RETURN_DONE(funcctx); + } + + PG_RETURN_VOID(); +} + /* * planner_hook hook, save queryId for collector */ diff --git a/pg_wait_sampling.control b/pg_wait_sampling.control index 97d9a34..d2d0ffe 100644 --- a/pg_wait_sampling.control +++ b/pg_wait_sampling.control @@ -1,5 +1,5 @@ # pg_wait_sampling extension comment = 'sampling based statistics of wait events' -default_version = '1.1' +default_version = '1.2' module_pathname = '$libdir/pg_wait_sampling' relocatable = true diff --git a/pg_wait_sampling.h b/pg_wait_sampling.h index dab773c..9009fe8 100644 --- a/pg_wait_sampling.h +++ b/pg_wait_sampling.h @@ -15,26 +15,70 @@ #include "storage/lock.h" #include "storage/shm_mq.h" +#if PG_VERSION_NUM >= 140000 +#include "utils/backend_status.h" +#else +#include "pgstat.h" +#endif + #define PG_WAIT_SAMPLING_MAGIC 0xCA94B107 #define COLLECTOR_QUEUE_SIZE (16 * 1024) #define HISTORY_TIME_MULTIPLIER 10 #define PGWS_QUEUE_LOCK 0 #define PGWS_COLLECTOR_LOCK 1 +/* Values for sampling dimensions */ +#define PGWS_DIMENSIONS_NONE 0 + +#define PGWS_DIMENSIONS_ROLE_ID (1 << 1) +#define PGWS_DIMENSIONS_DB_ID (1 << 2) +#define PGWS_DIMENSIONS_PARALLEL_LEADER_PID (1 << 3) +#define PGWS_DIMENSIONS_BE_TYPE (1 << 4) +#define PGWS_DIMENSIONS_BE_STATE (1 << 5) +#define PGWS_DIMENSIONS_BE_START_TIME (1 << 6) +#define PGWS_DIMENSIONS_CLIENT_ADDR (1 << 7) +#define PGWS_DIMENSIONS_CLIENT_HOSTNAME (1 << 8) +#define PGWS_DIMENSIONS_APPNAME (1 << 9) + +#define 
PGWS_DIMENSIONS_ALL ((int) ~0) +/* ^ all 1 in binary */ + +/* + * Next two structures must match in fields until count/ts so make_profile_hash + * works properly + */ typedef struct { - int pid; - uint32 wait_event_info; - uint64 queryId; - uint64 count; + int pid; + uint32 wait_event_info; + uint64 queryId; + Oid role_id; + Oid database_id; + int parallel_leader_pid; + BackendType backend_type; + BackendState backend_state; + TimestampTz proc_start; + SockAddr client_addr; + char client_hostname[NAMEDATALEN]; + char appname[NAMEDATALEN]; + uint64 count; } ProfileItem; typedef struct { - int pid; - uint32 wait_event_info; - uint64 queryId; - TimestampTz ts; + int pid; + uint32 wait_event_info; + uint64 queryId; + Oid role_id; + Oid database_id; + int parallel_leader_pid; + BackendType backend_type; + BackendState backend_state; + TimestampTz proc_start; + SockAddr client_addr; + char client_hostname[NAMEDATALEN]; + char appname[NAMEDATALEN]; + TimestampTz ts; } HistoryItem; typedef struct @@ -73,6 +117,9 @@ extern shm_mq *pgws_collector_mq; extern uint64 *pgws_proc_queryids; extern void pgws_init_lock_tag(LOCKTAG *tag, uint32 lock); extern bool pgws_should_sample_proc(PGPROC *proc, int *pid_p, uint32 *wait_event_info_p); +extern int pgws_history_dimensions; /* bit mask that is derived from GUC */ +extern int pgws_profile_dimensions; /* bit mask that is derived from GUC */ +extern PgBackendStatus* get_beentry_by_procpid(int pid); /* collector.c */ extern void pgws_register_wait_collector(void); diff --git a/sql/queries.sql b/sql/queries.sql index de44c6d..6658c74 100644 --- a/sql/queries.sql +++ b/sql/queries.sql @@ -9,10 +9,24 @@ WITH t as (SELECT sum(0) FROM pg_wait_sampling_history) WITH t as (SELECT sum(0) FROM pg_wait_sampling_profile) SELECT sum(0) FROM generate_series(1, 2), t; +WITH t as (SELECT sum(0) FROM pg_wait_sampling_current_extended) + SELECT sum(0) FROM generate_series(1, 2), t; + +WITH t as (SELECT sum(0) FROM pg_wait_sampling_history_extended) + 
SELECT sum(0) FROM generate_series(1, 2), t; + +WITH t as (SELECT sum(0) FROM pg_wait_sampling_profile_extended) + SELECT sum(0) FROM generate_series(1, 2), t; + -- Some dummy checks just to be sure that all our functions work and return something. SELECT count(*) = 1 as test FROM pg_wait_sampling_get_current(pg_backend_pid()); SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_profile(); SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_history(); SELECT pg_wait_sampling_reset_profile(); +SELECT count(*) = 1 as test FROM pg_wait_sampling_get_current_extended(pg_backend_pid()); +SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_profile_extended(); +SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_history_extended(); +SELECT pg_wait_sampling_reset_profile(); + DROP EXTENSION pg_wait_sampling; From 15974399ebb2a71fe9952719e0c1215b52f74c74 Mon Sep 17 00:00:00 2001 From: Oleg Tselebrovskiy Date: Wed, 26 Mar 2025 16:21:17 +0700 Subject: [PATCH 2/2] Update README to include information about new *_extended views Also fix some typos/reword some sentences --- README.md | 135 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 107 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index bbdbd20..f5f68cd 100644 --- a/README.md +++ b/README.md @@ -9,13 +9,13 @@ Introduction PostgreSQL provides information about current wait event of particular process. However, in order to gather descriptive statistics of server -behavior user have to sample current wait event multiple times. +behavior users have to sample current wait events multiple times. `pg_wait_sampling` is an extension for collecting sampling statistics of wait events. The module must be loaded by adding `pg_wait_sampling` to `shared_preload_libraries` in postgresql.conf, because it requires additional -shared memory and launches background worker. This means that a server restart +shared memory and launches a background worker. 
This means that a server restart is needed to add or remove the module. When used with `pg_stat_statements` it is recommended to put `pg_stat_statements` @@ -25,17 +25,16 @@ utility statements are not rewritten by the former. When `pg_wait_sampling` is enabled, it collects two kinds of statistics. * History of waits events. It's implemented as in-memory ring buffer where - samples of each process wait events are written with given (configurable) + samples of each process' wait events are written with given (configurable) period. Therefore, for each running process user can see some number of - recent samples depending on history size (configurable). Assuming there is - a client who periodically read this history and dump it somewhere, user - can have continuous history. - * Waits profile. It's implemented as in-memory hash table where count - of samples are accumulated per each process and each wait event - (and each query with `pg_stat_statements`). This hash - table can be reset by user request. Assuming there is a client who - periodically dumps profile and resets it, user can have statistics of - intensivity of wait events among time. + recent samples depending on history size (configurable). Assuming there is + a client who periodically reads this history and dumps it somewhere, user + can have continuous history of wait events. + * Waits profile. It's implemented as in-memory hash table where samples + are accumulated per each wait event and can be divided by process, + query and other dimensions. This hash table can be reset by user request. + Assuming there is a client who periodically dumps profile and resets it, + user can have statistics of wait events over time. In combination with `pg_stat_statements` this extension can also provide per query statistics. @@ -66,10 +65,10 @@ Manual build higher. Before build and install you should ensure following: * PostgreSQL version is 13 or higher. 
- * You have development package of PostgreSQL installed or you built + * You have development package of PostgreSQL installed or you have built PostgreSQL from source. * Your PATH variable is configured so that `pg_config` command available, or - set PG_CONFIG variable. + PG_CONFIG variable is set. Typical installation procedure may look like this: @@ -98,9 +97,9 @@ Usage `pg_wait_sampling` interacts with user by set of views and functions. `pg_wait_sampling_current` view – information about current wait events for -all processed including background workers. +all processes including background workers. -| Column name | Column type | Description | +| Column name | Column type | Description | | ----------- | ----------- | ----------------------- | | pid | int4 | Id of process | | event_type | text | Name of wait event type | @@ -110,10 +109,33 @@ all processed including background workers. | event | text | Name of wait event | | queryid | int8 | Id of query | `pg_wait_sampling_get_current(pid int4)` returns the same table for single given process. +`pg_wait_sampling_current_extended` view – information about current wait events for +all processes including background workers. Structure of this view can be changed +between versions. + +| Column name | Column type | Description | +| ------------------- | ----------- | --------------------------- | +| pid | int4 | Id of process | +| event_type | text | Name of wait event type | +| event | text | Name of wait event | +| queryid | int8 | Id of query | +| role_id | int4 | Id of role | +| database_id | int4 | Id of database | +| parallel_leader_pid | int4 | Id of parallel query leader | +| backend_type | text | Name of backend type | +| backend_state | text | Name of backend state | +| proc_start | timestamptz | Timestamp of process start | +| client_addr | text | Client address | +| client_hostname | text | Client hostname | +| appname | text | Application name | + +`pg_wait_sampling_get_current_extended(pid int4)` returns the same table for single given +process. 
+ `pg_wait_sampling_history` view – history of wait events obtained by sampling into in-memory ring buffer. -| Column name | Column type | Description | +| Column name | Column type | Description | | ----------- | ----------- | ----------------------- | | pid | int4 | Id of process | | ts | timestamptz | Sample timestamp | @@ -121,30 +143,74 @@ in-memory ring buffer. | event | text | Name of wait event | | queryid | int8 | Id of query | +`pg_wait_sampling_history_extended` view – history of wait events obtained by +sampling into in-memory ring buffer. Structure of this view can be changed +between versions. + +| Column name | Column type | Description | +| ------------------- | ----------- | --------------------------- | +| pid | int4 | Id of process | +| ts | timestamptz | Sample timestamp | +| event_type | text | Name of wait event type | +| event | text | Name of wait event | +| queryid | int8 | Id of query | +| role_id | int4 | Id of role | +| database_id | int4 | Id of database | +| parallel_leader_pid | int4 | Id of parallel query leader | +| backend_type | text | Name of backend type | +| backend_state | text | Name of backend state | +| proc_start | timestamptz | Timestamp of process start | +| client_addr | text | Client address | +| client_hostname | text | Client hostname | +| appname | text | Application name | + `pg_wait_sampling_profile` view – profile of wait events obtained by sampling into in-memory hash table. -| Column name | Column type | Description | +| Column name | Column type | Description | | ----------- | ----------- | ----------------------- | | pid | int4 | Id of process | | event_type | text | Name of wait event type | | event | text | Name of wait event | | queryid | int8 | Id of query | -| count | text | Count of samples | +| count | int8 | Count of samples | + +`pg_wait_sampling_profile_extended` view – profile of wait events obtained by +sampling into in-memory hash table. 
Structure of this view can be changed +between versions. + +| Column name | Column type | Description | +| ------------------- | ----------- | --------------------------- | +| pid | int4 | Id of process | +| event_type | text | Name of wait event type | +| event | text | Name of wait event | +| queryid | int8 | Id of query | +| role_id | int4 | Id of role | +| database_id | int4 | Id of database | +| parallel_leader_pid | int4 | Id of parallel query leader | +| backend_type | text | Name of backend type | +| backend_state | text | Name of backend state | +| proc_start | timestamptz | Timestamp of process start | +| client_addr | text | Client address | +| client_hostname | text | Client hostname | +| appname | text | Application name | +| count | int8 | Count of samples | `pg_wait_sampling_reset_profile()` function resets the profile. The work of wait event statistics collector worker is controlled by following GUCs. -| Parameter name | Data type | Description | Default value | -|----------------------------------| --------- |---------------------------------------------|--------------:| -| pg_wait_sampling.history_size | int4 | Size of history in-memory ring buffer | 5000 | -| pg_wait_sampling.history_period | int4 | Period for history sampling in milliseconds | 10 | -| pg_wait_sampling.profile_period | int4 | Period for profile sampling in milliseconds | 10 | -| pg_wait_sampling.profile_pid | bool | Whether profile should be per pid | true | -| pg_wait_sampling.profile_queries | enum | Whether profile should be per query | top | -| pg_wait_sampling.sample_cpu | bool | Whether on CPU backends should be sampled | true | +| Parameter name | Data type | Description | Default value | +|-------------------------------------| --------- |---------------------------------------------|--------------:| +| pg_wait_sampling.history_size | int4 | Size of history in-memory ring buffer | 5000 | +| pg_wait_sampling.history_period | int4 | Period for history sampling in milliseconds 
| 10 | +| pg_wait_sampling.profile_period | int4 | Period for profile sampling in milliseconds | 10 | +| pg_wait_sampling.profile_pid | bool | Whether profile should be per pid | true | +| pg_wait_sampling.profile_queries | enum | Whether profile should be per query | top | +| pg_wait_sampling.sample_cpu | bool | Whether on CPU backends should be sampled | true | +| pg_wait_sampling.history_dimensions | text | Additional columns in extended history view | 'none' | +| pg_wait_sampling.profile_dimensions | text | Additional columns in extended profile view | 'none' | If `pg_wait_sampling.profile_pid` is set to false, sampling profile wouldn't be collected in per-process manner. In this case the value of pid could would @@ -158,6 +224,19 @@ If `pg_wait_sampling.sample_cpu` is set to true then processes that are not waiting on anything are also sampled. The wait event columns for such processes will be NULL. +`pg_wait_sampling.history_dimensions` and `pg_wait_sampling.profile_dimensions` +determine what additional columns will be sampled in `history/profile_extended` +views. Possible values are `none`, `all`, `role_id`, `database_id`, +`parallel_leader_pid`, `backend_type`, `backend_state`, `backend_start_time`, +`client_addr`, `client_hostname`, `appname` and any combination of column names. +`none` and `all` cannot be used together with any other values and must be used alone. + +> [!WARNING] +> Turning on any of the following columns: `backend_type`, `backend_state`, +> `backend_start_time`, `client_addr`, `client_hostname`, `appname` will reduce +> performance compared to sampling none of those due to the need to look into +> BackendStatusTable. This is especially noticeable with PostgreSQL 13-16 + Values of these GUC variables can be changed only in config file or with ALTER SYSTEM. Then you need to reload server's configuration (such as with pg_reload_conf function) for changes to take effect. 
@@ -170,7 +249,7 @@ Contribution ------------ Please, notice, that `pg_wait_sampling` is still under development and while -it's stable and tested, it may contains some bugs. Don't hesitate to raise +it's stable and tested, it may contain some bugs. Don't hesitate to raise [issues at github](https://github.com/postgrespro/pg_wait_sampling/issues) with your bug reports.