diff --git a/internal/collector/generated/postgres_5m_metrics.json b/internal/collector/generated/postgres_5m_metrics.json index a9a3500a02..371a7fa182 100644 --- a/internal/collector/generated/postgres_5m_metrics.json +++ b/internal/collector/generated/postgres_5m_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"}] +[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"},{"metrics":[{"description":"Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf).\n0 = valid config. 1 = settings changed. \nSettings history is available for review in the table `monitor.pg_hba_checksum`.\nTo reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). Note this will clear the history table.\n","metric_name":"ccp_pg_hba_checksum","static_attributes":{"server":"localhost:5432"},"value_column":"status"}],"sql":"SELECT monitor.pg_hba_checksum() AS status;"}] diff --git a/internal/collector/postgres_5m_metrics.yaml b/internal/collector/postgres_5m_metrics.yaml index 9f5c3212dc..95764fe3e1 100644 --- a/internal/collector/postgres_5m_metrics.yaml +++ b/internal/collector/postgres_5m_metrics.yaml @@ -140,4 +140,15 @@ attribute_columns: ["dbname"] static_attributes: server: "localhost:5432" - + + - sql: SELECT monitor.pg_hba_checksum() AS status; + metrics: + - metric_name: ccp_pg_hba_checksum + value_column: status + description: | + Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf). + 0 = valid config. 1 = settings changed. + Settings history is available for review in the table `monitor.pg_hba_checksum`. + To reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). Note this will clear the history table. + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/postgres_metrics_test.go b/internal/collector/postgres_metrics_test.go index 8a22f42b52..63a6c654f3 100644 --- a/internal/collector/postgres_metrics_test.go +++ b/internal/collector/postgres_metrics_test.go @@ -17,9 +17,9 @@ func TestRemoveMetricsFromQueries(t *testing.T) { err := json.Unmarshal(fiveMinuteMetrics, &fiveMinuteMetricsArr) assert.NilError(t, err) - assert.Equal(t, len(fiveMinuteMetricsArr), 3) + assert.Equal(t, len(fiveMinuteMetricsArr), 4) newArr := removeMetricsFromQueries([]string{"ccp_database_size_bytes"}, fiveMinuteMetricsArr) - assert.Equal(t, len(newArr), 2) + assert.Equal(t, len(newArr), 3) t.Run("DeleteOneMetric", func(t *testing.T) { sqlMetricsData := `[ diff --git a/internal/controller/postgrescluster/metrics_setup.sql b/internal/controller/postgrescluster/metrics_setup.sql index 728de80c3e..858f95c023 100644 --- a/internal/controller/postgrescluster/metrics_setup.sql +++ b/internal/controller/postgrescluster/metrics_setup.sql @@ -220,3 +220,164 @@ BEGIN END; $$ LANGUAGE plpgsql; +/* +* The `pg_hba_checksum` table, functions, and view are taken from +* https://github.com/CrunchyData/pgmonitor/blob/development/postgres_exporter/common +* +* The goal of these table, functions, and view is to monitor changes +* to the pg_hba_file_rules system catalog. +* +* This material is used in the metric `ccp_pg_hba_checksum`. +*/ + +/* +* `monitor.pg_hba_checksum` table is used to store +* - the pg_hba settings as string (for reference) +* - the pg_hba settings as hash (for quick comparison) +* - the `hba_hash_known_provided` (for overide hash manually given to the `monitor.pg_hba_checksum` function) +* - the `valid` field to signal whether the pg_hba settings have not changed since they were accepted as valid +* +* We create an index on `created_at` in order to pull the most recent entry for +* comparison in the `monitor.pg_hba_checksum` function +*/ +DROP TABLE IF EXISTS monitor.pg_hba_checksum; +CREATE TABLE monitor.pg_hba_checksum ( + hba_hash_generated text NOT NULL + , hba_hash_known_provided text + , hba_string text NOT NULL + , created_at timestamptz DEFAULT now() NOT NULL + , valid smallint NOT NULL ); +COMMENT ON COLUMN monitor.pg_hba_checksum.valid IS 'Set this column to zero if this group of settings is a valid change'; +CREATE INDEX ON monitor.pg_hba_checksum (created_at); + +/* + * `monitor.pg_hba_checksum(text)` is used to compare the previous pg_hba hash + * with a hash made of the current pg_hba hash, derived from the `monitor.pg_hba_hash` view below. + * + * This function returns + * - 0, indicating NO settings have changed + * - 1, indicating something has changed since last known valid state + * + * `monitor.pg_hba_checksum` can take a hash to be used as an override. + * This may be useful when you have a standby with different pg_hba rules; + * since it will have different rules (and therefore a different hash), you + * could alter the metric function to pass the actual hash, which would be + * used in lieu of this table's value (derived from the primary cluster's rules). + */ +DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text); +CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) + RETURNS smallint + LANGUAGE plpgsql SECURITY DEFINER + SET search_path TO pg_catalog, pg_temp +AS $function$ +DECLARE + +v_hba_hash text; +v_hba_hash_old text; +v_hba_string text; +v_is_in_recovery boolean; +v_valid smallint; + +BEGIN + +-- Retrieve the current settings from the `monitor.pg_hba_hash` view below +IF current_setting('server_version_num')::int >= 100000 THEN + SELECT sha256_hash, hba_string + INTO v_hba_hash, v_hba_string + FROM monitor.pg_hba_hash; +ELSE + RAISE EXCEPTION 'pg_hba change monitoring unsupported in versions older than PostgreSQL 10'; +END IF; + +-- Retrieve the last previous hash from the table +SELECT hba_hash_generated, valid +INTO v_hba_hash_old, v_valid +FROM monitor.pg_hba_checksum +ORDER BY created_at DESC LIMIT 1; + +-- If an manual/override hash has been given, we will use that: +-- Do not base validity on the stored value if manual hash is given. +IF p_known_hba_hash IS NOT NULL THEN + v_hba_hash_old := p_known_hba_hash; + v_valid := 0; +END IF; + +/* If the table is not empty or a manual hash was given, + * then we want to compare the old hash (from the table) + * with the new hash: if those differ, then we set the validity to 1; + * if they are the same, then we honor what the validity was + * in the table (which would be 1). + */ +IF (v_hba_hash_old IS NOT NULL) THEN + IF (v_hba_hash != v_hba_hash_old) THEN + v_valid := 1; + END IF; +ELSE + v_valid := 0; +END IF; + +/* + * We only want to insert into the table if we're on a primary and + * - the table/manually entered hash is empty, e.g., we've just started the cluster; or + * - the hashes don't match + * + * There's no value added by inserting into the table when no change was detected. + */ +IF (v_hba_hash_old IS NULL) OR (v_hba_hash != v_hba_hash_old) THEN + SELECT pg_is_in_recovery() INTO v_is_in_recovery; + IF v_is_in_recovery = false THEN + INSERT INTO monitor.pg_hba_checksum ( + hba_hash_generated + , hba_hash_known_provided + , hba_string + , valid) + VALUES ( + v_hba_hash + , p_known_hba_hash + , v_hba_string + , v_valid); + END IF; +END IF; + +RETURN v_valid; + +END +$function$; + +/* + * The `monitor.pg_hba_hash` view return both a hash and a string aggregate of the + * pg_catalog.pg_hba_file_rules. + * Note: We use `sha256` to hash to allow this to run on FIPS environments. + */ +DROP VIEW IF EXISTS monitor.pg_hba_hash; +CREATE VIEW monitor.pg_hba_hash AS + -- Order by line number so it's caught if no content is changed but the order of entries is changed + WITH hba_ordered_list AS ( + SELECT COALESCE(type, '<>') AS type + , array_to_string(COALESCE(database, ARRAY['<>']), ',') AS database + , array_to_string(COALESCE(user_name, ARRAY['<>']), ',') AS user_name + , COALESCE(address, '<>') AS address + , COALESCE(netmask, '<>') AS netmask + , COALESCE(auth_method, '<>') AS auth_method + , array_to_string(COALESCE(options, ARRAY['<>']), ',') AS options + FROM pg_catalog.pg_hba_file_rules + ORDER BY line_number) + SELECT sha256((string_agg(type||database||user_name||address||netmask||auth_method||options, ','))::bytea) AS sha256_hash + , string_agg(type||database||user_name||address||netmask||auth_method||options, ',') AS hba_string + FROM hba_ordered_list; + +/* + * The `monitor.pg_hba_checksum_set_valid` function provides an interface for resetting the + * checksum monitor. + * Note: configuration history will be cleared. + */ +DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); +CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint + LANGUAGE sql +AS $function$ + +TRUNCATE monitor.pg_hba_checksum; + +SELECT monitor.pg_hba_checksum(); + +$function$;