/*------------------------------------------------------------------------- * * pg_stat_monitor.h * Track statement execution times across a whole database cluster. * * Portions Copyright © 2018-2020, Percona LLC and/or its affiliates * * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group * * Portions Copyright (c) 1994, The Regents of the University of California * * IDENTIFICATION * contrib/pg_stat_monitor/pg_stat_monitor.h * *------------------------------------------------------------------------- */ #ifndef __PG_STAT_MONITOR_H__ #define __PG_STAT_MONITOR_H__ #include "postgres.h" #include #include #include #include #include #include #include #include "access/hash.h" #include "catalog/pg_authid.h" #include "executor/instrument.h" #include "common/ip.h" #include "funcapi.h" #include "access/twophase.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "optimizer/planner.h" #include "postmaster/bgworker.h" #include "parser/analyze.h" #include "parser/parsetree.h" #include "parser/scanner.h" #include "parser/scansup.h" #include "pgstat.h" #include "storage/fd.h" #include "storage/ipc.h" #include "storage/spin.h" #include "tcop/utility.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/timestamp.h" #include "utils/lsyscache.h" #include "utils/guc.h" #include "utils/guc_tables.h" #define MAX_BACKEND_PROCESES (MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts) #define IntArrayGetTextDatum(x,y) intarray_get_datum(x,y) /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */ #define USAGE_EXEC(duration) (1.0) #define USAGE_INIT (1.0) /* including initial planning */ #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */ #define ASSUMED_LENGTH_INIT 1024 /* initial assumed mean query length */ #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */ #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */ #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */ #define JUMBLE_SIZE 1024 /* query serialization buffer size */ #define MAX_RESPONSE_BUCKET 50 #define INVALID_BUCKET_ID -1 #define MAX_REL_LEN 255 #define MAX_BUCKETS 10 #define TEXT_LEN 255 #define ERROR_MESSAGE_LEN 100 #define REL_LST 10 #define REL_LEN 1000 #define CMD_LST 10 #define CMD_LEN 20 #define APPLICATIONNAME_LEN 100 #define COMMENTS_LEN 512 #define PGSM_OVER_FLOW_MAX 10 #define PLAN_TEXT_LEN 1024 /* the assumption of query max nested level */ #define DEFAULT_MAX_NESTED_LEVEL 10 #define MAX_QUERY_BUF (PGSM_QUERY_SHARED_BUFFER * 1024 * 1024) #define MAX_BUCKETS_MEM (PGSM_MAX * 1024 * 1024) #define BUCKETS_MEM_OVERFLOW() ((hash_get_num_entries(pgss_hash) * sizeof(pgssEntry)) >= MAX_BUCKETS_MEM) #define MAX_BUCKET_ENTRIES (MAX_BUCKETS_MEM / sizeof(pgssEntry)) #define QUERY_BUFFER_OVERFLOW(x,y) ((x + y + sizeof(uint64) + sizeof(uint64)) > MAX_QUERY_BUF) #define QUERY_MARGIN 100 #define MIN_QUERY_LEN 10 #define SQLCODE_LEN 20 #if PG_VERSION_NUM >= 130000 #define MAX_SETTINGS 15 #else #define MAX_SETTINGS 14 #endif /* Update this if need a enum GUC with more options. */ #define MAX_ENUM_OPTIONS 6 typedef struct GucVariables { enum config_type type; /* PGC_BOOL, PGC_INT, PGC_REAL, PGC_STRING, * PGC_ENUM */ int guc_variable; char guc_name[TEXT_LEN]; char guc_desc[TEXT_LEN]; int guc_default; int guc_min; int guc_max; int guc_unit; int *guc_value; bool guc_restart; int n_options; char guc_options[MAX_ENUM_OPTIONS][32]; } GucVariable; #if PG_VERSION_NUM < 130000 typedef struct WalUsage { long wal_records; /* # of WAL records produced */ long wal_fpi; /* # of WAL full page images produced */ uint64 wal_bytes; /* size of WAL records produced */ } WalUsage; #endif typedef enum OVERFLOW_TARGET { OVERFLOW_TARGET_NONE = 0, OVERFLOW_TARGET_DISK } OVERFLOW_TARGET; typedef enum pgssStoreKind { PGSS_INVALID = -1, /* * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to * reference the underlying values in the arrays in the Counters struct, * and this order is required in pg_stat_statements_internal(). */ PGSS_PARSE = 0, PGSS_PLAN, PGSS_EXEC, PGSS_FINISHED, PGSS_ERROR, PGSS_NUMKIND /* Must be last value of this enum */ } pgssStoreKind; /* the assumption of query max nested level */ #define DEFAULT_MAX_NESTED_LEVEL 10 /* * Type of aggregate keys */ typedef enum AGG_KEY { AGG_KEY_DATABASE = 0, AGG_KEY_USER, AGG_KEY_HOST } AGG_KEY; #define MAX_QUERY_LEN 1024 /* shared memory storage for the query */ typedef struct CallTime { double total_time; /* total execution time, in msec */ double min_time; /* minimum execution time in msec */ double max_time; /* maximum execution time in msec */ double mean_time; /* mean execution time in msec */ double sum_var_time; /* sum of variances in execution time in msec */ } CallTime; /* * Entry type for queries hash table (query ID). * * We use a hash table to keep track of query IDs that have their * corresponding query text added to the query buffer (pgsm_query_shared_buffer). * * This allow us to avoid adding duplicated queries to the buffer, therefore * leaving more space for other queries and saving some CPU. */ typedef struct pgssQueryEntry { uint64 queryid; /* query identifier, also the key. */ size_t query_pos; /* query location within query buffer */ } pgssQueryEntry; typedef struct PlanInfo { uint64 planid; /* plan identifier */ char plan_text[PLAN_TEXT_LEN]; /* plan text */ size_t plan_len; /* strlen(plan_text) */ } PlanInfo; typedef struct pgssHashKey { uint64 bucket_id; /* bucket number */ uint64 queryid; /* query identifier */ uint64 userid; /* user OID */ uint64 dbid; /* database OID */ uint64 ip; /* client ip address */ uint64 planid; /* plan identifier */ uint64 appid; /* hash of application name */ uint64 toplevel; /* query executed at top level */ } pgssHashKey; typedef struct QueryInfo { uint64 parentid; /* parent queryid of current query */ int64 type; /* type of query, options are query, info, * warning, error, fatal */ char application_name[APPLICATIONNAME_LEN]; char comments[COMMENTS_LEN]; char relations[REL_LST][REL_LEN]; /* List of relation involved * in the query */ int num_relations; /* Number of relation in the query */ CmdType cmd_type; /* query command type * SELECT/UPDATE/DELETE/INSERT */ } QueryInfo; typedef struct ErrorInfo { int64 elevel; /* error elevel */ char sqlcode[SQLCODE_LEN]; /* error sqlcode */ char message[ERROR_MESSAGE_LEN]; /* error message text */ } ErrorInfo; typedef struct Calls { int64 calls; /* # of times executed */ int64 rows; /* total # of retrieved or affected rows */ double usage; /* usage factor */ } Calls; typedef struct Blocks { int64 shared_blks_hit; /* # of shared buffer hits */ int64 shared_blks_read; /* # of shared disk blocks read */ int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */ int64 shared_blks_written; /* # of shared disk blocks written */ int64 local_blks_hit; /* # of local buffer hits */ int64 local_blks_read; /* # of local disk blocks read */ int64 local_blks_dirtied; /* # of local disk blocks dirtied */ int64 local_blks_written; /* # of local disk blocks written */ int64 temp_blks_read; /* # of temp blocks read */ int64 temp_blks_written; /* # of temp blocks written */ double blk_read_time; /* time spent reading, in msec */ double blk_write_time; /* time spent writing, in msec */ } Blocks; typedef struct SysInfo { float utime; /* user cpu time */ float stime; /* system cpu time */ } SysInfo; typedef struct Wal_Usage { int64 wal_records; /* # of WAL records generated */ int64 wal_fpi; /* # of WAL full page images generated */ uint64 wal_bytes; /* total amount of WAL bytes generated */ } Wal_Usage; typedef struct Counters { uint64 bucket_id; /* bucket id */ Calls calls; QueryInfo info; CallTime time; Calls plancalls; CallTime plantime; PlanInfo planinfo; Blocks blocks; SysInfo sysinfo; ErrorInfo error; Wal_Usage walusage; int resp_calls[MAX_RESPONSE_BUCKET]; /* execution time's in * msec */ uint64 state; /* query state */ } Counters; /* Some global structure to get the cpu usage, really don't like the idea of global variable */ /* * Statistics per statement */ typedef struct pgssEntry { pgssHashKey key; /* hash key of entry - MUST BE FIRST */ Counters counters; /* the statistics for this query */ int encoding; /* query text encoding */ slock_t mutex; /* protects the counters only */ size_t query_pos; /* query location within query buffer */ } pgssEntry; /* * Global shared state */ typedef struct pgssSharedState { LWLock *lock; /* protects hashtable search/modification */ double cur_median_usage; /* current median usage in hashtable */ slock_t mutex; /* protects following fields only: */ Size extent; /* current extent of query file */ int64 n_writers; /* number of active writers to query file */ pg_atomic_uint64 current_wbucket; pg_atomic_uint64 prev_bucket_sec; uint64 bucket_entry[MAX_BUCKETS]; char bucket_start_time[MAX_BUCKETS][60]; /* start time of the * bucket */ LWLock *errors_lock; /* protects errors hashtable * search/modification */ /* * These variables are used when pgsm_overflow_target is ON. * * overflow is set to true when the query buffer overflows. * * n_bucket_cycles counts the number of times we changed bucket since the * query buffer overflowed. When it reaches pgsm_max_buckets we remove the * dump file, also reset the counter. * * This allows us to avoid having a large file on disk that would also * slowdown queries to the pg_stat_monitor view. */ bool overflow; size_t n_bucket_cycles; } pgssSharedState; #define ResetSharedState(x) \ do { \ x->cur_median_usage = ASSUMED_MEDIAN_INIT; \ x->cur_median_usage = ASSUMED_MEDIAN_INIT; \ x->n_writers = 0; \ pg_atomic_init_u64(&x->current_wbucket, 0); \ pg_atomic_init_u64(&x->prev_bucket_sec, 0); \ memset(&x->bucket_entry, 0, MAX_BUCKETS * sizeof(uint64)); \ } while(0) #if PG_VERSION_NUM < 140000 /* * Struct for tracking locations/lengths of constants during normalization */ typedef struct LocationLen { int location; /* start offset in query text */ int length; /* length in bytes, or -1 to ignore */ } LocationLen; /* * Working state for computing a query jumble and producing a normalized * query string */ typedef struct JumbleState { /* Jumble of current query tree */ unsigned char *jumble; /* Number of bytes used in jumble[] */ Size jumble_len; /* Array of locations of constants that should be removed */ LocationLen *clocations; /* Allocated length of clocations array */ int clocations_buf_size; /* Current number of valid entries in clocations array */ int clocations_count; /* highest Param id we've seen, in order to start normalization correctly */ int highest_extern_param_id; } JumbleState; #endif /* Links to shared memory state */ bool SaveQueryText(uint64 bucketid, uint64 queryid, unsigned char *buf, const char *query, uint64 query_len, size_t *query_pos); /* guc.c */ void init_guc(void); GucVariable *get_conf(int i); /* hash_create.c */ bool IsHashInitialize(void); void pgss_shmem_startup(void); void pgss_shmem_shutdown(int code, Datum arg); int pgsm_get_bucket_size(void); pgssSharedState *pgsm_get_ss(void); HTAB *pgsm_get_plan_hash(void); HTAB *pgsm_get_hash(void); HTAB *pgsm_get_query_hash(void); HTAB *pgsm_get_plan_hash(void); void hash_entry_reset(void); void hash_query_entryies_reset(void); void hash_query_entries(); void hash_query_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer[]); void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer); pgssEntry *hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding); Size hash_memsize(void); int read_query_buffer(int bucket_id, uint64 queryid, char *query_txt, size_t pos); uint64 read_query(unsigned char *buf, uint64 queryid, char *query, size_t pos); void pgss_startup(void); void set_qbuf(unsigned char *); /* hash_query.c */ void pgss_startup(void); /*---- GUC variables ----*/ typedef enum { PSGM_TRACK_NONE = 0, /* track no statements */ PGSM_TRACK_TOP, /* only top level statements */ PGSM_TRACK_ALL /* all statements, including nested ones */ } PGSMTrackLevel; static const struct config_enum_entry track_options[] = { {"none", PSGM_TRACK_NONE, false}, {"top", PGSM_TRACK_TOP, false}, {"all", PGSM_TRACK_ALL, false}, {NULL, 0, false} }; #define PGSM_MAX get_conf(0)->guc_variable #define PGSM_QUERY_MAX_LEN get_conf(1)->guc_variable #define PGSM_TRACK_UTILITY get_conf(2)->guc_variable #define PGSM_NORMALIZED_QUERY get_conf(3)->guc_variable #define PGSM_MAX_BUCKETS get_conf(4)->guc_variable #define PGSM_BUCKET_TIME get_conf(5)->guc_variable #define PGSM_HISTOGRAM_MIN get_conf(6)->guc_variable #define PGSM_HISTOGRAM_MAX get_conf(7)->guc_variable #define PGSM_HISTOGRAM_BUCKETS get_conf(8)->guc_variable #define PGSM_QUERY_SHARED_BUFFER get_conf(9)->guc_variable #define PGSM_OVERFLOW_TARGET get_conf(10)->guc_variable #define PGSM_QUERY_PLAN get_conf(11)->guc_variable #define PGSM_TRACK get_conf(12)->guc_variable #define PGSM_EXTRACT_COMMENTS get_conf(13)->guc_variable #define PGSM_TRACK_PLANNING get_conf(14)->guc_variable /*---- Benchmarking ----*/ #ifdef BENCHMARK /* * These enumerator values are used as index in the hook stats array. * STATS_START and STATS_END are used only to delimit the range. * STATS_END is also the length of the valid items in the enum. */ enum pg_hook_stats_id { STATS_START = -1, STATS_PGSS_POST_PARSE_ANALYZE, STATS_PGSS_EXECUTORSTART, STATS_PGSS_EXECUTORUN, STATS_PGSS_EXECUTORFINISH, STATS_PGSS_EXECUTOREND, STATS_PGSS_PROCESSUTILITY, #if PG_VERSION_NUM >= 130000 STATS_PGSS_PLANNER_HOOK, #endif STATS_PGSM_EMIT_LOG_HOOK, STATS_PGSS_EXECUTORCHECKPERMS, STATS_END }; /* Hold time to execute statistics for a hook. */ struct pg_hook_stats_t { char hook_name[64]; double min_time; double max_time; double total_time; uint64 ncalls; }; #define HOOK_STATS_SIZE MAXALIGN((size_t)STATS_END * sizeof(struct pg_hook_stats_t)) /* Allocate a pg_hook_stats_t array of size HOOK_STATS_SIZE on shared memory. */ void init_hook_stats(void); /* Update hook time execution statistics. */ void update_hook_stats(enum pg_hook_stats_id hook_id, double time_elapsed); /* * Macro used to declare a hook function: * Example: * DECLARE_HOOK(void my_hook, const char *query, size_t length); * Will expand to: * static void my_hook(const char *query, size_t length); * static void my_hook_benchmark(const char *query, size_t length); */ #define DECLARE_HOOK(hook, ...) \ static hook(__VA_ARGS__); \ static hook##_benchmark(__VA_ARGS__); /* * Macro used to wrap a hook when pg_stat_monitor is compiled with -DBENCHMARK. * * It is intended to be used as follows in _PG_init(): * pg_hook_function = HOOK(my_hook_function); * Then, if pg_stat_monitor is compiled with -DBENCHMARK this will expand to: * pg_hook_name = my_hook_function_benchmark; * Otherwise it will simple expand to: * pg_hook_name = my_hook_function; */ #define HOOK(name) name##_benchmark #else /* #ifdef BENCHMARK */ #define DECLARE_HOOK(hook, ...) \ static hook(__VA_ARGS__); #define HOOK(name) name #define HOOK_STATS_SIZE 0 #endif #endif