-- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION pg_qualstats UPDATE" to load this file. \quit CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_index_advisor ( min_filter integer DEFAULT 1000, min_selectivity integer DEFAULT 30, forbidden_am text[] DEFAULT '{}') RETURNS json AS $_$ DECLARE v_processed bigint[] = '{}'; v_indexes json[] = '{}'; v_unoptimised json[] = '{}'; rec record; v_nb_processed integer = 1; v_ddl text; v_col text; v_qualnodeid bigint; v_quals_todo bigint[]; v_quals_done bigint[]; v_quals_col_done text[]; v_queryids bigint[] = '{}'; BEGIN -- sanity checks and default values SELECT coalesce(min_filter, 1000), coalesce(min_selectivity, 30), coalesce(forbidden_am, '{}') INTO min_filter, min_selectivity, forbidden_am; -- don't try to generate hash indexes Before pg 10, as those are only WAL -- logged since pg 11. IF pg_catalog.current_setting('server_version_num')::bigint < 100000 THEN forbidden_am := array_append(forbidden_am, 'hash'); END IF; -- first find out unoptimizable quals. -- We need an array of json containing the per-qual info, and a single -- array containing all the underlying qualnodeids, so we need to create -- the wanted final object manually as we can't have two different grouping -- approach. FOR rec IN WITH src AS (SELECT DISTINCT qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid LEFT JOIN pg_catalog.pg_operator op ON op.oid = q.opno LEFT JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid LEFT JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND coalesce(lrelid, rrelid) != 0 AND amname IS NULL ) SELECT pg_catalog.json_build_object( 'qual', @extschema@.pg_qualstats_deparse_qual(qual), -- be careful to generate an empty array if no queryid availiable 'queryids', coalesce(pg_catalog.array_agg(DISTINCT queryid) FILTER (WHERE queryid IS NOT NULL), '{}') ) AS obj, array_agg(qualnodeid) AS qualnodeids FROM src GROUP BY qual LOOP v_unoptimised := array_append(v_unoptimised, rec.obj); v_processed := array_cat(v_processed, rec.qualnodeids); END LOOP; -- The index suggestion is done in multiple iteration, by scoring for each -- relation containing interesting quals a path of possibly AND-ed quals -- that contains other possibly AND-ed quals. Only the higher score path -- will be used to create an index, so we can then compute another set of -- paths ignoring the quals that are now optimized with an index. WHILE v_nb_processed > 0 LOOP v_nb_processed := 0; FOR rec IN -- first, find quals that seems worth to optimize along with the -- possible access methods, discarding any qualnode that are marked as -- already processed. Also apply access method restriction. WITH pgqs AS ( SELECT dbid, amname, qualid, qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid, round(avg(execution_count)) AS execution_count, sum(occurences) AS occurences, round(sum(nbfiltered)::numeric / sum(occurences)) AS avg_filter, CASE WHEN sum(execution_count) = 0 THEN 0 ELSE round(sum(nbfiltered::numeric) / sum(execution_count) * 100) END AS avg_selectivity FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid JOIN pg_catalog.pg_operator op ON op.oid = q.opno JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND amname != ALL (forbidden_am) AND coalesce(lrelid, rrelid) != 0 AND qualnodeid != ALL(v_processed) GROUP BY dbid, amname, qualid, qualnodeid, lrelid, rrelid, lattnum, rattnum, opno, eval_type, queryid ), -- apply cardinality and selectivity restrictions filtered AS ( SELECT (qual).relid, amname, coalesce(qualid, qualnodeid) AS parent, count(*) AS weight, (array_agg(qualnodeid), array_agg(queryid) )::@extschema@.adv_quals AS quals FROM pgqs WHERE avg_filter >= min_filter AND avg_selectivity >= min_selectivity GROUP BY (qual).relid, amname, parent ), -- for each possibly AND-ed qual, build the list of included qualnodeid nodes AS ( SELECT p.relid, p.amname, p.parent, p.quals, c.quals AS children FROM filtered p LEFT JOIN filtered c ON (p.quals).qualnodeids @> (c.quals).qualnodeids AND p.amname = c.amname AND p.parent != c.parent AND (p.quals).qualnodeids != (c.quals).qualnodeids ), -- build the "paths", which is the list of AND-ed quals that entirely -- contains another possibly AND-ed quals, and give a score for each -- path. The scoring method used here is simply the number of -- columns in the quals. paths AS ( SELECT DISTINCT *, coalesce(pg_catalog.array_length((children).qualnodeids, 1), 0) AS weight FROM nodes UNION SELECT DISTINCT p.relid, p.amname, p.parent, p.quals, c.children, coalesce(pg_catalog.array_length((c.children).qualnodeids, 1), 0) AS weight FROM nodes p JOIN nodes c ON (p.children).qualnodeids @> (c.quals).qualnodeids AND (c.quals).qualnodeids IS NOT NULL AND (c.quals).qualnodeids != (p.quals).qualnodeids AND p.amname = c.amname ), -- compute the final paths. -- The scoring method used here is simply the sum of total -- number of columns in each possibly AND-ed quals, so that we can -- later chose to create indexes that optimize as many queries as -- possible with as few indexes as possible. -- We also compute here an access method weight, so that we can later -- choose a btree index rather than another access method if btree is -- available. computed AS ( SELECT relid, amname, parent, quals, array_agg(to_json(children) ORDER BY weight) FILTER (WHERE children IS NOT NULL) AS included, pg_catalog.array_length((quals).qualnodeids, 1) + sum(weight) AS path_weight, CASE amname WHEN 'btree' THEN 1 ELSE 2 END AS amweight FROM paths GROUP BY relid, amname, parent, quals ), -- compute a rank for each final paths, per relation. final AS ( SELECT relid, amname, parent, quals, included, path_weight, amweight, row_number() OVER ( PARTITION BY relid ORDER BY path_weight DESC, amweight) AS rownum FROM computed ) -- and finally choose the higher rank final path for each relation. SELECT relid, amname, parent, (quals).qualnodeids as quals, (quals).queryids as queryids, included, path_weight FROM final WHERE rownum = 1 LOOP v_nb_processed := v_nb_processed + 1; v_ddl := ''; v_quals_todo := '{}'; v_quals_done := '{}'; v_quals_col_done := '{}'; -- put columns from included quals, if any, first for order dependency DECLARE v_cur json; BEGIN IF rec.included IS NOT NULL THEN FOREACH v_cur IN ARRAY rec.included LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_qualnodeid IN SELECT pg_catalog.json_array_elements(v_cur->'qualnodeids')::text::bigint LOOP v_quals_todo := v_quals_todo || v_qualnodeid; END LOOP; END LOOP; END IF; END; -- and append qual's own columns v_quals_todo := v_quals_todo || rec.quals; -- generate the index DDL FOREACH v_qualnodeid IN ARRAY v_quals_todo LOOP -- skip quals already present in the index CONTINUE WHEN v_quals_done @> ARRAY[v_qualnodeid]; -- skip other quals for the same column v_col := @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, false); CONTINUE WHEN v_quals_col_done @> ARRAY[v_col]; -- mark this qual as present in a generated index so it's ignore at -- next round of best quals to optimize v_processed := pg_catalog.array_append(v_processed, v_qualnodeid); -- mark this qual and col as present in this index v_quals_done := v_quals_done || v_qualnodeid; v_quals_col_done := v_quals_col_done || v_col; -- if underlying table has been dropped, stop here CONTINUE WHEN coalesce(v_col, '') = ''; -- append the column to the index IF v_ddl != '' THEN v_ddl := v_ddl || ', '; END IF; v_ddl := v_ddl || @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, true); END LOOP; -- if underlying table has been dropped, skip this (broken) index CONTINUE WHEN coalesce(v_ddl, '') = ''; -- generate the full CREATE INDEX ddl v_ddl = pg_catalog.format('CREATE INDEX ON %s USING %I (%s)', @extschema@.pg_qualstats_get_qualnode_rel(v_qualnodeid), rec.amname, v_ddl); -- get the underlyings queryid(s) DECLARE v_queryid text; v_cur json; BEGIN v_queryids = rec.queryids; IF rec.included IS NOT NULL THEN FOREACH v_cur IN ARRAY rec.included LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_queryid IN SELECT pg_catalog.json_array_elements(v_cur->'queryids')::text LOOP CONTINUE WHEN v_queryid = 'null'; v_queryids := v_queryids || v_queryid::text::bigint; END LOOP; END LOOP; END IF; END; -- remove any duplicates SELECT pg_catalog.array_agg(DISTINCT v) INTO v_queryids FROM (SELECT unnest(v_queryids)) s(v); -- sanitize the queryids IF v_queryids IS NULL OR v_queryids = '{null}' THEN v_queryids = '{}'; END IF; -- and finally append the index to the list of generated indexes v_indexes := pg_catalog.array_append(v_indexes, pg_catalog.json_build_object( 'ddl', v_ddl, 'queryids', v_queryids ) ); END LOOP; END LOOP; RETURN pg_catalog.json_build_object( 'indexes', v_indexes, 'unoptimised', v_unoptimised); END; $_$ LANGUAGE plpgsql; /* end of pg_qualstats_index_advisor */