CREATE FUNCTION @extschema@.run_maintenance( p_parent_table text DEFAULT NULL -- If these defaults change reflect them in `run_maintenance_proc`! , p_analyze boolean DEFAULT false , p_jobmon boolean DEFAULT true ) RETURNS void LANGUAGE plpgsql AS $$ DECLARE ex_context text; ex_detail text; ex_hint text; ex_message text; v_adv_lock boolean; v_analyze boolean := FALSE; v_check_subpart int; v_child_timestamp timestamptz; v_control_type text; v_time_encoder text; v_time_decoder text; v_create_count int := 0; v_current_partition_id bigint; v_current_partition_timestamp timestamptz; v_default_tablename text; v_drop_count int := 0; v_exact_control_type text; v_is_default text; v_job_id bigint; v_jobmon_schema text; v_last_partition text; v_last_partition_created boolean; v_last_partition_id bigint; v_last_partition_timestamp timestamptz; v_max_id bigint; v_max_id_default bigint; v_max_time_default timestamptz; v_new_search_path text; v_next_partition_id bigint; v_next_partition_timestamp timestamptz; v_old_search_path text; v_parent_exists text; v_parent_oid oid; v_parent_schema text; v_parent_tablename text; v_partition_expression text; v_premade_count int; v_row record; v_row_max_id record; v_row_max_time record; v_sql text; v_step_id bigint; v_step_overflow_id bigint; v_sub_id_max bigint; v_sub_id_max_suffix bigint; v_sub_id_min bigint; v_sub_parent text; v_sub_timestamp_max timestamptz; v_sub_timestamp_max_suffix timestamptz; v_sub_timestamp_min timestamptz; v_tables_list_sql text; BEGIN /* * Function to manage pre-creation of the next partitions in a set. * Also manages dropping old partitions if the retention option is set. * If p_parent_table is passed, will only run run_maintenance() on that one table (no matter what the configuration table may have set for it) * Otherwise, will run on all tables in the config table with p_automatic_maintenance() set to true. * For large partition sets, running analyze can cause maintenance to take longer than expected so is not done by default. Can set p_analyze to true to force analyze. Be aware that constraint exclusion may not work properly until an analyze on the partition set is run. */ v_adv_lock := pg_try_advisory_xact_lock(hashtext('pg_partman run_maintenance')); IF v_adv_lock = 'false' THEN RAISE NOTICE 'Partman maintenance already running.'; RETURN; END IF; IF pg_is_in_recovery() THEN RAISE DEBUG 'pg_partmain maintenance called on replica. Doing nothing.'; RETURN; END IF; SELECT current_setting('search_path') INTO v_old_search_path; IF length(v_old_search_path) > 0 THEN v_new_search_path := '@extschema@,pg_temp,'||v_old_search_path; ELSE v_new_search_path := '@extschema@,pg_temp'; END IF; IF p_jobmon THEN SELECT nspname INTO v_jobmon_schema FROM pg_catalog.pg_namespace n, pg_catalog.pg_extension e WHERE e.extname = 'pg_jobmon'::name AND e.extnamespace = n.oid; IF v_jobmon_schema IS NOT NULL THEN v_new_search_path := format('%s,%s',v_jobmon_schema, v_new_search_path); END IF; END IF; EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_new_search_path, 'false'); IF v_jobmon_schema IS NOT NULL THEN v_job_id := add_job('PARTMAN RUN MAINTENANCE'); v_step_id := add_step(v_job_id, 'Running maintenance loop'); END IF; v_tables_list_sql := 'SELECT parent_table , partition_type , partition_interval , control , premake , undo_in_progress , sub_partition_set_full , epoch , infinite_time_partitions , retention , ignore_default_data , datetime_string , maintenance_order , date_trunc_interval FROM @extschema@.part_config WHERE undo_in_progress = false'; IF p_parent_table IS NULL THEN v_tables_list_sql := v_tables_list_sql || format(' AND automatic_maintenance = %L ', 'on'); ELSE v_tables_list_sql := v_tables_list_sql || format(' AND parent_table = %L ', p_parent_table); END IF; v_tables_list_sql := v_tables_list_sql || format(' ORDER BY maintenance_order ASC NULLS LAST, parent_table ASC NULLS LAST '); RAISE DEBUG 'run_maint: v_tables_list_sql: %', v_tables_list_sql; FOR v_row IN EXECUTE v_tables_list_sql LOOP CONTINUE WHEN v_row.undo_in_progress; -- When sub-partitioning, retention may drop tables that were already put into the query loop values. -- Check if they still exist in part_config before continuing v_parent_exists := NULL; SELECT parent_table, time_encoder, time_decoder INTO v_parent_exists, v_time_encoder, v_time_decoder FROM @extschema@.part_config WHERE parent_table = v_row.parent_table; IF v_parent_exists IS NULL THEN RAISE DEBUG 'run_maint: Parent table possibly removed from part_config by retenion'; END IF; CONTINUE WHEN v_parent_exists IS NULL; -- Check for old quarterly and ISO weekly partitioning from prior to version 5.x. Error out to avoid breaking these partition sets -- with new datetime_string formats IF v_row.datetime_string IN ('YYYY"q"Q', 'IYYY"w"IW') THEN RAISE EXCEPTION 'Quarterly and ISO weekly partitioning is no longer supported in pg_partman 5.0.0 and greater. Please see documentation for migrating away from these partitioning patterns. Partition set: %', v_row.parent_table; END IF; -- Check for consistent data in part_config_sub table. Was unable to get this working properly as either a constraint or trigger. -- Would either delay raising an error until the next write (which I cannot predict) or disallow future edits to update a sub-partition set's configuration. -- This way at least provides a consistent way to check that I know will run. If anyone can get a working constraint/trigger, please help! SELECT sub_parent INTO v_sub_parent FROM @extschema@.part_config_sub WHERE sub_parent = v_row.parent_table; IF v_sub_parent IS NOT NULL THEN SELECT count(*) INTO v_check_subpart FROM @extschema@.check_subpart_sameconfig(v_row.parent_table); IF v_check_subpart > 1 THEN RAISE EXCEPTION 'Inconsistent data in part_config_sub table. Sub-partition tables that are themselves sub-partitions cannot have differing configuration values among their siblings. Run this query: "SELECT * FROM @extschema@.check_subpart_sameconfig(''%'');" This should only return a single row or nothing. If multiple rows are returned, the results are differing configurations in the part_config_sub table for children of the given parent. Determine the child tables of the given parent and look up their entries based on the "part_config_sub.sub_parent" column. Update the differing values to be consistent for your desired values.', v_row.parent_table; END IF; END IF; SELECT n.nspname, c.relname, c.oid INTO v_parent_schema, v_parent_tablename, v_parent_oid FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid WHERE n.nspname = split_part(v_row.parent_table, '.', 1)::name AND c.relname = split_part(v_row.parent_table, '.', 2)::name; -- Always returns the default partition first if it exists SELECT partition_tablename INTO v_default_tablename FROM @extschema@.show_partitions(v_row.parent_table, p_include_default := true) LIMIT 1; SELECT pg_get_expr(relpartbound, v_parent_oid) INTO v_is_default FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n on c.relnamespace = n.oid WHERE n.nspname = v_parent_schema AND c.relname = v_default_tablename; IF v_is_default != 'DEFAULT' THEN -- Parent table will never have data, but allows code below to "just work" v_default_tablename := v_parent_tablename; END IF; SELECT general_type, exact_type INTO v_control_type, v_exact_control_type FROM @extschema@.check_control_type(v_parent_schema, v_parent_tablename, v_row.control); v_partition_expression := CASE WHEN v_row.epoch = 'seconds' THEN format('to_timestamp(%I)', v_row.control) WHEN v_row.epoch = 'milliseconds' THEN format('to_timestamp((%I/1000)::float)', v_row.control) WHEN v_row.epoch = 'microseconds' THEN format('to_timestamp((%I/1000000)::float)', v_row.control) WHEN v_row.epoch = 'nanoseconds' THEN format('to_timestamp((%I/1000000000)::float)', v_row.control) ELSE format('%I', v_row.control) END; RAISE DEBUG 'run_maint: v_partition_expression: %', v_partition_expression; SELECT partition_tablename INTO v_last_partition FROM @extschema@.show_partitions(v_row.parent_table, 'DESC') LIMIT 1; RAISE DEBUG 'run_maint: parent_table: %, v_last_partition: %', v_row.parent_table, v_last_partition; IF v_control_type = 'time' OR (v_control_type = 'id' AND v_row.epoch <> 'none') OR (v_control_type IN ('text', 'uuid')) THEN IF v_row.sub_partition_set_full THEN UPDATE @extschema@.part_config SET maintenance_last_run = clock_timestamp() WHERE parent_table = v_row.parent_table; CONTINUE; END IF; SELECT child_start_time INTO v_last_partition_timestamp FROM @extschema@.show_partition_info(v_parent_schema||'.'||v_last_partition, v_row.partition_interval, v_row.parent_table); -- Do not create child tables if they would be dropped by retention anyway. Edge case where maintenance was missed for -- an extended period of time IF v_row.retention IS NOT NULL THEN v_last_partition_timestamp := greatest(v_last_partition_timestamp, CURRENT_TIMESTAMP - v_row.retention::interval); -- Need to properly truncate the interval and account for custom date truncation SELECT base_timestamp INTO v_last_partition_timestamp FROM @extschema@.calculate_time_partition_info(v_row.partition_interval::interval, v_last_partition_timestamp, v_row.date_trunc_interval); END IF; -- Must be reset to null otherwise if the next partition set in the loop is empty, the previous partition set's value could be used v_current_partition_timestamp := NULL; -- Loop through child tables starting from highest to get a timestamp from the highest non-empty partition in the set -- Avoids doing a scan on entire partition set and/or getting any values accidentally in default. FOR v_row_max_time IN SELECT partition_schemaname, partition_tablename FROM @extschema@.show_partitions(v_row.parent_table, 'DESC', false) LOOP IF v_control_type = 'time' OR (v_control_type = 'id' AND v_row.epoch <> 'none') THEN EXECUTE format('SELECT %s::text FROM %I.%I LIMIT 1' , v_partition_expression , v_row_max_time.partition_schemaname , v_row_max_time.partition_tablename ) INTO v_child_timestamp; ELSIF v_control_type IN ('text', 'uuid') THEN EXECUTE format('SELECT %s(%s::text) FROM %I.%I LIMIT 1' , v_time_decoder , v_partition_expression , v_row_max_time.partition_schemaname , v_row_max_time.partition_tablename ) INTO v_child_timestamp; END IF; IF v_row.infinite_time_partitions AND v_child_timestamp < CURRENT_TIMESTAMP THEN -- No new data has been inserted relative to "now", but keep making child tables anyway v_current_partition_timestamp = CURRENT_TIMESTAMP; -- Nothing else to do in this case so just end early EXIT; END IF; IF v_child_timestamp IS NOT NULL THEN SELECT suffix_timestamp INTO v_current_partition_timestamp FROM @extschema@.show_partition_name(v_row.parent_table, v_child_timestamp::text); EXIT; END IF; END LOOP; IF v_row.infinite_time_partitions AND v_child_timestamp IS NULL THEN -- If partition set is completely empty, still keep making child tables anyway -- Has to be separate check outside above loop since "future" tables are likely going to be empty, hence ignored in that loop v_current_partition_timestamp = CURRENT_TIMESTAMP; END IF; -- If not ignoring the default table, check for max values there. If they are there and greater than all child values, use that instead -- Note the default is NOT to care about data in the default, so maintenance will fail if new child table boundaries overlap with -- data that exists in the default. This is intentional so user removes data from default to avoid larger problems. IF v_row.ignore_default_data THEN v_max_time_default := NULL; ELSE EXECUTE format('SELECT max(%s) FROM ONLY %I.%I', v_partition_expression, v_parent_schema, v_default_tablename) INTO v_max_time_default; END IF; RAISE DEBUG 'run_maint: v_current_partition_timestamp: %, v_max_time_default: %', v_current_partition_timestamp, v_max_time_default; IF v_current_partition_timestamp IS NULL AND v_max_time_default IS NULL THEN -- Partition set is completely empty and infinite time partitions not set -- Still need to run retention if needed. Note similar call below for non-empty sets. Keep in sync. IF v_row.retention IS NOT NULL THEN v_drop_count := v_drop_count + @extschema@.drop_partition_time(v_row.parent_table); END IF; -- Nothing else to do UPDATE @extschema@.part_config SET maintenance_last_run = clock_timestamp() WHERE parent_table = v_row.parent_table; CONTINUE; END IF; RAISE DEBUG 'run_maint: v_child_timestamp: %, v_current_partition_timestamp: %, v_max_time_default: %', v_child_timestamp, v_current_partition_timestamp, v_max_time_default; IF v_current_partition_timestamp IS NULL OR (v_max_time_default > v_current_partition_timestamp) THEN SELECT suffix_timestamp INTO v_current_partition_timestamp FROM @extschema@.show_partition_name(v_row.parent_table, v_max_time_default::text); END IF; -- If this is a subpartition, determine if the last child table has been made. If so, mark it as full so future maintenance runs can skip it SELECT sub_min::timestamptz, sub_max::timestamptz INTO v_sub_timestamp_min, v_sub_timestamp_max FROM @extschema@.check_subpartition_limits(v_row.parent_table, 'time'); IF v_sub_timestamp_max IS NOT NULL THEN SELECT suffix_timestamp INTO v_sub_timestamp_max_suffix FROM @extschema@.show_partition_name(v_row.parent_table, v_sub_timestamp_max::text); IF v_sub_timestamp_max_suffix = v_last_partition_timestamp THEN -- Final partition for this set is created. Set full and skip it UPDATE @extschema@.part_config SET sub_partition_set_full = true, maintenance_last_run = clock_timestamp() WHERE parent_table = v_row.parent_table; CONTINUE; END IF; END IF; -- Check and see how many premade partitions there are. v_premade_count = round(EXTRACT('epoch' FROM age(v_last_partition_timestamp, v_current_partition_timestamp)) / EXTRACT('epoch' FROM v_row.partition_interval::interval)); v_next_partition_timestamp := v_last_partition_timestamp; RAISE DEBUG 'run_maint before loop: last_partition_timestamp: %, current_partition_timestamp: %, v_premade_count: %, v_sub_timestamp_min: %, v_sub_timestamp_max: %' , v_last_partition_timestamp , v_current_partition_timestamp , v_premade_count , v_sub_timestamp_min , v_sub_timestamp_max; -- Loop premaking until config setting is met. Allows it to catch up if it fell behind or if premake changed WHILE (v_premade_count < v_row.premake) LOOP RAISE DEBUG 'run_maint: parent_table: %, v_premade_count: %, v_next_partition_timestamp: %', v_row.parent_table, v_premade_count, v_next_partition_timestamp; IF v_next_partition_timestamp < v_sub_timestamp_min OR v_next_partition_timestamp > v_sub_timestamp_max THEN -- With subpartitioning, no need to run if the timestamp is not in the parent table's range EXIT; END IF; BEGIN v_next_partition_timestamp := v_next_partition_timestamp + v_row.partition_interval::interval; EXCEPTION WHEN datetime_field_overflow THEN v_premade_count := v_row.premake; -- do this so it can exit the premake check loop and continue in the outer for loop IF v_jobmon_schema IS NOT NULL THEN v_step_overflow_id := add_step(v_job_id, 'Attempted partition time interval is outside PostgreSQL''s supported time range.'); PERFORM update_step(v_step_overflow_id, 'CRITICAL', format('Child partition creation skipped for parent table: %s', v_partition_time)); END IF; RAISE WARNING 'Attempted partition time interval is outside PostgreSQL''s supported time range. Child partition creation skipped for parent table %', v_row.parent_table; CONTINUE; END; v_last_partition_created := @extschema@.create_partition_time(v_row.parent_table , ARRAY[v_next_partition_timestamp]); IF v_last_partition_created THEN v_analyze := true; v_create_count := v_create_count + 1; END IF; v_premade_count = round(EXTRACT('epoch' FROM age(v_next_partition_timestamp, v_current_partition_timestamp)) / EXTRACT('epoch' FROM v_row.partition_interval::interval)); END LOOP; -- Run retention if needed. Note similar call above when partition set is empty. Keep in sync. IF v_row.retention IS NOT NULL THEN v_drop_count := v_drop_count + @extschema@.drop_partition_time(v_row.parent_table); END IF; ELSIF v_control_type = 'id' THEN IF v_row.sub_partition_set_full THEN UPDATE @extschema@.part_config SET maintenance_last_run = clock_timestamp() WHERE parent_table = v_row.parent_table; CONTINUE; END IF; -- Must be reset to null otherwise if the next partition set in the loop is empty, the previous partition set's value could be used v_current_partition_id := NULL; FOR v_row_max_id IN SELECT partition_schemaname, partition_tablename FROM @extschema@.show_partitions(v_row.parent_table, 'DESC', false) LOOP -- Loop through child tables starting from highest to get current max value in partition set -- Avoids doing a scan on entire partition set and/or getting any values accidentally in default. EXECUTE format('SELECT trunc(max(%I))::bigint FROM %I.%I' , v_row.control , v_row_max_id.partition_schemaname , v_row_max_id.partition_tablename) INTO v_max_id; IF v_max_id IS NOT NULL THEN SELECT suffix_id INTO v_current_partition_id FROM @extschema@.show_partition_name(v_row.parent_table, v_max_id::text); EXIT; END IF; END LOOP; -- If not ignoring the default table, check for max values there. If they are there and greater than all child values, use that instead -- Note the default is NOT to care about data in the default, so maintenance will fail if new child table boundaries overlap with -- data that exists in the default. This is intentional so user removes data from default to avoid larger problems. IF v_row.ignore_default_data THEN v_max_id_default := NULL; ELSE EXECUTE format('SELECT trunc(max(%I))::bigint FROM ONLY %I.%I', v_row.control, v_parent_schema, v_default_tablename) INTO v_max_id_default; END IF; RAISE DEBUG 'run_maint: v_max_id: %, v_current_partition_id: %, v_max_id_default: %', v_max_id, v_current_partition_id, v_max_id_default; IF v_current_partition_id IS NULL AND v_max_id_default IS NULL THEN -- Partition set is completely empty. -- Still need to run retention if needed. Note similar call below for non-empty sets. Keep in sync. IF v_row.retention IS NOT NULL THEN v_drop_count := v_drop_count + @extschema@.drop_partition_id(v_row.parent_table); END IF; -- Nothing else to do UPDATE @extschema@.part_config SET maintenance_last_run = clock_timestamp() WHERE parent_table = v_row.parent_table; CONTINUE; END IF; IF v_current_partition_id IS NULL OR (v_max_id_default > v_current_partition_id) THEN SELECT suffix_id INTO v_current_partition_id FROM @extschema@.show_partition_name(v_row.parent_table, v_max_id_default::text); END IF; SELECT child_start_id INTO v_last_partition_id FROM @extschema@.show_partition_info(v_parent_schema||'.'||v_last_partition, v_row.partition_interval, v_row.parent_table); -- Determine if this table is a child of a subpartition parent. If so, get limits to see if run_maintenance even needs to run for it. SELECT sub_min::bigint, sub_max::bigint INTO v_sub_id_min, v_sub_id_max FROM @extschema@.check_subpartition_limits(v_row.parent_table, 'id'); IF v_sub_id_max IS NOT NULL THEN SELECT suffix_id INTO v_sub_id_max_suffix FROM @extschema@.show_partition_name(v_row.parent_table, v_sub_id_max::text); IF v_sub_id_max_suffix = v_last_partition_id THEN -- Final partition for this set is created. Set full and skip it UPDATE @extschema@.part_config SET sub_partition_set_full = true, maintenance_last_run = clock_timestamp() WHERE parent_table = v_row.parent_table; CONTINUE; END IF; END IF; v_next_partition_id := v_last_partition_id; v_premade_count := ((v_last_partition_id - v_current_partition_id) / v_row.partition_interval::bigint); -- Loop premaking until config setting is met. Allows it to catch up if it fell behind or if premake changed. RAISE DEBUG 'run_maint: before child creation loop: parent_table: %, v_last_partition_id: %, v_premade_count: %, v_next_partition_id: %', v_row.parent_table, v_last_partition_id, v_premade_count, v_next_partition_id; WHILE (v_premade_count < v_row.premake) LOOP RAISE DEBUG 'run_maint: parent_table: %, v_premade_count: %, v_next_partition_id: %', v_row.parent_table, v_premade_count, v_next_partition_id; IF v_next_partition_id < v_sub_id_min OR v_next_partition_id > v_sub_id_max THEN -- With subpartitioning, no need to run if the id is not in the parent table's range EXIT; END IF; v_next_partition_id := v_next_partition_id + v_row.partition_interval::bigint; v_last_partition_created := @extschema@.create_partition_id(v_row.parent_table, ARRAY[v_next_partition_id]); IF v_last_partition_created THEN v_analyze := true; v_create_count := v_create_count + 1; END IF; v_premade_count := ((v_next_partition_id - v_current_partition_id) / v_row.partition_interval::bigint); END LOOP; -- Run retention if needed. Note similar call above when partition set is empty. Keep in sync. IF v_row.retention IS NOT NULL THEN v_drop_count := v_drop_count + @extschema@.drop_partition_id(v_row.parent_table); END IF; END IF; -- end main IF check for time or id IF v_analyze AND p_analyze THEN IF v_jobmon_schema IS NOT NULL THEN v_step_id := add_step(v_job_id, format('Analyzing partition set: %s', v_row.parent_table)); END IF; EXECUTE format('ANALYZE %I.%I',v_parent_schema, v_parent_tablename); IF v_jobmon_schema IS NOT NULL THEN PERFORM update_step(v_step_id, 'OK', 'Done'); END IF; END IF; UPDATE @extschema@.part_config SET maintenance_last_run = clock_timestamp() WHERE parent_table = v_row.parent_table; END LOOP; -- end of main loop through part_config IF v_jobmon_schema IS NOT NULL THEN v_step_id := add_step(v_job_id, format('Finished maintenance')); PERFORM update_step(v_step_id, 'OK', format('Partition maintenance finished. %s partitions made. %s partitions dropped.', v_create_count, v_drop_count)); IF v_step_overflow_id IS NOT NULL THEN PERFORM fail_job(v_job_id); ELSE PERFORM close_job(v_job_id); END IF; END IF; EXECUTE format('SELECT set_config(%L, %L, %L)', 'search_path', v_old_search_path, 'false'); EXCEPTION WHEN OTHERS THEN GET STACKED DIAGNOSTICS ex_message = MESSAGE_TEXT, ex_context = PG_EXCEPTION_CONTEXT, ex_detail = PG_EXCEPTION_DETAIL, ex_hint = PG_EXCEPTION_HINT; IF v_jobmon_schema IS NOT NULL THEN IF v_job_id IS NULL THEN EXECUTE format('SELECT %I.add_job(''PARTMAN RUN MAINTENANCE'')', v_jobmon_schema) INTO v_job_id; EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before job logging started'')', v_jobmon_schema, v_job_id, p_parent_table) INTO v_step_id; ELSIF v_step_id IS NULL THEN EXECUTE format('SELECT %I.add_step(%s, ''EXCEPTION before first step logged'')', v_jobmon_schema, v_job_id) INTO v_step_id; END IF; EXECUTE format('SELECT %I.update_step(%s, ''CRITICAL'', %L)', v_jobmon_schema, v_step_id, 'ERROR: '||coalesce(SQLERRM,'unknown')); EXECUTE format('SELECT %I.fail_job(%s)', v_jobmon_schema, v_job_id); END IF; RAISE EXCEPTION '% CONTEXT: % DETAIL: % HINT: %', ex_message, ex_context, ex_detail, ex_hint; END $$;