/** * @file * @brief ProvSQL upgrade script: 1.6.0 -> 1.7.0 * * 1.7.0 is a pure SQL-surface addition: no new gate types (the * @c provenance_gate enum is unchanged, so no @c reset_constants_cache() * is needed), no new composite types, casts, operators or aggregates, * and no in-place migration of existing tracked relations. It adds a * handful of new functions, all defined in @c provsql.common.sql and * reproduced verbatim below: * * - Knowledge-compilation introspection surface: @c compile_to_ddnnf / * @c compile_to_ddnnf_dot (machine-readable .nnf and DOT of the * compiled d-DNNF), @c ddnnf_stats (jsonb structural statistics), * @c tseytin_cnf (DIMACS CNF the extension feeds to external model * counters) with its variable->input mapping helpers * @c tseytin_cnf_mapping_json / @c tseytin_cnf_mapping, and * @c tree_decomposition_dot (DOT of the min-fill decomposition). * * - @c tool_available(text): reports whether an external tool resolves * on the backend's PATH (honouring @c provsql.tool_search_path), * using the same resolver the compilers / counters consult. * * - @c probability_benchmark and its @c _probability_benchmark_one * helper: time every probability-evaluation method on one circuit. * * - @c eval_recursive: the fixpoint driver the planner hook calls when * lowering a recursive CTE over provenance-tracked relations (PG15+ * at the C side; the SQL function itself installs everywhere). * * It also replaces the body of the pre-existing @c circuit_subgraph * introspection helper: its reported @c depth is now the longest-path * (canonical circuit) distance from the root (@c MAX over the BFS), * not the shortest-path distance (@c MIN) it returned in 1.6.0. * * Every statement is @c CREATE OR REPLACE FUNCTION, so the script is * idempotent without explicit guards. Functions are ordered so each * dependency precedes its user (@c tseytin_cnf_mapping_json before * @c tseytin_cnf_mapping, @c _probability_benchmark_one before * @c probability_benchmark). */ -- ---------------------------------------------------------------------- -- 1. Recursive-query fixpoint driver (called by lower_recursive_cte). -- ---------------------------------------------------------------------- CREATE OR REPLACE FUNCTION eval_recursive( body_sql text, work_name text, colnames text, coldef text, max_iter int DEFAULT 1000) RETURNS void AS $$ DECLARE changed boolean; -- circuit changed structurally this round set_stable boolean; -- user-column tuple set unchanged this round iters int := 0; new_count int; -- rows in _new this round (INSERT ROW_COUNT) -- Under an absorptive semiring (guaranteed by provsql.boolean_provenance, -- which is strictly stronger) the provenance *value* converges on cyclic data -- even though the circuit keeps growing structurally. A minimal derivation -- cannot repeat a tuple, so it has depth <= (number of derivable tuples); -- after that many naive rounds the value equals the least fixpoint, and the -- surplus (longer, cyclic) derivations are absorbed at evaluation time. We -- learn that bound from the tuple-set fixpoint and stop there, returning a -- circuit that is sound for absorptive evaluation only. boolean_mode boolean := coalesce(current_setting('provsql.boolean_provenance', true)::bool, false); ntuples int := NULL; -- the bound above, set once the tuple set stabilises BEGIN EXECUTE format('DROP TABLE IF EXISTS %I', work_name); DROP TABLE IF EXISTS _new; -- Tracked working table (carries provsql), initially empty, plus a scratch -- table of the same shape; both reused across rounds. EXECUTE format('CREATE TEMP TABLE %I (%s, provsql uuid)', work_name, coldef); EXECUTE format('CREATE TEMP TABLE _new (LIKE %I)', work_name); LOOP iters := iters + 1; -- Hard safety bound (also catches genuinely unbounded recursion, e.g. an -- unbounded counter, where even the tuple set never stabilises). IF iters > max_iter THEN RAISE EXCEPTION 'eval_recursive: no fixpoint after % rounds (cyclic data?)', max_iter; END IF; -- One round of naive evaluation: re-run the CTE body over the current -- working table. INSERT targets a tracked table, so ProvSQL fills provsql. -- Take the row count from the INSERT itself (counting _new directly would be -- an aggregate over a provenance-tracked table -> an agg_token). EXECUTE 'TRUNCATE _new'; EXECUTE format('INSERT INTO _new(%s) %s', colnames, body_sql); GET DIAGNOSTICS new_count = ROW_COUNT; -- Exact structural fixpoint test (content-addressed tokens => set equality). EXECUTE format( 'SELECT EXISTS((TABLE _new EXCEPT TABLE %1$I) UNION ALL (TABLE %1$I EXCEPT TABLE _new))', work_name) INTO changed; -- In Boolean mode, learn the round bound from the tuple-set fixpoint (the -- set always stabilises after finitely many rounds, even on cyclic data). IF boolean_mode AND ntuples IS NULL THEN EXECUTE format( 'SELECT NOT EXISTS(' || '(SELECT %2$s FROM _new EXCEPT SELECT %2$s FROM %1$I) UNION ALL ' || '(SELECT %2$s FROM %1$I EXCEPT SELECT %2$s FROM _new))', work_name, colnames) INTO set_stable; IF set_stable THEN ntuples := new_count; END IF; END IF; -- Copy _new into the working table (tracked -> tracked carries the tokens). EXECUTE format('TRUNCATE %I', work_name); EXECUTE format('INSERT INTO %1$I(%2$s) SELECT %2$s FROM _new', work_name, colnames); -- Structural fixpoint: done (acyclic / fully converged) -- sound for any -- semiring. EXIT WHEN NOT changed; -- Boolean mode on cyclic data: once the value-fixpoint bound is reached we -- stop, even though the circuit is not structurally stable. EXIT WHEN boolean_mode AND ntuples IS NOT NULL AND iters >= ntuples; END LOOP; END $$ LANGUAGE plpgsql SET client_min_messages = warning; -- ---------------------------------------------------------------------- -- 2. Knowledge-compilation introspection surface. -- ---------------------------------------------------------------------- CREATE OR REPLACE FUNCTION compile_to_ddnnf_dot( token UUID, compiler TEXT = 'd4') RETURNS TEXT AS 'provsql','compile_to_ddnnf_dot' LANGUAGE C; CREATE OR REPLACE FUNCTION compile_to_ddnnf( token UUID, compiler TEXT = 'd4') RETURNS TEXT AS 'provsql','compile_to_ddnnf' LANGUAGE C; CREATE OR REPLACE FUNCTION ddnnf_stats( token UUID, compiler TEXT = 'd4') RETURNS jsonb AS 'provsql','ddnnf_stats' LANGUAGE C; CREATE OR REPLACE FUNCTION tseytin_cnf( token UUID, weighted BOOLEAN = TRUE, mapping BOOLEAN = TRUE) RETURNS TEXT AS 'provsql','tseytin_cnf' LANGUAGE C; CREATE OR REPLACE FUNCTION tseytin_cnf_mapping_json(token UUID) RETURNS jsonb AS 'provsql','tseytin_cnf_mapping_json' LANGUAGE C; CREATE OR REPLACE FUNCTION tseytin_cnf_mapping(token UUID) RETURNS TABLE(variable INT, gate UUID, probability FLOAT8) AS $$ SELECT variable, gate, probability FROM jsonb_to_recordset(tseytin_cnf_mapping_json(token)) AS x(variable INT, gate UUID, probability FLOAT8) ORDER BY variable $$ LANGUAGE SQL STABLE; CREATE OR REPLACE FUNCTION tree_decomposition_dot( token UUID) RETURNS TEXT AS 'provsql','tree_decomposition_dot' LANGUAGE C; -- ---------------------------------------------------------------------- -- 3. Replacement body for the circuit_subgraph introspection helper: -- reported depth is now the longest-path (MAX) distance from the -- root, not the shortest-path (MIN) distance shipped in 1.6.0. -- ---------------------------------------------------------------------- CREATE OR REPLACE FUNCTION circuit_subgraph(root UUID, max_depth INT DEFAULT 8) RETURNS TABLE(node UUID, parent UUID, child_pos INT, gate_type TEXT, info1 TEXT, info2 TEXT, depth INT) AS $$ WITH RECURSIVE bfs(node, parent, child_pos, depth) AS ( SELECT root, NULL::UUID, NULL::INT, 0 UNION ALL SELECT c.t, b.node, c.idx::INT, b.depth + 1 FROM bfs b CROSS JOIN LATERAL unnest(provsql.get_children(b.node)) WITH ORDINALITY AS c(t, idx) WHERE b.depth < max_depth ), -- Each node's canonical depth is its longest-path distance from the -- root (the standard circuit-depth notion: the longest chain of -- gates separating the node from the output). The recursive CTE -- enumerates paths up to @c max_depth, so MAX over those is the -- longest path of length at most @c max_depth. node_depth AS ( SELECT node, MAX(depth) AS depth FROM bfs GROUP BY node ), -- All distinct (parent, child, child_pos) triples seen during the BFS. -- A child reached from k parents within the bound contributes k rows. -- Self-joins (times(x, x)) contribute one row per child position. edges AS ( SELECT DISTINCT parent, node AS child, child_pos FROM bfs WHERE parent IS NOT NULL ) SELECT d.node, e.parent, e.child_pos, provsql.get_gate_type(d.node)::TEXT, i.info1::TEXT, i.info2::TEXT, d.depth FROM node_depth d LEFT JOIN edges e ON e.child = d.node LEFT JOIN LATERAL provsql.get_infos(d.node) i ON TRUE ORDER BY d.depth, d.node, e.parent; $$ LANGUAGE sql STABLE PARALLEL SAFE; -- ---------------------------------------------------------------------- -- 4. External-tool availability probe. -- ---------------------------------------------------------------------- CREATE OR REPLACE FUNCTION tool_available(name TEXT) RETURNS BOOLEAN AS 'provsql','tool_available' LANGUAGE C STRICT; -- ---------------------------------------------------------------------- -- 5. Probability-method benchmark. -- ---------------------------------------------------------------------- CREATE OR REPLACE FUNCTION _probability_benchmark_one( in_token UUID, in_method TEXT, in_args TEXT = NULL) RETURNS TABLE ( method TEXT, args TEXT, probability DOUBLE PRECISION, milliseconds DOUBLE PRECISION, error TEXT) AS $$ DECLARE t0 TIMESTAMPTZ; p DOUBLE PRECISION; BEGIN t0 := clock_timestamp(); BEGIN p := provsql.probability_evaluate(in_token, in_method, in_args); method := in_method; args := in_args; probability := p; milliseconds := EXTRACT(EPOCH FROM clock_timestamp() - t0) * 1000; error := NULL; EXCEPTION WHEN OTHERS THEN method := in_method; args := in_args; probability := NULL; milliseconds := EXTRACT(EPOCH FROM clock_timestamp() - t0) * 1000; error := SQLERRM; END; RETURN NEXT; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION probability_benchmark( token UUID, monte_carlo_samples INT = 10000, weightmc_args TEXT = '0.8;0.2') RETURNS TABLE ( method TEXT, args TEXT, probability DOUBLE PRECISION, milliseconds DOUBLE PRECISION, error TEXT) AS $$ BEGIN RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'independent'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'possible-worlds'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'tree-decomposition'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one( token, 'monte-carlo', monte_carlo_samples::text); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'compilation', 'd4'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'compilation', 'd4v2'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'compilation', 'c2d'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'compilation', 'minic2d'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'compilation', 'dsharp'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'compilation', 'panini-obdd'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'compilation', 'panini-obdd-and'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'compilation', 'panini-decdnnf'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one( token, 'wmc', 'weightmc;' || weightmc_args); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'wmc', 'ganak'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'wmc', 'sharpsat-td'); RETURN QUERY SELECT * FROM provsql._probability_benchmark_one(token, 'wmc', 'dpmc'); END; $$ LANGUAGE plpgsql;