\set ECHO none -- Karp-Luby FPRAS for DNF-shaped provenance circuits ('karp-luby' method). -- -- The estimator is randomised; we pin provsql.monte_carlo_seed and either -- compare two same-seed runs for exact equality (reproducibility) or round a -- high-sample estimate to two decimals and check it against the exact value -- computed by an exact method. The circuits are built from the public token -- combinators provenance_plus / provenance_times (the same ones the query -- rewriter uses), with each root stashed in a session GUC so the queries below -- can address it. provsql.boolean_provenance is off so the load-time folding -- passes do not rewrite the DNF shape out from under the detector (e.g. -- absorbing x1 OR (x1 AND x2) down to x1). \pset format unaligned SET search_path TO provsql_test,provsql; SET provsql.boolean_provenance = off; SET provsql.monte_carlo_seed = 42; CREATE TABLE kl_in(id int); INSERT INTO kl_in VALUES (1),(2),(3),(4); SELECT add_provenance('kl_in'); DO $$ BEGIN PERFORM set_prob(provsql, 0.5) FROM kl_in WHERE id=1; PERFORM set_prob(provsql, 0.3) FROM kl_in WHERE id=2; PERFORM set_prob(provsql, 0.4) FROM kl_in WHERE id=3; PERFORM set_prob(provsql, 0.1) FROM kl_in WHERE id=4; END $$; DO $$ DECLARE x1 uuid; x2 uuid; x3 uuid; x4 uuid; BEGIN SELECT provsql INTO x1 FROM kl_in WHERE id=1; SELECT provsql INTO x2 FROM kl_in WHERE id=2; SELECT provsql INTO x3 FROM kl_in WHERE id=3; SELECT provsql INTO x4 FROM kl_in WHERE id=4; -- (i) flat DNF, disjoint clauses: (x1 AND x2) OR (x3 AND x4). -- Independent, so exact = 1-(1-.15)(1-.04) = 0.184. PERFORM set_config('kl.flat', provenance_plus(ARRAY[ provenance_times(x1,x2), provenance_times(x3,x4)])::text, false); -- (ii) one-level sharing: (x1 AND x2) OR (x1 AND x3). -- = P(x1)*P(x2 OR x3) = .5*(1-.7*.6) = 0.29. x1 shared across -- clauses, so not read-once: 'independent' refuses, Karp-Luby does not. PERFORM set_config('kl.shared', provenance_plus(ARRAY[ provenance_times(x1,x2), provenance_times(x1,x3)])::text, false); -- (iii) clause subsumption: x1 OR (x1 AND x2). = P(x1) = 0.5. The -- smallest-index coverage rejection must keep the count right. PERFORM set_config('kl.subsume', provenance_plus(ARRAY[ x1, provenance_times(x1,x2)])::text, false); -- (iv) non-DNF (an OR below an AND): (x1 OR x2) AND x3. PERFORM set_config('kl.nondnf', provenance_times( provenance_plus(ARRAY[x1,x2]), x3)::text, false); END $$; \set flat '(current_setting(''kl.flat'')::uuid)' \set shared '(current_setting(''kl.shared'')::uuid)' \set subsume '(current_setting(''kl.subsume'')::uuid)' \set nondnf '(current_setting(''kl.nondnf'')::uuid)' -- (i)-(iii): Karp-Luby vs the exact value of each circuit. SELECT 'flat' AS circuit, round(probability_evaluate(:flat, 'independent')::numeric,2) AS exact, round(probability_evaluate(:flat, 'karp-luby','300000')::numeric,2) AS karp_luby UNION ALL SELECT 'shared', round(probability_evaluate(:shared, 'tree-decomposition')::numeric,2), round(probability_evaluate(:shared, 'karp-luby','300000')::numeric,2) UNION ALL SELECT 'subsume', round(probability_evaluate(:subsume, 'possible-worlds')::numeric,2), round(probability_evaluate(:subsume, 'karp-luby','300000')::numeric,2) ORDER BY circuit; -- (v) reproducibility: two runs at the same pinned seed are bit-identical. SELECT probability_evaluate(:shared,'karp-luby','100000') = probability_evaluate(:shared,'karp-luby','100000') AS reproducible; -- (vii) the argument forms all resolve to the same DNF and estimate 0.29: -- samples=, bare integer, eps=, and eps=,delta=,max_samples=. SELECT round(probability_evaluate(:shared,'karp-luby','samples=300000')::numeric,2) AS samples_kw, round(probability_evaluate(:shared,'karp-luby','300000')::numeric,2) AS samples_bare, round(probability_evaluate(:shared,'karp-luby','eps=0.05')::numeric,2) AS eps_only, round(probability_evaluate(:shared,'karp-luby','eps=0.05,delta=0.01,max_samples=200000')::numeric,2) AS eps_delta_cap; -- (iv) a non-DNF circuit is refused (warning + error). SELECT probability_evaluate(:nondnf,'karp-luby','1000'); -- (viii) argument-grammar errors. SELECT probability_evaluate(:shared,'karp-luby','samples=100,eps=0.1'); -- conflict SELECT probability_evaluate(:shared,'karp-luby','delta=0.01'); -- delta needs epsilon SELECT probability_evaluate(:shared,'karp-luby','samples=100,max_samples=50'); -- max_samples needs adaptive SELECT probability_evaluate(:shared,'karp-luby','foo=1'); -- unknown key SELECT probability_evaluate(:shared,'karp-luby','not_a_number'); -- bad sample count -- The (eps, delta) approximation guarantee is surfaced as a machine-readable -- NOTICE at verbose_level >= 5 (the level Studio sets for evaluation), so a UI -- can render it; plain evaluation at the default level stays quiet. Relative -- guarantee for karp-luby, over m clauses. -- The reported sample count is the number of rounds the self-adjusting -- stopping rule actually ran (it stops once the estimate provably meets the -- target), which here is below the fixed worst-case bound over m clauses. SET provsql.verbose_level = 5; SELECT round(probability_evaluate(:shared,'karp-luby','eps=0.1,delta=0.05')::numeric,2) AS with_guarantee; -- A max_samples below the stopping rule's threshold is hit before the -- (eps, delta) target: a warning fires and the surfaced guarantee is -- downgraded to the relative error achieved over the samples actually spent. SELECT probability_evaluate(:shared,'karp-luby','eps=0.1,delta=0.05,max_samples=100') IS NOT NULL AS capped; RESET provsql.verbose_level; DROP TABLE kl_in; RESET provsql.boolean_provenance; RESET provsql.monte_carlo_seed;