\set ECHO none \pset format unaligned SET search_path TO provsql_test,provsql; -- RangeCheck: support-based interval arithmetic decides gate_cmps -- whose two sides have provably ordered or disjoint supports. When -- it decides, the cmp is replaced by a Bernoulli gate_input carrying -- the certain probability (0 or 1), so the result is exact and no -- Monte Carlo sampling is performed. -- -- These tests verify the decided cases. The (provably 1.0 / 0.0) -- assertions below would not hold if MC ran on top, since MC has -- finite-sample noise; we therefore use exact equality (=) rather -- than abs(... - truth) < tol. -- (1) U(1, 2) > 0 -- support [1, 2] is strictly above 0, so P = 1.0. SELECT provsql.probability_evaluate( provsql.rv_cmp_gt(provsql.uniform(1, 2), 0::random_variable), 'monte-carlo', '1000000') = 1.0 AS uniform_above_zero_decided; -- (2) -U(1, 2) > 0 -- support [-2, -1] is strictly at-or-below 0, -- so P = 0.0. SELECT provsql.probability_evaluate( provsql.rv_cmp_gt(- provsql.uniform(1, 2), 0::random_variable), 'monte-carlo', '1000000') = 0.0 AS uniform_negated_decided; -- (3) Exponential is supported on [0, +inf), so Exp(1) >= 0 is always -- true. Decided exactly without sampling. SELECT provsql.probability_evaluate( provsql.rv_cmp_ge(provsql.exponential(1), 0::random_variable), 'monte-carlo', '1000000') = 1.0 AS exponential_nonnegative_decided; -- (3b) Erlang inherits the [0, +inf) support; Erlang(3, 1) < 0 is -- always false. SELECT provsql.probability_evaluate( provsql.rv_cmp_lt(provsql.erlang(3, 1), 0::random_variable), 'monte-carlo', '1000000') = 0.0 AS erlang_strictly_negative_decided; -- (4) U(1, 2) <= 0 -- the upper bound 2 is not <= 0, but every U -- value is in [1, 2] which is strictly above 0; P(<=0) = 0.0. SELECT provsql.probability_evaluate( provsql.rv_cmp_le(provsql.uniform(1, 2), 0::random_variable), 'monte-carlo', '1000000') = 0.0 AS uniform_le_zero_decided; -- (5) Composition: gate_arith propagates the support through PLUS. -- U(1, 2) + U(1, 2) ranges over [2, 4], so > 1 is always true. SELECT provsql.probability_evaluate( provsql.rv_cmp_gt(provsql.uniform(1, 2) + provsql.uniform(1, 2), 1::random_variable), 'monte-carlo', '1000000') = 1.0 AS arith_plus_above_decided; -- (6) NEG of an exponential: -Exp(1) is supported on (-inf, 0], so -- > 0 is always false. SELECT provsql.probability_evaluate( provsql.rv_cmp_gt(- provsql.exponential(1), 0::random_variable), 'monte-carlo', '1000000') = 0.0 AS arith_neg_exp_decided; -- (7) Inconclusive case: N(0, 1) > 0 has analytical answer 0.5, but -- the normal distribution has unbounded support so RangeCheck cannot -- decide. This must fall through to MC -- demonstrated by the -- result NOT being exact (abs(p - 0.5) is non-zero with very high -- probability for finite samples). We assert it is within tolerance, -- which is the underlying sampler invariant. An analytic-CDF pass -- could decide this case exactly when one is added. SET provsql.monte_carlo_seed = 42; SELECT abs(provsql.probability_evaluate( provsql.rv_cmp_gt(provsql.normal(0, 1), 0::random_variable), 'monte-carlo', '100000') - 0.5) < 0.01 AS normal_undecided_falls_through_to_mc; RESET provsql.monte_carlo_seed; -- (8) End-to-end through the planner hook: WHERE rv > c on a -- provenance-tracked table where the cmp is decided by RangeCheck. -- Result: every row's provsql is gate_one (input gate with p=1), and -- probability_evaluate returns 1.0 exactly for every row. CREATE TABLE rc_sensors(id text, reading provsql.random_variable); INSERT INTO rc_sensors VALUES ('a', provsql.uniform(5, 10)), -- > 0 always ('b', provsql.uniform(0.5, 1.5)); -- > 0 always SELECT add_provenance('rc_sensors'); CREATE TABLE rc_result AS SELECT id, probability_evaluate(provenance(), 'monte-carlo', '1000') AS p FROM rc_sensors WHERE reading > 0; SELECT remove_provenance('rc_result'); SELECT id, p = 1.0 AS exact_one FROM rc_result ORDER BY id; DROP TABLE rc_result; DROP TABLE rc_sensors; -- --------------------------------------------------------------- -- Joint-conjunction pass: AND of cmps over a shared continuous RV. -- The per-cmp pass above leaves these untouched (each cmp is -- individually inconclusive on its own interval). The joint pass -- intersects per-RV intervals over each gate_times's AND-conjunct -- cmps and resolves the gate_times to gate_zero when any RV's -- intersection is empty. -- (J1) Headline: x > 100 AND x < -100 over the SAME N(0, 1). Each -- cmp is individually undecidable (normal has unbounded support). -- Their conjunction constrains x to [100, -100] = empty -> 0 exact. WITH r AS (SELECT provsql.normal(0, 1) AS x) SELECT provsql.probability_evaluate( provsql.provenance_times( provsql.rv_cmp_gt(x, 100::random_variable), provsql.rv_cmp_lt(x, (-100)::random_variable)), 'independent') = 0.0 AS joint_infeasible_normal FROM r; -- (J2) Sanity: SATISFIABLE conjunction is NOT pruned. -1 < x < 1 -- intersects to [-1, 1] which is non-empty, so the joint pass leaves -- the gate_times intact. Asserting > 0 only checks the joint pass -- did not falsely prune. Uses 'tree-decomposition' because the -- hybrid decomposer's fast path now groups the two shared-X cmps -- and inlines a joint-table mulinput block, which 'independent' -- correctly rejects as a dependent circuit ("Not an independent -- circuit") when the cmps combine via AND. The -- tree-decomposition path handles the dependent circuit and -- recovers the exact P(-1 < X < 1) = 2*Phi(1) - 1 ~= 0.6827. WITH r AS (SELECT provsql.normal(0, 1) AS x) SELECT provsql.probability_evaluate( provsql.provenance_times( provsql.rv_cmp_gt(x, (-1)::random_variable), provsql.rv_cmp_lt(x, 1::random_variable)), 'tree-decomposition') > 0.0 AS joint_feasible_not_pruned FROM r; -- (J3) Uniform: u > 0.8 AND u < 0.2 over u ~ U(0,1). Each cmp is -- individually feasible on the [0, 1] support (u > 0.8 covers 20%, -- u < 0.2 covers 20%); the per-cmp pass therefore cannot resolve. -- The joint pass intersects [0.8, 1] with [0, 0.2] (each already -- intersected with the [0, 1] support) -> empty -> 0 exact. Without -- the joint pass, 'independent' would return 0.2 * 0.2 = 0.04. WITH r AS (SELECT provsql.uniform(0, 1) AS u) SELECT provsql.probability_evaluate( provsql.provenance_times( provsql.rv_cmp_gt(u, 0.8::random_variable), provsql.rv_cmp_lt(u, 0.2::random_variable)), 'independent') = 0.0 AS joint_infeasible_uniform FROM r; -- (J4) End-to-end through the planner hook: WHERE clause with two -- conjuncts on the same RV column. The planner-hook lifts both -- comparators into provenance_times and the joint pass collapses -- the gate_times to gate_zero, so probability_evaluate returns 0.0 -- exactly for the surviving row. CREATE TABLE rc_joint_sensors(id text, reading provsql.random_variable); INSERT INTO rc_joint_sensors VALUES ('s1', provsql.normal(0, 1)); SELECT add_provenance('rc_joint_sensors'); CREATE TABLE rc_joint_result AS SELECT id, probability_evaluate(provenance(), 'independent') AS p FROM rc_joint_sensors WHERE reading > 100 AND reading < -100; SELECT remove_provenance('rc_joint_result'); SELECT id, p = 0.0 AS exact_zero FROM rc_joint_result ORDER BY id; DROP TABLE rc_joint_result; DROP TABLE rc_joint_sensors; -- --------------------------------------------------------------- -- Broadened continuous EQ / NE shortcut. -- -- RangeCheck recognises @c P(X = Y) = 0 / @c P(X != Y) = 1 not just -- for bare @c gate_rv leaves but for every sub-circuit that produces -- a continuous distribution: @c gate_arith composites whose leaves -- are all @c gate_rv, and Bernoulli mixtures over two continuous -- arms. Categorical mixtures and pure @c gate_value Diracs fall -- through to the agg / interval / AnalyticEvaluator paths (the -- former has point masses; the latter is bit-comparable to a -- literal). -- -- The exact 0.0 / 1.0 assertions below would not hold if MC ran on -- top, so they pin the analytical resolution. -- --------------------------------------------------------------- -- (E1) Heterogeneous-rate exponential sum: no Erlang closure exists, -- so the simplifier cannot fold to a bare @c gate_rv. RangeCheck's -- broadened EQ shortcut sees a gate_arith composite whose every leaf -- is gate_rv and collapses the cmp to gate_zero. SELECT provsql.probability_evaluate( provsql.rv_cmp_eq(provsql.exponential(0.4) + provsql.exponential(0.3), 1::random_variable), 'independent') = 0.0 AS heterogeneous_exp_sum_eq; -- (E2) Product of two independent continuous RVs: also no closure, -- but every leaf below the gate_arith is gate_rv so EQ collapses -- exactly. SELECT provsql.probability_evaluate( provsql.rv_cmp_eq(provsql.normal(0, 1) * provsql.normal(0, 1), 0::random_variable), 'independent') = 0.0 AS product_two_normals_eq; -- (E3) Bernoulli mixture over two continuous arms is itself a -- continuous distribution (convex combination of two continuous -- densities, no point mass). SELECT provsql.probability_evaluate( provsql.rv_cmp_eq(provsql.mixture(0.3, provsql.normal(0, 1), provsql.uniform(-1, 1)), 0::random_variable), 'independent') = 0.0 AS bernoulli_mixture_continuous_arms_eq; -- (E4) NE is symmetric: same shapes resolve to 1.0 exactly. SELECT provsql.probability_evaluate( provsql.rv_cmp_ne(provsql.exponential(0.4) + provsql.exponential(0.3), 1::random_variable), 'independent') = 1.0 AS heterogeneous_exp_sum_ne; -- (E5) Mixture-of-arith and arith-of-mixture both qualify: the -- recursion through gate_arith into gate_mixture's branches sees -- two continuous arms. A common shape after the simplifier's -- mixture-lift fold. SELECT provsql.probability_evaluate( provsql.rv_cmp_eq(provsql.mixture(0.5, provsql.normal(0, 1) + provsql.uniform(0, 1), provsql.exponential(1) + provsql.uniform(0, 1)), 0::random_variable), 'independent') = 0.0 AS mixture_of_arith_continuous_eq; -- ---- Negative cases (shortcut does NOT fire) ------------------- -- (E6) Categorical is a discrete distribution: it has a Dirac mass -- at 0 with weight 0.5, so P(cat = 0) = 0.5, not 0. The shortcut -- must distinguish the Bernoulli mixture shape (3-wire) from the -- categorical N-wire shape via @c isCategoricalMixture. SELECT provsql.probability_evaluate( provsql.rv_cmp_eq(provsql.categorical(ARRAY[0.5, 0.5], ARRAY[0.0, 1.0]), 0::random_variable), 'independent') = 0.5 AS categorical_dirac_eq_not_shorted; -- (E7) Pure Dirac (@c as_random) compared to itself: handled by the -- identity shortcut (wires[0] == wires[1]); compared to a literal of -- the same value, the cmp drops out via the constant comparator and -- the broadened predicate does not fire on the gate_value side. SELECT provsql.probability_evaluate( provsql.rv_cmp_eq(provsql.as_random(2), 2::random_variable), 'independent') = 1.0 AS dirac_eq_constant; -- --------------------------------------------------------------- -- Exact EQ / NE via Dirac mass-map sum-product. -- -- When both sides of an EQ / NE cmp have statically-extractable -- @c (value -> mass) Dirac maps and are independent (random-leaf -- footprints disjoint), RangeCheck computes -- @c P(X = Y) = Σ_{v ∈ M_X ∩ M_Y} M_X[v] · M_Y[v] -- exactly and resolves the cmp to a Bernoulli with that probability. -- This generalises the disjoint-Diracs case (sum is 0) to overlapping -- Diracs with any mass distribution, and covers categoricals, -- Bernoulli mixtures with as_random branches, and combinations. -- -- The exact = 0.0 / 1.0 assertions pin analytical resolution (MC -- would have finite-sample noise); the fractional cases use exact -- equality on the closed-form sum-product. -- --------------------------------------------------------------- -- (F1) Disjoint categoricals fold to 0 exactly. The shortcut's -- common boundary case. SELECT provsql.probability_evaluate( provsql.rv_cmp_eq( provsql.categorical(ARRAY[0.5, 0.5], ARRAY[0.0, 1.0]), provsql.categorical(ARRAY[0.5, 0.5], ARRAY[2.0, 3.0])), 'independent') = 0.0 AS disjoint_categoricals_eq_zero; -- (F2) Categoricals with one outcome in common (value 1) and -- distinct supports otherwise: P(=) = 0.5 * 0.3 = 0.15 exactly. SELECT provsql.probability_evaluate( provsql.rv_cmp_eq( provsql.categorical(ARRAY[0.5, 0.5], ARRAY[0.0, 1.0]), provsql.categorical(ARRAY[0.3, 0.7], ARRAY[1.0, 2.0])), 'independent') = 0.15 AS overlapping_categoricals_one_match; -- (F3) Categoricals with identical outcome sets but different masses: -- P(=) = 0.4·0.7 + 0.6·0.3 = 0.46. A floating-point sum, so we -- compare with a tight 1e-12 tolerance against the exact value -- the -- analytical path is bit-deterministic, MC would not reach 1e-12. SELECT abs(provsql.probability_evaluate( provsql.rv_cmp_eq( provsql.categorical(ARRAY[0.4, 0.6], ARRAY[0.0, 1.0]), provsql.categorical(ARRAY[0.7, 0.3], ARRAY[0.0, 1.0])), 'independent') - 0.46) < 1e-12 AS same_outcomes_different_masses_eq; -- (F4) Bernoulli mixture of Diracs vs another mixture of Diracs: -- M_lhs = {0: 0.3, 5: 0.7}, M_rhs = {0: 0.5, 7: 0.5}. -- Only value 0 overlaps: P(=) = 0.3 · 0.5 = 0.15. SELECT provsql.probability_evaluate( provsql.rv_cmp_eq( provsql.mixture(0.3, provsql.as_random(0), provsql.as_random(5)), provsql.mixture(0.5, provsql.as_random(0), provsql.as_random(7))), 'independent') = 0.15 AS mixture_of_diracs_overlap_eq; -- (F5) NE: symmetric to (F4): 1 - 0.15 = 0.85. SELECT provsql.probability_evaluate( provsql.rv_cmp_ne( provsql.mixture(0.3, provsql.as_random(0), provsql.as_random(5)), provsql.mixture(0.5, provsql.as_random(0), provsql.as_random(7))), 'independent') = 0.85 AS mixture_of_diracs_overlap_ne; -- (F6) Mixture with one continuous and one Dirac arm vs categorical. -- LHS = mixture(0.3, N(0,1), as_random(5)) has Dirac mass at 5 with -- weight 0.7 (the continuous arm contributes no Dirac mass; vs the -- categorical's discrete points, that arm gives 0 by measure zero). -- RHS = categorical([0.5, 0.5], [5, 6]). Overlap at 5: 0.7 · 0.5 = 0.35. SELECT abs(provsql.probability_evaluate( provsql.rv_cmp_eq( provsql.mixture(0.3, provsql.normal(0, 1), provsql.as_random(5)), provsql.categorical(ARRAY[0.5, 0.5], ARRAY[5.0, 6.0])), 'independent') - 0.35) < 1e-12 AS mixture_continuous_plus_dirac_vs_cat; -- (F7) Independence guard: shared Bernoulli p_token makes the two -- mixtures perfectly correlated through their selector, so the -- sum-product factoring would silently mis-resolve. RangeCheck must -- bail and let the cmp flow through to the downstream path. We -- check this by verifying that 'independent' returns the same MC -- estimate (within tolerance) as a direct 'monte-carlo' call -- a -- silently-wrong shortcut would produce 0.4 · 0.4 = 0.16 instead of -- the true 0.4. DO $$ DECLARE p_tok uuid := public.uuid_generate_v4(); lhs random_variable; rhs random_variable; p_independent double precision; BEGIN PERFORM provsql.create_gate(p_tok, 'input'); PERFORM provsql.set_prob(p_tok, 0.4); lhs := provsql.mixture(p_tok, provsql.as_random(0), provsql.as_random(1)); rhs := provsql.mixture(p_tok, provsql.as_random(0), provsql.as_random(2)); SET LOCAL provsql.monte_carlo_seed = 42; p_independent := provsql.probability_evaluate( provsql.rv_cmp_eq(lhs, rhs), 'independent'); /* True value is 0.4; MC is within ~1% over 10k samples (the hybrid * decomposer's default budget). The silently-wrong shortcut would * have produced 0.16, far outside the tolerance. We pin >= 0.30 * (very loose) so this test is robust to MC seed changes. */ IF p_independent < 0.30 OR p_independent > 0.50 THEN RAISE EXCEPTION 'independence guard failed: got %, expected ~0.4', p_independent; END IF; END $$; SELECT 't'::text AS shared_p_token_correlation_guard; SELECT 'ok'::text AS continuous_rangecheck_done;