\set ECHO none
\pset format unaligned
SET search_path TO provsql_test,provsql;

-- Pin the MC seed: any test below that falls back to MC rejection
-- must be reproducible across runs.
SET provsql.monte_carlo_seed = 42;

-- Sections 1-3 below enforce closed-form only.  The MC-fallback
-- section explicitly raises rv_mc_samples first.
SET provsql.rv_mc_samples = 0;

-- ---------------------------------------------------------------------
-- 1. Truncated Normal: closed form.
--
--   X ~ N(0, 1) conditioned on X > 3.  The truncated-normal mean is
--   φ(3) / (1 − Φ(3)).  The second raw moment is
--   σ² (1 + α·φ(α)/(1−Φ(α))) with α = 3.  Reference values come from
--   the closed form; erf(3/sqrt(2)) is inlined as a precomputed
--   17-significant-digit literal because PG's built-in erf() is
--   PostgreSQL 18+ only.
-- ---------------------------------------------------------------------
WITH r AS (SELECT provsql.normal(0, 1) AS rv),
     ref AS (
       SELECT  0.39894228040143268 * exp(-9.0/2)
             / (1 - (0.5 * (1 + 0.997300203936739793))) AS mean_ref
     )
SELECT abs(expected(rv, rv_cmp_gt(rv, as_random(3))) - ref.mean_ref) < 1e-12
         AS trunc_normal_mean,
       abs(moment(rv, 2, rv_cmp_gt(rv, as_random(3)))
            - (1 + 3 * ref.mean_ref)) < 1e-12
         AS trunc_normal_m2
  FROM r, ref;

-- Conditional support of the same RV: should be (3, +Infinity).
WITH r AS (SELECT provsql.normal(0, 1) AS rv)
SELECT s.lo = 3 AS trunc_normal_lo_eq_3,
       s.hi = 'Infinity'::float8 AS trunc_normal_hi_inf
  FROM r, provsql.rv_support(rv, rv_cmp_gt(rv, as_random(3))) s;

-- ---------------------------------------------------------------------
-- 2. Truncated Uniform with two cmps (AND-conjunct walker).
--
--   X ~ U(0, 10), event = (X > 3 AND X < 7).  Conditional distribution
--   is U(3, 7); mean = 5, variance = (7-3)^2 / 12 = 4/3.
-- ---------------------------------------------------------------------
DO $$
DECLARE
  rv random_variable;
  ev uuid;
BEGIN
  rv := provsql.uniform(0, 10);
  ev := provenance_times(rv_cmp_gt(rv, as_random(3)),
                         rv_cmp_lt(rv, as_random(7)));
  RAISE NOTICE 'trunc_uniform_mean=%',
    abs(expected(rv, ev) - 5.0) < 1e-12;
  RAISE NOTICE 'trunc_uniform_variance=%',
    abs(provsql.variance(rv, ev) - 4.0/3.0) < 1e-12;
  RAISE NOTICE 'trunc_uniform_support=%',
    (SELECT s.lo = 3 AND s.hi = 7 FROM provsql.rv_support(rv, ev) s);
END
$$;

-- ---------------------------------------------------------------------
-- 3. Truncated Exponential: memorylessness.
--
--   X ~ Exp(2), event = X > 1.  Conditional mean = 1 + 1/2 = 1.5.
-- ---------------------------------------------------------------------
WITH r AS (SELECT provsql.exponential(2) AS rv)
SELECT abs(expected(rv, rv_cmp_gt(rv, as_random(1))) - 1.5) < 1e-12
         AS trunc_exp_memorylessness
  FROM r;

-- Truncated Exp on a finite interval: E[X | a < X < b] computed
-- against the closed-form formula.  For X ~ Exp(2), a = 0, b = 1:
--   (1 − 3 e^{-2}) / (2 (1 − e^{-2}))
WITH r AS (SELECT provsql.exponential(2) AS rv),
     ref AS (SELECT (1 - 3 * exp(-2.0)) / (2 * (1 - exp(-2.0))) AS mean_ref)
SELECT abs(expected(rv, rv_cmp_lt(rv, as_random(1))) - ref.mean_ref) < 1e-12
         AS trunc_exp_finite_interval
  FROM r, ref;

-- ---------------------------------------------------------------------
-- 4. Gate_one event -> unconditional.  Passing gate_one() as prov
--    must produce the same answer as omitting it (verified against
--    the closed-form unconditional mean of N(0,1) = 0).
-- ---------------------------------------------------------------------
WITH r AS (SELECT provsql.normal(0, 1) AS rv)
SELECT expected(rv, gate_one()) = expected(rv) AS gate_one_unconditional,
       expected(rv) = 0 AS unconditional_mean_zero
  FROM r;

-- ---------------------------------------------------------------------
-- 5. WHERE end-to-end.  This is the headline test: builds a sensor
--    table, runs the planner hook that lifts `reading > 3` into the
--    row's provenance, and asks for `expected(reading, provenance())`.
--
--    Tests the full pipeline: joint-circuit loader, AND-walker
--    skipping the independent input-Bernoulli factor, and the
--    closed-form Normal truncation.
-- ---------------------------------------------------------------------
CREATE TABLE cond_sensor(reading random_variable);
INSERT INTO cond_sensor VALUES (provsql.normal(0, 1));
SELECT add_provenance('cond_sensor');

WITH ref AS (
  SELECT  0.39894228040143268 * exp(-9.0/2)
        / (1 - (0.5 * (1 + 0.997300203936739793))) AS mean_ref
)
SELECT abs(expected(reading, provenance()) - ref.mean_ref) < 1e-12
         AS where_e2e_mean,
       abs(moment(reading, 2, provenance())
            - (1 + 3 * ref.mean_ref)) < 1e-12
         AS where_e2e_m2
  INTO cond_sensor_result
  FROM cond_sensor, ref WHERE reading > 3;
SELECT remove_provenance('cond_sensor_result');
SELECT * FROM cond_sensor_result;
DROP TABLE cond_sensor_result;
DROP TABLE cond_sensor;

-- ---------------------------------------------------------------------
-- 6. Shared-atom coupling.  The same RV UUID appears in two cmps; the
--    MC rejection sampler must use one draw of the RV per iteration
--    so both indicators see consistent state.  Closed-form path on
--    the first cmp is OK; the AND-walker collects both.  Mean of
--    Exp(1) | (X > 1 AND X < 5) by closed-form vs. the same event
--    expressed as two cmps using the SAME rv must agree.
-- ---------------------------------------------------------------------
DO $$
DECLARE
  rv random_variable;
  ev_one uuid;
  ev_two uuid;
BEGIN
  rv := provsql.exponential(1);
  ev_one := provenance_times(rv_cmp_gt(rv, as_random(1)),
                             rv_cmp_lt(rv, as_random(5)));
  -- Re-conjoin: trailing gate_one factor is independent and should
  -- be transparent for the conditional mean.
  ev_two := provenance_times(rv_cmp_gt(rv, as_random(1)),
                             rv_cmp_lt(rv, as_random(5)),
                             gate_one());
  RAISE NOTICE 'shared_atom_coupling=%',
    abs(expected(rv, ev_one) - expected(rv, ev_two)) < 1e-12;
END
$$;

-- ---------------------------------------------------------------------
-- 7. Sample + histogram honour the conditional event.  These need MC
--    so we enable rv_mc_samples before this section.
-- ---------------------------------------------------------------------
SET provsql.rv_mc_samples = 10000;

-- All accepted samples land inside the truncation interval (3, 7).
WITH r AS (SELECT provsql.uniform(0, 10) AS rv),
     s AS (
       SELECT rv,
              provenance_times(rv_cmp_gt(rv, as_random(3)),
                               rv_cmp_lt(rv, as_random(7))) AS ev
         FROM r
     )
SELECT bool_and(v > 3 AND v < 7) AS samples_in_truncation
  FROM s, provsql.rv_sample(rv, 100, ev) AS v;

-- Histogram bin edges line up with the truncated support.
WITH r AS (SELECT provsql.uniform(0, 10) AS rv),
     s AS (
       SELECT rv,
              provenance_times(rv_cmp_gt(rv, as_random(3)),
                               rv_cmp_lt(rv, as_random(7))) AS ev
         FROM r
     )
SELECT (h.first_bin_lo)::numeric = 3 AS hist_lo_eq_3,
       (h.last_bin_hi)::numeric = 7 AS hist_hi_eq_7
  FROM s,
       LATERAL (
         WITH bins AS (
           SELECT jsonb_array_elements(provsql.rv_histogram(rv, 10, ev)) AS b
         )
         SELECT (SELECT (b->>'bin_lo')::float8 FROM bins LIMIT 1) AS first_bin_lo,
                (SELECT (b->>'bin_hi')::float8 FROM bins
                  ORDER BY (b->>'bin_hi')::float8 DESC LIMIT 1) AS last_bin_hi
       ) h;

-- ---------------------------------------------------------------------
-- 8. Zero-probability event raises an actionable error.
--    P(N(0,1) > 100) is effectively 0; conditional MC accepts nothing.
-- ---------------------------------------------------------------------
DO $$
DECLARE
  rv random_variable;
  ev uuid;
  msg text;
  matched bool;
BEGIN
  rv := provsql.normal(0, 1);
  -- N(0,1) | X*X > 10000: the squared shape defeats the closed-form
  -- walker (cmp side is gate_arith, not bare gate_rv), so MC kicks in.
  -- With finite samples, the acceptance rate is ~0.
  ev := provsql.provenance_cmp(
          (rv * rv)::uuid,
          random_variable_cmp_oid('>'),
          (as_random(10000))::uuid);
  BEGIN
    PERFORM expected(rv, ev);
    RAISE NOTICE 'zero_prob_event_did_not_raise';
  EXCEPTION WHEN OTHERS THEN
    msg := SQLERRM;
    -- Either MC's acceptance check or RangeCheck's infeasibility
    -- collapse fires; both are acceptable signals to the user.
    -- The 0-accepted MC path now says "conditioning event is
    -- infeasible" directly; the partial-acceptance path still
    -- mentions rv_mc_samples; the closed-form-aware short-circuit
    -- says "infeasible" too.
    matched := position('infeasible' IN msg) > 0
            OR position('rv_mc_samples' IN msg) > 0
            OR position('satisfiable' IN msg) > 0
            OR position('accepted' IN msg) > 0;
    RAISE NOTICE 'zero_prob_event_raises_specific=%', matched;
  END;
END
$$;

-- ---------------------------------------------------------------------
-- 8b. Closed-form-aware infeasibility short-circuit.
--     For a bare gate_rv root with an event provably empty against
--     the RV's support (Uniform(0, 10) | X > 100 — the conjunct
--     intersects to an empty interval), eventIsProvablyInfeasible
--     spots it before any MC sampling.  Proof that the short-circuit
--     fires: set provsql.rv_mc_samples = 0, which would otherwise
--     raise "MC fallback disabled"; the conditional moment must
--     still raise with the "infeasible" wording.
-- ---------------------------------------------------------------------
DO $$
DECLARE
  rv random_variable;
  ev uuid;
  msg text;
  matched_infeasible bool;
  mentions_mc bool;
BEGIN
  rv := provsql.uniform(0, 10);
  ev := provsql.rv_cmp_gt(rv, 100::random_variable);
  -- MC budget = 0 so any path that falls through to MC would raise
  -- with a "MC fallback disabled" message instead of an infeasibility
  -- one; reaching "infeasible" without the MC string proves the
  -- closed-form-aware short-circuit fired.
  SET LOCAL provsql.rv_mc_samples = 0;
  BEGIN
    PERFORM expected(rv, ev);
    RAISE NOTICE 'short_circuit_did_not_raise';
  EXCEPTION WHEN OTHERS THEN
    msg := SQLERRM;
    matched_infeasible := position('infeasible' IN msg) > 0;
    mentions_mc := position('rv_mc_samples' IN msg) > 0
                OR position('Monte Carlo' IN msg) > 0;
    RAISE NOTICE 'closed_form_short_circuit=%',
                 matched_infeasible AND NOT mentions_mc;
  END;
END
$$;

-- ---------------------------------------------------------------------
-- 9. gate_delta is transparent in the event walker.
--
--   In the Boolean semiring δ is the identity, so wrapping the event
--   in provsql.provenance_delta() must leave every rv_* answer
--   unchanged.  Surfaced as an "Unsupported gate type in Boolean
--   evaluation: delta" error before the sweep landed; the Sampler's
--   evalBool now treats gate_delta as identity, and the AND-conjunct
--   walker descends through it so the truncated-distribution
--   closed-form still fires.
-- ---------------------------------------------------------------------
SET provsql.rv_mc_samples = 0;  -- closed-form only.

-- 9a. Expected / variance / second raw moment of N(0,1) | X > 3 with
-- and without a δ wrapper around the cmp.  Equality is exact under
-- the closed-form path; if the walker fell back to MC for either
-- side the comparison would fail.
WITH r AS (SELECT provsql.normal(0, 1) AS rv),
     ev AS (
       SELECT rv,
              provsql.rv_cmp_gt(rv, provsql.as_random(3)) AS plain,
              provsql.provenance_delta(
                provsql.rv_cmp_gt(rv, provsql.as_random(3))) AS wrapped
         FROM r
     )
SELECT expected(rv, plain) = expected(rv, wrapped)
         AS delta_transparent_for_expected,
       provsql.variance(rv, plain) = provsql.variance(rv, wrapped)
         AS delta_transparent_for_variance,
       moment(rv, 2, plain) = moment(rv, 2, wrapped)
         AS delta_transparent_for_raw_moment
  FROM ev;

-- 9b. rv_support: closed-form intersected interval must survive a δ
-- wrapper.
WITH r AS (SELECT provsql.normal(0, 1) AS rv),
     ev AS (
       SELECT rv, provsql.provenance_delta(
                    provsql.rv_cmp_gt(rv, provsql.as_random(3))) AS wrapped
         FROM r
     )
SELECT s.lo = 3 AS support_lo_eq_3,
       s.hi = 'Infinity'::float8 AS support_hi_inf
  FROM ev, provsql.rv_support(rv, wrapped) s;

-- 9c. δ around an AND-conjunction: the walker must descend through δ
-- AND recurse into the inner gate_times to collect both cmps.
-- Truncated U(0, 10) | (X > 3 AND X < 7) → conditional mean = 5.
DO $$
DECLARE
  rv random_variable;
  inner_ev uuid;
  wrapped uuid;
BEGIN
  rv := provsql.uniform(0, 10);
  inner_ev := provenance_times(rv_cmp_gt(rv, as_random(3)),
                               rv_cmp_lt(rv, as_random(7)));
  wrapped := provsql.provenance_delta(inner_ev);
  RAISE NOTICE 'delta_around_and_conjunct=%',
    abs(expected(rv, wrapped) - 5.0) < 1e-12;
END
$$;

-- 9d. rv_sample / rv_histogram under a δ-wrapped event need MC, so
-- raise the sample budget for the remaining checks.
SET provsql.rv_mc_samples = 10000;

-- Every accepted sample lies in the truncation interval.
WITH r AS (SELECT provsql.uniform(0, 10) AS rv),
     s AS (
       SELECT rv,
              provsql.provenance_delta(
                provenance_times(rv_cmp_gt(rv, as_random(3)),
                                 rv_cmp_lt(rv, as_random(7)))) AS wrapped
         FROM r
     )
SELECT bool_and(v > 3 AND v < 7) AS rv_sample_respects_delta_event
  FROM s, provsql.rv_sample(rv, 100, wrapped) AS v;

-- Histogram bin edges line up with the truncated support under δ.
WITH r AS (SELECT provsql.uniform(0, 10) AS rv),
     s AS (
       SELECT rv,
              provsql.provenance_delta(
                provenance_times(rv_cmp_gt(rv, as_random(3)),
                                 rv_cmp_lt(rv, as_random(7)))) AS wrapped
         FROM r
     )
SELECT (h.first_bin_lo)::numeric = 3 AS hist_lo_eq_3,
       (h.last_bin_hi)::numeric = 7 AS hist_hi_eq_7
  FROM s,
       LATERAL (
         WITH bins AS (
           SELECT jsonb_array_elements(provsql.rv_histogram(rv, 10, wrapped)) AS b
         )
         SELECT (SELECT (b->>'bin_lo')::float8 FROM bins LIMIT 1) AS first_bin_lo,
                (SELECT (b->>'bin_hi')::float8 FROM bins
                  ORDER BY (b->>'bin_hi')::float8 DESC LIMIT 1) AS last_bin_hi
       ) h;

RESET provsql.monte_carlo_seed;
RESET provsql.rv_mc_samples;