\set ECHO none
-- NOTE(review): keep the ECHO line above as the very first line.  If the
-- harness runs psql in echo-all mode (as pg_regress presumably does),
-- anything placed before it would be echoed into the captured output --
-- confirm before moving or prepending lines.
-- Unaligned output: results are printed without column padding.
\pset format unaligned
-- Unqualified names used below (e.g. the random_variable type in the
-- ::random_variable casts) are looked up in provsql_test first, then in
-- provsql.
SET search_path TO provsql_test, provsql;

-- Truncated-distribution closed-form fast path in rv_sample / rv_histogram.
--
-- The two SRFs route the conditional case (prov != gate_one()) through
-- the truncated-distribution closed-form sampler when the root is a
-- bare gate_rv of a supported family (Uniform / Normal / Exponential)
-- and the event reduces to a single interval on it.  Tight events that
-- previously degraded the MC-rejection budget now produce exactly N
-- samples with 100% acceptance.  Erlang and non-bare-gate_rv shapes
-- fall through to the MC rejection path unchanged.
--
-- Determinism: provsql.monte_carlo_seed is pinned for the whole script
-- so that the empirical moments checked below are identical from run
-- to run.  The closed-form path is exact in distribution; the moment
-- assertions therefore only need a tolerance scaled to the n=10000
-- sampling noise (~1/sqrt(n) on the mean, ~1/sqrt(n/2) on the
-- variance).
SET provsql.monte_carlo_seed TO 42;

-- ---------------------------------------------------------------
-- Uniform truncation: U(0, 10) conditioned on X > 9.5 is U(9.5, 10).
-- collectRvConstraints intersects the event with the RV's natural
-- support, so the conditional distribution is a plain U(9.5, 10) draw
-- with 100% acceptance.
-- Expected moments: mean = 9.75, variance = 0.5^2 / 12 = 1/48 ≈ 0.02083.
-- ---------------------------------------------------------------
WITH base AS (
    -- The unconditioned random variable.
    SELECT provsql.uniform(0, 10) AS x
),
cond AS (
    -- Conditioning event (X > 9.5) and the UUID token of the variable.
    SELECT provsql.rv_cmp_gt(x, 9.5::random_variable) AS evt,
           (x)::uuid AS tok
    FROM base
),
draws AS (
    -- 10000 samples of the variable conditioned on the event.
    SELECT v
    FROM cond
    CROSS JOIN LATERAL provsql.rv_sample(cond.tok, 10000, cond.evt) AS v
),
stats AS (
    -- Single-row summary of the sample set.
    SELECT count(*) AS n_total,
           count(*) FILTER (WHERE v >= 9.5 AND v <= 10) AS n_in_range,
           avg(v) AS mean_v,
           var_samp(v) AS var_v
    FROM draws
)
SELECT n_total = 10000
       AND n_in_range = 10000
       AND abs(mean_v - 9.75) < 0.01
       AND abs(var_v - 0.02083) < 0.005
       AS uniform_truncation_exact
FROM stats;

-- ---------------------------------------------------------------
-- Exponential, one-sided event: Exp(0.4) conditioned on X > 5 is
-- distributed as 5 + Exp(0.4) (memorylessness), which is the shortcut
-- the sampler takes, giving 100% acceptance.
-- Expected moments: mean = 5 + 1/0.4 = 7.5, variance = 1/0.16 = 6.25.
-- ---------------------------------------------------------------
WITH base AS (
    -- The unconditioned random variable.
    SELECT provsql.exponential(0.4) AS x
),
cond AS (
    -- Conditioning event (X > 5) and the UUID token of the variable.
    SELECT provsql.rv_cmp_gt(x, 5::random_variable) AS evt,
           (x)::uuid AS tok
    FROM base
),
draws AS (
    -- 10000 samples of the variable conditioned on the event.
    SELECT v
    FROM cond
    CROSS JOIN LATERAL provsql.rv_sample(cond.tok, 10000, cond.evt) AS v
),
stats AS (
    -- Single-row summary of the sample set.
    SELECT count(*) AS n_total,
           count(*) FILTER (WHERE v >= 5) AS n_in_range,
           avg(v) AS mean_v,
           var_samp(v) AS var_v
    FROM draws
)
SELECT n_total = 10000
       AND n_in_range = 10000
       AND abs(mean_v - 7.5) < 0.1
       AND abs(var_v - 6.25) < 0.3
       AS exponential_memorylessness_exact
FROM stats;

-- ---------------------------------------------------------------
-- Exponential two-sided.  Exp(1) | 1 < X < 3 -> truncated exponential
-- via inverse-CDF (std::log1p / std::expm1 for numerical accuracy).
-- Truncated mean of Exp(λ) on (a, b):
--   E[X | a < X < b] =
--     [exp(-λa)(λa + 1) - exp(-λb)(λb + 1)] / [λ (exp(-λa) - exp(-λb))]
-- For λ=1, a=1, b=3:
--   numerator   = 2/e - 4/e^3   ≈ 0.73576 - 0.19915 = 0.53661
--   denominator = e^-1 - e^-3   ≈ 0.36788 - 0.04979 = 0.31809
--   mean ≈ 0.53661 / 0.31809 ≈ 1.6870.
-- ---------------------------------------------------------------
WITH r AS (SELECT provsql.exponential(1) AS e),
     -- The event is the conjunction (provenance_times) of the two
     -- one-sided comparisons, i.e. 1 < X < 3.
     ev AS (SELECT provsql.provenance_times(
                     provsql.rv_cmp_gt(e, 1::random_variable),
                     provsql.rv_cmp_lt(e, 3::random_variable)) AS ev,
                   (e)::uuid AS tok
              FROM r),
     s AS (SELECT v FROM ev, provsql.rv_sample(tok, 10000, ev) v)
SELECT count(*) = 10000
       AND count(*) FILTER (WHERE v >= 1 AND v <= 3) = 10000
       -- Centre on the exact truncated mean; 0.05 absorbs sampling noise.
       AND abs(avg(v) - 1.6870) < 0.05
       AS exponential_two_sided_inverse_cdf
FROM s;

-- ---------------------------------------------------------------
-- Normal, two-sided event: N(0, 1) conditioned on -2 < X < 2, sampled
-- through the inverse-CDF transform.  The forward CDF uses std::erf
-- (same kernel as AnalyticEvaluator); the inverse uses the
-- Beasley-Springer-Moro approximation.
-- Expected truncated moments, with a = -2, b = 2:
--   Z        = Phi(b) - Phi(a) ≈ 0.9545
--   mean     = 0 (symmetric interval)
--   variance = 1 - (b phi(b) - a phi(a)) / Z = 1 - 4 phi(2) / Z ≈ 0.7737
-- This case ALSO exercises the load-time @c runConstantFold pass, which
-- lifts the `-2::random_variable` parser shape (gate_arith NEG over
-- gate_value:2) into a clean gate_value:-2 so @c collectRvConstraints
-- recognises the cmp's constant side.  Keep the literal spelled exactly
-- `-2::random_variable` (no parentheses around -2) to preserve that
-- parser shape.
-- ---------------------------------------------------------------
WITH base AS (
    -- The unconditioned random variable.
    SELECT provsql.normal(0, 1) AS x
),
cond AS (
    -- Conjunction of the two one-sided comparisons, plus the UUID token.
    SELECT provsql.provenance_times(
               provsql.rv_cmp_gt(x, -2::random_variable),
               provsql.rv_cmp_lt(x,  2::random_variable)) AS evt,
           (x)::uuid AS tok
    FROM base
),
draws AS (
    -- 10000 samples of the variable conditioned on the event.
    SELECT v
    FROM cond
    CROSS JOIN LATERAL provsql.rv_sample(cond.tok, 10000, cond.evt) AS v
),
stats AS (
    -- Single-row summary of the sample set.
    SELECT count(*) AS n_total,
           count(*) FILTER (WHERE v BETWEEN -2 AND 2) AS n_in_range,
           avg(v) AS mean_v,
           var_samp(v) AS var_v
    FROM draws
)
SELECT n_total = 10000
       AND n_in_range = 10000
       AND abs(mean_v) < 0.05
       AND abs(var_v - 0.7737) < 0.05
       AS normal_two_sided_inverse_cdf
FROM stats;

-- ---------------------------------------------------------------
-- Asymmetric Normal truncation: N(0, 1) | 1 < X < 3.
-- Theoretical mean of truncated normal:
--   mean = mu + sigma * (phi(alpha) - phi(beta)) / (Phi(beta) - Phi(alpha))
--   alpha = 1, beta = 3
--   phi(1) ≈ 0.24197, phi(3) ≈ 0.00443
--   Phi(1) ≈ 0.84134, Phi(3) ≈ 0.99865
--   mean ≈ 0 + 1 * (0.24197 - 0.00443) / (0.99865 - 0.84134)
--        = 0.23754 / 0.15731 ≈ 1.5100
-- Strong validation of inv_phi: a sign-flipped or constant-broken BSM
-- would shift this mean by O(0.5) or more, far outside the tolerance.
-- ---------------------------------------------------------------
WITH r AS (SELECT provsql.normal(0, 1) AS n),
     -- The event is the conjunction (provenance_times) of the two
     -- one-sided comparisons, i.e. 1 < X < 3.
     ev AS (SELECT provsql.provenance_times(
                     provsql.rv_cmp_gt(n, 1::random_variable),
                     provsql.rv_cmp_lt(n, 3::random_variable)) AS ev,
                   (n)::uuid AS tok
              FROM r),
     s AS (SELECT v FROM ev, provsql.rv_sample(tok, 10000, ev) v)
SELECT count(*) = 10000
       AND count(*) FILTER (WHERE v >= 1 AND v <= 3) = 10000
       -- Centre on the exact truncated mean; 0.05 absorbs sampling noise.
       AND abs(avg(v) - 1.5100) < 0.05
       AS normal_asymmetric_truncation
FROM s;

-- ---------------------------------------------------------------
-- Erlang is not covered by the closed-form path, so Erlang(2, 1) | X > 3
-- falls through to MC rejection.
-- P(X > 3) for Erlang(2, 1) = (1 + 3) * e^-3 ≈ 0.1991, i.e. ~20%
-- acceptance.  The exact number of delivered samples depends on the MC
-- budget, so it is not pinned: the check is a lower bound (at least
-- 9000 samples delivered) plus the requirement that every returned
-- sample is >= 3.
-- ---------------------------------------------------------------
SET provsql.rv_mc_samples TO 100000;   -- enough budget to deliver 10k
-- NOTE(review): terse verbosity presumably keeps any message raised by
-- the fallback path short and stable in the captured output -- confirm.
\set VERBOSITY terse
WITH base AS (
    -- The unconditioned random variable.
    SELECT provsql.erlang(2, 1) AS x
),
cond AS (
    -- Conditioning event (X > 3) and the UUID token of the variable.
    SELECT provsql.rv_cmp_gt(x, 3::random_variable) AS evt,
           (x)::uuid AS tok
    FROM base
),
draws AS (
    -- Up to 10000 samples of the variable conditioned on the event.
    SELECT v
    FROM cond
    CROSS JOIN LATERAL provsql.rv_sample(cond.tok, 10000, cond.evt) AS v
),
stats AS (
    -- Single-row summary of the sample set.
    SELECT count(*) AS n_total,
           count(*) FILTER (WHERE v >= 3) AS n_in_range
    FROM draws
)
SELECT n_total >= 9000   -- ~10000 expected with 20% acceptance on 100k budget
       AND n_in_range = n_total
       AS erlang_falls_through_to_mc
FROM stats;
\set VERBOSITY default
RESET provsql.rv_mc_samples;

-- ---------------------------------------------------------------
-- rv_histogram regression.  With a tight provsql.rv_mc_samples budget,
-- closed-form-handled shapes used to raise "conditional MC accepted 0
-- of N samples"; they no longer do.  U(0, 100) | X > 99 has only 1%
-- acceptance under rejection, whereas the closed-form path produces
-- exactly N samples.  The budget is pinned to 100 (the setting under
-- which the MC fallback would have failed) and the test verifies that
-- the histogram is produced with 5 bins spanning [99, 100].
-- ---------------------------------------------------------------
SET provsql.rv_mc_samples TO 100;
WITH base AS (
    -- The unconditioned random variable.
    SELECT provsql.uniform(0, 100) AS x
),
cond AS (
    -- Conditioning event (X > 99) and the UUID token of the variable.
    SELECT provsql.rv_cmp_gt(x, 99::random_variable) AS evt,
           (x)::uuid AS tok
    FROM base
),
hist AS (
    -- 5-bin histogram of the conditioned variable, as a jsonb array.
    SELECT provsql.rv_histogram(cond.tok, 5, cond.evt)::jsonb AS bins
    FROM cond
)
SELECT jsonb_array_length(bins) = 5
       AND CAST(bins -> 0 ->> 'bin_lo' AS float8) = 99.0
       AND CAST(bins -> 4 ->> 'bin_hi' AS float8) = 100.0
       AS histogram_tight_budget_closed_form
FROM hist;
RESET provsql.rv_mc_samples;

-- Teardown: restore the seed setting pinned at the top of the script.
RESET provsql.monte_carlo_seed;

-- Final marker row: shows the script ran through to the end.
SELECT CAST('ok' AS text) AS continuous_sample_truncated_done;