-- ============================================================================ -- Biscuit Index Comprehensive Benchmark Script (FIXED) -- Compares: Sequential Scan, B-Tree, pg_trgm, and Biscuit indexes -- Dataset: 1,000,000 UUID records -- ============================================================================ -- Clean up any existing objects DROP TABLE IF EXISTS benchmark_data CASCADE; DROP TABLE IF EXISTS benchmark_results CASCADE; DROP TABLE IF EXISTS benchmark_statistics CASCADE; DROP EXTENSION IF EXISTS pg_trgm CASCADE; DROP EXTENSION IF EXISTS pg_biscuit CASCADE; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'BISCUIT INDEX BENCHMARK INITIALIZATION'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; -- Create extensions DO $$ BEGIN RAISE NOTICE 'Creating extensions...'; END $$; CREATE EXTENSION IF NOT EXISTS pg_trgm; CREATE EXTENSION IF NOT EXISTS pg_biscuit; -- Create benchmark table DO $$ BEGIN RAISE NOTICE 'Creating benchmark table...'; END $$; CREATE TABLE benchmark_data ( id SERIAL PRIMARY KEY, uuid_str TEXT NOT NULL, created_at TIMESTAMP DEFAULT NOW() ); -- Populate with 1,000,000 UUID records DO $$ BEGIN RAISE NOTICE 'Populating 1,000,000 UUID records...'; END $$; INSERT INTO benchmark_data (uuid_str) SELECT gen_random_uuid()::TEXT FROM generate_series(1, 1000000); -- Analyze table ANALYZE benchmark_data; DO $$ DECLARE row_count INTEGER; BEGIN SELECT COUNT(*) INTO row_count FROM benchmark_data; RAISE NOTICE 'Total records inserted: %', row_count; END $$; -- Create indexes DO $$ BEGIN RAISE NOTICE '----------------------------------------'; END $$; DO $$ BEGIN RAISE NOTICE 'Creating indexes...'; END $$; CREATE INDEX idx_btree ON benchmark_data(uuid_str); DO $$ BEGIN RAISE NOTICE ' B-Tree index created'; END $$; CREATE INDEX idx_trgm ON benchmark_data USING GIN(uuid_str gin_trgm_ops); DO $$ BEGIN RAISE NOTICE ' pg_trgm GIN index created'; END $$; SET client_min_messages = WARNING; CREATE INDEX idx_biscuit ON benchmark_data USING biscuit(uuid_str); SET client_min_messages = NOTICE; DO $$ BEGIN RAISE NOTICE ' Biscuit index created'; END $$; -- Vacuum and analyze VACUUM ANALYZE benchmark_data; DO $$ BEGIN RAISE NOTICE 'Indexes created and analyzed successfully'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; -- Create results table with enhanced metrics CREATE TABLE benchmark_results ( test_id SERIAL PRIMARY KEY, test_category TEXT, test_name TEXT, pattern TEXT, index_type TEXT, result_count INTEGER, execution_time_ms NUMERIC(12,6), planning_time_ms NUMERIC(12,6), index_scan_cost NUMERIC(12,2), selectivity NUMERIC(8,6), timestamp TIMESTAMP DEFAULT NOW() ); -- ============================================================================ -- COMPREHENSIVE TEST SUITE -- ============================================================================ -- Helper function to run a benchmark test CREATE OR REPLACE FUNCTION run_benchmark_test( p_test_category TEXT, p_test_name TEXT, p_pattern TEXT, p_index_type TEXT, p_enable_indexscan BOOLEAN, p_enable_bitmapscan BOOLEAN, p_enable_seqscan BOOLEAN, p_use_equality BOOLEAN DEFAULT FALSE ) RETURNS void AS $$ DECLARE v_count INTEGER; v_start_time TIMESTAMP; v_end_time TIMESTAMP; v_exec_time NUMERIC; v_total_rows INTEGER; v_selectivity NUMERIC; BEGIN -- Set scan methods EXECUTE format('SET LOCAL enable_indexscan = %s', p_enable_indexscan); EXECUTE format('SET LOCAL enable_bitmapscan = %s', p_enable_bitmapscan); EXECUTE format('SET LOCAL enable_seqscan = %s', p_enable_seqscan); -- Get total rows SELECT COUNT(*) INTO v_total_rows FROM benchmark_data; -- Execute query and measure time v_start_time := clock_timestamp(); IF p_use_equality THEN SELECT COUNT(*) INTO v_count FROM benchmark_data WHERE uuid_str = p_pattern; ELSE SELECT COUNT(*) INTO v_count FROM benchmark_data WHERE uuid_str LIKE p_pattern; END IF; v_end_time := clock_timestamp(); v_exec_time := EXTRACT(EPOCH FROM (v_end_time - v_start_time)) * 1000; v_selectivity := v_count::NUMERIC / NULLIF(v_total_rows, 0); -- Insert results INSERT INTO benchmark_results ( test_category, test_name, pattern, index_type, result_count, execution_time_ms, selectivity ) VALUES ( p_test_category, p_test_name, p_pattern, p_index_type, v_count, v_exec_time, v_selectivity ); END; $$ LANGUAGE plpgsql; -- Warmup to ensure consistent results DO $$ BEGIN EXECUTE format('SET LOCAL enable_seqscan = TRUE'); PERFORM COUNT(*) FROM benchmark_data WHERE uuid_str = '-'; END $$; DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'RUNNING BENCHMARK TEST SUITE'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; -- ============================================================================ -- Test Category 1: Exact Match Queries -- ============================================================================ DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '[1/7] Exact Match Tests...'; END $$; DO $$ DECLARE test_uuid TEXT; BEGIN SELECT uuid_str INTO test_uuid FROM benchmark_data LIMIT 1; PERFORM run_benchmark_test('Exact Match', 'Single UUID Lookup', test_uuid, 'Sequential Scan', FALSE, FALSE, TRUE, FALSE); PERFORM run_benchmark_test('Exact Match', 'Single UUID Lookup', test_uuid, 'B-Tree (=)', TRUE, FALSE, FALSE, TRUE); PERFORM run_benchmark_test('Exact Match', 'Single UUID Lookup', test_uuid, 'B-Tree (LIKE)', TRUE, FALSE, FALSE, FALSE); PERFORM run_benchmark_test('Exact Match', 'Single UUID Lookup', test_uuid, 'pg_trgm', FALSE, TRUE, FALSE, FALSE); PERFORM run_benchmark_test('Exact Match', 'Single UUID Lookup', test_uuid, 'Biscuit', TRUE, FALSE, FALSE, FALSE); RAISE NOTICE ' Completed: 5 tests'; END $$; -- ============================================================================ -- Test Category 2: Prefix Match Queries (Various Lengths) -- ============================================================================ DO $$ BEGIN RAISE NOTICE '[2/7] Prefix Match Tests...'; END $$; DO $$ DECLARE patterns TEXT[] := ARRAY['a%', 'ab%', 'abc%', 'abcd%', '1%', '12%', '123%']; pattern TEXT; test_count INTEGER := 0; BEGIN FOREACH pattern IN ARRAY patterns LOOP PERFORM run_benchmark_test('Prefix Match', 'Prefix: ' || pattern, pattern, 'Sequential Scan', FALSE, FALSE, TRUE); PERFORM run_benchmark_test('Prefix Match', 'Prefix: ' || pattern, pattern, 'B-Tree', TRUE, FALSE, FALSE); PERFORM run_benchmark_test('Prefix Match', 'Prefix: ' || pattern, pattern, 'pg_trgm', FALSE, TRUE, FALSE); PERFORM run_benchmark_test('Prefix Match', 'Prefix: ' || pattern, pattern, 'Biscuit', TRUE, FALSE, FALSE); test_count := test_count + 4; END LOOP; RAISE NOTICE ' Completed: % tests', test_count; END $$; -- ============================================================================ -- Test Category 3: Suffix Match Queries -- ============================================================================ DO $$ BEGIN RAISE NOTICE '[3/7] Suffix Match Tests...'; END $$; DO $$ DECLARE patterns TEXT[] := ARRAY['%a', '%ab', '%abc', '%1', '%12', '%123']; pattern TEXT; test_count INTEGER := 0; BEGIN FOREACH pattern IN ARRAY patterns LOOP PERFORM run_benchmark_test('Suffix Match', 'Suffix: ' || pattern, pattern, 'Sequential Scan', FALSE, FALSE, TRUE); PERFORM run_benchmark_test('Suffix Match', 'Suffix: ' || pattern, pattern, 'pg_trgm', FALSE, TRUE, FALSE); PERFORM run_benchmark_test('Suffix Match', 'Suffix: ' || pattern, pattern, 'Biscuit', TRUE, FALSE, FALSE); test_count := test_count + 3; END LOOP; RAISE NOTICE ' Completed: % tests', test_count; END $$; -- ============================================================================ -- Test Category 4: Contains/Substring Match Queries -- ============================================================================ DO $$ BEGIN RAISE NOTICE '[4/7] Contains Match Tests...'; END $$; DO $$ DECLARE patterns TEXT[] := ARRAY['%a%', '%ab%', '%abc%', '%4a%', '%4ab%', '%zzz%', '%xyz%']; pattern TEXT; test_count INTEGER := 0; BEGIN FOREACH pattern IN ARRAY patterns LOOP PERFORM run_benchmark_test('Contains Match', 'Contains: ' || pattern, pattern, 'Sequential Scan', FALSE, FALSE, TRUE); PERFORM run_benchmark_test('Contains Match', 'Contains: ' || pattern, pattern, 'pg_trgm', FALSE, TRUE, FALSE); PERFORM run_benchmark_test('Contains Match', 'Contains: ' || pattern, pattern, 'Biscuit', TRUE, FALSE, FALSE); test_count := test_count + 3; END LOOP; RAISE NOTICE ' Completed: % tests', test_count; END $$; -- ============================================================================ -- Test Category 5: Complex Pattern Queries -- ============================================================================ DO $$ BEGIN RAISE NOTICE '[5/7] Complex Pattern Tests...'; END $$; DO $$ DECLARE patterns TEXT[] := ARRAY['%a%b%', '%a%4%b%', '%1%2%3%', 'a%b%c%', '%a%b%c%d%']; pattern TEXT; test_count INTEGER := 0; BEGIN FOREACH pattern IN ARRAY patterns LOOP PERFORM run_benchmark_test('Complex Pattern', 'Pattern: ' || pattern, pattern, 'Sequential Scan', FALSE, FALSE, TRUE); PERFORM run_benchmark_test('Complex Pattern', 'Pattern: ' || pattern, pattern, 'pg_trgm', FALSE, TRUE, FALSE); PERFORM run_benchmark_test('Complex Pattern', 'Pattern: ' || pattern, pattern, 'Biscuit', TRUE, FALSE, FALSE); test_count := test_count + 3; END LOOP; RAISE NOTICE ' Completed: % tests', test_count; END $$; -- ============================================================================ -- Test Category 6: Selectivity Tests (High to Low) -- ============================================================================ DO $$ BEGIN RAISE NOTICE '[6/7] Selectivity Tests...'; END $$; DO $$ DECLARE patterns TEXT[] := ARRAY['%zzzzz%', '%xyz%', '%4a%', '%a%', '%%']; pattern TEXT; pattern_name TEXT; test_count INTEGER := 0; BEGIN FOREACH pattern IN ARRAY patterns LOOP CASE pattern WHEN '%zzzzz%' THEN pattern_name := 'Very Selective'; WHEN '%xyz%' THEN pattern_name := 'Selective'; WHEN '%4a%' THEN pattern_name := 'Moderate'; WHEN '%a%' THEN pattern_name := 'Broad'; WHEN '% percent' THEN pattern_name := 'All Records'; END CASE; PERFORM run_benchmark_test('Selectivity', pattern_name, pattern, 'Sequential Scan', FALSE, FALSE, TRUE); test_count := test_count + 1; IF pattern != '% percent' THEN PERFORM run_benchmark_test('Selectivity', pattern_name, pattern, 'pg_trgm', FALSE, TRUE, FALSE); PERFORM run_benchmark_test('Selectivity', pattern_name, pattern, 'Biscuit', TRUE, FALSE, FALSE); test_count := test_count + 2; END IF; END LOOP; RAISE NOTICE ' Completed: % tests', test_count; END $$; -- ============================================================================ -- Test Category 7: Case Sensitivity Tests -- ============================================================================ DO $$ BEGIN RAISE NOTICE '[7/7] Case Sensitivity Tests...'; END $$; DO $$ DECLARE patterns TEXT[] := ARRAY['%A%', '%a%', '%Ab%', '%aB%']; pattern TEXT; test_count INTEGER := 0; BEGIN FOREACH pattern IN ARRAY patterns LOOP PERFORM run_benchmark_test('Case Sensitivity', 'Case: ' || pattern, pattern, 'Sequential Scan', FALSE, FALSE, TRUE); PERFORM run_benchmark_test('Case Sensitivity', 'Case: ' || pattern, pattern, 'pg_trgm', FALSE, TRUE, FALSE); PERFORM run_benchmark_test('Case Sensitivity', 'Case: ' || pattern, pattern, 'Biscuit', TRUE, FALSE, FALSE); test_count := test_count + 3; END LOOP; RAISE NOTICE ' Completed: % tests', test_count; END $$; DO $$ DECLARE total_tests INTEGER; BEGIN SELECT COUNT(*) INTO total_tests FROM benchmark_results; RAISE NOTICE ''; RAISE NOTICE 'All tests completed: % total benchmark runs', total_tests; RAISE NOTICE '========================================'; END $$; -- ============================================================================ -- STATISTICAL ANALYSIS -- ============================================================================ DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE 'Computing statistical analysis...'; END $$; CREATE TABLE benchmark_statistics AS WITH index_stats AS ( SELECT test_category, index_type, COUNT(*) as test_count, AVG(execution_time_ms) as mean_time, CASE WHEN COUNT(*) > 1 THEN STDDEV(execution_time_ms) ELSE 0 END as stddev_time, MIN(execution_time_ms) as min_time, MAX(execution_time_ms) as max_time, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY execution_time_ms) as median_time, PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY execution_time_ms) as p95_time, PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY execution_time_ms) as p99_time, AVG(selectivity) as avg_selectivity FROM benchmark_results GROUP BY test_category, index_type ), geometric_means AS ( SELECT test_category, index_type, EXP(AVG(LN(NULLIF(execution_time_ms, 0)))) as geometric_mean FROM benchmark_results WHERE execution_time_ms > 0 GROUP BY test_category, index_type ), rankings AS ( SELECT test_category, test_name, pattern, index_type, execution_time_ms, RANK() OVER (PARTITION BY test_category, test_name ORDER BY execution_time_ms) as rank, CASE WHEN RANK() OVER (PARTITION BY test_category, test_name ORDER BY execution_time_ms) = 1 THEN 1 ELSE 0 END as is_winner FROM benchmark_results ) SELECT s.test_category, s.index_type, s.test_count, ROUND(s.mean_time::numeric, 4) as mean_time_ms, ROUND(gm.geometric_mean::numeric, 4) as geometric_mean_ms, ROUND(s.stddev_time::numeric, 4) as stddev_ms, ROUND((s.stddev_time / NULLIF(s.mean_time, 0) * 100)::numeric, 2) as coeff_variation_pct, ROUND(s.min_time::numeric, 4) as min_time_ms, ROUND(s.max_time::numeric, 4) as max_time_ms, ROUND(s.median_time::numeric, 4) as median_time_ms, ROUND(s.p95_time::numeric, 4) as p95_time_ms, ROUND(s.p99_time::numeric, 4) as p99_time_ms, ROUND(s.avg_selectivity::numeric, 6) as avg_selectivity, SUM(r.is_winner) as times_fastest, ROUND((SUM(r.is_winner)::NUMERIC / s.test_count * 100)::numeric, 2) as win_rate_pct FROM index_stats s JOIN geometric_means gm ON s.test_category = gm.test_category AND s.index_type = gm.index_type JOIN rankings r ON s.test_category = r.test_category AND s.index_type = r.index_type GROUP BY s.test_category, s.index_type, s.test_count, s.mean_time, gm.geometric_mean, s.stddev_time, s.min_time, s.max_time, s.median_time, s.p95_time, s.p99_time, s.avg_selectivity ORDER BY s.test_category, s.mean_time; DO $$ BEGIN RAISE NOTICE 'Statistical analysis complete'; END $$; -- ============================================================================ -- COMPREHENSIVE RESULTS OUTPUT -- ============================================================================ DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'BENCHMARK RESULTS & ANALYSIS'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; -- Overall Summary DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'OVERALL PERFORMANCE SUMMARY'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ DECLARE rec RECORD; BEGIN FOR rec IN SELECT index_type, COUNT(*) as total_tests, ROUND(AVG(execution_time_ms)::numeric, 4) as avg_time_ms, ROUND(EXP(AVG(LN(NULLIF(execution_time_ms, 0))))::numeric, 4) as geom_mean_ms, ROUND(MIN(execution_time_ms)::numeric, 4) as min_time_ms, ROUND(MAX(execution_time_ms)::numeric, 4) as max_time_ms, SUM(CASE WHEN rank = 1 THEN 1 ELSE 0 END) as times_fastest, ROUND((SUM(CASE WHEN rank = 1 THEN 1 ELSE 0 END)::NUMERIC / COUNT(*) * 100)::numeric, 1) as win_rate_pct FROM ( SELECT *, RANK() OVER (PARTITION BY test_category, test_name ORDER BY execution_time_ms) as rank FROM benchmark_results ) ranked GROUP BY index_type ORDER BY geom_mean_ms LOOP RAISE NOTICE '% | Tests: % | Avg: % ms | GeoMean: % ms | Min: % ms | Max: % ms | Wins: % (% percent percent)', RPAD(rec.index_type, 20), LPAD(rec.total_tests::TEXT, 3), LPAD(rec.avg_time_ms::TEXT, 8), LPAD(rec.geom_mean_ms::TEXT, 8), LPAD(rec.min_time_ms::TEXT, 8), LPAD(rec.max_time_ms::TEXT, 8), LPAD(rec.times_fastest::TEXT, 3), LPAD(rec.win_rate_pct::TEXT, 5); END LOOP; END $$; -- Category-wise Statistics DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'PERFORMANCE BY TEST CATEGORY'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ DECLARE rec RECORD; prev_category TEXT := ''; BEGIN FOR rec IN SELECT test_category, index_type, test_count, mean_time_ms, geometric_mean_ms, stddev_ms, coeff_variation_pct as cv_pct, median_time_ms, p95_time_ms, times_fastest, win_rate_pct FROM benchmark_statistics ORDER BY test_category, geometric_mean_ms LOOP IF prev_category != rec.test_category THEN RAISE NOTICE ''; RAISE NOTICE '--- % ---', rec.test_category; prev_category := rec.test_category; END IF; RAISE NOTICE ' % | Mean: % ms | GeoMean: % ms | StdDev: % | CV: % percent | Median: % ms | P95: % ms | Wins: %/% (% percent percent)', RPAD(rec.index_type, 18), LPAD(rec.mean_time_ms::TEXT, 8), LPAD(rec.geometric_mean_ms::TEXT, 8), LPAD(rec.stddev_ms::TEXT, 8), LPAD(rec.cv_pct::TEXT, 6), LPAD(rec.median_time_ms::TEXT, 8), LPAD(rec.p95_time_ms::TEXT, 8), LPAD(rec.times_fastest::TEXT, 2), LPAD(rec.test_count::TEXT, 2), LPAD(rec.win_rate_pct::TEXT, 5); END LOOP; END $$; -- Best Performer by Category DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'BEST PERFORMER BY CATEGORY'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ DECLARE rec RECORD; BEGIN FOR rec IN SELECT DISTINCT ON (test_category) test_category, index_type as winner, geometric_mean_ms, times_fastest, win_rate_pct FROM benchmark_statistics ORDER BY test_category, geometric_mean_ms LOOP RAISE NOTICE '% : % (GeoMean: % ms, Won %/% percent tests)', RPAD(rec.test_category, 20), RPAD(rec.winner, 18), LPAD(rec.geometric_mean_ms::TEXT, 8), LPAD(rec.times_fastest::TEXT, 2), LPAD(rec.win_rate_pct::TEXT, 5); END LOOP; END $$; -- Detailed Results by Test (Sample) DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'TOP 20 FASTEST INDIVIDUAL TESTS'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ DECLARE rec RECORD; counter INTEGER := 0; BEGIN FOR rec IN SELECT test_category, test_name, pattern, index_type, result_count, ROUND(execution_time_ms::numeric, 4) as exec_time_ms, ROUND(selectivity::numeric, 6) as selectivity FROM benchmark_results ORDER BY execution_time_ms LIMIT 20 LOOP counter := counter + 1; RAISE NOTICE '%. % | % | % | Pattern: % | Time: % ms | Results: % | Sel: %', LPAD(counter::TEXT, 2), RPAD(rec.test_category, 18), RPAD(rec.index_type, 18), RPAD(rec.test_name, 25), RPAD(rec.pattern, 12), LPAD(rec.exec_time_ms::TEXT, 8), LPAD(rec.result_count::TEXT, 8), LPAD(rec.selectivity::TEXT, 8); END LOOP; END $$; -- Speedup Analysis DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'SPEEDUP ANALYSIS (vs Sequential Scan)'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ DECLARE rec RECORD; prev_category TEXT := ''; BEGIN FOR rec IN WITH baseline AS ( SELECT test_category, test_name, execution_time_ms as seq_time FROM benchmark_results WHERE index_type = 'Sequential Scan' ) SELECT br.test_category, br.index_type, COUNT(*) as tests, ROUND(AVG(b.seq_time / NULLIF(br.execution_time_ms, 0))::numeric, 2) as avg_speedup, ROUND(MIN(b.seq_time / NULLIF(br.execution_time_ms, 0))::numeric, 2) as min_speedup, ROUND(MAX(b.seq_time / NULLIF(br.execution_time_ms, 0))::numeric, 2) as max_speedup FROM benchmark_results br JOIN baseline b ON br.test_category = b.test_category AND br.test_name = b.test_name WHERE br.index_type != 'Sequential Scan' GROUP BY br.test_category, br.index_type ORDER BY br.test_category, avg_speedup DESC LOOP IF prev_category != rec.test_category THEN RAISE NOTICE ''; RAISE NOTICE '--- % ---', rec.test_category; prev_category := rec.test_category; END IF; RAISE NOTICE ' % | Avg Speedup: %x | Range: %x - %x', RPAD(rec.index_type, 18), LPAD(rec.avg_speedup::TEXT, 6), LPAD(rec.min_speedup::TEXT, 6), LPAD(rec.max_speedup::TEXT, 6); END LOOP; END $$; -- Variance Analysis DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'CONSISTENCY ANALYSIS'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ DECLARE rec RECORD; BEGIN FOR rec IN SELECT index_type, ROUND(AVG(stddev_ms)::numeric, 4) as avg_stddev, ROUND(AVG(coeff_variation_pct)::numeric, 2) as avg_cv_pct, CASE WHEN AVG(coeff_variation_pct) < 20 THEN 'Very Consistent' WHEN AVG(coeff_variation_pct) < 50 THEN 'Consistent' WHEN AVG(coeff_variation_pct) < 100 THEN 'Moderate Variance' ELSE 'High Variance' END as consistency_rating FROM benchmark_statistics WHERE test_count > 1 -- Only show for index types with multiple tests GROUP BY index_type ORDER BY avg_cv_pct LOOP RAISE NOTICE '% | Avg StdDev: % ms | Avg CV: % percent | Rating: %', RPAD(rec.index_type, 20), LPAD(rec.avg_stddev::TEXT, 8), LPAD(rec.avg_cv_pct::TEXT, 6), rec.consistency_rating; END LOOP; END $$; -- Selectivity Impact DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'SELECTIVITY IMPACT ANALYSIS'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ DECLARE rec RECORD; prev_range TEXT := ''; BEGIN FOR rec IN SELECT CASE WHEN selectivity < 0.001 THEN 'Very Selective (<0.1%)' WHEN selectivity < 0.01 THEN 'Selective (0.1-1%)' WHEN selectivity < 0.1 THEN 'Moderate (1-10%)' WHEN selectivity < 0.5 THEN 'Broad (10-50%)' ELSE 'Very Broad (>50%)' END as selectivity_range, CASE WHEN selectivity < 0.001 THEN 1 WHEN selectivity < 0.01 THEN 2 WHEN selectivity < 0.1 THEN 3 WHEN selectivity < 0.5 THEN 4 ELSE 5 END as sel_order, index_type, COUNT(*) as test_count, ROUND(AVG(execution_time_ms)::numeric, 4) as avg_time_ms, ROUND(AVG(selectivity * 100)::numeric, 2) as avg_selectivity_pct FROM benchmark_results WHERE index_type != 'Sequential Scan' GROUP BY CASE WHEN selectivity < 0.001 THEN 'Very Selective (<0.1%)' WHEN selectivity < 0.01 THEN 'Selective (0.1-1%)' WHEN selectivity < 0.1 THEN 'Moderate (1-10%)' WHEN selectivity < 0.5 THEN 'Broad (10-50%)' ELSE 'Very Broad (>50%)' END, CASE WHEN selectivity < 0.001 THEN 1 WHEN selectivity < 0.01 THEN 2 WHEN selectivity < 0.1 THEN 3 WHEN selectivity < 0.5 THEN 4 ELSE 5 END, index_type ORDER BY sel_order, avg_time_ms LOOP IF prev_range != rec.selectivity_range THEN RAISE NOTICE ''; RAISE NOTICE '--- % ---', rec.selectivity_range; prev_range := rec.selectivity_range; END IF; RAISE NOTICE ' % | Tests: % | Avg Time: % ms | Avg Selectivity: % percent percent', RPAD(rec.index_type, 18), LPAD(rec.test_count::TEXT, 2), LPAD(rec.avg_time_ms::TEXT, 8), LPAD(rec.avg_selectivity_pct::TEXT, 6); END LOOP; END $$; -- Index Size Analysis (FIXED) DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'INDEX SIZE ANALYSIS'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ DECLARE rec RECORD; table_size TEXT; BEGIN -- Show all indexes with proper sizing FOR rec IN SELECT COALESCE(i.indexname, c.relname) as index_name, pg_size_pretty(pg_relation_size(c.oid)) as index_size, pg_relation_size(c.oid) as size_bytes, CASE WHEN i.indexname IS NULL THEN 'Primary Key' WHEN i.indexname LIKE '%biscuit%' THEN 'Biscuit' WHEN i.indexname LIKE '%trgm%' THEN 'pg_trgm GIN' WHEN i.indexname LIKE '%btree%' THEN 'B-Tree' ELSE 'Other' END as index_type_label FROM pg_class c LEFT JOIN pg_indexes i ON c.relname = i.indexname WHERE c.relname IN ( SELECT indexname FROM pg_indexes WHERE tablename = 'benchmark_data' UNION SELECT 'benchmark_data_pkey' ) ORDER BY pg_relation_size(c.oid) DESC LOOP RAISE NOTICE '% (%) : %', RPAD(rec.index_name, 25), RPAD(rec.index_type_label, 12), rec.index_size; END LOOP; -- Table size SELECT pg_size_pretty(pg_total_relation_size('benchmark_data')) INTO table_size; RAISE NOTICE ''; RAISE NOTICE 'Total table size (with indexes): %', table_size; -- Data-only size SELECT pg_size_pretty(pg_relation_size('benchmark_data')) INTO table_size; RAISE NOTICE 'Table data size (no indexes): %', table_size; END $$; -- Performance Summary and Recommendations DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'KEY FINDINGS & RECOMMENDATIONS'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ DECLARE biscuit_geomean NUMERIC; trgm_geomean NUMERIC; btree_geomean NUMERIC; seq_geomean NUMERIC; biscuit_wins INTEGER; trgm_wins INTEGER; btree_wins INTEGER; total_tests INTEGER; BEGIN -- Get overall geometric means SELECT ROUND(EXP(AVG(LN(NULLIF(execution_time_ms, 0))))::numeric, 4) INTO biscuit_geomean FROM benchmark_results WHERE index_type = 'Biscuit'; SELECT ROUND(EXP(AVG(LN(NULLIF(execution_time_ms, 0))))::numeric, 4) INTO trgm_geomean FROM benchmark_results WHERE index_type = 'pg_trgm'; SELECT ROUND(EXP(AVG(LN(NULLIF(execution_time_ms, 0))))::numeric, 4) INTO btree_geomean FROM benchmark_results WHERE index_type LIKE 'B-Tree%'; SELECT ROUND(EXP(AVG(LN(NULLIF(execution_time_ms, 0))))::numeric, 4) INTO seq_geomean FROM benchmark_results WHERE index_type = 'Sequential Scan'; -- Get win counts SELECT COUNT(*) INTO total_tests FROM (SELECT DISTINCT test_category, test_name FROM benchmark_results) t; WITH rankings AS ( SELECT test_category, test_name, index_type, RANK() OVER (PARTITION BY test_category, test_name ORDER BY execution_time_ms) as rank FROM benchmark_results ) SELECT SUM(CASE WHEN index_type = 'Biscuit' AND rank = 1 THEN 1 ELSE 0 END), SUM(CASE WHEN index_type = 'pg_trgm' AND rank = 1 THEN 1 ELSE 0 END), SUM(CASE WHEN index_type LIKE 'B-Tree%' AND rank = 1 THEN 1 ELSE 0 END) INTO biscuit_wins, trgm_wins, btree_wins FROM rankings; RAISE NOTICE ''; RAISE NOTICE '1. OVERALL PERFORMANCE:'; RAISE NOTICE ' - Biscuit: % ms geometric mean, % wins (% percent percent)', biscuit_geomean, biscuit_wins, ROUND(biscuit_wins::NUMERIC / total_tests * 100, 1); RAISE NOTICE ' - pg_trgm: % ms geometric mean, % wins (% percent percent)', trgm_geomean, trgm_wins, ROUND(trgm_wins::NUMERIC / total_tests * 100, 1); RAISE NOTICE ' - B-Tree: % ms geometric mean, % wins (% percent percent)', btree_geomean, btree_wins, ROUND(btree_wins::NUMERIC / total_tests * 100, 1); RAISE NOTICE ' - Sequential: % ms geometric mean', seq_geomean; RAISE NOTICE ''; RAISE NOTICE '2. BEST USE CASES:'; RAISE NOTICE ' - Biscuit excels at: Contains queries, Suffix matches'; RAISE NOTICE ' - pg_trgm excels at: Complex patterns, Case sensitivity'; RAISE NOTICE ' - B-Tree excels at: Exact matches, Prefix matches'; RAISE NOTICE ''; RAISE NOTICE '3. PERFORMANCE CHARACTERISTICS:'; IF biscuit_geomean < trgm_geomean THEN RAISE NOTICE ' - Biscuit is %x faster than pg_trgm overall (geometric mean)', ROUND((trgm_geomean / biscuit_geomean)::numeric, 2); ELSE RAISE NOTICE ' - pg_trgm is %x faster than Biscuit overall (geometric mean)', ROUND((biscuit_geomean / trgm_geomean)::numeric, 2); END IF; IF biscuit_geomean < btree_geomean THEN RAISE NOTICE ' - Biscuit is %x faster than B-Tree overall (geometric mean)', ROUND((btree_geomean / biscuit_geomean)::numeric, 2); ELSE RAISE NOTICE ' - B-Tree is %x faster than Biscuit overall (geometric mean)', ROUND((biscuit_geomean / btree_geomean)::numeric, 2); END IF; RAISE NOTICE ''; RAISE NOTICE '4. INDEX SIZE COMPARISON:'; RAISE NOTICE ' - Run the INDEX SIZE ANALYSIS section above for detailed sizes'; RAISE NOTICE ' - Note: Biscuit may use different storage mechanisms than traditional indexes'; END $$; DO $$ BEGIN RAISE NOTICE ''; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE 'BENCHMARK ANALYSIS COMPLETE'; END $$; DO $$ BEGIN RAISE NOTICE '========================================'; END $$; DO $$ BEGIN RAISE NOTICE ''; END $$; -- Reset settings SET enable_indexscan = ON; SET enable_bitmapscan = ON; SET enable_seqscan = ON; -- Cleanup helper function DROP FUNCTION run_benchmark_test(TEXT, TEXT, TEXT, TEXT, BOOLEAN, BOOLEAN, BOOLEAN, BOOLEAN);