-- Case Study 6: The City Air-Quality Sensor Network -- Setup script – load into a fresh PostgreSQL database: -- psql -d mydb -f setup.sql SET client_encoding = 'UTF8'; CREATE EXTENSION IF NOT EXISTS "uuid-ossp" WITH SCHEMA public; CREATE EXTENSION IF NOT EXISTS provsql WITH SCHEMA public; SET search_path TO public, provsql; DROP TABLE IF EXISTS readings CASCADE; DROP TABLE IF EXISTS historical_readings CASCADE; DROP TABLE IF EXISTS stations CASCADE; DROP TABLE IF EXISTS calibration_status CASCADE; DROP TABLE IF EXISTS categories CASCADE; -- Four monitoring stations across two districts. CREATE TABLE stations ( id TEXT PRIMARY KEY, name TEXT NOT NULL, district TEXT NOT NULL ); INSERT INTO stations VALUES ('s1', 'City Centre', 'centre'), ('s2', 'Riverside Park', 'centre'), ('s3', 'Industrial Estate', 'east'), ('s4', 'Suburban Reference', 'east'); SELECT add_provenance('stations'); -- Per-station calibration probability (Bernoulli). The high-end and -- multi-pass units are well calibrated; the low-cost and drift-prone -- ones less so; the reference station is always in-spec. CREATE TABLE calibration_status ( station_id TEXT PRIMARY KEY REFERENCES stations(id), p DOUBLE PRECISION NOT NULL ); INSERT INTO calibration_status VALUES ('s1', 0.95), ('s2', 0.70), ('s3', 0.60), ('s4', 1.00); SELECT add_provenance('calibration_status'); -- Regulatory categories (deterministic, no provenance). CREATE TABLE categories ( name TEXT PRIMARY KEY, lo DOUBLE PRECISION NOT NULL, hi DOUBLE PRECISION NOT NULL ); INSERT INTO categories VALUES ('Good', 0.0, 12.0), ('Moderate', 12.1, 35.0), ('Unhealthy', 35.1, 1000.0); -- Today's readings. Five rows of pm25, one per (station, sample), -- exercising every distribution family plus the implicit numeric cast. CREATE TABLE readings ( id INTEGER PRIMARY KEY, station_id TEXT NOT NULL REFERENCES stations(id), ts TIMESTAMP NOT NULL, pm25 provsql.random_variable NOT NULL ); INSERT INTO readings (id, station_id, ts, pm25) VALUES (1, 's1', '2026-05-12 08:00', provsql.normal(28.0, 2.0)), -- high-end Gaussian (2, 's2', '2026-05-12 08:00', provsql.uniform(10.0, 22.0)), -- low-cost uniform window (3, 's3', '2026-05-12 08:00', provsql.exponential(0.04)), -- drift-prone, mean = 25 (4, 's4', '2026-05-12 08:00', 15.0), -- reference (implicit cast) (5, 's1', '2026-05-12 09:00', provsql.normal(40.0, 4.0)), -- high-end, into Unhealthy (6, 's2', '2026-05-12 09:00', provsql.uniform(12.0, 24.0)), -- low-cost, into Moderate (7, 's3', '2026-05-12 09:00', provsql.erlang(3, 0.1)), -- multi-pass Erlang, mean = 30 (8, 's4', '2026-05-12 09:00', 16.5); -- reference SELECT add_provenance('readings'); -- Yesterday's batch, used for the UNION ALL step. Same shape, slightly -- different distributions (a heatwave bumped the means). CREATE TABLE historical_readings ( id INTEGER PRIMARY KEY, station_id TEXT NOT NULL REFERENCES stations(id), ts TIMESTAMP NOT NULL, pm25 provsql.random_variable NOT NULL ); INSERT INTO historical_readings (id, station_id, ts, pm25) VALUES (1, 's1', '2026-05-11 08:00', provsql.normal(34.0, 2.5)), (2, 's2', '2026-05-11 08:00', provsql.uniform(15.0, 28.0)), (3, 's3', '2026-05-11 08:00', provsql.exponential(0.03)), (4, 's4', '2026-05-11 08:00', 18.0), (5, 's1', '2026-05-11 09:00', provsql.normal(42.0, 3.0)), (6, 's2', '2026-05-11 09:00', provsql.uniform(20.0, 35.0)), (7, 's3', '2026-05-11 09:00', provsql.erlang(3, 0.08)), (8, 's4', '2026-05-11 09:00', 19.5); SELECT add_provenance('historical_readings'); -- A provenance mapping so the Studio eval-strip's sr_formula and -- PROV-XML export can label leaves with station names rather than -- raw UUIDs. DROP TABLE IF EXISTS station_mapping; CREATE TABLE station_mapping AS SELECT s.name AS value, r.provsql AS provenance FROM readings r JOIN stations s ON s.id = r.station_id; SELECT remove_provenance('station_mapping'); CREATE INDEX ON station_mapping(provenance);