-- Shape: GROUP BY aggregate on JOIN -- Join: stackoverflow_posts → comments -- Description: Grouped aggregate (COUNT, SUM) with GROUP BY on the parent -- table's title column. Exercises the DataFusion backend's grouped -- aggregate pipeline including custom_scan_tlist for scanrelid=0. -- Query Info (statistics from 100k dataset; larger datasets may have different values): -- - 'code' selectivity on stackoverflow_posts.body: ~75% -- Postgres default plan (custom scan off) SET paradedb.enable_aggregate_custom_scan TO off; SELECT p.title, COUNT(*), SUM(c.score) FROM stackoverflow_posts p JOIN comments c ON p.id = c.post_id WHERE p.body ||| 'code' GROUP BY p.title ORDER BY p.title; -- DataFusion aggregate scan (temporarily increased from 4GB to 8GB) SET work_mem TO '8GB'; SET paradedb.enable_aggregate_custom_scan TO on; SELECT p.title, COUNT(*), SUM(c.score) FROM stackoverflow_posts p JOIN comments c ON p.id = c.post_id WHERE p.body ||| 'code' GROUP BY p.title ORDER BY p.title; -- MPP aggregate scan (GroupByAggOnBinaryJoin shape; default mpp_worker_count=4) SET statement_timeout TO '600s'; SET work_mem TO '8GB'; SET paradedb.enable_aggregate_custom_scan TO on; SET paradedb.enable_mpp TO on; SELECT p.title, COUNT(*), SUM(c.score) FROM stackoverflow_posts p JOIN comments c ON p.id = c.post_id WHERE p.body ||| 'code' GROUP BY p.title ORDER BY p.title;