-- Recorded session for the HAVING-pushdown checks: each statement is followed
-- by the output that was captured for it (query plans or result rows).
--
-- Overview of what the session below does:
--   * Sets citus.next_shard_id to 590000, creates lineitem_hash and
--     orders_hash (LIKE lineitem / LIKE orders) and hash-distributes them on
--     l_orderkey and o_orderkey respectively.
--   * Runs EXPLAIN (COSTS FALSE) on GROUP BY ... HAVING sum(l_quantity) > 24
--     queries. In the recorded plans the HAVING condition appears as a Filter
--     on the HashAggregate inside the Task when the GROUP BY list contains
--     l_orderkey of lineitem_hash (alone, with l_shipmode, or together with
--     o_orderkey in the join). In the other three plans the Filter sits on a
--     HashAggregate above the Custom Scan (Citus Adaptive) node, refers to
--     remote_scan.worker_column_N, and the per-task plan has no Filter.
--   * Drops both tables, then runs HAVING queries that combine aggregates
--     with AND / OR / FILTER / IN against users_table and records the rows.
--
-- NOTE(review): lineitem, orders and users_table are not created in this
--   session; presumably they come from earlier setup - confirm.
-- NOTE(review): the comment "grouped by multiple rows" below presumably means
--   "multiple columns"; left untouched so the recorded text stays as captured.
-- NOTE(review): the final query has no ORDER BY; its recorded output is a
--   single row, so ordering does not affect it as recorded.
-- NOTE(review): if this file is compared verbatim against fresh output, these
--   header comments must be mirrored in the input script - confirm before
--   applying.
-- -- MULTI_HAVING_PUSHDOWN -- SET citus.next_shard_id TO 590000; CREATE TABLE lineitem_hash (LIKE lineitem); SELECT create_distributed_table('lineitem_hash', 'l_orderkey', 'hash'); create_distributed_table --------------------------------------------------------------------- (1 row) CREATE TABLE orders_hash (LIKE orders); SELECT create_distributed_table('orders_hash', 'o_orderkey', 'hash'); create_distributed_table --------------------------------------------------------------------- (1 row) -- push down when table is distributed by hash and grouped by partition column EXPLAIN (COSTS FALSE) SELECT l_orderkey, sum(l_extendedprice * l_discount) as revenue FROM lineitem_hash GROUP BY l_orderkey HAVING sum(l_quantity) > 24 ORDER BY 2 DESC, 1 ASC LIMIT 3; QUERY PLAN --------------------------------------------------------------------- Limit -> Sort Sort Key: remote_scan.revenue DESC, remote_scan.l_orderkey -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Limit -> Sort Sort Key: (sum((l_extendedprice * l_discount))) DESC, l_orderkey -> HashAggregate Group Key: l_orderkey Filter: (sum(l_quantity) > '24'::numeric) -> Seq Scan on lineitem_hash_590000 lineitem_hash (15 rows) -- but don't push down when table is distributed by append EXPLAIN (COSTS FALSE) SELECT l_orderkey, sum(l_extendedprice * l_discount) as revenue FROM lineitem GROUP BY l_orderkey HAVING sum(l_quantity) > 24 ORDER BY 2 DESC, 1 ASC LIMIT 3; QUERY PLAN --------------------------------------------------------------------- Limit -> Sort Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.l_orderkey -> HashAggregate Group Key: remote_scan.l_orderkey Filter: (sum(remote_scan.worker_column_3) > '24'::numeric) -> Custom Scan (Citus Adaptive) Task Count: 2 Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: l_orderkey -> Seq Scan on lineitem_290000 lineitem (14 rows) -- and 
don't push down when not grouped by partition column EXPLAIN (COSTS FALSE) SELECT l_shipmode, sum(l_extendedprice * l_discount) as revenue FROM lineitem_hash GROUP BY l_shipmode HAVING sum(l_quantity) > 24 ORDER BY 2 DESC, 1 ASC LIMIT 3; QUERY PLAN --------------------------------------------------------------------- Limit -> Sort Sort Key: (sum(remote_scan.revenue)) DESC, remote_scan.l_shipmode -> HashAggregate Group Key: remote_scan.l_shipmode Filter: (sum(remote_scan.worker_column_3) > '24'::numeric) -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: l_shipmode -> Seq Scan on lineitem_hash_590000 lineitem_hash (14 rows) -- push down if grouped by multiple rows one of which is partition column EXPLAIN (COSTS FALSE) SELECT l_shipmode, l_orderkey, sum(l_extendedprice * l_discount) as revenue FROM lineitem_hash GROUP BY l_shipmode, l_orderkey HAVING sum(l_quantity) > 24 ORDER BY 3 DESC, 1, 2 LIMIT 3; QUERY PLAN --------------------------------------------------------------------- Limit -> Sort Sort Key: remote_scan.revenue DESC, remote_scan.l_shipmode, remote_scan.l_orderkey -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Limit -> Sort Sort Key: (sum((l_extendedprice * l_discount))) DESC, l_shipmode, l_orderkey -> HashAggregate Group Key: l_shipmode, l_orderkey Filter: (sum(l_quantity) > '24'::numeric) -> Seq Scan on lineitem_hash_590000 lineitem_hash (15 rows) -- couple more checks with joins EXPLAIN (COSTS FALSE) SELECT sum(l_extendedprice * l_discount) as revenue FROM lineitem_hash, orders_hash WHERE o_orderkey = l_orderkey GROUP BY l_orderkey, o_orderkey, l_shipmode HAVING sum(l_quantity) > 24 ORDER BY 1 DESC LIMIT 3; QUERY PLAN --------------------------------------------------------------------- Limit -> Sort Sort Key: remote_scan.revenue DESC -> Custom Scan (Citus Adaptive) Task 
Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> Limit -> Sort Sort Key: (sum((lineitem_hash.l_extendedprice * lineitem_hash.l_discount))) DESC -> HashAggregate Group Key: lineitem_hash.l_orderkey, orders_hash.o_orderkey, lineitem_hash.l_shipmode Filter: (sum(lineitem_hash.l_quantity) > '24'::numeric) -> Hash Join Hash Cond: (orders_hash.o_orderkey = lineitem_hash.l_orderkey) -> Seq Scan on orders_hash_590004 orders_hash -> Hash -> Seq Scan on lineitem_hash_590000 lineitem_hash (19 rows) EXPLAIN (COSTS FALSE) SELECT sum(l_extendedprice * l_discount) as revenue FROM lineitem_hash, orders_hash WHERE o_orderkey = l_orderkey GROUP BY l_shipmode, o_clerk HAVING sum(l_quantity) > 24 ORDER BY 1 DESC LIMIT 3; QUERY PLAN --------------------------------------------------------------------- Limit -> Sort Sort Key: (sum(remote_scan.revenue)) DESC -> HashAggregate Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3 Filter: (sum(remote_scan.worker_column_4) > '24'::numeric) -> Custom Scan (Citus Adaptive) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: lineitem_hash.l_shipmode, orders_hash.o_clerk -> Hash Join Hash Cond: (orders_hash.o_orderkey = lineitem_hash.l_orderkey) -> Seq Scan on orders_hash_590004 orders_hash -> Hash -> Seq Scan on lineitem_hash_590000 lineitem_hash (18 rows) DROP TABLE lineitem_hash; DROP TABLE orders_hash; SELECT max(value_1) FROM users_table GROUP BY user_id HAVING max(value_2) > 4 AND min(value_2) < 1 ORDER BY 1; max --------------------------------------------------------------------- 4 5 5 (3 rows) SELECT max(value_1) FROM users_table GROUP BY user_id HAVING max(value_2) > 4 AND min(value_2) < 1 OR count(*) > 10 ORDER BY 1; max --------------------------------------------------------------------- 4 5 5 5 (4 rows) SELECT max(value_1) FROM users_table GROUP BY user_id HAVING max(value_2) > 4 AND min(value_2) < 1 AND 
count(*) > 20 ORDER BY 1; max --------------------------------------------------------------------- 5 5 (2 rows) SELECT max(value_1) FROM users_table GROUP BY user_id HAVING max(value_2) > 0 AND count(*) FILTER (WHERE value_3=2) > 3 AND min(value_2) IN (0,1,2,3); max --------------------------------------------------------------------- 5 (1 row)