--
-- Test chunk filtering in columnar using min/max values in stripe skip lists.
--

--
-- filtered_row_count returns number of rows filtered by the WHERE clause.
-- If chunks get filtered by columnar, less rows are passed to WHERE
-- clause, so this function should return a lower number.
--
-- Implementation: run EXPLAIN ANALYZE on the given query text and scan the
-- plan output for the "Rows Removed by Filter" line; the regexp_replace
-- strips every non-digit character from that line, leaving just the count.
-- If no such line appears, the initial value 0 is returned.
CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS
$$
DECLARE
    result bigint;
    rec text;
BEGIN
    result := 0;

    FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP
        IF rec ~ '^\s+Rows Removed by Filter' then
            result := regexp_replace(rec, '[^0-9]*', '', 'g');
        END IF;
    END LOOP;

    RETURN result;
END;
$$ LANGUAGE PLPGSQL;

-- Disable correlation-based suppression so quals are always pushed down.
-- NOTE(review): later in this file the GUC is spelled
-- columnar.qual_pushdown_correlation_threshold -- verify both names are
-- valid in the target version, or whether one is a typo for the other.
set columnar.qual_pushdown_correlation = 0.0;

-- Create and load data
-- chunk_group_row_limit '1000', stripe_row_limit '2000'
set columnar.stripe_row_limit = 2000;
set columnar.chunk_group_row_limit = 1000;
CREATE TABLE test_chunk_filtering (a int) USING columnar;
INSERT INTO test_chunk_filtering SELECT generate_series(1,10000);

-- Verify that filtered_row_count is less than 1000 for the following queries
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering');
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 200');
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a > 200');
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 9900');
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a > 9900');
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 0');

-- Verify that filtered_row_count is less than 2000 for the following queries
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN 1 AND 10');
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN 990 AND 2010');
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN -10 AND 0');

-- Load data for second time and verify that filtered_row_count is exactly twice as before
INSERT INTO test_chunk_filtering SELECT generate_series(1,10000);
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 200');
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 0');
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN 990 AND 2010');

set columnar.stripe_row_limit to default;
set columnar.chunk_group_row_limit to default;

-- Verify that we are fine with collations which use a different alphabet order
-- (presumably 'Å' sorts after 'B' under da_DK, unlike in "C" collation, so the
-- min/max comparison must use the column's collation -- verify expected output)
CREATE TABLE collation_chunk_filtering_test(A text collate "da_DK") USING columnar;
COPY collation_chunk_filtering_test FROM STDIN;
A
Å
B
\.

SELECT * FROM collation_chunk_filtering_test WHERE A > 'B';

CREATE TABLE simple_chunk_filtering(i int) USING COLUMNAR;
INSERT INTO simple_chunk_filtering SELECT generate_series(0,234567);
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT * FROM simple_chunk_filtering WHERE i > 123456;

-- Same query with pushdown disabled: no chunk groups should be skipped.
SET columnar.enable_qual_pushdown = false;
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT * FROM simple_chunk_filtering WHERE i > 123456;
SET columnar.enable_qual_pushdown TO DEFAULT;

-- https://github.com/citusdata/citus/issues/4555
TRUNCATE simple_chunk_filtering;
INSERT INTO simple_chunk_filtering SELECT generate_series(0,200000);
COPY (SELECT * FROM simple_chunk_filtering WHERE i > 180000) TO '/dev/null';
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT * FROM simple_chunk_filtering WHERE i > 180000;

DROP TABLE simple_chunk_filtering;

CREATE TABLE multi_column_chunk_filtering(a int, b int) USING columnar;
INSERT INTO multi_column_chunk_filtering SELECT i,i+1 FROM generate_series(0,234567) i;

EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000;
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000;

-- make next tests faster
TRUNCATE multi_column_chunk_filtering;
INSERT INTO multi_column_chunk_filtering SELECT generate_series(0,5);

-- projection / column-subset variants, including empty target lists
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT b FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000;
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT b, a FROM multi_column_chunk_filtering WHERE b > 50000;
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT FROM multi_column_chunk_filtering WHERE a > 50000;
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT FROM multi_column_chunk_filtering;

-- scan of a table whose columns have all been dropped (rolled back afterwards)
BEGIN;
  ALTER TABLE multi_column_chunk_filtering DROP COLUMN a;
  ALTER TABLE multi_column_chunk_filtering DROP COLUMN b;
  EXPLAIN (analyze on, costs off, timing off, summary off)
    SELECT * FROM multi_column_chunk_filtering;
ROLLBACK;

CREATE TABLE another_columnar_table(x int, y int) USING columnar;
INSERT INTO another_columnar_table SELECT generate_series(0,5);
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT a, y FROM multi_column_chunk_filtering, another_columnar_table WHERE x > 1;
-- whole-row and duplicated-column target lists
EXPLAIN (costs off, timing off, summary off)
  SELECT y, * FROM another_columnar_table;
EXPLAIN (costs off, timing off, summary off)
  SELECT *, x FROM another_columnar_table;
EXPLAIN (costs off, timing off, summary off)
  SELECT y, another_columnar_table FROM another_columnar_table;
EXPLAIN (costs off, timing off, summary off)
  SELECT another_columnar_table, x FROM another_columnar_table;
DROP TABLE multi_column_chunk_filtering, another_columnar_table;

--
-- https://github.com/citusdata/citus/issues/4780
-- mixed row/columnar partitions of the same partitioned table
--
create table part_table (id int) partition by range (id);
create table part_1_row partition of part_table for values from (150000) to (160000);
create table part_2_columnar partition of part_table for values from (0) to (150000) using columnar;
insert into part_table select generate_series(1,159999);
select filtered_row_count('select count(*) from part_table where id > 75000');
drop table part_table;

-- test join parameterization
set columnar.stripe_row_limit = 2000;
set columnar.chunk_group_row_limit = 1000;
create table r1(id1 int, n1 int); -- row
create table r2(id2 int, n2 int); -- row
create table r3(id3 int, n3 int); -- row
create table r4(id4 int, n4 int); -- row
create table r5(id5 int, n5 int); -- row
create table r6(id6 int, n6 int); -- row
create table r7(id7 int, n7 int); -- row
create table coltest(id int, x1 int, x2 int, x3 int) using columnar;
create table coltest_part(id int, x1 int, x2 int, x3 int) partition by range (id);
create table coltest_part0 partition of coltest_part for values from (0) to (10000) using columnar;
create table coltest_part1 partition of coltest_part for values from (10000) to (20000); -- row
set columnar.stripe_row_limit to default;
set columnar.chunk_group_row_limit to default;
insert into r1 values(1234, 12350);
insert into r1 values(4567, 45000);
insert into r1 values(9101, 176000);
insert into r1 values(14202, 7);
insert into r1 values(18942, 189430);
insert into r2 values(1234, 123502);
insert into r2 values(4567, 450002);
insert into r2 values(9101, 1760002);
insert into r2 values(14202, 72);
insert into r2 values(18942, 1894302);
insert into r3 values(1234, 1235075);
insert into r3 values(4567, 4500075);
insert into r3 values(9101, 17600075);
insert into r3 values(14202, 775);
insert into r3 values(18942, 18943075);
insert into r4 values(1234, -1);
insert into r5 values(1234, -1);
insert into r6 values(1234, -1);
insert into r7 values(1234, -1);
insert into coltest select g, g*10, g*100, g*1000 from generate_series(0, 19999) g;
insert into coltest_part select g, g*10, g*100, g*1000 from generate_series(0, 19999) g;
ANALYZE r1, r2, r3, coltest, coltest_part;

-- force nested loop
set enable_mergejoin=false;
set enable_hashjoin=false;
set enable_material=false;

-- test different kinds of expressions
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT * FROM r1, coltest WHERE
    id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0;
SELECT * FROM r1, coltest WHERE
  id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0;

-- test equivalence classes
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE
    id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND
    id4 = id5 AND id5 = id6 AND id6 = id7;
SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE
  id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND
  id4 = id5 AND id5 = id6 AND id6 = id7;

-- test path generation with different thresholds
set columnar.planner_debug_level = 'notice';
set columnar.max_custom_scan_paths to 10;
EXPLAIN (costs off, timing off, summary off)
  SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4
  WHERE c1.id = c2.id and c1.id = c3.id and c1.id = c4.id;
set columnar.max_custom_scan_paths to 2;
EXPLAIN (costs off, timing off, summary off)
  SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4
  WHERE c1.id = c2.id and c1.id = c3.id and c1.id = c4.id;
set columnar.max_custom_scan_paths to default;
set columnar.planner_debug_level to default;

-- test more complex parameterization
set columnar.planner_debug_level = 'notice';
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT * FROM r1, r2, r3, coltest WHERE
    id1 = id2 AND id2 = id3 AND id3 = id AND
    n1 > x1 AND n2 > x2 AND n3 > x3;
set columnar.planner_debug_level to default;
SELECT * FROM r1, r2, r3, coltest WHERE
  id1 = id2 AND id2 = id3 AND id3 = id AND
  n1 > x1 AND n2 > x2 AND n3 > x3;

-- test partitioning parameterization
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT * FROM r1, coltest_part WHERE
    id1 = id AND n1 > x1;
SELECT * FROM r1, coltest_part WHERE
  id1 = id AND n1 > x1;

set enable_mergejoin to default;
set enable_hashjoin to default;
set enable_material to default;

set columnar.planner_debug_level = 'notice';

-- x5 is filled by a volatile default, so its values are uncorrelated with
-- physical row order (used below to test correlation-based pushdown rejection)
alter table coltest add column x5 int default (random()*20000)::int;
analyze coltest;

-- test that expressions on whole-row references are not pushed down
select * from coltest where coltest = (1,1,1,1);

-- test that expressions on uncorrelated attributes are not pushed down
set columnar.qual_pushdown_correlation to default;
select * from coltest where x5 = 23484;

-- test that expressions on volatile functions are not pushed down
create function vol() returns int language plpgsql as
$$
BEGIN
  RETURN 1;
END;
$$;
select * from coltest where x3 = vol();

EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT * FROM coltest c1 WHERE ceil(x1) > 4222;

set columnar.planner_debug_level to default;

--
-- https://github.com/citusdata/citus/issues/4488
-- prepared statements: chunk filtering must also work with generic plans
--
create table columnar_prepared_stmt (x int, y int) using columnar;
insert into columnar_prepared_stmt select s, s from generate_series(1,5000000) s;
prepare foo (int) as select x from columnar_prepared_stmt where x = $1;
-- execute 4 times directly, then 4 more via filtered_row_count; the later
-- executions may switch to a generic plan, which must still filter chunks
execute foo(3);
execute foo(3);
execute foo(3);
execute foo(3);
select filtered_row_count('execute foo(3)');
select filtered_row_count('execute foo(3)');
select filtered_row_count('execute foo(3)');
select filtered_row_count('execute foo(3)');
drop table columnar_prepared_stmt;

--
-- https://github.com/citusdata/citus/issues/5258
--
set default_table_access_method to columnar;
CREATE TABLE atest1 ( a int, b text );
CREATE TABLE atest2 (col1 varchar(10), col2 boolean);
INSERT INTO atest1 VALUES (1, 'one');

SELECT * FROM atest1; -- ok
SELECT * FROM atest2; -- ok
INSERT INTO atest1 VALUES (2, 'two'); -- ok
INSERT INTO atest1 SELECT 1, b FROM atest1; -- ok

SELECT * FROM atest2 WHERE ( col1 IN ( SELECT b FROM atest1 ) );

CREATE TABLE t1 (name TEXT, n INTEGER);
CREATE TABLE t2 (name TEXT, n INTEGER);
CREATE TABLE t3 (name TEXT, n INTEGER);

INSERT INTO t1 VALUES ( 'bb', 11 );
INSERT INTO t2 VALUES ( 'bb', 12 );
INSERT INTO t2 VALUES ( 'cc', 22 );
INSERT INTO t2 VALUES ( 'ee', 42 );
INSERT INTO t3 VALUES ( 'bb', 13 );
INSERT INTO t3 VALUES ( 'cc', 23 );
INSERT INTO t3 VALUES ( 'dd', 33 );

SELECT * FROM
  (SELECT name, n as s1_n, 1 as s1_1 FROM t1) as s1
  NATURAL INNER JOIN
  (SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2
  NATURAL INNER JOIN
  (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3;

CREATE TABLE numrange_test (nr NUMRANGE);
INSERT INTO numrange_test VALUES('[,)');
INSERT INTO numrange_test VALUES('[3,]');
INSERT INTO numrange_test VALUES('[, 5)');
INSERT INTO numrange_test VALUES(numrange(1.1, 2.2));
INSERT INTO numrange_test VALUES('empty');
INSERT INTO numrange_test VALUES(numrange(1.7, 1.7, '[]'));

create table numrange_test2(nr numrange);
INSERT INTO numrange_test2 VALUES('[, 5)');
INSERT INTO numrange_test2 VALUES(numrange(1.1, 2.2));
INSERT INTO numrange_test2 VALUES(numrange(1.1, 2.2));
INSERT INTO numrange_test2 VALUES(numrange(1.1, 2.2,'()'));
INSERT INTO numrange_test2 VALUES('empty');

set enable_nestloop=t;
set enable_hashjoin=f;
set enable_mergejoin=f;

select * from numrange_test natural join numrange_test2 order by nr;

DROP TABLE atest1, atest2, t1, t2, t3, numrange_test, numrange_test2;

set default_table_access_method to default;

set columnar.planner_debug_level to notice;

BEGIN;
  SET LOCAL columnar.stripe_row_limit = 2000;
  SET LOCAL columnar.chunk_group_row_limit = 1000;
  create table pushdown_test (a int, b int) using columnar;
  insert into pushdown_test values (generate_series(1, 200000));
COMMIT;

SET columnar.max_custom_scan_paths TO 50;
SET columnar.qual_pushdown_correlation_threshold TO 0.0;

-- OR-of-equalities: max a is 200000, so 204356 can skip all chunks
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT sum(a) FROM pushdown_test WHERE
    a = 204356 or a = 104356 or a = 76556;
SELECT sum(a) FROM pushdown_test WHERE
  a = 204356 or a = 104356 or a = 76556;
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT sum(a) FROM pushdown_test WHERE
    a = 194356 or a = 104356 or a = 76556;
SELECT sum(a) FROM pushdown_test WHERE
  a = 194356 or a = 104356 or a = 76556;
-- one arm references another column, so only partial pushdown is possible
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT sum(a) FROM pushdown_test WHERE
    a = 204356 or a > a*-1 + b;
-- OR of range predicates
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT sum(a) FROM pushdown_test where
    (a > 1000 and a < 10000) or (a > 20000 and a < 50000);
SELECT sum(a) FROM pushdown_test where
  (a > 1000 and a < 10000) or (a > 20000 and a < 50000);
-- volatile (random()) and self-referencing arms cannot be pushed down
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT sum(a) FROM pushdown_test where
    (a > random() and a < 2*a) or (a > 100);
SELECT sum(a) FROM pushdown_test where
  (a > random() and a < 2*a) or (a > 100);
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT sum(a) FROM pushdown_test where
    (a > random() and a <= 2000) or (a > 200000-1010);
SELECT sum(a) FROM pushdown_test where
  (a > random() and a <= 2000) or (a > 200000-1010);

-- NOTE(review): hash_mem_multiplier presumably pinned so the subplan strategy
-- (hashed vs not) in the EXPLAIN output is stable across configurations
SET hash_mem_multiplier = 1.0;
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT sum(a) FROM pushdown_test where
    (
      a > random()
      and
      (
        (a < 200 and a not in (select a from pushdown_test)) or
        (a > 1000 and a < 2000)
      )
    )
    or
    (a > 200000-2010);
RESET hash_mem_multiplier;
SELECT sum(a) FROM pushdown_test where
  (
    a > random()
    and
    (
      (a < 200 and a not in (select a from pushdown_test)) or
      (a > 1000 and a < 2000)
    )
  )
  or
  (a > 200000-2010);

create function stable_1(arg int) returns int language plpgsql STRICT IMMUTABLE as
$$ BEGIN RETURN 1+arg; END; $$;

-- immutable function on a constant is pushable; on a column reference it is not
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000));
SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000));

RESET columnar.max_custom_scan_paths;
RESET columnar.qual_pushdown_correlation_threshold;
RESET columnar.planner_debug_level;
DROP TABLE pushdown_test;

-- https://github.com/citusdata/citus/issues/5803
-- Each pair is inserted in its own transaction (END commits, alias for COMMIT)
-- so rows land in separate stripes -- presumably what the issue exercises.
CREATE TABLE pushdown_test(id int, country text) using columnar;

BEGIN;
  INSERT INTO pushdown_test VALUES(1, 'AL');
  INSERT INTO pushdown_test VALUES(2, 'AU');
END;

BEGIN;
  INSERT INTO pushdown_test VALUES(3, 'BR');
  INSERT INTO pushdown_test VALUES(4, 'BT');
END;

BEGIN;
  INSERT INTO pushdown_test VALUES(5, 'PK');
  INSERT INTO pushdown_test VALUES(6, 'PA');
END;

BEGIN;
  INSERT INTO pushdown_test VALUES(7, 'USA');
  INSERT INTO pushdown_test VALUES(8, 'ZW');
END;

-- IN-list of constants should be pushed down as a chunk filter
EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW');
SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW');

-- test for volatile functions with IN
CREATE FUNCTION volatileFunction() returns TEXT language plpgsql AS
$$
BEGIN
  return 'AL';
END;
$$;

EXPLAIN (analyze on, costs off, timing off, summary off)
  SELECT * FROM pushdown_test WHERE country IN ('USA', 'ZW', volatileFunction());
SELECT * FROM pushdown_test WHERE country IN ('USA', 'ZW', volatileFunction());

DROP TABLE pushdown_test;