-- -- multi subquery in where queries aims to expand existing subquery pushdown -- regression tests to cover more cases specifically subqueries in WHERE clause -- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql -- -- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests -- subqueries in WHERE with greater operator SELECT user_id FROM users_table WHERE value_2 > (SELECT max(value_2) FROM events_table WHERE users_table.user_id = events_table.user_id AND event_type = 1 GROUP BY user_id ) GROUP BY user_id HAVING count(*) > 2 ORDER BY user_id LIMIT 5; -- same query with one additional join on non distribution column SELECT user_id FROM users_table WHERE value_2 > (SELECT max(value_2) FROM events_table WHERE users_table.user_id = events_table.user_id AND event_type = 1 AND users_table.time > events_table.time GROUP BY user_id ) GROUP BY user_id HAVING count(*) > 1 ORDER BY user_id LIMIT 5; -- the other way around is not supported SELECT user_id FROM users_table WHERE value_2 > (SELECT max(value_2) FROM events_table WHERE users_table.user_id > events_table.user_id AND event_type = 1 AND users_table.time = events_table.time GROUP BY user_id ) GROUP BY user_id HAVING count(*) > 1 ORDER BY user_id LIMIT 5; -- subqueries in where with ALL operator SELECT user_id FROM users_table WHERE value_2 > 1 AND value_2 < ALL (SELECT avg(value_3) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id) GROUP BY 1 ORDER BY 1 DESC LIMIT 3; -- IN operator on non-partition key SELECT user_id FROM events_table as e1 WHERE event_type IN (SELECT event_type FROM events_table as e2 WHERE value_2 = 1 AND value_3 > 3 AND e1.user_id = e2.user_id ) ORDER BY 1; -- NOT IN on non-partition key SELECT user_id FROM events_table as e1 WHERE event_type NOT IN (SELECT event_type FROM events_table as e2 WHERE value_2 = 1 AND value_3 > 3 AND e1.user_id = e2.user_id ) GROUP BY 1 HAVING count(*) > 2 ORDER BY 1; -- non-correlated query with =ANY on partition keys SELECT user_id, count(*) FROM users_table WHERE user_id =ANY(SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) GROUP BY 1 ORDER BY 2 DESC LIMIT 5; -- users that appeared more than 118 times SELECT user_id FROM users_table WHERE 2 <= (SELECT count(*) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id) GROUP BY user_id ORDER BY user_id; -- the following query doesn't have a meaningful result -- but it is a valid query with an arbitrary subquery in -- WHERE clause SELECT user_id, value_2 FROM users_table WHERE value_1 > 1 AND value_1 < 3 AND value_2 >= 1 AND user_id IN ( SELECT e1.user_id FROM ( -- Get the first time each user viewed the homepage. SELECT user_id, 1 AS view_homepage, min(time) AS view_homepage_time FROM events_table WHERE event_type IN (0, 1) GROUP BY user_id ) e1 LEFT JOIN LATERAL ( SELECT user_id, 1 AS use_demo, time AS use_demo_time FROM events_table WHERE user_id = e1.user_id AND event_type IN (1, 2) ORDER BY time ) e2 ON true LEFT JOIN LATERAL ( SELECT user_id, 1 AS enter_credit_card, time AS enter_credit_card_time FROM events_table WHERE user_id = e2.user_id AND event_type IN (2, 3) ORDER BY time ) e3 ON true LEFT JOIN LATERAL ( SELECT 1 AS submit_card_info, user_id, time AS enter_credit_card_time FROM events_table WHERE user_id = e3.user_id AND event_type IN (3, 4) ORDER BY time ) e4 ON true LEFT JOIN LATERAL ( SELECT 1 AS see_bought_screen FROM events_table WHERE user_id = e4.user_id AND event_type IN (5, 6) ORDER BY time ) e5 ON true group by e1.user_id HAVING sum(submit_card_info) > 0 ) ORDER BY 1, 2; -- similar to the above query -- the following query doesn't have a meaningful result -- but it is a valid query with an arbitrary subquery in -- WHERE clause SELECT user_id FROM users_table WHERE user_id IN ( SELECT user_id FROM ( SELECT subquery_1.user_id, count_pay FROM ( (SELECT users_table.user_id, 'action=>1' AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 1 AND users_table.user_id <= 3 AND events_table.event_type > 1 AND events_table.event_type < 3 ) UNION (SELECT users_table.user_id, 'action=>2' AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 1 AND users_table.user_id <= 3 AND events_table.event_type > 2 AND events_table.event_type < 4 ) ) AS subquery_1 LEFT JOIN (SELECT user_id, COUNT(*) AS count_pay FROM users_table WHERE user_id >= 1 AND user_id <= 3 AND users_table.value_1 > 3 AND users_table.value_1 < 5 GROUP BY user_id HAVING COUNT(*) > 1) AS subquery_2 ON subquery_1.user_id = subquery_2.user_id GROUP BY subquery_1.user_id, count_pay) AS subquery_top GROUP BY count_pay, user_id ) GROUP BY user_id HAVING count(*) > 1 AND sum(value_2) > 29 ORDER BY 1; -- the following query doesn't have a meaningful result -- but it is a valid query with an arbitrary subquery in -- FROM clause involving a complex query in WHERE clause SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND u.user_id IN ( SELECT user_id FROM users_table WHERE value_2 >= 1 AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id) AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id = users_table.user_id) ) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1; -- -- below tests only aims for cases where all relations -- are not joined on partition key -- -- e4 is not joined on the partition key SELECT user_id, value_2 FROM users_table WHERE value_1 > 1 AND value_1 < 2 AND value_2 >= 1 AND user_id IN ( SELECT e1.user_id FROM ( -- Get the first time each user viewed the homepage. SELECT user_id, 1 AS view_homepage, min(time) AS view_homepage_time FROM events_table WHERE event_type IN (0, 1) GROUP BY user_id ) e1 LEFT JOIN LATERAL ( SELECT user_id, 1 AS use_demo, time AS use_demo_time FROM events_table WHERE user_id = e1.user_id AND event_type IN (1, 2) ORDER BY time ) e2 ON true LEFT JOIN LATERAL ( SELECT user_id, 1 AS enter_credit_card, time AS enter_credit_card_time FROM events_table WHERE user_id = e2.user_id AND event_type IN (2, 3) ORDER BY time ) e3 ON true LEFT JOIN LATERAL ( SELECT 1 AS submit_card_info, user_id, time AS enter_credit_card_time FROM events_table WHERE value_2 = e3.user_id AND event_type IN (3, 4) ORDER BY time ) e4 ON true LEFT JOIN LATERAL ( SELECT 1 AS see_bought_screen FROM events_table WHERE user_id = e4.user_id AND event_type IN (5, 6) ORDER BY time ) e5 ON true group by e1.user_id HAVING sum(submit_card_info) > 0 ); -- left leaf query does not return partition key SELECT user_id FROM users_table WHERE user_id IN ( SELECT user_id FROM ( SELECT subquery_1.user_id, count_pay FROM ( (SELECT 2 * users_table.user_id as user_id, 'action=>1' AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 1 AND users_table.user_id <= 3 AND events_table.event_type > 1 AND events_table.event_type < 3 ) UNION (SELECT users_table.user_id, 'action=>2' AS event, events_table.time FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND users_table.user_id >= 1 AND users_table.user_id <= 3 AND events_table.event_type > 2 AND events_table.event_type < 4 ) ) AS subquery_1 LEFT JOIN (SELECT user_id, COUNT(*) AS count_pay FROM users_table WHERE user_id >= 1 AND user_id <= 3 AND users_table.value_1 > 3 AND users_table.value_1 < 5 GROUP BY user_id HAVING COUNT(*) > 1) AS subquery_2 ON subquery_1.user_id = subquery_2.user_id GROUP BY subquery_1.user_id, count_pay) AS subquery_top GROUP BY count_pay, user_id ) GROUP BY user_id HAVING count(*) > 1 AND sum(value_2) > 29 ORDER BY 1; -- NOT EXISTS query has non-equi join SELECT user_id, array_length(events_table, 1) FROM ( SELECT user_id, array_agg(event ORDER BY time) AS events_table FROM ( SELECT u.user_id, e.event_type::text AS event, e.time FROM users_table AS u, events_table AS e WHERE u.user_id = e.user_id AND u.user_id IN ( SELECT user_id FROM users_table WHERE value_2 >= 5 AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id) AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id != users_table.user_id) ) ) t GROUP BY user_id ) q ORDER BY 2 DESC, 1; -- subquery in where clause doesn't have a relation, but is constant SELECT user_id FROM users_table WHERE value_2 > (SELECT 1) ORDER BY 1 ASC LIMIT 2; -- subquery in where clause has a volatile function and no relation -- thus we recursively plan it SELECT user_id FROM users_table WHERE value_2 > (SELECT random()) AND user_id < 0 ORDER BY 1 ASC LIMIT 2; -- OFFSET is not supported in the subquey SELECT user_id FROM users_table WHERE value_2 > (SELECT max(value_2) FROM events_table WHERE users_table.user_id = events_table.user_id AND event_type = 2 GROUP BY user_id OFFSET 3 ); -- we can detect unsupported subquerues even if they appear -- in WHERE subquery -> FROM subquery -> WHERE subquery -- but we can recursively plan that anyway SELECT DISTINCT user_id FROM users_table WHERE user_id IN (SELECT f_inner.user_id FROM ( SELECT e1.user_id FROM users_table u1, events_table e1 WHERE e1.user_id = u1.user_id ) as f_inner, ( SELECT e1.user_id FROM users_table u1, events_table e1 WHERE e1.user_id = u1.user_id AND e1.user_id IN (SELECT user_id FROM users_table LIMIT 3 ) ) as f_outer WHERE f_inner.user_id = f_outer.user_id ) ORDER BY 1 LIMIT 3; -- semi join is not on the partition key for the third subquery, and recursively planned SET client_min_messages TO DEBUG1; SELECT user_id FROM users_table WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4) AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6) ORDER BY 1 DESC LIMIT 3; RESET client_min_messages; CREATE FUNCTION test_join_function(integer, integer) RETURNS bool AS 'select $1 > $2;' LANGUAGE SQL IMMUTABLE RETURNS NULL ON NULL INPUT; -- we disallow JOINs via functions SELECT user_id, value_2 FROM users_table WHERE value_1 = 1 AND value_2 >= 2 AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 1 AND test_join_function(events_table.user_id, users_table.user_id)) ORDER BY 1 DESC, 2 DESC LIMIT 3; DROP FUNCTION test_join_function(int,int);