SET datestyle = 'ISO'; CREATE SERVER binary_json_loopback FOREIGN DATA WRAPPER clickhouse_fdw OPTIONS(dbname 'json_test', driver 'binary'); CREATE SERVER http_json_loopback FOREIGN DATA WRAPPER clickhouse_fdw OPTIONS(dbname 'json_test', driver 'http'); CREATE USER MAPPING FOR CURRENT_USER SERVER binary_json_loopback; CREATE USER MAPPING FOR CURRENT_USER SERVER http_json_loopback; SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS json_test'); clickhouse_raw_query ---------------------- (1 row) SELECT clickhouse_raw_query('CREATE DATABASE json_test'); clickhouse_raw_query ---------------------- (1 row) SELECT clickhouse_raw_query($$ CREATE TABLE json_test.things ( id Int32 NOT NULL, data JSON NOT NULL ) ENGINE = MergeTree PARTITION BY id ORDER BY (id); $$); clickhouse_raw_query ---------------------- (1 row) CREATE SCHEMA json_bin; CREATE SCHEMA json_http; IMPORT FOREIGN SCHEMA "json_test" FROM SERVER binary_json_loopback INTO json_bin; \d json_bin.things Foreign table "json_bin.things" Column | Type | Collation | Nullable | Default | FDW options --------+---------+-----------+----------+---------+------------- id | integer | | not null | | data | jsonb | | not null | | Server: binary_json_loopback FDW options: (database 'json_test', table_name 'things', engine 'MergeTree') IMPORT FOREIGN SCHEMA "json_test" FROM SERVER http_json_loopback INTO json_http; \d json_http.things Foreign table "json_http.things" Column | Type | Collation | Nullable | Default | FDW options --------+---------+-----------+----------+---------+------------- id | integer | | not null | | data | jsonb | | not null | | Server: http_json_loopback FDW options: (database 'json_test', table_name 'things', engine 'MergeTree') -- Fails pending https://github.com/ClickHouse/clickhouse-cpp/issues/422 INSERT INTO json_bin.things VALUES (1, '{"id": 1, "name": "widget", "size": "large", "stocked": true}'), (2, '{"id": 2, "name": "sprocket", "size": "small", "stocked": true}') ; ERROR: pg_clickhouse: could 
not prepare insert - unsupported column type: JSON INSERT INTO json_http.things VALUES (1, '{"id": 1, "name": "widget", "size": "large", "stocked": true}'), (2, '{"id": 2, "name": "sprocket", "size": "small", "stocked": true}'), (3, '{"id": 3, "name": "gizmo", "size": "medium", "stocked": true}'), (4, '{"id": 4, "name": "doodad", "size": "large", "stocked": false}') ; SELECT * FROM json_bin.things ORDER BY id; ERROR: pg_clickhouse: unsupported column type: JSON DETAIL: Remote Query: SELECT id, data FROM json_test.things ORDER BY id ASC NULLS LAST SELECT * FROM json_http.things ORDER BY id; id | data ----+----------------------------------------------------------------- 1 | {"id": 1, "name": "widget", "size": "large", "stocked": true} 2 | {"id": 2, "name": "sprocket", "size": "small", "stocked": true} 3 | {"id": 3, "name": "gizmo", "size": "medium", "stocked": true} 4 | {"id": 4, "name": "doodad", "size": "large", "stocked": false} (4 rows) -- Subscript access on JSON columns must not be pushed down to ClickHouse. -- ClickHouse JSON does not support the jsonb `column['key']` syntax (it -- requires dot notation), so subscripts must be evaluated locally by -- PostgreSQL. EXPLAIN (VERBOSE, COSTS OFF) SELECT data['name'] FROM json_http.things; QUERY PLAN ------------------------------------------------- Foreign Scan on json_http.things Output: data['name'::text] Remote SQL: SELECT data FROM json_test.things (3 rows) SELECT data['name'] FROM json_http.things ORDER BY id; data ------------ "widget" "sprocket" "gizmo" "doodad" (4 rows) -- DISTINCT forces an ORDER BY or HashAgg; the subscript must stay local. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT DISTINCT data['size'] FROM json_http.things; QUERY PLAN ------------------------------------------------------- HashAggregate Output: (data['size'::text]) Group Key: things.data['size'::text] -> Foreign Scan on json_http.things Output: data['size'::text] Remote SQL: SELECT data FROM json_test.things (6 rows) SELECT DISTINCT data['size'] FROM json_http.things; data ---------- "medium" "small" "large" (3 rows) -- GROUP BY with a JSON subscript expression. EXPLAIN (VERBOSE, COSTS OFF) SELECT data['size'], count(*) FROM json_http.things GROUP BY data['size']; QUERY PLAN ------------------------------------------------------- HashAggregate Output: (data['size'::text]), count(*) Group Key: things.data['size'::text] -> Foreign Scan on json_http.things Output: data['size'::text] Remote SQL: SELECT data FROM json_test.things (6 rows) SELECT data['size'], count(*) FROM json_http.things GROUP BY data['size']; data | count ----------+------- "medium" | 1 "small" | 1 "large" | 2 (3 rows) -- The jsonb ->> operator is pushed down in WHERE / ORDER BY clauses, but -- target-list expressions are evaluated locally (PostgreSQL fetches the whole -- column and applies the operator after). This query runs ->> locally. -- N.B.: Binary driver JSON data not yet supported. EXPLAIN (VERBOSE, COSTS OFF) SELECT data ->> 'name' FROM json_http.things; QUERY PLAN ------------------------------------------------- Foreign Scan on json_http.things Output: (data ->> 'name'::text) Remote SQL: SELECT data FROM json_test.things (3 rows) SELECT data ->> 'name' FROM json_http.things ORDER BY id; ?column? ---------- widget sprocket gizmo doodad (4 rows) -- WHERE clause with ->> equality must be pushed down. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.things WHERE data ->> 'name' = 'widget'; QUERY PLAN ------------------------------------------------------------------------------------ Foreign Scan on json_http.things Output: id, data Remote SQL: SELECT id, data FROM json_test.things WHERE ((data.name = 'widget')) (3 rows) SELECT * FROM json_http.things WHERE data ->> 'name' = 'widget'; id | data ----+--------------------------------------------------------------- 1 | {"id": 1, "name": "widget", "size": "large", "stocked": true} (1 row) -- WHERE clause with ->> and LIKE. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.things WHERE data ->> 'name' LIKE 'wid%'; QUERY PLAN ------------------------------------------------------------------------------------- Foreign Scan on json_http.things Output: id, data Remote SQL: SELECT id, data FROM json_test.things WHERE ((data.name LIKE 'wid%')) (3 rows) -- WHERE with multiple ->> conditions (AND). EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.things WHERE data ->> 'size' = 'large' AND data ->> 'stocked' = 'true'; QUERY PLAN ----------------------------------------------------------------------------------------------------------------- Foreign Scan on json_http.things Output: id, data Remote SQL: SELECT id, data FROM json_test.things WHERE ((data.size = 'large')) AND ((data.stocked = 'true')) (3 rows) -- WHERE with ->> in an OR condition. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.things WHERE data ->> 'name' = 'widget' OR data ->> 'name' = 'gizmo'; QUERY PLAN --------------------------------------------------------------------------------------------------------------- Foreign Scan on json_http.things Output: id, data Remote SQL: SELECT id, data FROM json_test.things WHERE (((data.name = 'widget') OR (data.name = 'gizmo'))) (3 rows) -- ORDER BY with ->> pushdown. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.things ORDER BY data ->> 'size'; QUERY PLAN --------------------------------------------------------------------------------------- Foreign Scan on json_http.things Output: id, data, (data ->> 'size'::text) Remote SQL: SELECT id, data FROM json_test.things ORDER BY data.size ASC NULLS LAST (3 rows) -- The jsonb -> operator: target-list expressions are evaluated locally -- (same as ->>). This query evaluates `->` locally. EXPLAIN (VERBOSE, COSTS OFF) SELECT data -> 'name' FROM json_http.things; QUERY PLAN ------------------------------------------------- Foreign Scan on json_http.things Output: (data -> 'name'::text) Remote SQL: SELECT data FROM json_test.things (3 rows) SELECT data -> 'name' FROM json_http.things ORDER BY id; ?column? ------------ "widget" "sprocket" "gizmo" "doodad" (4 rows) -- WHERE clause with -> equality must be pushed down. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.things WHERE data -> 'name' = '"widget"'; QUERY PLAN ---------------------------------------------------------------------------------------------------- Foreign Scan on json_http.things Output: id, data Remote SQL: SELECT id, data FROM json_test.things WHERE ((toJSONString(data.name) = '"widget"')) (3 rows) SELECT * FROM json_http.things WHERE data -> 'name' = '"widget"'; id | data ----+--------------------------------------------------------------- 1 | {"id": 1, "name": "widget", "size": "large", "stocked": true} (1 row) -- WHERE clause with -> JSON boolean literal must push down. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.things WHERE data -> 'stocked' = 'true'::jsonb; QUERY PLAN --------------------------------------------------------------------------------------------------- Foreign Scan on json_http.things Output: id, data Remote SQL: SELECT id, data FROM json_test.things WHERE ((toJSONString(data.stocked) = 'true')) (3 rows) SELECT * FROM json_http.things WHERE data -> 'stocked' = 'true'::jsonb ORDER BY id; id | data ----+----------------------------------------------------------------- 1 | {"id": 1, "name": "widget", "size": "large", "stocked": true} 2 | {"id": 2, "name": "sprocket", "size": "small", "stocked": true} 3 | {"id": 3, "name": "gizmo", "size": "medium", "stocked": true} (3 rows) -- WHERE clause with -> wraps the dot notation in toJSONString() so that the -- result is a proper JSON value (-> returns jsonb, not text like ->>). EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.things WHERE data -> 'name' = '"widget"'::jsonb; QUERY PLAN ---------------------------------------------------------------------------------------------------- Foreign Scan on json_http.things Output: id, data Remote SQL: SELECT id, data FROM json_test.things WHERE ((toJSONString(data.name) = '"widget"')) (3 rows) -- Edge cases: JSON keys that require identifier quoting. 
SELECT clickhouse_raw_query($$ CREATE TABLE json_test.special_keys ( id Int32 NOT NULL, data JSON NOT NULL ) ENGINE = MergeTree ORDER BY (id); $$); clickhouse_raw_query ---------------------- (1 row) CREATE FOREIGN TABLE json_http.special_keys (id integer NOT NULL, data jsonb NOT NULL) SERVER http_json_loopback OPTIONS (database 'json_test', table_name 'special_keys'); INSERT INTO json_http.special_keys VALUES (1, '{"my field": "hello", "CamelCase": "world", "select": "reserved"}'), (2, E'{"The \\"meaning\\" of life": 42, "back\\\\slash": "bs", "dotted.key": "dot", "it''s": "apos", "key/with!special@chars#": "special", "123numeric": "num"}'); -- Key with a space: must be quoted in the remote SQL. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.special_keys WHERE data ->> 'my field' = 'hello'; QUERY PLAN ----------------------------------------------------------------------------------------------- Foreign Scan on json_http.special_keys Output: id, data Remote SQL: SELECT id, data FROM json_test.special_keys WHERE ((data."my field" = 'hello')) (3 rows) SELECT data ->> 'my field' FROM json_http.special_keys; ?column? ---------- hello (2 rows) -- Key with mixed case. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.special_keys WHERE data ->> 'CamelCase' = 'world'; QUERY PLAN ------------------------------------------------------------------------------------------------ Foreign Scan on json_http.special_keys Output: id, data Remote SQL: SELECT id, data FROM json_test.special_keys WHERE ((data."CamelCase" = 'world')) (3 rows) SELECT data ->> 'CamelCase' FROM json_http.special_keys; ?column? ---------- world (2 rows) -- Key that is a SQL reserved word. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.special_keys WHERE data ->> 'select' = 'reserved'; QUERY PLAN ------------------------------------------------------------------------------------------------ Foreign Scan on json_http.special_keys Output: id, data Remote SQL: SELECT id, data FROM json_test.special_keys WHERE ((data."select" = 'reserved')) (3 rows) SELECT data ->> 'select' FROM json_http.special_keys; ?column? ---------- reserved (2 rows) -- Key containing embedded double quotes. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.special_keys WHERE data ->> 'The "meaning" of life' = '42'; QUERY PLAN ----------------------------------------------------------------------------------------------------------- Foreign Scan on json_http.special_keys Output: id, data Remote SQL: SELECT id, data FROM json_test.special_keys WHERE ((data."The ""meaning"" of life" = '42')) (3 rows) SELECT data ->> 'The "meaning" of life' FROM json_http.special_keys WHERE id = 2; ?column? ---------- 42 (1 row) -- Key containing a backslash. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.special_keys WHERE data ->> 'back\slash' = 'bs'; QUERY PLAN ---------------------------------------------------------------------------------------------- Foreign Scan on json_http.special_keys Output: id, data Remote SQL: SELECT id, data FROM json_test.special_keys WHERE ((data."back\slash" = 'bs')) (3 rows) SELECT data ->> 'back\slash' FROM json_http.special_keys WHERE id = 2; ?column? ---------- bs (1 row) -- Key containing a dot (must not be confused with nested access). 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.special_keys WHERE data ->> 'dotted.key' = 'dot'; QUERY PLAN ----------------------------------------------------------------------------------------------- Foreign Scan on json_http.special_keys Output: id, data Remote SQL: SELECT id, data FROM json_test.special_keys WHERE ((data."dotted.key" = 'dot')) (3 rows) SELECT data ->> 'dotted.key' FROM json_http.special_keys WHERE id = 2; ?column? ---------- (1 row) -- Key containing an apostrophe / single quote. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.special_keys WHERE data ->> 'it''s' = 'apos'; QUERY PLAN ------------------------------------------------------------------------------------------ Foreign Scan on json_http.special_keys Output: id, data Remote SQL: SELECT id, data FROM json_test.special_keys WHERE ((data."it's" = 'apos')) (3 rows) SELECT data ->> 'it''s' FROM json_http.special_keys WHERE id = 2; ?column? ---------- apos (1 row) -- Key with slashes, bangs, at-signs, etc. EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.special_keys WHERE data ->> 'key/with!special@chars#' = 'special'; QUERY PLAN ---------------------------------------------------------------------------------------------------------------- Foreign Scan on json_http.special_keys Output: id, data Remote SQL: SELECT id, data FROM json_test.special_keys WHERE ((data."key/with!special@chars#" = 'special')) (3 rows) -- Key that starts with a digit. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM json_http.special_keys WHERE data ->> '123numeric' = 'num'; QUERY PLAN ----------------------------------------------------------------------------------------------- Foreign Scan on json_http.special_keys Output: id, data Remote SQL: SELECT id, data FROM json_test.special_keys WHERE ((data."123numeric" = 'num')) (3 rows) -- ======================================================================= -- jsonb_extract_path_text / jsonb_extract_path pushdown -- ======================================================================= -- Create a table with nested JSON for multi-level path tests. SELECT clickhouse_raw_query($$ CREATE TABLE json_test.events ( id UInt32, event_name String, props JSON ) ENGINE = MergeTree ORDER BY (event_name, id); $$); clickhouse_raw_query ---------------------- (1 row) SELECT clickhouse_raw_query($$ INSERT INTO json_test.events VALUES (1, 'order', '{"customerId": "C100", "address": {"city": "Paris", "zip": "75001"}}'), (2, 'order', '{"customerId": "C200", "address": {"city": "London", "zip": "SW1A"}}'); $$); clickhouse_raw_query ---------------------- (1 row) CREATE FOREIGN TABLE json_http_events ( id integer, event_name text, props jsonb ) SERVER http_json_loopback OPTIONS (table_name 'events'); -- Target-list: jsonb_extract_path_text is evaluated locally (like -> / ->>). EXPLAIN (VERBOSE, COSTS OFF) SELECT jsonb_extract_path_text(props, 'customerId') FROM json_http_events; QUERY PLAN --------------------------------------------------------------------------- Foreign Scan on public.json_http_events Output: jsonb_extract_path_text(props, VARIADIC '{customerId}'::text[]) Remote SQL: SELECT props FROM json_test.events (3 rows) SELECT jsonb_extract_path_text(props, 'customerId') FROM json_http_events ORDER BY id; jsonb_extract_path_text ------------------------- C100 C200 (2 rows) -- Target-list: multi-level path, still evaluated locally. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT jsonb_extract_path_text(props, 'address', 'city') FROM json_http_events; QUERY PLAN ----------------------------------------------------------------------------- Foreign Scan on public.json_http_events Output: jsonb_extract_path_text(props, VARIADIC '{address,city}'::text[]) Remote SQL: SELECT props FROM json_test.events (3 rows) SELECT jsonb_extract_path_text(props, 'address', 'city') FROM json_http_events ORDER BY id; jsonb_extract_path_text ------------------------- Paris London (2 rows) -- Target-list: jsonb_extract_path (returns jsonb, not text), evaluated locally. EXPLAIN (VERBOSE, COSTS OFF) SELECT jsonb_extract_path(props, 'address') FROM json_http_events; QUERY PLAN ------------------------------------------------------------------- Foreign Scan on public.json_http_events Output: jsonb_extract_path(props, VARIADIC '{address}'::text[]) Remote SQL: SELECT props FROM json_test.events (3 rows) -- WHERE: single-level jsonb_extract_path_text pushes down as dot notation. EXPLAIN (VERBOSE, COSTS OFF) SELECT id FROM json_http_events WHERE jsonb_extract_path_text(props, 'customerId') = 'C100'; QUERY PLAN ------------------------------------------------------------------------------------- Foreign Scan on public.json_http_events Output: id Remote SQL: SELECT id FROM json_test.events WHERE ((props."customerId" = 'C100')) (3 rows) SELECT id FROM json_http_events WHERE jsonb_extract_path_text(props, 'customerId') = 'C100'; id ---- 1 (1 row) -- WHERE: multi-level jsonb_extract_path_text pushes down as dot notation. 
EXPLAIN (VERBOSE, COSTS OFF) SELECT id FROM json_http_events WHERE jsonb_extract_path_text(props, 'address', 'city') = 'Paris'; QUERY PLAN -------------------------------------------------------------------------------------- Foreign Scan on public.json_http_events Output: id Remote SQL: SELECT id FROM json_test.events WHERE ((props.address.city = 'Paris')) (3 rows) SELECT id FROM json_http_events WHERE jsonb_extract_path_text(props, 'address', 'city') = 'Paris'; id ---- 1 (1 row) -- WHERE: jsonb_extract_path pushes down with toJSONString wrapping. EXPLAIN (VERBOSE, COSTS OFF) SELECT id FROM json_http_events WHERE jsonb_extract_path(props, 'address', 'city') = '"Paris"'::jsonb; QUERY PLAN ------------------------------------------------------------------------------------------------------ Foreign Scan on public.json_http_events Output: id Remote SQL: SELECT id FROM json_test.events WHERE ((toJSONString(props.address.city) = '"Paris"')) (3 rows) DROP FOREIGN TABLE json_http_events; SELECT clickhouse_raw_query('DROP DATABASE json_test'); clickhouse_raw_query ---------------------- (1 row) DROP USER MAPPING FOR CURRENT_USER SERVER binary_json_loopback; DROP USER MAPPING FOR CURRENT_USER SERVER http_json_loopback; DROP SERVER binary_json_loopback CASCADE; NOTICE: drop cascades to foreign table json_bin.things DROP SERVER http_json_loopback CASCADE; NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to foreign table json_http.things drop cascades to foreign table json_http.special_keys