-- match: basic SELECT re2match('hello world', 'h.*o'); re2match ---------- t (1 row) SELECT re2match('hello world', '^world'); re2match ---------- f (1 row) SELECT re2match('hello world', 'world$'); re2match ---------- t (1 row) SELECT re2match('line1' || chr(10) || 'line2', 'line1.line2'); -- dot matches newline re2match ---------- t (1 row) SELECT re2match(NULL, 'x') IS NULL AS null_propagation; null_propagation ------------------ t (1 row) -- match: CH edge cases SELECT re2match('Hello', ''); -- empty pattern matches re2match ---------- t (1 row) SELECT re2match('Too late', '(?i)too late'); -- inline case-insensitive re2match ---------- t (1 row) SELECT re2match('Too late', '(?i:too late)'); re2match ---------- t (1 row) SELECT re2match('a key="v" ', 'key="(.*?)"'); -- lazy quantifier re2match ---------- t (1 row) -- extract SELECT re2extract('foo bar baz', '(\w+)\s+(\w+)'); -- first capture group re2extract ------------ foo (1 row) SELECT re2extract('foo bar baz', '\w+'); -- no groups: whole match re2extract ------------ foo (1 row) SELECT re2extract('no match here', '\d+'); -- no match: empty string re2extract ------------ (1 row) SELECT re2extract(NULL, '\d+') IS NULL AS ex_null; ex_null --------- t (1 row) -- extractall SELECT re2extractall('abc 123 def 456', '\d+'); re2extractall --------------- {123,456} (1 row) SELECT re2extractall('aaa bbb ccc', '(\w)\w+'); -- first capture group re2extractall --------------- {a,b,c} (1 row) SELECT re2extractall('no match', '\d+'); -- empty array re2extractall --------------- {} (1 row) SELECT re2extractall('{"a":"1","b":"2","c":"","d":"4"}', ':"([^"]*)"'); -- CH: empty values re2extractall --------------- {1,2,"",4} (1 row) SELECT re2extractall(NULL, '\d+') IS NULL AS ea_null; ea_null --------- t (1 row) -- regexpextract SELECT re2regexpextract('100-200', '(\d+)-(\d+)', 1); -- first group re2regexpextract ------------------ 100 (1 row) SELECT re2regexpextract('100-200', '(\d+)-(\d+)', 2); -- second group re2regexpextract ------------------ 200 (1 row) SELECT re2regexpextract('100-200', '(\d+)-(\d+)', 0); -- whole match re2regexpextract ------------------ 100-200 (1 row) SELECT re2regexpextract('100-200', '(\d+).*', 1); -- greedy re2regexpextract ------------------ 100 (1 row) SELECT re2regexpextract('100-200', '([a-z])', 1); -- no match: empty string re2regexpextract ------------------ (1 row) SELECT re2regexpextract('2024-01-15', '(\d{4})-(\d{2})-(\d{2})'); -- default index=1 re2regexpextract ------------------ 2024 (1 row) -- CH: greedy competing groups SELECT re2regexpextract('0123456789', '(\d+)(\d+)', 1); re2regexpextract ------------------ 012345678 (1 row) SELECT re2regexpextract('0123456789', '(\d+)(\d+)', 2); re2regexpextract ------------------ 9 (1 row) -- null propagation SELECT re2regexpextract(NULL, '(\d+)', 1) IS NULL AS re_null1; re_null1 ---------- t (1 row) SELECT re2regexpextract('100', NULL, 1) IS NULL AS re_null2; re_null2 ---------- t (1 row) -- regexpextract errors \set ON_ERROR_STOP off SELECT re2regexpextract('100-200', '(\d+)-(\d+)', 3); -- out of range ERROR: group index 3 out of range [0, 2] SELECT re2regexpextract('100-200', '(\d+)-(\d+)', -1); -- negative ERROR: group index -1 out of range [0, 2] SELECT re2regexpextract('100-200', '\d+-\d+', 1); -- no groups + index 1 ERROR: group index 1 out of range [0, 0] \set ON_ERROR_STOP on -- extractgroups SELECT re2extractgroups('hello world', '(\w+) (\w+)'); re2extractgroups ------------------ {hello,world} (1 row) SELECT re2extractgroups('2024-01-15', '(\d{4})-(\d{2})-(\d{2})'); re2extractgroups ------------------ {2024,01,15} (1 row) SELECT re2extractgroups('no match', '(\d{4})-(\d{2})-(\d{2})'); -- no match: empty array re2extractgroups ------------------ {} (1 row) SELECT re2extractgroups(NULL, '(\d+)') IS NULL AS eg_null; eg_null --------- t (1 row) -- extractgroups errors \set ON_ERROR_STOP off SELECT re2extractgroups('hello', '\w+'); -- no capture groups ERROR: pattern has no capturing groups \set ON_ERROR_STOP on -- replaceregexpone SELECT re2replaceregexpone('Hello', 'l', 'x'); -- first only re2replaceregexpone --------------------- Hexlo (1 row) SELECT re2replaceregexpone('hello world', '(\w+)', 'REPLACED'); re2replaceregexpone --------------------- REPLACED world (1 row) SELECT re2replaceregexpone('hello world', '(\w+)', '[\0]'); -- backref \0 re2replaceregexpone --------------------- [hello] world (1 row) SELECT re2replaceregexpone('2024-01-15', '(\d+)-(\d+)-(\d+)', '\3/\2/\1'); re2replaceregexpone --------------------- 15/01/2024 (1 row) -- CH: trim leading commas only (first match) SELECT re2replaceregexpone(',,1,,', '^[,]*|[,]*$', ''); re2replaceregexpone --------------------- 1,, (1 row) SELECT re2replaceregexpone('1,,', '^[,]*|[,]*$', ''); re2replaceregexpone --------------------- 1,, (1 row) SELECT re2replaceregexpone(',,1', '^[,]*|[,]*$', ''); re2replaceregexpone --------------------- 1 (1 row) -- null propagation SELECT re2replaceregexpone(NULL, '\d+', 'x') IS NULL AS rp1_null; rp1_null ---------- t (1 row) -- replaceregexpone error: invalid backref \set ON_ERROR_STOP off SELECT re2replaceregexpone('Hello', 'l', '\1'); -- \1: backref beyond 0 group(s) ERROR: \1: backref beyond 0 group(s) \set ON_ERROR_STOP on -- replaceregexpall SELECT re2replaceregexpall('Hello', 'l', 'x'); -- all occurrences re2replaceregexpall --------------------- Hexxo (1 row) SELECT re2replaceregexpall('abc 123 def 456', '\d+', 'NUM'); re2replaceregexpall --------------------- abc NUM def NUM (1 row) SELECT re2replaceregexpall('hello', '.', '[\0]'); re2replaceregexpall --------------------- [h][e][l][l][o] (1 row) SELECT re2replaceregexpall('aaa', 'a', 'bb'); re2replaceregexpall --------------------- bbbbbb (1 row) -- CH empty match edge cases SELECT re2replaceregexpall(',,1,,', '^[,]*|[,]*$', ''); -- strip leading/trailing re2replaceregexpall --------------------- 1 (1 row) SELECT re2replaceregexpall(',,1', '^[,]*|[,]*$', ''); re2replaceregexpall --------------------- 1 (1 row) SELECT re2replaceregexpall('1,,', '^[,]*|[,]*$', ''); re2replaceregexpall --------------------- 1 (1 row) SELECT re2replaceregexpall('a', 'z*', ''); -- empty matches preserved re2replaceregexpall --------------------- a (1 row) SELECT re2replaceregexpall('aazzq', 'z*', ''); re2replaceregexpall --------------------- aaq (1 row) -- CH: ^ anchor only SELECT re2replaceregexpall('Hello, World!', '^', 'here: '); re2replaceregexpall --------------------- here: Hello, World! (1 row) -- domain extraction SELECT re2replaceregexpall('https://www.clickhouse.com/', '^https?://(?:www\.)?([^/]+)/.*$', '\1'); re2replaceregexpall --------------------- clickhouse.com (1 row) -- null propagation SELECT re2replaceregexpall(NULL, '\d+', 'x') IS NULL AS rpa_null; rpa_null ---------- t (1 row) -- countmatches SELECT re2countmatches('', 'foo'); -- empty haystack re2countmatches ----------------- 0 (1 row) SELECT re2countmatches('foo', ''); -- empty pattern: 0 (CH compat) re2countmatches ----------------- 0 (1 row) SELECT re2countmatches('foobarfoo', 'foo'); -- basic re2countmatches ----------------- 2 (1 row) SELECT re2countmatches('oooo', 'oo'); -- non-overlapping re2countmatches ----------------- 2 (1 row) SELECT re2countmatches('abc 123 def 456 ghi 789', '\d+'); re2countmatches ----------------- 3 (1 row) SELECT re2countmatches('aaa', 'a'); re2countmatches ----------------- 3 (1 row) SELECT re2countmatches('foo', '[f]{0}'); -- zero-width: 0 (CH compat) re2countmatches ----------------- 0 (1 row) SELECT re2countmatches('foo', '[f]{0}foo'); -- zero-width prefix + content re2countmatches ----------------- 1 (1 row) -- CH: [a-zA-Z]* on mixed content, counts only non-empty matches SELECT re2countmatches(' foo bar ', '[a-zA-Z]*'); re2countmatches ----------------- 2 (1 row) -- CH: capturing groups SELECT re2countmatches('foobarbazfoobarbaz', 'foo(bar)(?:baz|)'); re2countmatches ----------------- 2 (1 row) -- null propagation SELECT re2countmatches(NULL, '\d+') IS NULL AS cm_null; cm_null --------- t (1 row) -- countmatchescaseinsensitive SELECT re2countmatchescaseinsensitive('foobarFOO', 'foo'); re2countmatchescaseinsensitive -------------------------------- 2 (1 row) SELECT re2countmatchescaseinsensitive('ooOO', 'oo'); re2countmatchescaseinsensitive -------------------------------- 2 (1 row) -- multimatchany SELECT re2multimatchany('hello world', '\d+', 'world'); re2multimatchany ------------------ t (1 row) SELECT re2multimatchany('hello world', '\d+', '^\d+$'); re2multimatchany ------------------ f (1 row) SELECT re2multimatchany('abc', ''); -- empty pattern matches re2multimatchany ------------------ t (1 row) SELECT re2multimatchany('', ''); -- empty haystack + empty pattern re2multimatchany ------------------ t (1 row) SELECT re2multimatchany('', 'some string'); -- empty haystack, no match re2multimatchany ------------------ f (1 row) SELECT re2multimatchany('abc', VARIADIC ARRAY[]::text[]); -- empty array: no match re2multimatchany ------------------ f (1 row) SELECT re2multimatchany(NULL, '\d+') IS NULL AS mm_null; mm_null --------- t (1 row) -- multimatchanyindex SELECT re2multimatchanyindex('hello world', '\d+', 'world', 'hello'); re2multimatchanyindex ----------------------- 2 (1 row) SELECT re2multimatchanyindex('hello world', '\d+', '^\d+$'); re2multimatchanyindex ----------------------- 0 (1 row) -- multimatchallindices SELECT re2multimatchallindices('hello world', 'hello', '\d+', 'world', 'o'); re2multimatchallindices ------------------------- {1,3,4} (1 row) SELECT re2multimatchallindices('test', '\d+', '[A-Z]+'); re2multimatchallindices ------------------------- {} (1 row) -- invalid pattern \set ON_ERROR_STOP off SELECT re2match('hello', '[invalid'); ERROR: invalid RE2 pattern: missing ]: [invalid \set ON_ERROR_STOP on -- ==== bytea overloads (zero-byte handling, CH tests 01083/01085) ==== -- match with \0 in haystack (CH: match('\0 key="v" ', 'key="(.*?)"') -> 1) SELECT re2match('\x00'::bytea || ' key="v" '::bytea, 'key="(.*?)"'); re2match ---------- t (1 row) SELECT re2match('a'::bytea || '\x00'::bytea || 'b'::bytea, 'a.b'); -- dot matches \0 re2match ---------- t (1 row) -- extract from bytea with \0 SELECT re2extract('a'::bytea || '\x00'::bytea || 'key="val"'::bytea, 'key="([^"]*)"'); re2extract ------------ \x76616c (1 row) SELECT re2extract('hello'::bytea || '\x00'::bytea || 'world'::bytea, '\w+'); re2extract -------------- \x68656c6c6f (1 row) -- extractall with \0 SELECT re2extractall('a'::bytea || '\x00'::bytea || '1'::bytea || '\x00'::bytea || '2'::bytea || '\x00'::bytea || '3'::bytea, '\d'); re2extractall --------------------------- {"\\x31","\\x32","\\x33"} (1 row) -- regexpextract with \0 SELECT re2regexpextract('a'::bytea || '\x00'::bytea || '100-200'::bytea, '(\d+)-(\d+)', 1); re2regexpextract ------------------ \x313030 (1 row) SELECT re2regexpextract('a'::bytea || '\x00'::bytea || '100-200'::bytea, '(\d+)-(\d+)', 2); re2regexpextract ------------------ \x323030 (1 row) -- extractgroups with \0 SELECT re2extractgroups('a'::bytea || '\x00'::bytea || 'hello world'::bytea, '(\w+) (\w+)'); re2extractgroups ----------------------------------- {"\\x68656c6c6f","\\x776f726c64"} (1 row) -- replaceregexpone/all with \0 in haystack, pattern matches \0 via \x00 SELECT re2replaceregexpone('a'::bytea || '\x00'::bytea || 'b'::bytea || '\x00'::bytea || 'c'::bytea, '\x00', 'X'); re2replaceregexpone --------------------- \x6158620063 (1 row) SELECT re2replaceregexpall('a'::bytea || '\x00'::bytea || 'b'::bytea || '\x00'::bytea || 'c'::bytea, '\x00', 'X'); re2replaceregexpall --------------------- \x6158625863 (1 row) -- countmatches with \0 SELECT re2countmatches('a'::bytea || '\x00'::bytea || 'b'::bytea || '\x00'::bytea || 'c'::bytea, '\x00'); re2countmatches ----------------- 2 (1 row) -- multimatchany with \0 haystack SELECT re2multimatchany('a'::bytea || '\x00'::bytea || 'key="v"'::bytea, 'key', 'nope'); re2multimatchany ------------------ t (1 row)