-- match: basic SELECT re2match('hello world', 'h.*o'); re2match ---------- t (1 row) SELECT re2match('hello world', '^world'); re2match ---------- f (1 row) SELECT re2match('hello world', 'world$'); re2match ---------- t (1 row) SELECT re2match('line1' || chr(10) || 'line2', 'line1.line2'); -- dot matches newline re2match ---------- t (1 row) SELECT re2match(NULL, 'x') IS NULL AS null_propagation; null_propagation ------------------ t (1 row) -- match: CH edge cases SELECT re2match('Hello', ''); -- empty pattern matches re2match ---------- t (1 row) SELECT re2match('Too late', '(?i)too late'); -- inline case-insensitive re2match ---------- t (1 row) SELECT re2match('Too late', '(?i:too late)'); re2match ---------- t (1 row) SELECT re2match('a key="v" ', 'key="(.*?)"'); -- lazy quantifier re2match ---------- t (1 row) -- extract SELECT re2extract('foo bar baz', '(\w+)\s+(\w+)'); -- first capture group re2extract ------------ foo (1 row) SELECT re2extract('foo bar baz', '\w+'); -- no groups: whole match re2extract ------------ foo (1 row) SELECT re2extract('no match here', '\d+'); -- no match: empty string re2extract ------------ (1 row) SELECT re2extract(NULL, '\d+') IS NULL AS ex_null; ex_null --------- t (1 row) -- extractall SELECT re2extractall('abc 123 def 456', '\d+'); re2extractall --------------- {123,456} (1 row) SELECT re2extractall('aaa bbb ccc', '(\w)\w+'); -- first capture group re2extractall --------------- {a,b,c} (1 row) SELECT re2extractall('no match', '\d+'); -- empty array re2extractall --------------- {} (1 row) SELECT re2extractall('{"a":"1","b":"2","c":"","d":"4"}', ':"([^"]*)"'); -- CH: empty values re2extractall --------------- {1,2,"",4} (1 row) SELECT re2extractall(NULL, '\d+') IS NULL AS ea_null; ea_null --------- t (1 row) -- regexpextract SELECT re2regexpextract('100-200', '(\d+)-(\d+)', 1); -- first group re2regexpextract ------------------ 100 (1 row) SELECT re2regexpextract('100-200', '(\d+)-(\d+)', 2); -- second group re2regexpextract ------------------ 200 (1 row) SELECT re2regexpextract('100-200', '(\d+)-(\d+)', 0); -- whole match re2regexpextract ------------------ 100-200 (1 row) SELECT re2regexpextract('100-200', '(\d+).*', 1); -- greedy re2regexpextract ------------------ 100 (1 row) SELECT re2regexpextract('100-200', '([a-z])', 1); -- no match: empty string re2regexpextract ------------------ (1 row) SELECT re2regexpextract('2024-01-15', '(\d{4})-(\d{2})-(\d{2})'); -- default index=1 re2regexpextract ------------------ 2024 (1 row) -- CH: greedy competing groups SELECT re2regexpextract('0123456789', '(\d+)(\d+)', 1); re2regexpextract ------------------ 012345678 (1 row) SELECT re2regexpextract('0123456789', '(\d+)(\d+)', 2); re2regexpextract ------------------ 9 (1 row) -- null propagation SELECT re2regexpextract(NULL, '(\d+)', 1) IS NULL AS re_null1; re_null1 ---------- t (1 row) SELECT re2regexpextract('100', NULL, 1) IS NULL AS re_null2; re_null2 ---------- t (1 row) -- regexpextract errors SELECT re2regexpextract('100-200', '(\d+)-(\d+)', 3); -- out of range ERROR: group index 3 out of range [0, 2] SELECT re2regexpextract('100-200', '(\d+)-(\d+)', -1); -- negative ERROR: group index -1 out of range [0, 2] SELECT re2regexpextract('100-200', '\d+-\d+', 1); -- no groups + index 1 ERROR: group index 1 out of range [0, 0] -- extractgroups SELECT re2extractgroups('hello world', '(\w+) (\w+)'); re2extractgroups ------------------ {hello,world} (1 row) SELECT re2extractgroups('2024-01-15', '(\d{4})-(\d{2})-(\d{2})'); re2extractgroups ------------------ {2024,01,15} (1 row) SELECT re2extractgroups('no match', '(\d{4})-(\d{2})-(\d{2})'); -- no match: empty array re2extractgroups ------------------ {} (1 row) SELECT re2extractgroups(NULL, '(\d+)') IS NULL AS eg_null; eg_null --------- t (1 row) -- extractgroups errors SELECT re2extractgroups('hello', '\w+'); -- no capture groups ERROR: pattern has no capturing groups -- extractallgroupsvertical / horizontal SELECT re2extractallgroupsvertical('abc=111, def=222, ghi=333', '("[^"]+"|\w+)=("[^"]+"|\w+)'); re2extractallgroupsvertical --------------------------------- {{abc,111},{def,222},{ghi,333}} (1 row) SELECT re2extractallgroupshorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\w+)=("[^"]+"|\w+)'); re2extractallgroupshorizontal ------------------------------- {{abc,def,ghi},{111,222,333}} (1 row) SELECT re2extractallgroupsvertical('2024-01-15 2025-06-30', '(\d{4})-(\d{2})-(\d{2})'); re2extractallgroupsvertical ----------------------------- {{2024,01,15},{2025,06,30}} (1 row) SELECT re2extractallgroupshorizontal('2024-01-15 2025-06-30', '(\d{4})-(\d{2})-(\d{2})'); re2extractallgroupshorizontal ------------------------------- {{2024,2025},{01,06},{15,30}} (1 row) SELECT re2extractallgroupsvertical('no match', '(\d+)'); -- empty array re2extractallgroupsvertical ----------------------------- {} (1 row) SELECT re2extractallgroupshorizontal('no match', '(\d+)'); -- empty array re2extractallgroupshorizontal ------------------------------- {} (1 row) SELECT re2extractallgroupsvertical(NULL, '(\d+)') IS NULL AS eav_null; eav_null ---------- t (1 row) SELECT re2extractallgroupshorizontal(NULL, '(\d+)') IS NULL AS eah_null; eah_null ---------- t (1 row) -- extractallgroups errors SELECT re2extractallgroupsvertical('hello', '\w+'); -- no capture groups ERROR: pattern has no capturing groups SELECT re2extractallgroupshorizontal('hello', '\w+'); -- no capture groups ERROR: pattern has no capturing groups -- regexpquotemeta SELECT re2regexpquotemeta('Hello. [World]? (Yes)*'); re2regexpquotemeta ------------------------------- Hello\. \[World\]\? \(Yes\)\* (1 row) SELECT re2regexpquotemeta('a+b*c?'); re2regexpquotemeta -------------------- a\+b\*c\? (1 row) SELECT re2regexpquotemeta('plain text'); -- no metas re2regexpquotemeta -------------------- plain text (1 row) SELECT re2regexpquotemeta(''); -- empty re2regexpquotemeta -------------------- (1 row) SELECT re2regexpquotemeta('a-b:c{d}|e^f$g\h'); re2regexpquotemeta ------------------------- a\-b\:c\{d}\|e\^f\$g\\h (1 row) -- escaped pattern roundtrips: re2match(s, re2regexpquotemeta(s)) is true SELECT re2match('1+1=2', re2regexpquotemeta('1+1')); re2match ---------- t (1 row) SELECT re2regexpquotemeta(NULL) IS NULL AS rqm_null; rqm_null ---------- t (1 row) -- splitbyregexp SELECT re2splitbyregexp('\d+', 'a12bc23de345f'); -- digit splitter re2splitbyregexp ------------------ {a,bc,de,f} (1 row) SELECT re2splitbyregexp('', 'abcde'); -- empty pattern: per char re2splitbyregexp ------------------ {a,b,c,d,e} (1 row) SELECT re2splitbyregexp(',', 'a,b,c'); -- char delimiter re2splitbyregexp ------------------ {a,b,c} (1 row) SELECT re2splitbyregexp(',', ',a,b,'); -- leading/trailing splits re2splitbyregexp ------------------ {"",a,b,""} (1 row) SELECT re2splitbyregexp(',', 'abc'); -- no match: whole string re2splitbyregexp ------------------ {abc} (1 row) SELECT re2splitbyregexp(',', ''); -- empty haystack re2splitbyregexp ------------------ {""} (1 row) SELECT re2splitbyregexp('', ''); -- both empty re2splitbyregexp ------------------ {} (1 row) SELECT re2splitbyregexp(',', 'a,b,c,d', 2); -- max_substrings cap re2splitbyregexp ------------------ {a,b} (1 row) SELECT re2splitbyregexp(',', 'a,b,c,d', 0); -- 0 = unlimited re2splitbyregexp ------------------ {a,b,c,d} (1 row) SELECT re2splitbyregexp('', 'abcdef', 3); -- empty pat + cap re2splitbyregexp ------------------ {a,b,c} (1 row) -- CH: zero-length match (e.g. 'a*') treated as no-match SELECT re2splitbyregexp('x*', 'foo'); re2splitbyregexp ------------------ {foo} (1 row) SELECT re2splitbyregexp(',', NULL) IS NULL AS spr_null; spr_null ---------- t (1 row) -- replaceregexpone SELECT re2replaceregexpone('Hello', 'l', 'x'); -- first only re2replaceregexpone --------------------- Hexlo (1 row) SELECT re2replaceregexpone('hello world', '(\w+)', 'REPLACED'); re2replaceregexpone --------------------- REPLACED world (1 row) SELECT re2replaceregexpone('hello world', '(\w+)', '[\0]'); -- backref \0 re2replaceregexpone --------------------- [hello] world (1 row) SELECT re2replaceregexpone('2024-01-15', '(\d+)-(\d+)-(\d+)', '\3/\2/\1'); re2replaceregexpone --------------------- 15/01/2024 (1 row) -- CH: trim leading commas only (first match) SELECT re2replaceregexpone(',,1,,', '^[,]*|[,]*$', ''); re2replaceregexpone --------------------- 1,, (1 row) SELECT re2replaceregexpone('1,,', '^[,]*|[,]*$', ''); re2replaceregexpone --------------------- 1,, (1 row) SELECT re2replaceregexpone(',,1', '^[,]*|[,]*$', ''); re2replaceregexpone --------------------- 1 (1 row) -- null propagation SELECT re2replaceregexpone(NULL, '\d+', 'x') IS NULL AS rp1_null; rp1_null ---------- t (1 row) -- replaceregexpone error: invalid backref SELECT re2replaceregexpone('Hello', 'l', '\1'); -- \1: backref beyond 0 group(s) ERROR: \1: backref beyond 0 group(s) -- replaceregexpall SELECT re2replaceregexpall('Hello', 'l', 'x'); -- all occurrences re2replaceregexpall --------------------- Hexxo (1 row) SELECT re2replaceregexpall('abc 123 def 456', '\d+', 'NUM'); re2replaceregexpall --------------------- abc NUM def NUM (1 row) SELECT re2replaceregexpall('hello', '.', '[\0]'); re2replaceregexpall --------------------- [h][e][l][l][o] (1 row) SELECT re2replaceregexpall('aaa', 'a', 'bb'); re2replaceregexpall --------------------- bbbbbb (1 row) -- CH empty match edge cases SELECT re2replaceregexpall(',,1,,', '^[,]*|[,]*$', ''); -- strip leading/trailing re2replaceregexpall --------------------- 1 (1 row) SELECT re2replaceregexpall(',,1', '^[,]*|[,]*$', ''); re2replaceregexpall --------------------- 1 (1 row) SELECT re2replaceregexpall('1,,', '^[,]*|[,]*$', ''); re2replaceregexpall --------------------- 1 (1 row) SELECT re2replaceregexpall('a', 'z*', ''); -- empty matches preserved re2replaceregexpall --------------------- a (1 row) SELECT re2replaceregexpall('aazzq', 'z*', ''); re2replaceregexpall --------------------- aaq (1 row) -- CH: ^ anchor only SELECT re2replaceregexpall('Hello, World!', '^', 'here: '); re2replaceregexpall --------------------- here: Hello, World! (1 row) -- domain extraction SELECT re2replaceregexpall('https://www.clickhouse.com/', '^https?://(?:www\.)?([^/]+)/.*$', '\1'); re2replaceregexpall --------------------- clickhouse.com (1 row) -- null propagation SELECT re2replaceregexpall(NULL, '\d+', 'x') IS NULL AS rpa_null; rpa_null ---------- t (1 row) -- countmatches SELECT re2countmatches('', 'foo'); -- empty haystack re2countmatches ----------------- 0 (1 row) SELECT re2countmatches('foo', ''); -- empty pattern: 0 (CH compat) re2countmatches ----------------- 0 (1 row) SELECT re2countmatches('foobarfoo', 'foo'); -- basic re2countmatches ----------------- 2 (1 row) SELECT re2countmatches('oooo', 'oo'); -- non-overlapping re2countmatches ----------------- 2 (1 row) SELECT re2countmatches('abc 123 def 456 ghi 789', '\d+'); re2countmatches ----------------- 3 (1 row) SELECT re2countmatches('aaa', 'a'); re2countmatches ----------------- 3 (1 row) SELECT re2countmatches('foo', '[f]{0}'); -- zero-width: 0 (CH compat) re2countmatches ----------------- 0 (1 row) SELECT re2countmatches('foo', '[f]{0}foo'); -- zero-width prefix + content re2countmatches ----------------- 1 (1 row) -- CH: [a-zA-Z]* on mixed content, counts only non-empty matches SELECT re2countmatches(' foo bar ', '[a-zA-Z]*'); re2countmatches ----------------- 2 (1 row) -- CH: capturing groups SELECT re2countmatches('foobarbazfoobarbaz', 'foo(bar)(?:baz|)'); re2countmatches ----------------- 2 (1 row) -- null propagation SELECT re2countmatches(NULL, '\d+') IS NULL AS cm_null; cm_null --------- t (1 row) -- countmatchescaseinsensitive SELECT re2countmatchescaseinsensitive('foobarFOO', 'foo'); re2countmatchescaseinsensitive -------------------------------- 2 (1 row) SELECT re2countmatchescaseinsensitive('ooOO', 'oo'); re2countmatchescaseinsensitive -------------------------------- 2 (1 row) -- multimatchany SELECT re2multimatchany('hello world', '\d+', 'world'); re2multimatchany ------------------ t (1 row) SELECT re2multimatchany('hello world', '\d+', '^\d+$'); re2multimatchany ------------------ f (1 row) SELECT re2multimatchany('abc', ''); -- empty pattern matches re2multimatchany ------------------ t (1 row) SELECT re2multimatchany('', ''); -- empty haystack + empty pattern re2multimatchany ------------------ t (1 row) SELECT re2multimatchany('', 'some string'); -- empty haystack, no match re2multimatchany ------------------ f (1 row) SELECT re2multimatchany('abc', VARIADIC ARRAY[]::text[]); -- empty array: no match re2multimatchany ------------------ f (1 row) SELECT re2multimatchany(NULL, '\d+') IS NULL AS mm_null; mm_null --------- t (1 row) -- multimatchanyindex SELECT re2multimatchanyindex('hello world', '\d+', 'world', 'hello'); re2multimatchanyindex ----------------------- 2 (1 row) SELECT re2multimatchanyindex('hello world', '\d+', '^\d+$'); re2multimatchanyindex ----------------------- 0 (1 row) -- multimatchallindices SELECT re2multimatchallindices('hello world', 'hello', '\d+', 'world', 'o'); re2multimatchallindices ------------------------- {1,3,4} (1 row) SELECT re2multimatchallindices('test', '\d+', '[A-Z]+'); re2multimatchallindices ------------------------- {} (1 row) -- invalid pattern SELECT re2match('hello', '[invalid'); ERROR: invalid RE2 pattern: missing ]: [invalid -- ==== bytea overloads (zero-byte handling, CH tests 01083/01085) ==== -- match with \0 in haystack (CH: match('\0 key="v" ', 'key="(.*?)"') -> 1) SELECT re2match('\x00'::bytea || ' key="v" '::bytea, 'key="(.*?)"'); re2match ---------- t (1 row) SELECT re2match('a'::bytea || '\x00'::bytea || 'b'::bytea, 'a.b'); -- dot matches \0 re2match ---------- t (1 row) -- extract from bytea with \0 SELECT re2extract('a'::bytea || '\x00'::bytea || 'key="val"'::bytea, 'key="([^"]*)"'); re2extract ------------ \x76616c (1 row) SELECT re2extract('hello'::bytea || '\x00'::bytea || 'world'::bytea, '\w+'); re2extract -------------- \x68656c6c6f (1 row) -- extractall with \0 SELECT re2extractall('a'::bytea || '\x00'::bytea || '1'::bytea || '\x00'::bytea || '2'::bytea || '\x00'::bytea || '3'::bytea, '\d'); re2extractall --------------------------- {"\\x31","\\x32","\\x33"} (1 row) -- regexpextract with \0 SELECT re2regexpextract('a'::bytea || '\x00'::bytea || '100-200'::bytea, '(\d+)-(\d+)', 1); re2regexpextract ------------------ \x313030 (1 row) SELECT re2regexpextract('a'::bytea || '\x00'::bytea || '100-200'::bytea, '(\d+)-(\d+)', 2); re2regexpextract ------------------ \x323030 (1 row) -- extractgroups with \0 SELECT re2extractgroups('a'::bytea || '\x00'::bytea || 'hello world'::bytea, '(\w+) (\w+)'); re2extractgroups ----------------------------------- {"\\x68656c6c6f","\\x776f726c64"} (1 row) -- replaceregexpone/all with \0 in haystack, pattern matches \0 via \x00 SELECT re2replaceregexpone('a'::bytea || '\x00'::bytea || 'b'::bytea || '\x00'::bytea || 'c'::bytea, '\x00', 'X'); re2replaceregexpone --------------------- \x6158620063 (1 row) SELECT re2replaceregexpall('a'::bytea || '\x00'::bytea || 'b'::bytea || '\x00'::bytea || 'c'::bytea, '\x00', 'X'); re2replaceregexpall --------------------- \x6158625863 (1 row) -- countmatches with \0 SELECT re2countmatches('a'::bytea || '\x00'::bytea || 'b'::bytea || '\x00'::bytea || 'c'::bytea, '\x00'); re2countmatches ----------------- 2 (1 row) -- multimatchany with \0 haystack SELECT re2multimatchany('a'::bytea || '\x00'::bytea || 'key="v"'::bytea, 'key', 'nope'); re2multimatchany ------------------ t (1 row) -- extractallgroups with \0 SELECT re2extractallgroupsvertical('a'::bytea || '\x00'::bytea || 'k1=v1 k2=v2'::bytea, '(\w+)=(\w+)'); re2extractallgroupsvertical ----------------------------------------------- {{"\\x6b31","\\x7631"},{"\\x6b32","\\x7632"}} (1 row) SELECT re2extractallgroupshorizontal('a'::bytea || '\x00'::bytea || 'k1=v1 k2=v2'::bytea, '(\w+)=(\w+)'); re2extractallgroupshorizontal ----------------------------------------------- {{"\\x6b31","\\x6b32"},{"\\x7631","\\x7632"}} (1 row) -- regexpquotemeta with \0 SELECT re2regexpquotemeta('a'::bytea || '\x00'::bytea || '.b'::bytea); re2regexpquotemeta -------------------- \x615c005c2e62 (1 row) -- splitbyregexp with \0 SELECT re2splitbyregexp('\x00', 'a'::bytea || '\x00'::bytea || 'b'::bytea || '\x00'::bytea || 'c'::bytea); re2splitbyregexp --------------------------- {"\\x61","\\x62","\\x63"} (1 row)