// Copyright (c) 2023-2025 ParadeDB, Inc.
//
// This file is part of ParadeDB - Postgres for Search and Analytics
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#![cfg(feature = "icu")]

mod fixtures;

use fixtures::*;
use pretty_assertions::assert_eq;
use rstest::*;
use sqlx::PgConnection;

/// Indexes the Arabic posts table with the `icu` tokenizer on `author`,
/// `title` and `message`, then checks that a quoted-term query against each
/// field returns the expected row ids.
#[rstest]
fn test_icu_arabic_tokenizer(mut conn: PgConnection) {
    IcuArabicPostsTable::setup().execute(&mut conn);
    r#"
    CREATE INDEX idx_arabic ON icu_arabic_posts
    USING bm25 (id, author, title, message)
    WITH (
        key_field = 'id',
        text_fields = '{
            "author":  {"tokenizer": {"type": "icu"}},
            "title":   {"tokenizer": {"type": "icu"}},
            "message": {"tokenizer": {"type": "icu"}}
        }'
    );"#
    .execute(&mut conn);

    let columns: IcuArabicPostsTableVec =
        r#"SELECT * FROM icu_arabic_posts WHERE icu_arabic_posts @@@ 'author:"محمد"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![2]);

    let columns: IcuArabicPostsTableVec =
        r#"SELECT * FROM icu_arabic_posts WHERE icu_arabic_posts @@@ 'title:"السوق"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![2]);

    let columns: IcuArabicPostsTableVec =
        r#"SELECT * FROM icu_arabic_posts WHERE icu_arabic_posts @@@ 'message:"في"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![1, 2, 3]);
}

/// Indexes the Amharic posts table with the `icu` tokenizer on `author`,
/// `title` and `message`, then checks that a quoted-term query against each
/// field returns the expected row ids.
#[rstest]
fn test_icu_amharic_tokenizer(mut conn: PgConnection) {
    IcuAmharicPostsTable::setup().execute(&mut conn);
    r#"
    CREATE INDEX idx_amharic ON icu_amharic_posts
    USING bm25 (id, author, title, message)
    WITH (
        key_field = 'id',
        text_fields = '{
            "author":  {"tokenizer": {"type": "icu"}},
            "title":   {"tokenizer": {"type": "icu"}},
            "message": {"tokenizer": {"type": "icu"}}
        }'
    );"#
    .execute(&mut conn);

    let columns: IcuAmharicPostsTableVec =
        r#"SELECT * FROM icu_amharic_posts WHERE icu_amharic_posts @@@ 'author:"አለም"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![3]);

    let columns: IcuAmharicPostsTableVec =
        r#"SELECT * FROM icu_amharic_posts WHERE icu_amharic_posts @@@ 'title:"ለመማር"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![3]);

    let columns: IcuAmharicPostsTableVec =
        r#"SELECT * FROM icu_amharic_posts WHERE icu_amharic_posts @@@ 'message:"ዝናብ"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![1, 2]);
}

/// Indexes the Greek posts table with the `icu` tokenizer on `author`,
/// `title` and `message`, then checks that a quoted-term query against each
/// field returns the expected row ids.
#[rstest]
fn test_icu_greek_tokenizer(mut conn: PgConnection) {
    IcuGreekPostsTable::setup().execute(&mut conn);
    r#"
    CREATE INDEX idx_greek ON icu_greek_posts
    USING bm25 (id, author, title, message)
    WITH (
        key_field = 'id',
        text_fields = '{
            "author":  {"tokenizer": {"type": "icu"}},
            "title":   {"tokenizer": {"type": "icu"}},
            "message": {"tokenizer": {"type": "icu"}}
        }'
    );"#
    .execute(&mut conn);

    let columns: IcuGreekPostsTableVec =
        r#"SELECT * FROM icu_greek_posts WHERE icu_greek_posts @@@ 'author:"Σοφία"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![2]);

    let columns: IcuGreekPostsTableVec =
        r#"SELECT * FROM icu_greek_posts WHERE icu_greek_posts @@@ 'title:"επιτυχία"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![3]);

    let columns: IcuGreekPostsTableVec =
        r#"SELECT * FROM icu_greek_posts WHERE icu_greek_posts @@@ 'message:"συμβουλές"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![3]);
}

/// Indexes the Czech posts table with the `icu` tokenizer on `author`,
/// `title` and `message`, then checks quoted-term queries on `author` and
/// `title` plus a `"..."~100` query on `message`.
#[rstest]
fn test_icu_czech_tokenizer(mut conn: PgConnection) {
    IcuCzechPostsTable::setup().execute(&mut conn);
    r#"
    CREATE INDEX idx_czech ON icu_czech_posts
    USING bm25 (id, author, title, message)
    WITH (
        key_field = 'id',
        text_fields = '{
            "author":  {"tokenizer": {"type": "icu"}},
            "title":   {"tokenizer": {"type": "icu"}},
            "message": {"tokenizer": {"type": "icu"}}
        }'
    );"#
    .execute(&mut conn);

    let columns: IcuCzechPostsTableVec =
        r#"SELECT * FROM icu_czech_posts WHERE icu_czech_posts @@@ 'author:"Tomáš"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![1]);

    let columns: IcuCzechPostsTableVec =
        r#"SELECT * FROM icu_czech_posts WHERE icu_czech_posts @@@ 'title:"zdravý"' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![2]);

    let columns: IcuCzechPostsTableVec =
        r#"SELECT * FROM icu_czech_posts WHERE icu_czech_posts @@@ 'message:"velký"~100' ORDER BY id"#
            .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![3]);
}

/// Indexes only the `message` column of the Czech posts table with the `icu`
/// tokenizer and checks that `paradedb.phrase` over two consecutive words
/// matches the expected row.
#[rstest]
fn test_icu_czech_content_tokenizer(mut conn: PgConnection) {
    IcuCzechPostsTable::setup().execute(&mut conn);
    r#"
    CREATE INDEX idx_czech_content ON icu_czech_posts
    USING bm25 (id, message)
    WITH (
        key_field = 'id',
        text_fields = '{"message": {"tokenizer": {"type": "icu"}}}'
    );"#
    .execute(&mut conn);

    let columns: IcuCzechPostsTableVec = r#"
        SELECT * FROM icu_czech_posts
        WHERE icu_czech_posts @@@ paradedb.phrase(
            field => 'message',
            phrases => ARRAY['šla', 'sbírat']
        ) ORDER BY id;"#
        .fetch_collect(&mut conn);
    assert_eq!(columns.id, vec![1]);
}

/// Checks that `paradedb.snippet` works on a field tokenized with `icu`:
/// the matched Arabic term in `title` must come back highlighted.
#[rstest]
fn test_icu_snippet(mut conn: PgConnection) {
    IcuArabicPostsTable::setup().execute(&mut conn);
    r#"
    CREATE INDEX idx_arabic ON icu_arabic_posts
    USING bm25 (id, author, title, message)
    WITH (
        key_field = 'id',
        text_fields = '{
            "author":  {"tokenizer": {"type": "icu"}},
            "title":   {"tokenizer": {"type": "icu"}},
            "message": {"tokenizer": {"type": "icu"}}
        }'
    );"#
    .execute(&mut conn);

    let columns: Vec<(i32, String)> =
        r#"SELECT id, paradedb.snippet(title) FROM icu_arabic_posts WHERE title @@@ 'السوق' "#
            .fetch(&mut conn);

    // No start/end tag is passed to `paradedb.snippet`, so the matched term is
    // expected to be wrapped in the default `<b>`/`</b>` highlight tags.
    assert_eq!(
        columns,
        vec![(2, "رحلة إلى <b>السوق</b> مع أبي".to_string())]
    );
}