// Copyright (c) 2023-2025 ParadeDB, Inc. // // This file is part of ParadeDB - Postgres for Search and Analytics // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . mod fixtures; use fixtures::*; use pretty_assertions::assert_eq; use rstest::*; use sqlx::{types::BigDecimal, PgConnection}; #[rstest] fn hybrid_deprecated(mut conn: PgConnection) { r#" CALL paradedb.create_bm25_test_table( schema_name => 'public', table_name => 'mock_items' ); CREATE INDEX search_idx ON mock_items USING bm25 (id, description, category, rating, in_stock, created_at, metadata) WITH ( key_field = 'id', text_fields = '{"description": {}, "category": {}}', numeric_fields = '{"rating": {}}', boolean_fields = '{"in_stock": {}}', datetime_fields = '{"created_at": {}}', json_fields = '{"metadata": {}}' ); CREATE EXTENSION vector; ALTER TABLE mock_items ADD COLUMN embedding vector(3); UPDATE mock_items m SET embedding = ('[' || ((m.id + 1) % 10 + 1)::integer || ',' || ((m.id + 2) % 10 + 1)::integer || ',' || ((m.id + 3) % 10 + 1)::integer || ']')::vector; "# .execute(&mut conn); let rows: Vec<(i32, BigDecimal)> = r#" WITH semantic_search AS ( SELECT id, RANK () OVER (ORDER BY embedding <=> '[1,2,3]') AS rank FROM mock_items ORDER BY embedding <=> '[1,2,3]' LIMIT 20 ), bm25_search AS ( SELECT id, RANK () OVER (ORDER BY paradedb.score(id) DESC) as rank FROM mock_items WHERE description @@@ 'keyboard' LIMIT 20 ) SELECT COALESCE(semantic_search.id, bm25_search.id) AS id, (COALESCE(1.0 / (60 + semantic_search.rank), 0.0) * 0.1) + (COALESCE(1.0 / (60 + bm25_search.rank), 0.0) * 0.9) AS score FROM semantic_search FULL OUTER JOIN bm25_search ON semantic_search.id = bm25_search.id JOIN mock_items ON mock_items.id = COALESCE(semantic_search.id, bm25_search.id) ORDER BY score DESC LIMIT 5 "# .fetch(&mut conn); assert_eq!( rows.into_iter().map(|t| t.0).collect::>(), vec![2, 1, 19, 9, 29] ); } #[rstest] #[allow(clippy::excessive_precision)] fn reciprocal_rank_fusion(mut conn: PgConnection) { SimpleProductsTable::setup().execute(&mut conn); r#" CREATE EXTENSION vector; ALTER TABLE paradedb.bm25_search ADD COLUMN embedding vector(3); UPDATE paradedb.bm25_search m SET embedding = ('[' || ((m.id + 1) % 10 + 1)::integer || ',' || ((m.id + 2) % 10 + 1)::integer || ',' || ((m.id + 3) % 10 + 1)::integer || ']')::vector; CREATE INDEX on paradedb.bm25_search USING hnsw (embedding vector_l2_ops)"# .execute(&mut conn); let columns: Vec<(i32, f32, String)> = r#" WITH semantic AS ( SELECT id, RANK () OVER (ORDER BY embedding <=> '[1,2,3]') AS rank FROM paradedb.bm25_search ORDER BY embedding <=> '[1,2,3]' LIMIT 20 ), bm25 AS ( SELECT id, RANK () OVER (ORDER BY paradedb.score(id) DESC) as rank FROM paradedb.bm25_search WHERE bm25_search @@@ 'description:keyboard' LIMIT 20 ) SELECT COALESCE(semantic.id, bm25.id) AS id, (COALESCE(1.0 / (60 + semantic.rank), 0.0) + COALESCE(1.0 / (60 + bm25.rank), 0.0))::REAL AS score, paradedb.bm25_search.description FROM semantic FULL OUTER JOIN bm25 ON semantic.id = bm25.id JOIN paradedb.bm25_search ON paradedb.bm25_search.id = COALESCE(semantic.id, bm25.id) ORDER BY score DESC LIMIT 5; "# .fetch(&mut conn); assert_eq!( columns[0], ( 1, 0.03062178588125292193, "Ergonomic metal keyboard".to_string() ) ); assert_eq!( columns[1], (2, 0.02990695613646433318, "Plastic Keyboard".to_string()) ); assert_eq!( columns[2], ( 19, 0.01639344262295081967, "Artistic ceramic vase".to_string() ) ); assert_eq!( columns[3], (9, 0.01639344262295081967, "Modern wall clock".to_string()) ); assert_eq!( columns[4], ( 29, 0.01639344262295081967, "Designer wall paintings".to_string() ) ); }