// Copyright (c) 2023-2025 ParadeDB, Inc.
//
// This file is part of ParadeDB - Postgres for Search and Analytics
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
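//! Tests for querying a bm25 index directly with the `@@@` operator, plus the
//! planner statistics (`reltuples`, estimated plan rows) associated with it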
mod fixtures;
use fixtures::*;
use pretty_assertions::assert_eq;
use rstest::*;
use rustc_hash::FxHashSet as HashSet;
use sqlx::PgConnection;
#[rstest]
fn reltuples_are_set(mut conn: PgConnection) {
"CREATE TABLE reltuptest AS SELECT md5(x::text), x FROM generate_series(1, 1024) x;"
.execute(&mut conn);
let (reltuples,) = "SELECT reltuples FROM pg_class WHERE oid = 'reltuptest'::regclass::oid"
.fetch_one::<(f32,)>(&mut conn);
if reltuples > 0.0 {
panic!("expected reltuples to be <= 0.0.")
}
"CREATE INDEX idxreltuptest ON reltuptest USING bm25 (x, md5) WITH (key_field='x')"
.execute(&mut conn);
let (reltuples,) = "SELECT reltuples FROM pg_class WHERE oid = 'reltuptest'::regclass::oid"
.fetch_one::<(f32,)>(&mut conn);
assert_eq!(reltuples, 1024.0);
}
#[rstest]
fn direct_or_queries(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
for query in &[
"SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ 'description:keyboard OR category:electronics'",
"SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ 'description:keyboard' OR bm25_search @@@ 'category:electronics'",
"SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ paradedb.term('description', 'keyboard') OR bm25_search @@@ paradedb.term('category', 'electronics')",
"SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ paradedb.term('description', 'keyboard') OR bm25_search @@@ 'category:electronics'",
] {
let columns: SimpleProductsTableVec = query.fetch_collect(&mut conn);
assert_eq!(
columns.description.iter().cloned().collect::>(),
concat!(
"Plastic Keyboard,Ergonomic metal keyboard,Innovative wireless earbuds,",
"Fast charging power bank,Bluetooth-enabled speaker"
)
.split(',')
.map(|s| s.to_string())
.collect::>()
);
assert_eq!(
columns.category.iter().cloned().collect::>(),
"Electronics,Electronics,Electronics,Electronics,Electronics"
.split(',')
.map(|s| s.to_string())
.collect::>()
);
}
}
#[rstest]
fn direct_and_queries(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
for query in &[
"SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ 'description:keyboard AND category:electronics'",
"SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ 'description:keyboard' AND bm25_search @@@ 'category:electronics'",
"SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ paradedb.term('description', 'keyboard') AND bm25_search @@@ paradedb.term('category', 'electronics')",
"SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ paradedb.term('description', 'keyboard') AND bm25_search @@@ 'category:electronics'",
] {
let columns: SimpleProductsTableVec = query.fetch_collect(&mut conn);
assert_eq!(
columns.description.iter().cloned().collect::>(),
["Plastic Keyboard","Ergonomic metal keyboard"].iter().map(|s| s.to_string())
.collect::>()
);
assert_eq!(
columns.category.iter().cloned().collect::>(),
["Electronics"].iter()
.map(|s| s.to_string())
.collect::>()
);
}
}
#[rstest]
fn direct_sql_mix(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
let (description, ) = "SELECT description FROM paradedb.bm25_search WHERE id @@@ 'description:keyboard' AND id = 2".fetch_one::<(String,)>(&mut conn);
assert_eq!(description, "Plastic Keyboard");
}
#[rstest]
fn explain_row_estimate(mut conn: PgConnection) {
use serde_json::Number;
use serde_json::Value;
SimpleProductsTable::setup().execute(&mut conn);
let (plan, ) = "EXPLAIN (ANALYZE, FORMAT JSON) SELECT * FROM paradedb.bm25_search WHERE id @@@ 'description:keyboard'".fetch_one::<(Value,)>(&mut conn);
let plan = plan
.get(0)
.unwrap()
.as_object()
.unwrap()
.get("Plan")
.unwrap()
.as_object()
.unwrap();
eprintln!("{plan:#?}");
// depending on how tantivy distributes docs per segment, it seems the estimated rows could be 2 or 3
// with our little test table
let plan_rows = plan.get("Plan Rows");
assert!(
plan_rows == Some(&Value::Number(Number::from(2)))
|| plan_rows == Some(&Value::Number(Number::from(3)))
);
}