// Copyright (c) 2023-2025 ParadeDB, Inc.
//
// This file is part of ParadeDB - Postgres for Search and Analytics
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see .
mod fixtures;
use fixtures::*;
use pretty_assertions::assert_eq;
use rstest::*;
use sqlx::PgConnection;
#[rstest]
fn boolean_tree(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{
"boolean": {
"should": [
{"parse": {"query_string": "description:shoes"}},
{"phrase_prefix": {"field": "description", "phrases": ["book"]}},
{"term": {"field": "description", "value": "speaker"}},
{"fuzzy_term": {"field": "description", "value": "wolo", "transposition_cost_one": false, "distance": 1, "prefix": true}}
]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(columns.id, vec![3, 4, 5, 7, 10, 32, 33, 34, 37, 39, 41]);
}
#[rstest]
fn fuzzy_term(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search
WHERE bm25_search @@@ '{"fuzzy_term": {"field": "category", "value": "elector", "prefix": true}}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.id, vec![1, 2, 12, 22, 32], "wrong results");
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{"term": {"field": "category", "value": "electornics"}}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert!(columns.is_empty(), "without fuzzy field should be empty");
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{
"fuzzy_term": {
"field": "description",
"value": "keybaord",
"transposition_cost_one": false,
"distance": 1,
"prefix": true
}
}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert!(
columns.is_empty(),
"transposition_cost_one false should be empty"
);
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{
"fuzzy_term": {
"field": "description",
"value": "keybaord",
"transposition_cost_one": true,
"distance": 1,
"prefix": true
}
}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(
columns.id,
vec![1, 2],
"incorrect transposition_cost_one true"
);
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{
"fuzzy_term": {
"field": "description",
"value": "keybaord",
"prefix": true
}
}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.id, vec![1, 2], "incorrect defaults");
}
#[rstest]
fn single_queries(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
// All
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{"all": null}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 41);
// Boost
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{"boost": {"query": {"all": null}, "factor": 1.5}}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 41);
// ConstScore
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search
WHERE bm25_search @@@ '{"const_score": {"query": {"all": null}, "score": 3.9}}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 41);
// DisjunctionMax
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{"disjunction_max": {"disjuncts": [{"parse": {"query_string": "description:shoes"}}]}}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 3);
// Empty
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ '{"empty": null}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 0);
// FuzzyTerm
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ '{
"fuzzy_term": {
"field": "description",
"value": "wolo",
"transposition_cost_one": false,
"distance": 1,
"prefix": true
}
}'::jsonb ORDER BY ID"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 4);
// Parse
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{"parse": {"query_string": "description:teddy"}}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 1);
// PhrasePrefix
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{"phrase_prefix": {"field": "description", "phrases": ["har"]}}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 1);
// Phrase with invalid term list
match r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{"phrase": {"field": "description", "phrases": ["robot"]}}'::jsonb
ORDER BY id"#
.fetch_result::(&mut conn)
{
Err(err) => assert!(err
.to_string()
.contains("required to have strictly more than one term")),
_ => panic!("phrase prefix query should require multiple terms"),
}
// Phrase
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ '{
"phrase": {
"field": "description",
"phrases": ["robot", "building", "kit"]
}
}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 1);
// Range
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{
"range": {
"field": "last_updated_date",
"lower_bound": {"included": "2023-05-01T00:00:00.000000Z"},
"upper_bound": {"included": "2023-05-03T00:00:00.000000Z"},
"is_datetime": true
}
}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 7);
// Regex
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@ '{
"regex": {
"field": "description",
"pattern": "(hardcover|plush|leather|running|wireless)"
}
}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 5);
// Term
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search
WHERE bm25_search @@@ '{"term": {"field": "description", "value": "shoes"}}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 3);
//
// NB: This once worked, but the capability was removed when the new "pdb.*" builder functions
// were added. The general problem is that there's no longer a clean way to indicate
// the desire to "search all column"
//
// // Term with no field (should search all columns)
// let columns: SimpleProductsTableVec = r#"
// SELECT * FROM paradedb.bm25_search
// WHERE bm25_search @@@ '{"term": {"value": "shoes"}}'::jsonb ORDER BY id"#
// .fetch_collect(&mut conn);
// assert_eq!(columns.len(), 3);
// TermSet
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search
WHERE bm25_search @@@ '{
"term_set": {
"terms": [
{"field": "description", "value": "shoes", "is_datetime": false},
{"field": "description", "value": "novel", "is_datetime": false}
]
}
}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 5);
}
#[rstest]
fn single_queries_jsonb_build_object(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
// All
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('all', null) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 41);
// Boost
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('boost', jsonb_build_object(
'query', jsonb_build_object('all', null),
'factor', 1.5)) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 41);
// ConstScore
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('const_score', jsonb_build_object(
'query', jsonb_build_object('all', null),
'score', 3.9)) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 41);
// DisjunctionMax
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('disjunction_max', jsonb_build_object(
'disjuncts', jsonb_build_array(
jsonb_build_object('parse', jsonb_build_object(
'query_string', 'description:shoes'))))) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 3);
// Empty
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('empty', null) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 0);
// FuzzyTerm
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('fuzzy_term', jsonb_build_object(
'field', 'description',
'value', 'wolo',
'transposition_cost_one', false,
'distance', 1,
'prefix', true)) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 4);
// Parse
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('parse', jsonb_build_object(
'query_string', 'description:teddy')) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 1);
// PhrasePrefix
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('phrase_prefix', jsonb_build_object(
'field', 'description',
'phrases', jsonb_build_array('har'))) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 1);
// Phrase with invalid term list
match r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('phrase', jsonb_build_object(
'field', 'description',
'phrases', jsonb_build_array('robot'))) ORDER BY id"#
.fetch_result::(&mut conn)
{
Err(err) => assert!(err
.to_string()
.contains("required to have strictly more than one term")),
_ => panic!("phrase prefix query should require multiple terms"),
}
// Phrase
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('phrase', jsonb_build_object(
'field', 'description',
'phrases', jsonb_build_array('robot', 'building', 'kit'))) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 1);
// Range
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('range', jsonb_build_object(
'field', 'last_updated_date',
'lower_bound', jsonb_build_object('included', '2023-05-01T00:00:00.000000Z'),
'upper_bound', jsonb_build_object('included', '2023-05-03T00:00:00.000000Z'),
'is_datetime', true)) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 7);
// Regex
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('regex', jsonb_build_object(
'field', 'description',
'pattern', '(hardcover|plush|leather|running|wireless)')) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 5);
// Term
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('term', jsonb_build_object(
'field', 'description',
'value', 'shoes')) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 3);
//
// NB: This once worked, but the capability was removed when the new "pdb.*" builder functions
// were added. The general problem is that there's no longer a clean way to indicate
// the desire to "search all column"
//
// // Term with no field (should search all columns)
// let columns: SimpleProductsTableVec = r#"
// SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
// jsonb_build_object('term', jsonb_build_object('value', 'shoes')) ORDER BY id"#
// .fetch_collect(&mut conn);
// assert_eq!(columns.len(), 3);
// TermSet
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
jsonb_build_object('term_set', jsonb_build_object(
'terms', jsonb_build_array(
jsonb_build_array('description', 'shoes', false),
jsonb_build_array('description', 'novel', false)))) ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 5);
}
#[rstest]
fn exists_query(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
// Simple exists query
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{"exists": {"field": "rating"}}'::jsonb
"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 41);
// Non fast field should fail
match r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{"exists": {"field": "description"}}'::jsonb
"#
.execute_result(&mut conn)
{
Err(err) => assert!(err.to_string().contains("not a fast field")),
_ => panic!("exists() over non-fast field should fail"),
}
// Exists with boolean query
"INSERT INTO paradedb.bm25_search (id, description, rating) VALUES (42, 'shoes', NULL)"
.execute(&mut conn);
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search WHERE bm25_search @@@
'{
"boolean": {
"must": [
{"exists": {"field": "rating"}},
{"parse": {"query_string": "description:shoes"}}
]
}
}'::jsonb
"#
.fetch_collect(&mut conn);
assert_eq!(columns.len(), 3);
}
#[rstest]
fn more_like_this_raw(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id SERIAL PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (flavour) VALUES
('apple'),
('banana'),
('cherry'),
('banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
// Missing keys should fail.
match r#"
SELECT id, flavour FROM test_more_like_this_table WHERE test_more_like_this_table @@@
'{"more_like_this": {}}'::jsonb;
"#
.fetch_result::<()>(&mut conn)
{
Err(err) => {
assert_eq!(err
.to_string()
, "error returned from database: more_like_this must be called with either document_id or document_fields")
}
_ => panic!("document_id or document_fields validation failed"),
}
// Conflicting keys should fail.
match r#"
SELECT id, flavour FROM test_more_like_this_table WHERE test_more_like_this_table @@@
'{"more_like_this": {
"document_id": 0,
"document_fields": [["flavour", "banana"]]
}}'::jsonb;
"#
.fetch_result::<()>(&mut conn)
{
Err(err) => {
assert_eq!(err
.to_string()
, "error returned from database: more_like_this must be called with only one of document_id or document_fields")
}
_ => panic!("document_id or document_fields validation failed"),
}
let rows: Vec<(i32, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
let rows: Vec<(i32, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_id": 2
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_empty(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id SERIAL PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (flavour) VALUES
('apple'),
('banana'),
('cherry'),
('banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
match r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{"more_like_this": {}}'::jsonb
ORDER BY id;
"#
.fetch_result::<()>(&mut conn)
{
Err(err) => {
assert_eq!(err
.to_string()
, "error returned from database: more_like_this must be called with either document_id or document_fields")
}
_ => panic!("document_id or document_fields validation failed"),
}
}
#[rstest]
fn more_like_this_text(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id SERIAL PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (flavour) VALUES
('apple'),
('banana'),
('cherry'),
('banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(i32, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_boolean_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id BOOLEAN PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
(true, 'apple'),
(false, 'banana')
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(bool, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 1);
}
#[rstest]
fn more_like_this_uuid_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id UUID PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
('f159c89e-2162-48cd-85e3-e42b71d2ecd0', 'apple'),
('38bf27a0-1aa8-42cd-9cb0-993025e0b8d0', 'banana'),
('b5faacc0-9eba-441a-81f8-820b46a3b57e', 'cherry'),
('eb833eb6-c598-4042-b84a-0045828fceea', 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(uuid::Uuid, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_i64_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id BIGINT PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
(1, 'apple'),
(2, 'banana'),
(3, 'cherry'),
(4, 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(i64, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_i32_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id INT PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
(1, 'apple'),
(2, 'banana'),
(3, 'cherry'),
(4, 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(i32, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_i16_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id SMALLINT PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
(1, 'apple'),
(2, 'banana'),
(3, 'cherry'),
(4, 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(i16, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_f32_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id FLOAT4 PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
(1.1, 'apple'),
(2.2, 'banana'),
(3.3, 'cherry'),
(4.4, 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(f32, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_f64_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id FLOAT8 PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
(1.1, 'apple'),
(2.2, 'banana'),
(3.3, 'cherry'),
(4.4, 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(f64, String)> = r#"
SELECT id, flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_literal_cast(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id INT PRIMARY KEY,
year INTEGER
);
INSERT INTO test_more_like_this_table (id, year) VALUES
(1, 2012),
(2, 2013),
(3, 2014),
(4, 2012);
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, year)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(i32, i32)> = r#"
SELECT id, year FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [
["year", 2012]
]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_numeric_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id NUMERIC PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
(1.1, 'apple'),
(2.2, 'banana'),
(3.3, 'cherry'),
(4.4, 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(f64, String)> = r#"
SELECT CAST(id AS FLOAT8), flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_date_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id DATE PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
('2023-05-03', 'apple'),
('2023-05-04', 'banana'),
('2023-05-05', 'cherry'),
('2023-05-06', 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(String, String)> = r#"
SELECT CAST(id AS TEXT), flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_time_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id TIME PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
('08:09:10', 'apple'),
('09:10:11', 'banana'),
('10:11:12', 'cherry'),
('11:12:13', 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(String, String)> = r#"
SELECT CAST(id AS TEXT), flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_timestamp_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id TIMESTAMP PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
('2023-05-03 08:09:10', 'apple'),
('2023-05-04 09:10:11', 'banana'),
('2023-05-05 10:11:12', 'cherry'),
('2023-05-06 11:12:13', 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(String, String)> = r#"
SELECT CAST(id AS TEXT), flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_timestamptz_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id TIMESTAMP WITH TIME ZONE PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
('2023-05-03 08:09:10 EST', 'apple'),
('2023-05-04 09:10:11 PST', 'banana'),
('2023-05-05 10:11:12 MST', 'cherry'),
('2023-05-06 11:12:13 CST', 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(String, String)> = r#"
SELECT CAST(id AS TEXT), flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn more_like_this_timetz_key(mut conn: PgConnection) {
r#"
CREATE TABLE test_more_like_this_table (
id TIME WITH TIME ZONE PRIMARY KEY,
flavour TEXT
);
INSERT INTO test_more_like_this_table (id, flavour) VALUES
('08:09:10 EST', 'apple'),
('09:10:11 PST', 'banana'),
('10:11:12 MST', 'cherry'),
('11:12:13 CST', 'banana split');
"#
.execute(&mut conn);
r#"
CREATE INDEX test_more_like_this_index ON test_more_like_this_table USING bm25 (id, flavour)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(String, String)> = r#"
SELECT CAST(id AS TEXT), flavour FROM test_more_like_this_table
WHERE test_more_like_this_table @@@ '{
"more_like_this": {
"min_doc_frequency": 0,
"min_term_frequency": 0,
"document_fields": [["flavour", "banana"]]
}
}'::jsonb ORDER BY id;
"#
.fetch_collect(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn match_query(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search
WHERE bm25_search @@@ '{
"match": {
"field": "description",
"value": "ruling shoeez",
"distance": 2
}
}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.id, vec![3, 4, 5]);
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search
WHERE bm25_search @@@ '{
"match": {
"field": "description",
"value": "ruling shoeez",
"distance": 2,
"conjunction_mode": true
}
}'::jsonb ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.id, vec![3]);
let columns: SimpleProductsTableVec = r#"
SELECT * FROM paradedb.bm25_search
WHERE bm25_search @@@ '{
"match": {
"field": "description",
"value": "ruling shoeez",
"distance": 1
}
}'::jsonb
ORDER BY id"#
.fetch_collect(&mut conn);
assert_eq!(columns.id.len(), 0);
}
#[rstest]
fn range_term(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'deliveries',
table_type => 'Deliveries'
);
CREATE INDEX deliveries_idx ON deliveries
USING bm25 (delivery_id, weights, quantities, prices, ship_dates, facility_arrival_times, delivery_times)
WITH (key_field='delivery_id');
"#
.execute(&mut conn);
// int4range
let expected: Vec<(i32,)> =
"SELECT delivery_id FROM deliveries WHERE weights @> 1 ORDER BY delivery_id"
.fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "weights", "value": 1}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
let expected: Vec<(i32,)> =
"SELECT delivery_id FROM deliveries WHERE weights @> 13 ORDER BY delivery_id"
.fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "weights", "value": 13}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
// int8range
let expected: Vec<(i32,)> =
"SELECT delivery_id FROM deliveries WHERE quantities @> 17000::int8 ORDER BY delivery_id"
.fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "quantities", "value": 17000}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
// numrange
let expected: Vec<(i32,)> =
"SELECT delivery_id FROM deliveries WHERE prices @> 3.5 ORDER BY delivery_id"
.fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "prices", "value": 3.5}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
// daterange
let expected: Vec<(i32,)> = "SELECT delivery_id FROM deliveries WHERE ship_dates @> '2023-03-07'::date ORDER BY delivery_id".fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "ship_dates", "value": "2023-03-07T00:00:00.000000Z", "is_datetime": true}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
let expected: Vec<(i32,)> = "SELECT delivery_id FROM deliveries WHERE ship_dates @> '2023-03-06'::date ORDER BY delivery_id".fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "ship_dates", "value": "2023-03-06T00:00:00.000000Z", "is_datetime": true}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
// tsrange
let expected: Vec<(i32,)> = "SELECT delivery_id FROM deliveries WHERE facility_arrival_times @> '2024-05-01 14:00:00'::timestamp ORDER BY delivery_id".fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "facility_arrival_times", "value": "2024-05-01T14:00:00.000000Z", "is_datetime": true}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
let expected: Vec<(i32,)> = "SELECT delivery_id FROM deliveries WHERE facility_arrival_times @> '2024-05-01 15:00:00'::timestamp ORDER BY delivery_id".fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "facility_arrival_times", "value": "2024-05-01T15:00:00.000000Z", "is_datetime": true}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
// tstzrange
let expected: Vec<(i32,)> = "SELECT delivery_id FROM deliveries WHERE delivery_times @> '2024-05-01 06:31:00-04'::timestamptz ORDER BY delivery_id".fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "delivery_times", "value": "2024-05-01T10:31:00.000000Z", "is_datetime": true}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
let expected: Vec<(i32,)> = "SELECT delivery_id FROM deliveries WHERE delivery_times @> '2024-05-01T11:30:00Z'::timestamptz ORDER BY delivery_id".fetch(&mut conn);
let result: Vec<(i32,)> = r#"SELECT delivery_id FROM deliveries WHERE delivery_id @@@ '{"range_term": {"field": "delivery_times", "value": "2024-05-01T11:30:00.000000Z", "is_datetime": true}}'::jsonb ORDER BY delivery_id"#.fetch(&mut conn);
assert_eq!(result, expected);
}
#[rstest]
fn parse_error(mut conn: PgConnection) {
SimpleProductsTable::setup().execute(&mut conn);
let result = r#"
SELECT id FROM paradedb.bm25_search WHERE bm25_search @@@
'{"all": {}}'::jsonb ORDER BY id"#
.fetch_result::<(i32,)>(&mut conn);
match result {
Err(err) => assert_eq!(
err.to_string(),
r#"error returned from database: error parsing search query input json at ".": data did not match any variant of untagged enum SearchQueryInput"#
),
_ => {
panic!("search input query variant with no fields should not be able to receive a map")
}
}
}