// Copyright (c) 2023-2025 ParadeDB, Inc.
//
// This file is part of ParadeDB - Postgres for Search and Analytics
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see .
mod fixtures;
use approx::assert_relative_eq;
use fixtures::*;
use num_traits::ToPrimitive;
use pgvector::Vector;
use rstest::*;
use sqlx::types::BigDecimal;
use sqlx::PgConnection;
use std::str::FromStr;
#[rstest]
fn quickstart(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
)
"#
.execute(&mut conn);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
LIMIT 3;
"#
.fetch(&mut conn);
assert_eq!(
rows,
vec![
("Ergonomic metal keyboard".into(), 4, "Electronics".into()),
("Plastic Keyboard".into(), 4, "Electronics".into()),
("Sleek running shoes".into(), 5, "Footwear".into())
]
);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata, weight_range)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' OR category @@@ 'footwear' AND rating @@@ '>2'
ORDER BY description
LIMIT 5"#
.fetch(&mut conn);
assert_eq!(rows[0].0, "Comfortable slippers".to_string());
assert_eq!(rows[1].0, "Generic shoes".to_string());
assert_eq!(rows[2].0, "Sleek running shoes".to_string());
assert_eq!(rows[3].0, "Sturdy hiking boots".to_string());
assert_eq!(rows[4].0, "White jogging shoes".to_string());
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE description @@@ 'shoes' OR category @@@ 'footwear' AND rating @@@ '>2'
ORDER BY score DESC, description
LIMIT 5"#
.fetch(&mut conn);
assert_eq!(rows[0].0, "Generic shoes".to_string());
assert_eq!(rows[1].0, "Sleek running shoes".to_string());
assert_eq!(rows[2].0, "White jogging shoes".to_string());
assert_eq!(rows[3].0, "Comfortable slippers".to_string());
// The BM25 score here is a tie, so the order is arbitrary
assert!(rows[4].0 == *"Sturdy hiking boots" || rows[4].0 == *"Winter woolen socks");
assert_eq!(rows[0].3, 5.8135376);
assert_eq!(rows[1].3, 5.4211845);
assert_eq!(rows[2].3, 5.4211845);
assert_eq!(rows[3].3, 2.9362776);
assert_eq!(rows[4].3, 2.9362776);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ '"white shoes"~1'
LIMIT 5"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
assert_eq!(rows[0].0, "White jogging shoes");
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'orders',
table_type => 'Orders'
);
ALTER TABLE orders
ADD CONSTRAINT foreign_key_product_id
FOREIGN KEY (product_id)
REFERENCES mock_items(id);
CREATE INDEX orders_idx ON orders
USING bm25 (order_id, customer_name)
WITH (key_field='order_id');
"#
.execute(&mut conn);
let rows: Vec<(i32, i32, i32, BigDecimal, String)> = r#"
SELECT * FROM orders LIMIT 3"#
.fetch(&mut conn);
assert_eq!(
rows,
vec![
(
1,
1,
3,
BigDecimal::from_str("99.99").unwrap(),
"John Doe".into()
),
(
2,
2,
1,
BigDecimal::from_str("49.99").unwrap(),
"Jane Smith".into()
),
(
3,
3,
5,
BigDecimal::from_str("249.95").unwrap(),
"Alice Johnson".into()
),
]
);
let rows: Vec<(i32, String, String)> = r#"
SELECT o.order_id, o.customer_name, m.description
FROM orders o
JOIN mock_items m ON o.product_id = m.id
WHERE o.customer_name @@@ 'Johnson' AND m.description @@@ 'shoes'
ORDER BY order_id
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(
rows,
vec![
(3, "Alice Johnson".into(), "Sleek running shoes".into()),
(6, "Alice Johnson".into(), "White jogging shoes".into()),
(36, "Alice Johnson".into(), "White jogging shoes".into()),
]
);
r#"
CREATE EXTENSION vector;
ALTER TABLE mock_items ADD COLUMN embedding vector(3);
"#
.execute(&mut conn);
r#"
UPDATE mock_items m
SET embedding = ('[' ||
((m.id + 1) % 10 + 1)::integer || ',' ||
((m.id + 2) % 10 + 1)::integer || ',' ||
((m.id + 3) % 10 + 1)::integer || ']')::vector;
"#
.execute(&mut conn);
let rows: Vec<(String, i32, String, Vector)> = r#"
SELECT description, rating, category, embedding
FROM mock_items LIMIT 3;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
assert_eq!(rows[0].0, "Ergonomic metal keyboard");
assert_eq!(rows[1].0, "Plastic Keyboard");
assert_eq!(rows[2].0, "Sleek running shoes");
assert_eq!(rows[0].3, Vector::from(vec![3.0, 4.0, 5.0]));
assert_eq!(rows[1].3, Vector::from(vec![4.0, 5.0, 6.0]));
assert_eq!(rows[2].3, Vector::from(vec![5.0, 6.0, 7.0]));
r#"
CREATE INDEX on mock_items
USING hnsw (embedding vector_cosine_ops);
"#
.execute(&mut conn);
let rows: Vec<(String, String, i32, Vector)> = r#"
SELECT description, category, rating, embedding
FROM mock_items
ORDER BY embedding <=> '[1,2,3]', description
LIMIT 3;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
assert_eq!(rows[0].0, "Artistic ceramic vase");
assert_eq!(rows[1].0, "Designer wall paintings");
assert_eq!(rows[2].0, "Handcrafted wooden frame");
assert_eq!(rows[0].3, Vector::from(vec![1.0, 2.0, 3.0]));
assert_eq!(rows[1].3, Vector::from(vec![1.0, 2.0, 3.0]));
assert_eq!(rows[2].3, Vector::from(vec![1.0, 2.0, 3.0]));
}
#[rstest]
fn full_text_search(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(i32, String, i32)> = r#"
SELECT id, description, rating
FROM mock_items
WHERE description @@@ 'shoes' AND rating @@@ '>3'
ORDER BY id
"#
.fetch(&mut conn);
assert_eq!(rows[0], (3, "Sleek running shoes".into(), 5));
assert_eq!(rows[1], (5, "Generic shoes".into(), 4));
// Basic term
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
// Multiple terms
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'keyboard' OR category @@@ 'toy'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
// Not term
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ '(shoes running -white)'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
// Basic phrase
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ '"plastic keyboard"'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// Slop operator
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ '"ergonomic keyboard"~1'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// Phrase prefix
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ '"plastic keyb"*'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// Basic filtering
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND rating > 2
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND rating @@@ '>2'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
// Numeric filter
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND rating @@@ '4'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND rating @@@ '>=4'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
// Datetime filter
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND created_at @@@ '"2023-04-20T16:38:02Z"'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// Boolean filter
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND in_stock @@@ 'true'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
// Range filter
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND rating @@@ '[1 TO 4]'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND created_at @@@ '[2020-01-31T00:00:00Z TO 2024-01-31T00:00:00Z]'
"#.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ '[book TO camera]'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 8);
// Set filter
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND rating @@@ 'IN [2 3 4]'
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
// Pagination
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes'
LIMIT 1 OFFSET 2
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// BM25 scoring
let rows: Vec<(i32, f32)> = r#"
SELECT id, paradedb.score(id)
FROM mock_items
WHERE description @@@ 'shoes'
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'orders',
table_type => 'Orders'
);
ALTER TABLE orders
ADD CONSTRAINT foreign_key_product_id
FOREIGN KEY (product_id)
REFERENCES mock_items(id);
CREATE INDEX orders_idx ON orders
USING bm25 (order_id, customer_name)
WITH (key_field='order_id');
"#
.execute(&mut conn);
let rows: Vec<(i32, f32)> = r#"
SELECT o.order_id, paradedb.score(o.order_id) + paradedb.score(m.id) as score
FROM orders o
JOIN mock_items m ON o.product_id = m.id
WHERE o.customer_name @@@ 'Johnson' AND (m.description @@@ 'shoes' OR m.description @@@ 'running')
ORDER BY score DESC, o.order_id
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(rows, vec![(3, 8.738735), (6, 5.406531), (36, 5.406531)]);
// Highlighting
let rows: Vec<(i32, String)> = r#"
SELECT id, paradedb.snippet(description)
FROM mock_items
WHERE description @@@ 'shoes'
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(i32, String)> = r#"
SELECT id, paradedb.snippet(description, start_tag => '', end_tag => '')
FROM mock_items
WHERE description @@@ 'shoes'
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
assert!(rows[0].1.contains(""));
assert!(rows[0].1.contains(""));
// Order by score
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE description @@@ 'shoes'
ORDER BY score DESC
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
assert_eq!(rows[0].3, 2.8772602);
assert_eq!(rows[1].3, 2.4849067);
assert_eq!(rows[2].3, 2.4849067);
// Order by field
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes'
ORDER BY rating DESC
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(
rows,
vec![
("Sleek running shoes".into(), 5, "Footwear".into()),
("Generic shoes".into(), 4, "Footwear".into()),
("White jogging shoes".into(), 3, "Footwear".into()),
]
);
// Tiebreaking
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE category @@@ 'electronics'
ORDER BY score DESC, rating DESC
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
assert_eq!(rows[0].3, 2.1096356);
assert_eq!(rows[1].3, 2.1096356);
assert_eq!(rows[2].3, 2.1096356);
assert_eq!(rows[3].3, 2.1096356);
assert_eq!(rows[4].3, 2.1096356);
// Constant boosting
let rows: Vec<(i32, f32)> = r#"
SELECT id, paradedb.score(id)
FROM mock_items
WHERE description @@@ 'shoes^2' OR category @@@ 'footwear'
ORDER BY score DESC
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
assert_eq!(rows[0].1, 7.690798);
assert_eq!(rows[1].1, 6.9060907);
assert_eq!(rows[2].1, 6.9060907);
assert_eq!(rows[3].1, 1.9362776);
assert_eq!(rows[4].1, 1.9362776);
// Boost by field
let rows: Vec<(i32, f64)> = r#"
SELECT id, paradedb.score(id) * COALESCE(rating, 1) as score
FROM mock_items
WHERE description @@@ 'shoes'
ORDER BY score DESC
LIMIT 5
"#
.fetch(&mut conn);
assert_eq!(
rows,
vec![
(3, 12.424533367156982),
(5, 11.509040832519531),
(4, 7.4547200202941895),
]
);
}
#[rstest]
fn match_query(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata, weight_range)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.match('description', 'running shoes');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"match": {
"field": "description",
"value": "running shoes"
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.match(
'description',
'running shoes',
tokenizer => paradedb.tokenizer('whitespace')
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"match": {
"field": "description",
"value": "running shoes",
"tokenizer": {"type": "whitespace", "lowercase": true, "remove_long": 255}
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.match('description', 'ruining shoez', distance => 2);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"match": {
"field": "description",
"value": "ruining shoez",
"distance": 2
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.match('description', 'running shoes', conjunction_mode => true);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"match": {
"field": "description",
"value": "running shoes",
"conjunction_mode": true
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
}
#[rstest]
fn term_level_queries(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata, weight_range)
WITH (key_field='id');
"#
.execute(&mut conn);
// Exists
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.exists('rating')
LIMIT 5;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"exists": {
"field": "rating"
}
}'::jsonb
LIMIT 5;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.boolean(
must => ARRAY[
paradedb.term('description', 'shoes'),
paradedb.exists('rating')
]
)
LIMIT 5;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"boolean": {
"must": [
{"term": {"field": "description", "value": "shoes"}},
{"exists": {"field": "rating"}}
]
}
}'::jsonb
LIMIT 5;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
// Fuzzy term
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.fuzzy_term('description', 'shoez')
LIMIT 5;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"fuzzy_term": {
"field": "description",
"value": "shoez"
}
}'::jsonb
LIMIT 5;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
// Range
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.range(
field => 'rating',
range => int4range(1, 3, '[)')
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 4);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"range": {
"field": "rating",
"lower_bound": {"included": 1},
"upper_bound": {"excluded": 3}
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 4);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.range(
field => 'rating',
range => int4range(1, 3, '[]')
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 13);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"range": {
"field": "rating",
"lower_bound": {"included": 1},
"upper_bound": {"included": 3}
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 13);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.range(
field => 'rating',
range => int4range(1, NULL, '[)')
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 41);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"range": {
"field": "rating",
"lower_bound": {"included": 1},
"upper_bound": null
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 41);
// Range term
let rows: Vec<(i32,)> = r#"
SELECT id, weight_range FROM mock_items
WHERE id @@@ paradedb.range_term('weight_range', 1);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 16);
let rows: Vec<(i32,)> = r#"
SELECT id, weight_range FROM mock_items
WHERE id @@@
'{
"range_term": {
"field": "weight_range",
"value": 1
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 16);
let rows: Vec<(i32,)> = r#"
SELECT id, description, category, weight_range FROM mock_items
WHERE id @@@ paradedb.boolean(
must => ARRAY[
paradedb.range_term('weight_range', 1),
paradedb.term('category', 'footwear')
]
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
let rows: Vec<(i32,)> = r#"
SELECT id, description, category, weight_range FROM mock_items
WHERE id @@@
'{
"boolean": {
"must": [
{
"range_term": {
"field": "weight_range",
"value": 1
}
},
{
"term": {
"field": "category",
"value": "footwear"
}
}
]
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
let rows: Vec<(i32,)> = r#"
SELECT id, weight_range FROM mock_items
WHERE id @@@ paradedb.range_term('weight_range', '(10, 12]'::int4range, 'Intersects');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 6);
let rows: Vec<(i32,)> = r#"
SELECT id, weight_range FROM mock_items
WHERE id @@@
'{
"range_intersects": {
"field": "weight_range",
"lower_bound": {"excluded": 10},
"upper_bound": {"included": 12}
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 6);
let rows: Vec<(i32,)> = r#"
SELECT id, weight_range FROM mock_items
WHERE id @@@ paradedb.range_term('weight_range', '(3, 9]'::int4range, 'Contains');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 7);
let rows: Vec<(i32,)> = r#"
SELECT id, weight_range FROM mock_items
WHERE id @@@
'{
"range_contains": {
"field": "weight_range",
"lower_bound": {"excluded": 3},
"upper_bound": {"included": 9}
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 7);
let rows: Vec<(i32,)> = r#"
SELECT id, weight_range FROM mock_items
WHERE id @@@ paradedb.range_term('weight_range', '(2, 11]'::int4range, 'Within');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 6);
let rows: Vec<(i32,)> = r#"
SELECT id, weight_range FROM mock_items
WHERE id @@@
'{
"range_within": {
"field": "weight_range",
"lower_bound": {"excluded": 2},
"upper_bound": {"included": 11}
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 6);
// Regex
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.regex('description', '(plush|leather)');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"regex": {
"field": "description",
"pattern": "(plush|leather)"
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.regex('description', 'key.*rd');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"regex": {
"field": "description",
"pattern": "key.*rd"
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
// Term
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.term('description', 'shoes');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"term": {
"field": "description",
"value": "shoes"
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.term('rating', 4);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 16);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"term": {
"field": "rating",
"value": 4
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 16);
// Term set
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.term_set(
terms => ARRAY[
paradedb.term('description', 'shoes'),
paradedb.term('description', 'novel')
]
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"term_set": {
"terms": [
{"field": "description", "value": "shoes"},
{"field": "description", "value": "novel"}
]
}
}'::jsonb ORDER BY id;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
}
#[rstest]
fn phrase_level_queries(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata)
WITH (key_field='id');
"#
.execute(&mut conn);
// Phrase
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.phrase(
field => 'description',
phrases => ARRAY['running', 'shoes']
)"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"phrase": {
"field": "description",
"phrases": ["running", "shoes"]
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.phrase('description', ARRAY['sleek', 'shoes'], slop => 1);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"phrase": {
"field": "description",
"phrases": ["sleek", "shoes"],
"slop": 1
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// Test both function and JSON syntax for phrase_prefix
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.phrase_prefix('description', ARRAY['running', 'sh'])
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"phrase_prefix": {
"field": "description",
"phrases": ["running", "sh"]
}
}'::jsonb
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// Regex phrase
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.regex_phrase('description', ARRAY['run.*', 'shoe.*'])
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.regex_phrase('description', ARRAY['run.*', 'sh.*'])
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
}
#[rstest]
fn json_queries(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
UPDATE mock_items
SET metadata = '{"attributes": {"score": 3, "tstz": "2023-05-01T08:12:34Z"}}'::jsonb
WHERE id = 1;
UPDATE mock_items
SET metadata = '{"attributes": {"score": 4, "tstz": "2023-05-01T09:12:34Z"}}'::jsonb
WHERE id = 2;
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata)
WITH (key_field='id', json_fields='{"metadata": {"fast": true}}');
"#
.execute(&mut conn);
// Term
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.term('metadata.color', 'white')
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"term": {
"field": "metadata.color",
"value": "white"
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
// Datetime Handling
let rows: Vec<(i32,)> = r#"
SELECT id FROM mock_items WHERE mock_items @@@ '{
"range": {
"field": "metadata.attributes.tstz",
"lower_bound": {"included": "2023-05-01T08:12:34Z"},
"upper_bound": null,
"is_datetime": true
}
}'::jsonb
ORDER BY id;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn json_arrays(mut conn: PgConnection) {
//
// 1) Create the mock_items test table with ParadeDB helper
//
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
"#
.execute(&mut conn);
//
// 2) Insert some JSON arrays so we can test array-flattening
//
r#"
UPDATE mock_items
SET metadata = '{"colors": ["red", "green", "blue"]}'::jsonb
WHERE id = 1;
UPDATE mock_items
SET metadata = '{"colors": ["red", "yellow"]}'::jsonb
WHERE id = 2;
UPDATE mock_items
SET metadata = '{"colors": ["blue", "purple"]}'::jsonb
WHERE id = 3;
"#
.execute(&mut conn);
//
// 3) Create an index that includes metadata
//
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, metadata)
WITH (key_field='id');
"#
.execute(&mut conn);
//
// 4) Query via function syntax
//
let rows: Vec<(String, serde_json::Value)> = r#"
SELECT description, metadata
FROM mock_items
WHERE id @@@ paradedb.term('metadata.colors', 'blue')
OR id @@@ paradedb.term('metadata.colors', 'red');
"#
.fetch(&mut conn);
// We expect these three rows to match IDs 1, 2, and/or 3
assert_eq!(rows.len(), 3);
//
// 5) Query via JSON syntax
//
let rows2: Vec<(String, serde_json::Value)> = r#"
SELECT description, metadata
FROM mock_items
WHERE id @@@
'{
"term": {
"field": "metadata.colors",
"value": "blue"
}
}'::jsonb
OR id @@@
'{
"term": {
"field": "metadata.colors",
"value": "red"
}
}'::jsonb;
"#
.fetch(&mut conn);
// Same three rows should appear
assert_eq!(rows2.len(), 3);
}
#[rstest]
fn custom_enum(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE TYPE color AS ENUM ('red', 'green', 'blue');
ALTER TABLE mock_items ADD COLUMN color color;
INSERT INTO mock_items (color) VALUES ('red'), ('green'), ('blue');
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, color, in_stock, created_at, metadata)
WITH (key_field='id');
"#
.execute(&mut conn);
// Term
let rows: Vec<(Option, Option, Option)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.term('color', 'red'::color);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(Option, Option, Option)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"term": {
"field": "color",
"value": 1.0
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// Parse
let rows: Vec<(Option, Option, Option)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse('color:1.0');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(Option, Option, Option)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"parse": {
"query_string": "color:1.0"
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
}
#[rstest]
fn compound_queries(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata)
WITH (key_field='id');
"#
.execute(&mut conn);
// Overview
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.boolean(
should => ARRAY[
paradedb.boost(query => paradedb.term('description', 'shoes'), factor => 2.0),
paradedb.term('description', 'running')
]
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"boolean": {
"should": [
{"boost": {"query": {"term": {"field": "description", "value": "shoes"}}, "factor": 2.0}},
{"term": {"field": "description", "value": "running"}}
]
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
// All
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.boolean(
should => ARRAY[paradedb.all()],
must_not => ARRAY[paradedb.term('description', 'shoes')]
)
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 38);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"boolean": {
"should": [{"all": null}],
"must_not": [{"term": {"field": "description", "value": "shoes"}}]
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 38);
// Boolean
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.boolean(
should => ARRAY[
paradedb.term('description', 'headphones')
],
must => ARRAY[
paradedb.term('category', 'electronics'),
paradedb.fuzzy_term('description', 'bluetooht')
],
must_not => ARRAY[
paradedb.range('rating', int4range(NULL, 2, '()'))
]
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.boolean(
should => ARRAY[
paradedb.term('description', 'headphones')
],
must => ARRAY[
paradedb.term('category', 'electronics'),
paradedb.fuzzy_term('description', 'bluetooht')
],
must_not => ARRAY[
paradedb.range('rating', int4range(NULL, 2, '()'))
]
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// Boost
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE id @@@ paradedb.boolean(
should => ARRAY[
paradedb.term('description', 'shoes'),
paradedb.boost(2.0, paradedb.term('description', 'running'))
]
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE id @@@
'{
"boolean": {
"should": [
{"term": {"field": "description", "value": "shoes"}},
{"boost": {"factor": 2.0, "query": {"term": {"field": "description", "value": "running"}}}}
]
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
// Const score
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE id @@@ paradedb.boolean(
should => ARRAY[
paradedb.const_score(1.0, paradedb.term('description', 'shoes')),
paradedb.term('description', 'running')
]
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE id @@@
'{
"boolean": {
"should": [
{"const_score": {"score": 1.0, "query": {"term": {"field": "description", "value": "shoes"}}}},
{"term": {"field": "description", "value": "running"}}
]
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
// Disjunction max
// Test both function and JSON syntax for disjunction_max
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE id @@@ paradedb.disjunction_max(ARRAY[
paradedb.term('description', 'shoes'),
paradedb.term('description', 'running')
]);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE id @@@
'{
"disjunction_max": {
"disjuncts": [
{"term": {"field": "description", "value": "shoes"}},
{"term": {"field": "description", "value": "running"}}
]
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String, f32)> = r#"
SELECT description, rating, category, paradedb.score(id)
FROM mock_items
WHERE id @@@
'{
"disjunction_max": {
"disjuncts": [
{"term": {"field": "description", "value": "shoes"}},
{"term": {"field": "description", "value": "running"}}
]
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
// Empty
let rows: Vec<(String, i32, String, f32)> = r#"
-- Returns no rows
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.empty();
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 0);
let rows: Vec<(String, i32, String, f32)> = r#"
-- Returns no rows
SELECT description, rating, category
FROM mock_items
WHERE id @@@ '{"empty": null}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 0);
// Parse
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse('description:"running shoes" OR category:footwear');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 6);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.boolean(should => ARRAY[
paradedb.phrase('description', ARRAY['running', 'shoes']),
paradedb.term('category', 'footwear')
]);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 6);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ '{
"parse": {"query_string": "description:\"running shoes\" OR category:footwear"}
}'::jsonb
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 6);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ '{
"boolean": {
"should": [
{
"phrase": {
"field": "description",
"phrases": ["running", "shoes"]
}
},
{
"term": {
"field": "category",
"value": "footwear"
}
}
]
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 6);
// Lenient parse
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse('speaker electronics', lenient => true);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"parse": {
"query_string": "speaker electronics",
"lenient": true
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
// Conjunction mode
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse('description:speaker category:electronics');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse('description:speaker OR category:electronics');
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"parse": {
"query_string": "description:speaker category:electronics"
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 5);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse(
'description:speaker category:electronics',
conjunction_mode => true
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse(
'description:speaker AND category:electronics'
)"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"parse": {
"query_string": "description:speaker category:electronics",
"conjunction_mode": true
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"parse": {
"query_string": "description:speaker AND category:electronics"
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
// Parse with field
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse_with_field(
'description',
'speaker bluetooth',
conjunction_mode => true
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"parse_with_field": {
"field": "description",
"query_string": "speaker bluetooth",
"conjunction_mode": true
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
}
#[rstest]
fn specialized_queries(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata)
WITH (key_field='id');
"#
.execute(&mut conn);
// More like this
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.more_like_this(
document_id => 3,
min_term_frequency => 1
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 16);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.more_like_this(
document_fields => '{"description": "shoes"}',
min_doc_frequency => 0,
max_doc_frequency => 100,
min_term_frequency => 1
);
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"more_like_this": {
"document_id": 3,
"min_term_frequency": 1
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 16);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@
'{
"more_like_this": {
"document_fields": [["description", "shoes"]],
"min_doc_frequency": 0,
"max_doc_frequency": 100,
"min_term_frequency": 1
}
}'::jsonb;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 3);
}
#[rstest]
fn autocomplete(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata)
WITH (key_field='id');
"#
.execute(&mut conn);
let expected = vec![
("Sleek running shoes".into(), 5, "Footwear".into()),
("Generic shoes".into(), 4, "Footwear".into()),
("White jogging shoes".into(), 3, "Footwear".into()),
];
// Fuzzy term
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category FROM mock_items
WHERE id @@@ paradedb.fuzzy_term(
field => 'description',
value => 'shoez'
) ORDER BY rating DESC
"#
.fetch(&mut conn);
assert_eq!(rows, expected);
// Match
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category FROM mock_items
WHERE id @@@ paradedb.match(
field => 'description',
value => 'ruining shoez',
distance => 2
) ORDER BY rating DESC
"#
.fetch(&mut conn);
assert_eq!(rows, expected);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category FROM mock_items
WHERE id @@@ paradedb.match(
field => 'description',
value => 'ruining shoez',
distance => 2,
conjunction_mode => true
)
"#
.fetch(&mut conn);
assert_eq!(rows, vec![expected[0].clone()]);
// Multiple fuzzy fields
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category FROM mock_items
WHERE id @@@ paradedb.boolean(
should => ARRAY[
paradedb.match(field => 'description', value => 'ruining shoez', distance => 2),
paradedb.match(field => 'category', value => 'ruining shoez', distance => 2)
]
) ORDER BY rating DESC
"#
.fetch(&mut conn);
assert_eq!(rows, expected);
r#"
DROP INDEX search_idx;
CREATE INDEX ngrams_idx ON public.mock_items
USING bm25 (id, description)
WITH (
key_field='id',
text_fields='{"description": {"tokenizer": {"type": "ngram", "min_gram": 3, "max_gram": 3, "prefix_only": false}}}'
);
"#
.execute(&mut conn);
// Ngram term
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category FROM mock_items
WHERE description @@@ 'sho'
ORDER BY rating DESC
"#
.fetch(&mut conn);
assert_eq!(rows, expected);
// Ngram term set
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category FROM mock_items
WHERE id @@@ paradedb.match(
field => 'description',
value => 'hsoes',
distance => 0
) ORDER BY rating DESC
"#
.fetch(&mut conn);
assert_eq!(rows, expected);
}
#[rstest]
fn hybrid_search(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata)
WITH (key_field='id');
CREATE EXTENSION vector;
ALTER TABLE mock_items ADD COLUMN embedding vector(3);
UPDATE mock_items m
SET embedding = ('[' ||
((m.id + 1) % 10 + 1)::integer || ',' ||
((m.id + 2) % 10 + 1)::integer || ',' ||
((m.id + 3) % 10 + 1)::integer || ']')::vector;
"#
.execute(&mut conn);
let rows: Vec<(i32, BigDecimal, String, Vector)> = r#"
WITH bm25_ranked AS (
SELECT id, RANK() OVER (ORDER BY score DESC) AS rank
FROM (
SELECT id, paradedb.score(id) AS score
FROM mock_items
WHERE description @@@ 'keyboard'
ORDER BY paradedb.score(id) DESC
LIMIT 20
) AS bm25_score
),
semantic_search AS (
SELECT id, RANK() OVER (ORDER BY embedding <=> '[1,2,3]') AS rank
FROM mock_items
ORDER BY embedding <=> '[1,2,3]'
LIMIT 20
)
SELECT
COALESCE(semantic_search.id, bm25_ranked.id) AS id,
COALESCE(1.0 / (60 + semantic_search.rank), 0.0) +
COALESCE(1.0 / (60 + bm25_ranked.rank), 0.0) AS score,
mock_items.description,
mock_items.embedding
FROM semantic_search
FULL OUTER JOIN bm25_ranked ON semantic_search.id = bm25_ranked.id
JOIN mock_items ON mock_items.id = COALESCE(semantic_search.id, bm25_ranked.id)
ORDER BY score DESC, description
LIMIT 5;
"#
.fetch(&mut conn);
// Expected results
let expected = vec![
(
1,
BigDecimal::from_str("0.03062178588125292193").unwrap(),
String::from("Ergonomic metal keyboard"),
Vector::from(vec![3.0, 4.0, 5.0]),
),
(
2,
BigDecimal::from_str("0.02990695613646433318").unwrap(),
String::from("Plastic Keyboard"),
Vector::from(vec![4.0, 5.0, 6.0]),
),
(
19,
BigDecimal::from_str("0.01639344262295081967").unwrap(),
String::from("Artistic ceramic vase"),
Vector::from(vec![1.0, 2.0, 3.0]),
),
(
29,
BigDecimal::from_str("0.01639344262295081967").unwrap(),
String::from("Designer wall paintings"),
Vector::from(vec![1.0, 2.0, 3.0]),
),
(
39,
BigDecimal::from_str("0.01639344262295081967").unwrap(),
String::from("Handcrafted wooden frame"),
Vector::from(vec![1.0, 2.0, 3.0]),
),
];
// Compare each row individually
for (actual, expected) in rows.iter().zip(expected.iter()) {
assert_eq!(actual.0, expected.0); // Compare IDs
assert_relative_eq!(
actual.1.to_f64().unwrap(),
expected.1.to_f64().unwrap(),
epsilon = 0.000265
); // Compare BigDecimal scores
assert_eq!(actual.2, expected.2); // Compare descriptions
assert_eq!(actual.3, expected.3); // Compare embeddings
}
}
#[rstest]
fn create_bm25_test_tables(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'orders',
table_type => 'Orders'
);
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'parts',
table_type => 'Parts'
);
"#
.execute(&mut conn);
let rows: Vec<(i32, i32, i32, f32, String)> = r#"
SELECT order_id, product_id, order_quantity, order_total::REAL, customer_name FROM orders
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 64);
assert_eq!(rows[0], (1, 1, 3, 99.99, "John Doe".into()));
let rows: Vec<(i32, i32, String)> = r#"
SELECT part_id, parent_part_id, description FROM parts
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 36);
assert_eq!(rows[0], (1, 0, "Chassis Assembly".into()));
}
#[rstest]
fn concurrent_indexing(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating)
WITH (key_field='id');
"#
.execute(&mut conn);
r#"
CREATE INDEX CONCURRENTLY search_idx_v2 ON mock_items
USING bm25 (id, description, category, rating, in_stock)
WITH (key_field='id');
"#
.execute(&mut conn);
r#"
DROP INDEX search_idx;
"#
.execute(&mut conn);
// Verify the new index is being used by running a query that includes in_stock
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE description @@@ 'shoes' AND id @@@ 'in_stock:true'
ORDER BY rating DESC
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 2);
}
#[rstest]
fn schema(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata)
WITH (key_field='id');
"#
.execute(&mut conn);
let rows: Vec<(String, String)> =
"SELECT name, field_type FROM paradedb.schema('search_idx')".fetch(&mut conn);
let expected = vec![
("category".to_string(), "Str".to_string()),
("created_at".to_string(), "Date".to_string()),
("ctid".to_string(), "U64".to_string()),
("description".to_string(), "Str".to_string()),
("id".to_string(), "I64".to_string()),
("in_stock".to_string(), "Bool".to_string()),
("metadata".to_string(), "JsonObject".to_string()),
("rating".to_string(), "I64".to_string()),
];
assert_eq!(rows, expected);
}
#[rstest]
fn index_size(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category, rating, in_stock, created_at, metadata)
WITH (key_field='id');
"#
.execute(&mut conn);
let size: i64 = "SELECT pg_relation_size('search_idx')"
.fetch_one::<(i64,)>(&mut conn)
.0;
assert!(size > 0);
}
#[rstest]
fn field_configuration(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "ngram", "min_gram": 2, "max_gram": 3, "prefix_only": false}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, category)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "ngram", "min_gram": 2, "max_gram": 3, "prefix_only": false}
},
"category": {
"tokenizer": {"type": "ngram", "min_gram": 2, "max_gram": 3, "prefix_only": false}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"fast": true,
"tokenizer": {"type": "ngram", "min_gram": 2, "max_gram": 3, "prefix_only": false}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, metadata)
WITH (
key_field = 'id',
json_fields = '{
"metadata": {
"fast": true
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, rating)
WITH (
key_field = 'id',
numeric_fields = '{
"rating": {"fast": true}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, in_stock)
WITH (
key_field = 'id',
boolean_fields = '{
"in_stock": {"fast": true}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, created_at)
WITH (
key_field = 'id',
datetime_fields = '{
"created_at": {"fast": true}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, weight_range)
WITH (key_field='id');
DROP INDEX search_idx;
"#
.execute(&mut conn);
}
#[rstest]
fn available_tokenizers(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field='id',
text_fields='{
"description": {"tokenizer": {"type": "whitespace"}}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "default"}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "whitespace"}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "raw"}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "regex", "pattern": "\\W+"}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "ngram", "min_gram": 2, "max_gram": 3, "prefix_only": false}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "source_code"}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "chinese_compatible"}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "chinese_lindera"}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
if cfg!(feature = "icu") {
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field = 'id',
text_fields = '{
"description": {
"tokenizer": {"type": "icu"}
}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
}
r#"
SELECT * FROM paradedb.tokenizers();
"#
.execute(&mut conn);
r#"
SELECT * FROM paradedb.tokenize(
paradedb.tokenizer('ngram', min_gram => 3, max_gram => 3, prefix_only => false),
'keyboard'
);
"#
.execute(&mut conn);
// Test multiple tokenizers for the same field
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field='id',
text_fields='{
"description": {"tokenizer": {"type": "whitespace"}},
"description_ngram": {"tokenizer": {"type": "ngram", "min_gram": 3, "max_gram": 3, "prefix_only": false}, "column": "description"},
"description_stem": {"tokenizer": {"type": "default", "stemmer": "English"}, "column": "description"}
}'
);
"#
.execute(&mut conn);
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse('description_ngram:cam AND description_stem:digitally')
ORDER BY rating DESC
LIMIT 5;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
assert!(rows[0].0.contains("camera"));
assert!(rows[0].0.contains("digital"));
let rows: Vec<(String, i32, String)> = r#"
SELECT description, rating, category
FROM mock_items
WHERE id @@@ paradedb.parse('description:"Soft cotton" OR description_stem:shirts')
ORDER BY rating DESC
LIMIT 5;
"#
.fetch(&mut conn);
assert_eq!(rows.len(), 1);
assert!(rows.iter().any(|r| r.0.contains("cotton")));
assert!(rows.iter().any(|r| r.0.contains("shirt")));
}
#[rstest]
fn token_filters(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field='id',
text_fields='{
"description": {"tokenizer": {"type": "default", "stemmer": "English"}}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field='id',
text_fields='{
"description": {"tokenizer": {"type": "default", "remove_long": 255}}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field='id',
text_fields='{
"description": {"tokenizer": {"type": "default", "lowercase": false}}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
}
#[rstest]
fn fast_fields(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description, rating)
WITH (
key_field = 'id',
text_fields ='{
"description": {"fast": true}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, category)
WITH (
key_field='id',
text_fields='{
"category": {"fast": true, "normalizer": "raw"}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
}
#[rstest]
fn record(mut conn: PgConnection) {
r#"
CALL paradedb.create_bm25_test_table(
schema_name => 'public',
table_name => 'mock_items'
);
"#
.execute(&mut conn);
r#"
CREATE INDEX search_idx ON mock_items
USING bm25 (id, description)
WITH (
key_field='id',
text_fields='{
"description": {"record": "freq"}
}'
);
DROP INDEX search_idx;
"#
.execute(&mut conn);
}