// Copyright (c) 2023-2025 ParadeDB, Inc.
//
// This file is part of ParadeDB - Postgres for Search and Analytics
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see .
mod fixtures;
use fixtures::*;
use futures::executor::block_on;
use lockfree_object_pool::MutexObjectPool;
use proptest::prelude::*;
use proptest::strategy::{BoxedStrategy, Strategy};
use proptest_derive::Arbitrary;
use rstest::*;
use sqlx::PgConnection;
use std::fmt::Debug;
use crate::fixtures::querygen::opexprgen::Operator;
use crate::fixtures::querygen::{compare, PgGucs};
#[derive(Debug, Clone, Arbitrary)]
pub enum TokenizerType {
Default,
Keyword,
}
impl TokenizerType {
fn to_config(&self) -> &'static str {
match self {
TokenizerType::Default => r#""type": "default""#,
TokenizerType::Keyword => r#""type": "keyword""#,
}
}
}
#[derive(Debug, Clone, Arbitrary)]
pub struct IndexConfig {
tokenizer: TokenizerType,
fast: bool,
}
impl IndexConfig {
fn to_json_fields_config(&self) -> String {
format!(
r#"{{
"metadata": {{
"tokenizer": {{ {} }},
"fast": {}
}}
}}"#,
self.tokenizer.to_config(),
self.fast
)
}
}
#[derive(Debug, Clone, Arbitrary)]
pub enum JsonValueType {
Text,
Numeric,
Boolean,
Null,
}
impl JsonValueType {
fn sample_values(&self) -> BoxedStrategy {
match self {
JsonValueType::Text => proptest::sample::select(vec![
"'apple'".to_string(),
"'banana'".to_string(),
"'cherry'".to_string(),
"'date'".to_string(),
"'elderberry'".to_string(),
"'test'".to_string(),
"'value'".to_string(),
"'red apple'".to_string(),
"'yellow banana'".to_string(),
"'sweet cherry'".to_string(),
"'fresh date'".to_string(),
"'purple elderberry'".to_string(),
"'unit test'".to_string(),
"'test value'".to_string(),
])
.boxed(),
JsonValueType::Numeric => proptest::sample::select(vec![
"42".to_string(),
"100".to_string(),
"3.14".to_string(),
"0".to_string(),
"-1".to_string(),
"999".to_string(),
])
.boxed(),
JsonValueType::Boolean => {
proptest::sample::select(vec!["true".to_string(), "false".to_string()]).boxed()
}
JsonValueType::Null => Just("NULL".to_string()).boxed(),
}
}
fn to_json_literal(&self, value: &str) -> String {
match self {
JsonValueType::Text => format!("'\"{}\"'", value.trim_matches('\'')),
JsonValueType::Numeric => format!("'{value}'"),
JsonValueType::Boolean => format!("'{value}'"),
JsonValueType::Null => "'null'".to_string(),
}
}
fn is_compatible_with_operator(&self, operator: &Operator) -> bool {
match (self, operator) {
// Range operators only work with numeric types
(JsonValueType::Numeric, Operator::Lt | Operator::Le | Operator::Gt | Operator::Ge) => {
true
}
// Equality operators work with all types
(_, Operator::Eq | Operator::Ne) => true,
// Other combinations are not compatible
_ => false,
}
}
}
#[derive(Debug, Clone)]
pub enum JsonPath {
Simple(String),
Nested(String, String),
DeepNested(String, String, String),
}
impl Arbitrary for JsonPath {
type Parameters = ();
type Strategy = BoxedStrategy;
fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
prop_oneof![
proptest::sample::select(vec![
"name", "count", "active", "tags", "user", "settings", "level1", "items", "mixed"
])
.prop_map(|key| JsonPath::Simple(key.to_string())),
(
proptest::sample::select(vec!["user", "settings", "level1", "mixed"]),
proptest::sample::select(vec![
"name",
"age",
"theme",
"level2",
"text",
"number",
"boolean",
"null_value"
])
)
.prop_map(|(key1, key2)| JsonPath::Nested(key1.to_string(), key2.to_string())),
(
proptest::sample::select(vec!["level1"]),
proptest::sample::select(vec!["level2"]),
proptest::sample::select(vec!["level3"])
)
.prop_map(|(key1, key2, key3)| JsonPath::DeepNested(
key1.to_string(),
key2.to_string(),
key3.to_string()
)),
]
.boxed()
}
}
impl JsonPath {
fn is_boolean_field(&self) -> bool {
match self {
JsonPath::Simple(key) => key == "active",
JsonPath::Nested(_, key2) => key2 == "boolean",
JsonPath::DeepNested(_, _, _) => false,
}
}
fn is_numeric_field(&self) -> bool {
match self {
JsonPath::Simple(key) => key == "count",
JsonPath::Nested(_, key2) => key2 == "age" || key2 == "number",
JsonPath::DeepNested(_, _, _) => false,
}
}
}
impl JsonPath {
fn to_sql(&self) -> String {
match self {
JsonPath::Simple(key) => format!("'{key}'"),
JsonPath::Nested(key1, key2) => format!("'{{{key1},{key2}}}'"),
JsonPath::DeepNested(key1, key2, key3) => format!("'{{{key1},{key2},{key3}}}'"),
}
}
}
#[derive(Debug, Clone)]
pub enum JsonOperation {
Comparison {
operator: Operator,
value: JsonValueType,
},
IsNull,
IsNotNull,
IsTrue,
IsFalse,
In {
values: Vec,
},
NotIn {
values: Vec,
},
}
impl Arbitrary for JsonOperation {
type Parameters = ();
type Strategy = BoxedStrategy;
fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
prop_oneof![
(any::(), any::())
.prop_filter(
"operator and value type must be compatible",
|(operator, value)| { value.is_compatible_with_operator(operator) }
)
.prop_map(|(operator, value)| JsonOperation::Comparison { operator, value }),
Just(JsonOperation::IsNull),
Just(JsonOperation::IsNotNull),
Just(JsonOperation::IsTrue),
Just(JsonOperation::IsFalse),
any::()
.prop_flat_map(|value_type| { proptest::collection::vec(Just(value_type), 1..4) })
.prop_map(|values| JsonOperation::In { values }),
any::()
.prop_flat_map(|value_type| { proptest::collection::vec(Just(value_type), 1..4) })
.prop_map(|values| JsonOperation::NotIn { values }),
]
.boxed()
}
}
#[derive(Debug, Clone)]
pub struct JsonExpr {
path: JsonPath,
operation: JsonOperation,
}
impl Arbitrary for JsonExpr {
type Parameters = ();
type Strategy = BoxedStrategy;
fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
(any::(), any::())
.prop_filter(
"operation must be compatible with field type",
|(path, operation)| {
match operation {
JsonOperation::Comparison { operator, value } => {
// For range operators, ensure we're using numeric fields
match operator {
Operator::Lt | Operator::Le | Operator::Gt | Operator::Ge => {
path.is_numeric_field()
&& value.is_compatible_with_operator(operator)
}
_ => {
// For other operators, ensure value type matches field type
match (value, path) {
(JsonValueType::Numeric, path)
if path.is_numeric_field() =>
{
true
}
(JsonValueType::Boolean, path)
if path.is_boolean_field() =>
{
true
}
(JsonValueType::Text, path)
if !path.is_numeric_field()
&& !path.is_boolean_field() =>
{
true
}
(JsonValueType::Null, _) => true, // NULL works with any field
_ => false, // Incompatible combinations
}
}
}
}
JsonOperation::IsTrue | JsonOperation::IsFalse => {
// Boolean operations only work on boolean fields
path.is_boolean_field()
}
JsonOperation::In { values } | JsonOperation::NotIn { values } => {
// For IN/NOT IN, ensure all values are compatible with the field type
values.iter().all(|value_type| {
match (value_type, path) {
(JsonValueType::Numeric, path) if path.is_numeric_field() => {
true
}
(JsonValueType::Boolean, path) if path.is_boolean_field() => {
true
}
(JsonValueType::Text, path)
if !path.is_numeric_field() && !path.is_boolean_field() =>
{
true
}
(JsonValueType::Null, _) => true, // NULL works with any field
_ => false, // Incompatible combinations
}
})
}
_ => true, // Other operations work with any field type
}
},
)
.prop_map(|(path, operation)| JsonExpr { path, operation })
.boxed()
}
}
impl JsonExpr {
fn sample_values(&self) -> BoxedStrategy> {
match &self.operation {
JsonOperation::Comparison { value, .. } => {
let values = value.sample_values();
proptest::collection::vec(values, 1..3).boxed()
}
JsonOperation::In { values: _ } | JsonOperation::NotIn { values: _ } => {
// For IN/NOT IN operations, we'll use simple predefined values
let predefined_values = vec![
vec!["'apple'".to_string()],
vec!["'banana'".to_string(), "'cherry'".to_string()],
vec!["'red apple'".to_string(), "'yellow banana'".to_string()],
vec!["'sweet cherry'".to_string(), "'fresh date'".to_string()],
vec!["42".to_string(), "100".to_string()],
];
proptest::sample::select(predefined_values).boxed()
}
_ => Just(vec![]).boxed(),
}
}
fn to_sql(&self, values: &[String]) -> String {
let column = "metadata";
let json_expr = format!("{column} ->> {}", self.path.to_sql());
match &self.operation {
JsonOperation::Comparison { operator, value } => {
if values.is_empty() {
return format!("{} {} NULL", json_expr, operator.to_sql());
}
let value_literal = value.to_json_literal(&values[0]);
// Determine the target type based on the field path and operation
let target_type = if self.path.is_numeric_field() {
"numeric"
} else if self.path.is_boolean_field() {
"boolean"
} else {
"text"
};
// Add type casting based on the target type and operation
let final_expr = match (operator, value, target_type) {
// Range operations on numeric fields
(
Operator::Lt | Operator::Le | Operator::Gt | Operator::Ge,
JsonValueType::Numeric,
"numeric",
) => {
format!("({json_expr})::numeric")
}
// Boolean comparisons
(_, JsonValueType::Boolean, "boolean") => {
format!("({json_expr})::boolean")
}
// Numeric comparisons on numeric fields
(_, JsonValueType::Numeric, "numeric") => {
format!("({json_expr})::numeric")
}
// Text comparisons (no casting needed for text fields)
(_, JsonValueType::Text, "text") => json_expr,
// Don't do cross-type comparisons - they're invalid
_ => {
// For incompatible types, just return the original expression
// This will likely cause a runtime error, but that's better than invalid SQL
json_expr
}
};
format!("{} {} {}", final_expr, operator.to_sql(), value_literal)
}
JsonOperation::IsNull => format!("{json_expr} IS NULL"),
JsonOperation::IsNotNull => format!("{json_expr} IS NOT NULL"),
JsonOperation::IsTrue => {
// Ensure boolean operations only work on boolean fields
format!("({json_expr})::boolean IS TRUE")
}
JsonOperation::IsFalse => {
// Ensure boolean operations only work on boolean fields
format!("({json_expr})::boolean IS FALSE")
}
JsonOperation::In { values } => {
if values.is_empty() {
return format!("{json_expr} IN ()");
}
// Determine the target type based on the field path
let target_type = if self.path.is_numeric_field() {
"numeric"
} else if self.path.is_boolean_field() {
"boolean"
} else {
"text"
};
// Cast the JSON expression to the appropriate type
let casted_expr = format!("({json_expr})::{target_type}");
// Generate values of the appropriate type (only compatible combinations)
let value_literals: Vec = values
.iter()
.map(|value_type| match value_type {
JsonValueType::Text => "'apple'".to_string(),
JsonValueType::Numeric => "42".to_string(),
JsonValueType::Boolean => "true".to_string(),
JsonValueType::Null => "NULL".to_string(),
})
.collect();
format!("{} IN ({})", casted_expr, value_literals.join(", "))
}
JsonOperation::NotIn { values } => {
if values.is_empty() {
return format!("{json_expr} NOT IN ()");
}
// Determine the target type based on the field path
let target_type = if self.path.is_numeric_field() {
"numeric"
} else if self.path.is_boolean_field() {
"boolean"
} else {
"text"
};
// Cast the JSON expression to the appropriate type
let casted_expr = format!("({json_expr})::{target_type}");
// Generate values of the appropriate type (only compatible combinations)
let value_literals: Vec = values
.iter()
.map(|value_type| match value_type {
JsonValueType::Text => "'banana'".to_string(),
JsonValueType::Numeric => "100".to_string(),
JsonValueType::Boolean => "false".to_string(),
JsonValueType::Null => "NULL".to_string(),
})
.collect();
format!("{} NOT IN ({})", casted_expr, value_literals.join(", "))
}
}
}
}
fn json_pushdown_setup(conn: &mut PgConnection, index_config: &IndexConfig) -> String {
"CREATE EXTENSION IF NOT EXISTS pg_search;".execute(conn);
"SET log_error_verbosity TO VERBOSE;".execute(conn);
"SET log_min_duration_statement TO 1000;".execute(conn);
let json_fields_config = index_config.to_json_fields_config();
let setup_sql = format!(
r#"
DROP TABLE IF EXISTS json_pushdown_test;
CREATE TABLE json_pushdown_test (
id SERIAL8 NOT NULL PRIMARY KEY,
metadata JSONB
);
-- Insert test data with various JSON structures
INSERT INTO json_pushdown_test (metadata) VALUES
('{{"name": "apple", "count": 42, "active": true, "tags": ["fruit", "red"]}}'),
('{{"name": "banana", "count": 100, "active": false, "tags": ["fruit", "yellow"]}}'),
('{{"name": "cherry", "count": 3.14, "active": true, "tags": ["fruit", "red"]}}'),
('{{"name": "date", "count": 0, "active": false, "tags": ["fruit", "brown"]}}'),
('{{"name": "elderberry", "count": -1, "active": true, "tags": ["fruit", "purple"]}}'),
('{{"name": "test", "count": 999, "active": false, "tags": ["test", "data"]}}'),
('{{"name": "value", "count": 1, "active": true, "tags": ["value", "test"]}}'),
('{{"name": "red apple", "count": 50, "active": true, "tags": ["fruit", "red", "multi"]}}'),
('{{"name": "yellow banana", "count": 75, "active": false, "tags": ["fruit", "yellow", "multi"]}}'),
('{{"name": "sweet cherry", "count": 25, "active": true, "tags": ["fruit", "red", "multi"]}}'),
('{{"name": "fresh date", "count": 60, "active": false, "tags": ["fruit", "brown", "multi"]}}'),
('{{"name": "purple elderberry", "count": 30, "active": true, "tags": ["fruit", "purple", "multi"]}}'),
('{{"name": "unit test", "count": 200, "active": false, "tags": ["test", "unit", "multi"]}}'),
('{{"name": "test value", "count": 150, "active": true, "tags": ["test", "value", "multi"]}}'),
('{{"user": {{"name": "alice", "age": 25}}, "settings": {{"theme": "dark"}}}}'),
('{{"user": {{"name": "bob", "age": 30}}, "settings": {{"theme": "light"}}}}'),
('{{"user": {{"name": "charlie", "age": 35}}, "settings": {{"theme": "dark"}}}}'),
('{{"level1": {{"level2": {{"level3": "deep_value"}}}}}}'),
('{{"level1": {{"level2": {{"level3": "another_value"}}}}}}'),
('{{"items": ["item1", "item2", "item3"]}}'),
('{{"items": ["item4", "item5", "item6"]}}'),
('{{"mixed": {{"text": "hello", "number": 123, "boolean": true, "null_value": null}}}}'),
('{{"mixed": {{"text": "world", "number": 456, "boolean": false, "null_value": null}}}}'),
(NULL),
('{{}}');
-- Create BM25 index
CREATE INDEX idx_json_pushdown_test ON json_pushdown_test
USING bm25 (id, metadata)
WITH (
key_field = 'id',
json_fields = '{json_fields_config}'
);
-- help our cost estimates
ANALYZE json_pushdown_test;
"#
);
setup_sql.clone().execute(conn);
setup_sql
}
#[rstest]
#[tokio::test]
async fn json_pushdown_correctness(database: Db) {
let pool = MutexObjectPool::::new(
move || block_on(async { database.connection().await }),
|_| {},
);
proptest!(|(
(expr, selected_values) in any::()
.prop_flat_map(|expr| {
let values_strategy = expr.sample_values();
(Just(expr), values_strategy)
}),
index_config in any::(),
gucs in any::(),
)| {
let setup_sql = json_pushdown_setup(&mut pool.pull(), &index_config);
eprintln!("Setup SQL:\n{setup_sql}");
let json_condition = expr.to_sql(&selected_values);
// Test SELECT queries with actual results
let pg_query = format!(
"SELECT id, metadata FROM json_pushdown_test WHERE {json_condition} ORDER BY id"
);
let bm25_query = format!(
"SELECT id, metadata FROM json_pushdown_test WHERE id @@@ paradedb.all() AND {json_condition} ORDER BY id"
);
compare(
&pg_query,
&bm25_query,
&gucs,
&mut pool.pull(),
&setup_sql,
|query, conn| {
query.fetch::<(i64, Option)>(conn)
},
)?;
});
}