//! Deterministic graph generator for benchmarks. //! //! Produces power-law degree distributions similar to real-world graphs //! (e.g., Panama Papers: 2M nodes, 5.8M edges ≈ avg degree 2.9). //! //! Seed = 42 for all published benchmarks. Reproducible. use graph::bench_support::{EdgeStoreBuilder, FilterIndexBuilder, NodeStoreBuilder}; /// Simple xorshift64 PRNG — deterministic, fast, no external dependency. pub struct Rng(u64); impl Rng { /// Create a deterministic generator from `seed`. pub fn new(seed: u64) -> Self { Self(if seed == 0 { 1 } else { seed }) } /// Return the next pseudo-random `u64`. #[inline] pub fn next_u64(&mut self) -> u64 { let mut x = self.0; x ^= x << 13; x ^= x >> 7; x ^= x << 17; self.0 = x; x } /// Return the next pseudo-random `u32`. #[inline] pub fn next_u32(&mut self) -> u32 { (self.next_u64() >> 16) as u32 } /// Return a value in [0, n) #[inline] pub fn next_bounded(&mut self, n: u32) -> u32 { ((self.next_u32() as u64 * n as u64) >> 32) as u32 } } /// Stores needed for BFS benchmarking. pub struct BenchGraph { /// Synthetic node metadata store. pub node_store: NodeStoreBuilder, /// Synthetic CSR edge store. pub edge_store: EdgeStoreBuilder, /// Empty or synthetic filter index used by traversal benchmarks. pub filter_index: FilterIndexBuilder, } /// Generated source-table row before benchmark indexes are built. #[derive(Clone)] pub struct GeneratedNode { /// Source table OID assigned to the synthetic row. pub table_oid: u32, /// Synthetic primary-key value. pub pk: String, } /// Generated benchmark fixture before CSR and secondary indexes are built. #[derive(Clone)] pub struct GeneratedBenchmarkGraph { /// Synthetic source rows. pub nodes: Vec, /// Synthetic raw directed edges. pub raw_edges: Vec, } /// Build a benchmark graph with power-law degree distribution. /// /// # Parameters /// - `node_count`: Number of nodes /// - `avg_degree`: Average outgoing edges per node (actual is power-law distributed) /// - `seed`: PRNG seed for reproducibility /// - `num_properties`: How many property key-value pairs per node pub fn build_benchmark_graph( node_count: u32, avg_degree: u32, seed: u64, num_properties: u32, ) -> BenchGraph { build_benchmark_graph_from_fixture(build_benchmark_fixture( node_count, avg_degree, seed, num_properties, )) } /// Build a benchmark graph with one numeric filter column populated at the /// requested percentage. Values cycle across 0..100 for predictable selectivity. pub fn build_filtered_benchmark_graph( node_count: u32, avg_degree: u32, seed: u64, populated_percent: u32, ) -> BenchGraph { let mut graph = build_benchmark_graph(node_count, avg_degree, seed, 1); let populated_count = (node_count as usize) .saturating_mul(populated_percent as usize) .saturating_div(100) .max(1) .min(node_count as usize); let column_idx = graph .filter_index .register_typed_column_with_populated_count( 100, "score".to_string(), graph::bench_support::FilterColumnType::Numeric, node_count as usize, populated_count, ); for node_idx in 0..populated_count { graph .filter_index .set_value(column_idx, node_idx as u32, (node_idx % 100) as u32); } graph } /// Generate deterministic rows and raw edges without building graph indexes. /// /// Construction benchmarks use this split to measure fixture generation /// separately from CSR and index construction. pub fn build_benchmark_fixture( node_count: u32, avg_degree: u32, seed: u64, _num_properties: u32, ) -> GeneratedBenchmarkGraph { let mut rng = Rng::new(seed); let mut nodes = Vec::with_capacity(node_count as usize); for i in 0..node_count { nodes.push(GeneratedNode { table_oid: 100, pk: format!("PK-{}", i), }); } let total_edges = (node_count as u64 * avg_degree as u64) as usize; let mut raw_edges = Vec::with_capacity(total_edges * 2); let mut edge_list: Vec = (0..node_count).collect(); for _ in 0..total_edges { let source = rng.next_bounded(node_count); let target_idx = (rng.next_u64() % edge_list.len() as u64) as usize; let target = edge_list[target_idx]; if source == target { continue; } raw_edges.push(graph::bench_support::RawEdge { source, target, type_id: 1, weight: None, }); // Bidirectional raw_edges.push(graph::bench_support::RawEdge { source: target, target: source, type_id: 1, weight: None, }); // Grow the edge list → nodes with more edges get picked more often edge_list.push(source); edge_list.push(target); } GeneratedBenchmarkGraph { nodes, raw_edges } } /// Build CSR/index structures from already-generated benchmark rows and edges. /// /// Construction benchmarks use this to exclude synthetic data generation cost. pub fn build_benchmark_graph_from_fixture(fixture: GeneratedBenchmarkGraph) -> BenchGraph { let mut node_store = NodeStoreBuilder::new(); for node in fixture.nodes { node_store.add_node(node.table_oid, node.pk); } let edge_store = EdgeStoreBuilder::try_from_edges(node_store.node_count(), fixture.raw_edges, false) .expect("benchmark generator emits in-range edge endpoints"); let filter_index = FilterIndexBuilder::new(); BenchGraph { node_store, edge_store, filter_index, } } /// Find a "supernode" — a node with high degree, useful for stress-testing BFS. /// Returns the node_idx with highest degree. pub fn find_supernode(graph: &BenchGraph) -> u32 { let n = graph.node_store.node_count(); let mut best_idx = 0u32; let mut best_degree = 0u32; for i in 0..n { let deg = graph.edge_store.degree(i); if deg > best_degree { best_degree = deg; best_idx = i; } } best_idx } /// Find a "leaf" node — low degree, for testing sparse traversals. pub fn find_leaf(graph: &BenchGraph, rng_seed: u64) -> u32 { let n = graph.node_store.node_count(); let mut rng = Rng::new(rng_seed); // Find a node with degree 1-3 for _ in 0..1000 { let idx = rng.next_bounded(n); let deg = graph.edge_store.degree(idx); if (1..=3).contains(°) { return idx; } } 0 // Fallback to node 0 }