From c5a6544792bc303a31e6a6618e251a5d159b20a6 Mon Sep 17 00:00:00 2001 From: Sarthak Khattar Date: Sat, 13 Dec 2025 19:22:07 -0600 Subject: [PATCH 1/2] added grafite range filter + insert/delete benchmarks for diva --- benches/query_benchmarks.rs | 152 ++++++++++++++++++++++++++++++++++++ src/diva.rs | 2 +- src/grafite_filter.rs | 9 +-- 3 files changed, 155 insertions(+), 8 deletions(-) diff --git a/benches/query_benchmarks.rs b/benches/query_benchmarks.rs index 22fe4f3..1fd389c 100644 --- a/benches/query_benchmarks.rs +++ b/benches/query_benchmarks.rs @@ -3,6 +3,7 @@ use range_filters::{ bloom_filter::BloomFilter, data_gen::generate_smooth_u64, diva::Diva, + grafite_filter::GrafiteFilter, Key, }; use rand::Rng; @@ -118,6 +119,73 @@ fn diva_range_query_large(bencher: Bencher, size: usize) { }); } +#[divan::bench(args = SIZES)] +fn diva_insert(bencher: Bencher, size: usize) { + let keys = generate_smooth_u64(Some(size)); + let target_size = 1024; + + bencher + .with_inputs(|| { + let diva = Diva::new_with_keys(&keys, target_size, 0.01); + let mut rng = rand::thread_rng(); + + let idx = loop { + let i = rng.gen_range(0..keys.len().saturating_sub(1)); + if i % target_size != 0 && (i + 1) % target_size != 0 && i != keys.len() - 2 { + break i; + } + }; + + let key1 = keys[idx]; + let key2 = keys[idx + 1]; + + let insert_key = if rng.gen_bool(0.5) { + key1 + (key2 - key1) / 2 + } else { + let offset = (key2 - key1) / 4; + if rng.gen_bool(0.5) { + key1 + offset + } else { + key2 - offset + } + }; + + (diva, insert_key.max(key1 + 1).min(key2 - 1)) + }) + .bench_local_values(|(mut diva, insert_key)| { + black_box(diva.insert_in_infix(black_box(insert_key))) + }); +} + +#[divan::bench(args = SIZES)] +fn diva_delete_infix(bencher: Bencher, size: usize) { + let keys = generate_smooth_u64(Some(size)); + let target_size = 1024; + + let mut sorted_keys = keys.clone(); + sorted_keys.sort(); + sorted_keys.dedup(); + + bencher + .with_inputs(|| { + let diva = Diva::new_with_keys(&keys, target_size, 0.01); + let mut rng = rand::thread_rng(); + + let idx = loop { + let i = rng.gen_range(0..sorted_keys.len()); + if i % target_size != 0 && i != sorted_keys.len() - 1 { + break i; + } + }; + let delete_key = sorted_keys[idx]; + + (diva, delete_key) + }) + .bench_local_values(|(mut diva, delete_key)| { + black_box(diva.delete(black_box(delete_key))) + }); +} + // ============================================================================ // Bloom Filter Benchmarks // ============================================================================ @@ -199,4 +267,88 @@ fn bloom_range_query_large(bencher: Bencher, size: usize) { query_idx += 1; black_box(bloom.range_query(black_box(start), black_box(end))) }); +} + +// ============================================================================ +// Grafite Filter Benchmarks +// ============================================================================ + +#[divan::bench(args = SIZES)] +fn grafite_construction(bencher: Bencher, size: usize) { + let keys = generate_smooth_u64(Some(size)); + + bencher.bench_local(|| { + black_box(GrafiteFilter::new_with_keys( + black_box(&keys), + black_box(0.01), + )) + }); +} + +#[divan::bench(args = SIZES)] +fn grafite_point_query(bencher: Bencher, size: usize) { + let keys = generate_smooth_u64(Some(size)); + let grafite = GrafiteFilter::new_with_keys(&keys, 0.01); + + // generate query keys (mix of existing and non-existing) + let mut rng = rand::thread_rng(); + let query_keys: Vec = (0..1000) + .map(|i| { + if i % 2 == 0 { + keys[rng.gen_range(0..keys.len())] + } else { + let idx = rng.gen_range(0..keys.len().saturating_sub(1)); + (keys[idx] + keys[idx + 1]) / 2 + } + }) + .collect(); + + let mut query_idx = 0; + bencher.bench_local(|| { + let key = query_keys[query_idx % query_keys.len()]; + query_idx += 1; + black_box(grafite.point_query(black_box(key))) + }); +} + +#[divan::bench(args = SIZES)] +fn grafite_range_query_small(bencher: Bencher, size: usize) { + let keys = generate_smooth_u64(Some(size)); + let grafite = GrafiteFilter::new_with_keys(&keys, 0.01); + let query_ranges = generate_query_ranges(&keys, 0.01, 1000); + + let mut query_idx = 0; + bencher.bench_local(|| { + let (start, end) = query_ranges[query_idx % query_ranges.len()]; + query_idx += 1; + black_box(grafite.range_query(black_box(start), black_box(end))) + }); +} + +#[divan::bench(args = SIZES)] +fn grafite_range_query_medium(bencher: Bencher, size: usize) { + let keys = generate_smooth_u64(Some(size)); + let grafite = GrafiteFilter::new_with_keys(&keys, 0.01); + let query_ranges = generate_query_ranges(&keys, 0.07, 1000); + + let mut query_idx = 0; + bencher.bench_local(|| { + let (start, end) = query_ranges[query_idx % query_ranges.len()]; + query_idx += 1; + black_box(grafite.range_query(black_box(start), black_box(end))) + }); +} + +#[divan::bench(args = SIZES)] +fn grafite_range_query_large(bencher: Bencher, size: usize) { + let keys = generate_smooth_u64(Some(size)); + let grafite = GrafiteFilter::new_with_keys(&keys, 0.01); + let query_ranges = generate_query_ranges(&keys, 0.4, 1000); + + let mut query_idx = 0; + bencher.bench_local(|| { + let (start, end) = query_ranges[query_idx % query_ranges.len()]; + query_idx += 1; + black_box(grafite.range_query(black_box(start), black_box(end))) + }); } \ No newline at end of file diff --git a/src/diva.rs b/src/diva.rs index d8bd19d..7313666 100644 --- a/src/diva.rs +++ b/src/diva.rs @@ -127,7 +127,7 @@ impl Diva { self.insert_as_sample(key) } - fn insert_in_infix(&mut self, key: Key) -> bool { + pub fn insert_in_infix(&mut self, key: Key) -> bool { // key should be inserted as a sample if any of the boundary keys are missing let (s_low, s_high) = match self .y_fast_trie diff --git a/src/grafite_filter.rs b/src/grafite_filter.rs index fa68db2..5936ebc 100644 --- a/src/grafite_filter.rs +++ b/src/grafite_filter.rs @@ -20,14 +20,9 @@ impl GrafiteFilter { pub fn new_with_keys(keys: &[Key], epsilon: f64) -> Self { let num_keys = keys.len(); - // Calculate max_query_range from the keys - let max_query_range = if keys.is_empty() { - 0 - } else { - *keys.iter().max().unwrap_or(&0) - }; + const MAX_QUERY_RANGE: u64 = 1_000_000; - let hasher = PairwiseIndependentHasher::new(num_keys, epsilon, max_query_range) + let hasher = PairwiseIndependentHasher::new(num_keys, epsilon, MAX_QUERY_RANGE) .expect("Invalid parameters for PairwiseIndependentHasher"); let filter = RangeFilter::new(keys.iter().copied(), hasher); From 1da050d8d18c06c687b7c0303fd743eb591c9cbc Mon Sep 17 00:00:00 2001 From: Sarthak Khattar Date: Sat, 13 Dec 2025 19:27:24 -0600 Subject: [PATCH 2/2] removed comment --- benches/query_benchmarks.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/benches/query_benchmarks.rs b/benches/query_benchmarks.rs index 1fd389c..a0bf5fe 100644 --- a/benches/query_benchmarks.rs +++ b/benches/query_benchmarks.rs @@ -290,7 +290,6 @@ fn grafite_point_query(bencher: Bencher, size: usize) { let keys = generate_smooth_u64(Some(size)); let grafite = GrafiteFilter::new_with_keys(&keys, 0.01); - // generate query keys (mix of existing and non-existing) let mut rng = rand::thread_rng(); let query_keys: Vec = (0..1000) .map(|i| { @@ -351,4 +350,4 @@ fn grafite_range_query_large(bencher: Bencher, size: usize) { query_idx += 1; black_box(grafite.range_query(black_box(start), black_box(end))) }); -} \ No newline at end of file +}