apache · Rachelint · Apr 9, 2025 · Apr 9, 2025 · Apr 9, 2025 · Apr 10, 2025
diff --git a/datafusion-examples/examples/udf/advanced_udaf.rs b/datafusion-examples/examples/udf/advanced_udaf.rs
@@ -18,7 +18,7 @@
 //! See `main.rs` for how to run it.
 
 use arrow::datatypes::{Field, Schema};
-use datafusion::physical_expr::NullState;
+use datafusion::physical_expr::FlatNullState;
 use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility};
 use std::sync::Arc;
 
@@ -215,7 +215,7 @@ struct GeometricMeanGroupsAccumulator {
     prods: Vec<f64>,
 
     /// Track nulls in the input / filters
-    null_state: NullState,
+    null_state: FlatNullState,
 }
 
 impl GeometricMeanGroupsAccumulator {
@@ -225,7 +225,7 @@ impl GeometricMeanGroupsAccumulator {
             return_data_type: DataType::Float64,
             counts: vec![],
             prods: vec![],
-            null_state: NullState::new(),
+            null_state: FlatNullState::new(None),
         }
     }
 }
@@ -246,13 +246,17 @@ impl GroupsAccumulator for GeometricMeanGroupsAccumulator {
         // increment counts, update sums
         self.counts.resize(total_num_groups, 0);
         self.prods.resize(total_num_groups, 1.0);
-        // Use the `NullState` structure to generate specialized code for null / non null input elements
+        // Use the `FlatNullState` structure to generate specialized code for null / non null input elements.
+        // `block_id` is ignored in `value_fn` because `GeometricMeanGroupsAccumulator`
+        // does not support blocked groups yet.
+        // For more details, see `GroupsAccumulator::supports_blocked_groups`.
         self.null_state.accumulate(
             group_indices,
             values,
             opt_filter,
             total_num_groups,
-            |group_index, new_value| {
+            |_, group_index, new_value| {
+                let group_index = group_index as usize;
                 let prod = &mut self.prods[group_index];
                 *prod = prod.mul_wrapping(new_value);
 
@@ -277,13 +281,16 @@ impl GroupsAccumulator for GeometricMeanGroupsAccumulator {
         let partial_counts = values[1].as_primitive::<UInt32Type>();
         // update counts with partial counts
         self.counts.resize(total_num_groups, 0);
+        // `block_id` is ignored in `value_fn` because `GeometricMeanGroupsAccumulator`
+        // does not support blocked groups yet.
+        // For more details, see `GroupsAccumulator::supports_blocked_groups`.
         self.null_state.accumulate(
             group_indices,
             partial_counts,
             opt_filter,
             total_num_groups,
-            |group_index, partial_count| {
-                self.counts[group_index] += partial_count;
+            |_, group_index, partial_count| {
+                self.counts[group_index as usize] += partial_count;
             },
         );
 
@@ -294,8 +301,8 @@ impl GroupsAccumulator for GeometricMeanGroupsAccumulator {
             partial_prods,
             opt_filter,
             total_num_groups,
-            |group_index, new_value: <Float64Type as ArrowPrimitiveType>::Native| {
-                let prod = &mut self.prods[group_index];
+            |_, group_index, new_value: <Float64Type as ArrowPrimitiveType>::Native| {
+                let prod = &mut self.prods[group_index as usize];
                 *prod = prod.mul_wrapping(new_value);
             },
         );

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
@@ -660,6 +660,16 @@ config_namespace! {
         /// the remote end point.
         pub objectstore_writer_buffer_size: usize, default = 10 * 1024 * 1024
 
+        /// Should DataFusion use a blocked approach to manage grouping state.
+        /// By default, the blocked approach is used which
+        /// allocates capacity based on a predefined block size firstly.
+        /// When the block reaches its limit, we allocate a new block (also with
+        /// the same predefined block size based capacity) instead of expanding
+        /// the current one and copying the data.
+        /// If `false`, a single allocation approach is used, where
+        /// values are managed within a single large memory block.
+        /// As this block grows, it often triggers numerous copies, resulting in poor performance.
+        pub enable_aggregation_blocked_groups: bool, default = true
         /// Whether to enable ANSI SQL mode.
         ///
         /// The flag is experimental and relevant only for DataFusion Spark built-in functions

diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
@@ -18,10 +18,12 @@
 use std::sync::Arc;
 
 use super::record_batch_generator::get_supported_types_columns;
-use crate::fuzz_cases::aggregation_fuzzer::query_builder::QueryBuilder;
 use crate::fuzz_cases::aggregation_fuzzer::{
     AggregationFuzzerBuilder, DatasetGeneratorConfig,
 };
+use crate::fuzz_cases::aggregation_fuzzer::{
+    SessionContextOptions, query_builder::QueryBuilder,
+};
 
 use arrow::array::{
     Array, ArrayRef, AsArray, Int32Array, Int64Array, RecordBatch, StringArray,
@@ -230,6 +232,60 @@ async fn test_median() {
         .await;
 }
 
+// Tests the blocked groups optimization.
+// For details of this optimization, see:
+// https://github.com/apache/datafusion/issues/7065
+//
+// To ensure the blocked groups path is actually exercised, we must satisfy
+// *all* conditions checked by `can_enable_blocked_groups`:
+//
+//   1. `GroupOrdering::None`       — disable sort hints (`sort_hint: Some(false)`)
+//      so the aggregation is not streaming.
+//   2. `OutOfMemoryMode::ReportError` — no memory limit / spilling (default).
+//   3. `group_values.supports_blocked_groups() == true`
+//      — only `GroupValuesPrimitive` returns true, so we restrict to
+//        a *single* numeric group-by column (`with_max_group_by_columns(1)`
+//        + `set_group_by_columns(numeric_columns())`).
+//   4. Every accumulator `supports_blocked_groups() == true`
+//      — currently only sum/min/max support this, so we limit aggregates
+//        to those functions with numeric arguments.
+//   5. Config knob enabled — force it via `enable_blocked_groups: Some(true)`.
+//
+// With all five conditions met, `enable_blocked_groups` is guaranteed true
+// and `alter_block_size` will be called on both group values and accumulators.
+#[tokio::test(flavor = "multi_thread")]
+async fn test_blocked_groups_optimization() {
+    let data_gen_config = baseline_config();
+
+    // Test `Numeric aggregation` + `Single group by`
+    let aggr_functions = ["sum", "min", "max"];
+    let aggr_arguments = data_gen_config.numeric_columns();
+    let groups_by_columns = data_gen_config.numeric_columns();
+
+    let mut query_builder = QueryBuilder::new()
+        .with_table_name("fuzz_table")
+        .with_aggregate_arguments(aggr_arguments)
+        .set_group_by_columns(groups_by_columns)
+        .with_min_group_by_columns(1)
+        .with_max_group_by_columns(1)
+        .with_no_grouping(false);
+
+    for func in aggr_functions {
+        query_builder = query_builder.with_aggregate_function(func);
+    }
+
+    AggregationFuzzerBuilder::from(data_gen_config)
+        .add_query_builder(query_builder)
+        .session_context_options(SessionContextOptions {
+            enable_blocked_groups: Some(true),
+            sort_hint: Some(false),
+            ..Default::default()
+        })
+        .build()
+        .run()
+        .await;
+}
+
 /// Return a standard set of columns for testing data generation
 ///
 /// Includes numeric and string types

diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs
@@ -62,10 +62,33 @@ pub struct SessionContextGenerator {
     /// The upper bound of the randomly generated target partitions,
     /// and the lower bound will be 1
     max_target_partitions: usize,
+
+    /// Force-enable or force-disable specific options, overriding random generation
+    options: SessionContextOptions,
+}
+
+/// Options to force-enable or force-disable specific session config knobs.
+///
+/// - `None`        → randomized by the fuzzer (default behavior)
+/// - `Some(true)`  → always enabled
+/// - `Some(false)` → always disabled
+#[derive(Debug, Clone, Default)]
+pub struct SessionContextOptions {
+    pub skip_partial: Option<bool>,
+    pub sort_hint: Option<bool>,
+    pub enable_blocked_groups: Option<bool>,
 }
 
 impl SessionContextGenerator {
     pub fn new(dataset_ref: Arc<Dataset>, table_name: &str) -> Self {
+        Self::new_with_options(dataset_ref, table_name, SessionContextOptions::default())
+    }
+
+    pub fn new_with_options(
+        dataset_ref: Arc<Dataset>,
+        table_name: &str,
+        options: SessionContextOptions,
+    ) -> Self {
         let candidate_skip_partial_params = vec![
             SkipPartialParams::ensure_trigger(),
             SkipPartialParams::ensure_not_trigger(),
@@ -80,6 +103,7 @@ impl SessionContextGenerator {
             max_batch_size,
             candidate_skip_partial_params,
             max_target_partitions,
+            options,
         }
     }
 }
@@ -102,6 +126,7 @@ impl SessionContextGenerator {
             target_partitions,
             skip_partial_params,
             sort_hint: false,
+            enable_aggregation_blocked_groups: false,
             table_name: self.table_name.clone(),
             table_provider: Arc::new(provider),
         };
@@ -126,13 +151,21 @@ impl SessionContextGenerator {
 
         let target_partitions = rng.random_range(1..=self.max_target_partitions);
 
-        let skip_partial_params_idx =
-            rng.random_range(0..self.candidate_skip_partial_params.len());
-        let skip_partial_params =
-            self.candidate_skip_partial_params[skip_partial_params_idx];
+        let skip_partial_params = match self.options.skip_partial {
+            Some(true) => SkipPartialParams::ensure_trigger(),
+            Some(false) => SkipPartialParams::ensure_not_trigger(),
+            None => {
+                let idx = rng.random_range(0..self.candidate_skip_partial_params.len());
+                self.candidate_skip_partial_params[idx]
+            }
+        };
 
+        let sort_hint_enabled = self
+            .options
+            .sort_hint
+            .unwrap_or_else(|| rng.random_bool(0.5));
         let (provider, sort_hint) =
-            if rng.random_bool(0.5) && !self.dataset.sort_keys.is_empty() {
+            if sort_hint_enabled && !self.dataset.sort_keys.is_empty() {
                 // Sort keys exist and random to push down
                 let sort_exprs = self
                     .dataset
@@ -145,11 +178,17 @@ impl SessionContextGenerator {
                 (provider, false)
             };
 
+        let enable_aggregation_blocked_groups = self
+            .options
+            .enable_blocked_groups
+            .unwrap_or_else(|| rng.random_bool(0.5));
+
         let builder = GeneratedSessionContextBuilder {
             batch_size,
             target_partitions,
             sort_hint,
             skip_partial_params,
+            enable_aggregation_blocked_groups,
             table_name: self.table_name.clone(),
             table_provider: Arc::new(provider),
         };
@@ -173,6 +212,7 @@ struct GeneratedSessionContextBuilder {
     target_partitions: usize,
     sort_hint: bool,
     skip_partial_params: SkipPartialParams,
+    enable_aggregation_blocked_groups: bool,
     table_name: String,
     table_provider: Arc<dyn TableProvider>,
 }
@@ -197,6 +237,10 @@ impl GeneratedSessionContextBuilder {
             "datafusion.execution.skip_partial_aggregation_probe_ratio_threshold",
             &ScalarValue::Float64(Some(self.skip_partial_params.ratio_threshold)),
         );
+        session_config = session_config.set(
+            "datafusion.execution.enable_aggregation_blocked_groups",
+            &ScalarValue::Boolean(Some(self.enable_aggregation_blocked_groups)),
+        );
 
         let ctx = SessionContext::new_with_config(session_config);
         ctx.register_table(self.table_name, self.table_provider)?;
@@ -206,6 +250,7 @@ impl GeneratedSessionContextBuilder {
             target_partitions: self.target_partitions,
             sort_hint: self.sort_hint,
             skip_partial_params: self.skip_partial_params,
+            enable_aggregation_blocked_groups: self.enable_aggregation_blocked_groups,
         };
 
         Ok(SessionContextWithParams { ctx, params })
@@ -220,6 +265,7 @@ pub struct SessionContextParams {
     target_partitions: usize,
     sort_hint: bool,
     skip_partial_params: SkipPartialParams,
+    enable_aggregation_blocked_groups: bool,
 }
 
 /// Partial skipping parameters

diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs
@@ -26,7 +26,9 @@ use rand::{Rng, rng};
 use crate::fuzz_cases::aggregation_fuzzer::query_builder::QueryBuilder;
 use crate::fuzz_cases::aggregation_fuzzer::{
     check_equality_of_batches,
-    context_generator::{SessionContextGenerator, SessionContextWithParams},
+    context_generator::{
+        SessionContextGenerator, SessionContextOptions, SessionContextWithParams,
+    },
     data_generator::{Dataset, DatasetGenerator, DatasetGeneratorConfig},
     run_sql,
 };
@@ -50,6 +52,9 @@ pub struct AggregationFuzzerBuilder {
 
     /// See `data_gen_rounds` in [`AggregationFuzzer`], default 16
     data_gen_rounds: usize,
+
+    /// Session context options to force-enable or force-disable specific knobs
+    session_context_options: SessionContextOptions,
 }
 
 impl AggregationFuzzerBuilder {
@@ -59,6 +64,7 @@ impl AggregationFuzzerBuilder {
             table_name: None,
             data_gen_config: None,
             data_gen_rounds: 16,
+            session_context_options: SessionContextOptions::default(),
         }
     }
 
@@ -90,6 +96,11 @@ impl AggregationFuzzerBuilder {
         self
     }
 
+    pub fn session_context_options(mut self, options: SessionContextOptions) -> Self {
+        self.session_context_options = options; // replaces any previously set options
+        self
+    }
+
     pub fn build(self) -> AggregationFuzzer {
         assert!(!self.candidate_sqls.is_empty());
         let candidate_sqls = self.candidate_sqls;
@@ -104,6 +115,7 @@ impl AggregationFuzzerBuilder {
             table_name,
             dataset_generator,
             data_gen_rounds,
+            session_context_options: self.session_context_options,
         }
     }
 }
@@ -139,6 +151,9 @@ pub struct AggregationFuzzer {
     /// It is suggested to set value 2x or more bigger than num of
     /// `candidate_sqls` for better test coverage.
     data_gen_rounds: usize,
+
+    /// Session context options to force-enable or force-disable specific knobs
+    session_context_options: SessionContextOptions,
 }
 
 /// Query group including the tested dataset and its sql query
@@ -210,8 +225,11 @@ impl AggregationFuzzer {
         let mut tasks = Vec::with_capacity(query_groups.len() * CTX_GEN_ROUNDS);
         for QueryGroup { dataset, sql } in query_groups {
             let dataset_ref = Arc::new(dataset);
-            let ctx_generator =
-                SessionContextGenerator::new(dataset_ref.clone(), &self.table_name);
+            let ctx_generator = SessionContextGenerator::new_with_options(
+                dataset_ref.clone(),
+                &self.table_name,
+                self.session_context_options.clone(),
+            );
 
             // Generate the baseline context, and get the baseline result firstly
             let baseline_ctx_with_params = ctx_generator

diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/mod.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/mod.rs
@@ -46,6 +46,7 @@ mod fuzzer;
 pub mod query_builder;
 
 pub use crate::fuzz_cases::record_batch_generator::ColumnDescr;
+pub use context_generator::SessionContextOptions;
 pub use data_generator::DatasetGeneratorConfig;
 pub use fuzzer::*;