From 4d31cb86c9edc206d19a2ec688f074fcab8d496b Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 29 Apr 2026 16:40:28 +0200 Subject: [PATCH] Fix non-deterministic stacking failure with faceted bar charts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sort in apply_stack used [group_col, ...partition_by] as sort keys, but partition_by order is non-deterministic (HashMap iteration). When fill sorted before facet, rows from the same facet panel were interleaved, causing compute_group_ids (which relies on adjacency) to treat every row as its own group — making cumsum a no-op. Fix: build the over/group columns first, use them as primary sort keys, then append remaining partition_by columns after. Co-Authored-By: Claude Opus 4.6 --- src/execute/position.rs | 93 ++++++++++++++++++++++++++++++++ src/plot/layer/position/stack.rs | 41 +++++++------- 2 files changed, 115 insertions(+), 19 deletions(-) diff --git a/src/execute/position.rs b/src/execute/position.rs index 40c029b9..add4e064 100644 --- a/src/execute/position.rs +++ b/src/execute/position.rs @@ -448,6 +448,99 @@ mod tests { ); } + #[test] + fn test_stack_groups_by_facet_not_fill_order() { + // Regression: when partition_by listed fill before facet, the sort order + // put fill first, interleaving facet panels. compute_group_ids then + // treated each row as its own group and stacking had no effect. + // + // Data is pre-sorted by (fill, facet) — the worst case for the old code. + // Three facet panels (F1, F2, F3) each with fill groups (X, Y). + let df = df! { + "__ggsql_aes_pos1__" => vec!["A", "A", "A", "A", "A", "A"], + "__ggsql_aes_pos2__" => vec![10.0, 20.0, 30.0, 40.0, 50.0, 60.0], + "__ggsql_aes_pos2end__" => vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + "__ggsql_aes_fill__" => vec!["X", "X", "X", "Y", "Y", "Y"], + "__ggsql_aes_facet1__" => vec!["F1", "F2", "F3", "F1", "F2", "F3"], + } + .unwrap(); + + let mut layer = crate::plot::Layer::new(Geom::bar()); + layer.mappings = { + let mut m = Mappings::new(); + m.insert( + "pos1", + AestheticValue::standard_column("__ggsql_aes_pos1__"), + ); + m.insert( + "pos2", + AestheticValue::standard_column("__ggsql_aes_pos2__"), + ); + m.insert( + "pos2end", + AestheticValue::standard_column("__ggsql_aes_pos2end__"), + ); + m.insert( + "fill", + AestheticValue::standard_column("__ggsql_aes_fill__"), + ); + m.insert( + "facet1", + AestheticValue::standard_column("__ggsql_aes_facet1__"), + ); + m + }; + // fill before facet — the order that triggered the bug + layer.partition_by = vec![ + "__ggsql_aes_fill__".to_string(), + "__ggsql_aes_facet1__".to_string(), + ]; + layer.position = Position::stack(); + layer.data_key = Some("__ggsql_layer_0__".to_string()); + + let mut spec = Plot::new(); + spec.scales.push(make_discrete_scale("pos1")); + spec.scales.push(make_continuous_scale("pos2")); + spec.facet = Some(Facet::new(FacetLayout::Wrap { + variables: vec!["facet_var".to_string()], + })); + let mut data_map = HashMap::new(); + data_map.insert("__ggsql_layer_0__".to_string(), df); + spec.layers.push(layer); + + apply_position_adjustments(&mut spec, &mut data_map).unwrap(); + + let result_df = data_map.get("__ggsql_layer_0__").unwrap(); + + let facet_col = + crate::array_util::as_str(result_df.column("__ggsql_aes_facet1__").unwrap()).unwrap(); + let fill_col = + crate::array_util::as_str(result_df.column("__ggsql_aes_fill__").unwrap()).unwrap(); + let pos2_arr = as_f64(result_df.column("__ggsql_aes_pos2__").unwrap()).unwrap(); + let pos2end_arr = as_f64(result_df.column("__ggsql_aes_pos2end__").unwrap()).unwrap(); + + // Collect (facet, fill) → (pos2, pos2end) + let mut by_key: std::collections::HashMap<(&str, &str), (f64, f64)> = + std::collections::HashMap::new(); + for i in 0..result_df.height() { + by_key.insert( + (facet_col.value(i), fill_col.value(i)), + (pos2_arr.value(i), pos2end_arr.value(i)), + ); + } + + // Within each facet the two fill groups must stack: + // F1: X=10 → [0,10], Y=40 → [10,50] + // F2: X=20 → [0,20], Y=50 → [20,70] + // F3: X=30 → [0,30], Y=60 → [30,90] + assert_eq!(by_key[&("F1", "X")], (10.0, 0.0)); + assert_eq!(by_key[&("F1", "Y")], (50.0, 10.0)); + assert_eq!(by_key[&("F2", "X")], (20.0, 0.0)); + assert_eq!(by_key[&("F2", "Y")], (70.0, 20.0)); + assert_eq!(by_key[&("F3", "X")], (30.0, 0.0)); + assert_eq!(by_key[&("F3", "Y")], (90.0, 30.0)); + } + #[test] fn test_dodge_ignores_facet_columns_in_group_count() { // Dodge should compute n_groups per facet panel, not globally. diff --git a/src/plot/layer/position/stack.rs b/src/plot/layer/position/stack.rs index 97f13a79..431f719c 100644 --- a/src/plot/layer/position/stack.rs +++ b/src/plot/layer/position/stack.rs @@ -248,27 +248,9 @@ fn apply_stack(df: DataFrame, layer: &Layer, spec: &Plot, mode: StackMode) -> Re } } - // Sort by group column and partition_by columns to ensure consistent stacking order - let mut sort_col_names: Vec<&str> = vec![&group_col]; - for partition_col in &layer.partition_by { - sort_col_names.push(partition_col); - } - let df = compute::sort_dataframe(&df, &sort_col_names)?; - - // Cast stack column to f64 if needed, then fill nulls with 0 - let stack_col_array = df.column(&stack_col)?.clone(); - let stack_col_f64 = if stack_col_array.data_type() == &arrow::datatypes::DataType::Float64 { - stack_col_array - } else { - crate::array_util::cast_array(&stack_col_array, &arrow::datatypes::DataType::Float64)? - }; - let filled = compute::fill_null_f64_ref(&stack_col_f64, 0.0)?; - let df = df.with_column(&stack_col, filled)?; - - // Build the group columns for .over(): group column + facet columns. + // Build the group columns: group column + facet columns. // Facet columns must be included so stacking resets per facet panel, // matching ggplot2 where position adjustments are computed per-panel. - // Collect facet column names as owned Strings let facet_col_names: Vec = spec .facet .as_ref() @@ -286,6 +268,27 @@ fn apply_stack(df: DataFrame, layer: &Layer, spec: &Plot, mode: StackMode) -> Re over_col_refs.push(name); } + // Sort by the group columns first, then by remaining partition_by columns. + // The group columns must come first so that compute_group_ids (which relies + // on adjacent rows having the same key) sees all same-group rows together. + let mut sort_col_names: Vec<&str> = over_col_refs.clone(); + for partition_col in &layer.partition_by { + if !sort_col_names.contains(&partition_col.as_str()) { + sort_col_names.push(partition_col); + } + } + let df = compute::sort_dataframe(&df, &sort_col_names)?; + + // Cast stack column to f64 if needed, then fill nulls with 0 + let stack_col_array = df.column(&stack_col)?.clone(); + let stack_col_f64 = if stack_col_array.data_type() == &arrow::datatypes::DataType::Float64 { + stack_col_array + } else { + crate::array_util::cast_array(&stack_col_array, &arrow::datatypes::DataType::Float64)? + }; + let filled = compute::fill_null_f64_ref(&stack_col_f64, 0.0)?; + let df = df.with_column(&stack_col, filled)?; + // Compute group IDs let group_ids = compute::compute_group_ids(&df, &over_col_refs)?;