From 90dd32088bd2f1aff63f67728e71438782beca20 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Wed, 1 Jul 2026 10:10:01 +0800 Subject: [PATCH] [fix](agg) Fix incorrect aggregate merge with duplicate aliases (#65025) Related PR: #31811 Problem Summary: A nested aggregate query could return incorrect results when multiple outer group-by aliases referenced the same inner grouping expression. For example, after resolving aliases, the inner grouping keys were `(a, b)` while the outer grouping keys became `(a, a)`. `MergeAggregate` determined whether the groupings were identical by comparing their list sizes. Since both lists contained two elements, it incorrectly considered them equivalent. This allowed `SUM(COUNT(DISTINCT c))` to be merged into `COUNT(DISTINCT c)` while removing grouping key `b`. Values repeated across different `b` groups were consequently counted only once, producing an undercount. This change compares the unique grouping-expression sets after projection replacement. Aggregate layers containing `DISTINCT` are now merged only when their grouping semantics are actually identical. ### Release note Fix incorrect query results for nested aggregates with duplicate group-by aliases and DISTINCT aggregate functions. --- .../nereids/rules/rewrite/MergeAggregate.java | 12 +++++--- .../rules/rewrite/MergeAggregateTest.java | 28 ++++++++++++++++- .../merge_aggregate/merge_aggregate.out | 5 ++++ .../merge_aggregate/merge_aggregate.groovy | 30 +++++++++++++++++++ 4 files changed, 70 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeAggregate.java index 346fc7edf92369..ca3bc69649dc5f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeAggregate.java @@ -196,10 +196,12 @@ private boolean commonCheck(LogicalAggregate outerAgg, LogicalAg private boolean canMergeAggregateWithoutProject(LogicalAggregate> outerAgg) { LogicalAggregate innerAgg = outerAgg.child(); - if (!new HashSet<>(innerAgg.getGroupByExpressions()).containsAll(outerAgg.getGroupByExpressions())) { + Set innerGroupByExpressions = new HashSet<>(innerAgg.getGroupByExpressions()); + Set outerGroupByExpressions = new HashSet<>(outerAgg.getGroupByExpressions()); + if (!innerGroupByExpressions.containsAll(outerGroupByExpressions)) { return false; } - boolean sameGroupBy = (innerAgg.getGroupByExpressions().size() == outerAgg.getGroupByExpressions().size()); + boolean sameGroupBy = innerGroupByExpressions.equals(outerGroupByExpressions); return commonCheck(outerAgg, innerAgg, sameGroupBy, Optional.empty()); } @@ -210,7 +212,9 @@ private boolean canMergeAggregateWithProject(LogicalAggregate outerAggGroupByKeys = PlanUtils.replaceExpressionByProjections(project.getProjects(), outerAgg.getGroupByExpressions()); - if (!new HashSet<>(innerAgg.getGroupByExpressions()).containsAll(outerAggGroupByKeys)) { + Set innerGroupByExpressions = new HashSet<>(innerAgg.getGroupByExpressions()); + Set outerGroupByExpressions = new HashSet<>(outerAggGroupByKeys); + if (!innerGroupByExpressions.containsAll(outerGroupByExpressions)) { return false; } // project cannot have expressions like a+1 @@ -218,7 +222,7 @@ private boolean canMergeAggregateWithProject(LogicalAggregate !(expr instanceof SlotReference) && !(expr instanceof Alias))) { return false; } - boolean sameGroupBy = (innerAgg.getGroupByExpressions().size() == outerAgg.getGroupByExpressions().size()); + boolean sameGroupBy = innerGroupByExpressions.equals(outerGroupByExpressions); return commonCheck(outerAgg, innerAgg, sameGroupBy, Optional.of(project)); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeAggregateTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeAggregateTest.java index 6d69cd79f983d3..ccf3d42ab2ac34 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeAggregateTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/MergeAggregateTest.java @@ -27,6 +27,9 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.nereids.util.MemoPatternMatchSupported; +import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.utframe.TestWithFeService; import com.google.common.collect.ImmutableList; import org.junit.jupiter.api.Assertions; @@ -40,10 +43,21 @@ * Unit tests for {@link MergeAggregate}, specifically testing the fix for filtering * aggregate functions in mergeAggProjectAgg method. */ -public class MergeAggregateTest { +public class MergeAggregateTest extends TestWithFeService implements MemoPatternMatchSupported { private MergeAggregate mergeAggregate; + @Override + protected void runBeforeAll() throws Exception { + createDatabase("merge_aggregate_test"); + connectContext.setDatabase("merge_aggregate_test"); + connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION"); + createTable("CREATE TABLE merge_aggregate_test.duplicate_alias_table (" + + "a INT NOT NULL, b INT NOT NULL, c INT NULL) " + + "DUPLICATE KEY(a, b, c) DISTRIBUTED BY HASH(a) BUCKETS 1 " + + "PROPERTIES('replication_num' = '1')"); + } + @BeforeEach public void setUp() { mergeAggregate = new MergeAggregate(); @@ -121,4 +135,16 @@ public void testMergeAggProjectAggWithMixedExpressions() throws Exception { LogicalAggregate aggregate = (LogicalAggregate) resultProject.child(0); Assertions.assertEquals(aggregate.getOutput().size(), 2); } + + @Test + public void testDoNotMergeDistinctAggregateWithDuplicateProjectedGroupBy() { + String sql = "SELECT g1, g2, SUM(s) FROM (" + + "SELECT a AS g1, a AS g2, COUNT(DISTINCT c) AS s " + + "FROM duplicate_alias_table GROUP BY a, b) t GROUP BY g1, g2"; + + PlanChecker.from(connectContext) + .analyze(sql) + .rewrite() + .matches(logicalAggregate(logicalProject(logicalAggregate()))); + } } diff --git a/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out b/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out index f66de4badaaa0c..a6a1d134f7afe2 100644 --- a/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out +++ b/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out @@ -302,3 +302,8 @@ PhysicalResultSink 8 5 5 9 3 3 +-- !duplicate_alias_distinct_result -- +1 1 3 +2 2 3 +3 3 0 + diff --git a/regression-test/suites/nereids_rules_p0/merge_aggregate/merge_aggregate.groovy b/regression-test/suites/nereids_rules_p0/merge_aggregate/merge_aggregate.groovy index 4ca62279ee8273..db0ece8637a20f 100644 --- a/regression-test/suites/nereids_rules_p0/merge_aggregate/merge_aggregate.groovy +++ b/regression-test/suites/nereids_rules_p0/merge_aggregate/merge_aggregate.groovy @@ -264,4 +264,34 @@ suite("merge_aggregate") { (select a,max(b) as col1, count(b) as col4, a as col10, a as col11 from mal_test1 group by a) t group by col10, col11 order by 1,2,3; """ + + sql "drop table if exists mal_duplicate_alias_distinct" + sql """ + create table mal_duplicate_alias_distinct ( + a int not null, + b int not null, + c int null + ) + duplicate key (a, b, c) + distributed by hash(a) buckets 1 + properties("replication_num" = "1"); + """ + sql """ + insert into mal_duplicate_alias_distinct values + (1, 10, 100), (1, 10, 101), (1, 20, 100), + (2, 10, 200), (2, 20, 200), (2, 30, 201), + (3, 30, null), (3, 40, null); + """ + sql "sync" + + order_qt_duplicate_alias_distinct_result """ + select g1, g2, sum(s) as total_s + from ( + select a as g1, a as g2, count(distinct c) as s + from mal_duplicate_alias_distinct + group by a, b + ) t + group by g1, g2 + order by g1, g2; + """ }