From 235f180bda29532a4cc21e3c7133e30503efcc28 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Sat, 14 Mar 2026 03:50:51 +0800 Subject: [PATCH 1/2] ORCA: Fix use-after-free in flatten_join_alias_var_optimizer Guard pfree/list_free calls with pointer-equality checks to avoid freeing live nodes when flatten_join_alias_vars returns the same pointer unchanged (e.g., outer-reference Vars with varlevelsup > 0). The unconditional pfree(havingQual) freed the Var node, whose memory was later reused by palloc for a T_List. copyObjectImpl then copied the wrong node type into havingQual, causing ORCA to encounter an unexpected RangeTblEntry and fall back to the Postgres planner. Applies the same guard pattern to all six fields: targetList, returningList, havingQual, scatterClause, limitOffset, limitCount. Reported-in: https://github.com/apache/cloudberry/issues/1618 --- src/backend/optimizer/util/clauses.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 2669f2c3017..9085f3876a4 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -5522,35 +5522,40 @@ flatten_join_alias_var_optimizer(Query *query, int queryLevel) if (NIL != targetList) { queryNew->targetList = (List *) flatten_join_alias_vars(queryNew, (Node *) targetList); - list_free(targetList); + if (targetList != queryNew->targetList) + list_free(targetList); } - List * returningList = queryNew->returningList; + List *returningList = queryNew->returningList; if (NIL != returningList) { queryNew->returningList = (List *) flatten_join_alias_vars(queryNew, (Node *) returningList); - list_free(returningList); + if (returningList != queryNew->returningList) + list_free(returningList); } Node *havingQual = queryNew->havingQual; if (NULL != havingQual) { queryNew->havingQual = flatten_join_alias_vars(queryNew, havingQual); - pfree(havingQual); + if (havingQual != queryNew->havingQual) + pfree(havingQual); } List *scatterClause = queryNew->scatterClause; if (NIL != scatterClause) { queryNew->scatterClause = (List *) flatten_join_alias_vars(queryNew, (Node *) scatterClause); - list_free(scatterClause); + if (scatterClause != queryNew->scatterClause) + list_free(scatterClause); } Node *limitOffset = queryNew->limitOffset; if (NULL != limitOffset) { queryNew->limitOffset = flatten_join_alias_vars(queryNew, limitOffset); - pfree(limitOffset); + if (limitOffset != queryNew->limitOffset) + pfree(limitOffset); } List *windowClause = queryNew->windowClause; @@ -5577,7 +5582,8 @@ flatten_join_alias_var_optimizer(Query *query, int queryLevel) if (NULL != limitCount) { queryNew->limitCount = flatten_join_alias_vars(queryNew, limitCount); - pfree(limitCount); + if (limitCount != queryNew->limitCount) + pfree(limitCount); } return queryNew; From fa35d746d3608c801fec28efb2566e1915666e41 Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Sat, 14 Mar 2026 03:51:08 +0800 Subject: [PATCH 2/2] ORCA: Fix incorrect decorrelation of GROUP BY () HAVING Force correlated execution (SubPlan) for scalar subqueries with GROUP BY () and a correlated HAVING clause. Previously ORCA decorrelated such subqueries into Left Outer Join + COALESCE(count,0), which incorrectly returned 0 instead of NULL when the HAVING condition was false. Add FHasCorrelatedSelectAboveGbAgg() to detect the pattern where NormalizeHaving() has converted the HAVING clause into a CLogicalSelect with outer refs above a CLogicalGbAgg with empty grouping columns. When detected, set m_fCorrelatedExecution = true in Psd() to bypass the COALESCE decorrelation path. Update groupingsets_optimizer.out expected output to reflect the new ORCA SubPlan format instead of Postgres planner fallback. Reported-in: https://github.com/apache/cloudberry/issues/1618 --- .../expected/groupingsets_optimizer.out | 28 ++--- .../libgpopt/src/xforms/CSubqueryHandler.cpp | 115 +++++++++++++++++- src/backend/optimizer/util/clauses.c | 7 +- .../expected/groupingsets_optimizer.out | 28 ++--- 4 files changed, 148 insertions(+), 30 deletions(-) diff --git a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out index b3da68b1f9d..382fb46fdfd 100644 --- a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out @@ -949,21 +949,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c) explain (costs off) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; - QUERY PLAN --------------------------------------------------------------------------- - Sort - Sort Key: "*VALUES*".column1 - -> Values Scan on "*VALUES*" - SubPlan 1 - -> Aggregate - Group Key: () - Filter: "*VALUES*".column1 - -> Result - One-Time Filter: "*VALUES*".column1 - -> Materialize - -> Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +-------------------------------------------------------------------- + Result + -> Sort + Sort Key: "Values".column1 + -> Values Scan on "Values" + SubPlan 1 + -> Result + One-Time Filter: "Values".column1 + -> Finalize Aggregate + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate -> Seq Scan on gstest2 - Optimizer: Postgres query optimizer + Optimizer: GPORCA (13 rows) -- HAVING with GROUPING queries diff --git a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp index 791ee8345a9..a3c333c5ab1 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp @@ -308,6 +308,93 @@ CSubqueryHandler::FProjectCountSubquery(CExpression *pexprSubquery, } +//--------------------------------------------------------------------------- +// @function: +// FContainsEmptyGbAgg +// +// @doc: +// Return true if pexpr contains a GbAgg with empty grouping columns +// (i.e., GROUP BY ()) +// +//--------------------------------------------------------------------------- +static BOOL +FContainsEmptyGbAgg(CExpression *pexpr) +{ + if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid()) + { + return 0 == CLogicalGbAgg::PopConvert(pexpr->Pop())->Pdrgpcr()->Size(); + } + const ULONG arity = pexpr->Arity(); + for (ULONG ul = 0; ul < arity; ul++) + { + CExpression *pexprChild = (*pexpr)[ul]; + if (pexprChild->Pop()->FLogical() && FContainsEmptyGbAgg(pexprChild)) + { + return true; + } + } + return false; +} + + +//--------------------------------------------------------------------------- +// @function: +// FHasCorrelatedSelectAboveGbAgg +// +// @doc: +// Return true if pexpr has a CLogicalSelect with outer references in its +// filter predicate that sits above a GROUP BY () aggregate. This pattern +// arises when a correlated scalar subquery has a correlated HAVING clause, +// e.g. "SELECT count(*) FROM t GROUP BY () HAVING outer_col". +// +// When such a pattern exists the scalar subquery must NOT be decorrelated +// with COALESCE(count,0) semantics: if the HAVING condition is false the +// subquery should return NULL (no rows), not 0. +// +//--------------------------------------------------------------------------- +static BOOL +FHasCorrelatedSelectAboveGbAgg(CExpression *pexpr) +{ + // Stop recursion at a GbAgg boundary: we are looking for a Select + // that sits *above* a GbAgg, so once we reach the GbAgg there is + // nothing more to check in this branch. + if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid()) + { + return false; + } + + if (COperator::EopLogicalSelect == pexpr->Pop()->Eopid() && + pexpr->HasOuterRefs()) + { + // The Select has outer references somewhere in its subtree. + // Check whether they originate from the filter (child 1) rather + // than from the logical child (child 0). If the logical child has + // no outer refs but the Select as a whole does, the outer refs must + // come from the filter predicate — exactly the correlated-HAVING + // pattern we want to detect. + CExpression *pexprLogicalChild = (*pexpr)[0]; + if (!pexprLogicalChild->HasOuterRefs() && + FContainsEmptyGbAgg(pexprLogicalChild)) + { + return true; + } + } + + // Recurse into logical children only. + const ULONG arity = pexpr->Arity(); + for (ULONG ul = 0; ul < arity; ul++) + { + CExpression *pexprChild = (*pexpr)[ul]; + if (pexprChild->Pop()->FLogical() && + FHasCorrelatedSelectAboveGbAgg(pexprChild)) + { + return true; + } + } + return false; +} + + //--------------------------------------------------------------------------- // @function: // CSubqueryHandler::SSubqueryDesc::SetCorrelatedExecution @@ -382,6 +469,21 @@ CSubqueryHandler::Psd(CMemoryPool *mp, CExpression *pexprSubquery, // set flag of correlated execution psd->SetCorrelatedExecution(); + // A correlated scalar subquery of the form + // SELECT count(*) FROM t GROUP BY () HAVING + // must execute as a correlated SubPlan. After NormalizeHaving() the HAVING + // clause becomes a CLogicalSelect with outer refs sitting above the GbAgg. + // If we decorrelate such a subquery the join filter replaces the HAVING + // condition, but a LEFT JOIN returns 0 (not NULL) for count(*) when no + // rows match — which is semantically wrong. Forcing correlated execution + // preserves the correct NULL-when-no-rows semantics. + if (!psd->m_fCorrelatedExecution && psd->m_fHasCountAgg && + psd->m_fHasOuterRefs && + FHasCorrelatedSelectAboveGbAgg(pexprInner)) + { + psd->m_fCorrelatedExecution = true; + } + return psd; } @@ -753,8 +855,19 @@ CSubqueryHandler::FCreateOuterApplyForScalarSubquery( *ppexprNewOuter = pexprPrj; BOOL fGeneratedByQuantified = popSubquery->FGeneratedByQuantified(); + + // When GROUP BY () has a correlated HAVING clause (now represented as a + // CLogicalSelect with outer refs sitting above the GbAgg), the subquery + // must return NULL — not 0 — when the HAVING condition is false. + // Applying COALESCE(count,0) would incorrectly convert that NULL to 0, + // so we skip the special count(*) semantics in that case. + BOOL fCorrelatedHavingAboveEmptyGby = + (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() && + FHasCorrelatedSelectAboveGbAgg((*pexprSubquery)[0])); + if (fGeneratedByQuantified || - (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size())) + (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() && + !fCorrelatedHavingAboveEmptyGby)) { CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda(); const IMDTypeInt8 *pmdtypeint8 = md_accessor->PtMDType(); diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 9085f3876a4..a039fdd87d2 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -5539,7 +5539,12 @@ flatten_join_alias_var_optimizer(Query *query, int queryLevel) { queryNew->havingQual = flatten_join_alias_vars(queryNew, havingQual); if (havingQual != queryNew->havingQual) - pfree(havingQual); + { + if (IsA(havingQual, List)) + list_free((List *) havingQual); + else + pfree(havingQual); + } } List *scatterClause = queryNew->scatterClause; diff --git a/src/test/regress/expected/groupingsets_optimizer.out b/src/test/regress/expected/groupingsets_optimizer.out index 08ef4c1a68c..3718069c36c 100644 --- a/src/test/regress/expected/groupingsets_optimizer.out +++ b/src/test/regress/expected/groupingsets_optimizer.out @@ -958,21 +958,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c) explain (costs off) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; - QUERY PLAN --------------------------------------------------------------------------- - Sort - Sort Key: "*VALUES*".column1 - -> Values Scan on "*VALUES*" - SubPlan 1 - -> Aggregate - Group Key: () - Filter: "*VALUES*".column1 - -> Result - One-Time Filter: "*VALUES*".column1 - -> Materialize - -> Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +-------------------------------------------------------------------- + Result + -> Sort + Sort Key: "Values".column1 + -> Values Scan on "Values" + SubPlan 1 + -> Result + One-Time Filter: "Values".column1 + -> Finalize Aggregate + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate -> Seq Scan on gstest2 - Optimizer: Postgres query optimizer + Optimizer: GPORCA (13 rows) -- HAVING with GROUPING queries