diff --git a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out index b3da68b1f9d..382fb46fdfd 100644 --- a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out @@ -949,21 +949,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c) explain (costs off) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; - QUERY PLAN --------------------------------------------------------------------------- - Sort - Sort Key: "*VALUES*".column1 - -> Values Scan on "*VALUES*" - SubPlan 1 - -> Aggregate - Group Key: () - Filter: "*VALUES*".column1 - -> Result - One-Time Filter: "*VALUES*".column1 - -> Materialize - -> Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +-------------------------------------------------------------------- + Result + -> Sort + Sort Key: "Values".column1 + -> Values Scan on "Values" + SubPlan 1 + -> Result + One-Time Filter: "Values".column1 + -> Finalize Aggregate + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate -> Seq Scan on gstest2 - Optimizer: Postgres query optimizer + Optimizer: GPORCA (13 rows) -- HAVING with GROUPING queries diff --git a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp index 791ee8345a9..a3c333c5ab1 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp @@ -308,6 +308,93 @@ CSubqueryHandler::FProjectCountSubquery(CExpression *pexprSubquery, } +//--------------------------------------------------------------------------- +// @function: +// FContainsEmptyGbAgg +// +// @doc: +// Return true if pexpr contains a GbAgg with empty grouping columns +// (i.e., GROUP BY ()) +// +//--------------------------------------------------------------------------- +static BOOL +FContainsEmptyGbAgg(CExpression *pexpr) +{ + if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid()) + { + return 0 == CLogicalGbAgg::PopConvert(pexpr->Pop())->Pdrgpcr()->Size(); + } + const ULONG arity = pexpr->Arity(); + for (ULONG ul = 0; ul < arity; ul++) + { + CExpression *pexprChild = (*pexpr)[ul]; + if (pexprChild->Pop()->FLogical() && FContainsEmptyGbAgg(pexprChild)) + { + return true; + } + } + return false; +} + + +//--------------------------------------------------------------------------- +// @function: +// FHasCorrelatedSelectAboveGbAgg +// +// @doc: +// Return true if pexpr has a CLogicalSelect with outer references in its +// filter predicate that sits above a GROUP BY () aggregate. This pattern +// arises when a correlated scalar subquery has a correlated HAVING clause, +// e.g. "SELECT count(*) FROM t GROUP BY () HAVING outer_col". +// +// When such a pattern exists the scalar subquery must NOT be decorrelated +// with COALESCE(count,0) semantics: if the HAVING condition is false the +// subquery should return NULL (no rows), not 0. +// +//--------------------------------------------------------------------------- +static BOOL +FHasCorrelatedSelectAboveGbAgg(CExpression *pexpr) +{ + // Stop recursion at a GbAgg boundary: we are looking for a Select + // that sits *above* a GbAgg, so once we reach the GbAgg there is + // nothing more to check in this branch. + if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid()) + { + return false; + } + + if (COperator::EopLogicalSelect == pexpr->Pop()->Eopid() && + pexpr->HasOuterRefs()) + { + // The Select has outer references somewhere in its subtree. + // Check whether they originate from the filter (child 1) rather + // than from the logical child (child 0). If the logical child has + // no outer refs but the Select as a whole does, the outer refs must + // come from the filter predicate — exactly the correlated-HAVING + // pattern we want to detect. + CExpression *pexprLogicalChild = (*pexpr)[0]; + if (!pexprLogicalChild->HasOuterRefs() && + FContainsEmptyGbAgg(pexprLogicalChild)) + { + return true; + } + } + + // Recurse into logical children only. + const ULONG arity = pexpr->Arity(); + for (ULONG ul = 0; ul < arity; ul++) + { + CExpression *pexprChild = (*pexpr)[ul]; + if (pexprChild->Pop()->FLogical() && + FHasCorrelatedSelectAboveGbAgg(pexprChild)) + { + return true; + } + } + return false; +} + + //--------------------------------------------------------------------------- // @function: // CSubqueryHandler::SSubqueryDesc::SetCorrelatedExecution @@ -382,6 +469,21 @@ CSubqueryHandler::Psd(CMemoryPool *mp, CExpression *pexprSubquery, // set flag of correlated execution psd->SetCorrelatedExecution(); + // A correlated scalar subquery of the form + // SELECT count(*) FROM t GROUP BY () HAVING + // must execute as a correlated SubPlan. After NormalizeHaving() the HAVING + // clause becomes a CLogicalSelect with outer refs sitting above the GbAgg. + // If we decorrelate such a subquery the join filter replaces the HAVING + // condition, but a LEFT JOIN returns 0 (not NULL) for count(*) when no + // rows match — which is semantically wrong. Forcing correlated execution + // preserves the correct NULL-when-no-rows semantics. + if (!psd->m_fCorrelatedExecution && psd->m_fHasCountAgg && + psd->m_fHasOuterRefs && + FHasCorrelatedSelectAboveGbAgg(pexprInner)) + { + psd->m_fCorrelatedExecution = true; + } + return psd; } @@ -753,8 +855,19 @@ CSubqueryHandler::FCreateOuterApplyForScalarSubquery( *ppexprNewOuter = pexprPrj; BOOL fGeneratedByQuantified = popSubquery->FGeneratedByQuantified(); + + // When GROUP BY () has a correlated HAVING clause (now represented as a + // CLogicalSelect with outer refs sitting above the GbAgg), the subquery + // must return NULL — not 0 — when the HAVING condition is false. + // Applying COALESCE(count,0) would incorrectly convert that NULL to 0, + // so we skip the special count(*) semantics in that case. + BOOL fCorrelatedHavingAboveEmptyGby = + (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() && + FHasCorrelatedSelectAboveGbAgg((*pexprSubquery)[0])); + if (fGeneratedByQuantified || - (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size())) + (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() && + !fCorrelatedHavingAboveEmptyGby)) { CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda(); const IMDTypeInt8 *pmdtypeint8 = md_accessor->PtMDType(); diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 2669f2c3017..a039fdd87d2 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -5522,35 +5522,45 @@ flatten_join_alias_var_optimizer(Query *query, int queryLevel) if (NIL != targetList) { queryNew->targetList = (List *) flatten_join_alias_vars(queryNew, (Node *) targetList); - list_free(targetList); + if (targetList != queryNew->targetList) + list_free(targetList); } - List * returningList = queryNew->returningList; + List *returningList = queryNew->returningList; if (NIL != returningList) { queryNew->returningList = (List *) flatten_join_alias_vars(queryNew, (Node *) returningList); - list_free(returningList); + if (returningList != queryNew->returningList) + list_free(returningList); } Node *havingQual = queryNew->havingQual; if (NULL != havingQual) { queryNew->havingQual = flatten_join_alias_vars(queryNew, havingQual); - pfree(havingQual); + if (havingQual != queryNew->havingQual) + { + if (IsA(havingQual, List)) + list_free((List *) havingQual); + else + pfree(havingQual); + } } List *scatterClause = queryNew->scatterClause; if (NIL != scatterClause) { queryNew->scatterClause = (List *) flatten_join_alias_vars(queryNew, (Node *) scatterClause); - list_free(scatterClause); + if (scatterClause != queryNew->scatterClause) + list_free(scatterClause); } Node *limitOffset = queryNew->limitOffset; if (NULL != limitOffset) { queryNew->limitOffset = flatten_join_alias_vars(queryNew, limitOffset); - pfree(limitOffset); + if (limitOffset != queryNew->limitOffset) + pfree(limitOffset); } List *windowClause = queryNew->windowClause; @@ -5577,7 +5587,8 @@ flatten_join_alias_var_optimizer(Query *query, int queryLevel) if (NULL != limitCount) { queryNew->limitCount = flatten_join_alias_vars(queryNew, limitCount); - pfree(limitCount); + if (limitCount != queryNew->limitCount) + pfree(limitCount); } return queryNew; diff --git a/src/test/regress/expected/groupingsets_optimizer.out b/src/test/regress/expected/groupingsets_optimizer.out index 08ef4c1a68c..3718069c36c 100644 --- a/src/test/regress/expected/groupingsets_optimizer.out +++ b/src/test/regress/expected/groupingsets_optimizer.out @@ -958,21 +958,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c) explain (costs off) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; - QUERY PLAN --------------------------------------------------------------------------- - Sort - Sort Key: "*VALUES*".column1 - -> Values Scan on "*VALUES*" - SubPlan 1 - -> Aggregate - Group Key: () - Filter: "*VALUES*".column1 - -> Result - One-Time Filter: "*VALUES*".column1 - -> Materialize - -> Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +-------------------------------------------------------------------- + Result + -> Sort + Sort Key: "Values".column1 + -> Values Scan on "Values" + SubPlan 1 + -> Result + One-Time Filter: "Values".column1 + -> Finalize Aggregate + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate -> Seq Scan on gstest2 - Optimizer: Postgres query optimizer + Optimizer: GPORCA (13 rows) -- HAVING with GROUPING queries