Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -949,21 +949,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c)
explain (costs off)
select v.c, (select count(*) from gstest2 group by () having v.c)
from (values (false),(true)) v(c) order by v.c;
QUERY PLAN
--------------------------------------------------------------------------
Sort
Sort Key: "*VALUES*".column1
-> Values Scan on "*VALUES*"
SubPlan 1
-> Aggregate
Group Key: ()
Filter: "*VALUES*".column1
-> Result
One-Time Filter: "*VALUES*".column1
-> Materialize
-> Gather Motion 3:1 (slice1; segments: 3)
QUERY PLAN
--------------------------------------------------------------------
Result
-> Sort
Sort Key: "Values".column1
-> Values Scan on "Values"
SubPlan 1
-> Result
One-Time Filter: "Values".column1
-> Finalize Aggregate
-> Materialize
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Seq Scan on gstest2
Optimizer: Postgres query optimizer
Optimizer: GPORCA
(13 rows)

-- HAVING with GROUPING queries
Expand Down
115 changes: 114 additions & 1 deletion src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,93 @@ CSubqueryHandler::FProjectCountSubquery(CExpression *pexprSubquery,
}


//---------------------------------------------------------------------------
// @function:
// FContainsEmptyGbAgg
//
// @doc:
// Return true if pexpr contains a GbAgg with empty grouping columns
// (i.e., GROUP BY ())
//
//---------------------------------------------------------------------------
static BOOL
FContainsEmptyGbAgg(CExpression *pexpr)
{
if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid())
{
return 0 == CLogicalGbAgg::PopConvert(pexpr->Pop())->Pdrgpcr()->Size();
}
const ULONG arity = pexpr->Arity();
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = (*pexpr)[ul];
if (pexprChild->Pop()->FLogical() && FContainsEmptyGbAgg(pexprChild))
{
return true;
}
}
return false;
}


//---------------------------------------------------------------------------
// @function:
// FHasCorrelatedSelectAboveGbAgg
//
// @doc:
// Return true if pexpr has a CLogicalSelect with outer references in its
// filter predicate that sits above a GROUP BY () aggregate. This pattern
// arises when a correlated scalar subquery has a correlated HAVING clause,
// e.g. "SELECT count(*) FROM t GROUP BY () HAVING outer_col".
//
// When such a pattern exists the scalar subquery must NOT be decorrelated
// with COALESCE(count,0) semantics: if the HAVING condition is false the
// subquery should return NULL (no rows), not 0.
//
//---------------------------------------------------------------------------
static BOOL
FHasCorrelatedSelectAboveGbAgg(CExpression *pexpr)
{
// Stop recursion at a GbAgg boundary: we are looking for a Select
// that sits *above* a GbAgg, so once we reach the GbAgg there is
// nothing more to check in this branch.
if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid())
{
return false;
}

if (COperator::EopLogicalSelect == pexpr->Pop()->Eopid() &&
pexpr->HasOuterRefs())
{
// The Select has outer references somewhere in its subtree.
// Check whether they originate from the filter (child 1) rather
// than from the logical child (child 0). If the logical child has
// no outer refs but the Select as a whole does, the outer refs must
// come from the filter predicate — exactly the correlated-HAVING
// pattern we want to detect.
CExpression *pexprLogicalChild = (*pexpr)[0];
if (!pexprLogicalChild->HasOuterRefs() &&
FContainsEmptyGbAgg(pexprLogicalChild))
{
return true;
}
}

// Recurse into logical children only.
const ULONG arity = pexpr->Arity();
for (ULONG ul = 0; ul < arity; ul++)
{
CExpression *pexprChild = (*pexpr)[ul];
if (pexprChild->Pop()->FLogical() &&
FHasCorrelatedSelectAboveGbAgg(pexprChild))
{
return true;
}
}
return false;
}


//---------------------------------------------------------------------------
// @function:
// CSubqueryHandler::SSubqueryDesc::SetCorrelatedExecution
Expand Down Expand Up @@ -382,6 +469,21 @@ CSubqueryHandler::Psd(CMemoryPool *mp, CExpression *pexprSubquery,
// set flag of correlated execution
psd->SetCorrelatedExecution();

// A correlated scalar subquery of the form
// SELECT count(*) FROM t GROUP BY () HAVING <outer_ref_condition>
// must execute as a correlated SubPlan. After NormalizeHaving() the HAVING
// clause becomes a CLogicalSelect with outer refs sitting above the GbAgg.
// If we decorrelate such a subquery the join filter replaces the HAVING
// condition, but a LEFT JOIN returns 0 (not NULL) for count(*) when no
// rows match — which is semantically wrong. Forcing correlated execution
// preserves the correct NULL-when-no-rows semantics.
if (!psd->m_fCorrelatedExecution && psd->m_fHasCountAgg &&
psd->m_fHasOuterRefs &&
FHasCorrelatedSelectAboveGbAgg(pexprInner))
{
psd->m_fCorrelatedExecution = true;
}

return psd;
}

Expand Down Expand Up @@ -753,8 +855,19 @@ CSubqueryHandler::FCreateOuterApplyForScalarSubquery(
*ppexprNewOuter = pexprPrj;

BOOL fGeneratedByQuantified = popSubquery->FGeneratedByQuantified();

// When GROUP BY () has a correlated HAVING clause (now represented as a
// CLogicalSelect with outer refs sitting above the GbAgg), the subquery
// must return NULL — not 0 — when the HAVING condition is false.
// Applying COALESCE(count,0) would incorrectly convert that NULL to 0,
// so we skip the special count(*) semantics in that case.
BOOL fCorrelatedHavingAboveEmptyGby =
(fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() &&
FHasCorrelatedSelectAboveGbAgg((*pexprSubquery)[0]));

if (fGeneratedByQuantified ||
(fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size()))
(fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() &&
!fCorrelatedHavingAboveEmptyGby))
{
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
const IMDTypeInt8 *pmdtypeint8 = md_accessor->PtMDType<IMDTypeInt8>();
Expand Down
25 changes: 18 additions & 7 deletions src/backend/optimizer/util/clauses.c
Original file line number Diff line number Diff line change
Expand Up @@ -5522,35 +5522,45 @@ flatten_join_alias_var_optimizer(Query *query, int queryLevel)
if (NIL != targetList)
{
queryNew->targetList = (List *) flatten_join_alias_vars(queryNew, (Node *) targetList);
list_free(targetList);
if (targetList != queryNew->targetList)
list_free(targetList);
}

List * returningList = queryNew->returningList;
List *returningList = queryNew->returningList;
if (NIL != returningList)
{
queryNew->returningList = (List *) flatten_join_alias_vars(queryNew, (Node *) returningList);
list_free(returningList);
if (returningList != queryNew->returningList)
list_free(returningList);
}

Node *havingQual = queryNew->havingQual;
if (NULL != havingQual)
{
queryNew->havingQual = flatten_join_alias_vars(queryNew, havingQual);
pfree(havingQual);
if (havingQual != queryNew->havingQual)
{
if (IsA(havingQual, List))
list_free((List *) havingQual);
else
pfree(havingQual);
}
}

List *scatterClause = queryNew->scatterClause;
if (NIL != scatterClause)
{
queryNew->scatterClause = (List *) flatten_join_alias_vars(queryNew, (Node *) scatterClause);
list_free(scatterClause);
if (scatterClause != queryNew->scatterClause)
list_free(scatterClause);
}

Node *limitOffset = queryNew->limitOffset;
if (NULL != limitOffset)
{
queryNew->limitOffset = flatten_join_alias_vars(queryNew, limitOffset);
pfree(limitOffset);
if (limitOffset != queryNew->limitOffset)
pfree(limitOffset);
}

List *windowClause = queryNew->windowClause;
Expand All @@ -5577,7 +5587,8 @@ flatten_join_alias_var_optimizer(Query *query, int queryLevel)
if (NULL != limitCount)
{
queryNew->limitCount = flatten_join_alias_vars(queryNew, limitCount);
pfree(limitCount);
if (limitCount != queryNew->limitCount)
pfree(limitCount);
}

return queryNew;
Expand Down
28 changes: 14 additions & 14 deletions src/test/regress/expected/groupingsets_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -958,21 +958,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c)
explain (costs off)
select v.c, (select count(*) from gstest2 group by () having v.c)
from (values (false),(true)) v(c) order by v.c;
QUERY PLAN
--------------------------------------------------------------------------
Sort
Sort Key: "*VALUES*".column1
-> Values Scan on "*VALUES*"
SubPlan 1
-> Aggregate
Group Key: ()
Filter: "*VALUES*".column1
-> Result
One-Time Filter: "*VALUES*".column1
-> Materialize
-> Gather Motion 3:1 (slice1; segments: 3)
QUERY PLAN
--------------------------------------------------------------------
Result
-> Sort
Sort Key: "Values".column1
-> Values Scan on "Values"
SubPlan 1
-> Result
One-Time Filter: "Values".column1
-> Finalize Aggregate
-> Materialize
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Seq Scan on gstest2
Optimizer: Postgres query optimizer
Optimizer: GPORCA
(13 rows)

-- HAVING with GROUPING queries
Expand Down
Loading