Skip to content

Commit 406f088

Browse files
committed
ORCA: Fix incorrect decorrelation of GROUP BY () HAVING <outer_ref>
Force correlated execution (SubPlan) for scalar subqueries with GROUP BY () and a correlated HAVING clause. Previously ORCA decorrelated such subqueries into Left Outer Join + COALESCE(count,0), which incorrectly returned 0 instead of NULL when the HAVING condition was false. Add FHasCorrelatedSelectAboveGbAgg() to detect the pattern where NormalizeHaving() has converted the HAVING clause into a CLogicalSelect with outer refs above a CLogicalGbAgg with empty grouping columns. When detected, set m_fCorrelatedExecution = true in Psd() to bypass the COALESCE decorrelation path. Update groupingsets_optimizer.out expected output to reflect the new ORCA SubPlan format instead of Postgres planner fallback. Reported-in: #1618
1 parent 224f153 commit 406f088

File tree

4 files changed

+148
-30
lines changed

4 files changed

+148
-30
lines changed

contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -949,21 +949,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c)
949949
explain (costs off)
950950
select v.c, (select count(*) from gstest2 group by () having v.c)
951951
from (values (false),(true)) v(c) order by v.c;
952-
QUERY PLAN
953-
--------------------------------------------------------------------------
954-
Sort
955-
Sort Key: "*VALUES*".column1
956-
-> Values Scan on "*VALUES*"
957-
SubPlan 1
958-
-> Aggregate
959-
Group Key: ()
960-
Filter: "*VALUES*".column1
961-
-> Result
962-
One-Time Filter: "*VALUES*".column1
963-
-> Materialize
964-
-> Gather Motion 3:1 (slice1; segments: 3)
952+
QUERY PLAN
953+
--------------------------------------------------------------------
954+
Result
955+
-> Sort
956+
Sort Key: "Values".column1
957+
-> Values Scan on "Values"
958+
SubPlan 1
959+
-> Result
960+
One-Time Filter: "Values".column1
961+
-> Finalize Aggregate
962+
-> Materialize
963+
-> Gather Motion 3:1 (slice1; segments: 3)
964+
-> Partial Aggregate
965965
-> Seq Scan on gstest2
966-
Optimizer: Postgres query optimizer
966+
Optimizer: GPORCA
967967
(13 rows)
968968

969969
-- HAVING with GROUPING queries

src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,93 @@ CSubqueryHandler::FProjectCountSubquery(CExpression *pexprSubquery,
308308
}
309309

310310

311+
//---------------------------------------------------------------------------
312+
// @function:
313+
// FContainsEmptyGbAgg
314+
//
315+
// @doc:
316+
// Return true if pexpr contains a GbAgg with empty grouping columns
317+
// (i.e., GROUP BY ())
318+
//
319+
//---------------------------------------------------------------------------
320+
static BOOL
321+
FContainsEmptyGbAgg(CExpression *pexpr)
322+
{
323+
if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid())
324+
{
325+
return 0 == CLogicalGbAgg::PopConvert(pexpr->Pop())->Pdrgpcr()->Size();
326+
}
327+
const ULONG arity = pexpr->Arity();
328+
for (ULONG ul = 0; ul < arity; ul++)
329+
{
330+
CExpression *pexprChild = (*pexpr)[ul];
331+
if (pexprChild->Pop()->FLogical() && FContainsEmptyGbAgg(pexprChild))
332+
{
333+
return true;
334+
}
335+
}
336+
return false;
337+
}
338+
339+
340+
//---------------------------------------------------------------------------
341+
// @function:
342+
// FHasCorrelatedSelectAboveGbAgg
343+
//
344+
// @doc:
345+
// Return true if pexpr has a CLogicalSelect with outer references in its
346+
// filter predicate that sits above a GROUP BY () aggregate. This pattern
347+
// arises when a correlated scalar subquery has a correlated HAVING clause,
348+
// e.g. "SELECT count(*) FROM t GROUP BY () HAVING outer_col".
349+
//
350+
// When such a pattern exists the scalar subquery must NOT be decorrelated
351+
// with COALESCE(count,0) semantics: if the HAVING condition is false the
352+
// subquery should return NULL (no rows), not 0.
353+
//
354+
//---------------------------------------------------------------------------
355+
static BOOL
356+
FHasCorrelatedSelectAboveGbAgg(CExpression *pexpr)
357+
{
358+
// Stop recursion at a GbAgg boundary: we are looking for a Select
359+
// that sits *above* a GbAgg, so once we reach the GbAgg there is
360+
// nothing more to check in this branch.
361+
if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid())
362+
{
363+
return false;
364+
}
365+
366+
if (COperator::EopLogicalSelect == pexpr->Pop()->Eopid() &&
367+
pexpr->HasOuterRefs())
368+
{
369+
// The Select has outer references somewhere in its subtree.
370+
// Check whether they originate from the filter (child 1) rather
371+
// than from the logical child (child 0). If the logical child has
372+
// no outer refs but the Select as a whole does, the outer refs must
373+
// come from the filter predicate — exactly the correlated-HAVING
374+
// pattern we want to detect.
375+
CExpression *pexprLogicalChild = (*pexpr)[0];
376+
if (!pexprLogicalChild->HasOuterRefs() &&
377+
FContainsEmptyGbAgg(pexprLogicalChild))
378+
{
379+
return true;
380+
}
381+
}
382+
383+
// Recurse into logical children only.
384+
const ULONG arity = pexpr->Arity();
385+
for (ULONG ul = 0; ul < arity; ul++)
386+
{
387+
CExpression *pexprChild = (*pexpr)[ul];
388+
if (pexprChild->Pop()->FLogical() &&
389+
FHasCorrelatedSelectAboveGbAgg(pexprChild))
390+
{
391+
return true;
392+
}
393+
}
394+
return false;
395+
}
396+
397+
311398
//---------------------------------------------------------------------------
312399
// @function:
313400
// CSubqueryHandler::SSubqueryDesc::SetCorrelatedExecution
@@ -382,6 +469,21 @@ CSubqueryHandler::Psd(CMemoryPool *mp, CExpression *pexprSubquery,
382469
// set flag of correlated execution
383470
psd->SetCorrelatedExecution();
384471

472+
// A correlated scalar subquery of the form
473+
// SELECT count(*) FROM t GROUP BY () HAVING <outer_ref_condition>
474+
// must execute as a correlated SubPlan. After NormalizeHaving() the HAVING
475+
// clause becomes a CLogicalSelect with outer refs sitting above the GbAgg.
476+
// If we decorrelate such a subquery the join filter replaces the HAVING
477+
// condition, but a LEFT JOIN returns 0 (not NULL) for count(*) when no
478+
// rows match — which is semantically wrong. Forcing correlated execution
479+
// preserves the correct NULL-when-no-rows semantics.
480+
if (!psd->m_fCorrelatedExecution && psd->m_fHasCountAgg &&
481+
psd->m_fHasOuterRefs &&
482+
FHasCorrelatedSelectAboveGbAgg(pexprInner))
483+
{
484+
psd->m_fCorrelatedExecution = true;
485+
}
486+
385487
return psd;
386488
}
387489

@@ -753,8 +855,19 @@ CSubqueryHandler::FCreateOuterApplyForScalarSubquery(
753855
*ppexprNewOuter = pexprPrj;
754856

755857
BOOL fGeneratedByQuantified = popSubquery->FGeneratedByQuantified();
858+
859+
// When GROUP BY () has a correlated HAVING clause (now represented as a
860+
// CLogicalSelect with outer refs sitting above the GbAgg), the subquery
861+
// must return NULL — not 0 — when the HAVING condition is false.
862+
// Applying COALESCE(count,0) would incorrectly convert that NULL to 0,
863+
// so we skip the special count(*) semantics in that case.
864+
BOOL fCorrelatedHavingAboveEmptyGby =
865+
(fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() &&
866+
FHasCorrelatedSelectAboveGbAgg((*pexprSubquery)[0]));
867+
756868
if (fGeneratedByQuantified ||
757-
(fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size()))
869+
(fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() &&
870+
!fCorrelatedHavingAboveEmptyGby))
758871
{
759872
CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
760873
const IMDTypeInt8 *pmdtypeint8 = md_accessor->PtMDType<IMDTypeInt8>();

src/backend/optimizer/util/clauses.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5539,7 +5539,12 @@ flatten_join_alias_var_optimizer(Query *query, int queryLevel)
55395539
{
55405540
queryNew->havingQual = flatten_join_alias_vars(queryNew, havingQual);
55415541
if (havingQual != queryNew->havingQual)
5542-
pfree(havingQual);
5542+
{
5543+
if (IsA(havingQual, List))
5544+
list_free((List *) havingQual);
5545+
else
5546+
pfree(havingQual);
5547+
}
55435548
}
55445549

55455550
List *scatterClause = queryNew->scatterClause;

src/test/regress/expected/groupingsets_optimizer.out

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -958,21 +958,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c)
958958
explain (costs off)
959959
select v.c, (select count(*) from gstest2 group by () having v.c)
960960
from (values (false),(true)) v(c) order by v.c;
961-
QUERY PLAN
962-
--------------------------------------------------------------------------
963-
Sort
964-
Sort Key: "*VALUES*".column1
965-
-> Values Scan on "*VALUES*"
966-
SubPlan 1
967-
-> Aggregate
968-
Group Key: ()
969-
Filter: "*VALUES*".column1
970-
-> Result
971-
One-Time Filter: "*VALUES*".column1
972-
-> Materialize
973-
-> Gather Motion 3:1 (slice1; segments: 3)
961+
QUERY PLAN
962+
--------------------------------------------------------------------
963+
Result
964+
-> Sort
965+
Sort Key: "Values".column1
966+
-> Values Scan on "Values"
967+
SubPlan 1
968+
-> Result
969+
One-Time Filter: "Values".column1
970+
-> Finalize Aggregate
971+
-> Materialize
972+
-> Gather Motion 3:1 (slice1; segments: 3)
973+
-> Partial Aggregate
974974
-> Seq Scan on gstest2
975-
Optimizer: Postgres query optimizer
975+
Optimizer: GPORCA
976976
(13 rows)
977977

978978
-- HAVING with GROUPING queries

0 commit comments

Comments
 (0)