From df5f06d6fecffb4de64ab4ed2a1deb3a45efa37c Mon Sep 17 00:00:00 2001 From: Hans Zeller Date: Wed, 16 Sep 2020 09:41:33 -0700 Subject: [PATCH] Fall back for selects of outer refs in scalar subquery in ORCA (#10836) This is a backport of the changes in #10799 to 6X_STABLE. When we have an outer ref in a subquery, like this: select * from foo where foo.a is null or foo.a = (select foo.b from bar) then we can't simply use the outer reference for the condition when we unnest the subquery into an apply. This is because if the subquery returns no rows, we must use a NULL instead of the outer reference. We have code to handle this for quantified subqueries, but not for scalar subqueries. When we translate the generated DXL to a plan, we assert when we find an outer reference in the project list of a subquery. In rare cases, we might also crash when the subquery contains a project with multiple values below the outer reference (see added test in gporca.sql). The "fix" (more a workaround) is to force a fallback when we detect this situation during unnesting of a scalar subquery. 
--- .../include/gpopt/xforms/CSubqueryHandler.h | 13 +++++- .../libgpopt/src/xforms/CSubqueryHandler.cpp | 30 ++++++++++--- src/test/regress/expected/gporca.out | 36 +++++++++++++++ .../regress/expected/gporca_optimizer.out | 44 +++++++++++++++++++ src/test/regress/sql/gporca.sql | 23 ++++++++++ 5 files changed, 140 insertions(+), 6 deletions(-) diff --git a/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h b/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h index 0368223a60..e36001c38e 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h +++ b/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h @@ -61,6 +61,9 @@ namespace gpopt // subquery has outer references BOOL m_fHasOuterRefs; + // the returned column is an outer reference + BOOL m_fReturnedPcrIsOuterRef; + // subquery has skip level correlations -- when inner expression refers to columns defined above the immediate outer expression BOOL m_fHasSkipLevelCorrelations; @@ -85,6 +88,7 @@ namespace gpopt m_returns_set(false), m_fHasVolatileFunctions(false), m_fHasOuterRefs(false), + m_fReturnedPcrIsOuterRef(false), m_fHasSkipLevelCorrelations(false), m_fHasCountAgg(false), m_pcrCountAgg(NULL), @@ -251,7 +255,14 @@ namespace gpopt // create subquery descriptor static - SSubqueryDesc *Psd(CMemoryPool *mp, CExpression *pexprSubquery, CExpression *pexprOuter, ESubqueryCtxt esqctxt); + SSubqueryDesc *Psd + ( + CMemoryPool *mp, + CExpression *pexprSubquery, + CExpression *pexprOuter, + const CColRef *pcrSubquery, + ESubqueryCtxt esqctxt + ); // detect subqueries with expressions over count aggregate similar to // (SELECT 'abc' || (SELECT count(*) from X)) diff --git a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp index 9e0217e841..55074eb823 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp +++ 
b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp @@ -348,6 +348,7 @@ CSubqueryHandler::Psd CMemoryPool *mp, CExpression *pexprSubquery, CExpression *pexprOuter, + const CColRef *pcrSubquery, ESubqueryCtxt esqctxt ) { @@ -356,15 +357,21 @@ CSubqueryHandler::Psd GPOS_ASSERT(NULL != pexprOuter); CExpression *pexprInner = (*pexprSubquery)[0]; + CColRefSet *subqueryOutputCols = (*pexprSubquery)[0]->DeriveOutputColumns(); CColRefSet *outer_refs = (*pexprSubquery)[0]->DeriveOuterReferences(); CColRefSet *pcrsOuterOutput = pexprOuter->DeriveOutputColumns(); SSubqueryDesc *psd = GPOS_NEW(mp) SSubqueryDesc(); psd->m_returns_set = (1 < pexprInner->DeriveMaxCard().Ull()); - psd->m_fHasOuterRefs = pexprInner->HasOuterRefs(); + psd->m_fReturnedPcrIsOuterRef = (!subqueryOutputCols->FMember(pcrSubquery)); + psd->m_fHasOuterRefs = pexprInner->HasOuterRefs() || psd->m_fReturnedPcrIsOuterRef; psd->m_fHasVolatileFunctions = (IMDFunction::EfsVolatile == pexprSubquery->DeriveScalarFunctionProperties()->Efs()); - psd->m_fHasSkipLevelCorrelations = 0 < outer_refs->Size() && !pcrsOuterOutput->ContainsAll(outer_refs); - + // We have skip-level outer refs if there are outer refs at all, and at least one of the following is true: + // - the outer refs below the subquery node don't all come from the outer table (the level right above us) + // - the ColRef returned by the subquery is an outer ref that does not come from the outer table + psd->m_fHasSkipLevelCorrelations = psd->m_fHasOuterRefs && + (!pcrsOuterOutput->ContainsAll(outer_refs) || + (psd->m_fReturnedPcrIsOuterRef && !pcrsOuterOutput->FMember(pcrSubquery))); psd->m_fHasCountAgg = CUtils::FHasCountAgg((*pexprSubquery)[0], &psd->m_pcrCountAgg); if (psd->m_fHasCountAgg && @@ -415,7 +422,20 @@ CSubqueryHandler::FRemoveScalarSubquery CScalarSubquery *popScalarSubquery = CScalarSubquery::PopConvert(pexprSubquery->Pop()); const CColRef *pcrSubquery = popScalarSubquery->Pcr(); - SSubqueryDesc *psd = Psd(pmp, pexprSubquery, 
pexprOuter, esqctxt); + SSubqueryDesc *psd = Psd(pmp, pexprSubquery, pexprOuter, pcrSubquery, esqctxt); + + if (psd->m_fReturnedPcrIsOuterRef) + { + // The subquery returns an outer reference. We can't simply replace the subquery with that + // expression, because we would miss the case where the subquery is an empty table and we + // would have to substitute the outer ref with a NULL. + // We could use a dummy expression from the subquery to perform a check, but for now we'll + // just give up. + // Example: select * from foo where foo.a = (select foo.b from bar); + GPOS_DELETE(psd); + return false; + } + BOOL fSuccess = false; if (psd->m_fProjectCount && !psd->m_fCorrelatedExecution) { @@ -441,7 +461,7 @@ CSubqueryHandler::FRemoveScalarSubquery GPOS_DELETE(psd); CExpression *pexprNewOuter = NULL; CExpression *pexprResidualScalar = NULL; - psd = Psd(m_mp, pexprNewSubq, pexprOuter, esqctxt); + psd = Psd(m_mp, pexprNewSubq, pexprOuter, popInnerSubq->Pcr(), esqctxt); fSuccess = FRemoveScalarSubqueryInternal(m_mp, pexprOuter, pexprNewSubq, EsqctxtValue, psd, m_fEnforceCorrelatedApply, &pexprNewOuter, &pexprResidualScalar); if (fSuccess) diff --git a/src/test/regress/expected/gporca.out b/src/test/regress/expected/gporca.out index f4a7b570cb..42a803000b 100644 --- a/src/test/regress/expected/gporca.out +++ b/src/test/regress/expected/gporca.out @@ -12657,3 +12657,39 @@ ERROR: correlated subquery with skip-level correlations is not supported -- where out.b in (select coalesce(tcorr2.a, 99) -- from tcorr1 full outer join tcorr2 on tcorr1.a=tcorr2.a+out.a); reset optimizer_join_order; +-- test selecting an outer ref from a scalar subquery, this will fall back to planner +-- expect 0 rows +SELECT 1 +FROM tcorr1 +WHERE tcorr1.a IS NULL OR + tcorr1.a = (SELECT tcorr1.a + FROM (SELECT rtrim(tcorr1.a::text) AS userid, + rtrim(tcorr1.b::text) AS part_pls + FROM tcorr2) al + WHERE 3 = tcorr1.a + ); + ?column? 
+---------- +(0 rows) + +-- expect 1 row, subquery returns a row, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2); + a | b +---+---- + 1 | 99 +(1 row) + +-- expect 0 rows, subquery returns no rows, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2 where b=33); + a | b +---+--- +(0 rows) + +-- expect 1 row, subquery returns nothing, so a < 22 is true, falls back in ORCA +select * from tcorr1 where a < coalesce((select tcorr1.a from tcorr2 where a = 11), 22); + a | b +---+---- + 1 | 99 +(1 row) + +reset optimizer_trace_fallback; diff --git a/src/test/regress/expected/gporca_optimizer.out b/src/test/regress/expected/gporca_optimizer.out index 179b4560bb..cc94502ada 100644 --- a/src/test/regress/expected/gporca_optimizer.out +++ b/src/test/regress/expected/gporca_optimizer.out @@ -12864,3 +12864,47 @@ where out.b in (select coalesce(tcorr2_d.c, 99) -- where out.b in (select coalesce(tcorr2.a, 99) -- from tcorr1 full outer join tcorr2 on tcorr1.a=tcorr2.a+out.a); reset optimizer_join_order; +-- test selecting an outer ref from a scalar subquery, this will fall back to planner +-- expect 0 rows +SELECT 1 +FROM tcorr1 +WHERE tcorr1.a IS NULL OR + tcorr1.a = (SELECT tcorr1.a + FROM (SELECT rtrim(tcorr1.a::text) AS userid, + rtrim(tcorr1.b::text) AS part_pls + FROM tcorr2) al + WHERE 3 = tcorr1.a + ); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: No plan has been computed for required properties + ?column? 
+---------- +(0 rows) + +-- expect 1 row, subquery returns a row, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: No plan has been computed for required properties + a | b +---+---- + 1 | 99 +(1 row) + +-- expect 0 rows, subquery returns no rows, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2 where b=33); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: No plan has been computed for required properties + a | b +---+--- +(0 rows) + +-- expect 1 row, subquery returns nothing, so a < 22 is true, falls back in ORCA +select * from tcorr1 where a < coalesce((select tcorr1.a from tcorr2 where a = 11), 22); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: No plan has been computed for required properties + a | b +---+---- + 1 | 99 +(1 row) + +reset optimizer_trace_fallback; diff --git a/src/test/regress/sql/gporca.sql b/src/test/regress/sql/gporca.sql index ab0ca92e6f..1ef78fcecc 100644 --- a/src/test/regress/sql/gporca.sql +++ b/src/test/regress/sql/gporca.sql @@ -2654,6 +2654,29 @@ where out.b in (select coalesce(tcorr2_d.c, 99) reset optimizer_join_order; +-- test selecting an outer ref from a scalar subquery, this will fall back to planner +-- expect 0 rows +SELECT 1 +FROM tcorr1 +WHERE tcorr1.a IS NULL OR + tcorr1.a = (SELECT tcorr1.a + FROM (SELECT rtrim(tcorr1.a::text) AS userid, + rtrim(tcorr1.b::text) AS part_pls + FROM tcorr2) al + WHERE 3 = tcorr1.a + ); + +-- expect 1 row, subquery returns a row, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2); + +-- expect 0 rows, subquery returns no rows, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2 where b=33); + +-- expect 1 row, subquery returns nothing, so a < 22 is true, falls back in ORCA +select * from tcorr1 where a < coalesce((select tcorr1.a from tcorr2 where a = 
11), 22); + +reset optimizer_trace_fallback; + -- start_ignore DROP SCHEMA orca CASCADE; -- end_ignore -- GitLab