diff --git a/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h b/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h index 0368223a607ccc712c1a564b529d11dc93c645dd..e36001c38e583e805b5e309d570318e73ac9cc5e 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h +++ b/src/backend/gporca/libgpopt/include/gpopt/xforms/CSubqueryHandler.h @@ -61,6 +61,9 @@ namespace gpopt // subquery has outer references BOOL m_fHasOuterRefs; + // the returned column is an outer reference + BOOL m_fReturnedPcrIsOuterRef; + // subquery has skip level correlations -- when inner expression refers to columns defined above the immediate outer expression BOOL m_fHasSkipLevelCorrelations; @@ -85,6 +88,7 @@ namespace gpopt m_returns_set(false), m_fHasVolatileFunctions(false), m_fHasOuterRefs(false), + m_fReturnedPcrIsOuterRef(false), m_fHasSkipLevelCorrelations(false), m_fHasCountAgg(false), m_pcrCountAgg(NULL), @@ -251,7 +255,14 @@ namespace gpopt // create subquery descriptor static - SSubqueryDesc *Psd(CMemoryPool *mp, CExpression *pexprSubquery, CExpression *pexprOuter, ESubqueryCtxt esqctxt); + SSubqueryDesc *Psd + ( + CMemoryPool *mp, + CExpression *pexprSubquery, + CExpression *pexprOuter, + const CColRef *pcrSubquery, + ESubqueryCtxt esqctxt + ); // detect subqueries with expressions over count aggregate similar to // (SELECT 'abc' || (SELECT count(*) from X)) diff --git a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp index 9e0217e841c906ff181c8fd716e2fc4afdcd6dcc..55074eb82385bbef07bfb299357c88d261e77b91 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp @@ -348,6 +348,7 @@ CSubqueryHandler::Psd CMemoryPool *mp, CExpression *pexprSubquery, CExpression *pexprOuter, + const CColRef *pcrSubquery, ESubqueryCtxt esqctxt ) { @@ -356,15 +357,21 @@ CSubqueryHandler::Psd GPOS_ASSERT(NULL != pexprOuter); CExpression *pexprInner = (*pexprSubquery)[0]; + CColRefSet *subqueryOutputCols = (*pexprSubquery)[0]->DeriveOutputColumns(); CColRefSet *outer_refs = (*pexprSubquery)[0]->DeriveOuterReferences(); CColRefSet *pcrsOuterOutput = pexprOuter->DeriveOutputColumns(); SSubqueryDesc *psd = GPOS_NEW(mp) SSubqueryDesc(); psd->m_returns_set = (1 < pexprInner->DeriveMaxCard().Ull()); - psd->m_fHasOuterRefs = pexprInner->HasOuterRefs(); + psd->m_fReturnedPcrIsOuterRef = (!subqueryOutputCols->FMember(pcrSubquery)); + psd->m_fHasOuterRefs = pexprInner->HasOuterRefs() || psd->m_fReturnedPcrIsOuterRef; psd->m_fHasVolatileFunctions = (IMDFunction::EfsVolatile == pexprSubquery->DeriveScalarFunctionProperties()->Efs()); - psd->m_fHasSkipLevelCorrelations = 0 < outer_refs->Size() && !pcrsOuterOutput->ContainsAll(outer_refs); - + // We have skip-level outer refs if there are outer refs at all, and at least one of the following is true: + // - the outer refs below the subquery node don't all come from the outer table (the level right above us) + // - the ColRef returned by the subquery is an outer ref that does not come from the outer table + psd->m_fHasSkipLevelCorrelations = psd->m_fHasOuterRefs && + (!pcrsOuterOutput->ContainsAll(outer_refs) || + (psd->m_fReturnedPcrIsOuterRef && !pcrsOuterOutput->FMember(pcrSubquery))); psd->m_fHasCountAgg = CUtils::FHasCountAgg((*pexprSubquery)[0], &psd->m_pcrCountAgg); if (psd->m_fHasCountAgg && @@ -415,7 +422,20 @@ CSubqueryHandler::FRemoveScalarSubquery CScalarSubquery *popScalarSubquery = CScalarSubquery::PopConvert(pexprSubquery->Pop()); const CColRef *pcrSubquery = popScalarSubquery->Pcr(); - SSubqueryDesc *psd = Psd(pmp, pexprSubquery, pexprOuter, esqctxt); + SSubqueryDesc *psd = Psd(pmp, pexprSubquery, pexprOuter, pcrSubquery, esqctxt); + + if (psd->m_fReturnedPcrIsOuterRef) + { + // The subquery returns an outer reference. We can't simply replace the subquery with that + // expression, because we would miss the case where the subquery is an empty table and we + // would have to substitute the outer ref with a NULL. + // We could use a dummy expression from the subquery to perform a check, but for now we'll + // just give up. + // Example: select * from foo where foo.a = (select foo.b from bar); + GPOS_DELETE(psd); + return false; + } + BOOL fSuccess = false; if (psd->m_fProjectCount && !psd->m_fCorrelatedExecution) { @@ -441,7 +461,7 @@ CSubqueryHandler::FRemoveScalarSubquery GPOS_DELETE(psd); CExpression *pexprNewOuter = NULL; CExpression *pexprResidualScalar = NULL; - psd = Psd(m_mp, pexprNewSubq, pexprOuter, esqctxt); + psd = Psd(m_mp, pexprNewSubq, pexprOuter, popInnerSubq->Pcr(), esqctxt); fSuccess = FRemoveScalarSubqueryInternal(m_mp, pexprOuter, pexprNewSubq, EsqctxtValue, psd, m_fEnforceCorrelatedApply, &pexprNewOuter, &pexprResidualScalar); if (fSuccess) diff --git a/src/test/regress/expected/gporca.out b/src/test/regress/expected/gporca.out index f4a7b570cbe0ee717fdcdb551184ac260b6a672c..42a803000b5988aedc837e637e4a0692de215118 100644 --- a/src/test/regress/expected/gporca.out +++ b/src/test/regress/expected/gporca.out @@ -12657,3 +12657,39 @@ ERROR: correlated subquery with skip-level correlations is not supported -- where out.b in (select coalesce(tcorr2.a, 99) -- from tcorr1 full outer join tcorr2 on tcorr1.a=tcorr2.a+out.a); reset optimizer_join_order; +-- test selecting an outer ref from a scalar subquery, this will fall back to planner +-- expect 0 rows +SELECT 1 +FROM tcorr1 +WHERE tcorr1.a IS NULL OR + tcorr1.a = (SELECT tcorr1.a + FROM (SELECT rtrim(tcorr1.a::text) AS userid, + rtrim(tcorr1.b::text) AS part_pls + FROM tcorr2) al + WHERE 3 = tcorr1.a + ); + ?column? +---------- +(0 rows) + +-- expect 1 row, subquery returns a row, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2); + a | b +---+---- + 1 | 99 +(1 row) + +-- expect 0 rows, subquery returns no rows, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2 where b=33); + a | b +---+--- +(0 rows) + +-- expect 1 row, subquery returns nothing, so a < 22 is true, falls back in ORCA +select * from tcorr1 where a < coalesce((select tcorr1.a from tcorr2 where a = 11), 22); + a | b +---+---- + 1 | 99 +(1 row) + +reset optimizer_trace_fallback; diff --git a/src/test/regress/expected/gporca_optimizer.out b/src/test/regress/expected/gporca_optimizer.out index 179b4560bb2d60054c89132ee3b8547112b2bea0..cc94502ada25b62aaba77c76a0aaffe0fa36281d 100644 --- a/src/test/regress/expected/gporca_optimizer.out +++ b/src/test/regress/expected/gporca_optimizer.out @@ -12864,3 +12864,47 @@ where out.b in (select coalesce(tcorr2_d.c, 99) -- where out.b in (select coalesce(tcorr2.a, 99) -- from tcorr1 full outer join tcorr2 on tcorr1.a=tcorr2.a+out.a); reset optimizer_join_order; +-- test selecting an outer ref from a scalar subquery, this will fall back to planner +-- expect 0 rows +SELECT 1 +FROM tcorr1 +WHERE tcorr1.a IS NULL OR + tcorr1.a = (SELECT tcorr1.a + FROM (SELECT rtrim(tcorr1.a::text) AS userid, + rtrim(tcorr1.b::text) AS part_pls + FROM tcorr2) al + WHERE 3 = tcorr1.a + ); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: No plan has been computed for required properties + ?column? +---------- +(0 rows) + +-- expect 1 row, subquery returns a row, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: No plan has been computed for required properties + a | b +---+---- + 1 | 99 +(1 row) + +-- expect 0 rows, subquery returns no rows, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2 where b=33); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: No plan has been computed for required properties + a | b +---+--- +(0 rows) + +-- expect 1 row, subquery returns nothing, so a < 22 is true, falls back in ORCA +select * from tcorr1 where a < coalesce((select tcorr1.a from tcorr2 where a = 11), 22); +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: No plan has been computed for required properties + a | b +---+---- + 1 | 99 +(1 row) + +reset optimizer_trace_fallback; diff --git a/src/test/regress/sql/gporca.sql b/src/test/regress/sql/gporca.sql index ab0ca92e6fad5fbb57c38d5c01fd56871c8d459b..1ef78fcecccb9fa5576d015208ac70fa5a53d093 100644 --- a/src/test/regress/sql/gporca.sql +++ b/src/test/regress/sql/gporca.sql @@ -2654,6 +2654,29 @@ where out.b in (select coalesce(tcorr2_d.c, 99) reset optimizer_join_order; +-- test selecting an outer ref from a scalar subquery, this will fall back to planner +-- expect 0 rows +SELECT 1 +FROM tcorr1 +WHERE tcorr1.a IS NULL OR + tcorr1.a = (SELECT tcorr1.a + FROM (SELECT rtrim(tcorr1.a::text) AS userid, + rtrim(tcorr1.b::text) AS part_pls + FROM tcorr2) al + WHERE 3 = tcorr1.a + ); + +-- expect 1 row, subquery returns a row, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2); + +-- expect 0 rows, subquery returns no rows, falls back in ORCA +select * from tcorr1 where b = (select tcorr1.b from tcorr2 where b=33); + +-- expect 1 row, subquery returns nothing, so a < 22 is true, falls back in ORCA +select * from tcorr1 where a < coalesce((select tcorr1.a from tcorr2 where a = 11), 22); + +reset optimizer_trace_fallback; + -- start_ignore DROP SCHEMA orca CASCADE; -- end_ignore