未验证 提交 df5f06d6 编写于 作者: H Hans Zeller 提交者: GitHub

Fall back for selects of outer refs in scalar subquery in ORCA (#10836)

This is a backport of the changes in #10799 to 6X_STABLE.

When a scalar subquery selects an outer reference, like this:

select * from foo where foo.a is null or foo.a = (select foo.b from bar)

then we can't simply use the outer reference for the condition when
we unnest the subquery into an apply. This is because, if the subquery
returns no rows, we must use NULL instead of the outer
reference.

We have code to handle this for quantified subqueries, but not for
scalar subqueries.

When we translate the generated DXL to a plan, we hit an assertion when
we find an outer reference in the project list of a subquery. In rare
cases, we might also crash when the subquery contains a project with
multiple values below the outer reference (see added test in gporca.sql).

The "fix" (more of a workaround) is to force a fallback to the planner
when we detect this situation during unnesting of a scalar subquery.
上级 94770ad9
......@@ -61,6 +61,9 @@ namespace gpopt
// subquery has outer references
BOOL m_fHasOuterRefs;
// the returned column is an outer reference
BOOL m_fReturnedPcrIsOuterRef;
// subquery has skip level correlations -- when inner expression refers to columns defined above the immediate outer expression
BOOL m_fHasSkipLevelCorrelations;
......@@ -85,6 +88,7 @@ namespace gpopt
m_returns_set(false),
m_fHasVolatileFunctions(false),
m_fHasOuterRefs(false),
m_fReturnedPcrIsOuterRef(false),
m_fHasSkipLevelCorrelations(false),
m_fHasCountAgg(false),
m_pcrCountAgg(NULL),
......@@ -251,7 +255,14 @@ namespace gpopt
// create subquery descriptor
static
SSubqueryDesc *Psd(CMemoryPool *mp, CExpression *pexprSubquery, CExpression *pexprOuter, ESubqueryCtxt esqctxt);
SSubqueryDesc *Psd
(
CMemoryPool *mp,
CExpression *pexprSubquery,
CExpression *pexprOuter,
const CColRef *pcrSubquery,
ESubqueryCtxt esqctxt
);
// detect subqueries with expressions over count aggregate similar to
// (SELECT 'abc' || (SELECT count(*) from X))
......
......@@ -348,6 +348,7 @@ CSubqueryHandler::Psd
CMemoryPool *mp,
CExpression *pexprSubquery,
CExpression *pexprOuter,
const CColRef *pcrSubquery,
ESubqueryCtxt esqctxt
)
{
......@@ -356,15 +357,21 @@ CSubqueryHandler::Psd
GPOS_ASSERT(NULL != pexprOuter);
CExpression *pexprInner = (*pexprSubquery)[0];
CColRefSet *subqueryOutputCols = (*pexprSubquery)[0]->DeriveOutputColumns();
CColRefSet *outer_refs = (*pexprSubquery)[0]->DeriveOuterReferences();
CColRefSet *pcrsOuterOutput = pexprOuter->DeriveOutputColumns();
SSubqueryDesc *psd = GPOS_NEW(mp) SSubqueryDesc();
psd->m_returns_set = (1 < pexprInner->DeriveMaxCard().Ull());
psd->m_fHasOuterRefs = pexprInner->HasOuterRefs();
psd->m_fReturnedPcrIsOuterRef = (!subqueryOutputCols->FMember(pcrSubquery));
psd->m_fHasOuterRefs = pexprInner->HasOuterRefs() || psd->m_fReturnedPcrIsOuterRef;
psd->m_fHasVolatileFunctions = (IMDFunction::EfsVolatile == pexprSubquery->DeriveScalarFunctionProperties()->Efs());
psd->m_fHasSkipLevelCorrelations = 0 < outer_refs->Size() && !pcrsOuterOutput->ContainsAll(outer_refs);
// We have skip-level outer refs if there are outer refs at all, and at least one of the following is true:
// - the outer refs below the subquery node don't all come from the outer table (the level right above us)
// - the ColRef returned by the subquery is an outer ref that does not come from the outer table
psd->m_fHasSkipLevelCorrelations = psd->m_fHasOuterRefs &&
(!pcrsOuterOutput->ContainsAll(outer_refs) ||
(psd->m_fReturnedPcrIsOuterRef && !pcrsOuterOutput->FMember(pcrSubquery)));
psd->m_fHasCountAgg = CUtils::FHasCountAgg((*pexprSubquery)[0], &psd->m_pcrCountAgg);
if (psd->m_fHasCountAgg &&
......@@ -415,7 +422,20 @@ CSubqueryHandler::FRemoveScalarSubquery
CScalarSubquery *popScalarSubquery = CScalarSubquery::PopConvert(pexprSubquery->Pop());
const CColRef *pcrSubquery = popScalarSubquery->Pcr();
SSubqueryDesc *psd = Psd(pmp, pexprSubquery, pexprOuter, esqctxt);
SSubqueryDesc *psd = Psd(pmp, pexprSubquery, pexprOuter, pcrSubquery, esqctxt);
if (psd->m_fReturnedPcrIsOuterRef)
{
// The subquery returns an outer reference. We can't simply replace the subquery with that
// expression, because we would miss the case where the subquery is an empty table and we
// would have to substitute the outer ref with a NULL.
// We could use a dummy expression from the subquery to perform a check, but for now we'll
// just give up.
// Example: select * from foo where foo.a = (select foo.b from bar);
GPOS_DELETE(psd);
return false;
}
BOOL fSuccess = false;
if (psd->m_fProjectCount && !psd->m_fCorrelatedExecution)
{
......@@ -441,7 +461,7 @@ CSubqueryHandler::FRemoveScalarSubquery
GPOS_DELETE(psd);
CExpression *pexprNewOuter = NULL;
CExpression *pexprResidualScalar = NULL;
psd = Psd(m_mp, pexprNewSubq, pexprOuter, esqctxt);
psd = Psd(m_mp, pexprNewSubq, pexprOuter, popInnerSubq->Pcr(), esqctxt);
fSuccess = FRemoveScalarSubqueryInternal(m_mp, pexprOuter, pexprNewSubq, EsqctxtValue, psd, m_fEnforceCorrelatedApply, &pexprNewOuter, &pexprResidualScalar);
if (fSuccess)
......
......@@ -12657,3 +12657,39 @@ ERROR: correlated subquery with skip-level correlations is not supported
-- where out.b in (select coalesce(tcorr2.a, 99)
-- from tcorr1 full outer join tcorr2 on tcorr1.a=tcorr2.a+out.a);
reset optimizer_join_order;
-- test selecting an outer ref from a scalar subquery, this will fall back to planner
-- expect 0 rows
SELECT 1
FROM tcorr1
WHERE tcorr1.a IS NULL OR
tcorr1.a = (SELECT tcorr1.a
FROM (SELECT rtrim(tcorr1.a::text) AS userid,
rtrim(tcorr1.b::text) AS part_pls
FROM tcorr2) al
WHERE 3 = tcorr1.a
);
?column?
----------
(0 rows)
-- expect 1 row, subquery returns a row, falls back in ORCA
select * from tcorr1 where b = (select tcorr1.b from tcorr2);
a | b
---+----
1 | 99
(1 row)
-- expect 0 rows, subquery returns no rows, falls back in ORCA
select * from tcorr1 where b = (select tcorr1.b from tcorr2 where b=33);
a | b
---+---
(0 rows)
-- expect 1 row, subquery returns nothing, so a < 22 is true, falls back in ORCA
select * from tcorr1 where a < coalesce((select tcorr1.a from tcorr2 where a = 11), 22);
a | b
---+----
1 | 99
(1 row)
reset optimizer_trace_fallback;
......@@ -12864,3 +12864,47 @@ where out.b in (select coalesce(tcorr2_d.c, 99)
-- where out.b in (select coalesce(tcorr2.a, 99)
-- from tcorr1 full outer join tcorr2 on tcorr1.a=tcorr2.a+out.a);
reset optimizer_join_order;
-- test selecting an outer ref from a scalar subquery, this will fall back to planner
-- expect 0 rows
SELECT 1
FROM tcorr1
WHERE tcorr1.a IS NULL OR
tcorr1.a = (SELECT tcorr1.a
FROM (SELECT rtrim(tcorr1.a::text) AS userid,
rtrim(tcorr1.b::text) AS part_pls
FROM tcorr2) al
WHERE 3 = tcorr1.a
);
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: No plan has been computed for required properties
?column?
----------
(0 rows)
-- expect 1 row, subquery returns a row, falls back in ORCA
select * from tcorr1 where b = (select tcorr1.b from tcorr2);
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: No plan has been computed for required properties
a | b
---+----
1 | 99
(1 row)
-- expect 0 rows, subquery returns no rows, falls back in ORCA
select * from tcorr1 where b = (select tcorr1.b from tcorr2 where b=33);
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: No plan has been computed for required properties
a | b
---+---
(0 rows)
-- expect 1 row, subquery returns nothing, so a < 22 is true, falls back in ORCA
select * from tcorr1 where a < coalesce((select tcorr1.a from tcorr2 where a = 11), 22);
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: No plan has been computed for required properties
a | b
---+----
1 | 99
(1 row)
reset optimizer_trace_fallback;
......@@ -2654,6 +2654,29 @@ where out.b in (select coalesce(tcorr2_d.c, 99)
reset optimizer_join_order;
-- test selecting an outer ref from a scalar subquery, this will fall back to planner
-- expect 0 rows
SELECT 1
FROM tcorr1
WHERE tcorr1.a IS NULL OR
tcorr1.a = (SELECT tcorr1.a
FROM (SELECT rtrim(tcorr1.a::text) AS userid,
rtrim(tcorr1.b::text) AS part_pls
FROM tcorr2) al
WHERE 3 = tcorr1.a
);
-- expect 1 row, subquery returns a row, falls back in ORCA
select * from tcorr1 where b = (select tcorr1.b from tcorr2);
-- expect 0 rows, subquery returns no rows, falls back in ORCA
select * from tcorr1 where b = (select tcorr1.b from tcorr2 where b=33);
-- expect 1 row, subquery returns nothing, so a < 22 is true, falls back in ORCA
select * from tcorr1 where a < coalesce((select tcorr1.a from tcorr2 where a = 11), 22);
reset optimizer_trace_fallback;
-- start_ignore
DROP SCHEMA orca CASCADE;
-- end_ignore
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册