提交 a65c97da 编写于 作者: C Chris Hajas

Ensure Material nodes generated by Orca always materialize

In cases where Orca generates a NLJ with a parameter on the inner side,
the executor does not pass the EXEC_FLAG_REWIND flag down, because it assumes
the inner side will always need to be rescanned. The Material node therefore
does not have its rewind flag set and can act as a no-op.

This is not always correct. While the executor will set EXEC_FLAG_REWIND
if the Materialize is directly above a motion, it does not recognize
the case where the Materialize is on the inner side with other nodes
between it and the motion, even though the Materialize serves to
prevent a rescan of the underlying Motion node.

This causes execution to fail with:
`Illegal rescan of motion node: invalid plan (nodeMotion.c:1623)` because the
executor attempts to rescan the Motion below the no-op Materialize.

Since Orca only produces Materialize when necessary, either for
performance reasons or to prevent rescan of an underlying Motion,
EXEC_FLAG_REWIND should be set for any Materialize generated by Orca.

Below is a valid plan generated by Orca:

```
 Result  (cost=0.00..3448.01 rows=1 width=4)
   ->  Nested Loop  (cost=0.00..3448.01 rows=1 width=1)
         Join Filter: true
         ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=2 width=4)
               ->  Seq Scan on foo1  (cost=0.00..431.00 rows=1 width=4)
         ->  Result  (cost=0.00..431.00 rows=1 width=1)
               Filter: (foo1.a = foo2.a)
               ->  Materialize  (cost=0.00..431.00 rows=1 width=4)
                     ->  Hash Semi Join  (cost=0.00..431.00 rows=1 width=4)
                           Hash Cond: (foo2.b = foo3.b)
                           ->  Gather Motion 3:1  (slice2; segments: 3)  (cost=0.00..0.00 rows=1 width=8)
                                 ->  Bitmap Heap Scan on foo2  (cost=0.00..0.00 rows=1 width=8)
                                       Recheck Cond: (c = 3)
                                       ->  Bitmap Index Scan on f2c  (cost=0.00..0.00 rows=0 width=0)
                                             Index Cond: (c = 3)
                           ->  Hash  (cost=431.00..431.00 rows=1 width=4)
                                 ->  Gather Motion 3:1  (slice3; segments: 3)  (cost=0.00..431.00 rows=2 width=4)
                                       ->  Seq Scan on foo3  (cost=0.00..431.00 rows=1 width=4)
 Optimizer: Pivotal Optimizer (GPORCA)
 ```
Co-authored-by: Chris Hajas <chajas@pivotal.io>
Co-authored-by: Shreedhar Hardikar <shardikar@pivotal.io>
上级 c9199036
......@@ -3103,6 +3103,8 @@ CTranslatorDXLToPlStmt::TranslateDXLMaterialize
CDXLPhysicalMaterialize *materialize_dxlop = CDXLPhysicalMaterialize::Cast(materialize_dxlnode->GetOperator());
materialize->cdb_strict = materialize_dxlop->IsEager();
// ensure that executor actually materializes results
materialize->cdb_shield_child_from_rescans = true;
// translate operator costs
TranslatePlanCosts
......
......@@ -11988,3 +11988,63 @@ SELECT a, b FROM atab_old_hash FULL JOIN btab_old_hash ON a |=| b;
(6 rows)
reset optimizer_expand_fulljoin;
-- Test rescanned materialize that is not directly above a motion
DROP TABLE IF EXISTS foo1 CASCADE;
NOTICE: table "foo1" does not exist, skipping
DROP TABLE IF EXISTS foo2 CASCADE;
NOTICE: table "foo2" does not exist, skipping
DROP TABLE IF EXISTS foo3 CASCADE;
NOTICE: table "foo3" does not exist, skipping
CREATE table foo1(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE table foo2(a int, b int, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE table foo3(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE index f2c on foo2 using bitmap(c);
INSERT INTO foo1 values (1), (2);
INSERT INTO foo2 values (1,1,1), (2,2,2);
INSERT INTO foo3 values (1,1), (2,2);
set optimizer_join_order=query;
select disable_xform('CXformInnerJoin2HashJoin');
disable_xform
--------------------------------------
CXformInnerJoin2HashJoin is disabled
(1 row)
EXPLAIN SELECT 1 FROM foo1, foo2 WHERE foo1.a = foo2.a AND foo2.c = 3 AND foo2.b IN (SELECT b FROM foo3);
QUERY PLAN
----------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) (cost=4.12..6.29 rows=4 width=0)
-> Hash Semi Join (cost=4.12..6.29 rows=2 width=0)
Hash Cond: (foo2.b = foo3.b)
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=2.04..4.16 rows=2 width=4)
Hash Key: foo2.b
-> Hash Join (cost=2.04..4.10 rows=2 width=4)
Hash Cond: (foo1.a = foo2.a)
-> Seq Scan on foo1 (cost=0.00..2.02 rows=1 width=4)
-> Hash (cost=2.02..2.02 rows=1 width=8)
-> Seq Scan on foo2 (cost=0.00..2.02 rows=1 width=8)
Filter: (c = 3)
-> Hash (cost=2.06..2.06 rows=1 width=4)
-> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..2.06 rows=1 width=4)
Hash Key: foo3.b
-> Seq Scan on foo3 (cost=0.00..2.02 rows=1 width=4)
Optimizer: Postgres query optimizer
(16 rows)
SELECT 1 FROM foo1, foo2 WHERE foo1.a = foo2.a AND foo2.c = 3 AND foo2.b IN (SELECT b FROM foo3);
?column?
----------
(0 rows)
reset optimizer_join_order;
select enable_xform('CXformInnerJoin2HashJoin');
enable_xform
-------------------------------------
CXformInnerJoin2HashJoin is enabled
(1 row)
......@@ -12184,3 +12184,65 @@ SELECT a, b FROM atab_old_hash FULL JOIN btab_old_hash ON a |=| b;
(6 rows)
reset optimizer_expand_fulljoin;
-- Test rescanned materialize that is not directly above a motion
DROP TABLE IF EXISTS foo1 CASCADE;
NOTICE: table "foo1" does not exist, skipping
DROP TABLE IF EXISTS foo2 CASCADE;
NOTICE: table "foo2" does not exist, skipping
DROP TABLE IF EXISTS foo3 CASCADE;
NOTICE: table "foo3" does not exist, skipping
CREATE table foo1(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE table foo2(a int, b int, c int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE table foo3(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE index f2c on foo2 using bitmap(c);
INSERT INTO foo1 values (1), (2);
INSERT INTO foo2 values (1,1,1), (2,2,2);
INSERT INTO foo3 values (1,1), (2,2);
set optimizer_join_order=query;
select disable_xform('CXformInnerJoin2HashJoin');
disable_xform
--------------------------------------
CXformInnerJoin2HashJoin is disabled
(1 row)
EXPLAIN SELECT 1 FROM foo1, foo2 WHERE foo1.a = foo2.a AND foo2.c = 3 AND foo2.b IN (SELECT b FROM foo3);
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.00..3448.01 rows=1 width=1)
Join Filter: true
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=2 width=4)
-> Seq Scan on foo1 (cost=0.00..431.00 rows=1 width=4)
-> Result (cost=0.00..431.00 rows=1 width=1)
Filter: (foo1.a = foo2.a)
-> Materialize (cost=0.00..431.00 rows=1 width=4)
-> Hash Semi Join (cost=0.00..431.00 rows=1 width=4)
Hash Cond: (foo2.b = foo3.b)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..0.00 rows=1 width=8)
-> Bitmap Heap Scan on foo2 (cost=0.00..0.00 rows=1 width=8)
Recheck Cond: (c = 3)
-> Bitmap Index Scan on f2c (cost=0.00..0.00 rows=0 width=0)
Index Cond: (c = 3)
-> Hash (cost=431.00..431.00 rows=2 width=4)
-> Gather Motion 3:1 (slice3; segments: 3) (cost=0.00..431.00 rows=2 width=4)
-> Seq Scan on foo3 (cost=0.00..431.00 rows=1 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(18 rows)
SELECT 1 FROM foo1, foo2 WHERE foo1.a = foo2.a AND foo2.c = 3 AND foo2.b IN (SELECT b FROM foo3);
?column?
----------
(0 rows)
reset optimizer_join_order;
select enable_xform('CXformInnerJoin2HashJoin');
enable_xform
-------------------------------------
CXformInnerJoin2HashJoin is enabled
(1 row)
......@@ -2405,6 +2405,27 @@ EXPLAIN SELECT a, b FROM atab_old_hash FULL JOIN btab_old_hash ON a |=| b;
SELECT a, b FROM atab_old_hash FULL JOIN btab_old_hash ON a |=| b;
reset optimizer_expand_fulljoin;
-- Test rescanned materialize that is not directly above a motion
-- Regression test for "Illegal rescan of motion node: invalid plan": in the
-- expected ORCA plan the Materialize sits on the inner side of a Nested Loop
-- with a Hash Semi Join between it and the Gather Motions, so it must really
-- materialize its input instead of acting as a pass-through.
-- Start from a clean slate; CASCADE also removes dependent objects.
DROP TABLE IF EXISTS foo1 CASCADE;
DROP TABLE IF EXISTS foo2 CASCADE;
DROP TABLE IF EXISTS foo3 CASCADE;
-- No DISTRIBUTED BY clause is given, so each table is distributed on its
-- first column (a), as the NOTICEs in the expected output report.
CREATE table foo1(a int);
CREATE table foo2(a int, b int, c int);
CREATE table foo3(a int, b int);
-- Bitmap index on foo2.c; the expected ORCA plan probes it for "c = 3".
CREATE index f2c on foo2 using bitmap(c);
-- Tiny fixture. No foo2 row has c = 3, so the query under test returns zero
-- rows; the test passes when execution completes without the rescan error.
INSERT INTO foo1 values (1), (2);
INSERT INTO foo2 values (1,1,1), (2,2,2);
INSERT INTO foo3 values (1,1), (2,2);
-- Keep the join order as written in the query and turn off the
-- inner-join-to-hash-join transform. With these settings the expected ORCA
-- plan joins foo1 to the materialized subtree with a Nested Loop.
set optimizer_join_order=query;
select disable_xform('CXformInnerJoin2HashJoin');
-- EXPLAIN pins the plan shape; the plain SELECT then actually executes it.
EXPLAIN SELECT 1 FROM foo1, foo2 WHERE foo1.a = foo2.a AND foo2.c = 3 AND foo2.b IN (SELECT b FROM foo3);
SELECT 1 FROM foo1, foo2 WHERE foo1.a = foo2.a AND foo2.c = 3 AND foo2.b IN (SELECT b FROM foo3);
-- Undo the session-level changes made above so later tests are unaffected.
reset optimizer_join_order;
select enable_xform('CXformInnerJoin2HashJoin');
-- start_ignore
DROP SCHEMA orca CASCADE;
-- end_ignore
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册