未验证 提交 0085ad2a 编写于 作者: Z Zhenghua Lyu 提交者: GitHub

Fix cdbpath_dedup_fixup not considering merge append paths.

Greenplum uses the unique row id path as a candidate to implement semijoins.
It was introduced long ago. But GPDB6 has upgraded the kernel
version to Postgres 9.4 and introduced many new path types and
new plan nodes, and cdbpath_dedup_fixup fails to consider them.
A typical issue is: https://github.com/greenplum-db/gpdb/issues/9427

On the master branch, Heikki's commit 9628a332 refactored this part of the
code, so master is OK. 4X and 5X do not have many of the new kinds of
plan nodes and path nodes, so they are also OK.

It is very hard to backport commit 9628a332 to 6X because there is no concept
of a Path's target list in 9.4, and totally removing this kind of path
would be overkill. So the policy is to fix such cases one by one as they are reported.
上级 619b6f57
......@@ -202,6 +202,9 @@ pathnode_walk_kids(Path *path,
case T_Append:
v = pathnode_walk_list(((AppendPath *)path)->subpaths, walker, context);
break;
case T_MergeAppend:
v = pathnode_walk_list(((MergeAppendPath *)path)->subpaths, walker, context);
break;
case T_Material:
v = pathnode_walk_node(((MaterialPath *)path)->subpath, walker, context);
break;
......
......@@ -4260,30 +4260,46 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
* Output: largest child partition. If there are no child partitions because all of them have been eliminated, then
* returns NULL.
*/
static RelOptInfo* largest_child_relation(PlannerInfo *root, RelOptInfo *rel)
static RelOptInfo*
largest_child_relation(PlannerInfo *root, Path *path, bool recursing)
{
AppendPath *append_path = NULL;
ListCell *subpath_lc = NULL;
List *subpaths;
ListCell *subpath_lc;
RelOptInfo *largest_child_in_subpath = NULL;
double max_rows = -1.0;
double max_rows = -1.0;
Assert(IsA(rel->cheapest_total_path, AppendPath));
/* Guard against stack overflow due to overly complex inheritance trees */
check_stack_depth();
append_path = (AppendPath *) rel->cheapest_total_path;
while (IsA(path, ProjectionPath))
path = ((ProjectionPath *) path)->subpath;
foreach(subpath_lc, append_path->subpaths)
/*
* Add the children of an Append or MergeAppend path to the list
* of paths to process.
*/
if (IsA(path, AppendPath))
{
RelOptInfo *candidate_child = NULL;
Path *subpath = lfirst(subpath_lc);
if (IsA(subpath, AppendPath))
{
candidate_child = largest_child_relation(root, subpath->parent);
}
subpaths = ((AppendPath *) path)->subpaths;
}
else if (IsA(path, MergeAppendPath))
{
subpaths = ((MergeAppendPath *) path)->subpaths;
}
else
{
if (recursing)
return path->parent;
else
{
candidate_child = subpath->parent;
}
return NULL;
}
foreach(subpath_lc, subpaths)
{
Path *subpath = lfirst(subpath_lc);
RelOptInfo *candidate_child;
candidate_child = largest_child_relation(root, subpath, true);
if (candidate_child && candidate_child->rows > max_rows)
{
......@@ -4667,7 +4683,7 @@ examine_simple_variable(PlannerInfo *root, Var *var,
if (gp_statistics_pullup_from_child_partition &&
rel->cheapest_total_path != NULL)
{
RelOptInfo *childrel = largest_child_relation(root, rel);
RelOptInfo *childrel = largest_child_relation(root, rel->cheapest_total_path, false);
vardata->statsTuple = NULL;
if (childrel)
......
......@@ -3422,6 +3422,73 @@ EXPLAIN SELECT a, b FROM gp_float1 JOIN gp_float2 ON a = c AND b = float8 '3.0'
Optimizer: Postgres query optimizer
(10 rows)
-- The following case is to test Greenplum specific plan
-- unique row id plan works correctly with merge append path.
-- See Github issue: https://github.com/greenplum-db/gpdb/issues/9427
set optimizer = off;
create table t_9427(a int, b int, c int)
partition by range (a)
(
PARTITION p1 START (1) END (10) exclusive,
PARTITION p2 START (21) END (30) exclusive,
DEFAULT PARTITION default_part
)
;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
NOTICE: CREATE TABLE will create partition "t_9427_1_prt_default_part" for table "t_9427"
NOTICE: CREATE TABLE will create partition "t_9427_1_prt_p1" for table "t_9427"
NOTICE: CREATE TABLE will create partition "t_9427_1_prt_p2" for table "t_9427"
create index idx_c_9427 on t_9427(c);
create index idx_a_9427 on t_9427(a);
insert into t_9427 select i%30, i%30, i from generate_series(1, 100000)i;
set enable_hashjoin = off;
set enable_mergejoin = on;
set enable_nestloop = off;
set enable_seqscan = off;
set enable_bitmapscan = off;
analyze t_9427;
explain (costs off) select * from t_9427 where a in (select a from t_9427 where c < 100 ) and a < 200;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Merge Join
Merge Cond: (t_9427_1_prt_default_part.a = t_9427_1_prt_default_part_1.a)
-> Merge Append
Sort Key: t_9427_1_prt_default_part.a
-> Result
One-Time Filter: PartSelected
-> Index Scan using t_9427_1_prt_default_part_a_idx on t_9427_1_prt_default_part
Index Cond: (a < 200)
-> Result
One-Time Filter: PartSelected
-> Index Scan using t_9427_1_prt_p1_a_idx on t_9427_1_prt_p1
Index Cond: (a < 200)
-> Result
One-Time Filter: PartSelected
-> Index Scan using t_9427_1_prt_p2_a_idx on t_9427_1_prt_p2
Index Cond: (a < 200)
-> Sort
Sort Key: t_9427_1_prt_default_part_1.a
-> Partition Selector for t_9427 (dynamic scan id: 1)
Filter: t_9427_1_prt_default_part_1.a
-> HashAggregate
Group Key: t_9427_1_prt_default_part_1.a
-> Append
-> Index Scan using t_9427_1_prt_default_part_c_idx on t_9427_1_prt_default_part t_9427_1_prt_default_part_1
Index Cond: (c < 100)
Filter: (a < 200)
-> Index Scan using t_9427_1_prt_p1_c_idx on t_9427_1_prt_p1 t_9427_1_prt_p1_1
Index Cond: (c < 100)
Filter: (a < 200)
-> Index Scan using t_9427_1_prt_p2_c_idx on t_9427_1_prt_p2 t_9427_1_prt_p2_1
Index Cond: (c < 100)
Filter: (a < 200)
Optimizer: Postgres query optimizer
(34 rows)
drop table t_9427;
reset optimizer;
-- Clean up. None of the objects we create are very interesting to keep around.
reset search_path;
set client_min_messages='warning';
......
......@@ -3419,6 +3419,73 @@ EXPLAIN SELECT a, b FROM gp_float1 JOIN gp_float2 ON a = c AND b = float8 '3.0'
Optimizer: Postgres query optimizer
(10 rows)
-- The following case is to test Greenplum specific plan
-- unique row id plan works correctly with merge append path.
-- See Github issue: https://github.com/greenplum-db/gpdb/issues/9427
set optimizer = off;
create table t_9427(a int, b int, c int)
partition by range (a)
(
PARTITION p1 START (1) END (10) exclusive,
PARTITION p2 START (21) END (30) exclusive,
DEFAULT PARTITION default_part
)
;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
NOTICE: CREATE TABLE will create partition "t_9427_1_prt_default_part" for table "t_9427"
NOTICE: CREATE TABLE will create partition "t_9427_1_prt_p1" for table "t_9427"
NOTICE: CREATE TABLE will create partition "t_9427_1_prt_p2" for table "t_9427"
create index idx_c_9427 on t_9427(c);
create index idx_a_9427 on t_9427(a);
insert into t_9427 select i%30, i%30, i from generate_series(1, 100000)i;
set enable_hashjoin = off;
set enable_mergejoin = on;
set enable_nestloop = off;
set enable_seqscan = off;
set enable_bitmapscan = off;
analyze t_9427;
explain (costs off) select * from t_9427 where a in (select a from t_9427 where c < 100 ) and a < 200;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Merge Join
Merge Cond: (t_9427_1_prt_default_part.a = t_9427_1_prt_default_part_1.a)
-> Merge Append
Sort Key: t_9427_1_prt_default_part.a
-> Result
One-Time Filter: PartSelected
-> Index Scan using t_9427_1_prt_default_part_a_idx on t_9427_1_prt_default_part
Index Cond: (a < 200)
-> Result
One-Time Filter: PartSelected
-> Index Scan using t_9427_1_prt_p1_a_idx on t_9427_1_prt_p1
Index Cond: (a < 200)
-> Result
One-Time Filter: PartSelected
-> Index Scan using t_9427_1_prt_p2_a_idx on t_9427_1_prt_p2
Index Cond: (a < 200)
-> Sort
Sort Key: t_9427_1_prt_default_part_1.a
-> Partition Selector for t_9427 (dynamic scan id: 1)
Filter: t_9427_1_prt_default_part_1.a
-> HashAggregate
Group Key: t_9427_1_prt_default_part_1.a
-> Append
-> Index Scan using t_9427_1_prt_default_part_c_idx on t_9427_1_prt_default_part t_9427_1_prt_default_part_1
Index Cond: (c < 100)
Filter: (a < 200)
-> Index Scan using t_9427_1_prt_p1_c_idx on t_9427_1_prt_p1 t_9427_1_prt_p1_1
Index Cond: (c < 100)
Filter: (a < 200)
-> Index Scan using t_9427_1_prt_p2_c_idx on t_9427_1_prt_p2 t_9427_1_prt_p2_1
Index Cond: (c < 100)
Filter: (a < 200)
Optimizer: Postgres query optimizer
(34 rows)
drop table t_9427;
reset optimizer;
-- Clean up. None of the objects we create are very interesting to keep around.
reset search_path;
set client_min_messages='warning';
......
......@@ -1207,7 +1207,7 @@ select p from
(box(point(0.8,0.8), point(1.0,1.0)))) as v(bb)
cross join lateral
(select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss;
ERROR: could not devise a query plan for the given query (pathnode.c:417)
ERROR: could not devise a query plan for the given query (pathnode.c:420)
reset enable_seqscan;
explain (costs off)
select p from
......
......@@ -1223,7 +1223,7 @@ select p from
(box(point(0.8,0.8), point(1.0,1.0)))) as v(bb)
cross join lateral
(select p from gist_tbl_github9733 where p <@ bb order by p <-> bb[0] limit 2) ss;
ERROR: could not devise a query plan for the given query (pathnode.c:417)
ERROR: could not devise a query plan for the given query (pathnode.c:420)
reset enable_seqscan;
explain (costs off)
select p from
......
......@@ -370,6 +370,37 @@ EXPLAIN SELECT a, b FROM gp_float1 JOIN gp_float2 ON a = c AND b = float8 '3.0';
-- redistribute based on the compatible constant.
EXPLAIN SELECT a, b FROM gp_float1 JOIN gp_float2 ON a = c AND b = float8 '3.0' AND b = float4 '3.0';
-- The following case is to test Greenplum specific plan
-- unique row id plan works correctly with merge append path.
-- See Github issue: https://github.com/greenplum-db/gpdb/issues/9427
set optimizer = off;
create table t_9427(a int, b int, c int)
partition by range (a)
(
PARTITION p1 START (1) END (10) exclusive,
PARTITION p2 START (21) END (30) exclusive,
DEFAULT PARTITION default_part
)
;
create index idx_c_9427 on t_9427(c);
create index idx_a_9427 on t_9427(a);
insert into t_9427 select i%30, i%30, i from generate_series(1, 100000)i;
set enable_hashjoin = off;
set enable_mergejoin = on;
set enable_nestloop = off;
set enable_seqscan = off;
set enable_bitmapscan = off;
analyze t_9427;
explain (costs off) select * from t_9427 where a in (select a from t_9427 where c < 100 ) and a < 200;
drop table t_9427;
reset optimizer;
-- Clean up. None of the objects we create are very interesting to keep around.
reset search_path;
set client_min_messages='warning';
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册