未验证 提交 9cc1da61 编写于 作者: R Richard Guo 提交者: GitHub

Fix parameterized paths

This patch fixes two issues related to parameterized path logic on
master.

1. When generating a unique row ID on the outer/inner side for a
JOIN_DEDUP_SEMI/JOIN_DEDUP_SEMI_REVERSE join, we need to pass the param info
of the outerpath/innerpath to the projection path. Otherwise we would have
problems when deciding whether a join clause is movable to this join rel.

2. We should not pick a parameterized path when its required outer rels
are beyond a Motion, since we cannot pass a param through a Motion.

Fixes issue #10012
Reviewed-by: Heikki Linnakangas <hlinnakangas@pivotal.io>
Reviewed-by: Jinbao Chen <jinchen@pivotal.io>
上级 f860ff0c
......@@ -191,6 +191,12 @@ cdbpath_create_motion_path(PlannerInfo *root,
/* singleQE-->entry? Don't move. Slice's QE will run on entry db. */
if (CdbPathLocus_IsSingleQE(subpath->locus))
{
/*
* If the subpath requires parameters, we cannot generate a Motion on top of it.
*/
if (!bms_is_empty(PATH_REQ_OUTER(subpath)))
return NULL;
/*
* Create CdbMotionPath node to indicate that the slice must be
* dispatched to a singleton gang running on the entry db. We
......@@ -233,6 +239,12 @@ cdbpath_create_motion_path(PlannerInfo *root,
if (CdbPathLocus_IsSegmentGeneral(subpath->locus) ||
CdbPathLocus_IsReplicated(subpath->locus))
{
/*
* If the subpath requires parameters, we cannot generate a Motion on top of it.
*/
if (!bms_is_empty(PATH_REQ_OUTER(subpath)))
return NULL;
/*
* Data is only available on segments, to distinguish it from
* CdbLocusType_General, adding a motion to indicate this
......@@ -483,6 +495,12 @@ cdbpath_create_motion_path(PlannerInfo *root,
return (Path *) newSubqueryScanPath;
}
/*
* If the subpath requires parameters, we cannot generate a Motion on top of it.
*/
if (!bms_is_empty(PATH_REQ_OUTER(subpath)))
return NULL;
/* Create CdbMotionPath node. */
pathnode = makeNode(CdbMotionPath);
pathnode->path.pathtype = T_Motion;
......@@ -1166,7 +1184,9 @@ add_rowid_to_path(PlannerInfo *root, Path *path, int *rowidexpr_id)
newpathtarget = copy_pathtarget(path->pathtarget);
add_column_to_pathtarget(newpathtarget, (Expr *) rowidexpr, 0);
return (Path *) create_projection_path(root, path->parent, path, newpathtarget);
return (Path *) create_projection_path_with_quals(root, path->parent,
path, newpathtarget,
NIL, true);
}
/*
......
......@@ -486,7 +486,8 @@ bring_to_outer_query(PlannerInfo *root, RelOptInfo *rel, List *outer_quals)
rel,
path,
path->parent->reltarget,
outer_quals);
outer_quals,
false);
add_path(rel, path);
}
set_cheapest(rel);
......
......@@ -1910,7 +1910,8 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel,
subpath->parent,
subpath,
subpath->pathtarget,
list_make1(restrict_info));
list_make1(restrict_info),
false);
/*
* We use the trick of a Result plan node with a one-time filter
......@@ -2440,6 +2441,8 @@ create_unique_rowid_path(PlannerInfo *root,
list_make1_int(0),
numsegments);
subpath = cdbpath_create_motion_path(root, subpath, NIL, false, locus);
if (!subpath)
return NULL;
/*
* The motion path has been created correctly, but there's a little
......@@ -3728,15 +3731,18 @@ create_projection_path(PlannerInfo *root,
Path *subpath,
PathTarget *target)
{
return create_projection_path_with_quals(root, rel, subpath, target, NIL);
return create_projection_path_with_quals(root, rel,
subpath, target,
NIL, false);
}
ProjectionPath *
create_projection_path_with_quals(PlannerInfo *root,
RelOptInfo *rel,
Path *subpath,
PathTarget *target,
List *restrict_clauses)
RelOptInfo *rel,
Path *subpath,
PathTarget *target,
List *restrict_clauses,
bool need_param)
{
ProjectionPath *pathnode = makeNode(ProjectionPath);
PathTarget *oldtarget = subpath->pathtarget;
......@@ -3744,8 +3750,7 @@ create_projection_path_with_quals(PlannerInfo *root,
pathnode->path.pathtype = T_Result;
pathnode->path.parent = rel;
pathnode->path.pathtarget = target;
/* For now, assume we are above any joins, so no parameterization */
pathnode->path.param_info = NULL;
pathnode->path.param_info = need_param ? subpath->param_info : NULL;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe &&
......
......@@ -177,7 +177,8 @@ extern ProjectionPath *create_projection_path_with_quals(PlannerInfo *root,
RelOptInfo *rel,
Path *subpath,
PathTarget *target,
List *restrict_clauses);
List *restrict_clauses,
bool need_param);
extern Path *apply_projection_to_path(PlannerInfo *root,
RelOptInfo *rel,
Path *path,
......
......@@ -1536,3 +1536,68 @@ select * from foo where exists (select 1 from bar where foo.a = bar.b);
reset enable_hashagg;
drop table foo;
drop table bar;
-- Fix github issue 10012
create table fix_param_a (i int, j int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table fix_param_b (i int UNIQUE, j int);
create table fix_param_c (i int, j int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into fix_param_a select i, i from generate_series(1,20)i;
insert into fix_param_b select i, i from generate_series(1,2000)i;
insert into fix_param_c select i, i from generate_series(1,2000)i;
analyze fix_param_a;
analyze fix_param_b;
analyze fix_param_c;
explain (costs off)
select * from fix_param_a left join fix_param_b on
fix_param_a.i = fix_param_b.i and fix_param_b.j in
(select j from fix_param_c where fix_param_b.i = fix_param_c.i)
order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Merge Key: fix_param_a.i
-> Sort
Sort Key: fix_param_a.i
-> Hash Right Join
Hash Cond: (fix_param_b.i = fix_param_a.i)
-> Hash Semi Join
Hash Cond: ((fix_param_b.i = fix_param_c.i) AND (fix_param_b.j = fix_param_c.j))
-> Seq Scan on fix_param_b
-> Hash
-> Seq Scan on fix_param_c
-> Hash
-> Seq Scan on fix_param_a
Optimizer: Postgres query optimizer
(14 rows)
select * from fix_param_a left join fix_param_b on
fix_param_a.i = fix_param_b.i and fix_param_b.j in
(select j from fix_param_c where fix_param_b.i = fix_param_c.i)
order by 1;
i | j | i | j
----+----+----+----
1 | 1 | 1 | 1
2 | 2 | 2 | 2
3 | 3 | 3 | 3
4 | 4 | 4 | 4
5 | 5 | 5 | 5
6 | 6 | 6 | 6
7 | 7 | 7 | 7
8 | 8 | 8 | 8
9 | 9 | 9 | 9
10 | 10 | 10 | 10
11 | 11 | 11 | 11
12 | 12 | 12 | 12
13 | 13 | 13 | 13
14 | 14 | 14 | 14
15 | 15 | 15 | 15
16 | 16 | 16 | 16
17 | 17 | 17 | 17
18 | 18 | 18 | 18
19 | 19 | 19 | 19
20 | 20 | 20 | 20
(20 rows)
......@@ -1523,3 +1523,68 @@ select * from foo where exists (select 1 from bar where foo.a = bar.b);
reset enable_hashagg;
drop table foo;
drop table bar;
-- Fix github issue 10012
create table fix_param_a (i int, j int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table fix_param_b (i int UNIQUE, j int);
create table fix_param_c (i int, j int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into fix_param_a select i, i from generate_series(1,20)i;
insert into fix_param_b select i, i from generate_series(1,2000)i;
insert into fix_param_c select i, i from generate_series(1,2000)i;
analyze fix_param_a;
analyze fix_param_b;
analyze fix_param_c;
explain (costs off)
select * from fix_param_a left join fix_param_b on
fix_param_a.i = fix_param_b.i and fix_param_b.j in
(select j from fix_param_c where fix_param_b.i = fix_param_c.i)
order by 1;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
Merge Key: fix_param_a.i
-> Sort
Sort Key: fix_param_a.i
-> Hash Left Join
Hash Cond: (fix_param_a.i = fix_param_b.i)
-> Seq Scan on fix_param_a
-> Hash
-> Hash Semi Join
Hash Cond: ((fix_param_b.i = fix_param_c.i) AND (fix_param_b.j = fix_param_c.j))
-> Seq Scan on fix_param_b
-> Hash
-> Seq Scan on fix_param_c
Optimizer: Pivotal Optimizer (GPORCA)
(14 rows)
select * from fix_param_a left join fix_param_b on
fix_param_a.i = fix_param_b.i and fix_param_b.j in
(select j from fix_param_c where fix_param_b.i = fix_param_c.i)
order by 1;
i | j | i | j
----+----+----+----
1 | 1 | 1 | 1
2 | 2 | 2 | 2
3 | 3 | 3 | 3
4 | 4 | 4 | 4
5 | 5 | 5 | 5
6 | 6 | 6 | 6
7 | 7 | 7 | 7
8 | 8 | 8 | 8
9 | 9 | 9 | 9
10 | 10 | 10 | 10
11 | 11 | 11 | 11
12 | 12 | 12 | 12
13 | 13 | 13 | 13
14 | 14 | 14 | 14
15 | 15 | 15 | 15
16 | 16 | 16 | 16
17 | 17 | 17 | 17
18 | 18 | 18 | 18
19 | 19 | 19 | 19
20 | 20 | 20 | 20
(20 rows)
......@@ -719,3 +719,26 @@ select * from foo where exists (select 1 from bar where foo.a = bar.b);
reset enable_hashagg;
drop table foo;
drop table bar;
-- Fix github issue 10012
create table fix_param_a (i int, j int);
create table fix_param_b (i int UNIQUE, j int);
create table fix_param_c (i int, j int);
insert into fix_param_a select i, i from generate_series(1,20)i;
insert into fix_param_b select i, i from generate_series(1,2000)i;
insert into fix_param_c select i, i from generate_series(1,2000)i;
analyze fix_param_a;
analyze fix_param_b;
analyze fix_param_c;
explain (costs off)
select * from fix_param_a left join fix_param_b on
fix_param_a.i = fix_param_b.i and fix_param_b.j in
(select j from fix_param_c where fix_param_b.i = fix_param_c.i)
order by 1;
select * from fix_param_a left join fix_param_b on
fix_param_a.i = fix_param_b.i and fix_param_b.j in
(select j from fix_param_c where fix_param_b.i = fix_param_c.i)
order by 1;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册