未验证 提交 343f8826 编写于 作者: J Jinbao Chen 提交者: GitHub

The inner relation of LASJ_NOTIN should not have a partitioned locus

The result of NULL NOT IN a non-empty set is false, while the result of
NULL NOT IN an empty set is true. If a non-empty set has a partitioned
locus, it is divided into several subsets, and some of those subsets may
be empty. Because NULL NOT IN an empty set evaluates to true, the result
set can then contain tuples that should not be there.

This patch disables the partitioned locus of the inner table by removing
the join clause from the redistribution_clauses.

This commit is cherry-picked from 6X_STABLE 8c93db54f3d93a890493f6a6d532f841779a9188.
Co-authored-by: Hubert Zhang <hubertzhang@apache.org>
Co-authored-by: Richard Guo <riguo@pivotal.io>
上级 78024fbc
......@@ -1694,6 +1694,89 @@ find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context)
}
/*
 * is_param_nullable
 *		Report whether an operand of an ALL sublink's test expression may
 *		evaluate to NULL.
 *
 * A NULL compared with "NOT IN / > ALL / < ALL" against a non-empty set never
 * qualifies, whereas the same comparison against an empty set always does.
 * When an operand may be NULL the caller therefore must not treat the inner
 * side as "Partitioned": a segment holding an empty slice of a non-empty set
 * would wrongly let the NULL row through.
 *
 * node    - the sublink test expression.  "a NOT IN (select ...)" is expected
 *			 to arrive as a BoolExpr, "a < ALL (select ...)" as an OpExpr.
 * query   - the query whose jointree is scanned for provably non-nullable
 *			 Vars; must not be NULL.
 * oprname - not referenced by this function.
 *
 * Returns true when some operand may be NULL, or when the expression has a
 * shape this function does not analyze; returns false otherwise.  A BoolExpr
 * that is not a NOT expression yields false.
 */
static bool
is_param_nullable(Node *node, Query *query, Value *oprname)
{
	NonNullableVarsContext context;
	Expr	   *opexpr;
	ListCell   *cell;

	Assert(query);

	/* Collect the Vars that the jointree quals prove to be non-nullable. */
	context.query = query;
	context.nonNullableVars = NIL;
	expression_tree_walker((Node *) query->jointree, find_nonnullable_vars_walker, &context);

	/* Locate the operator expression whose arguments are to be inspected. */
	switch (nodeTag(node))
	{
		case T_BoolExpr:
			{
				BoolExpr   *boolexpr = (BoolExpr *) node;

				if (boolexpr->boolop != NOT_EXPR)
					return false;

				/* Look underneath the NOT at its first argument. */
				opexpr = lfirst(list_head(boolexpr->args));
			}
			break;

		case T_OpExpr:
			opexpr = (Expr *) node;
			break;

		default:
			/* Unrecognized shape: be conservative. */
			return true;
	}

	if (nodeTag(opexpr) != T_OpExpr)
		return true;

	/*
	 * Inspect every operator argument.  Nothing in the loop has side effects,
	 * so we can stop at the first argument found to be possibly NULL.
	 */
	foreach(cell, ((OpExpr *) opexpr)->args)
	{
		Expr	   *operand = lfirst(cell);

		/* Strip a single level of binary-compatible relabeling. */
		if (nodeTag(operand) == T_RelabelType)
			operand = ((RelabelType *) operand)->arg;

		switch (nodeTag(operand))
		{
			case T_Param:

				/*
				 * Params are skipped here; presumably they stand for the
				 * subquery's output, whose nullability is checked separately
				 * -- TODO confirm against the caller.
				 */
				break;

			case T_Const:

				/*
				 * Only a NULL constant is nullable.  Note: the 'dummy' column
				 * is not NULL, so it needs no special handling.
				 */
				if (((Const *) operand)->constisnull == true)
					return true;
				break;

			case T_Var:
				/* Was this var determined to be non-nullable? */
				if (!list_member(context.nonNullableVars, operand))
					return true;
				break;

			default:
				/* Any other expression is assumed to be nullable. */
				return true;
		}
	}

	return false;
}
/**
* This method determines if the targetlist of a query is nullable.
* Consider a query of the form: select t1.x, t2.y from t1, t2 where t1.x > 5
......@@ -1818,9 +1901,14 @@ convert_IN_to_antijoin(PlannerInfo *root, List **rtrlist_inout __attribute__((un
bool inner_nullable = is_targetlist_nullable(subselect);
JoinExpr *join_expr = make_join_expr(larg, subq_indx, JOIN_LASJ_NOTIN);
ListCell *lc = list_head(sublink->operName);
bool outer_nullable = is_param_nullable(sublink->testexpr,
root->parse,
lc? list_head(sublink->operName)->data.ptr_value : NULL);
join_expr->quals = make_lasj_quals(root, sublink, subq_indx);
if (inner_nullable)
if (inner_nullable || outer_nullable)
{
join_expr->quals = add_null_match_clause(join_expr->quals);
}
......
......@@ -107,25 +107,26 @@ select c1 from t1 where c1 not in
explain select c1 from t1 where c1 not in
(select c2 from t2 where c2 > 2 and c2 not in
(select c3 from t3));
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=4.49..7.66 rows=4 width=4)
-> Hash Left Anti Semi Join (Not-In) (cost=4.49..7.66 rows=2 width=4)
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice3; segments: 3) (cost=4.71..7.89 rows=4 width=4)
-> Hash Left Anti Semi Join (Not-In) (cost=4.71..7.89 rows=2 width=4)
Hash Cond: t1.c1 = "NotIn_SUBQUERY".c2
-> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4)
-> Hash (cost=4.45..4.45 rows=2 width=4)
-> Subquery Scan "NotIn_SUBQUERY" (cost=2.29..4.45 rows=2 width=4)
-> Hash Left Anti Semi Join (Not-In) (cost=2.29..4.41 rows=2 width=4)
Hash Cond: t2.c2 = "NotIn_SUBQUERY".c3
-> Seq Scan on t2 (cost=0.00..2.06 rows=2 width=4)
Filter: c2 > 2
-> Hash (cost=2.18..2.18 rows=3 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.18 rows=3 width=4)
-> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.06 rows=1 width=4)
-> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4)
-> Hash (cost=4.58..4.58 rows=4 width=4)
-> Broadcast Motion 3:3 (slice2; segments: 3) (cost=2.29..4.58 rows=4 width=4)
-> Subquery Scan "NotIn_SUBQUERY" (cost=2.29..4.45 rows=2 width=4)
-> Hash Left Anti Semi Join (Not-In) (cost=2.29..4.41 rows=2 width=4)
Hash Cond: t2.c2 = "NotIn_SUBQUERY".c3
-> Seq Scan on t2 (cost=0.00..2.06 rows=2 width=4)
Filter: c2 > 2
-> Hash (cost=2.18..2.18 rows=3 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..2.18 rows=3 width=4)
-> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..2.06 rows=1 width=4)
-> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4)
Settings: optimizer=off
Optimizer status: legacy query optimizer
(16 rows)
(17 rows)
select c1 from t1 where c1 not in
(select c2 from t2 where c2 > 2 and c2 not in
......@@ -1148,7 +1149,7 @@ select c1 from t1 where not not not c1 in (select c2 from t2);
--q43
--
explain select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 is not null;
QUERY PLAN
QUERY PLAN
------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=2.09..5.25 rows=4 width=4)
-> Hash Left Anti Semi Join (Not-In) (cost=2.09..5.25 rows=2 width=4)
......@@ -1192,8 +1193,95 @@ select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 > 2;
9
(7 rows)
-- Test the null not in an empty set
-- null not in an unempty set, always returns false
-- null not in an empty set, always returns true
--
-- q46
--
create table table_source (c1 varchar(100),c2 varchar(100),c3 varchar(100),c4 varchar(100));
insert into table_source (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158',null,'INC','0000000001') ;
create table table_source2 as select * from table_source distributed by (c2);
create table table_source4 (c1 varchar(100),c2 varchar(100) not null,c3 varchar(100),c4 varchar(100));
insert into table_source4 (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158','a','INC','0000000001') ;
create table table_config (c1 varchar(10) ,c2 varchar(10) ,PRIMARY KEY (c1));
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "table_config_pkey" for table "table_config"
insert into table_config select i, 'test' from generate_series(1, 1000)i;
delete from table_config where gp_segment_id = 0;
explain select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=103.00..104.12 rows=10 width=258)
-> Hash Left Anti Semi Join (Not-In) (cost=103.00..104.12 rows=4 width=258)
Hash Cond: table_source.c2::text = "NotIn_SUBQUERY".c1::text
-> Seq Scan on table_source (cost=0.00..1.01 rows=1 width=258)
Filter: c3::text = 'INC'::text AND c4::text = '0000000001'::text
-> Hash (cost=65.50..65.50 rows=1000 width=38)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..65.50 rows=1000 width=38)
-> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..25.50 rows=334 width=38)
-> Seq Scan on table_config (cost=0.00..15.50 rows=334 width=3)
Filter: c2::text = 'test'::text
Settings: optimizer=off
Optimizer status: legacy query optimizer
(12 rows)
select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
----+----+----+----
(0 rows)
explain select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
------------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=103.00..104.12 rows=10 width=258)
-> Hash Left Anti Semi Join (Not-In) (cost=103.00..104.12 rows=4 width=258)
Hash Cond: table_source2.c2::text = "NotIn_SUBQUERY".c1::text
-> Seq Scan on table_source2 (cost=0.00..1.01 rows=1 width=258)
Filter: c3::text = 'INC'::text AND c4::text = '0000000001'::text
-> Hash (cost=65.50..65.50 rows=1000 width=38)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..65.50 rows=1000 width=38)
-> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..25.50 rows=334 width=38)
-> Seq Scan on table_config (cost=0.00..15.50 rows=334 width=3)
Filter: c2::text = 'test'::text
Settings: optimizer=off
Optimizer status: legacy query optimizer
(12 rows)
select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
----+----+----+----
(0 rows)
explain select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
-----------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=38.00..39.14 rows=10 width=42)
-> Hash Left Anti Semi Join (Not-In) (cost=38.00..39.14 rows=4 width=42)
Hash Cond: table_source4.c2::text = "NotIn_SUBQUERY".c1::text
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..1.03 rows=1 width=42)
Hash Key: table_source4.c2
-> Seq Scan on table_source4 (cost=0.00..1.01 rows=1 width=42)
Filter: c3::text = 'INC'::text AND c4::text = '0000000001'::text
-> Hash (cost=25.50..25.50 rows=334 width=38)
-> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..25.50 rows=334 width=38)
-> Seq Scan on table_config (cost=0.00..15.50 rows=334 width=3)
Filter: c2::text = 'test'::text
Settings: optimizer=off
Optimizer status: legacy query optimizer
(13 rows)
select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
--------------------------+----+-----+------------
000181202006010000003158 | a | INC | 0000000001
(1 row)
reset search_path;
drop schema notin cascade;
NOTICE: drop cascades to table notin.table_config
NOTICE: drop cascades to table notin.table_source4
NOTICE: drop cascades to table notin.table_source2
NOTICE: drop cascades to table notin.table_source
NOTICE: drop cascades to table notin.l1
NOTICE: drop cascades to table notin.g1
NOTICE: drop cascades to table notin.t1n
......
......@@ -1215,8 +1215,89 @@ select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 > 2;
7
(7 rows)
-- Test the null not in an empty set
-- null not in an unempty set, always returns false
-- null not in an empty set, always returns true
--
-- q46
--
create table table_source (c1 varchar(100),c2 varchar(100),c3 varchar(100),c4 varchar(100));
insert into table_source (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158',null,'INC','0000000001') ;
create table table_source2 as select * from table_source distributed by (c2);
create table table_source4 (c1 varchar(100),c2 varchar(100) not null,c3 varchar(100),c4 varchar(100));
insert into table_source4 (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158','a','INC','0000000001') ;
create table table_config (c1 varchar(10) ,c2 varchar(10) ,PRIMARY KEY (c1));
NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "table_config_pkey" for table "table_config"
insert into table_config select i, 'test' from generate_series(1, 1000)i;
delete from table_config where gp_segment_id = 0;
explain select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
---------------------------------------------------------------------------------------------
Hash Left Anti Semi Join (Not-In) (cost=0.00..862.20 rows=1 width=40)
Hash Cond: table_source.c2::text = table_config.c1::text
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=40)
-> Table Scan on table_source (cost=0.00..431.00 rows=1 width=40)
Filter: c3::text = 'INC'::text AND c4::text = '0000000001'::text
-> Hash (cost=431.03..431.03 rows=334 width=3)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.03 rows=1000 width=3)
-> Table Scan on table_config (cost=0.00..431.02 rows=334 width=3)
Filter: c2::text = 'test'::text
Optimizer status: PQO version 3.112.0
(10 rows)
select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
----+----+----+----
(0 rows)
explain select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
---------------------------------------------------------------------------------------------
Hash Left Anti Semi Join (Not-In) (cost=0.00..862.20 rows=1 width=40)
Hash Cond: table_source2.c2::text = table_config.c1::text
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=40)
-> Table Scan on table_source2 (cost=0.00..431.00 rows=1 width=40)
Filter: c3::text = 'INC'::text AND c4::text = '0000000001'::text
-> Hash (cost=431.03..431.03 rows=334 width=3)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.03 rows=1000 width=3)
-> Table Scan on table_config (cost=0.00..431.02 rows=334 width=3)
Filter: c2::text = 'test'::text
Optimizer status: PQO version 3.112.0
(10 rows)
select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
----+----+----+----
(0 rows)
explain select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
QUERY PLAN
-------------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..862.08 rows=1 width=42)
-> Hash Left Anti Semi Join (Not-In) (cost=0.00..862.08 rows=1 width=42)
Hash Cond: table_source4.c2::text = table_config.c1::text
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=42)
Hash Key: table_source4.c2
-> Table Scan on table_source4 (cost=0.00..431.00 rows=1 width=42)
Filter: c3::text = 'INC'::text AND c4::text = '0000000001'::text
-> Hash (cost=431.02..431.02 rows=334 width=3)
-> Table Scan on table_config (cost=0.00..431.02 rows=334 width=3)
Filter: c2::text = 'test'::text
Optimizer status: PQO version 3.112.0
(11 rows)
select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
c1 | c2 | c3 | c4
--------------------------+----+-----+------------
000181202006010000003158 | a | INC | 0000000001
(1 row)
reset search_path;
drop schema notin cascade;
NOTICE: drop cascades to table notin.table_config
NOTICE: drop cascades to table notin.table_source4
NOTICE: drop cascades to table notin.table_source2
NOTICE: drop cascades to table notin.table_source
NOTICE: drop cascades to table notin.l1
NOTICE: drop cascades to table notin.g1
NOTICE: drop cascades to table notin.t1n
......
......@@ -1230,18 +1230,20 @@ explain delete from TabDel1 where TabDel1.a not in (select a from TabDel3); -- d
(11 rows)
explain delete from TabDel2 where TabDel2.a not in (select a from TabDel4); -- support this
QUERY PLAN
------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------
Delete (slice0; segments: 3) (rows=2 width=10)
-> Hash Left Anti Semi Join (Not-In) (cost=1.03..3.11 rows=2 width=10)
Hash Cond: tabdel2.a = "NotIn_SUBQUERY".a
-> Seq Scan on tabdel2 (cost=0.00..2.03 rows=1 width=14)
Filter: a IS NOT NULL
-> Hash (cost=1.02..1.02 rows=1 width=4)
-> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..1.02 rows=1 width=4)
-> Seq Scan on tabdel4 (cost=0.00..1.01 rows=1 width=4)
-> Explicit Redistribute Motion 3:3 (slice2; segments: 3) (cost=1.10..3.18 rows=2 width=10)
-> Hash Left Anti Semi Join (Not-In) (cost=1.10..3.18 rows=2 width=10)
Hash Cond: tabdel2.a = "NotIn_SUBQUERY".a
-> Seq Scan on tabdel2 (cost=0.00..2.03 rows=1 width=14)
-> Hash (cost=1.06..1.06 rows=1 width=4)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..1.06 rows=1 width=4)
-> Subquery Scan "NotIn_SUBQUERY" (cost=0.00..1.02 rows=1 width=4)
-> Seq Scan on tabdel4 (cost=0.00..1.01 rows=1 width=4)
Settings: optimizer=off
Optimizer status: legacy query optimizer
(9 rows)
(11 rows)
-- start_ignore
delete from TabDel2 where TabDel2.a not in (select a from TabDel4);
......
......@@ -375,5 +375,28 @@ select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 is not
--
select c1 from t1 where c1 not in (select c2 from t2 where c2 > 4) and c1 > 2;
-- Test the null not in an empty set
-- null not in an unempty set, always returns false
-- null not in an empty set, always returns true
--
-- q46
--
create table table_source (c1 varchar(100),c2 varchar(100),c3 varchar(100),c4 varchar(100));
insert into table_source (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158',null,'INC','0000000001') ;
create table table_source2 as select * from table_source distributed by (c2);
create table table_source4 (c1 varchar(100),c2 varchar(100) not null,c3 varchar(100),c4 varchar(100));
insert into table_source4 (c1 ,c2 ,c3 ,c4 ) values ('000181202006010000003158','a','INC','0000000001') ;
create table table_config (c1 varchar(10) ,c2 varchar(10) ,PRIMARY KEY (c1));
insert into table_config select i, 'test' from generate_series(1, 1000)i;
delete from table_config where gp_segment_id = 0;
explain select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
select * from table_source where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
explain select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
select * from table_source2 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
explain select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
select * from table_source4 where c3 = 'INC' and c4 = '0000000001' and c2 not in (SELECT c1 from table_config where c2='test');
reset search_path;
drop schema notin cascade;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册