提交 3b7ca45c 编写于 作者: Heikki Linnakangas

Fix crashes when a Values Scan needs to create "fake ctids".

When converting semi-join to inner-join, a distinct agg on ctid is added
above the hash-join node. But the fake ctids generated in Values Scan
were invalid, with offset number 0, which caused an assertion failure.

This patch is based on commit d8886cf9, which fixed the same issue for
Function Scans.
Co-authored-by: dh-cloud <60729713+dh-cloud@users.noreply.github.com>
Co-authored-by: Jesse Zhang <sbjesse@gmail.com>
上级 ba8f2fe0
...@@ -166,13 +166,19 @@ ValuesNext(ValuesScanState *node) ...@@ -166,13 +166,19 @@ ValuesNext(ValuesScanState *node)
*/ */
ExecStoreVirtualTuple(slot); ExecStoreVirtualTuple(slot);
/* CDB: Label each row with a synthetic ctid for subquery dedup. */ /*
if (node->cdb_want_ctid) * CDB: Label each row with a synthetic ctid for subquery dedup.
*
* Values Scan supports backward scans too, so we can't use
* slot_set_ctid_from_fake() like most scan types do.
*/
if (node->cdb_want_ctid)
{ {
HeapTuple tuple = ExecFetchSlotHeapTuple(slot); HeapTuple tuple = ExecFetchSlotHeapTuple(slot);
ItemPointerSet(&tuple->t_self, node->curr_idx >> 16, ItemPointerSet(&tuple->t_self,
(OffsetNumber)node->curr_idx); (BlockNumber) (node->curr_idx / 1024),
(OffsetNumber) ((node->curr_idx % 1024) + 1));
} }
} }
......
...@@ -500,15 +500,67 @@ EXPLAIN SELECT (EXISTS (SELECT UNNEST(X))) AS B FROM A; ...@@ -500,15 +500,67 @@ EXPLAIN SELECT (EXISTS (SELECT UNNEST(X))) AS B FROM A;
DROP TABLE A; DROP TABLE A;
-- --
-- Test the ctid in function scan -- Test the ctid in Function and Values Scans
-- --
create table t1(a int) ; create table t1(a int) ;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into t1 select i from generate_series(1, 100000) i; insert into t1 select i from generate_series(1, 100000) i;
analyze t1; analyze t1;
-- Function Scan
explain
select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from t1);
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Aggregate (cost=1611.83..1611.84 rows=1 width=8)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=1611.76..1611.81 rows=1 width=8)
-> Aggregate (cost=1611.76..1611.77 rows=1 width=8)
-> HashAggregate (cost=1611.56..1611.72 rows=6 width=6)
Group Key: b.ctid
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=0.02..1611.52 rows=6 width=6)
Hash Key: b.ctid
-> Hash Join (cost=0.02..1611.19 rows=6 width=6)
Hash Cond: (t1.a = (b.a % 100000))
-> Seq Scan on t1 (cost=0.00..1111.00 rows=33334 width=4)
-> Hash (cost=0.01..0.01 rows=1 width=10)
-> Function Scan on b (cost=0.00..0.01 rows=1 width=10)
Optimizer: Postgres query optimizer
(13 rows)
select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from t1); select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from t1);
count count
------- -------
1 1
(1 row) (1 row)
-- Values Scan
-- We use a large number of entries, to make sure the fake ctids are generated
-- correctly even when the offset number in the TID wraps around.
select string_agg('(' || g || ')', ', ') as lots_of_values from generate_series(1, 66000) g
\gset
explain
select count(*) from ( values :lots_of_values ) as b(a) where b.a % 100000 in (select a from t1);
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Aggregate (cost=5001.06..5001.07 rows=1 width=8)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=5001.00..5001.05 rows=1 width=8)
-> Aggregate (cost=5001.00..5001.01 rows=1 width=8)
-> HashAggregate (cost=4588.50..4918.50 rows=11000 width=6)
Group Key: "*VALUES*".ctid
-> Redistribute Motion 3:3 (slice1; segments: 3) (cost=2361.00..4506.00 rows=11000 width=6)
Hash Key: "*VALUES*".ctid
-> Hash Join (cost=2361.00..3846.00 rows=11000 width=6)
Hash Cond: (("*VALUES*".column1 % 100000) = t1.a)
-> Values Scan on "*VALUES*" (cost=0.00..825.00 rows=22000 width=10)
-> Hash (cost=1111.00..1111.00 rows=33334 width=4)
-> Seq Scan on t1 (cost=0.00..1111.00 rows=33334 width=4)
Optimizer: Postgres query optimizer
(13 rows)
select count(*) from ( values :lots_of_values ) as b(a) where b.a % 100000 in (select a from t1);
count
-------
66000
(1 row)
drop table t1; drop table t1;
...@@ -497,15 +497,63 @@ EXPLAIN SELECT (EXISTS (SELECT UNNEST(X))) AS B FROM A; ...@@ -497,15 +497,63 @@ EXPLAIN SELECT (EXISTS (SELECT UNNEST(X))) AS B FROM A;
DROP TABLE A; DROP TABLE A;
-- --
-- Test the ctid in function scan -- Test the ctid in Function and Values Scans
-- --
create table t1(a int) ; create table t1(a int) ;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into t1 select i from generate_series(1, 100000) i; insert into t1 select i from generate_series(1, 100000) i;
analyze t1; analyze t1;
-- Function Scan
explain
select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from t1);
QUERY PLAN
------------------------------------------------------------------------------------
Aggregate (cost=0.00..450.95 rows=1 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..450.95 rows=1 width=1)
-> Hash Semi Join (cost=0.00..450.95 rows=1 width=1)
Hash Cond: ((b % 100000) = a)
-> Result (cost=0.00..0.00 rows=1 width=4)
-> Result (cost=0.00..0.00 rows=1 width=4)
-> Result (cost=0.00..0.00 rows=1 width=4)
-> Result (cost=0.00..0.00 rows=1 width=1)
-> Hash (cost=431.62..431.62 rows=33334 width=4)
-> Seq Scan on t1 (cost=0.00..431.62 rows=33334 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(11 rows)
select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from t1); select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from t1);
count count
------- -------
1 1
(1 row) (1 row)
-- Values Scan
-- We use a large number of entries, to make sure the fake ctids are generated
-- correctly even when the offset number in the TID wraps around.
select string_agg('(' || g || ')', ', ') as lots_of_values from generate_series(1, 66000) g
\gset
explain
select count(*) from ( values :lots_of_values ) as b(a) where b.a % 100000 in (select a from t1);
QUERY PLAN
---------------------------------------------------------------------------------------------------
Aggregate (cost=0.00..443.14 rows=1 width=8)
-> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..443.14 rows=1 width=8)
-> Aggregate (cost=0.00..443.14 rows=1 width=8)
-> Hash Semi Join (cost=0.00..443.14 rows=22000 width=1)
Hash Cond: (("Values".column1 % 100000) = t1.a)
-> Result (cost=0.00..0.95 rows=22000 width=4)
-> Result (cost=0.00..0.95 rows=22000 width=4)
-> Values Scan on "Values" (cost=0.00..0.26 rows=22000 width=4)
-> Hash (cost=431.62..431.62 rows=33334 width=4)
-> Seq Scan on t1 (cost=0.00..431.62 rows=33334 width=4)
Optimizer: Pivotal Optimizer (GPORCA)
(11 rows)
select count(*) from ( values :lots_of_values ) as b(a) where b.a % 100000 in (select a from t1);
count
-------
66000
(1 row)
drop table t1; drop table t1;
...@@ -293,11 +293,25 @@ EXPLAIN SELECT (EXISTS (SELECT UNNEST(X))) AS B FROM A; ...@@ -293,11 +293,25 @@ EXPLAIN SELECT (EXISTS (SELECT UNNEST(X))) AS B FROM A;
DROP TABLE A; DROP TABLE A;
-- --
-- Test the ctid in function scan -- Test the ctid in Function and Values Scans
-- --
create table t1(a int) ; create table t1(a int) ;
insert into t1 select i from generate_series(1, 100000) i; insert into t1 select i from generate_series(1, 100000) i;
analyze t1; analyze t1;
-- Function Scan
explain
select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from t1);
select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from t1); select count(*) from pg_backend_pid() b(a) where b.a % 100000 in (select a from t1);
-- Values Scan
-- We use a large number of entries, to make sure the fake ctids are generated
-- correctly even when the offset number in the TID wraps around.
select string_agg('(' || g || ')', ', ') as lots_of_values from generate_series(1, 66000) g
\gset
explain
select count(*) from ( values :lots_of_values ) as b(a) where b.a % 100000 in (select a from t1);
select count(*) from ( values :lots_of_values ) as b(a) where b.a % 100000 in (select a from t1);
drop table t1; drop table t1;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册