Commit 1348afc0 authored by Hao Wu, committed by Haozhou Wang

Fix auto explain init file

Fix auto_explain init file for #7195

This patch includes:

1. Fix the init_file
2. Use floating-point numbers for memory usage
3. Update the last SQL statement, whose running time may be less than 1 ms
4. Update the test case: use newly created tables instead of pg_class
5. Add an answer file for ORCA and enable nestloop
Parent 73ca7e77
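For context, a minimal sketch of the auto_explain setup these tests exercise; the statements are taken from the test files in this diff, and the only assumption is that the auto_explain_test tables have already been created:

LOAD 'auto_explain';                      -- load the module into the session
SET auto_explain.log_analyze = TRUE;      -- include actual row counts in the logged plan
SET auto_explain.log_min_duration = 0;    -- 0 ms threshold: log every statement's plan
SET enable_nestloop = ON;                 -- force a nested-loop plan so the cross join
                                          -- stays above the 1 ms threshold used later
SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;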
CREATE SCHEMA auto_explain_test;
CREATE TABLE auto_explain_test.t1(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO auto_explain_test.t1 VALUES(generate_series(0, 1000));
CREATE TABLE auto_explain_test.t2(b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO auto_explain_test.t2 VALUES(generate_series(0, 1000));
SET enable_nestloop = ON;
SET CLIENT_MIN_MESSAGES = LOG;
LOAD 'auto_explain';
LOG: statement: LOAD 'auto_explain';
@@ -17,11 +27,11 @@ SET auto_explain.log_verbose = FALSE;
LOG: statement: SET auto_explain.log_verbose = FALSE;
SELECT relname FROM pg_class WHERE relname='pg_class';
LOG: statement: SELECT relname FROM pg_class WHERE relname='pg_class';
LOG: duration: 0.198 ms plan:
LOG: duration: 0.173 ms plan:
Query Text: SELECT relname FROM pg_class WHERE relname='pg_class';
Seq Scan on pg_class (cost=0.00..11.46 rows=1 width=64) (actual rows=1 loops=1)
Seq Scan on pg_class (cost=0.00..12.46 rows=1 width=64) (actual rows=1 loops=1)
Filter: (relname = 'pg_class'::name)
Rows Removed by Filter: 436
Rows Removed by Filter: 438
(slice0) Executor memory: 32K bytes.
Memory used: 128000kB
relname
@@ -29,25 +39,25 @@ Memory used: 128000kB
pg_class
(1 row)
SELECT count(*)>0 FROM pg_class, pg_index WHERE oid = indrelid AND indisunique;
LOG: statement: SELECT count(*)>0 FROM pg_class, pg_index WHERE oid = indrelid AND indisunique;
LOG: duration: 5.369 ms plan:
Query Text: SELECT count(*)>0 FROM pg_class, pg_index WHERE oid = indrelid AND indisunique;
Aggregate (cost=19.62..19.64 rows=1 width=1) (actual rows=1 loops=1)
-> Hash Join (cost=4.29..19.29 rows=135 width=0) (actual rows=135 loops=1)
Hash Cond: (pg_class.oid = pg_index.indrelid)
Extra Text: Hash chain length 1.5 avg, 3 max, using 91 of 524288 buckets.
-> Seq Scan on pg_class (cost=0.00..10.37 rows=437 width=4) (actual rows=437 loops=1)
-> Hash (cost=2.60..2.60 rows=45 width=4) (actual rows=135 loops=1)
Buckets: 524288 Batches: 1 Memory Usage: 4kB
-> Seq Scan on pg_index (cost=0.00..2.60 rows=135 width=4) (actual rows=135 loops=1)
Filter: indisunique
Rows Removed by Filter: 25
(slice0) Executor memory: 4177K bytes. Work_mem: 4K bytes max.
SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
LOG: statement: SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
LOG: duration: 165.358 ms plan:
Query Text: SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
Aggregate (cost=10000035148.67..10000035148.68 rows=1 width=8) (actual rows=1 loops=1)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=10000035148.61..10000035148.66 rows=1 width=8) (actual rows=3 loops=1)
-> Aggregate (cost=10000035148.61..10000035148.62 rows=1 width=8) (actual rows=1 loops=1)
-> Nested Loop (cost=10000000000.00..10000032643.60 rows=334001 width=0) (actual rows=340340 loops=1)
-> Seq Scan on t1 (cost=0.00..13.01 rows=334 width=0) (actual rows=340 loops=1)
-> Materialize (cost=0.00..68.06 rows=1001 width=0) (actual rows=1001 loops=340)
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..53.05 rows=1001 width=0) (actual rows=1001 loops=1)
-> Seq Scan on t2 (cost=0.00..13.01 rows=334 width=0) (actual rows=340 loops=1)
(slice0) Executor memory: 131K bytes.
(slice1) Executor memory: 53K bytes avg x 3 workers, 58K bytes max (seg1).
(slice2) Executor memory: 216K bytes avg x 3 workers, 216K bytes max (seg0).
Memory used: 128000kB
?column?
----------
t
count
---------
1002001
(1 row)
SET auto_explain.log_min_duration = 1;
@@ -56,6 +66,7 @@ SET auto_explain.log_triggers = FALSE;
LOG: statement: SET auto_explain.log_triggers = FALSE;
SET auto_explain.log_verbose = TRUE;
LOG: statement: SET auto_explain.log_verbose = TRUE;
-- this select should not dump execution plan
SELECT relname FROM pg_class WHERE relname='pg_class';
LOG: statement: SELECT relname FROM pg_class WHERE relname='pg_class';
relname
@@ -63,30 +74,39 @@ LOG: statement: SELECT relname FROM pg_class WHERE relname='pg_class';
pg_class
(1 row)
SELECT count(*)>0 FROM pg_class, pg_index WHERE oid = indrelid AND indisunique;
LOG: statement: SELECT count(*)>0 FROM pg_class, pg_index WHERE oid = indrelid AND indisunique;
LOG: duration: 4.530 ms plan:
Query Text: SELECT count(*)>0 FROM pg_class, pg_index WHERE oid = indrelid AND indisunique;
Aggregate (cost=19.62..19.64 rows=1 width=1) (actual rows=1 loops=1)
Output: (count(*) > 0)
-> Hash Join (cost=4.29..19.29 rows=135 width=0) (actual rows=135 loops=1)
Hash Cond: (pg_class.oid = pg_index.indrelid)
Executor Memory: 4kB Segments: 1 Max: 4kB (segment -1)
work_mem: 4kB Segments: 1 Max: 4kB (segment -1) Workfile: (0 spilling)
Extra Text: Hash chain length 1.5 avg, 3 max, using 91 of 524288 buckets.
-> Seq Scan on pg_catalog.pg_class (cost=0.00..10.37 rows=437 width=4) (actual rows=437 loops=1)
Output: pg_class.oid
-> Hash (cost=2.60..2.60 rows=45 width=4) (actual rows=135 loops=1)
Output: pg_index.indrelid
Buckets: 524288 Batches: 1 Memory Usage: 4kB
-> Seq Scan on pg_catalog.pg_index (cost=0.00..2.60 rows=135 width=4) (actual rows=135 loops=1)
Output: pg_index.indrelid
Filter: pg_index.indisunique
Rows Removed by Filter: 25
(slice0) Executor memory: 4177K bytes. Work_mem: 4K bytes max.
-- this select should also dump plan, since it takes too much time to run
SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
LOG: statement: SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
LOG: duration: 208.523 ms plan:
Query Text: SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
Aggregate (cost=10000035148.67..10000035148.68 rows=1 width=8) (actual rows=1 loops=1)
Output: count((count(*)))
-> Gather Motion 3:1 (slice2; segments: 3) (cost=10000035148.61..10000035148.66 rows=1 width=8) (actual rows=3 loops=1)
Output: (count(*))
-> Aggregate (cost=10000035148.61..10000035148.62 rows=1 width=8) (actual rows=1 loops=1)
Output: count(*)
-> Nested Loop (cost=10000000000.00..10000032643.60 rows=334001 width=0) (actual rows=340340 loops=1)
-> Seq Scan on auto_explain_test.t1 (cost=0.00..13.01 rows=334 width=0) (actual rows=340 loops=1)
Output: t1.a
-> Materialize (cost=0.00..68.06 rows=1001 width=0) (actual rows=1001 loops=340)
Output: t2.b
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..53.05 rows=1001 width=0) (actual rows=1001 loops=1)
Output: t2.b
-> Seq Scan on auto_explain_test.t2 (cost=0.00..13.01 rows=334 width=0) (actual rows=340 loops=1)
Output: t2.b
(slice0) Executor memory: 131K bytes.
(slice1) Executor memory: 43K bytes avg x 3 workers, 43K bytes max (seg0).
(slice2) Executor memory: 216K bytes avg x 3 workers, 216K bytes max (seg0).
Memory used: 128000kB
?column?
----------
t
count
---------
1002001
(1 row)
-- clean jobs
DROP TABLE auto_explain_test.t1;
LOG: statement: DROP TABLE auto_explain_test.t1;
DROP TABLE auto_explain_test.t2;
LOG: statement: DROP TABLE auto_explain_test.t2;
DROP SCHEMA auto_explain_test;
LOG: statement: DROP SCHEMA auto_explain_test;
CREATE SCHEMA auto_explain_test;
CREATE TABLE auto_explain_test.t1(a int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO auto_explain_test.t1 VALUES(generate_series(0, 1000));
CREATE TABLE auto_explain_test.t2(b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO auto_explain_test.t2 VALUES(generate_series(0, 1000));
SET enable_nestloop = ON;
SET CLIENT_MIN_MESSAGES = LOG;
LOAD 'auto_explain';
LOG: statement: LOAD 'auto_explain';
SET auto_explain.log_analyze = TRUE;
LOG: statement: SET auto_explain.log_analyze = TRUE;
SET auto_explain.log_min_duration = 0;
LOG: statement: SET auto_explain.log_min_duration = 0;
SET auto_explain.log_buffers = FALSE;
LOG: statement: SET auto_explain.log_buffers = FALSE;
SET auto_explain.log_triggers = TRUE;
LOG: statement: SET auto_explain.log_triggers = TRUE;
SET auto_explain.log_nested_statements = FALSE;
LOG: statement: SET auto_explain.log_nested_statements = FALSE;
SET auto_explain.log_timing = FALSE;
LOG: statement: SET auto_explain.log_timing = FALSE;
SET auto_explain.log_verbose = FALSE;
LOG: statement: SET auto_explain.log_verbose = FALSE;
SELECT relname FROM pg_class WHERE relname='pg_class';
LOG: statement: SELECT relname FROM pg_class WHERE relname='pg_class';
LOG: Planner produced plan :0
LOG: duration: 0.112 ms plan:
Query Text: SELECT relname FROM pg_class WHERE relname='pg_class';
Seq Scan on pg_class (cost=0.00..12.46 rows=1 width=64) (actual rows=1 loops=1)
Filter: (relname = 'pg_class'::name)
Rows Removed by Filter: 438
(slice0) Executor memory: 32K bytes.
Memory used: 128000kB
relname
----------
pg_class
(1 row)
SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
LOG: statement: SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
LOG: duration: 195.785 ms plan:
Query Text: SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
Aggregate (cost=0.00..1326086.34 rows=1 width=8) (actual rows=1 loops=1)
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1326086.34 rows=1 width=8) (actual rows=3 loops=1)
-> Aggregate (cost=0.00..1326086.34 rows=1 width=8) (actual rows=1 loops=1)
-> Nested Loop (cost=0.00..1326086.34 rows=334001 width=1) (actual rows=340340 loops=1)
Join Filter: true
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.02 rows=1001 width=1) (actual rows=1001 loops=1)
-> Seq Scan on t1 (cost=0.00..431.01 rows=334 width=1) (actual rows=340 loops=1)
-> Seq Scan on t2 (cost=0.00..431.01 rows=334 width=1) (actual rows=340 loops=1002)
(slice0) Executor memory: 67K bytes.
(slice1) Executor memory: 42K bytes avg x 3 workers, 42K bytes max (seg0).
(slice2) Executor memory: 119K bytes avg x 3 workers, 119K bytes max (seg0).
Memory used: 128000kB
count
---------
1002001
(1 row)
SET auto_explain.log_min_duration = 1;
LOG: statement: SET auto_explain.log_min_duration = 1;
SET auto_explain.log_triggers = FALSE;
LOG: statement: SET auto_explain.log_triggers = FALSE;
SET auto_explain.log_verbose = TRUE;
LOG: statement: SET auto_explain.log_verbose = TRUE;
-- this select should not dump execution plan
SELECT relname FROM pg_class WHERE relname='pg_class';
LOG: statement: SELECT relname FROM pg_class WHERE relname='pg_class';
LOG: Planner produced plan :0
relname
----------
pg_class
(1 row)
-- this select should also dump plan, since it takes too much time to run
SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
LOG: statement: SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
LOG: duration: 180.126 ms plan:
Query Text: SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
Aggregate (cost=0.00..1326086.34 rows=1 width=8) (actual rows=1 loops=1)
Output: count((count()))
-> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..1326086.34 rows=1 width=8) (actual rows=3 loops=1)
Output: (count())
-> Aggregate (cost=0.00..1326086.34 rows=1 width=8) (actual rows=1 loops=1)
Output: count()
-> Nested Loop (cost=0.00..1326086.34 rows=334001 width=1) (actual rows=340340 loops=1)
Join Filter: true
-> Broadcast Motion 3:3 (slice1; segments: 3) (cost=0.00..431.02 rows=1001 width=1) (actual rows=1001 loops=1)
-> Seq Scan on auto_explain_test.t1 (cost=0.00..431.01 rows=334 width=1) (actual rows=340 loops=1)
-> Seq Scan on auto_explain_test.t2 (cost=0.00..431.01 rows=334 width=1) (actual rows=340 loops=1002)
(slice0) Executor memory: 67K bytes.
(slice1) Executor memory: 42K bytes avg x 3 workers, 42K bytes max (seg0).
(slice2) Executor memory: 119K bytes avg x 3 workers, 119K bytes max (seg0).
Memory used: 128000kB
count
---------
1002001
(1 row)
-- clean jobs
DROP TABLE auto_explain_test.t1;
LOG: statement: DROP TABLE auto_explain_test.t1;
DROP TABLE auto_explain_test.t2;
LOG: statement: DROP TABLE auto_explain_test.t2;
DROP SCHEMA auto_explain_test;
LOG: statement: DROP SCHEMA auto_explain_test;
-- start_matchsubs
m/duration: .*$/
s/duration:\s+[0-9.]+\s*ms.*/duration: ms/
m/\(cost=[a-z_A-Z0-9. =]+\)\s+\([a-z_A-Z0-9. =]+\)/
s/\(cost=[a-z_A-Z0-9. =]+\)\s+\([a-z_A-Z0-9. =]+\)//
m/duration:.*$/
s/duration:\s+[0-9.]+\s*ms.*/duration: ms/
m/Rows Removed by Filter: .*/
s/Rows Removed by Filter: .*/Rows Removed by Filter: /
m/Executor [mM]emory: .*/
s/Executor [mM]emory: .*/Executor Memory: /
m/Memory used:\s+[0-9kKmMgGB]+/
s/Memory used:\s+[0-9kKmMgGB]+/Memory used:/
m/Buckets: [0-9]+\s+Batches: [0-9]+\s+Memory Usage:\s+[0-9kKmMgG]B/
s/Buckets: [0-9]+\s+Batches: [0-9]+\s+Memory Usage:\s+[0-9kKmMgG]B/Buckets: 524288 Batches: 1 Memory Usage: 4kB/
m/Rows Removed by Filter: .*/
s/Rows Removed by Filter: .*/Rows Removed by Filter: /
m/Extra Text: Hash chain length [0-9.]+ avg, [0-9]+ max, using [0-9]+ of [0-9]+ buckets/
s/Extra Text: Hash chain length [0-9.]+ avg, [0-9]+ max, using [0-9]+ of [0-9]+ buckets/Extra Text: Hash chain length 1.5 avg, 3 max, using 91 of 524288 buckets/
m/Memory used:\s+[0-9]+kB.*/
s/Memory used:\s+[0-9]+kB.*/Memory used:/
m/work_mem: [0-9kKmMgG]B Segments: [-0-9]+ Max: [0-9kKmMgG]+B.*/
s/work_mem: [0-9kKmMgG]B Segments: [-0-9]+ Max: [0-9kKmMgG]+B.*/work_mem: 4kB Segments: 1 Max: 4kB/
-- end_matchsubs
-- start_matchignore
# orca failed to produce the expected plan, ignore this message
m/LOG: Planner produced plan :0/
-- end_matchignore
CREATE SCHEMA auto_explain_test;
CREATE TABLE auto_explain_test.t1(a int);
INSERT INTO auto_explain_test.t1 VALUES(generate_series(0, 1000));
CREATE TABLE auto_explain_test.t2(b int);
INSERT INTO auto_explain_test.t2 VALUES(generate_series(0, 1000));
SET enable_nestloop = ON;
SET CLIENT_MIN_MESSAGES = LOG;
LOAD 'auto_explain';
SET auto_explain.log_analyze = TRUE;
@@ -9,11 +16,18 @@ SET auto_explain.log_timing = FALSE;
SET auto_explain.log_verbose = FALSE;
SELECT relname FROM pg_class WHERE relname='pg_class';
SELECT count(*)>0 FROM pg_class, pg_index WHERE oid = indrelid AND indisunique;
SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
SET auto_explain.log_min_duration = 1;
SET auto_explain.log_triggers = FALSE;
SET auto_explain.log_verbose = TRUE;
-- this select should not dump execution plan
SELECT relname FROM pg_class WHERE relname='pg_class';
SELECT count(*)>0 FROM pg_class, pg_index WHERE oid = indrelid AND indisunique;
-- this select should also dump plan, since it takes too much time to run
SELECT count(*) FROM auto_explain_test.t1, auto_explain_test.t2;
-- clean jobs
DROP TABLE auto_explain_test.t1;
DROP TABLE auto_explain_test.t2;
DROP SCHEMA auto_explain_test;