Fix parallel_workers being ignored when creating an AO table (#168)

The parallel_workers option in "create table ao(c int) using ao_row with(parallel_workers=3)" doesn't take effect because reloptions_gp.c:transformAOStdRdOptions only handles storage options. This PR fixes that by adding code that processes parallel_workers.
apache · Sep 1, 2023 · 55aacda · 55aacda
1 parent 9e12bd5
commit 55aacda
Show file tree

Hide file tree

Showing 3 changed files with 161 additions and 0 deletions.
diff --git a/src/backend/access/common/reloptions_gp.c b/src/backend/access/common/reloptions_gp.c
@@ -633,6 +633,15 @@ transformAOStdRdOptions(StdRdOptions *opts, Datum withOpts)
 		}
 	}
 
+	if (opts->parallel_workers != -1)
+	{
+		d = CStringGetTextDatum(psprintf("%s=%d",
+		                                 "parallel_workers",
+		                                 opts->parallel_workers));
+		astate = accumArrayResult(astate, d, false, TEXTOID,
+		                          CurrentMemoryContext);
+	}
+
 	if ((opts->blocksize != AO_DEFAULT_BLOCKSIZE) && !foundBlksz)
 	{
 		d = CStringGetTextDatum(psprintf("%s=%d",

diff --git a/src/test/regress/expected/gp_parallel.out b/src/test/regress/expected/gp_parallel.out
@@ -37,6 +37,139 @@ create schema test_parallel;
 set search_path to test_parallel;
 -- set this to default in case regress change it by gpstop.
 set gp_appendonly_insert_files = 4;
+-- CBDB(#131): test parallel_workers during create AO/AOCO table take effect
+begin;
+set local enable_parallel = on;
+create table test_131_ao1(x int, y int) using ao_row with(parallel_workers=2);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table test_131_ao2(x int, y int) using ao_row with(parallel_workers=2);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table test_131_ao3(x int, y int) using ao_row with(parallel_workers=0);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table test_131_ao4(x int, y int) using ao_row with(parallel_workers=0);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table test_131_aoco1(x int, y int) using ao_column with(parallel_workers=2);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table test_131_aoco2(x int, y int) using ao_column with(parallel_workers=2);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table test_131_aoco3(x int, y int) using ao_column with(parallel_workers=0);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table test_131_aoco4(x int, y int) using ao_column with(parallel_workers=0);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+select relname, reloptions from pg_catalog.pg_class where relname like 'test_131_ao%';
+    relname     |      reloptions      
+----------------+----------------------
+ test_131_ao1   | {parallel_workers=2}
+ test_131_ao2   | {parallel_workers=2}
+ test_131_ao3   | {parallel_workers=0}
+ test_131_ao4   | {parallel_workers=0}
+ test_131_aoco1 | {parallel_workers=2}
+ test_131_aoco2 | {parallel_workers=2}
+ test_131_aoco3 | {parallel_workers=0}
+ test_131_aoco4 | {parallel_workers=0}
+(8 rows)
+
+explain(locus, costs off) select count(*) from test_131_ao1, test_131_ao2 where test_131_ao1.x = test_131_ao2.x;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Finalize Aggregate
+   Locus: Entry
+   ->  Gather Motion 6:1  (slice1; segments: 6)
+         Locus: Entry
+         ->  Partial Aggregate
+               Locus: HashedWorkers
+               Parallel Workers: 2
+               ->  Parallel Hash Join
+                     Locus: HashedWorkers
+                     Parallel Workers: 2
+                     Hash Cond: (test_131_ao1.x = test_131_ao2.x)
+                     ->  Parallel Seq Scan on test_131_ao1
+                           Locus: HashedWorkers
+                           Parallel Workers: 2
+                     ->  Parallel Hash
+                           Locus: Hashed
+                           ->  Parallel Seq Scan on test_131_ao2
+                                 Locus: HashedWorkers
+                                 Parallel Workers: 2
+ Optimizer: Postgres query optimizer
+(20 rows)
+
+explain(locus, costs off) select count(*) from test_131_ao3, test_131_ao4 where test_131_ao3.x = test_131_ao4.x;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Finalize Aggregate
+   Locus: Entry
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Locus: Entry
+         ->  Partial Aggregate
+               Locus: Hashed
+               ->  Hash Join
+                     Locus: Hashed
+                     Hash Cond: (test_131_ao3.x = test_131_ao4.x)
+                     ->  Seq Scan on test_131_ao3
+                           Locus: Hashed
+                     ->  Hash
+                           Locus: Hashed
+                           ->  Seq Scan on test_131_ao4
+                                 Locus: Hashed
+ Optimizer: Postgres query optimizer
+(16 rows)
+
+explain(locus, costs off) select count(*) from test_131_aoco1, test_131_aoco2 where test_131_aoco1.x = test_131_aoco2.x;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Finalize Aggregate
+   Locus: Entry
+   ->  Gather Motion 6:1  (slice1; segments: 6)
+         Locus: Entry
+         ->  Partial Aggregate
+               Locus: HashedWorkers
+               Parallel Workers: 2
+               ->  Parallel Hash Join
+                     Locus: HashedWorkers
+                     Parallel Workers: 2
+                     Hash Cond: (test_131_aoco1.x = test_131_aoco2.x)
+                     ->  Parallel Seq Scan on test_131_aoco1
+                           Locus: HashedWorkers
+                           Parallel Workers: 2
+                     ->  Parallel Hash
+                           Locus: Hashed
+                           ->  Parallel Seq Scan on test_131_aoco2
+                                 Locus: HashedWorkers
+                                 Parallel Workers: 2
+ Optimizer: Postgres query optimizer
+(20 rows)
+
+explain(locus, costs off) select count(*) from test_131_aoco3, test_131_aoco4 where test_131_aoco3.x = test_131_aoco4.x;
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Finalize Aggregate
+   Locus: Entry
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Locus: Entry
+         ->  Partial Aggregate
+               Locus: Hashed
+               ->  Hash Join
+                     Locus: Hashed
+                     Hash Cond: (test_131_aoco3.x = test_131_aoco4.x)
+                     ->  Seq Scan on test_131_aoco3
+                           Locus: Hashed
+                     ->  Hash
+                           Locus: Hashed
+                           ->  Seq Scan on test_131_aoco4
+                                 Locus: Hashed
+ Optimizer: Postgres query optimizer
+(16 rows)
+
+abort;
 create table ao1(x int, y int) with(appendonly=true);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Greenplum Database data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.

diff --git a/src/test/regress/sql/gp_parallel.sql b/src/test/regress/sql/gp_parallel.sql
@@ -39,6 +39,25 @@ set search_path to test_parallel;
 -- set this to default in case regress change it by gpstop.
 set gp_appendonly_insert_files = 4;
 
+-- CBDB(#131): test parallel_workers during create AO/AOCO table take effect
+begin;
+set local enable_parallel = on;
+create table test_131_ao1(x int, y int) using ao_row with(parallel_workers=2);
+create table test_131_ao2(x int, y int) using ao_row with(parallel_workers=2);
+create table test_131_ao3(x int, y int) using ao_row with(parallel_workers=0);
+create table test_131_ao4(x int, y int) using ao_row with(parallel_workers=0);
+create table test_131_aoco1(x int, y int) using ao_column with(parallel_workers=2);
+create table test_131_aoco2(x int, y int) using ao_column with(parallel_workers=2);
+create table test_131_aoco3(x int, y int) using ao_column with(parallel_workers=0);
+create table test_131_aoco4(x int, y int) using ao_column with(parallel_workers=0);
+
+select relname, reloptions from pg_catalog.pg_class where relname like 'test_131_ao%';
+explain(locus, costs off) select count(*) from test_131_ao1, test_131_ao2 where test_131_ao1.x = test_131_ao2.x;
+explain(locus, costs off) select count(*) from test_131_ao3, test_131_ao4 where test_131_ao3.x = test_131_ao4.x;
+explain(locus, costs off) select count(*) from test_131_aoco1, test_131_aoco2 where test_131_aoco1.x = test_131_aoco2.x;
+explain(locus, costs off) select count(*) from test_131_aoco3, test_131_aoco4 where test_131_aoco3.x = test_131_aoco4.x;
+abort;
+
 create table ao1(x int, y int) with(appendonly=true);
 create table ao2(x int, y int) with(appendonly=true);
 create table aocs1(x int, y int) with(appendonly=true, orientation=column);