Skip to content

Commit

Permalink
Fix parallel_workers not taking effect when set in CREATE TABLE for AO tables (#168)
Browse files Browse the repository at this point in the history
The parallel_workers option in
"create table ao(c int) using ao_row with(parallel_workers=3)" doesn't
take effect because reloptions_gp.c:transformAOStdRdOptions only takes
care of storage options. This PR fixes this by adding code that also
processes parallel_workers.
  • Loading branch information
Ray-Eldath authored Sep 1, 2023
1 parent 9e12bd5 commit 55aacda
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/backend/access/common/reloptions_gp.c
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,15 @@ transformAOStdRdOptions(StdRdOptions *opts, Datum withOpts)
}
}

if (opts->parallel_workers != -1)
{
d = CStringGetTextDatum(psprintf("%s=%d",
"parallel_workers",
opts->parallel_workers));
astate = accumArrayResult(astate, d, false, TEXTOID,
CurrentMemoryContext);
}

if ((opts->blocksize != AO_DEFAULT_BLOCKSIZE) && !foundBlksz)
{
d = CStringGetTextDatum(psprintf("%s=%d",
Expand Down
133 changes: 133 additions & 0 deletions src/test/regress/expected/gp_parallel.out
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,139 @@ create schema test_parallel;
set search_path to test_parallel;
-- set this to default in case regress change it by gpstop.
set gp_appendonly_insert_files = 4;
-- CBDB(#131): test parallel_workers during create AO/AOCO table take effect
begin;
set local enable_parallel = on;
create table test_131_ao1(x int, y int) using ao_row with(parallel_workers=2);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table test_131_ao2(x int, y int) using ao_row with(parallel_workers=2);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table test_131_ao3(x int, y int) using ao_row with(parallel_workers=0);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table test_131_ao4(x int, y int) using ao_row with(parallel_workers=0);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table test_131_aoco1(x int, y int) using ao_column with(parallel_workers=2);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table test_131_aoco2(x int, y int) using ao_column with(parallel_workers=2);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table test_131_aoco3(x int, y int) using ao_column with(parallel_workers=0);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table test_131_aoco4(x int, y int) using ao_column with(parallel_workers=0);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select relname, reloptions from pg_catalog.pg_class where relname like 'test_131_ao%';
relname | reloptions
----------------+----------------------
test_131_ao1 | {parallel_workers=2}
test_131_ao2 | {parallel_workers=2}
test_131_ao3 | {parallel_workers=0}
test_131_ao4 | {parallel_workers=0}
test_131_aoco1 | {parallel_workers=2}
test_131_aoco2 | {parallel_workers=2}
test_131_aoco3 | {parallel_workers=0}
test_131_aoco4 | {parallel_workers=0}
(8 rows)

explain(locus, costs off) select count(*) from test_131_ao1, test_131_ao2 where test_131_ao1.x = test_131_ao2.x;
QUERY PLAN
------------------------------------------------------------------
Finalize Aggregate
Locus: Entry
-> Gather Motion 6:1 (slice1; segments: 6)
Locus: Entry
-> Partial Aggregate
Locus: HashedWorkers
Parallel Workers: 2
-> Parallel Hash Join
Locus: HashedWorkers
Parallel Workers: 2
Hash Cond: (test_131_ao1.x = test_131_ao2.x)
-> Parallel Seq Scan on test_131_ao1
Locus: HashedWorkers
Parallel Workers: 2
-> Parallel Hash
Locus: Hashed
-> Parallel Seq Scan on test_131_ao2
Locus: HashedWorkers
Parallel Workers: 2
Optimizer: Postgres query optimizer
(20 rows)

explain(locus, costs off) select count(*) from test_131_ao3, test_131_ao4 where test_131_ao3.x = test_131_ao4.x;
QUERY PLAN
------------------------------------------------------------------
Finalize Aggregate
Locus: Entry
-> Gather Motion 3:1 (slice1; segments: 3)
Locus: Entry
-> Partial Aggregate
Locus: Hashed
-> Hash Join
Locus: Hashed
Hash Cond: (test_131_ao3.x = test_131_ao4.x)
-> Seq Scan on test_131_ao3
Locus: Hashed
-> Hash
Locus: Hashed
-> Seq Scan on test_131_ao4
Locus: Hashed
Optimizer: Postgres query optimizer
(16 rows)

explain(locus, costs off) select count(*) from test_131_aoco1, test_131_aoco2 where test_131_aoco1.x = test_131_aoco2.x;
QUERY PLAN
----------------------------------------------------------------------
Finalize Aggregate
Locus: Entry
-> Gather Motion 6:1 (slice1; segments: 6)
Locus: Entry
-> Partial Aggregate
Locus: HashedWorkers
Parallel Workers: 2
-> Parallel Hash Join
Locus: HashedWorkers
Parallel Workers: 2
Hash Cond: (test_131_aoco1.x = test_131_aoco2.x)
-> Parallel Seq Scan on test_131_aoco1
Locus: HashedWorkers
Parallel Workers: 2
-> Parallel Hash
Locus: Hashed
-> Parallel Seq Scan on test_131_aoco2
Locus: HashedWorkers
Parallel Workers: 2
Optimizer: Postgres query optimizer
(20 rows)

explain(locus, costs off) select count(*) from test_131_aoco3, test_131_aoco4 where test_131_aoco3.x = test_131_aoco4.x;
QUERY PLAN
----------------------------------------------------------------------
Finalize Aggregate
Locus: Entry
-> Gather Motion 3:1 (slice1; segments: 3)
Locus: Entry
-> Partial Aggregate
Locus: Hashed
-> Hash Join
Locus: Hashed
Hash Cond: (test_131_aoco3.x = test_131_aoco4.x)
-> Seq Scan on test_131_aoco3
Locus: Hashed
-> Hash
Locus: Hashed
-> Seq Scan on test_131_aoco4
Locus: Hashed
Optimizer: Postgres query optimizer
(16 rows)

abort;
create table ao1(x int, y int) with(appendonly=true);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
Expand Down
19 changes: 19 additions & 0 deletions src/test/regress/sql/gp_parallel.sql
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,25 @@ set search_path to test_parallel;
-- set this to default in case regress change it by gpstop.
set gp_appendonly_insert_files = 4;

-- CBDB(#131): test parallel_workers during create AO/AOCO table take effect
begin;
set local enable_parallel = on;
create table test_131_ao1(x int, y int) using ao_row with(parallel_workers=2);
create table test_131_ao2(x int, y int) using ao_row with(parallel_workers=2);
create table test_131_ao3(x int, y int) using ao_row with(parallel_workers=0);
create table test_131_ao4(x int, y int) using ao_row with(parallel_workers=0);
create table test_131_aoco1(x int, y int) using ao_column with(parallel_workers=2);
create table test_131_aoco2(x int, y int) using ao_column with(parallel_workers=2);
create table test_131_aoco3(x int, y int) using ao_column with(parallel_workers=0);
create table test_131_aoco4(x int, y int) using ao_column with(parallel_workers=0);

select relname, reloptions from pg_catalog.pg_class where relname like 'test_131_ao%';
explain(locus, costs off) select count(*) from test_131_ao1, test_131_ao2 where test_131_ao1.x = test_131_ao2.x;
explain(locus, costs off) select count(*) from test_131_ao3, test_131_ao4 where test_131_ao3.x = test_131_ao4.x;
explain(locus, costs off) select count(*) from test_131_aoco1, test_131_aoco2 where test_131_aoco1.x = test_131_aoco2.x;
explain(locus, costs off) select count(*) from test_131_aoco3, test_131_aoco4 where test_131_aoco3.x = test_131_aoco4.x;
abort;

create table ao1(x int, y int) with(appendonly=true);
create table ao2(x int, y int) with(appendonly=true);
create table aocs1(x int, y int) with(appendonly=true, orientation=column);
Expand Down

0 comments on commit 55aacda

Please sign in to comment.