Sunday 2 October 2016

All about Greenplum Partitioning - The system catalog tables at your help

I was trying to improve and create ways to identify the partitioning mechanism of the Greenplum tables.

Please see the queries below that will help you find all the partitioning details about a Greenplum table

In the next post, we will see how to leverage this information and automate some of your Data Engineering Activities using Python. Finally, we will create a pipeline to move the data to HDFS using Apache Sqoop.


DROP TABLE db_retail.sales ;
CREATE TABLE db_retail.sales (id int, date date, amt decimal(10,2))
DISTRIBUTED BY (id)
PARTITION BY RANGE (date)
( START (date '2008-01-01') INCLUSIVE
   END (date '2009-01-01') EXCLUSIVE every (interval '1 mon')
  ,Start (date '2009-01-01') inclusive
  end (date '2010-01-01') exclusive
   EVERY (INTERVAL '1 day') );

drop table db_retail.sales_amt;
CREATE TABLE db_retail.sales_amt (id int, date date, amt decimal(10,2))
DISTRIBUTED BY (id)
PARTITION BY RANGE (amt)
( START (1) INCLUSIVE
   END (10000) EXCLUSIVE every (10)
 );

--- check all the partitions for the table
select * from pg_partitions where schemaname = 'db_retail' and tablename = 'sales';

select * from pg_partitions where schemaname = 'db_retail' and tablename = 'sales_amt';


--- query to analyze each inherited partition table

select partitiontablename from pg_partitions where schemaname = 'db_retail' and tablename = 'sales';

select * from pg_inherits
join pg_class c on c.oid = inhrelid
where inhparent = 'sales'::regclass;

---fetch interval wise max and min ranges for each partition

select min(partitionrangestart),max(partitionrangeend)
, current_date 
, partitioneveryclause
, partitiontype 
from pg_partitions where schemaname = 'db_retail' and tablename = 'sales'
group by 3,4,5;

select * from information_schema.tables limit 10;

--- recreate the table with the desired partition and column
select column_name, data_type, character_maximum_length
from INFORMATION_SCHEMA.COLUMNS where table_name = 'sales' and table_schema = 'db_retail';

--- find the partition column list
select columnname, position_in_partition_key from pg_partition_columns where schemaname = 'db_retail' and tablename = 'sales';