您的位置:首页 > 数据库

PostgreSQL从继承到分区(二)

2014-07-04 16:16 411 查看

二、Partitioning_table

2.1 关于分区表

借助表的继承特性PostgreSQL实现了分区表功能,虽然相比Oracle、MySQL的分区表来说其实现过程比较麻烦,但是这种方式同样能达到分区的效果,而且对大表的查询优化效果很明显。PostgreSQL的分区表概念与其它数据库的分区表概念相同,都是将逻辑上的一个大表分割成物理上的多个子块。分区带来的不仅仅是访问性能上的提升,而且还可以使管理与维护变得更加方便、简单。
表分区的优点:
可以显著提升表的查询性能,尤其是当频繁的查询访问发生在一个单一的分区上时,性能的提升效果将会非常显著;
分区减小了每个块上的索引大小,使之前由于整个数据集上的索引过大而无法完全放入内存中导致读写操作产生大量的磁盘访问的问题得到改善;
当查询或更新访问的是单一分区中的大部分数据时,可以利用对该分区的顺序扫描来提升性能,而不必使用索引在整个表上进行分散的随机读取;
批量加载或删除可通过添加或删除分区来完成。使用ALTER TABLE NO INHERIT和DROP TABLE远远要比批量操作快得多,此类命令避免了由于批量DELETE而引发的VACUUM额外开销;
可以将很少用到的数据移到便宜的、速度慢的存储介质上去。

通常建议当表的大小超过了服务器物理内存的时候创建分区表,但是究竟哪个表在分区策略中受益最终取决于应用程序。
目前,PostgreSQL没有单独的分区表功能,而是通过表继承的方式来实现该功能,每个分区被创建为一个唯一父表的子表,父表中通常不存放数据,它的存在只是为了代表整个数据集。
PostgreSQL目前支持的分区类型:Range Partitioning(范围分区)、List Partitioning(列表分区)。
范围分区:表被一个或多个键字字段分区成“范围”,各范围之间互不重叠,数值按其所属范围分布到不同的分区里。
列表分区:明确地列出每个分区里应该出现哪些键字值。
组合分区:将现有分区类型组合使用(先范围再列表)。

2.2 分区表实例

2.2.1 范围分区实例

创建主表:
part=# create table people(id int not null,name varchar(20) not null,logdate date not null);
CREATE TABLE


创建分区表:
part=# create table people_y2013m12( check( logdate >= DATE '2013-12-01' AND logdate < DATE '2014-01-01' ) ) inherits(people);
CREATE TABLE
part=# create table people_y2014m01( check( logdate >= DATE '2014-01-01' AND logdate < DATE '2014-02-01' ) ) inherits(people);
CREATE TABLE
part=# create table people_y2014m02( check( logdate >= DATE '2014-02-01' AND logdate < DATE '2014-03-01' ) ) inherits(people);
CREATE TABLE


为分区表创建索引:
part=# create index people_y2013m12_logdate on people_y2013m12(logdate);
CREATE INDEX
part=# create index people_y2014m01_logdate on people_y2014m01(logdate);
CREATE INDEX
part=# create index people_y2014m02_logdate on people_y2014m02(logdate);
CREATE INDEX


建立触发器函数:
part=# create or replace function people_insert_trigger()
part-# returns trigger as $$
part$# begin
part$#   if (new.logdate >= DATE '2013-12-01' and
part$#       new.logdate < DATE '2014-01-01') then
part$#      insert into people_y2013m12 values (new.*);
part$#   elsif (new.logdate >= DATE '2014-01-01' and
part$#          new.logdate < DATE '2014-02-01') then
part$#         insert into people_y2014m01 values (new.*);
part$#   elsif (new.logdate >= DATE '2014-02-01' and
part$#          new.logdate < DATE '2014-03-01') then
part$#         insert into people_y2014m02 values (new.*);
part$#   else
part$#         RAISE EXCEPTION 'Date out of range.  Fix the people_insert_trigger() function!';
part$#   end if;
part$#   return null;
part$# end;
part$# $$
part-# language plpgsql;
CREATE FUNCTION


创建触发器:
part=# CREATE TRIGGER insert_people_trigger
part-#   BEFORE INSERT ON people
part-#   FOR EACH ROW EXECUTE PROCEDURE people_insert_trigger();
CREATE TRIGGER


查看主表信息:
part=# \d+ people
Table "public.people"
Column  |         Type          | Modifiers | Storage  | Stats target | Description
---------+-----------------------+-----------+----------+--------------+-------------
id      | integer               | not null  | plain    |              |
name    | character varying(20) | not null  | extended |              |
logdate | date                  | not null  | plain    |              |
Triggers:
insert_people_trigger BEFORE INSERT ON people FOR EACH ROW EXECUTE PROCEDURE people_insert_trigger()
Child tables: people_y2013m12,
people_y2014m01,
people_y2014m02
Has OIDs: no


测试:
part=# insert into people values (1,'lian1','2013-12-10');
INSERT 0 0
part=# insert into people values (2,'lian2','2014-01-10');
INSERT 0 0
part=# insert into people values (3,'lian3','2014-02-10');
INSERT 0 0
part=# insert into people values (4,'lian4','2014-03-10');
ERROR:  Date out of range.  Fix the people_insert_trigger() function!
STATEMENT:  insert into people values (4,'lian4','2014-03-10');
ERROR:  Date out of range.  Fix the people_insert_trigger() function!

查看数据及数据在分区上的具体分布:
part=# select * from people;
id | name  |  logdate
----+-------+------------
1 | lian1 | 2013-12-10
2 | lian2 | 2014-01-10
3 | lian3 | 2014-02-10
(3 rows)
part=# SELECT p.relname,c.*  FROM people c, pg_class p WHERE c.tableoid = p.oid;
relname     | id | name  |  logdate
-----------------+----+-------+------------
people_y2013m12 |  1 | lian1 | 2013-12-10
people_y2014m01 |  2 | lian2 | 2014-01-10
people_y2014m02 |  3 | lian3 | 2014-02-10
(3 rows)

2.2.2 列表分区实例

创建主表:
list=# create table customer(id int,name varchar(20),city varchar(30));
CREATE TABLE


创建分区表:
list=# create table customer_sd( check(city in ('jinan','qingdao','weifang','zibo')) ) inherits(customer);
CREATE TABLE
list=# create table customer_sx( check(city in ('xian','xianyang','weinan','baoji')) ) inherits(customer);
CREATE TABLE

查看主表结构信息:
list=# \d+ customer
Table "public.customer"
Column |         Type          | Modifiers | Storage  | Stats target | Description
--------+-----------------------+-----------+----------+--------------+-------------
id     | integer               |           | plain    |              |
name   | character varying(20) |           | extended |              |
city   | character varying(30) |           | extended |              |
Child tables: customer_sd,
customer_sx
Has OIDs: no


为分区表创建索引:
list=# create index customer_sd_city on customer_sd(city);
CREATE INDEX
list=# create index customer_sx_city on customer_sx(city);
CREATE INDEX


创建触发器函数:
list=# create or replace function customer_insert_trigger()
list-# returns trigger as $$
list$# begin
list$#   if (new.city in ('jinan','qingdao','weifang','zibo')) then
list$#      insert into customer_sd values (new.*);
list$#   elsif (new.city in ('xian','xianyang','weinan','baoji')) then
list$#      insert into customer_sx values (new.*);
list$#   else
list$#      RAISE EXCEPTION 'City out of list.  Fix the customer_insert_trigger() function!';
list$#   end if;
list$#   return null;
list$# end;
list$# $$
list-# language plpgsql;
CREATE FUNCTION

创建触发器:
list=# CREATE TRIGGER insert_customer_trigger
list-#   BEFORE INSERT ON customer
list-#   FOR EACH ROW EXECUTE PROCEDURE customer_insert_trigger();
CREATE TRIGGER


查看主表结构:
list=# \d+ customer
Table "public.customer"
Column |         Type          | Modifiers | Storage  | Stats target | Description
--------+-----------------------+-----------+----------+--------------+-------------
id     | integer               |           | plain    |              |
name   | character varying(20) |           | extended |              |
city   | character varying(30) |           | extended |              |
Triggers:
insert_customer_trigger BEFORE INSERT ON customer FOR EACH ROW EXECUTE PROCEDURE customer_insert_trigger()
Child tables: customer_sd,
customer_sx
Has OIDs: no


插入测试数据:
list=# insert into customer values (1,'lian1','xian');
INSERT 0 0
list=# insert into customer values (2,'lian2','qingdao');
INSERT 0 0
list=# insert into customer values (3,'lian3','jinan');
INSERT 0 0
list=# insert into customer values (4,'lian4','weinan');
INSERT 0 0
list=# insert into customer values (5,'lian5','xianyang');
INSERT 0 0
list=# insert into customer values (6,'lian6','beijing');
ERROR:  City out of list.  Fix the customer_insert_trigger() function!

查看数据在分区上的分布:
list=# SELECT p.relname,c.*  FROM customer c, pg_class p WHERE c.tableoid = p.oid;
relname   | id | name  |   city
-------------+----+-------+----------
customer_sd |  2 | lian2 | qingdao
customer_sd |  3 | lian3 | jinan
customer_sx |  1 | lian1 | xian
customer_sx |  4 | lian4 | weinan
customer_sx |  5 | lian5 | xianyang
(5 rows)

2.3 分区管理

2.3.1 删除分区

直接删除:
part=# drop table people_y2013m12;
DROP TABLE
part=# select * from people;
id | name  |  logdate
----+-------+------------
2 | lian2 | 2014-01-10
3 | lian3 | 2014-02-10
(2 rows)

part=# \d+ people
Table "public.people"
Column  |         Type          | Modifiers | Storage  | Stats target | Description
---------+-----------------------+-----------+----------+--------------+-------------
id      | integer               | not null  | plain    |              |
name    | character varying(20) | not null  | extended |              |
logdate | date                  | not null  | plain    |              |
Triggers:
insert_people_trigger BEFORE INSERT ON people FOR EACH ROW EXECUTE PROCEDURE people_insert_trigger()
Child tables: people_y2014m01,
people_y2014m02
Has OIDs: no

或不删除分区而是仅取消继承关系:
part=# alter table people_y2014m02 no inherit people;
ALTER TABLE
{将成为一个普通表存在,数据依然存在}
part=# select * from people_y2014m02;
id | name  |  logdate
----+-------+------------
3 | lian3 | 2014-02-10
(1 row)
part=# \d+ people
Table "public.people"
Column  |         Type          | Modifiers | Storage  | Stats target | Description
---------+-----------------------+-----------+----------+--------------+-------------
id      | integer               | not null  | plain    |              |
name    | character varying(20) | not null  | extended |              |
logdate | date                  | not null  | plain    |              |
Triggers:
insert_people_trigger BEFORE INSERT ON people FOR EACH ROW EXECUTE PROCEDURE people_insert_trigger()
Child tables: people_y2014m01
Has OIDs: no


取消继承关系的分区同样可以恢复继承关系(或清空数据后再重新继承):
part=# alter table people_y2014m02 inherit people;
ALTER TABLE
part=# \d+ people
Table "public.people"
Column  |         Type          | Modifiers | Storage  | Stats target | Description
---------+-----------------------+-----------+----------+--------------+-------------
id      | integer               | not null  | plain    |              |
name    | character varying(20) | not null  | extended |              |
logdate | date                  | not null  | plain    |              |
Triggers:
insert_people_trigger BEFORE INSERT ON people FOR EACH ROW EXECUTE PROCEDURE people_insert_trigger()
Child tables: people_y2014m01,
people_y2014m02
Has OIDs: no
part=# select * from people;
id | name | logdate
----+-------+------------
2 | lian2 | 2014-01-10
3 | lian3 | 2014-02-10
(2 rows)

2.3.2 添加分区

为people重新添加分区people_y2013m12:
part=# create table people_y2013m12( check( logdate >= DATE '2013-12-01' AND logdate < DATE '2014-01-01' ) ) inherits(people);
CREATE TABLE
part=# create index people_y2013m12_logdate on people_y2013m12(logdate);
CREATE INDEX
part=# \d+ people
Table "public.people"
Column  |         Type          | Modifiers | Storage  | Stats target | Description
---------+-----------------------+-----------+----------+--------------+-------------
id      | integer               | not null  | plain    |              |
name    | character varying(20) | not null  | extended |              |
logdate | date                  | not null  | plain    |              |
Triggers:
insert_people_trigger BEFORE INSERT ON people FOR EACH ROW EXECUTE PROCEDURE people_insert_trigger()
Child tables: people_y2013m12,
people_y2014m01,
people_y2014m02
Has OIDs: no
{people_y2013m12已加入进继承关系中}

注意:若添加一个在触发器函数中未定义过范围的新分区,则需要同时修改触发器函数。建议:创建触发器函数时,一个好习惯是预先把条件范围多写一些(如多写一年或几年的时间),这样可以避免以后每次创建新分区时都要重新创建触发器函数。
另一种添加新分区的方式{创建一个与主表类似的表 → 为新表增加约束 → 建立继承关系}语法如下:
create table t_name (like parent_name including defaults including constraints);
alter table t_name add constraint constr_name check ( logdate >= DATE '……' AND logdate < DATE '……' );
alter table t_name inherit parent_name;

2.3.3 分区表查询优化

通过调整数据库系统参数constraint_exclusion,控制是否检查表约束,以达到优化查询的效果。
constraint_exclusion = on/off/partition
on:检查所有表中的约束来优化查询
off:不检查表中的约束
partition:只检查继承子表和UNION ALL子查询中涉及的约束(默认)
partition(默认):
part=# show constraint_exclusion ;
constraint_exclusion
----------------------
partition
(1 row)
part=# explain select * from people where logdate >= '2014-01-01';
QUERY PLAN
----------------------------------------------------------------------------------------------
Append  (cost=0.00..39.76 rows=567 width=66)
->  Seq Scan on people  (cost=0.00..0.00 rows=1 width=66)
Filter: (logdate >= '2014-01-01'::date)
->  Bitmap Heap Scan on people_y2014m01  (cost=6.34..19.88 rows=283 width=66)
Recheck Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Index Scan on people_y2014m01_logdate  (cost=0.00..6.27 rows=283 width=0)
Index Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Heap Scan on people_y2014m02  (cost=6.34..19.88 rows=283 width=66)
Recheck Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Index Scan on people_y2014m02_logdate  (cost=0.00..6.27 rows=283 width=0)
Index Cond: (logdate >= '2014-01-01'::date)
(11 rows)

{只扫描符合条件的分区}
on:
part=# set constraint_exclusion = on;
SET
part=# explain select * from people where logdate >= '2014-01-01';
QUERY PLAN
----------------------------------------------------------------------------------------------
Append  (cost=0.00..39.76 rows=567 width=66)
->  Seq Scan on people  (cost=0.00..0.00 rows=1 width=66)
Filter: (logdate >= '2014-01-01'::date)
->  Bitmap Heap Scan on people_y2014m01  (cost=6.34..19.88 rows=283 width=66)
Recheck Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Index Scan on people_y2014m01_logdate  (cost=0.00..6.27 rows=283 width=0)
Index Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Heap Scan on people_y2014m02  (cost=6.34..19.88 rows=283 width=66)
Recheck Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Index Scan on people_y2014m02_logdate  (cost=0.00..6.27 rows=283 width=0)
Index Cond: (logdate >= '2014-01-01'::date)
(11 rows)

{只扫描符合条件的分区}
off:
part=# set constraint_exclusion = off;
SET
part=# explain select * from people where logdate >= '2014-01-01';
QUERY PLAN
----------------------------------------------------------------------------------------------
Append  (cost=0.00..59.64 rows=850 width=66)
->  Seq Scan on people  (cost=0.00..0.00 rows=1 width=66)
Filter: (logdate >= '2014-01-01'::date)
->  Bitmap Heap Scan on people_y2014m01  (cost=6.34..19.88 rows=283 width=66)
Recheck Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Index Scan on people_y2014m01_logdate  (cost=0.00..6.27 rows=283 width=0)
Index Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Heap Scan on people_y2014m02  (cost=6.34..19.88 rows=283 width=66)
Recheck Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Index Scan on people_y2014m02_logdate  (cost=0.00..6.27 rows=283 width=0)
Index Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Heap Scan on people_y2013m12  (cost=6.34..19.88 rows=283 width=66)
Recheck Cond: (logdate >= '2014-01-01'::date)
->  Bitmap Index Scan on people_y2013m12_logdate  (cost=0.00..6.27 rows=283 width=0)
Index Cond: (logdate >= '2014-01-01'::date)
(15 rows)

{对所有分区进行扫描}

2.4 通过rule重定向实现分区规则

实现分区规则不一定要用触发器,也可通过rule的方式来实现。
创建主表:
rule=# create table people(id int not null,name varchar(20) not null,logdate date not null);
CREATE TABLE


创建分区表:
rule=# create table people_y2013m12( check( logdate >= DATE '2013-12-01' AND logdate < DATE '2014-01-01' ) ) inherits(people);
CREATE TABLE
rule=# create table people_y2014m01( check( logdate >= DATE '2014-01-01' AND logdate < DATE '2014-02-01' ) ) inherits(people);
CREATE TABLE
rule=# create table people_y2014m02( check( logdate >= DATE '2014-02-01' AND logdate < DATE '2014-03-01' ) ) inherits(people);
CREATE TABLE


在主表上创建规则:
rule=# create rule people_insert_y2013m12 as
rule-# on insert to people where
rule-#   (logdate >= DATE '2013-12-01' AND logdate < DATE '2014-01-01')
rule-# do instead
rule-#   insert into people_y2013m12 values (new.*);
CREATE RULE
rule=# create rule people_insert_y2014m01 as
rule-#  on insert to people where
rule-#    (logdate >= DATE '2014-01-01' AND logdate < DATE '2014-02-01')
rule-#  do instead
rule-#    insert into people_y2014m01 values (new.*);
CREATE RULE
rule=# create rule people_insert_y2014m02 as
rule-#  on insert to people where
rule-#    (logdate >= DATE '2014-02-01' AND logdate < DATE '2014-03-01')
rule-#  do instead
rule-#    insert into people_y2014m02 values (new.*);
CREATE RULE


查看主表信息:
rule=# \d+ people
Table "public.people"
Column  |         Type          | Modifiers | Storage  | Stats target | Description
---------+-----------------------+-----------+----------+--------------+-------------
id      | integer               | not null  | plain    |              |
name    | character varying(20) | not null  | extended |              |
logdate | date                  | not null  | plain    |              |
Rules:
people_insert_y2013m12 AS
ON INSERT TO people
WHERE new.logdate >= '2013-12-01'::date AND new.logdate < '2014-01-01'::date DO INSTEAD  INSERT INTO people_y2013m12 (id, name, logdate)
VALUES (new.id, new.name, new.logdate)
people_insert_y2014m01 AS
ON INSERT TO people
WHERE new.logdate >= '2014-01-01'::date AND new.logdate < '2014-02-01'::date DO INSTEAD  INSERT INTO people_y2014m01 (id, name, logdate)
VALUES (new.id, new.name, new.logdate)
people_insert_y2014m02 AS
ON INSERT TO people
WHERE new.logdate >= '2014-02-01'::date AND new.logdate < '2014-03-01'::date DO INSTEAD  INSERT INTO people_y2014m02 (id, name, logdate)
VALUES (new.id, new.name, new.logdate)
Child tables: people_y2013m12,
people_y2014m01,
people_y2014m02
Has OIDs: no

插入测试数据:
rule=# insert into people values (1,'lian1','2013-12-05');
INSERT 0 0
rule=# insert into people values (2,'lian2','2014-01-05');
INSERT 0 0
rule=# insert into people values (3,'lian3','2014-02-05');
INSERT 0 0
rule=# insert into people values (4,'lian4','2014-03-05');
INSERT 0 1
rule=# SELECT p.relname,c.*  FROM people c, pg_class p WHERE c.tableoid = p.oid;
relname     | id | name  |  logdate
-----------------+----+-------+------------
people          |  4 | lian4 | 2014-03-05
people_y2013m12 |  1 | lian1 | 2013-12-05
people_y2014m01 |  2 | lian2 | 2014-01-05
people_y2014m02 |  3 | lian3 | 2014-02-05
(4 rows)
{不符合条件的数据被插入到了主表中}


rule方式的优缺点:
批量插入(非copy)时,rule方式比trigger方式效率高,因为rule方式的额外开销是每条查询付出一次,而非每行付出一次;
copy会忽略rule。若要使用copy插入数据并且分区是通过rule方式实现,则需要直接copy到对应的分区表中。而trigger方式则不存在这样的问题;
rule方式中不在定义范围内的插入不会报错,而是直接将数据插入到主表中。

2.5 注意事项

分区表

由于在创建分区时系统不会自动检查分区条件的冲突性,因此在创建时需要格外注意创建代码的安全;
在对表进行update的时候,不能对分区键值进行跨区更新,因为每个分区表上都有check约束,如下:
a=# SELECT p.relname,c.*  FROM t c, pg_class p WHERE c.tableoid = p.oid;
relname   | id |  logdate
------------+----+------------
t_y2014m01 |  1 | 2014-01-10
t_y2014m02 |  2 | 2014-02-10
(2 rows)
a=# update t set logdate='2014-02-05' where logdate='2014-01-10';
ERROR:  new row for relation "t_y2014m01" violates check constraint "t_y2014m01_logdate_check"
DETAIL:  Failing row contains (1, 2014-02-05).
STATEMENT:  update t set logdate='2014-02-05' where logdate='2014-01-10';
ERROR:  new row for relation "t_y2014m01" violates check constraint "t_y2014m01_logdate_check"
DETAIL:  Failing row contains (1, 2014-02-05).
a=# update t set logdate='2014-01-05' where logdate='2014-01-10';
UPDATE 1
a=# SELECT p.relname,c.*  FROM t c, pg_class p WHERE c.tableoid = p.oid;
relname   | id |  logdate
------------+----+------------
t_y2014m01 |  1 | 2014-01-05
t_y2014m02 |  2 | 2014-02-10
(2 rows)
a=# update t set logdate='2014-01-05' where logdate='2014-01-10';
UPDATE 0
a=# SELECT p.relname,c.*  FROM t c, pg_class p WHERE c.tableoid = p.oid;
relname   | id |  logdate
------------+----+------------
t_y2014m01 |  1 | 2014-01-05
t_y2014m02 |  2 | 2014-02-10
(2 rows)
a=# update t_y2014m01 set logdate='2014-02-05' where logdate='2014-01-05';
ERROR:  new row for relation "t_y2014m01" violates check constraint "t_y2014m01_logdate_check"
DETAIL:  Failing row contains (1, 2014-02-05).
STATEMENT:  update t_y2014m01 set logdate='2014-02-05' where logdate='2014-01-05';
ERROR:  new row for relation "t_y2014m01" violates check constraint "t_y2014m01_logdate_check"
DETAIL:  Failing row contains (1, 2014-02-05).

在对表进行VACUUM或ANALYZE操作时,需要在每一个分区表上操作,并非直接对主表操作即可。

约束排除

只有当查询的where子句中包含常量(或外部提供的参数)时约束排除才有效;例如与CURRENT_TIMESTAMP这类非immutable函数的比较无法被优化,因为规划器在规划时无法知道其值会落在哪个分区;
CHECK中应避免跨数据类型的比较,因为规划器无法做出正确的判断;
由于规划器会对主表的所有分区表上的约束都进行一次检查,过多的分区会耗费大量的约束排除检查时间,因此要避免在分区多达成百上千的情况下使用。

PostgreSQL从继承到分区(一) http://my.oschina.net/lianshunke/blog/205296
PostgreSQL从继承到分区(二) http://my.oschina.net/lianshunke/blog/205296
PostgreSQL从继承到分区(三) http://my.oschina.net/lianshunke/blog/205316

本文出自 “O2P” 博客,请务必保留此出处http://lianshunke.blog.51cto.com/2510072/1434537
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: