您的位置:首页 > 其它

历史拉链表的应用--余额历史中每日平均余额的算法

2011-01-01 20:42 543 查看
在数据仓库中，协议余额历史等数据通常以拉链表的形式存储，以节省空间；但拉链表的应用（如计算每日平均余额）要比每日全量表麻烦一些。本文针对余额历史表中每日平均余额的算法作一个深入的探讨。
1、 建表及插入拉链表测试数据
-- 1. History zipper table: one row per ID and START_DATE, carrying the
--    balance together with the start and end date of that row.
CREATE TABLE EDW_T100_BAL_H
(
    ID         VARCHAR2(8)  NOT NULL,
    START_DATE DATE         NOT NULL,
    BAL        NUMBER(22,2),
    END_DATE   DATE         NOT NULL,
    -- Primary key declared inline; the constraint keeps its original name.
    CONSTRAINT EDW_T100_BAL_H PRIMARY KEY (ID, START_DATE)
);

-- Column descriptions stored in the data dictionary.
COMMENT ON COLUMN EDW_T100_BAL_H.ID         IS 'ID';
COMMENT ON COLUMN EDW_T100_BAL_H.START_DATE IS '开始日期';
COMMENT ON COLUMN EDW_T100_BAL_H.BAL        IS '余额';
COMMENT ON COLUMN EDW_T100_BAL_H.END_DATE   IS '结束日期';
-- 2. Test rows for the history zipper table.
--    ID 1 has three consecutive rows (each END_DATE equals the next
--    START_DATE); IDs 2 and 3 have a single row each.
INSERT INTO EDW_T100_BAL_H (ID, START_DATE, BAL, END_DATE)
VALUES ('1', DATE '2011-01-01', 8.00, DATE '2011-01-03');

INSERT INTO EDW_T100_BAL_H (ID, START_DATE, BAL, END_DATE)
VALUES ('1', DATE '2011-01-03', 10.00, DATE '2011-01-08');

INSERT INTO EDW_T100_BAL_H (ID, START_DATE, BAL, END_DATE)
VALUES ('1', DATE '2011-01-08', 9.00, DATE '2011-02-10');

INSERT INTO EDW_T100_BAL_H (ID, START_DATE, BAL, END_DATE)
VALUES ('2', DATE '2011-01-01', 18.00, DATE '2011-02-01');

INSERT INTO EDW_T100_BAL_H (ID, START_DATE, BAL, END_DATE)
VALUES ('3', DATE '2011-01-01', 6.00, DATE '2011-01-06');

COMMIT;
-- 3. Temporary staging table (TIL = time interval).
--    Same column layout as EDW_T100_BAL_H; its rows are discarded at every
--    commit (ON COMMIT DELETE ROWS).
CREATE GLOBAL TEMPORARY TABLE TMP_T100_BAL_H_TIL
(
    ID         VARCHAR2(8)  NOT NULL,
    START_DATE DATE         NOT NULL,
    BAL        NUMBER(22,2),
    END_DATE   DATE         NOT NULL
)
ON COMMIT DELETE ROWS;
-- 4. Target table holding the final result: one row per ID and reporting
--    interval, with the day count, the summed balance and the per-day average.
CREATE TABLE EDW_T100_PER_AVG_BAL_TIL
(
    ID                      VARCHAR2(8) NOT NULL,
    INTERVAL_START_DATE     DATE        NOT NULL,
    INTERVAL_END_DATE       DATE        NOT NULL,
    INTERVAL_DAYS           NUMBER,
    INTERVAL_SUM_BAL        NUMBER(22,2),
    INTERVAL_PERDAY_AVG_BAL NUMBER(22,2),
    -- Primary key declared inline; the constraint keeps its original name.
    CONSTRAINT P_EDW_T100_PER_AVG_BAL_TIL
        PRIMARY KEY (ID, INTERVAL_START_DATE, INTERVAL_END_DATE)
);

-- Table description stored in the data dictionary.
COMMENT ON TABLE EDW_T100_PER_AVG_BAL_TIL IS '区段每日平均余额表';

-- Column descriptions stored in the data dictionary.
COMMENT ON COLUMN EDW_T100_PER_AVG_BAL_TIL.ID                      IS 'ID';
COMMENT ON COLUMN EDW_T100_PER_AVG_BAL_TIL.INTERVAL_START_DATE     IS '区段开始日期';
COMMENT ON COLUMN EDW_T100_PER_AVG_BAL_TIL.INTERVAL_END_DATE       IS '区段结束日期';
COMMENT ON COLUMN EDW_T100_PER_AVG_BAL_TIL.INTERVAL_DAYS           IS '区段天数据';
COMMENT ON COLUMN EDW_T100_PER_AVG_BAL_TIL.INTERVAL_SUM_BAL        IS '区段ID总余额';
COMMENT ON COLUMN EDW_T100_PER_AVG_BAL_TIL.INTERVAL_PERDAY_AVG_BAL IS '区段ID每日平均余额';

2、 实现余额每日平均算法的代码
CREATE OR REPLACE PROCEDURE SP_H_TAB_PER_AVG_BAL(
    P_INTERVAL_START_DATE VARCHAR2,
    P_INTERVAL_END_DATE   VARCHAR2)
IS
    -- Computes, per ID, the summed balance and the average daily balance over
    -- a reporting interval, directly from the history zipper table
    -- EDW_T100_BAL_H, and stores the result in EDW_T100_PER_AVG_BAL_TIL.
    --
    -- Parameters:
    --   P_INTERVAL_START_DATE  first day of the interval, 'YYYYMMDD'
    --   P_INTERVAL_END_DATE    last day of the interval (inclusive), 'YYYYMMDD'
    --
    -- Date model: a history row is treated as valid on the half-open range
    -- [START_DATE, END_DATE), so it contributes (END_DATE - START_DATE) days.
    -- The inclusive reporting interval [start, end] is therefore handled as
    -- the half-open range [start, end + 1).
    --
    -- Side effects: deletes any previous result for the same interval,
    -- empties and refills TMP_T100_BAL_H_TIL, and COMMITs.
    --
    -- Raises: ORA-20001 when the end date is earlier than the start date;
    -- TO_DATE errors when a parameter does not match 'YYYYMMDD'.
    V_FROM_DATE  DATE;    -- first day of the interval
    V_LAST_DATE  DATE;    -- last day of the interval (inclusive)
    V_UNTIL_DATE DATE;    -- day after the last day (exclusive upper bound)
    V_DAY_COUNT  NUMBER;  -- number of days in the interval
BEGIN
    -- Parse the parameters once instead of repeating TO_DATE in every predicate.
    V_FROM_DATE := TO_DATE(P_INTERVAL_START_DATE, 'YYYYMMDD');
    V_LAST_DATE := TO_DATE(P_INTERVAL_END_DATE, 'YYYYMMDD');

    -- A reversed interval would give a zero or negative day count
    -- (division by zero or meaningless averages), so reject it up front.
    IF V_LAST_DATE < V_FROM_DATE THEN
        RAISE_APPLICATION_ERROR(
            -20001,
            'SP_H_TAB_PER_AVG_BAL: interval end date ' || P_INTERVAL_END_DATE ||
            ' is earlier than interval start date ' || P_INTERVAL_START_DATE);
    END IF;

    V_UNTIL_DATE := V_LAST_DATE + 1;
    V_DAY_COUNT  := V_UNTIL_DATE - V_FROM_DATE;

    -- Make the procedure re-runnable: remove any earlier result for this interval.
    DELETE FROM EDW_T100_PER_AVG_BAL_TIL
     WHERE INTERVAL_START_DATE = V_FROM_DATE
       AND INTERVAL_END_DATE = V_LAST_DATE;

    -- Start from an empty staging table so that rows left over from earlier,
    -- uncommitted work in the same transaction cannot be counted twice.
    DELETE FROM TMP_T100_BAL_H_TIL;

    -- Clip every history row that overlaps [V_FROM_DATE, V_UNTIL_DATE) to that
    -- range and stage it.
    --   * A row starting on the last day of the interval still overlaps it.
    --   * A row whose END_DATE equals the last day does not cover that day and
    --     must not be stretched to include it.
    --   * A row whose END_DATE equals the first day covers no day at all.
    INSERT INTO TMP_T100_BAL_H_TIL
        (ID
        ,START_DATE
        ,BAL
        ,END_DATE
        )
    SELECT A.ID
          ,GREATEST(A.START_DATE, V_FROM_DATE)   -- never earlier than the interval start
          ,A.BAL
          ,LEAST(A.END_DATE, V_UNTIL_DATE)       -- never later than the exclusive interval end
      FROM EDW_T100_BAL_H A
     WHERE A.START_DATE < V_UNTIL_DATE
       AND A.END_DATE > V_FROM_DATE;

    -- Aggregate the clipped rows: balance * days per row, summed per ID, then
    -- divided by the number of days in the interval.
    INSERT INTO EDW_T100_PER_AVG_BAL_TIL
        (ID
        ,INTERVAL_START_DATE
        ,INTERVAL_END_DATE
        ,INTERVAL_DAYS
        ,INTERVAL_SUM_BAL
        ,INTERVAL_PERDAY_AVG_BAL
        )
    SELECT T.ID
          ,V_FROM_DATE
          ,V_LAST_DATE
          ,V_DAY_COUNT
          ,SUM((T.END_DATE - T.START_DATE) * T.BAL)                -- interval total balance
          ,SUM((T.END_DATE - T.START_DATE) * T.BAL) / V_DAY_COUNT  -- total / days in interval
      FROM TMP_T100_BAL_H_TIL T
     GROUP BY T.ID;

    COMMIT;
END SP_H_TAB_PER_AVG_BAL;
/

说明：以上方法是根据历史拉链表直接算出每日平均余额；其实，我们也可以先将历史拉链表展开为每日全量表（展开方法见：http://blog.csdn.net/nsj820/archive/2010/12/16/6080701.aspx），再根据展开后的全量表计算每日平均余额。如下：
(1)、历史表展开为全量表的代码(建表语句等略)
-- Expands the history zipper table EDW_T100_BAL_H into the daily table
-- EDW_T100_BAL_ALL: each history row becomes one row per day D with
-- START_DATE <= D < END_DATE, carrying that row's balance.
DECLARE
    -- History rows to expand. Rows carrying the open-ended marker 3000-12-31
    -- are capped at SYSDATE.
    -- NOTE(review): the original left TO_DATE(P_ETLDATE, 'YYYYMMDD') commented
    -- out next to SYSDATE; presumably an ETL job would pass its load date
    -- here instead - confirm.
    CURSOR LSLL_ID IS
        SELECT ID,
               START_DATE,
               BAL,
               DECODE(END_DATE,
                      TO_DATE('30001231', 'YYYYMMDD'),
                      SYSDATE,
                      END_DATE) END_DATE
          FROM edw_t100_bal_h;
BEGIN
    -- Full reload: empty the target first (TRUNCATE is DDL, hence dynamic SQL).
    EXECUTE IMMEDIATE 'TRUNCATE TABLE EDW_T100_BAL_ALL';

    -- The cursor FOR loop record takes its field types from the cursor, so BAL
    -- keeps its numeric type; no fixed-width scalar can overflow on fetch.
    FOR REC_H IN LSLL_ID LOOP
        -- SYS_MATIAN_DATE serves only as a row source: ROWNUM 1, 2, 3, ...
        -- yields START_DATE, START_DATE + 1, ... up to the day before END_DATE.
        -- NOTE(review): assumes SYS_MATIAN_DATE has at least as many rows as
        -- the longest history row spans in days - its contents are not
        -- visible here.
        INSERT INTO EDW_T100_BAL_ALL
            (ID, DATA_DATE, BAL)
        SELECT REC_H.ID, REC_H.START_DATE + ROWNUM - 1 AS DATA_DATE, REC_H.BAL
          FROM SYS_MATIAN_DATE
         WHERE REC_H.START_DATE + ROWNUM - 1 < REC_H.END_DATE;
    END LOOP;

    -- Commit once after the whole expansion instead of inside the fetch loop.
    COMMIT;
END;
/

(2)、根据全量表进行每日平均余额的计算
-- Total and average daily balance per ID over the inclusive date range
-- 2010-01-01 .. 2011-01-26, computed from the daily table.
-- The divisor is derived from the range bounds (391 days here) rather than
-- hard-coded, so it stays correct if the bounds are edited.
-- The range filter is half-open (< last day + 1) so that a DATA_DATE carrying
-- a time component on the last day is still included.
SELECT ID,
       SUM(NVL(BAL, 0)) 余额总值,
       SUM(NVL(BAL, 0))
         / (TO_DATE('20110126', 'YYYYMMDD') - TO_DATE('20100101', 'YYYYMMDD') + 1) 平均每日余额
  FROM EDW_T100_BAL_ALL
 WHERE DATA_DATE >= TO_DATE('20100101', 'YYYYMMDD')
   AND DATA_DATE < TO_DATE('20110126', 'YYYYMMDD') + 1
 GROUP BY ID;
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: