您的位置:首页 > 其它


2016-10-12 23:46 453 查看


-- Sample table for the window-function examples: one row per employee with
-- department number, salary and hire date, loaded from '|'-delimited text.
-- The column list must be enclosed in parentheses and the statement terminated;
-- without them the DDL does not parse.
create table emp_function (
    deptno   string,
    sal      int,     -- numeric, so sum/avg/min/max compare and add by value
    hiredate string   -- text timestamps such as '1982-01-23 00:00:00.0'
)
row format delimited fields terminated by '|';

10|1300|1982-01-23 00:00:00.0
10|5000|1981-11-17 00:00:00.0
10|2450|1981-06-09 00:00:00.0
20|1100|1987-05-23 00:00:00.0
20|3000|1987-04-19 00:00:00.0
20|800|1980-12-17 00:00:00.0
20|2975|1981-04-02 00:00:00.0
20|3000|1981-12-03 00:00:00.0
30|1500|1981-09-08 00:00:00.0
30|1600|1981-02-20 00:00:00.0
30|2850|1981-05-01 00:00:00.0
30|1250|1981-09-28 00:00:00.0
30|1250|1981-02-22 00:00:00.0
30|950|1981-12-03 00:00:00.0

SUM — 注意,结果和ORDER BY相关,默认为升序

-- SUM as a window function: six different frames over the same partition
-- (deptno) so their results can be compared side by side.
-- The select head (deptno, hiredate, sal) is required for the statement to run
-- and matches the column order of the result listing.
select
    deptno,
    hiredate,
    sal,
    -- ORDER BY with no explicit frame: running total from the first row of the
    -- partition to the current row. Hive's default frame here is
    -- RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, so rows that tie on
    -- hiredate would be summed together.
    sum(sal) over (partition by deptno order by hiredate) as sal1,
    -- Explicit ROWS frame from partition start to current row; equals sal1
    -- whenever hiredate has no ties inside a partition.
    sum(sal) over (partition by deptno order by hiredate
                   rows between unbounded preceding and current row) as sal2,
    -- No ORDER BY: total over the whole partition.
    sum(sal) over (partition by deptno) as sal3,
    -- Current row plus the 3 rows before it.
    sum(sal) over (partition by deptno order by hiredate
                   rows between 3 preceding and current row) as sal4,
    -- The 3 rows before, the current row, and the 1 row after.
    sum(sal) over (partition by deptno order by hiredate
                   rows between 3 preceding and 1 following) as sal5,
    -- Current row plus every following row in the partition.
    sum(sal) over (partition by deptno order by hiredate
                   rows between current row and unbounded following) as sal6
from emp_function
order by deptno, hiredate;

deptno    hiredate               sal     sal1    sal2    sal3     sal4   sal5    sal6
10      1981-06-09 00:00:00.0   2450    2450.0  2450.0  8750.0  2450.0  7450.0  8750.0
10      1981-11-17 00:00:00.0   5000    7450.0  7450.0  8750.0  7450.0  8750.0  6300.0
10      1982-01-23 00:00:00.0   1300    8750.0  8750.0  8750.0  8750.0  8750.0  1300.0
20      1980-12-17 00:00:00.0   800     800.0   800.0   10875.0 800.0   3775.0  10875.0
20      1981-04-02 00:00:00.0   2975    3775.0  3775.0  10875.0 3775.0  6775.0  10075.0
20      1981-12-03 00:00:00.0   3000    6775.0  6775.0  10875.0 6775.0  9775.0  7100.0
20      1987-04-19 00:00:00.0   3000    9775.0  9775.0  10875.0 9775.0  10875.0 4100.0
20      1987-05-23 00:00:00.0   1100    10875.0 10875.0 10875.0 10075.0 10075.0 1100.0
30      1981-02-20 00:00:00.0   1600    1600.0  1600.0  9400.0  1600.0  2850.0  9400.0
30      1981-02-22 00:00:00.0   1250    2850.0  2850.0  9400.0  2850.0  5700.0  7800.0
30      1981-05-01 00:00:00.0   2850    5700.0  5700.0  9400.0  5700.0  7200.0  6550.0
30      1981-09-08 00:00:00.0   1500    7200.0  7200.0  9400.0  7200.0  8450.0  3700.0
30      1981-09-28 00:00:00.0   1250    8450.0  8450.0  9400.0  6850.0  7800.0  2200.0
30      1981-12-03 00:00:00.0   950     9400.0  9400.0  9400.0  6550.0  6550.0  950.0

sal1 等于起始行的值到当前行值的一个累加

sal2 等于起始行的值到当前行值的一个累加 等于sal1

sal3 就是分组内所有值相加

sal4 就是分组内当前行+往前3行的值相加 deptno 等于20 中有一条10075=1100+3000+3000+2975

sal5 就是分组内当前行+往前3行+往后一行 的值相加 deptno 等于30 7800=950+1250+1500+2850+1250

sal6 就是分组内当前行+往后所有行 的值相加 deptno 等于30 3700=950+1250+1500

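如果指定了ORDER BY但不指定ROWS BETWEEN,默认窗口为 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW,即从起点到当前行;注意这是RANGE而不是ROWS,与当前行ORDER BY值相同的行也会被一并计入(本例中同一分组内hiredate没有重复,所以sal1与sal2结果相同);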

如果不指定ORDER BY,则将分组内所有值累加;







-- AVG as a window function, using the same six frames as the SUM example.
-- The select head (deptno, hiredate, sal) is required for the statement to run
-- and matches the column order of the result listing.
select
    deptno,
    hiredate,
    sal,
    -- Default frame with ORDER BY: average from partition start to current row.
    avg(sal) over (partition by deptno order by hiredate) as sal1,
    -- Same range written explicitly as a ROWS frame.
    avg(sal) over (partition by deptno order by hiredate
                   rows between unbounded preceding and current row) as sal2,
    -- No ORDER BY: average over the whole partition.
    avg(sal) over (partition by deptno) as sal3,
    -- Current row plus the 3 rows before it.
    avg(sal) over (partition by deptno order by hiredate
                   rows between 3 preceding and current row) as sal4,
    -- The 3 rows before, the current row, and the 1 row after.
    avg(sal) over (partition by deptno order by hiredate
                   rows between 3 preceding and 1 following) as sal5,
    -- Current row plus every following row in the partition.
    avg(sal) over (partition by deptno order by hiredate
                   rows between current row and unbounded following) as sal6
from emp_function
order by deptno, hiredate;

deptno    hiredate               sal     sal1    sal2    sal3     sal4   sal5    sal6
10      1981-06-09 00:00:00.0   2450    2450.0  2450.0  2916.6666666666665      2450.0  3725.0  2916.6666666666665
10      1981-11-17 00:00:00.0   5000    3725.0  3725.0  2916.6666666666665      3725.0  2916.6666666666665      3150.0
10      1982-01-23 00:00:00.0   1300    2916.6666666666665      2916.6666666666665      2916.6666666666665      2916.6666666666665      2916.6666666666665      1300.0
20      1980-12-17 00:00:00.0   800     800.0   800.0   2175.0  800.0   1887.5  2175.0
20      1981-04-02 00:00:00.0   2975    1887.5  1887.5  2175.0  1887.5  2258.3333333333335      2518.75
20      1981-12-03 00:00:00.0   3000    2258.3333333333335      2258.3333333333335      2175.0  2258.3333333333335      2443.75 2366.6666666666665
20      1987-04-19 00:00:00.0   3000    2443.75 2443.75 2175.0  2443.75 2175.0  2050.0
20      1987-05-23 00:00:00.0   1100    2175.0  2175.0  2175.0  2518.75 2518.75 1100.0
30      1981-02-20 00:00:00.0   1600    1600.0  1600.0  1566.6666666666667      1600.0  1425.0  1566.6666666666667
30      1981-02-22 00:00:00.0   1250    1425.0  1425.0  1566.6666666666667      1425.0  1900.0  1560.0
30      1981-05-01 00:00:00.0   2850    1900.0  1900.0  1566.6666666666667      1900.0  1800.0  1637.5
30      1981-09-08 00:00:00.0   1500    1800.0  1800.0  1566.6666666666667      1800.0  1690.0  1233.3333333333333
30      1981-09-28 00:00:00.0   1250    1690.0  1690.0  1566.6666666666667      1712.5  1560.0  1100.0
30      1981-12-03 00:00:00.0   950     1566.6666666666667      1566.6666666666667      1566.6666666666667      1637.5  1637.5  950.0


-- MIN as a window function, using the same six frames as the SUM example.
-- The select head (deptno, hiredate, sal) is required for the statement to run
-- and matches the column order of the result listing.
-- sal is declared int in emp_function, so the minimum is numeric; if sal were
-- stored as string the comparison would be lexicographic ('1100' < '800').
select
    deptno,
    hiredate,
    sal,
    -- Default frame with ORDER BY: minimum from partition start to current row.
    min(sal) over (partition by deptno order by hiredate) as sal1,
    -- Same range written explicitly as a ROWS frame.
    min(sal) over (partition by deptno order by hiredate
                   rows between unbounded preceding and current row) as sal2,
    -- No ORDER BY: minimum over the whole partition.
    min(sal) over (partition by deptno) as sal3,
    -- Current row plus the 3 rows before it.
    min(sal) over (partition by deptno order by hiredate
                   rows between 3 preceding and current row) as sal4,
    -- The 3 rows before, the current row, and the 1 row after.
    min(sal) over (partition by deptno order by hiredate
                   rows between 3 preceding and 1 following) as sal5,
    -- Current row plus every following row in the partition.
    min(sal) over (partition by deptno order by hiredate
                   rows between current row and unbounded following) as sal6
from emp_function
order by deptno, hiredate;

deptno    hiredate               sal     sal1    sal2    sal3     sal4   sal5    sal6
10      1981-06-09 00:00:00.0   2450    2450    2450    1300    2450    2450    1300
10      1981-11-17 00:00:00.0   5000    2450    2450    1300    2450    1300    1300
10      1982-01-23 00:00:00.0   1300    1300    1300    1300    1300    1300    1300
20      1980-12-17 00:00:00.0   800     800     800     800     800     800     800
20      1981-04-02 00:00:00.0   2975    800     800     800     800     800     1100
20      1981-12-03 00:00:00.0   3000    800     800     800     800     800     1100
20      1987-04-19 00:00:00.0   3000    800     800     800     800     800     1100
20      1987-05-23 00:00:00.0   1100    800     800     800     1100    1100    1100
30      1981-02-20 00:00:00.0   1600    1600    1600    950     1600    1250    950
30      1981-02-22 00:00:00.0   1250    1250    1250    950     1250    1250    950
30      1981-05-01 00:00:00.0   2850    1250    1250    950     1250    1250    950
30      1981-09-08 00:00:00.0   1500    1250    1250    950     1250    1250    950
30      1981-09-28 00:00:00.0   1250    1250    1250    950     1250    950     950
30      1981-12-03 00:00:00.0   950     950     950     950     950     950     950


-- MAX as a window function, using the same six frames as the SUM example.
-- The select head (deptno, hiredate, sal) is required for the statement to run
-- and matches the column order of the result listing.
-- sal is declared int in emp_function, so the maximum is numeric; if sal were
-- stored as string the comparison would be lexicographic ('950' > '2850').
select
    deptno,
    hiredate,
    sal,
    -- Default frame with ORDER BY: maximum from partition start to current row.
    max(sal) over (partition by deptno order by hiredate) as sal1,
    -- Same range written explicitly as a ROWS frame.
    max(sal) over (partition by deptno order by hiredate
                   rows between unbounded preceding and current row) as sal2,
    -- No ORDER BY: maximum over the whole partition.
    max(sal) over (partition by deptno) as sal3,
    -- Current row plus the 3 rows before it.
    max(sal) over (partition by deptno order by hiredate
                   rows between 3 preceding and current row) as sal4,
    -- The 3 rows before, the current row, and the 1 row after.
    max(sal) over (partition by deptno order by hiredate
                   rows between 3 preceding and 1 following) as sal5,
    -- Current row plus every following row in the partition.
    max(sal) over (partition by deptno order by hiredate
                   rows between current row and unbounded following) as sal6
from emp_function
order by deptno, hiredate;

deptno    hiredate               sal     sal1    sal2    sal3     sal4   sal5    sal6
10      1981-06-09 00:00:00.0   2450    2450    2450    5000    2450    5000    5000
10      1981-11-17 00:00:00.0   5000    5000    5000    5000    5000    5000    5000
10      1982-01-23 00:00:00.0   1300    5000    5000    5000    5000    5000    1300
20      1980-12-17 00:00:00.0   800     800     800     3000    800     2975    3000
20      1981-04-02 00:00:00.0   2975    2975    2975    3000    2975    3000    3000
20      1981-12-03 00:00:00.0   3000    3000    3000    3000    3000    3000    3000
20      1987-04-19 00:00:00.0   3000    3000    3000    3000    3000    3000    3000
20      1987-05-23 00:00:00.0   1100    3000    3000    3000    3000    3000    1100
30      1981-02-20 00:00:00.0   1600    1600    1600    2850    1600    1600    2850
30      1981-02-22 00:00:00.0   1250    1600    1600    2850    1600    2850    2850
30      1981-05-01 00:00:00.0   2850    2850    2850    2850    2850    2850    2850
30      1981-09-08 00:00:00.0   1500    2850    2850    2850    2850    2850    1500
30      1981-09-28 00:00:00.0   1250    2850    2850    2850    2850    2850    1250
30      1981-12-03 00:00:00.0   950     2850    2850    2850    2850    2850    950
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  hive 函数 分析函数