您的位置:首页 > 其它

9.16 hive基本命令与窗口函数

2017-09-16 21:25 169 查看
CREATE TABLE CITY(

province_code INT,

province_name string,

city_code INT,

city_name string

)

row FORMAT delimited

fields terminated by ‘,’

lines terminated by ‘\n’;

show create table city;

desc city;

load data local inpath ‘/home/bigdata/hiv/city.txt’ into table city;

select * from city limit 10

set hive.cli.print.header=true;

CREATE TABLE phone_daily(

model string,

os string,

brand string,

resolution string,

launch_date string

)

partitioned by (p_date string);

INSERT OVERWRITE TABLE phone_daily PARTITION (p_date ‘2017-09-13’);

select province_name,cnt from(

select province_name,count(city_name) as cnt

from city

group by province_name

)a

where cnt=1;

select province_name,count(city_name) as cnt from city group by province_name having cnt<2;

select province_name,count(city_name) as cnt from city group by province_name order by cnt desc limit 10

CREATE EXTERNAL TABLE city_ex(

province_code int,

province_name string,

city_code int,

city_name string)

ROW FORMAT DELIMITED

FIELDS TERMINATED BY ‘,’

LINES TERMINATED BY ‘\n’

LOCATION ‘/user/tanqi/city’;

CREATE TABLE city_in(

province_code int,

province_name string,

city_code int,

city_name string)

ROW FORMAT DELIMITED

FIELDS TERMINATED BY ‘,’

LINES TERMINATED BY ‘\n’

LOCATION ‘/user/tanqi/city’;

CREATE TABLE
user
(

uid
INT,

city_code
INT,

model
string,

access
string

)

row FORMAT delimited

fields terminated by ‘,’

lines terminated by ‘\n’;

load data local inpath ‘/home/bigdata/tanqi/hive/user.txt’ into table user;

create table user_daily(

uid int,

city_code int,

model string,

access string

)

partitioned by (p_date string);

INSERT OVERWRITE TABLE user_daily PARTITION (p_date=’2017-09-01’)

SELECT * FROM user; //插入的数据源从user里来 从user里先查出来后再插入

show partitioned user_daily;

set hive.exec.dynamic.partition.mode=nonstrict;

INSERT OVERWRITE TABLE user_daily PARTITION (p_date)

SELECT *,’2017-09-02’ FROM user

UNION ALL

SELECT *,’2017-09-03’ FROM user

;

ALTER TABLE user_daily PARTITION (p_date=’2017-01-01’) RENAME TO PARTITION (p_date=’20170101’);

alter table user_daily drop partition(p_date=’20170901’);

select city_code,count(distinct uid) as sum from user where access=’WIFI’ group by city_code order by sum desc limit 5;

select city_code,access,count(uid)as cnt from user group by access,city_code having cnt>=3 order by cnt desc;

select access,count(uid) as cnt from user where lower(model) like”%ne%” group by access order by cnt desc;

select sum(if(access=’WIFI’,1,0))/count(1) from user;

如果access=wifi 则+1 否则0

case 表达式

when 值1 then 结果1

when 值2 then 结果2

else 结果3

select

case

when uid % 10 in (0, 1, 2, 3) then ‘0-3’

when uid % 10 in (4, 5, 6, 7) then ‘4-7’

else ‘8-9’

end as interval,

count(*) as cnt

from user

group by

case

when uid % 10 in (0, 1, 2, 3) then ‘0-3’

when uid % 10 in (4, 5, 6, 7) then ‘4-7’

else ‘8-9’

end;

select collect_set(access) from user;

[“WIFI”,”2G”,”3G”,”4G”] 所有取值的集合 自动去重

select collect_list(access) from user; 不去重的集合

select user.uid, user.city_code, city.city_name

from

(select * from user where uid <= 100) user

FULL join

(select * from city where province_code <= 30) city

on (user.city_code = city.city_code)

limit 200;

(right left inner full)

A   JOIN   B


左 △ null

右 null △

内 A∩B

全 A∪B

set hive.cli.print.header=true;

set hive.cli.print.row.to.vertical=true;

set hive.cli.print.row.to.vertical.num=1; 不显示列名

分组top N 窗口函数

① 分组 例如 access

②排序 序号 city_code

③top 1

select access,city_code, uid

from

(

select uid, access, city_code,

row_number() over (partition by access order by city_code desc) as row_num

from user

) a

where row_num = 1;

row number() 返回结果集分区内行的序列号,每个分区的第一行从 1 开始。

over

over后的写法:

over(order by salary) 按照salary排序进行累计,order by是个默认的开窗函数

over(partition by deptno)按照部门分区

select p_date,

sum(cnt) over(order by p_date asc rows between unbounded preceding and current row)

from
4000

(

select p_date, count(*) as cnt

from user_daily

where p_date between ‘2017-09-01’ and ‘2017-09-30’

group by p_date

)a

;

unbounded preceding无边界的第一行
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: