您的位置:首页 > 运维架构

hive 分组topN

2017-12-04 17:38 141 查看
Time taken: 0.008 seconds, Fetched: 44 row(s)

hive> show create table jxl_report;

OK

CREATE TABLE `jxl_report`(

  `id` bigint COMMENT '主键', 

 。。。

  `user_name` string COMMENT '用户名', 

  `phone_no` string COMMENT '用户手机号', 

  `create_by` bigint COMMENT '创建用户', 

  `update_by` bigint COMMENT '修改用户', 

  `valid` boolean, 

  `create_time` string COMMENT '创建时间', 

   

  `row_key` bigint, 

  `cid` bigint COMMENT '进件ID')

COMMENT '报告详情'

ROW FORMAT SERDE 

  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 

STORED AS INPUTFORMAT 

  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 

OUTPUTFORMAT 

  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'

LOCATION

  'hdfs://nwdservice/user/hive/warehouse/dataimport.db/jxl_report'

TBLPROPERTIES (

  'COLUMN_STATS_ACCURATE'='true', 

  'numFiles'='18', 

  'numRows'='4589964', 

  'rawDataSize'='5764994784', 

  'totalSize'='613551501', 

  'transient_lastDdlTime'='1512378856')

Time taken: 0.064 seconds, Fetched: 34 row(s)

#  row_number() over

 create table tmp_distinct_rpt as select id from ( select *,row_number() over (partition by cid  order by report_update_time desc  ) as od  from jxl_report ) t1 where od <=1;

#rank() over

-- Generic pattern: rank the rows of t inside each sub group by ascending score.
-- Rows with an equal score receive the same od value, and rank() leaves a gap
-- in the numbering after such a tie.
select
    *,
    rank() over (
        partition by sub
        order by score
    ) as od
from t;

 create table tmp_distinct_rpt as select id from ( select *,rank() over (partition by cid  order by report_update_time desc  ) as od  from jxl_report ) t1 where od <=1;

#dense_rank() over

 
 create table tmp_distinct_rpt as select id from ( select *,dense_ran() over (partition by cid  order by report_update_time desc  ) as od  from jxl_report ) t1 where od <=1;

按照 cid 分组,按照更新时间倒序获取每个 cid 最新的报告!

参考:http://www.mamicode.com/info-detail-849458.html
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: