hive 分组topN
2017-12-04 17:38
141 查看
Time taken: 0.008 seconds, Fetched: 44 row(s)
hive> show create table jxl_report;
OK
CREATE TABLE `jxl_report`(
`id` bigint COMMENT '主键',
。。。
`user_name` string COMMENT '用户名',
`phone_no` string COMMENT '用户手机号',
`create_by` bigint COMMENT '创建用户',
`update_by` bigint COMMENT '修改用户',
`valid` boolean,
`create_time` string COMMENT '创建时间',
`row_key` bigint,
`cid` bigint COMMENT '进件ID')
COMMENT '报告详情'
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://nwdservice/user/hive/warehouse/dataimport.db/jxl_report'
TBLPROPERTIES (
'COLUMN_STATS_ACCURATE'='true',
'numFiles'='18',
'numRows'='4589964',
'rawDataSize'='5764994784',
'totalSize'='613551501',
'transient_lastDdlTime'='1512378856')
Time taken: 0.064 seconds, Fetched: 34 row(s)
# row_number() over
-- Keep one report id per cid: the row with the latest report_update_time.
-- row_number() numbers rows uniquely inside each cid partition, so when two
-- rows tie on report_update_time exactly one of them is kept.
-- Raise the bound in the WHERE clause to N to keep the top N rows per cid.
-- NOTE(review): report_update_time is not in the visible DDL excerpt;
-- presumably it is one of the elided columns -- confirm.
CREATE TABLE tmp_distinct_rpt AS
SELECT ranked.id
FROM (
    SELECT
        id,
        ROW_NUMBER() OVER (
            PARTITION BY cid
            ORDER BY report_update_time DESC
        ) AS od
    FROM jxl_report
) ranked
WHERE ranked.od <= 1;
#rank() over
-- Generic example: rank the rows of t inside each sub by ascending score.
-- rank() gives tied scores the same od and leaves a gap after the tie.
SELECT
    *,
    RANK() OVER (
        PARTITION BY sub
        ORDER BY score
    ) AS od
FROM t;
-- Keep the report id(s) with the latest report_update_time for each cid.
-- rank() gives tied rows the same position, so if several rows of a cid
-- share the latest report_update_time, all of their ids are kept.
CREATE TABLE tmp_distinct_rpt AS
SELECT ranked.id
FROM (
    SELECT
        id,
        RANK() OVER (
            PARTITION BY cid
            ORDER BY report_update_time DESC
        ) AS od
    FROM jxl_report
) ranked
WHERE ranked.od <= 1;
#dense_rank() over
-- Keep the report id(s) with the latest report_update_time for each cid.
-- Fix: the window function is dense_rank(); the original "dense_ran()" is
-- not a Hive function and the statement fails to compile.
-- dense_rank() gives tied rows the same position and leaves no gaps, so if
-- several rows of a cid share the latest report_update_time, all are kept.
CREATE TABLE tmp_distinct_rpt AS
SELECT ranked.id
FROM (
    SELECT
        id,
        DENSE_RANK() OVER (
            PARTITION BY cid
            ORDER BY report_update_time DESC
        ) AS od
    FROM jxl_report
) ranked
WHERE ranked.od <= 1;
按照 cid 分组,再按时间倒序排序,获取每组最新的报告!
参考:http://www.mamicode.com/info-detail-849458.html
hive> show create table jxl_report;
OK
CREATE TABLE `jxl_report`(
`id` bigint COMMENT '主键',
。。。
`user_name` string COMMENT '用户名',
`phone_no` string COMMENT '用户手机号',
`create_by` bigint COMMENT '创建用户',
`update_by` bigint COMMENT '修改用户',
`valid` boolean,
`create_time` string COMMENT '创建时间',
`row_key` bigint,
`cid` bigint COMMENT '进件ID')
COMMENT '报告详情'
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://nwdservice/user/hive/warehouse/dataimport.db/jxl_report'
TBLPROPERTIES (
'COLUMN_STATS_ACCURATE'='true',
'numFiles'='18',
'numRows'='4589964',
'rawDataSize'='5764994784',
'totalSize'='613551501',
'transient_lastDdlTime'='1512378856')
Time taken: 0.064 seconds, Fetched: 34 row(s)
# row_number() over
-- Keep one report id per cid: the row with the latest report_update_time.
-- row_number() numbers rows uniquely inside each cid partition, so when two
-- rows tie on report_update_time exactly one of them is kept.
-- Raise the bound in the WHERE clause to N to keep the top N rows per cid.
-- NOTE(review): report_update_time is not in the visible DDL excerpt;
-- presumably it is one of the elided columns -- confirm.
CREATE TABLE tmp_distinct_rpt AS
SELECT ranked.id
FROM (
    SELECT
        id,
        ROW_NUMBER() OVER (
            PARTITION BY cid
            ORDER BY report_update_time DESC
        ) AS od
    FROM jxl_report
) ranked
WHERE ranked.od <= 1;
#rank() over
-- Generic example: rank the rows of t inside each sub by ascending score.
-- rank() gives tied scores the same od and leaves a gap after the tie.
SELECT
    *,
    RANK() OVER (
        PARTITION BY sub
        ORDER BY score
    ) AS od
FROM t;
-- Keep the report id(s) with the latest report_update_time for each cid.
-- rank() gives tied rows the same position, so if several rows of a cid
-- share the latest report_update_time, all of their ids are kept.
CREATE TABLE tmp_distinct_rpt AS
SELECT ranked.id
FROM (
    SELECT
        id,
        RANK() OVER (
            PARTITION BY cid
            ORDER BY report_update_time DESC
        ) AS od
    FROM jxl_report
) ranked
WHERE ranked.od <= 1;
#dense_rank() over
-- Keep the report id(s) with the latest report_update_time for each cid.
-- Fix: the window function is dense_rank(); the original "dense_ran()" is
-- not a Hive function and the statement fails to compile.
-- dense_rank() gives tied rows the same position and leaves no gaps, so if
-- several rows of a cid share the latest report_update_time, all are kept.
CREATE TABLE tmp_distinct_rpt AS
SELECT ranked.id
FROM (
    SELECT
        id,
        DENSE_RANK() OVER (
            PARTITION BY cid
            ORDER BY report_update_time DESC
        ) AS od
    FROM jxl_report
) ranked
WHERE ranked.od <= 1;
按照 cid 分组,再按时间倒序排序,获取每组最新的报告!
参考:http://www.mamicode.com/info-detail-849458.html
相关文章推荐
- Hive TopN+分组TopN
- hive 分组+组内排序 , 求topN
- hive 分组+组内排序 , 求topN
- Hive分组提取TopN操作
- hive常见语法错误分组聚合
- HIVE分组排序问题
- 第20课 :SPARK Top N彻底解秘 TOPN 排序(Scala)SPARK分组TOPN 算法(JAVA) 必须掌握!
- Hive分组统计前top N条记录
- sparksql分组后topN(JAVA)
- hive中分组取前N个值的实现
- Hive分组取Top N
- MongoDB系列之分组topN
- Hive分组统计前top N条记录
- Spark Scala 分组排序取TopN
- sparksql分组后topN(JAVA)
- hive中分组取前N个值的实现
- mysql 分组topN
- Hive Cube RollUP 分组问题
- Hive.分组排序和TOP
- mysql分组取topn