您的位置:首页 > 其它

医疗人群数据包

2015-06-19 16:19 246 查看

需求介绍

医疗人群数据包

http://jira.dev.zamplus.com/browse/ZAMPDMP-1358

需要用到的数据详见附件

生成三个数据包(zid 包)

i. 包1:所有搜索过 sheet1 所包含关键词的zid

ii. 包2:访问过 sheet2 + sheet3 所有domain 的zid

iii. 包3:包1 与包2 合并后按 zid 去重

附件

SQL 统计

-- Lookup tables for the two input lists. Each has a single string column `d`
-- (presumably one domain / one search keyword per line of the source file -- TODO confirm).
-- IF NOT EXISTS keeps the script re-runnable instead of failing on the second run.
create table if not exists tmp.domain (d string) location '/bh/warehouse/dmp/tmp/domain';
create table if not exists tmp.keyword (d string) location '/bh/warehouse/dmp/tmp/keyword';

-- OVERWRITE replaces the previous contents. A plain INTO appends, so re-running
-- the script would duplicate every row and inflate the joins against these tables.
load data local inpath '/home/wankun/domain.txt' overwrite into table tmp.domain;
load data local inpath '/home/wankun/keywords.txt' overwrite into table tmp.keyword;

-- Result tables, each with a single string column `d` holding a zid.
-- tmp.zid3 is filled from tmp.zid1 and tmp.zid2 by the last statement of this script.
-- IF NOT EXISTS keeps the script re-runnable instead of failing on the second run.
create table if not exists tmp.zid1 (d string) location '/bh/warehouse/dmp/tmp/zid1';
create table if not exists tmp.zid2 (d string) location '/bh/warehouse/dmp/tmp/zid2';
create table if not exists tmp.zid3 (d string) location '/bh/warehouse/dmp/tmp/zid3';

-- Package 1: every zid with at least one search string that contains a keyword
-- from tmp.keyword, for partitions 20150608 through 20150615 (inclusive).
--
-- The result is written to tmp.zid1 first: the statement that builds tmp.zid3
-- reads tmp.zid1, and exporting only to a local directory left that table empty.
insert overwrite table tmp.zid1
select distinct t.zid
from (
    select zid,
           -- keys of the `search` map are presumably URL-encoded search strings;
           -- decode them before substring matching -- TODO confirm encoding
           java_method('java.net.URLDecoder', 'decode', search_id, 'UTF-8') as search_id
    from (
        select zid, map_keys(search) as search_map
        from insight_cdr_v2
        where concat(year,month,day)>='20150608' and concat(year,month,day)<='20150615'
    ) cdr
    lateral view explode(search_map) search_list as search_id
) t
-- Every search string is compared with every keyword, so this is intentionally
-- a cartesian product; CROSS JOIN states that explicitly.
cross join tmp.keyword t1
where t1.d <> ''                      -- skip blank lines so an empty pattern cannot match
  and instr(t.search_id, t1.d) > 0;   -- keyword occurs anywhere inside the search string

-- Export the same zid list to the local filesystem, as the original statement did.
insert overwrite local directory '/home/wankun/tmp/zids1'
select d
from tmp.zid1;

-- Package 2: every zid with at least one `top_site` key that contains a domain
-- from tmp.domain, for partitions 20150608 through 20150615 (inclusive).
--
-- The result is written to tmp.zid2 first: the statement that builds tmp.zid3
-- reads tmp.zid2, and exporting only to a local directory left that table empty.
insert overwrite table tmp.zid2
select distinct t3.zid
from (
    -- explicit column list instead of SELECT *: only zid and the exploded site are used
    select zid, ts
    from (
        select zid, map_keys(top_site) as site_list
        from insight_cdr_v2
        where concat(year,month,day)>='20150608' and concat(year,month,day)<='20150615'
    ) cdr
    lateral view explode(site_list) top_site_table as ts
) t3
-- Every visited site is compared with every domain, so this is intentionally
-- a cartesian product; CROSS JOIN states that explicitly.
cross join tmp.domain t4
where t4.d <> ''               -- skip blank lines so an empty pattern cannot match
  -- NOTE(review): substring match, so 'a.com' also matches 'ba.com' -- confirm this is intended
  and instr(t3.ts, t4.d) > 0;

-- Export the same zid list to the local filesystem, as the original statement did.
insert overwrite local directory '/home/wankun/tmp/zids2'
select d
from tmp.zid2;

-- Package 3: the de-duplicated union of the zids stored in tmp.zid1 and tmp.zid2.
insert overwrite table tmp.zid3
select merged.d
from (
    select d from tmp.zid1
    union all
    select d from tmp.zid2
) merged
group by merged.d;   -- one output row per distinct zid
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: