标签:add asi 开始 inf not 分区表 str count class
1,---导入mds_imei_month_info
set hive.exec.max.dynamic.partitions= 100000; //最大的动态分区表 set hive.support.concurrency=false; //是否支持并发 set hive.exec.max.dynamic.partitions.pernode= 100000; //each mapper or reducer可以创建的最大动态分区数 set hive.exec.dynamic.partition.mode=nonstrict; //strict是避免全分区字段是动态的,必须有至少一个分区字段是指定有值的 insert into table mds_imei_month_info partition(month) select imei_p,dt,cnt ,month from ( select imei_p ,month,dt,cnt from (select imei_p,‘201705‘ as month,sum(pow(2,(dt-1))) as dt,sum(cnt) cnt //将自下面取出的dt进行指数的转换,pow(2,(dt-1))表示2的dt-1次方,cnt表示imei在这个月出现的次数
( select imei_p,cast(substring(dt,7,2) as int) as dt,count(*) cnt from mds_engine_basic where dt>=20170501 and dt<=20170531 and length(dt)=8 group by imei_p,dt //dt一共8位数,从第7位数开始的2位数取出,既01到31 ) a group by imei_p)a where length(imei_p)>=14 and regexp_extract(imei_p,‘([a-z,,.,A-Z,0-9,_,\\-]*)‘,0)=imei_p //这是对imei进行正则匹配 )a;
2,---导入mds_ip_month_info
set hive.exec.max.dynamic.partitions= 100000; set hive.support.concurrency=false; set hive.exec.max.dynamic.partitions.pernode= 100000; set hive.exec.dynamic.partition.mode=nonstrict; insert into table mds_ip_month_info partition(month) select user_ip,country,province,city,longtitude,latitude,isp,dt,cnt ,month from ( select user_ip ,month,dt,cnt,country,city,province,latitude,longtitude,isp from (select user_ip,‘201705‘ as month,sum(pow(2,(dt-1))) as dt,sum(cnt) cnt,country,city,province,latitude,longtitude,isp from( select user_ip,cast(substring(dt,7,2) as int) as dt,count(*) cnt,ipaddressquery(2,user_ip) country, ipaddressquery(3,user_ip) province,ipaddressquery(1,user_ip) city, split(ipaddressquery(5,user_ip),‘,‘)[1] longtitude , split(ipaddressquery(5,user_ip),‘,‘)[0] latitude, ipaddressquery(4,user_ip) isp from mds_engine_basic where dt>=20170501 and dt<=20170531 and length(dt)=8 and user_ip not like ‘%,%‘ and split(user_ip,‘,‘)[0] like ‘%.%.%‘ and regexp_extract( split(user_ip,‘,‘)[0],‘\.([0-9]{0,7})\.([0-9]{0,7})\.([0-9]{0,7})\.([0-9]{0,7})‘,0)= split(user_ip,‘,‘)[0] and split(user_ip,‘\\.‘)[0]<300 group by user_ip,dt,ipaddressquery(2,user_ip) , ipaddressquery(3,user_ip) ,ipaddressquery(1,user_ip) , split(ipaddressquery(5,user_ip),‘,‘)[1] , split(ipaddressquery(5,user_ip),‘,‘)[0] , ipaddressquery(4,user_ip)) a group by user_ip,country,city,province,latitude,longtitude,isp)a )a ;
3,---导入mds_id_month_info
set hive.exec.max.dynamic.partitions= 100000; set hive.support.concurrency=false; set hive.exec.max.dynamic.partitions.pernode= 100000; set hive.exec.dynamic.partition.mode=nonstrict; insert into table mds_id_month_info partition(month) select id,dt,cnt ,month from ( select id ,month,dt,cnt from (select id,‘201705‘ as month,sum(pow(2,(dt-1))) as dt,sum(cnt) cnt from ( select id,cast(substring(dt,7,2) as int) as dt,count(*) cnt from mds_engine_basic where dt>=20170501 and dt<=20170531 and length(dt)=8 group by id,dt ) a group by id)a where length(id)=16 and regexp_extract(id,‘([a-z,,.,A-Z,0-9,_,\\-]*)‘,0)=id )a;
4,---导入mds_bssid_month_info
set hive.exec.max.dynamic.partitions= 100000; set hive.support.concurrency=false; set hive.exec.max.dynamic.partitions.pernode= 100000; set hive.exec.dynamic.partition.mode=nonstrict; insert into table mds_bssid_month_info partition(month) select bssid,dt,cnt ,month from ( select bssid ,month,dt,cnt from (select bssid,‘201709‘ as month,sum(pow(2,(dt-1))) as dt,sum(cnt) cnt from ( select bssid,cast(substring(dt,7,2) as int) as dt,count(*) cnt from mds_engine_wifi where dt>=20170901 and dt<=20170930 and length(dt)=8 group by bssid,dt ) a group by bssid)a where length(bssid)>=14 )a;
标签:add asi 开始 inf not 分区表 str count class
原文地址:http://www.cnblogs.com/huxinga/p/7647219.html