3、装载数据
-- Load a local data file into the day='20150512' partition of test_kwu.
-- `overwrite` replaces whatever that partition currently holds.
-- String literals must use ASCII single quotes; typographic quotes are not
-- valid string delimiters in HiveQL.
load data local inpath '/home/kwu/data/20150512.dat'
overwrite into table test_kwu
partition (day = '20150512');
-- Append rows selected from test_kwu into its own day='20150507' partition.
-- The partition value is a string literal and must use ASCII single quotes.
-- NOTE(review): the select has no filter on `day`, so it presumably reads
-- every existing partition of test_kwu — confirm that is intended.
insert into table test_kwu partition (day = '20150507')
select
    dateday,
    datetime,
    ip,
    cookieid,
    userid,
    logserverip,
    referer,
    requesturl,
    remark1,
    remark2,
    alexaflag,
    ua,
    wirelessflag
from test_kwu;
4、压缩处理
-- Session-level settings applied before the partition is rewritten, so that
-- the rewritten output is written Gzip-compressed.
-- NOTE(review): hive.enforce.bucketing does not look related to compression —
-- confirm it is needed for this step.
set hive.enforce.bucketing=true;
-- Turn on compression of Hive query output.
set hive.exec.compress.output=true;
-- Turn on compression of MapReduce job output.
-- NOTE(review): presumably the legacy (mapred.*) property name; verify it is
-- still honoured by the Hadoop version in use.
set mapred.output.compress=true;
-- Use the Gzip codec for the compressed output.
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
-- Set the codec list to Gzip only.
-- NOTE(review): this looks like it replaces the whole list of available
-- codecs for the session — confirm no other codec is needed for reading.
set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;
-- Rewrite the day='20150507' partition of test_kwu from a select on the same
-- table, so the partition files are regenerated under the current session
-- settings. `overwrite` replaces the partition's existing contents.
-- The partition value is a string literal and must use ASCII single quotes.
-- NOTE(review): the select has no filter on `day`, so it presumably reads
-- every partition of test_kwu, not only day='20150507' — confirm that is
-- intended before running this against real data.
insert overwrite table test_kwu partition (day = '20150507')
select
    dateday,
    datetime,
    ip,
    cookieid,
    userid,
    logserverip,
    referer,
    requesturl,
    remark1,
    remark2,
    alexaflag,
    ua,
    wirelessflag
from test_kwu;
5、基本查询语句
查询每天的PV
-- Number of tracklog rows per dateday (the daily PV figure).
select
    dateday,
    count(*)
from tracklog
group by dateday;
尽量避免全表的聚合函数
-- One output row per cookieid group that contains exactly one tracklog row;
-- each output row carries that group's row count (always 1) as cnt.
select
    count(*) as cnt
from tracklog
group by cookieid
having count(*) = 1;
可采用子查询代替
-- Count the cookieids that appear in exactly one tracklog row.
-- The inner query keeps the cookieid groups whose non-null count is 1;
-- the outer query counts how many such cookieids there are.
select
    count(t.cookieid)
from (
    select
        cookieid,
        count(cookieid) as cnt
    from tracklog
    group by cookieid
    having count(cookieid) = 1
) t;