[Author]: kwu
基于Hive及Sqoop的每日PV、UV、IP定时分析
1、创建pvuvip的hive表
hive -e " use stage; CREATE EXTERNAL TABLE pvuvip( day string, pv int, uv int, ipcnt int ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' LOCATION '/dw/stage/pvuvip/';"
2、创建mysql关系库的表
CREATE TABLE pvuvip ( id INT NOT NULL AUTO_INCREMENT, DAY VARCHAR(50), pv INT, uv INT, ipcnt INT, PRIMARY KEY (id) ) ENGINE=MyISAM DEFAULT CHARSET=latin1
#!/bin/sh # upload logs to hdfs yesterday=`date --date='1 days ago' +%Y%m%d` hive -e " use stage; insert overwrite table pvuvip select day,count(*) pv, count(distinct cookieid) uv , count(distinct ip) ipcnt from ods.tracklog where day='${yesterday}' group by day;" sqoop export --connect jdbc:mysql://localhost:3306/charts --username root --password 123456 --table pvuvip --fields-terminated-by ' ' --columns "day,pv,uv,ipcnt" --export-dir /dw/stage/pvuvip/;
18 06 * * * /opt/bin/sqoop_opt/pvuvip.opt
/sbin/service crond reload
原文地址:http://blog.csdn.net/bdchome/article/details/45952415