码迷,mamicode.com
首页 > 其他好文 > 详细

周平均月平均设计

时间:2015-01-19 22:47:08      阅读:249      评论:0      收藏:0      [点我收藏+]

标签:

http://blog.csdn.net/lili72

背景: 有些报表除了需要每天的数据走势图,有时还需要每周的平均和每月的平均,再和上周或月作比较,计算差值,然后分别形成周平均报表和月平均报表。

日报表:每天计算报表数据

周报表:每周某个时间只计算一次

月报表:每月每个时间只计算一次

 

 以下设计可以作为比较通用的周月差值计算方式。

考虑到查询条件的跨年,所以设计表的时候有一个日期是标志某个周期内的数据,这样查询就能通过这个日期连续的查询。etldate

结果展示:

 技术分享

技术分享

技术分享

技术分享

以下是详细代码,包括设计的表和思路:

#!/usr/bin/env bash
# ************************************************************************
# Useage: dwd_base_sall_week_month.sh[data_date]
# Author: author
# Version: V14.00.001
# Func Desc : XXX报表计算周报 月报
# modify describe :
# yyyymmdd       version      author               modified

VERSION="V14.00.001" #初始版本为001,后面修改为递增1

if [ $# -gt 1 ];then # 参数个数,需视具体参数修改 
   echo "Params error."
   echo "Useage: dwd_base_sall_week_month.sh [data_date]"
   exit 1
fi

logfile=$BIPROG_ROOT/logs/`basename $0`.log #定义写日志文件名

#写日志函数
writelog()
{
  echo `date "+%Y-%m-%d %H:%M:%S"`" $1"|tee -a $logfile
}
#执行shell 命令
EXESHELL()
{
vIniNum=`expr ${vIniNum} + 1`
if [ ${vIniNum} -lt ${vCurNum} ]
then
   writelog "STEP ${vIniNum}: < ${vCurNum} SKIP ..."
   return 0
fi

writelog "STEP ${vIniNum}: $CMD "
eval $CMD 
if [ $? -ne 0 ]
then
 writelog "SH Command failed! exit..."
 exit 1
fi
}




vDay=${1:-"`lastday YYYY-MM-DD`"} #如果没有传日期的参数,默认取昨天
dtstr=`date -d "0 day ago " +%Y-%m-%d" "%H:%M:%S`



serven_day_before_etldate=`date -d "7 day ago ${vDay}" +"%Y-%m-%d"`


# 获取该日期在一年中的第几周
third_week=`date -d  "0 day ago $vDay" +%U`
# 获取该日期是星期几
current_day_week=`date -d  "0 day ago $vDay" +%w`
# 该日期的的年份
current_year=`date -d   "0 day ago  $vDay" +%Y`
# 该日期的月份
current_month=`date -d   "0 day ago $vDay " +%m`
# 该日期的7天前的月份
serven_day_before_month=`date -d   "7 day ago $vDay" +%m`
# 该日期的 日
current_day=`date -d   "0 day ago $vDay" +%d`
# 该日期7天前的日
serven_day_before=`date -d   "7 day ago $vDay" +%d`
# 定义变量 判断是否 执行周或月
count_num=0

# 周 月报
#  drop  table   biuser.temp_period_order_cnt ;                   
#  create  table     biuser.temp_period_order_cnt (               
#  etldate                   string  comment  '日期', 
#  year_val                  string  comment   '年份',          
#  week_third                string  comment   '第几周',
#  etldate_tag               string  comment  '范围标签 01.04-01.10',       
#  current_period_avg        string  comment  '当前周/月平均值',
#  last_period_avg           string  comment  '上周/月平均值',  
#  difference_val            string  comment  '差值' ,          
#  hb_val                    string  comment  '环比值',         
#  data_type                 string  comment   '数据类型',      
#  time_dim                  string  comment   '周/月',         
#  addtimes                  string  comment   '加载时间'       
#  ) comment 'XXX报表周/月报表'                                
# partitioned by (dt string,dataType  string)                                    
# row format delimited                                          
# fields terminated by '|'                                      
#  lines terminated by '\n' stored as textfile  ;                

#        data_type =1       pcXXX匹配率
#		     data_type =2       pc在线匹配率
#		     data_type =3       PCXXX搜索网络失败率
#		     data_type =4       PC本地XXX使用占比
#		     data_type =5       PCXXX手动搜索占比




# 开始计算周报  每周跑一次   每周日起跑
# 判断日期

SQL="
INSERT OVERWRITE  TABLE   biuser.temp_period_order_cnt  PARTITION(DT='${vDay}',dataType='1')
   select 
          '${vDay}' etldate ,
          '$current_year'  year_val,
          '$third_week' as week_third,
          '$serven_day_before_month.$serven_day_before-$current_month.$current_day' as etldate_tag,
          a.current_period_avg  as  current_period_avg,
          b.current_period_avg  as  last_period_avg,
          round((b.current_period_avg - a.current_period_avg),2) as difference_val,
          round((b.current_period_avg - a.current_period_avg)*100/b.current_period_avg,2)  hb_val,
          a.data_type,
          a.time_dim,
          '${dtstr}' as addtimes 
          from 
            (select 
                  '1' as data_type,
                  '周' as time_dim,                          
                  round(avg((cast(local_go_net as  bigint) +cast(network_go_net as  bigint))*100/(cast(local_go_net as  bigint) +cast(network_go_net as  bigint)+cast(matche_order_fail_cnt as  bigint))),2) current_period_avg  
        from  biuser.dwd_base_sall   where  dt>='${serven_day_before_etldate}'  and dt<'${vDay}' 
        )a     left  outer  join  
    (select 
          etldate,
          etldate_tag,
          current_period_avg,
          last_period_avg,
          difference_val,
          hb_val,
          data_type,
          time_dim
     from   biuser.temp_period_order_cnt  where dt='${serven_day_before_etldate}'  and datatype='1'  and time_dim='周'
     ) b 
   on  a.data_type=b.data_type and a.time_dim=b.time_dim;
"



if [ $current_day_week = 0 ]
then 
 echo '周末'
EXE_HIVE  "${SQL}"
count_num=1
else 
 echo '非周末'
 
fi




#开始计算月报  每月月初 计算上个月的月报    每个月的月初01 计算上个月的

# 年  
y_report=`date -d ''$vDay' -1 days'  +%Y`  
# 月  
m_report=`date -d ''$vDay' -1 days'  +%m`  
m_Last_report_day=`date -d "${vDay} 1 month ago" +%Y-%m-%d`


# 日  
d_report=`date -d ''$vDay' -1 days'  +%d`  


echo '年'$y_report
echo '月'$m_report
echo '一个月前的日期'$m_Last_report_day
echo '日'$d_report


#是周末 也是月初
SQL="
INSERT into  TABLE   biuser.temp_period_order_cnt  PARTITION(DT='${vDay}',dataType='1')
   select 
          '${vDay}' etldate ,
          '$current_year'  year_val ,
          '$current_month' week_third,
          ' '  etldate_tag,
          a.current_period_avg  as  current_period_avg,
          b.current_period_avg  as  last_period_avg,
          round((b.current_period_avg - a.current_period_avg),2) as difference_val,
          round((b.current_period_avg - a.current_period_avg)*100/b.current_period_avg,2)  hb_val,
          a.data_type,
          a.time_dim,
          '${dtstr}' as addtimes 
         from 
            (select 
                  '1' as data_type,
                  '月' as time_dim,                          
                  round(avg((cast(local_go_net as  bigint) +cast(network_go_net as  bigint))*100/(cast(local_go_net as  bigint) +cast(network_go_net as  bigint)+cast(matche_order_fail_cnt as  bigint))),2) current_period_avg  
        from  biuser.dwd_base_sall   where  dt>='${m_Last_report_day}'  and dt<'${vDay}' 
        )a     left  outer  join 
    (select 
          etldate,
          etldate_tag,
          current_period_avg,
          last_period_avg,
          difference_val,
          hb_val,
          data_type,
          time_dim
     from   biuser.temp_period_order_cnt  where dt='${m_Last_report_day}'  and datatype='1'  and  time_dim='月'
     ) b 
   on  a.data_type=b.data_type and a.time_dim=b.time_dim;
"

# 如果是月初  不是周末
SQL2="
INSERT OVERWRITE  TABLE   biuser.temp_period_order_cnt  PARTITION(DT='${vDay}',dataType='1')
   select 
          '${vDay}' etldate ,
          '$current_year'  year_val ,
          '$current_month' week_third,
          ' '  etldate_tag,
          a.current_period_avg  as  current_period_avg,
          b.current_period_avg  as  last_period_avg,
          round((b.current_period_avg - a.current_period_avg),4) as difference_val,
          round((b.current_period_avg - a.current_period_avg)/b.current_period_avg,4)  hb_val,
          a.data_type,
          a.time_dim,
          '${dtstr}' as addtimes 
         from 
            (select 
                  '1' as data_type,
                  '月' as time_dim,                          
                  round(avg((cast(local_go_net as  bigint) +cast(network_go_net as  bigint))*100/(cast(local_go_net as  bigint) +cast(network_go_net as  bigint)+cast(matche_order_fail_cnt as  bigint))),2) current_period_avg  
        from  biuser.dwd_base_sall   where  dt>='${m_Last_report_day}'  and dt<'${vDay}' 
        )a     left  outer  join 
    (select 
          etldate,
          etldate_tag,
          current_period_avg,
          last_period_avg,
          difference_val,
          hb_val,
          data_type,
          time_dim
     from   biuser.temp_period_order_cnt  where dt='${m_Last_report_day}'  and datatype='1'  and  time_dim='月'
     ) b 
   on  a.data_type=b.data_type and a.time_dim=b.time_dim;
"



if [ $(date -d ''$vDay' -1 days'  +%d) -eq $(cal $m_report $y_report |xargs|awk '{print $NF}') ]  
then 
  echo '月初' 
    if [ $count_num = 1 ]
then 
 echo '是周末,是月初'
 EXE_HIVE  "${SQL}"
else 
 EXE_HIVE  "${SQL2}"
   fi
count_num=2
else   
  echo '非月初'
    false  
fi


echo $count_num


if [ $count_num = 0 ]
then 
 echo '不是周末,也不是月初'
else  



##      
CMD="/usr/local/mysql/bin/mysql -h 192.168.119.132 -uroot -pbi_hive -e \"delete from  biuser.temp_period_order_cnt where etldate='${vDay}' AND data_type='1';\""
EXESHELL

# 同步数据到 mysql
CMD="/home/hadoop/hadoop/sqoop-1.4.4/bin/sqoop export --connect jdbc:mysql://192.168.119.132:3306/bidata?characterEncoding=utf8 --username root --password xxxx --table temp_period_order_cnt --export-dir /user/hive/warehouse/biuser.db/temp_period_order_cnt/dt=${vDay}/datatype=1  --input-fields-terminated-by '|' --null-non-string '0' --null-string '0';"
EXESHELL

 
fi


 

 

周平均月平均设计

标签:

原文地址:http://blog.csdn.net/lili72/article/details/42882743

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!