码迷,mamicode.com
首页 > 其他好文 > 详细

拉链表

时间:2019-05-09 21:39:20      阅读:181      评论:0      收藏:0      [点我收藏+]

标签:create   select   union   use   red   dynamic   mod   start   inner   

1.数据准备

-- Source (ODS) table for the zipper-table demo: one full member snapshot per
-- day, partitioned by the statistics date.
-- Hive requires the COMMENT text to be a quoted string literal.
create table sospdm.tmp_ods_user
(
     cust_num   string comment '会员编码'
    ,mbl_phone  string comment '会员手机号'
)
partitioned by (statis_date string comment '统计时间')
stored as rcfile
;

-- Allow the partition value to come from the last select column
-- (statis_date) instead of a hard-coded partition spec.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

-- Load four consecutive daily snapshots (20170101 .. 20170104) of sample data.
-- All values are written as string literals: the target columns are strings,
-- and an unquoted 001 is an integer literal that would be stored as '1',
-- losing the leading zeros of the member id.
-- NOTE(review): sospdm.dual is assumed to be a one-row helper table - confirm.
insert overwrite table sospdm.tmp_ods_user partition (statis_date)
-- snapshot 20170101
select '001' as cust_num,'111111' as mbl_phone,'20170101' as statis_date from sospdm.dual union all
select '002' as cust_num,'222222' as mbl_phone,'20170101' as statis_date from sospdm.dual union all
select '003' as cust_num,'333333' as mbl_phone,'20170101' as statis_date from sospdm.dual union all
select '004' as cust_num,'444444' as mbl_phone,'20170101' as statis_date from sospdm.dual union all
-- snapshot 20170102: 002 and 004 change phone, 005 is new
select '001' as cust_num,'111111' as mbl_phone,'20170102' as statis_date from sospdm.dual union all
select '002' as cust_num,'233333' as mbl_phone,'20170102' as statis_date from sospdm.dual union all
select '003' as cust_num,'333333' as mbl_phone,'20170102' as statis_date from sospdm.dual union all
select '004' as cust_num,'432432' as mbl_phone,'20170102' as statis_date from sospdm.dual union all
select '005' as cust_num,'555555' as mbl_phone,'20170102' as statis_date from sospdm.dual union all
-- snapshot 20170103: 004 and 005 change phone, 006 is new
select '001' as cust_num,'111111' as mbl_phone,'20170103' as statis_date from sospdm.dual union all
select '002' as cust_num,'233333' as mbl_phone,'20170103' as statis_date from sospdm.dual union all
select '003' as cust_num,'333333' as mbl_phone,'20170103' as statis_date from sospdm.dual union all
select '004' as cust_num,'654321' as mbl_phone,'20170103' as statis_date from sospdm.dual union all
select '005' as cust_num,'115115' as mbl_phone,'20170103' as statis_date from sospdm.dual union all
select '006' as cust_num,'666666' as mbl_phone,'20170103' as statis_date from sospdm.dual union all
-- snapshot 20170104: 006 changes phone
select '001' as cust_num,'111111' as mbl_phone,'20170104' as statis_date from sospdm.dual union all
select '002' as cust_num,'233333' as mbl_phone,'20170104' as statis_date from sospdm.dual union all
select '003' as cust_num,'333333' as mbl_phone,'20170104' as statis_date from sospdm.dual union all
select '004' as cust_num,'654321' as mbl_phone,'20170104' as statis_date from sospdm.dual union all
select '005' as cust_num,'115115' as mbl_phone,'20170104' as statis_date from sospdm.dual union all
select '006' as cust_num,'666767' as mbl_phone,'20170104' as statis_date from sospdm.dual;

2.拉链逻辑

-- Full-snapshot zipper (history chain) table.
-- Each row is one version of a member: it is valid from start_date and is
-- stored in the partition of its end_date. The loads in this script use
-- end_date = 99991231 for versions that are still open.
-- Hive requires the COMMENT text to be a quoted string literal.
create table sospdm.tmp_dwd_his
(
     cust_num   string comment '会员编码'
    ,mbl_phone  string comment '手机号'
    ,start_date string comment '开始时间'
)
partitioned by (end_date string comment '结束时间')
;

-- Seed the zipper table from the first snapshot: every member in the
-- 20170101 partition gets one open chain (start_date 20170101, end_date
-- 99991231). end_date is a dynamic partition column, so this statement needs
-- hive.exec.dynamic.partition enabled with nonstrict mode.
-- Literals are quoted because both the partition columns and start_date are
-- strings; the source table is schema-qualified to match the insert target.
insert overwrite table sospdm.tmp_dwd_his partition(end_date)
select
     cust_num
    ,mbl_phone
    ,'20170101' as start_date
    ,'99991231' as end_date
from sospdm.tmp_ods_user
where statis_date = '20170101'
;

-- Daily zipper maintenance for ${statis_date}.
-- Compares the chains that cover ${statis_date} in the history table (t2)
-- with that day's full snapshot (t1) and rewrites the affected end_date
-- partitions with three disjoint row sets:
--   1. new members and members whose phone changed -> a new open chain
--   2. members whose phone changed                 -> previous chain is closed
--   3. members whose phone did not change          -> existing chain is kept
-- Phones are compared with the null-safe operator <=> so that a member whose
-- phone is NULL on either side still lands in exactly one of the branches.
-- Generated date values are cast to string so every union branch has the same
-- column types as the target table.
-- NOTE(review): members with an open chain that are missing from today's
-- snapshot are not written back, so they disappear from the 99991231
-- partition - confirm whether such chains should be kept or closed.
insert overwrite table sospdm.tmp_dwd_his partition(end_date)
-- 1. new or changed: open a fresh chain starting today
select
     t1.cust_num
    ,t1.mbl_phone
    ,cast(${statis_date} as string) as start_date
    ,'99991231' as end_date
from
(
    select
         cust_num
        ,mbl_phone
    from sospdm.tmp_ods_user where statis_date=${statis_date}
) t1
left join
(
    select
         cust_num
        ,mbl_phone
    from sospdm.tmp_dwd_his where start_date<=${statis_date} and end_date>=${statis_date}
) t2
on t1.cust_num=t2.cust_num
where t2.cust_num is null -- new member
or not (t1.mbl_phone <=> t2.mbl_phone) -- phone changed

union all

-- 2. changed: close the previous chain, keeping the OLD phone from the
--    history table (t2) so the historical value is preserved
select
     t2.cust_num
    ,t2.mbl_phone
    ,t2.start_date
    ,cast(${statis_date} as string) as end_date
from
(
    select
         cust_num
        ,mbl_phone
    from sospdm.tmp_ods_user where statis_date=${statis_date}
) t1
inner join
(
    select
         cust_num
        ,mbl_phone
        ,start_date
        ,end_date
    from sospdm.tmp_dwd_his where start_date<=${statis_date} and end_date>=${statis_date}
) t2
on t1.cust_num=t2.cust_num
where not (t1.mbl_phone <=> t2.mbl_phone) -- phone changed

union all

-- 3. unchanged: carry the existing chain over as it is
select
     t2.cust_num
    ,t2.mbl_phone
    ,t2.start_date
    ,t2.end_date
from
(
    select
         cust_num
        ,mbl_phone
    from sospdm.tmp_ods_user where statis_date=${statis_date}
) t1
inner join
(
    select
         cust_num
        ,mbl_phone
        ,start_date
        ,end_date
    from sospdm.tmp_dwd_his where start_date<=${statis_date} and end_date>=${statis_date}
) t2
on t1.cust_num=t2.cust_num
where t1.mbl_phone <=> t2.mbl_phone -- phone unchanged
;

3.拉链回滚

-- Rollback: rebuild the zipper rows as they were before the 20170103 load.
-- This is a plain query; it returns the rolled-back rows and writes nothing.
-- Dates are compared as strings, which orders correctly for the 8-digit
-- values used in this script.

-- Chains closed before the rollback date stay closed as they are.
select
     cust_num
    ,mbl_phone
    ,start_date
    ,end_date
from sospdm.tmp_dwd_his
where end_date < '20170103'

union all

-- Chains that already existed before the rollback date and were closed on or
-- after it (or are still open) are reopened. Chains that started on or after
-- the rollback date are left out: they were created by the loads being undone
-- and would otherwise show up as a second open chain for the same member.
select
     cust_num
    ,mbl_phone
    ,start_date
    ,'99991231' as end_date
from sospdm.tmp_dwd_his
where end_date >= '20170103'
  and start_date < '20170103'
;

 

拉链表

标签:create   select   union   use   red   dynamic   mod   start   inner   

原文地址:https://www.cnblogs.com/yin-fei/p/10840986.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!