标签:
十七、分段维度
Segment Name | Band Name | Start Value | End Value |
PROJECT ALPHA | Bottom | 0.01 | 2500.00 |
PROJECT ALPHA | Low | 2500.01 | 3000.00 |
PROJECT ALPHA | Mid-low | 3000.01 | 4000.00 |
PROJECT ALPHA | Mid | 4000.01 | 5500.00 |
PROJECT ALPHA | Mid-high | 5500.01 | 6500.00 |
PROJECT ALPHA | Top | 6500.01 | 99999999.99 |
Grid | LOW | 0.01 | 3000.00 |
Grid | MED | 3000.01 | 6000.00 |
Grid | HIGH | 6000.01 | 99999999.99 |
use dw;

-- Segment dimension: each row is one named band [band_start_amount, band_end_amount]
-- within a segment. The table is bucketed, stored as ORC and flagged transactional.
create table annual_order_segment_dim (
    segment_sk        int,
    segment_name      varchar(30),
    band_name         varchar(50),
    band_start_amount decimal(10,2),
    band_end_amount   decimal(10,2),
    version           int,
    effective_date    date,
    expiry_date       date
)
clustered by (segment_sk) into 8 buckets
stored as orc tblproperties ('transactional'='true');

-- Seed the bands for the two segments ("project alpha" with six bands, "grid" with three).
-- Fixes relative to the previous version of this script:
--   * string literals use plain ASCII single quotes (typographic quotes do not parse);
--   * band 5 is named 'mid-high', matching the band table and the 'mid-low' row;
--   * no stray leading blank inside the effective_date literals of rows 6 and 8;
--   * a single multi-row insert, so the transactional table is written by one statement.
-- 99999999.99 is the largest value that fits decimal(10,2) and acts as the open upper bound.
insert into annual_order_segment_dim
    (segment_sk, segment_name, band_name, band_start_amount, band_end_amount,
     version, effective_date, expiry_date)
values
    (1, 'project alpha', 'bottom',   0.01,    2500.00,     1, '1900-01-01', '2200-01-01'),
    (2, 'project alpha', 'low',      2500.01, 3000.00,     1, '1900-01-01', '2200-01-01'),
    (3, 'project alpha', 'mid-low',  3000.01, 4000.00,     1, '1900-01-01', '2200-01-01'),
    (4, 'project alpha', 'mid',      4000.01, 5500.00,     1, '1900-01-01', '2200-01-01'),
    (5, 'project alpha', 'mid-high', 5500.01, 6500.00,     1, '1900-01-01', '2200-01-01'),
    (6, 'project alpha', 'top',      6500.01, 99999999.99, 1, '1900-01-01', '2200-01-01'),
    (7, 'grid',          'low',      0.01,    3000.00,     1, '1900-01-01', '2200-01-01'),
    (8, 'grid',          'med',      3000.01, 6000.00,     1, '1900-01-01', '2200-01-01'),
    (9, 'grid',          'high',     6000.01, 99999999.99, 1, '1900-01-01', '2200-01-01');
-- Year dimension: a surrogate key paired with a calendar year number.
create table year_dim (
    year_sk int,
    year    int
);

-- Annual sales fact: an order amount per (customer_sk, year_sk).
create table annual_sales_order_fact (
    customer_sk         int,
    year_sk             int,
    annual_order_amount decimal(10, 2)
);

-- Customer/segment fact: holds only keys (segment, customer, year) and no measures.
create table annual_customer_segment_fact (
    segment_sk  int,
    customer_sk int,
    year_sk     int
);

-- Select the dw database for the statements that follow.
use dw;
-- Fill year_dim with every distinct year present in order_date_dim.
-- Surrogate keys are numbered in ascending year order and continue after the
-- current maximum year_sk (0 when year_dim is still empty).
insert into year_dim
select
    row_number() over (order by src.year) + cur.sk_max,
    src.year
from (
    select distinct year
    from order_date_dim
) src
cross join (
    select coalesce(max(year_sk), 0) as sk_max
    from year_dim
) cur;
-- Aggregate order amounts per customer and year for all order dates whose
-- year is earlier than 2017, and store the totals in annual_sales_order_fact.
insert into annual_sales_order_fact
select
    a.customer_sk,
    c.year_sk,
    sum(order_amount)
from sales_order_fact a
inner join order_date_dim d
    on a.order_date_sk = d.order_date_sk
inner join year_dim c
    on c.year = d.year
where d.year < 2017
group by
    a.customer_sk,
    c.year_sk;
-- Assign every (customer, year) row of annual_sales_order_fact to each band whose
-- [band_start_amount, band_end_amount] range contains its annual amount.
-- The range test is kept in WHERE on top of a cross join rather than in an ON clause.
insert into annual_customer_segment_fact
select
    d.segment_sk,
    a.customer_sk,
    a.year_sk
from annual_sales_order_fact a
cross join annual_order_segment_dim d
where a.annual_order_amount >= d.band_start_amount
  and a.annual_order_amount <= d.band_end_amount;

执行初始装载脚本,查询annual_customer_segment_fact表确认初始装载是成功的。

-- Verification query: list each segment assignment with its annual amount
-- and the segment and band names.
select
    a.customer_sk         csk,
    a.year_sk             ysk,
    d.annual_order_amount amt,
    b.segment_name        sn,
    b.band_name           bn
from annual_customer_segment_fact a
inner join annual_order_segment_dim b
    on a.segment_sk = b.segment_sk
inner join year_dim c
    on a.year_sk = c.year_sk
inner join annual_sales_order_fact d
    on a.customer_sk = d.customer_sk
   and a.year_sk = d.year_sk
cluster by csk, ysk, sn, bn;

查询结果如下图所示。

-- Select the dw database for the statements that follow.
use dw;
-- Aggregate order amounts per customer for the previous calendar year
-- (relative to current_date) and append the totals to annual_sales_order_fact.
insert into annual_sales_order_fact
select
    a.customer_sk,
    c.year_sk,
    sum(order_amount)
from sales_order_fact a
inner join order_date_dim d
    on a.order_date_sk = d.order_date_sk
inner join year_dim c
    on c.year = d.year
where d.year = year(current_date) - 1
group by
    a.customer_sk,
    c.year_sk;
-- Assign the previous calendar year's annual amounts (relative to current_date)
-- to every band whose [band_start_amount, band_end_amount] range contains them.
-- The range test is kept in WHERE on top of a cross join rather than in an ON clause.
insert into annual_customer_segment_fact
select
    d.segment_sk,
    a.customer_sk,
    c.year_sk
from annual_sales_order_fact a
inner join year_dim c
    on a.year_sk = c.year_sk
cross join annual_order_segment_dim d
where c.year = year(current_date) - 1
  and a.annual_order_amount >= d.band_start_amount
  and a.annual_order_amount <= d.band_end_amount;

基于hadoop生态圈的数据仓库实践 —— 进阶技术(十七)
标签:
原文地址:http://blog.csdn.net/wzy0623/article/details/52184376