标签:
十七、分段维度
Segment Name | Band Name | Start Value | End Value |
PROJECT ALPHA | Bottom | 0.01 | 2500.00 |
PROJECT ALPHA | Low | 2500.01 | 3000.00 |
PROJECT ALPHA | Mid-low | 3000.01 | 4000.00 |
PROJECT ALPHA | Mid | 4000.01 | 5500.00 |
PROJECT ALPHA | Mid-high | 5500.01 | 6500.00 |
PROJECT ALPHA | Top | 6500.01 | 99999999.99 |
Grid | LOW | 0.01 | 3000.00 |
Grid | MED | 3000.01 | 6000.00 |
Grid | HIGH | 6000.01 | 99999999.99 |
-- Creates the annual order segment (band) dimension, seeds its band rows, and
-- creates the year dimension and the two annual fact tables (HiveQL).
use dw;

-- One row per (segment, band): an inclusive amount range
-- [band_start_amount, band_end_amount] plus version / effective-date columns.
-- Declared as a bucketed ORC table with the 'transactional' property set.
create table annual_order_segment_dim (
    segment_sk        int,
    segment_name      varchar(30),
    band_name         varchar(50),
    band_start_amount decimal(10,2),
    band_end_amount   decimal(10,2),
    version           int,
    effective_date    date,
    expiry_date       date
)
clustered by (segment_sk) into 8 buckets
stored as orc
tblproperties ('transactional'='true');

-- Seed rows. String literals use plain ASCII single quotes, date literals carry
-- no leading whitespace, and band 5 is spelled 'mid-high' to match 'mid-low'.
-- All rows are loaded by one multi-row insert instead of nine separate statements.
insert into annual_order_segment_dim values
    (1, 'project alpha', 'bottom',   0.01,    2500.00,     1, '1900-01-01', '2200-01-01'),
    (2, 'project alpha', 'low',      2500.01, 3000.00,     1, '1900-01-01', '2200-01-01'),
    (3, 'project alpha', 'mid-low',  3000.01, 4000.00,     1, '1900-01-01', '2200-01-01'),
    (4, 'project alpha', 'mid',      4000.01, 5500.00,     1, '1900-01-01', '2200-01-01'),
    (5, 'project alpha', 'mid-high', 5500.01, 6500.00,     1, '1900-01-01', '2200-01-01'),
    (6, 'project alpha', 'top',      6500.01, 99999999.99, 1, '1900-01-01', '2200-01-01'),
    (7, 'grid',          'low',      0.01,    3000.00,     1, '1900-01-01', '2200-01-01'),
    (8, 'grid',          'med',      3000.01, 6000.00,     1, '1900-01-01', '2200-01-01'),
    (9, 'grid',          'high',     6000.01, 99999999.99, 1, '1900-01-01', '2200-01-01');

-- Year dimension: surrogate key plus the calendar year.
create table year_dim (
    year_sk int,
    year    int
);

-- Total order amount per customer per year.
create table annual_sales_order_fact (
    customer_sk         int,
    year_sk             int,
    annual_order_amount decimal(10, 2)
);

-- Factless table linking a customer and year to each band the annual amount falls in.
create table annual_customer_segment_fact (
    segment_sk  int,
    customer_sk int,
    year_sk     int
);
-- Initial load of year_dim, annual_sales_order_fact and
-- annual_customer_segment_fact (HiveQL).
-- NOTE(review): every statement is a plain "insert into" with no guard against
-- existing rows, so running this script twice would append duplicates.
use dw;

-- Give each distinct year in order_date_dim a surrogate key that continues
-- after the current maximum year_sk (0 when year_dim is empty).
insert into year_dim
select
    row_number() over (order by t1.year) + t2.sk_max,
    t1.year
from (select distinct year from order_date_dim) t1
cross join (select coalesce(max(year_sk), 0) sk_max from year_dim) t2;

-- Sum order amounts per customer and year for all years before 2017
-- (the cut-off year is hard-coded).
insert into annual_sales_order_fact
select
    a.customer_sk,
    c.year_sk,
    sum(order_amount)
from sales_order_fact a
inner join order_date_dim d
    on a.order_date_sk = d.order_date_sk
inner join year_dim c
    on c.year = d.year
where d.year < 2017
group by
    a.customer_sk,
    c.year_sk;

-- Match every annual amount against every band whose inclusive range contains
-- it. There is no filter on segment, so an amount can match one band in each
-- segment. The range test stays in WHERE on top of an explicit cross join:
-- older Hive releases reject non-equality conditions in ON
-- (presumably before 2.2.0 -- confirm against the target version).
insert into annual_customer_segment_fact
select
    d.segment_sk,
    a.customer_sk,
    a.year_sk
from annual_sales_order_fact a
cross join annual_order_segment_dim d
where a.annual_order_amount >= d.band_start_amount
  and a.annual_order_amount <= d.band_end_amount;
执行初始装载脚本,查询annual_customer_segment_fact表确认初始装载是成功的。
-- Verification query: lists each customer/year segment assignment together
-- with the annual amount and the segment and band names it was matched to.
-- "order by" replaces the original "cluster by", which only sorts rows within
-- each reducer and therefore does not guarantee a totally ordered result.
-- NOTE(review): if the cluster runs Hive in strict mode, an unbounded
-- "order by" may need a "limit" -- confirm against the target configuration.
select
    a.customer_sk         as csk,
    a.year_sk             as ysk,
    d.annual_order_amount as amt,
    b.segment_name        as sn,
    b.band_name           as bn
from annual_customer_segment_fact a
inner join annual_order_segment_dim b
    on a.segment_sk = b.segment_sk
-- year_dim contributes no output column; the join is kept so that rows whose
-- year_sk is absent from year_dim are still excluded, as in the original.
inner join year_dim c
    on a.year_sk = c.year_sk
inner join annual_sales_order_fact d
    on a.customer_sk = d.customer_sk
   and a.year_sk = d.year_sk
order by csk, ysk, sn, bn;
查询结果如下图所示。
-- Regular (yearly) load: appends the previous calendar year's rows to
-- annual_sales_order_fact and annual_customer_segment_fact (HiveQL).
-- NOTE(review): both statements are plain "insert into" with no guard against
-- existing rows, so running the script more than once in the same year would
-- append duplicate rows for that year.
use dw;

-- Sum order amounts per customer for the previous calendar year.
insert into annual_sales_order_fact
select
    a.customer_sk,
    c.year_sk,
    sum(order_amount)
from sales_order_fact a
inner join order_date_dim d
    on a.order_date_sk = d.order_date_sk
inner join year_dim c
    on c.year = d.year
where d.year = year(current_date) - 1
group by
    a.customer_sk,
    c.year_sk;

-- Assign the previous year's annual amounts to every band whose inclusive
-- range contains them. The range test stays in WHERE on top of an explicit
-- cross join: older Hive releases reject non-equality conditions in ON
-- (presumably before 2.2.0 -- confirm against the target version).
insert into annual_customer_segment_fact
select
    d.segment_sk,
    a.customer_sk,
    c.year_sk
from annual_sales_order_fact a
inner join year_dim c
    on a.year_sk = c.year_sk
cross join annual_order_segment_dim d
where c.year = year(current_date) - 1
  and a.annual_order_amount >= d.band_start_amount
  and a.annual_order_amount <= d.band_end_amount;
基于hadoop生态圈的数据仓库实践 —— 进阶技术(十七)
标签:
原文地址:http://blog.csdn.net/wzy0623/article/details/52184376