标签:over diff 排名函数 cost ble alt ntile ring split
1、窗口函数
窗口范围限定:
OVER()
CURRENT ROW 当前行
N PRECEDING 向前N行
N FOLLOWING 向后N行
UNBOUNDED PRECEDING 起点
UNBOUNDED FOLLOWING 终点
order by[asc/desc] 有序
partition by 分组
建表:
-- Sample table for the window-function examples.
-- Columns match the comma-separated sample rows: name (e.g. jack),
-- orderdate as yyyy-MM-dd text (e.g. 2017-01-01), cost (e.g. 10).
CREATE TABLE IF NOT EXISTS business (
    name      STRING,
    orderdate STRING,
    cost      INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- OVERWRITE replaces whatever the table already holds, so the load can be re-run.
LOAD DATA LOCAL INPATH '/home/atbwie/a.txt' OVERWRITE INTO TABLE business;
数据:
数据准备:name,orderdate,cost
jack,2017-01-01,10
tony,2017-01-02,15
jack,2017-02-03,23
tony,2017-01-04,29
jack,2017-01-05,46
jack,2017-04-06,42
tony,2017-01-07,50
jack,2017-01-08,55
mart,2017-04-08,62
mart,2017-04-09,68
neil,2017-05-10,12
mart,2017-04-11,75
neil,2017-06-12,80
mart,2017-04-13,94
-- Sanity check: show every row that was loaded into business.
SELECT *
FROM business;
函数:
LAG(col,n,default_value) 向前第n行,有序窗口
-- LAG: for each row, o1 is the orderdate of the previous row within the same
-- name partition (ordered by orderdate); NULL when there is no previous row.
SELECT
    *,
    LAG(orderdate, 1, NULL) OVER (PARTITION BY name ORDER BY orderdate) AS o1
FROM business;
LEAD(col,n,default_value) 向后第n行,有序窗口
-- LEAD: for each row, o1 is the orderdate of the next row within the same
-- name partition (ordered by orderdate); NULL when there is no next row.
SELECT
    *,
    LEAD(orderdate, 1, NULL) OVER (PARTITION BY name ORDER BY orderdate) AS o1
FROM business;
NTILE(n) 将数据分成n组,有序窗口
-- NTILE: order all rows by orderdate and split them into 6 buckets;
-- o1 is the bucket number assigned to each row.
SELECT
    *,
    NTILE(6) OVER (ORDER BY orderdate ASC) AS o1
FROM business;
percent_rank() 显示该条记录在窗口数据中的相对排名百分比,计算方式为 (rank - 1) / (窗口总行数 - 1)
-- PERCENT_RANK: relative rank of each row over the whole table ordered by
-- orderdate, returned in o1.
SELECT
    *,
    PERCENT_RANK() OVER (ORDER BY orderdate ASC) AS o1
FROM business;
建表 加载数据
-- Sample table for the ranking-function examples; the sample rows are
-- comma-separated name,subject,score.
-- Named score1 because every query against this data selects from score1.
CREATE TABLE score1 (
    name    STRING,
    subject STRING,
    score   INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ",";

-- No OVERWRITE here: running the load again appends the file a second time.
LOAD DATA LOCAL INPATH '/home/atbwie/b.txt' INTO TABLE score1;
数据
孙悟空,数学,95
宋宋,数学,86
婷婷,数学,85
大海,数学,56
宋宋,英语,84
大海,英语,84
婷婷,英语,78
孙悟空,英语,68
大海,语文,94
孙悟空,语文,87
婷婷,语文,65
宋宋,语文,64
-- Sanity check: show every row in score1.
SELECT *
FROM score1;
2、排名函数
RANK() 排序相同时会重复,总数不会变(例如 1,1,3)
-- RANK within each subject, highest score first; tied scores share a rank
-- and the following rank is skipped.
SELECT
    *,
    RANK() OVER (PARTITION BY subject ORDER BY score DESC) AS rp
FROM score1;
DENSE_RANK() 排序相同时会重复,总数会减少(例如 1,1,2)
-- DENSE_RANK within each subject, highest score first; tied scores share a
-- rank and no rank number is skipped afterwards.
SELECT
    *,
    DENSE_RANK() OVER (PARTITION BY subject ORDER BY score DESC) AS drp
FROM score1;
ROW_NUMBER() 会根据顺序计算(例如 1,2,3,不重复)
-- ROW_NUMBER within each subject, highest score first; every row gets a
-- distinct sequence number even when scores tie.
SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY subject ORDER BY score DESC) AS rmp
FROM score1;
3、日期函数
CURRENT_DATE() 当前日期
-- Today's date. The built-in is called directly, without identifier quoting,
-- the same way the other date examples reference current_date.
SELECT current_date();
DATE_ADD(start_date,num_days) 返回开始日期后n天的日期
-- The day after today.
SELECT DATE_ADD(current_date, 1);
DATE_SUB(start_date,num_days) 返回开始日期前n天的日期
-- The day before today.
SELECT DATE_SUB(current_date, 1);
DATEDIFF(end_date,start_date) 返回两个日期的差(天数),即 end_date 减 start_date
-- Number of days from 2021-01-01 up to today (first argument minus second).
SELECT DATEDIFF(current_date, '2021-01-01');
4、其他常用函数
空字段赋值:
NVL(col,default_value) 如果col为空,返回默认值
-- Sample table for the collect/explode examples: one row per person with
-- constellation and blood type, stored as tab-delimited text.
CREATE TABLE person_info (
    name          STRING,
    constellation STRING,
    blood_type    STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t";

-- NOTE(review): the score data earlier in these notes is loaded from this same
-- path (/home/atbwie/b.txt) with a comma delimiter; confirm which file is meant.
LOAD DATA LOCAL INPATH "/home/atbwie/b.txt" OVERWRITE INTO TABLE person_info;

-- Sanity check: show every loaded row.
SELECT *
FROM person_info;
数据
孙悟空 白羊座 A
大海 射手座 A
宋宋 白羊座 B
猪八戒 白羊座 A
凤姐 射手座 A
行转列:
COLLECT_LIST() 可以重复
COLLECT_SET() 去重
字符串切分:
SPLIT(str,regex) 以正则将给定的字符串切分为Array
-- Rows to one column: for each "constellation,blood_type" combination (base),
-- gather the distinct names that share it and join them with '|'.
SELECT
    t1.base,
    CONCAT_WS('|', COLLECT_SET(t1.name)) AS name
FROM (
    SELECT
        name,
        CONCAT(constellation, ",", blood_type) AS base
    FROM person_info
) t1
GROUP BY t1.base;
列转行:
EXPLODE(ARRAY)
-- Sample table for the EXPLODE example: a movie title plus its categories
-- held as one comma-separated string, stored as tab-delimited text.
CREATE TABLE movie_info (
    movie    STRING,
    category STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t";

-- No OVERWRITE here: running the load again appends the file a second time.
LOAD DATA LOCAL INPATH "/opt/module/datas/movie.txt" INTO TABLE movie_info;
《疑犯追踪》 悬疑,动作,科幻,剧情
《Lie to me》 悬疑,警匪,动作,心理,剧情
《战狼2》 战争,动作,灾难
-- Column to rows: SPLIT turns the comma-separated category string into an
-- array, and LATERAL VIEW EXPLODE emits one (movie, cate) row per element.
SELECT
    m.movie,
    tbl.cate
FROM movie_info m
LATERAL VIEW EXPLODE(SPLIT(m.category, ",")) tbl AS cate;
UDTF:一进多出函数
UDAF:多进一出函数
UDF:一进一出函数
标签:over diff 排名函数 cost ble alt ntile ring split
原文地址:https://www.cnblogs.com/songjiadalao/p/14038730.html