2、函数

时间：2018-11-15 00:08:58 阅读：210 评论：0 收藏：0 [点我收藏+]

标签：trim timestamp 去重最小 set sele lse 日期 put

时间函数

当前日期

select current_date();
 2018-11-14

当前时间戳

select current_timestamp();
2018-11-14 21:35:16.237

date_format()

select date_format(current_date(),'yyyyMMdd');
20181114
select date_format(current_timestamp(),'yyyyMMdd');
20181114

unix_timestamp()

select unix_timestamp();
+-----------------------------------------------------------+--+
| unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss)  |
+-----------------------------------------------------------+--+
| 1542202845                                                |
+-----------------------------------------------------------+--+

from_unixtime()

select from_unixtime(unix_timestamp(),'yyyyMMdd HH:mm:ss');
+---------------------------------------------------------------------------------------------
| from_unixtime(unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss), yyyyMMdd HH:mm:ss)  
+---------------------------------------------------------------------------------------------
| 20181114 21:43:14

日期差值

0: jdbc:hive2://s101:10000/lx> select datediff('2019-08-02','2019-08-06');
+---------------------------------------------------------------+--+
| datediff(CAST(2019-08-02 AS DATE), CAST(2019-08-06 AS DATE))  |
+---------------------------------------------------------------+--+
| -4                                                            |
+---------------------------------------------------------------+--+

字符串函数

split

select explode(split('hello',''));

substr

select substr('hello',1,3);

trim去除前后空格

select trim(' hello ');

format_number

select format_number(1234.345,1);

concat

length

条件语句

窗口函数

lead

lead(input[, offset[, default]]) 上提
    input：     上提列
    offset： 上提行数，可选，默认是 1 行
    default：填充值，可选，默认是null
    使用：select id,name,lead(id,2,'qq')over(partition by id order by id) lad from www;
    注意：2可选，不写默认1
          'qq'可选，不写默认null,类型需要对应，此处id是int，所以还是为null
          partition by id：可选，分组后对每个组进行lead
          order by id：必须写

lag

lag(input[, offset[, default]]) 下拉
    input：     下拉列
    offset： 下拉行数，可选，默认是 1 行
    default：填充值，可选，默认是null
    使用：select id,name,lag(id,2,11)over(partition by id order by id) lag from www;
    注意：2可选，不写默认1
          11可选，不写默认null
          partition by id：可选，分组后对每个组进行lag
          order by id：必须写

first_value

first_value(expr[, isIgnoreNull])
    expr：列名或一个表达式
    isIgnoreNull：true或false，如果是true将跳过null值，可选，默认false
    select id,name,first_value(concat(cast(id as string), name),true)over(partition by name order by id) lag from www;

last_value

select id,name,last_value(concat(cast(id as string), name),true)over(order by id ) lag from www;
+-----+-------+--------+--+
| id  | name  |  lag   |
+-----+-------+--------+--+
| 1   | a     | 1a     |
| 2   | b     | 2b     |
| 3   | c     | 3c     |
| 4   | c     | 4c     |
| 5   | c     | 5c     |
| 6   | d     | 6d     |
| 7   | b     | 7b     |
| 8   | a     | 8a     |
| 9   | a     | 9a     |
| 12  | eee   | 12eee  |
+-----+-------+--------+--+

select id,name from www;
+-----+-------+--+
| id  | name  |
+-----+-------+--+
| 12  | eee   |
| 1   | a     |
| 2   | b     |
| 3   | c     |
| 4   | c     |
| 5   | c     |
| 6   | d     |
| 7   | b     |
| 8   | a     |
| 9   | a     |
+-----+-------+--+
select id,name,last_value(concat(cast(id as string), name),true)over() lag from www;
+-----+-------+------+--+
| id  | name  | lag  |
+-----+-------+------+--+
| 12  | eee   | 6d   |
| 7   | b     | 6d   |
| 8   | a     | 6d   |
| 9   | a     | 6d   |
| 1   | a     | 6d   |
| 2   | b     | 6d   |
| 3   | c     | 6d   |
| 4   | c     | 6d   |
| 5   | c     | 6d   |
| 6   | d     | 6d   |
+-----+-------+------+--+

over和标准聚合函数

select distinct name,count(name)over(partition by name) s from www;
求分区个数并去重
select distinct name,sum(id)over(partition by name) s from www;
求每个分区的id之和并去重
select id,name,max(length(name))over() from www;
总体的最大长度
select name,min(id)over(partition by name) s from www;
每个分区最小id
select name,max(id)over(partition by name) s from www;
每个分区最大id
select name,avg(id)over(partition by name) s from www;
每个分区平均id

over和partition by

over和partition by order by

select first_value(id)over(partition by id,name) from www;
select first_value(id)over(partition by id,name order by id,name) from www;

分析函数

rank

row_number

dense_rank

cume_dist

percent_rank

ntile

2、函数

标签：trim timestamp 去重最小 set sele lse 日期 put

原文地址：https://www.cnblogs.com/lybpy/p/9961049.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行