hive基本操作

时间：2019-06-14 18:15:12 阅读：93 评论：0 收藏：0 [点我收藏+]

标签：class sele ted 名称匹配 one sel clust col

一、创建分区分桶表

-- Partitioned and bucketed table stored as Parquet.
-- `clustered by (pnl_id) into 40 buckets` splits the data into 40 buckets;
-- rows are assigned to a bucket by the hash of pnl_id.
-- Why bucket: with dynamic partitioning, a partition column that has too many
-- distinct values produces too many files and can destabilise the system,
-- so such a column can be bucketed instead of partitioned.
-- NOTE(review): field.delim / escape.delim are text-format settings and are
-- presumably ignored by the Parquet SerDe; kept as in the original, confirm
-- before relying on them.
create table test (
    pnl_id        string,
    event_timekey string
)
partitioned by (factory string)
clustered by (pnl_id) into 40 buckets
row format serde 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
with serdeproperties (
    'field.delim' = '\t',
    'escape.delim' = '\n',
    'serialization.null.format' = 'NULL',
    'serialization.encoding' = 'UTF-8'
)
stored as parquet;

-- Session settings. They must be in effect before the insert below runs,
-- so they are issued first.

-- Enable dynamic partitioning.
set hive.exec.dynamic.partition=true;
-- Non-strict partition mode; a dynamic-partition insert only works once this is enabled.
set hive.exec.dynamic.partition.mode=nonstrict;
-- Maximum number of dynamic partitions each mapper or reducer may create.
set hive.exec.max.dynamic.partitions.pernode=100;
-- Maximum number of dynamic partitions a single statement may create.
set hive.exec.max.dynamic.partitions=1000;
-- Maximum number of files that may be created overall.
set hive.exec.max.created.files=100000;
-- Enforce bucketing when writing into the bucketed table.
set hive.enforce.bucketing=true;

-- The dynamic partition column is taken from the select list by POSITION
-- (the last column), not by name: `factory as fac` still feeds partition(factory).
insert into test partition (factory)
select
    pnl_id,
    event_timekey,
    factory as fac
from dwr_pnl_hist
where shift_timekey = '20190610 180000';

二、常用查询命令

-- `sort by` orders rows within each reducer only;
-- `order by` sends all data through a single reducer for a global ordering.
select
    pnl_id,
    event_timekey,
    factory
from test
sort by event_timekey desc
limit 10;

hive基本操作

标签：class sele ted 名称匹配 one sel clust col

原文地址：https://www.cnblogs.com/csyusu/p/11024501.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行