输入 hive;进入hive环境
输入 quit;退出hive环境
-- List the databases that already exist.
show databases;
-- Create the mng database. "if not exists" keeps the statement safe to re-run.
create database if not exists mng;
-- Switch to mng so that later statements resolve against it.
use mng;
成功显示如下:
-- List the tables in the current database.
show tables;
- 数据类型对比(Oracle -- Hive)
NUMBER(14) -- BIGINT
NUMBER(m,n) -- DOUBLE
VARCHAR2 -- STRING
NUMBER(1) -- TINYINT
- 表只保留一级分区;使用数据时间做分区
- 表分区保留多级分区
- 去掉default和not null声明
- 分区字段的数据不需要写入表文件
- insert数据时,hive默认用\N表示null值,需要改变这个设置,使用''(空字符串)表示null值
-- Represent NULL as an empty string instead of Hive's default \N marker.
-- "tablename" is a placeholder for the target table.
alter table tablename set serdeproperties('serialization.null.format' = '');
- 表存储设置有两种定义方式,推荐用方式2
表定义1:
-- Table definition, variant 1: delimiters are declared with ROW FORMAT DELIMITED.
-- The table has a single partition column, data_date, which is declared only in
-- PARTITIONED BY and is not one of the regular columns.
-- Delimiters: '|' between fields, ',' between collection items, '-' between map
-- keys and values, newline between rows.
create table TEST
(
    cell_id string,
    user_cnt bigint,
    hcsv_cnt bigint,
    arpu double,
    call_dur_sum double,
    sms_cnt_sum bigint,
    data_vol_sum double,
    erl double
)
partitioned by (data_date bigint)
row format delimited
fields terminated by '|'
collection items terminated by ','
map keys terminated by '-'
lines terminated by '\n';
-- Represent NULL as an empty string instead of the default \N marker.
-- This targets the TEST table created above (the original text named a different table).
alter table TEST set serdeproperties('serialization.null.format' = '');
表定义2:
-- Table definition, variant 2: the same columns, with every delimiter passed
-- explicitly to LazySimpleSerDe through SERDEPROPERTIES.
-- data_date is the single partition column, which is what a LOAD ... PARTITION
-- (data_date=...) statement requires of the target table.
-- 'colelction.delim' is spelled this way on purpose: Hive releases before 3.0
-- define the collection-delimiter key with this typo (HIVE-16922).
-- NOTE(review): on Hive 3.0 or later confirm whether 'collection.delim' is required.
-- NOTE(review): 'escape.delim' is normally a single character such as a backslash.
-- Confirm that '//' is intended.
-- 'serialization.null.format'='' stores NULL as an empty string instead of \N.
create table TEST
(
    cell_id string,
    user_cnt bigint,
    hcsv_cnt bigint,
    arpu double,
    call_dur_sum double,
    sms_cnt_sum bigint,
    data_vol_sum double,
    erl double
)
partitioned by (data_date bigint)
row format serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' with serdeproperties
(
    'escape.delim'='//',
    'mapkey.delim'='-',
    'serialization.format'='|',
    'colelction.delim'=',',
    'field.delim'='|',
    'line.delim'='\n',
    'serialization.null.format'=''
)
stored as textfile;
-- Load a file from the local filesystem into one partition of the table.
-- "overwrite" replaces whatever data that partition already holds.
-- The inpath literal and TABLE_NAME are placeholders to be substituted.
-- The partition value comes from the PARTITION clause, not from the file contents.
load data local inpath '文件路径' overwrite into table TABLE_NAME partition (DATA_DATE='20130101000000');
备注:分区字段的数据不需要写入表文件
-- Show the detailed metadata of a table ("tablename" is a placeholder).
desc formatted tablename;
-- Run HDFS shell commands from inside the Hive CLI to list a path and print a file.
-- The path argument is a placeholder. Each command must end with a semicolon,
-- and the option prefix must be an ASCII hyphen.
dfs -ls 文件路径;
dfs -cat 文件路径;
-- Remove all rows of a single partition while keeping the partition itself.
truncate table TABLE_NAME partition (sum_start_dttm=20131001000000);
-- Drop the partition from the table.
alter table TABLE_NAME drop partition (sum_start_dttm = 20131001000000);
原文地址:http://blog.csdn.net/mygrowth/article/details/26082801