```
CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
```
```
create database db_hive_01;
create database if not exists db_hive_02; -- standard form
create database if not exists db_hive_03
location '/user/weblog/hive/warehouse/db_hive_03.db';
```
```
show databases;
show databases like 'db_hive*'; -- list the databases matching the pattern
desc database db_hive_01;
drop database db_hive_01;
drop database db_hive_01 cascade; -- use cascade to drop a database that still contains tables
```
```
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name -- (Note: TEMPORARY available in Hive 0.14.0 and later)
[(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[SKEWED BY (col_name, col_name, ...) -- (Note: Available in Hive 0.10.0 and later)
ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
[STORED AS DIRECTORIES]]
[
[ROW FORMAT row_format]
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later)
]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later)
[AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
data_type
: primitive_type
| array_type
| map_type
| struct_type
| union_type -- (Note: Available in Hive 0.7.0 and later)
primitive_type
: TINYINT
| SMALLINT
| INT
| BIGINT
| BOOLEAN
| FLOAT
| DOUBLE
| DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
| STRING
| BINARY -- (Note: Available in Hive 0.8.0 and later)
| TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
| DECIMAL -- (Note: Available in Hive 0.11.0 and later)
| DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
| DATE -- (Note: Available in Hive 0.12.0 and later)
| VARCHAR -- (Note: Available in Hive 0.12.0 and later)
| CHAR -- (Note: Available in Hive 0.13.0 and later)
array_type
: ARRAY < data_type >
map_type
: MAP < primitive_type, data_type >
struct_type
: STRUCT < col_name : data_type [COMMENT col_comment], ...>
union_type
: UNIONTYPE < data_type, data_type, ... > -- (Note: Available in Hive 0.7.0 and later)
row_format
: DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
[NULL DEFINED AS char] -- (Note: Available in Hive 0.13 and later)
| SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
file_format
: SEQUENCEFILE
| TEXTFILE -- (Default, depending on hive.default.fileformat configuration)
| RCFILE -- (Note: Available in Hive 0.6.0 and later)
| ORC -- (Note: Available in Hive 0.11.0 and later)
| PARQUET -- (Note: Available in Hive 0.13.0 and later)
| AVRO -- (Note: Available in Hive 0.14.0 and later)
| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
constraint_specification:
: [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ]
[, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE ]
```
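As a quick, hedged illustration of the complex types and row_format options in the grammar above (the table name, columns, and delimiters here are hypothetical, not from the original post):
```
-- exercise array, map, and struct from the grammar above
create table if not exists default.user_profile(
name string,
hobbies array<string>,
scores map<string,int>,
address struct<city:string, street:string>
)
row format delimited fields terminated by ','
collection items terminated by '_'
map keys terminated by ':'
stored as textfile;
```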
```
create table if not exists default.student(
sno string comment 'student number',
sname string,
sex string,
sage int,
sdept string comment 'class the student belongs to'
)
row format delimited fields terminated by ' '
stored as textfile
location '/user/weblog/hive/warehouse/student';

-- or create with CTAS (create table ... as select)
create table if not exists default.student_tmp
as select sno, sname, sex, sage from default.student;

-- or create a table with the same schema using LIKE
create table if not exists default.student_like
like default.student;
```
```
CREATE EXTERNAL TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User',
country STRING COMMENT 'country of origination')
COMMENT 'This is the staging page view table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054'
STORED AS TEXTFILE
LOCATION '<hdfs_location>';

CREATE TABLE page_view(
viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
STORED AS SEQUENCEFILE;
-- SEQUENCEFILE stores the data as a compressed sequence file
```
When a partition directory is created by hand on HDFS (rather than through a Hive statement), the metastore must be updated before the partition becomes queryable:
Option 1: msck repair table table_name
Option 2: alter table table_name add partition(partition_col='partition_value')
For example: alter table dept add partition (day='20180330')
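A minimal end-to-end sketch of option 1, assuming a dept table created with partitioned by (day string) and the warehouse path shown (both are assumptions):
```
-- create the partition directory by hand (dfs runs HDFS commands from the Hive CLI)
dfs -mkdir -p /user/weblog/hive/warehouse/dept/day=20180330;
-- scan the table's location and register any missing partitions in the metastore
msck repair table dept;
show partitions dept;
```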
```
CREATE TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)
CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
STORED AS SEQUENCEFILE;
```
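One usage note: on older Hive releases (before 2.x, where bucketing enforcement became automatic), the bucketing flag had to be set before populating a bucketed table. A hedged sketch; page_view_stage is a hypothetical staging table:
```
set hive.enforce.bucketing = true; -- assumption: needed on pre-2.x Hive only
insert overwrite table page_view partition(dt='2018-03-30', country='US')
select viewTime, userid, page_url, referrer_url, ip
from page_view_stage; -- hypothetical source table with the same five columns
```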
-- employee table
```
create table if not exists default.emp(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
)
row format delimited fields terminated by ' '
stored as textfile;
```
-- external table
-- you can specify the directory location yourself at creation time (location)
-- dropping the table removes only the metadata, not the table data
```
create external table if not exists default.emp_ext(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
)
row format delimited fields terminated by ' '
stored as textfile
location '/user/weblog/hive/warehouse/emp_ext';
```
-- department table
```
create table if not exists default.dept(
deptno int,
dname string,
loc string
)
row format delimited fields terminated by ' '
stored as textfile;
```
```
load data local inpath '/home/hadoop/hivedata/emp.txt' overwrite into table emp;
load data local inpath '/home/hadoop/hivedata/dept.txt' overwrite into table dept;

create table if not exists default.emp_cats as
select * from emp;

truncate table emp_cats;
alter table emp_cats rename to emp_cats_rename;
drop table if exists emp_cats_rename;
```
```
LOAD DATA [LOCAL] INPATH 'filepath'
[OVERWRITE] INTO TABLE tablename
[PARTITION (partcol1=val1, partcol2=val2 ...)]
```
Loading into a partitioned table is a special case: the target partition must be named with the PARTITION clause. Load a local file into a Hive table:
```
load data local inpath '/home/hadoop/datas/tmp.txt' into table default.tmp;
-- load from HDFS (the source file is moved into the table directory)
load data inpath '/user/weblog/datas/tmp.txt' into table default.tmp;
load data inpath '/user/weblog/datas/tmp.txt' overwrite into table default.tmp;
```
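For the partitioned case, a hedged example against the page_view table defined earlier (the file path is hypothetical):
```
load data local inpath '/home/hadoop/datas/pv_20180330.txt'
into table default.page_view partition (dt='2018-03-30', country='US');
```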
```
create table default.student_tmp like default.student;
insert into table default.student_tmp select * from default.student;

-- export query results to a local directory
insert overwrite local directory '/home/hadoop/hive_student'
select * from default.student;

-- the same, with explicit output delimiters
insert overwrite local directory '/home/hadoop/hive_student'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' COLLECTION ITEMS TERMINATED BY '\n'
select * from default.student;

-- or dump results from the shell
hive -e 'select * from default.student;' > /home/hadoop/hive_student/student.txt
```
```
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[ORDER BY col_list]
[CLUSTER BY col_list
| [DISTRIBUTE BY col_list] [SORT BY col_list]
]
[LIMIT [offset,] rows]
```
```
select count(*) cnt from emp;
select max(sal) max_sal from emp;
select min(sal) min_sal from emp;
select sum(sal) from emp;
select avg(sal) from emp;
```
Grouping:
```
-- average salary per department
select deptno, avg(sal) avg_sal from emp group by deptno;
-- highest salary for each job within each department
select deptno, job, max(sal) max_sal from emp group by deptno, job;
-- departments whose average salary is greater than 2000
select deptno, avg(sal) avg_sal from emp group by deptno having avg_sal > 2000;
```
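For contrast between row and group filters (WHERE runs before aggregation, HAVING after), a hedged sketch using the same emp table:
```
-- where filters rows before grouping; having filters the aggregated groups
select deptno, avg(sal) avg_sal
from emp
where comm is not null
group by deptno
having avg_sal > 2000;
```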
Import syntax:
```
IMPORT [[EXTERNAL] TABLE new_or_original_tablename [PARTITION (part_column="value"[, ...])]]
FROM 'source_path'
[LOCATION 'import_target_path']
```
Export syntax:
```
EXPORT TABLE tablename [PARTITION (part_column="value"[, ...])]
TO 'export_target_path' [ FOR replication('eventid') ]
```
export_target_path is a path on HDFS. Export table data:
```
export table default.emp to '/user/hive/export/emp_exp';
```
Import table data:
```
import table default.emp from '/user/hive/export/emp_exp';
```
```
select * from emp order by empno desc;
```
To sort within each reducer (sort by), first set the number of reduce tasks:
```
set mapreduce.job.reduces=<number>;
select * from emp sort by empno asc;
```
Write the result sets to local files to inspect the effect:
```
insert overwrite local directory '/home/hadoop/hivedata/sortby' select * from emp sort by empno asc;
insert overwrite local directory '/home/hadoop/hivedata/distby' select * from emp distribute by deptno sort by empno asc;
```
Note: distribute by must appear before sort by.
```
insert overwrite local directory '/home/hadoop/hivedata/clusterby' select * from emp cluster by empno;
```
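cluster by col is shorthand for distribute by col sort by col (ascending only), so the two queries below should produce equivalent output layouts:
```
select * from emp cluster by empno;
select * from emp distribute by empno sort by empno;
```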
Hive ships with some built-in functions, such as max/min, but the set is limited; you can conveniently extend it by writing your own UDFs.
UDF: user-defined function, which lets users extend HiveQL's functionality.
```
package com.example.hive.udf;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

// Convert a string to lowercase
public final class Lower extends UDF {
  public Text evaluate(final Text s) {
    if (s == null) { return null; }
    return new Text(s.toString().toLowerCase());
  }
}
```
Package the finished UDF into a jar, then load the jar into the Hive session and register it:
```
add jar /home/hadoop/demojar/hiveudf.jar;
-- register it as a function (temporary = session-scoped)
create temporary function my_lower as "com.example.hive.udf.Lower";
-- registration method in Hive 0.13 and later
CREATE FUNCTION my_lower AS 'com.example.hive.udf.Lower' USING JAR 'hdfs:///path/to/jar';
-- test the result
select ename, my_lower(ename) lowername from default.emp;
```
Source: https://www.cnblogs.com/Marsm/p/68b9bfbb2c88e852f00c927cc2f08242.html