-- Tags: ace case when nta RKE 生成 not ring dfs enforce
-- Append the rows of another table (ctas_employee) to my_employee.
insert into table my_employee
    select *
    from ctas_employee;
-- Insert data through a CTE: `a` is a named subquery over ctas_employee,
-- and its rows overwrite the contents of my_employee.
with a as (
    select *
    from ctas_employee
)
insert overwrite table my_employee
    select *
    from a;
-- Multi-table insert: the source table is named once in the leading FROM
-- clause; per the original note, it is scanned only once for both targets.
from ctas_employee
insert overwrite table my_employee
    select *
insert overwrite table employee
    select *;
-- Enable dynamic partitioning for the session.
set hive.exec.dynamic.partition = true;
-- Run dynamic partitioning in non-strict mode.
set hive.exec.dynamic.partition.mode = nonstrict;
-- Write query results out to the file system.
-- Enable insertion into multi-level directories.
set hive.insert.into.multilevel.dirs=true;
-- Write every row of employee to the local directory ./output1.
-- The path must be an ordinary quoted string literal ('...' or "...");
-- typographic quotes are not valid string delimiters in HiveQL.
insert overwrite local directory 'output1'
select * from employee;
$ hive -e ‘select * from employee‘ >> output/000000_0 -- 追加数据到本地文件中
$ hive -e ‘select * from employee‘ > output/000000_0 -- 覆盖本地文件
$ hive -e ‘select * from employee‘|hdfs dfs -appendToFile /hdfs/hive/output/employee.txt -- 添加到hdfs文件中
$ hive -e ‘select * from employee‘|hdfs dfs -put -f /hdfs/hive/output/employee.txt. -- 覆盖hdfs文件
-- Export both metadata and data to HDFS.
-- Export of a (managed or external) table.
export table employee
    to "output3";
-- Export of a single partition of a partitioned table.
export table partition_employee partition (year = 2017, month = 7)
    to "output4";
-- Import the exported data and create a table with the same name.
import
    from "output3";
-- Import the data into a (managed or partitioned) table.
import table imported_employee
    from "output3";
-- Import the data as an external table and specify the target path.
import external table imported_external_employee
    from "/user/centos/output3"
    location "/hdfs/hive/output4";
-- order by (asc | desc): global ordering; per the original note it uses a
-- single reducer and is therefore inefficient.
select gender_age
from employee_id
order by gender_age desc;
-- sort by (asc | desc): only guarantees that each reducer's output is ordered.
set mapred.reduce.tasks = 2;
select gender_age
from employee_id
sort by gender_age desc;
-- Original note: with one reducer the ordering is global.
set mapred.reduce.tasks = 1;
-- distribute by: the original note describes it as pre-grouping on the
-- mapper side, similar to a Combiner.
-- Original note: the distribution column must appear in the select list.
select
    name,
    gender_age
from employee_id
distribute by gender_age.age;
-- sort by and distribute by used together.
select employee_id
from employee_id
distribute by employee_id
sort by employee_id;
-- <==> (the query above is equivalent to the query below)
-- cluster by: per the original note, its effect depends on the number of reducers.
select employee_id
from employee_id
cluster by employee_id;
-- 5.4 Operators and functions
-- List the available functions, then show the short and the extended
-- description of one of them (<function_name> is a placeholder).
show functions;
describe function <function_name>;
describe function extended <function_name>;
-- Functions for complex data types.
select size(work_place) as array_size
from employee;
select
    array_contains(work_place, "Toronto") as isToronto,
    sort_array(work_place) as sorted_array
from employee;
-- Date functions.
-- Convert the current Unix timestamp into a date.
select from_unixtime(unix_timestamp()) as current_time
from employee
limit 1;
-- Convert a date in the given format ("yy-MM-dd") into a timestamp and
-- order the rows by it.
select
    name,
    start_date
from employee_hr
order by unix_timestamp(start_date, "yy-MM-dd");
-- to_date(): removes the time portion from a date-time column
-- case: per the original note, the values after THEN and ELSE may have
-- different data types.
select
    case
        when 1 is null then "true"
        else 0
    end as case_result
from employee
limit 1;
-- Original section note: parsers and search tools.
-- Insert a single literal row into the table: the select list contains
-- only constants, and `limit 1` keeps one row from employee.
insert into table employee
    select
        "Steven" as name,
        array(null) as work_place,
        named_struct("gender", "Male", "age", 30) as gender_age,
        map("Pyphon", 90) as skills_score,
        map("R&D", array("Developer")) as apart_title
    from employee
    limit 1;
-- Lateral view: rows for which explode() returns null are dropped.
-- explode() expands a map or array column into rows.
select
    name,
    workplace,
    skills,
    score
from employee
lateral view explode(work_place) wp as workplace
lateral view explode(skills_score) sc as skills, score;
-- Outer lateral view: rows for which explode() returns null are kept.
-- reverse(str), split(str, regex): reverse the path, take the first
-- "/"-separated piece, and reverse it back to obtain the file name.
select
    reverse(split(reverse("/usr/centos/employee.txt"), "/")[0]) as filename
from employee
limit 1;
-- collect_set and collect_list build a collection from the values returned
-- by each row; the former removes duplicates, the latter keeps them.
-- Gather the name values into a set.
select collect_set(name) as names
from employee;
-- Virtual columns. Hive spells them with double underscores:
-- INPUT__FILE__NAME and BLOCK__OFFSET__INSIDE__FILE. The single-underscore
-- spelling INPUT_FILE_NAME is treated as an ordinary column name and fails
-- because no such column exists.
select INPUT__FILE__NAME, BLOCK__OFFSET__INSIDE__FILE
from employee_id_buckets;
select block__offset__inside__file from partition_employee;
select input__file__name from partition_employee;
-- Other functions.
-- Original note: these two functions did not work for the author.
select
    work_place,
    isnull(work_place) as is_null,
    isnotnull(work_place) as is_not_null
from employee;
-- assert_true throws an exception if the condition is false.
select assert_true(employee.work_place is null)
from employee
where name = "Steven";
-- elt(n, str1, str2, ...) returns the n-th string.
select elt(1, "hello", "hadoop", "word")
from employee
limit 1;
-- Return the name of the current database.
select current_database();
-- Enable transactions; per the original note this applies to ORC files
-- and bucketed tables.
set hive.support.concurrency = true;
set hive.enforce.bucketing = true;
set hive.exec.dynamic.partition.mode = nonstrict;
set hive.txn.manager = org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.compactor.initiator.on = true;
set hive.compactor.worker.threads = 1;
-- Tags: ace case when nta RKE 生成 not ring dfs enforce
-- Source: https://www.cnblogs.com/StephenMeng/p/9858822.html