码迷,mamicode.com
首页 > 其他好文 > 详细

数据操作

时间:2018-10-27 00:12:36      阅读:171      评论:0      收藏:0      [点我收藏+]

标签:ace   case when   nta   RKE   生成   not   ring   dfs   enforce   

-- Append every row of ctas_employee to my_employee.
INSERT INTO TABLE my_employee
SELECT *
FROM ctas_employee;

-- Insert through a CTE: source_rows names the rows read from ctas_employee,
-- and INSERT OVERWRITE replaces the contents of my_employee with them.
WITH source_rows AS (
    SELECT *
    FROM ctas_employee
)
INSERT OVERWRITE TABLE my_employee
SELECT *
FROM source_rows;
 
-- Multi-table insert: the FROM clause is written first so that the source
-- table is scanned only once while feeding both target tables.
FROM ctas_employee
INSERT OVERWRITE TABLE my_employee
    SELECT *
INSERT OVERWRITE TABLE employee
    SELECT *;

-- Enable dynamic partitioning.
-- nonstrict mode lets every partition column be resolved dynamically
-- (strict mode requires at least one static partition).
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;

-- Write query results to the file system.
-- Allow the target directory to be a multilevel (nested) path.
SET hive.insert.into.multilevel.dirs=true;
-- Write into a local directory (relative path, i.e. ./output1).
INSERT OVERWRITE LOCAL DIRECTORY 'output1'
SELECT *
FROM employee;

-- The "$" lines below are shell commands: run them from a terminal, not from the Hive prompt.
-- Append the query output to a local file:
$ hive -e 'select * from employee' >> output/000000_0
-- Overwrite a local file with the query output:
$ hive -e 'select * from employee' > output/000000_0
-- Append the query output to an HDFS file ("-" tells hdfs to read from stdin):
$ hive -e 'select * from employee' | hdfs dfs -appendToFile - /hdfs/hive/output/employee.txt
-- Overwrite an HDFS file with the query output ("-" tells hdfs to read from stdin):
$ hive -e 'select * from employee' | hdfs dfs -put -f - /hdfs/hive/output/employee.txt

-- EXPORT writes both the table metadata and its data to a path on HDFS.
-- Export a whole (managed or external) table:
EXPORT TABLE employee TO "output3";
-- Export a single partition of a partitioned table:
EXPORT TABLE partition_employee PARTITION (year=2017, month=7) TO "output4";
-- Import the data and create a table with the same name as the exported one:
IMPORT FROM "output3";
-- Import into an explicitly named (managed or partitioned) table:
IMPORT TABLE imported_employee FROM "output3";
-- Import as an external table and choose where its data is stored:
IMPORT EXTERNAL TABLE imported_external_employee FROM "/user/centos/output3"
LOCATION "/hdfs/hive/output4";

-- ORDER BY (ASC | DESC): global ordering; it uses a single reducer, so it is slow.
SELECT gender_age
FROM employee_id
ORDER BY gender_age DESC;

-- SORT BY (ASC | DESC): only guarantees that each reducer's output is ordered.
SET mapred.reduce.tasks = 2;
SELECT gender_age
FROM employee_id
SORT BY gender_age DESC;
-- With a single reducer SORT BY produces a global ordering.
SET mapred.reduce.tasks = 1;

-- DISTRIBUTE BY: decides how rows are spread across reducers; rows with the
-- same value of the distribution column go to the same reducer.
-- NOTE(review): the original note describes this as map-side pre-grouping
-- "similar to a Combiner" and says the distribution column must appear in the
-- SELECT list (here it is a field of the selected gender_age struct) - confirm.
SELECT
    name,
    gender_age
FROM employee_id
DISTRIBUTE BY gender_age.age;

-- DISTRIBUTE BY combined with SORT BY on the same column ...
SELECT employee_id
FROM employee_id
DISTRIBUTE BY employee_id
SORT BY employee_id;

-- ... is equivalent to CLUSTER BY on that column.
-- The result depends on the number of reducers in use.
SELECT employee_id
FROM employee_id
CLUSTER BY employee_id;

-- 5.4 操作符和函数
-- List the available functions.
SHOW FUNCTIONS;
-- Show the description of one function; replace <function_name> with a real name.
DESCRIBE FUNCTION <function_name>;
-- Same, with the extended description.
DESCRIBE FUNCTION EXTENDED <function_name>;

-- Functions that operate on complex (collection) types.
SELECT size(work_place) AS array_size
FROM employee;

SELECT
    array_contains(work_place, "Toronto") AS isToronto,
    sort_array(work_place) AS sorted_array
FROM employee;
-- Date functions.
-- from_unixtime() turns the current Unix timestamp into a date-time string.
SELECT from_unixtime(unix_timestamp()) AS current_time
FROM employee
LIMIT 1;

-- unix_timestamp(value, pattern) converts a date in the given format to a timestamp.
-- NOTE(review): the pattern uses a two-letter year ("yy"); confirm that it
-- matches how start_date is actually stored.
SELECT
    name,
    start_date
FROM employee_hr
ORDER BY unix_timestamp(start_date, "yy-MM-dd");

-- to_date(): 从日期时间值中移除时间部分,只保留日期

-- CASE: the values after THEN and ELSE may have different data types.
SELECT
    CASE
        WHEN 1 IS NULL THEN "true"
        ELSE 0
    END AS case_result
FROM employee
LIMIT 1;

-- Insert a single hand-built row into employee: the SELECT list contains only
-- literals and LIMIT 1 keeps one copy of it, so employee must already hold at
-- least one row for this to insert anything.
INSERT INTO TABLE employee
SELECT
    "Steven" AS name,
    array(null) AS work_place,
    named_struct("gender", "Male", "age", 30) AS gender_age,
    map("Python", 90) AS skills_score,
    map("R&D", array("Developer")) AS apart_title
FROM employee
LIMIT 1;

-- LATERAL VIEW: rows for which explode() returns NULL are dropped from the result.
-- explode() expands the elements of an array or map column.
SELECT
    name,
    workplace,
    skills,
    score
FROM employee
LATERAL VIEW explode(work_place) wp AS workplace
LATERAL VIEW explode(skills_score) sc AS skills, score;

-- OUTER lateral view: keeps the rows for which explode() returns NULL
-- (no example query is given for it here).

-- reverse(str), split(str, regex): take the file name from a path by reversing
-- the string, splitting on "/", taking the first piece and reversing it back.
SELECT reverse(split(reverse("/usr/centos/employee.txt"), "/")[0]) AS filename
FROM employee
LIMIT 1;

-- collect_set / collect_list build a collection from the value of every row;
-- collect_set removes duplicates, collect_list keeps them.
-- Gather all names into one set:
SELECT collect_set(name) AS names
FROM employee;

-- Virtual columns. Their names are spelled with double underscores:
-- INPUT__FILE__NAME and BLOCK__OFFSET__INSIDE__FILE. The single-underscore
-- spelling INPUT_FILE_NAME is not recognised and makes the query fail.
SELECT
    INPUT__FILE__NAME,
    BLOCK__OFFSET__INSIDE__FILE
FROM employee_id_buckets;

SELECT block__offset__inside__file FROM partition_employee;
SELECT input__file__name FROM partition_employee;

-- Other functions.
-- NOTE(review): the original author reports that isnull() and isnotnull() do
-- not work here; not verified.
SELECT
    work_place,
    isnull(work_place) AS is_null,
    isnotnull(work_place) AS is_not_null
FROM employee;

-- assert_true(condition) throws an exception if the condition is false.
SELECT assert_true(employee.work_place IS NULL)
FROM employee
WHERE name = "Steven";

-- elt(n, str1, str2, ...) returns the n-th string of the list.
SELECT elt(1, "hello", "hadoop", "word")
FROM employee
LIMIT 1;

-- Return the name of the database currently in use.
SELECT current_database();

-- Settings that turn on transaction support
-- (original note: applies to ORC files and bucketed tables).
-- Enable concurrency support.
SET hive.support.concurrency = true;
-- Enforce bucketing when writing into bucketed tables.
SET hive.enforce.bucketing = true;
-- Allow every partition column to be resolved dynamically.
SET hive.exec.dynamic.partition.mode = nonstrict;
-- Use DbTxnManager as the transaction manager.
SET hive.txn.manager = org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-- Turn on the compaction initiator and give the compactor one worker thread.
SET hive.compactor.initiator.on = true;
SET hive.compactor.worker.threads = 1;


数据操作

标签:ace   case when   nta   RKE   生成   not   ring   dfs   enforce   

原文地址:https://www.cnblogs.com/StephenMeng/p/9858822.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!