机器名 | 配置 | 角色 | 软件安装 |
hadoop1 | 4G内存,1核 | hadoop:NN/DN Spark:Master/worker | /app/hadoop/hadoop220
/app/hadoop/spark110
/app/scala2104
/usr/java/jdk1.7.0_21
|
hadoop2 | 4G内存,1核 | hadoop:DN Spark:worker hive0.13客户端 |
/app/hadoop/hadoop220
/app/hadoop/spark110
/app/hadoop/hive013
/app/scala2104
/usr/java/jdk1.7.0_21
|
hadoop3 | 4G内存,1核 | hadoop:DN Spark:worker hive0.13 metaserver service mysql server |
/app/hadoop/hadoop220
/app/hadoop/spark110
/app/hadoop/hive013
/app/scala2104
/usr/java/jdk1.7.0_21
MySQL5.6.12
|
wyy | 16G内存,4核 | client hive0.13客户端 |
/app/hadoop/hadoop220 /app/hadoop/spark110 /app/hadoop/hive013 |
./make-distribution.sh [--name] [--tgz] [--with-tachyon] <maven build options>参数的含义:
./make-distribution.sh --tgz --name 2.2.0 -Pyarn -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl -Phive最后生成部署包spark-1.1.0-bin-2.2.0.tgz,按照测试环境的规划进行安装。
-- Hive schema + data-load script for the SALEDATA demo database.
-- NOTE: the original used // for comments, which HiveQL does not support
-- (Hive only accepts --); fixed here so the script is runnable as-is.
CREATE DATABASE SALEDATA;
USE SALEDATA;

-- Date.txt assigns calendar attributes to each day (its month, weekday,
-- quarter, etc.).
-- Columns: date, year-month, year, month, day, weekday, week number,
-- quarter, ten-day period, half-month.
CREATE TABLE tblDate(
    dateID string,
    theyearmonth string,
    theyear string,
    themonth string,
    thedate string,
    theweek string,
    theweeks string,
    thequot string,
    thetenday string,
    thehalfmonth string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';

-- Stock.txt defines the order headers.
-- Columns: order number, trade location, trade date.
CREATE TABLE tblStock(
    ordernumber string,
    locationid string,
    dateID string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';

-- StockDetail.txt defines the order line items.
-- Columns: order number, row number, item id, quantity, price, amount.
CREATE TABLE tblStockDetail(
    ordernumber string,
    rownum int,
    itemid string,
    qty int,
    price int,
    amount int)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';

-- Load the local data files into the tables.
LOAD DATA LOCAL INPATH '/home/mmicky/mboo/MyClass/doc/sparkSQL/data/Date.txt' INTO TABLE tblDate;
LOAD DATA LOCAL INPATH '/home/mmicky/mboo/MyClass/doc/sparkSQL/data/Stock.txt' INTO TABLE tblStock;
LOAD DATA LOCAL INPATH '/home/mmicky/mboo/MyClass/doc/sparkSQL/data/StockDetail.txt' INTO TABLE tblStockDetail;
-- After loading, the corresponding data files are visible in HDFS:
原文地址:http://blog.csdn.net/book_mmicky/article/details/39176975