
HBase batch import with MapReduce


Tags: hbase, mapreduce, batch import

Batch importing data into HBase with MapReduce:

The code is given directly below; the specific steps are explained in the comments.

package hbase;

import java.io.IOException;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class hbaseApp {
	/**
	 * @author XD
	 * Basic approach: create the table first, then write a MapReduce job for the batch import.
	 */
	static enum Num{
		exNum
	}
	// Create the table (with a single column family) if it does not already exist
	@SuppressWarnings("deprecation")
	public static void createTable() throws MasterNotRunningException, ZooKeeperConnectionException, IOException{
		// HBase configuration -- required
		Configuration conf = HBaseConfiguration.create();
		String tableName = "wlan";				// table name
		String family_name = "content";			// column family
		conf.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
		conf.set("hbase.zookeeper.quorum", "localhost");
		final HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
		if(!hbaseAdmin.tableExists(tableName)){
			HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
			HColumnDescriptor family = new HColumnDescriptor(family_name);
			tableDescriptor.addFamily(family);
			hbaseAdmin.createTable(tableDescriptor);
		}
		// Release the connection held by the admin client
		hbaseAdmin.close();
	}
	// HDFS path of the file(s) to import
	static final String INPUT_PATH = "hdfs://localhost:9000/input1/wlan";
	
	public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
		hbaseApp.createTable();
		final Configuration conf = new Configuration();
		conf.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
		conf.set("hbase.zookeeper.quorum", "localhost");
		// target HBase table for TableOutputFormat
		conf.set(TableOutputFormat.OUTPUT_TABLE, "wlan");
		conf.set("dfs.socket.timeout", "180000");
		
		Job job = new Job(conf,hbaseApp.class.getSimpleName());
		FileInputFormat.setInputPaths(job, INPUT_PATH);
		job.setMapperClass(Map.class);
		
		job.setMapOutputKeyClass(LongWritable.class);
		job.setMapOutputValueClass(Text.class);
		job.setJarByClass(hbaseApp.class);
		job.setReducerClass(Reduce.class);
		
		// Data is written straight into HBase, so there is no need to set an output path or the reducer output key/value classes
		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TableOutputFormat.class);
		
		job.waitForCompletion(true);
	}
	static class Map extends Mapper <LongWritable , Text , LongWritable , Text >{
		// Date format used in the row key
		SimpleDateFormat format1 = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
		private Text v2 = new Text();
		
		@Override
		protected void map(LongWritable key , Text value , Context context) throws IOException, InterruptedException{
			final String[] splited = value.toString().split("\t");
			try{
				final Date date = new Date(Long.parseLong(splited[0].trim()));
				final String dateFormat = format1.format(date);
				String rowKey = splited[1] + ":" + dateFormat;		// row key: second field plus formatted timestamp
				v2.set(rowKey + "\t" + value.toString());
				context.write(key, v2);
			}catch(NumberFormatException e){
				final Counter counter = context.getCounter(Num.exNum);
				counter.increment(1L);
				System.out.println("Failed to parse " + splited[0] + ": " + e.getMessage());
			}
		}
	}
	// Note: the reducer must extend TableReducer, not the plain Reducer
	static class Reduce extends TableReducer <LongWritable , Text , NullWritable>{
		@Override
		protected void reduce(LongWritable key , Iterable<Text> values , Context context) throws IOException, InterruptedException{
			for(Text val : values){
				final String[] splited = val.toString().split("\t");
				final Put put = new Put(Bytes.toBytes(splited[0]));		// row key
				put.add(Bytes.toBytes("content"), Bytes.toBytes("phone"), Bytes.toBytes(splited[1]));	// column family, column, value
				context.write(NullWritable.get(), put);
			}
		}
	}
}
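
As a side note, HBase also ships a helper class, org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil, which wires up the output format, the target table and the reducer class in a single call. Below is a minimal sketch of how the job setup above might look with it; it reuses the wlan table and the Map/Reduce classes from the listing, is not part of the original post, and is only an illustration.

// Sketch only: alternative job setup via TableMapReduceUtil (reuses the Map/Reduce classes above)
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "localhost");

Job job = new Job(conf, "wlan-import");
job.setJarByClass(hbaseApp.class);

job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.setInputPaths(job, INPUT_PATH);

job.setMapperClass(hbaseApp.Map.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);

// Sets TableOutputFormat, the "wlan" output table and the reducer class in one call
TableMapReduceUtil.initTableReducerJob("wlan", hbaseApp.Reduce.class, job);

job.waitForCompletion(true);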

The results are as follows (the original screenshot of the HBase scan output is omitted here):


Each row in the table carries the corresponding row key, column family, column, and column value.
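
To spot-check the imported rows in code rather than from a screenshot, one option is a small scan against the wlan table with the classic HBase client API (HTable, Scan, ResultScanner from org.apache.hadoop.hbase.client). This sketch is not from the original post and assumes the same localhost setup used above.

// Sketch only: scan the first few rows of 'wlan' to verify the import
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "localhost");

HTable table = new HTable(conf, "wlan");		// old-style client API, matching the code above
ResultScanner scanner = table.getScanner(new Scan());
int printed = 0;
for (Result result : scanner) {
	byte[] phone = result.getValue(Bytes.toBytes("content"), Bytes.toBytes("phone"));
	System.out.println(Bytes.toString(result.getRow()) + " -> " + Bytes.toString(phone));
	if (++printed >= 10) break;					// only show the first 10 rows
}
scanner.close();
table.close();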


Original article: http://blog.csdn.net/xd_122/article/details/41927817
