Hadoop之——HBASE结合MapReduce批量导入数据

时间：2015-06-12 01:00:27 阅读：124 评论：0 收藏：0 [点我收藏+]

转载请注明出处:http://blog.csdn.net/l1028386804/article/details/46463889

废话不多说，直接上代码，你懂得

package hbase;

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
/**
 * HBASE结合MapReduce批量导入
 * @author liuyazhuang
 */
public class BatchImport {
	static class BatchImportMapper extends Mapper<LongWritable, Text, LongWritable, Text>{
		SimpleDateFormat dateformat1=new SimpleDateFormat("yyyyMMddHHmmss");
		Text v2 = new Text();
		
		protected void map(LongWritable key, Text value, Context context) throws java.io.IOException ,InterruptedException {
			final String[] splited = value.toString().split("\t");
			try {
				final Date date = new Date(Long.parseLong(splited[0].trim()));
				final String dateFormat = dateformat1.format(date);
				String rowKey = splited[1]+":"+dateFormat;
				v2.set(rowKey+"\t"+value.toString());
				context.write(key, v2);
			} catch (NumberFormatException e) {
				final Counter counter = context.getCounter("BatchImport", "ErrorFormat");
				counter.increment(1L);
				System.out.println("出错了"+splited[0]+" "+e.getMessage());
			}
		};
	}
	
	static class BatchImportReducer extends TableReducer<LongWritable, Text, NullWritable>{
		protected void reduce(LongWritable key, java.lang.Iterable<Text> values, 	Context context) throws java.io.IOException ,InterruptedException {
			for (Text text : values) {
				final String[] splited = text.toString().split("\t");
				
				final Put put = new Put(Bytes.toBytes(splited[0]));
				put.add(Bytes.toBytes("cf"), Bytes.toBytes("date"), Bytes.toBytes(splited[1]));
				put.add(Bytes.toBytes("cf"), Bytes.toBytes("msisdn"), Bytes.toBytes(splited[2]));
				//省略其他字段，调用put.add(....)即可
				context.write(NullWritable.get(), put);
			}
		};
	}
	
	public static void main(String[] args) throws Exception {
		final Configuration configuration = new Configuration();
		//设置zookeeper
		configuration.set("hbase.zookeeper.quorum", "hadoop0");
		//设置hbase表名称
		configuration.set(TableOutputFormat.OUTPUT_TABLE, "wlan_log");
		//将该值改大，防止hbase超时退出
		configuration.set("dfs.socket.timeout", "180000");
		
		final Job job = new Job(configuration, "HBaseBatchImport");
		
		job.setMapperClass(BatchImportMapper.class);
		job.setReducerClass(BatchImportReducer.class);
		//设置map的输出，不设置reduce的输出类型
		job.setMapOutputKeyClass(LongWritable.class);
		job.setMapOutputValueClass(Text.class);
		
		job.setInputFormatClass(TextInputFormat.class);
		//不再设置输出路径，而是设置输出格式类型
		job.setOutputFormatClass(TableOutputFormat.class);
		
		FileInputFormat.setInputPaths(job, "hdfs://hadoop0:9000/input");
		
		job.waitForCompletion(true);
	}
}

标签：hadoop mapreduce hbase 批量

原文地址：http://blog.csdn.net/l1028386804/article/details/46463889

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行