// Tags:
package com.ok.hbase.export;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Command-line export tool for the HBase table {@code Doc}.
 *
 * <p>Scans the row-key range given on the command line, reads the
 * {@code media} and {@code time1} columns of the {@code data} family and
 * writes one tab-separated line per row to
 * {@code test_<start>_<end>.txt}. The static {@code getXxxValue} helpers
 * decode a single cell of a {@link Result} into a Java value.
 *
 * <p>Qualifiers previously observed in the {@code data} family (21 total):
 * anchor, author, board, commment, content, entry, from, h (hash, optional),
 * media, name, origin, parseTime, site, tags, templet, time1, time2, tvcode,
 * type, url, urls. The {@code content} column was previously read through a
 * {@code GZipString.decodeToString} helper, i.e. it is presumably
 * gzip-compressed -- confirm before decoding it as plain text.
 */
public class Test {

    /**
     * Reads a cell and decodes it as a string.
     *
     * @param row       the scanned row
     * @param family    column family
     * @param qualifier column qualifier
     * @return the decoded string, or {@code ""} when the cell is missing or empty
     */
    public static String getStringValue(final Result row, final byte[] family,
            final byte[] qualifier) {
        final byte[] value = row.getValue(family, qualifier);
        return isEmpty(value) ? "" : Bytes.toString(value);
    }

    /**
     * Tells whether a cell value carries no data.
     *
     * @param a raw cell value; may be {@code null} (a missing cell)
     * @return {@code true} if {@code a} is {@code null} or has length zero
     */
    public static boolean isEmpty(byte[] a) {
        // The previous implementation compared against a freshly allocated
        // array by reference (always false) and then dereferenced a null value.
        return a == null || a.length == 0;
    }

    /**
     * Reads a cell and decodes it as an {@code int}.
     *
     * @param row       the scanned row
     * @param family    column family
     * @param qualifier column qualifier
     * @return {@code 0} when the cell is missing or empty; the narrowed
     *         {@code long} when the cell is 8 bytes wide; otherwise the value
     *         decoded by {@code Bytes.toInt}
     */
    public static int getIntegerValue(final Result row, final byte[] family,
            final byte[] qualifier) {
        final byte[] v = row.getValue(family, qualifier);
        if (isEmpty(v)) {
            return 0;
        }
        if (v.length == 8) {
            // Some writers stored the value as a long; narrow it.
            return (int) Bytes.toLong(v);
        }
        return Bytes.toInt(v);
    }

    /**
     * Reads a cell and decodes it as a {@code long}.
     *
     * @param row       the scanned row
     * @param family    column family
     * @param qualifier column qualifier
     * @return {@code 0} when the cell is missing or empty; the widened
     *         {@code int} when the cell is 4 bytes wide; otherwise the value
     *         decoded by {@code Bytes.toLong}
     */
    public static long getLongValue(final Result row, final byte[] family,
            final byte[] qualifier) {
        final byte[] v = row.getValue(family, qualifier);
        if (isEmpty(v)) {
            return 0;
        }
        if (v.length == 4) {
            // Some writers stored the value as an int; widen it.
            return Bytes.toInt(v);
        }
        return Bytes.toLong(v);
    }

    /**
     * Reads a cell holding a {@code long} timestamp and converts it to a
     * {@link Date}.
     *
     * @param row       the scanned row
     * @param family    column family
     * @param qualifier column qualifier
     * @return the date, or {@code null} when the cell is missing, empty, or
     *         holds the value {@code 0}
     */
    public static Date getDateValue(final Result row, final byte[] family,
            final byte[] qualifier) {
        final byte[] value = row.getValue(family, qualifier);
        if (isEmpty(value)) {
            return null;
        }
        final long millis = Bytes.toLong(value);
        return millis == 0 ? null : new Date(millis);
    }

    /**
     * Exports {@code media} and {@code time1} for every row whose key lies in
     * the half-open range {@code [args[0], args[1])}.
     *
     * <p>Each row is echoed to standard output and written to
     * {@code test_<args[0]>_<args[1]>.txt} as
     * {@code code<TAB>media<TAB>time1}; the time field is left empty when the
     * row has no usable {@code time1} value.
     *
     * <p>Server-side filtering alternative that was tried earlier: a
     * {@code FilterList} of {@code SingleColumnValueFilter}s on {@code time1}
     * ({@code GREATER_OR_EQUAL} a start timestamp, {@code LESS} an end
     * timestamp) and on {@code media} ({@code EQUAL} a media type), installed
     * with {@code scan.setFilter(...)}.
     *
     * @param args {@code args[0]} start row code (inclusive) and
     *             {@code args[1]} stop row code (exclusive), both decimal longs
     * @throws IllegalArgumentException if fewer than two arguments are given
     *                                  or they are not valid longs
     */
    public static void main(String[] args) {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: Test <startRowCode> <endRowCode>");
        }
        // Parse before touching the file system so bad input leaves no empty file behind.
        final long start = Long.parseLong(args[0]);
        final long end = Long.parseLong(args[1]);

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long start1 = System.currentTimeMillis();

        final String fileName = "test_" + args[0] + "_" + args[1] + ".txt";
        PrintStream p = null;
        HTable table = null;
        ResultScanner rs = null;
        try {
            Configuration conf = HBaseConfiguration.create();
            // Production quorum: 192.168.5.211,192.168.5.203,192.168.5.202
            // Test quorum (currently active):
            conf.set("hbase.zookeeper.quorum",
                    "192.168.0.181,192.168.0.182,192.168.0.183");
            String tableName = "Doc";
            byte[] fam = Bytes.toBytes("data");
            byte[] mediaQualifier = Bytes.toBytes("media");
            byte[] time1Qualifier = Bytes.toBytes("time1");

            // FileOutputStream creates (or truncates) the file itself.
            p = new PrintStream(new FileOutputStream(fileName));

            table = new HTable(conf, tableName);
            Scan scan = new Scan();
            // NOTE(review): addFamily followed by addColumn on the same family
            // looks redundant -- confirm before removing.
            scan.addFamily(fam);
            // Columns to fetch: media type and publish time.
            scan.addColumn(fam, mediaQualifier);
            scan.addColumn(fam, time1Qualifier);
            // Row keys are the 8-byte encoding of the numeric document code.
            scan.setStartRow(Bytes.toBytes(start));
            scan.setStopRow(Bytes.toBytes(end));

            rs = table.getScanner(scan);
            int counter = 0;
            for (Result row : rs) {
                long code = Bytes.toLong(row.getRow());
                // Media types: 1 TV, 2 newspapers/periodicals, 4 web,
                // 8 microblog, 16 forum.
                int media1 = getIntegerValue(row, fam, mediaQualifier);
                System.out.println(code + " " + media1);
                if (media1 == 2) {
                    // NOTE(review): media type 2 rows get an extra line holding
                    // only the code, in addition to the full line below --
                    // kept as-is; confirm whether consumers rely on it.
                    p.println(code);
                }
                Date time1 = getDateValue(row, fam, time1Qualifier);
                // getDateValue returns null for a missing/zero timestamp;
                // formatting null would throw, so emit an empty field instead.
                String formattedTime = time1 == null ? "" : sdf.format(time1);
                System.out.println("time1 " + formattedTime);
                p.println(code + "\t" + media1 + "\t" + formattedTime);
                counter++;
            }
            System.out.println("sum : " + counter);
            // PrintStream swallows I/O errors; surface them explicitly.
            if (p.checkError()) {
                System.err.println("Warning: errors occurred while writing "
                        + fileName);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release scanner, table and output file on every path.
            if (rs != null) {
                rs.close();
            }
            if (table != null) {
                try {
                    table.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (p != null) {
                p.close();
            }
        }
        long end1 = System.currentTimeMillis();
        System.out.println("Injector: finished at " + sdf.format(end1)
                + ", elapsed: " + TimingUtil.elapsedTime(start1, end1));
    }
}
// Tags:
// Original article: http://www.cnblogs.com/i80386/p/4181230.html