1. HBase Filters
package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.MultipleColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

public class TestHBaseFilter {

    /**
     * Column value filter: SingleColumnValueFilter
     */
    @Test
    public void testSingleColumnValueFilter() throws Exception {
        // Find employees whose salary equals 3000
        // select * from emp where sal = 3000

        // Configure the ZooKeeper quorum address
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");

        // Get an HTable client
        HTable client = new HTable(conf, "emp");

        // Define a column value filter
        SingleColumnValueFilter filter = new SingleColumnValueFilter(
                Bytes.toBytes("empinfo"),   // column family
                Bytes.toBytes("sal"),       // column: salary
                CompareOp.EQUAL,            // =
                Bytes.toBytes("3000"));     // value to compare against

        // Define a scanner
        Scan scan = new Scan();
        scan.setFilter(filter);

        // Query the data through the filter
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            System.out.println(name);
        }
        client.close();
    }

    /**
     * Column name prefix filter: ColumnPrefixFilter
     */
    @Test
    public void testColumnPrefixFilter() throws Exception {
        // Column name prefix filter
        // select ename from emp

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");

        HTable client = new HTable(conf, "emp");

        // Define a column name prefix filter
        ColumnPrefixFilter filter = new ColumnPrefixFilter(Bytes.toBytes("ename"));

        Scan scan = new Scan();
        scan.setFilter(filter);

        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            System.out.println(name);
        }
        client.close();
    }

    /**
     * Multiple column prefix filter: MultipleColumnPrefixFilter
     */
    @Test
    public void testMultipleColumnPrefixFilter() throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");

        HTable client = new HTable(conf, "emp");

        // Employee name and salary
        byte[][] names = {Bytes.toBytes("ename"), Bytes.toBytes("sal")};
        MultipleColumnPrefixFilter filter = new MultipleColumnPrefixFilter(names);

        Scan scan = new Scan();
        scan.setFilter(filter);

        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            String sal = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
            System.out.println(name + "\t" + sal);
        }
        client.close();
    }

    /**
     * Row key filter: RowFilter
     */
    @Test
    public void testRowFilter() throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");

        HTable client = new HTable(conf, "emp");

        // Define a row key filter: match the row whose key is 7839
        RowFilter filter = new RowFilter(
                CompareOp.EQUAL, // =
                new RegexStringComparator("7839"));

        // Define a scanner
        Scan scan = new Scan();
        scan.setFilter(filter);

        // Query the data through the filter
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            String sal = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
            System.out.println(name + "\t" + sal);
        }
        client.close();
    }

    /**
     * Combined filters: FilterList
     */
    @Test
    public void testFilter() throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");

        HTable client = new HTable(conf, "emp");

        // Salary = 3000
        SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
                Bytes.toBytes("empinfo"),
                Bytes.toBytes("sal"),
                CompareOp.EQUAL,
                Bytes.toBytes("3000"));

        // Name column
        ColumnPrefixFilter filter2 = new ColumnPrefixFilter(Bytes.toBytes("ename"));

        // MUST_PASS_ALL: every filter in the list must pass (logical AND)
        FilterList filterList = new FilterList(Operator.MUST_PASS_ALL);
        filterList.addFilter(filter1);
        filterList.addFilter(filter2);

        Scan scan = new Scan();
        scan.setFilter(filterList);

        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            String sal = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
            System.out.println(name + "\t" + sal);
        }
        client.close();
    }
}
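The FilterList above combines its members with Operator.MUST_PASS_ALL, i.e. a logical AND. For OR semantics, FilterList also accepts Operator.MUST_PASS_ONE. As a minimal sketch reusing the emp table and empinfo family from the examples above (the variable names are mine), select * from emp where sal = 3000 or sal = 5000 could be written as:

        // Sketch: OR two column value filters with MUST_PASS_ONE
        // (assumes the same "emp" table and "empinfo" family as above)
        SingleColumnValueFilter sal3000 = new SingleColumnValueFilter(
                Bytes.toBytes("empinfo"), Bytes.toBytes("sal"),
                CompareOp.EQUAL, Bytes.toBytes("3000"));
        SingleColumnValueFilter sal5000 = new SingleColumnValueFilter(
                Bytes.toBytes("empinfo"), Bytes.toBytes("sal"),
                CompareOp.EQUAL, Bytes.toBytes("5000"));

        // MUST_PASS_ONE = logical OR: a row passes if either filter passes
        FilterList orList = new FilterList(Operator.MUST_PASS_ONE);
        orList.addFilter(sal3000);
        orList.addFilter(sal5000);

        Scan scan = new Scan();
        scan.setFilter(orList);

One caveat worth knowing: by default SingleColumnValueFilter also lets through rows that do not have the sal column at all; calling setFilterIfMissing(true) on the filter excludes such rows.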
2. MapReduce on HBase
Create the input table and load a few rows in the HBase shell:
create 'word','content'
put 'word','1','content:info','I love Beijing'
put 'word','2','content:info','I love China'
put 'word','3','content:info','Beijing is the capital of China'
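With the input in place, the job itself reads these rows through a TableMapper and writes the counts back to HBase through a TableReducer. Below is a minimal word count sketch under stated assumptions: the class, variable, and output names (WordCountOnHBase, the 'stat' output table, its 'result' family) are my own, and the output table would need to be created first (e.g. create 'stat','result' in the shell).

package demo;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class WordCountOnHBase {

    // Mapper: reads each row of the 'word' table and emits (word, 1)
    public static class WordMapper extends TableMapper<Text, IntWritable> {
        @Override
        protected void map(ImmutableBytesWritable rowKey, Result value, Context context)
                throws IOException, InterruptedException {
            // content:info holds the sentence, e.g. "I love Beijing"
            String line = Bytes.toString(value.getValue(Bytes.toBytes("content"), Bytes.toBytes("info")));
            for (String word : line.split(" ")) {
                context.write(new Text(word), new IntWritable(1));
            }
        }
    }

    // Reducer: sums the counts and writes a Put back to HBase
    public static class WordReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (IntWritable v : values) {
                total += v.get();
            }
            // Row key = the word itself; result:count = total (hypothetical output schema)
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.add(Bytes.toBytes("result"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(total)));
            context.write(new ImmutableBytesWritable(Bytes.toBytes(key.toString())), put);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");

        Job job = Job.getInstance(conf, "word count on HBase");
        job.setJarByClass(WordCountOnHBase.class);

        // Scan only the content:info column as job input
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"));

        // Input: the 'word' table; output: the 'stat' table (hypothetical, created beforehand)
        TableMapReduceUtil.initTableMapperJob("word", scan, WordMapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("stat", WordReducer.class, job);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

TableMapReduceUtil wires the table as the job's input and output formats, so no HDFS input or output path is set; the scan restricted to content:info keeps the mapper from shipping columns it never reads.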