码迷,mamicode.com
首页 > 其他好文 > 详细

大数据笔记(十四)——HBase的过滤器与Mapreduce

时间:2018-03-26 16:08:22      阅读:184      评论:0      收藏:0      [点我收藏+]

标签:result   ble   等于   map   cached   int   通过   建立   inf   

一. HBase过滤器

package demo;

import javax.swing.RowFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.MultipleColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import net.spy.memcached.ops.OperationErrorType;

public class TestHBaseFilter {

    /**
     * 列值过滤器:SingleColumnValueFilter
     */
    @Test
    public void testSingleColumnValueFilter() throws Exception{
        //查询工资等于3000的员工
        //select * from emp where sal=3000
        //配置ZK的地址信息
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        //得到HTable客户端
        HTable client  = new HTable(conf, "emp");
        //定义一个列值过滤器
        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("empinfo"),//列族
                Bytes.toBytes("sal"), //工资
                CompareOp.EQUAL,       // =
                Bytes.toBytes("3000"));//?
        
        //定义一个扫描器
        Scan scan = new Scan();
        scan.setFilter(filter);
        
        //通过过滤器查询数据
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            System.out.println(name);
        }
        
        client.close();
    }
    
    /**
     * 列名前缀过滤器:ColumnPrefixFilter
     */
    @Test
    public void testColumnPrefixFilter() throws Exception{
        //列名前缀过滤器
        //select ename from emp
        //配置ZK的地址信息
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        //得到HTable客户端
        HTable client  = new HTable(conf, "emp");
        
        //定义一个列名前缀过滤器
        ColumnPrefixFilter filter = new ColumnPrefixFilter(Bytes.toBytes("ename"));
        
        //定义一个扫描器
        Scan scan = new Scan();
        scan.setFilter(filter);
        
        //通过过滤器查询数据
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            System.out.println(name);
        }
        
        client.close();
    }
    
    /**
     * 多个列名前缀过滤器:MultipleColumnPrefixFilter
     */
    @Test
    public void testMultipleColumnPrefixFilter() throws Exception{

        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        HTable client  = new HTable(conf, "emp");
        //员工姓名 薪资
        byte[][] names = {Bytes.toBytes("ename"),Bytes.toBytes("sal")};
        
        MultipleColumnPrefixFilter filter = new MultipleColumnPrefixFilter(names);
    
        Scan scan = new Scan();
        scan.setFilter(filter);
        
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            String sal = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
            System.out.println(name+"\t"+sal);
        }
        
        client.close();
    }
    
    /**
     * 行键过滤器:RowFilter
     */
    @Test
    public void testRowFilter() throws Exception{
        
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        HTable client  = new HTable(conf, "emp");
        
        //定义一个行键过滤器
        org.apache.hadoop.hbase.filter.RowFilter filter = new org.apache.hadoop.hbase.filter.RowFilter(
                CompareOp.EQUAL, //=
                new RegexStringComparator("7839"));
        
        //定义一个扫描器
        Scan scan = new Scan();
        scan.setFilter(filter);
        
        //通过过滤器查询数据
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            String sal = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
            System.out.println(name+"\t"+sal);
        }
        
        client.close();
    }
    
    /**
     * 组合过滤器
     */
    @Test
    public void testFilter() throws Exception{

        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "192.168.153.11");
        
        HTable client  = new HTable(conf, "emp");
        
        //工资=3000
        SingleColumnValueFilter filter1 = new SingleColumnValueFilter(Bytes.toBytes("empinfo"), 
                Bytes.toBytes("sal"), CompareOp.EQUAL, Bytes.toBytes("3000"));
        //名字
        ColumnPrefixFilter filter2 = new ColumnPrefixFilter(Bytes.toBytes("ename"));
        
        FilterList filterList = new FilterList(Operator.MUST_PASS_ALL);
        filterList.addFilter(filter1);
        filterList.addFilter(filter2);
        
        Scan scan = new Scan();
        scan.setFilter(filterList);
        
        ResultScanner rs = client.getScanner(scan);
        for (Result result : rs) {
            String name = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
            String sal = Bytes.toString(result.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
            System.out.println(name+"\t"+sal);
        }
        
        client.close();
    }
}

二. HDFS上的mapreduce

建立表

   create 'word','content'

   put 'word','1','content:info','I love Beijing'

   put 'word','2','content:info','I love China'

   put 'word','3','content:info','Beijing is the capital of China'

 

大数据笔记(十四)——HBase的过滤器与Mapreduce

标签:result   ble   等于   map   cached   int   通过   建立   inf   

原文地址:https://www.cnblogs.com/lingluo2017/p/8650512.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!