码迷,mamicode.com
首页 > 其他好文 > 详细

HBase过滤器的使用

时间:2019-01-17 00:32:38      阅读:229      评论:0      收藏:0      [点我收藏+]

标签:dmi   pen   match   on()   substr   void   sub   code   long   

一、常用过滤器:

  1、数据准备:  

Rowkey:001    Family:Quilfifier address    value: 昆明市西山区
Rowkey:001    Family:Quilfifier age    value: 23
Rowkey:001    Family:Quilfifier name    value: 小明
Rowkey:001    Family:Quilfifier personType    value: 布控人员,涉恐人员,线索人员
Rowkey:001    Family:Quilfifier zjhm    value: 620302199822332832
Rowkey:002    Family:Quilfifier address    value: 昆明市西山区福海路
Rowkey:002    Family:Quilfifier age    value: 33
Rowkey:002    Family:Quilfifier name    value: 小李
Rowkey:002    Family:Quilfifier personType    value: 重点人员,涉恐人员,线索人员
Rowkey:002    Family:Quilfifier zjhm    value: 620302199822332442
Rowkey:003    Family:Quilfifier address    value: 昆明市西山区福海路
Rowkey:003    Family:Quilfifier age    value: 34
Rowkey:003    Family:Quilfifier name    value: 小王
Rowkey:003    Family:Quilfifier personType    value: 重点人员,涉恐人员,在控人员
Rowkey:003    Family:Quilfifier zjhm    value: 620302192398432442
Rowkey:004    Family:Quilfifier address    value: 昆明市滇池路
Rowkey:004    Family:Quilfifier age    value: 45
Rowkey:004    Family:Quilfifier name    value: 小花
Rowkey:004    Family:Quilfifier personType    value: 涉恐人员,线索人员
Rowkey:004    Family:Quilfifier zjhm    value: 643020304050403436
Rowkey:005    Family:Quilfifier address    value: 云南省西双版纳
Rowkey:005    Family:Quilfifier age    value: 60
Rowkey:005    Family:Quilfifier name    value: 小马
Rowkey:005    Family:Quilfifier personType    value: ,涉案人员,涉恐人员,线索人员
Rowkey:005    Family:Quilfifier zjhm    value: 643020302938413436
Rowkey:006    Family:Quilfifier address    value: 北京市朝阳区
Rowkey:006    Family:Quilfifier age    value: 66
Rowkey:006    Family:Quilfifier name    value: 大壮
Rowkey:006    Family:Quilfifier personType    value: 良民
Rowkey:006    Family:Quilfifier zjhm    value: 673747322344384456

  2、过滤器的使用:

  

package HBase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class Operator {
    public static Admin admin = null;
    public static Connection conn = null;

    public Connection getConn() throws IOException {

        Configuration hbaseConf = HBaseConfiguration.create();
     hbaseConf.set("hbase.zookeeper.quorum","master:2181,slave1:2181,slave2:2181"); hbaseConf.set(
"hbase.zookeeper.quorum", "master:2181"); Connection HbaseConn = ConnectionFactory.createConnection(hbaseConf); return HbaseConn; } public Operator() { try { conn = Hbase.getConnection(); admin = conn.getAdmin(); } catch (Exception e) { e.getMessage(); } } public static void main(String[] args) throws Exception { Operator operator = new Operator(); operator.filter("person"); // operator.pageFilter("person"); } /** * SingleColumnValueFilter和SingleColumnValueExcludeFilter * 用来查找并返回指定条件的列的数据 * a,如果查找时没有该列,两种filter都会把该行所有数据返回 * b,如果查找时有该列,但是不符合条件,则该行所有列都不返回 * c,如果找到该列,并且符合条件,前者返回所有列,后者返回除该列以外的所有 */ public void filter(String tableName) throws Exception { Table table = conn.getTable(TableName.valueOf(tableName)); Scan scan = new Scan(); //SingleColumnValueFilter:二进制比较器,完整匹配字节数组,返回匹配到的整行 Filter filter = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("良民"))); //SingleColumnValueFilter:二进制比较器,只比较前缀是否相同,返回的是匹配到的整行,并非每一列 Filter filter0 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("重点"))); //SingleColumnValueFilter:匹配正则表达式,返回匹配到的整行 Filter filter1 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*重点人员.*")); //SingleColumnValueFilter:匹配是否包含子串,大小写不敏感,返回匹配到的整行 Filter filter2 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new SubstringComparator("线索人员")); //查询出匹配的行,但是过滤掉所匹配的列 Filter filter3 = new SingleColumnValueExcludeFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new SubstringComparator("线索人员")); //RandomRowFilter:按照一定的几率来返回随机的结果 Filter filter4 = new RandomRowFilter((float) 0.5); //RowFilter:删选出指定开头行健的所有匹配的行 Filter filter5 = new PrefixFilter(Bytes.toBytes("00")); 
//ValueFilter:按照value全数据库搜索,返回的是所匹配值的某一列,并非某一行 Filter filter6 = new ValueFilter(CompareFilter.CompareOp.NOT_EQUAL, new BinaryComparator(Bytes.toBytes("23"))); //按family(列族)查找,取回所有符合条件的“family” Filter filter7 = new FamilyFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("info"))); //KeyOnlyFilter:返回所有的行,但是值全是空 Filter filter8 = new KeyOnlyFilter(); //ColumnsPrefixFilter:按照列明的前缀来筛选单元格,返回所有行的指定某列 Filter filter9 = new ColumnPrefixFilter(Bytes.toBytes("ag")); //FirsterKeyOnlyFilter:返回的结果集中只包含第一列的而数据,在找到每一行的第一列后就会停止扫描 Filter filter10 = new FirstKeyOnlyFilter(); //InclusiveStopFilter:返回截止到指定行的所有数据,包含最后一行(005)。使用startRow以及stopRow的时候是左闭右开 Filter filter11 = new InclusiveStopFilter(Bytes.toBytes("005")); //cloumnCountGetFilter:返回每行最多返回多少列,在一行列数超过一定数量的时候,结束整个表的扫描 Filter filter12 = new ColumnCountGetFilter(6); //SkipFilter:附加过滤器,如果发现一行中的某一列不符合条件,则整行就会被过滤 Filter filter13 = new SkipFilter(filter6); //WhileMatchFilter:过滤数据,直到不符合条件,停止扫扫描,返回的是符合条件的每一列数据 Filter filter14 = new WhileMatchFilter(filter6); //QualifierFilter:列名过滤,返回指定的每一列数据 Filter filter15 = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("age"))); //MultipleColumnPrefixFilter:与ColumnsPrefixFilter不同的是可以指定多个列明的前缀 byte[][] prefixs = new byte[][]{Bytes.toBytes("ag"), Bytes.toBytes("na")}; Filter filter16 = new MultipleColumnPrefixFilter(prefixs); //ColumnRangeFilter:可以进行高效的列名内部扫描,因为列名是已经按照字典顺序排好的,返回[minColumn,maxColumn]之间的数据 boolean minColumnlnclusive = true; boolean maxColumnlnclusive = true; Filter filter17 = new ColumnRangeFilter(Bytes.toBytes("name"), minColumnlnclusive, Bytes.toBytes("zjhm"), maxColumnlnclusive); //DependentColumnFilter:尝试找到该列所在的每一行,并返回改行具有相同时间戳的全部键值对,返回的是具体的某一列,并非某一行 Filter filter18 = new DependentColumnFilter(Bytes.toBytes("info"), Bytes.toBytes("age")); //RandomRowFilter:随机选择一行的过滤器,chance是一个浮点数 float chance = 0.6f; Filter filter19 = new RandomRowFilter(chance); //ColumnPaginationFilter:按列分页过滤器,针对列数量很多的情况使用 int limit = 3; int 
columnOffset = 0; Filter filter20 = new ColumnPaginationFilter(limit, columnOffset); //综合过滤器使用 List<Filter> filters = new ArrayList<>(); filters.add(filter1); filters.add(filter2); FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters); ((SingleColumnValueFilter) filter1).setFilterIfMissing(false); scan.setFilter(filter20); ResultScanner scanner = table.getScanner(scan); for (Result r : scanner) { for (Cell cell : r.rawCells()) { System.out.println( "Rowkey:" + Bytes.toString(r.getRow()) + "\t" + "Family:Quilfifier " + Bytes.toString(CellUtil.cloneQualifier(cell)) + "\t" + "value: " + Bytes.toString(CellUtil.cloneValue(cell)) ); } } scanner.close(); } /** * 分页过滤器 * PageFilter:用于按行分页 */ public void pageFilter(String tableName) throws IOException { Table table = conn.getTable(TableName.valueOf(tableName)); long pageSize = 2; int totalRowsCount = 0; PageFilter pageFilter = new PageFilter(pageSize); byte[] lastRow = null; while (true) { Scan scan = new Scan(); scan.setFilter(pageFilter); if (lastRow != null) { byte[] posfix = Bytes.toBytes("002"); byte[] startRow = Bytes.add(lastRow, posfix); scan.setStartRow(startRow); System.out.println("start row :" + Bytes.toString(startRow)); } ResultScanner scanner = table.getScanner(scan); int localRowsCount = 0; for (Result result : scanner) { System.out.println(localRowsCount++ + ":" + result); totalRowsCount++; lastRow = result.getRow(); } scanner.close(); if (localRowsCount == 0) break; } System.out.println("total rows is :" + totalRowsCount); } }

  3、自定义过滤器

    --后面再补

  

HBase过滤器的使用

标签:dmi   pen   match   on()   substr   void   sub   code   long   

原文地址:https://www.cnblogs.com/Gxiaobai/p/10280083.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!