- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Arrays;
- import java.util.List;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.hbase.Cell;
- import org.apache.hadoop.hbase.CellUtil;
- import org.apache.hadoop.hbase.HBaseConfiguration;
- import org.apache.hadoop.hbase.TableName;
- import org.apache.hadoop.hbase.client.Admin;
- import org.apache.hadoop.hbase.client.Connection;
- import org.apache.hadoop.hbase.client.ConnectionFactory;
- import org.apache.hadoop.hbase.client.Get;
- import org.apache.hadoop.hbase.client.Result;
- import org.apache.hadoop.hbase.client.ResultScanner;
- import org.apache.hadoop.hbase.client.Scan;
- import org.apache.hadoop.hbase.client.Table;
- import org.apache.hadoop.hbase.filter.BinaryComparator;
- import org.apache.hadoop.hbase.filter.ColumnCountGetFilter;
- import org.apache.hadoop.hbase.filter.ColumnPaginationFilter;
- import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
- import org.apache.hadoop.hbase.filter.ColumnRangeFilter;
- import org.apache.hadoop.hbase.filter.DependentColumnFilter;
- import org.apache.hadoop.hbase.filter.FamilyFilter;
- import org.apache.hadoop.hbase.filter.Filter;
- import org.apache.hadoop.hbase.filter.FilterList;
- import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
- import org.apache.hadoop.hbase.filter.FuzzyRowFilter;
- import org.apache.hadoop.hbase.filter.InclusiveStopFilter;
- import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
- import org.apache.hadoop.hbase.filter.MultipleColumnPrefixFilter;
- import org.apache.hadoop.hbase.filter.PageFilter;
- import org.apache.hadoop.hbase.filter.PrefixFilter;
- import org.apache.hadoop.hbase.filter.QualifierFilter;
- import org.apache.hadoop.hbase.filter.RandomRowFilter;
- import org.apache.hadoop.hbase.filter.RegexStringComparator;
- import org.apache.hadoop.hbase.filter.RowFilter;
- import org.apache.hadoop.hbase.filter.SingleColumnValueExcludeFilter;
- import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
- import org.apache.hadoop.hbase.filter.SkipFilter;
- import org.apache.hadoop.hbase.filter.SubstringComparator;
- import org.apache.hadoop.hbase.filter.TimestampsFilter;
- import org.apache.hadoop.hbase.filter.ValueFilter;
- import org.apache.hadoop.hbase.filter.WhileMatchFilter;
- import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
- import org.apache.hadoop.hbase.util.Bytes;
- import org.apache.hadoop.hbase.util.Pair;
- public class HbaseUtils {
- public static Admin admin = null;
- public static Connection conn = null;
- public HbaseUtils() {
- Configuration conf = HBaseConfiguration.create();
- conf.set("hbase.zookeeper.quorum", "h71:2181");
- conf.set("hbase.rootdir", "hdfs://h71:9000/hbase");
- try {
- conn = ConnectionFactory.createConnection(conf);
- admin = conn.getAdmin();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- public static void main(String[] args) throws Exception {
- HbaseUtils hbase = new HbaseUtils();
- //1,FamilyFilter:基於「列族」來過濾數據;
- // hbase.FamilyFilter("scores");
- //2,QualifierFilter:基於「列名」來過濾數據;
- // hbase.QualifierFilter("scores");
- //3.RowFilter:基於rowkey來過濾數據;
- // hbase.RowFilter("scores","zhangsan01");
- //4.PrefixFilter:基於rowkey前綴來過濾數據;
- // hbase.PrefixFilter("scores","zhang");
- //後綴過濾數據
- // hbase.HouZui("scores");
- //5,ColumnPrefixFilter:基於列名前綴來過濾數據;
- // hbase.ColumnPrefixFilter("scores");
- //6,MultipleColumnPrefixFilter:ColumnPrefixFilter的增強版;
- // hbase.MultipleColumnPrefixFilter("scores");
- //7,ColumnCountGetFilter:限制每行返回多少列;
- // hbase.columnCountGetFilter();
- //8,ColumnPaginationFilter:對一行的全部列分頁,只返回[limit, offset]範圍內的列;
- // hbase.ColumnPaginationFilter("scores");
- //9,ColumnRangeFilter:可用於得到一個範圍的列
- // hbase.ColumnRangeFilter("scores");
- //10,DependentColumnFilter:返回(與(符合條件[列族,列名]或[列族,列名,值]的參考列)具備相同的時間戳)的全部列,即:基於比較器過濾參考列,基於參考列的時間戳過濾其餘列;
- // hbase.DependentColumnFilter("scores");
- //11,FirstKeyOnlyFilter:結果只返回每行的第一個值對;
- // hbase.FirstKeyOnlyFilter("scores");
- //12,FuzzyRowFilter:模糊row查詢;
- // hbase.FuzzyRowFilter("scores");
- //13,InclusiveStopFilter:將stoprow也一塊兒返回;
- // hbase.InclusiveStopFilter("scores");
- //14,KeyOnlyFilter:只返回行鍵;
- // hbase.KeyOnlyFilter("scores");
- //15,PageFilter: 取回XX條數據 ;
- // hbase.PageFilter("scores");
- //16,RandomRowFilter:隨機獲取必定比例(比例爲參數)的數據;
- // hbase.RandomRowFilter("scores");
- //17,SingleColumnValueFilter:基於參考列的值來過濾數據;
- // hbase.SingleColumnValueFilter("scores");
- //18,ValueFilter:基於值來過濾數據;
- // hbase.ValueFilter("scores");
- //19,SkipFilter:當過濾器發現某一行中的一列要過濾時,就將整行數據都過濾掉;
- // hbase.SkipFilter("scores");
- //20,TimestampsFilter:基於時間戳來過濾數據;
- // hbase.TimestampsFilter("scores");
- //21,WhileMatchFilter:一旦遇到一條符合過濾條件的數據,就中止掃描;
- // hbase.WhileMatchFilter("scores");
- //22,FilterList:多個過濾器組合過濾。
- // hbase.FilterList("scores");
- }
- /**
- 1,FamilyFilter
- a,按family(列族)查找,取回全部符合條件的「family」
- b,構造方法第一個參數爲compareOp
- c,第二個參數爲WritableByteArrayComparable,有BinaryComparator, BinaryPrefixComparator,
- BitComparator, NullComparator, RegexStringComparator, SubstringComparator這些類,
- 最經常使用的爲BinaryComparator
- */
- public void FamilyFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new FamilyFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("grc")));
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):224:0> scan 'scores', {FILTER => "FamilyFilter(<=,'binary:grc')"}
- 或者
- hbase(main):011:0> scan 'scores', FILTER => "FamilyFilter(<=,'binary:grc')"
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- lisi01 column=course:math, timestamp=1498003561726, value=89
- lisi01 column=grade:, timestamp=1498003561726, value=201
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan01 column=grade:, timestamp=1498003593575, value=101
- zhangsan02 column=course:art, timestamp=1498003601365, value=90
- zhangsan02 column=course:math, timestamp=1498003561726, value=66
- zhangsan02 column=grade:, timestamp=1498003601365, value=102
- 3 row(s) in 0.0220 seconds
- */
- /**
- 2,QualifierFilter
- 相似於FamilyFilter,取回全部符合條件的「列」
- 構造方法第一個參數 compareOp
- 第二個參數爲WritableByteArrayComparable
- */
- public void QualifierFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new QualifierFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("grc")));
- //這裏輸的參數是相應位置比大小,及當輸入ms的時候,全部列名的第一位小於等於m,若是第一位相等則比較第二位的大小。一開始沒理解,因此一開始參數輸入math或course的時候把我整懵了。
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):221:0> scan 'scores', {FILTER => "QualifierFilter(<=,'binary:b')"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- lisi01 column=grade:, timestamp=1498003561726, value=201
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=grade:, timestamp=1498003593575, value=101
- zhangsan02 column=course:art, timestamp=1498003601365, value=90
- zhangsan02 column=grade:, timestamp=1498003601365, value=102
- 3 row(s) in 0.0470 seconds
- */
- /**
- 3,RowFilter
- 構造方法參數設置相似於FamilyFilter,符合條件的row都返回
- 可是經過row查詢時,若是知道開始結束的row,仍是用scan的start和end方法更直接而且經測試速度快一半以上
- */
- public void RowFilter(String tableName, String reg) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- //這個參數EQUAL很重要,若是參數不一樣,查詢的結果也會不一樣
- // RowFilter filter = new RowFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(reg)));//這樣寫也行
- // Filter filter = new RowFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(reg)));
- Filter filter = new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes(reg)));
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- /**
- * 更推薦用下面的方法直接指定起止行,由於filter本質上仍是會遍歷所有數據,而設定起止行後會直接從指定行開始,指定行結束,效率高不少。
- */
- // scan.setStartRow(Bytes.toBytes("AAAAAAAAAAAA"));
- // scan.setStopRow(Bytes.toBytes( "AAAAAAAAABBB"));
- }
- /*
- hbase(main):004:0> scan 'scores', {FILTER => "RowFilter(<=,'binary:zhangsan01')"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- lisi01 column=course:math, timestamp=1498003561726, value=89
- lisi01 column=grade:, timestamp=1498003561726, value=201
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan01 column=grade:, timestamp=1498003593575, value=101
- 2 row(s) in 0.0210 seconds
- */
- /**
- 4,PrefixFilter
- 取回rowkey以指定prefix開頭的全部行
- */
- public void PrefixFilter(String tableName, String reg) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new PrefixFilter(Bytes.toBytes("zhang"));
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):022:0> scan 'scores', {FILTER => org.apache.hadoop.hbase.filter.PrefixFilter.new(org.apache.hadoop.hbase.util.Bytes.toBytes('li'))}
- 或者
- hbase(main):004:0> scan 'scores', {FILTER => "PrefixFilter('li')"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1489747672249, value=89
- lisi01 column=course:math, timestamp=1489747666861, value=89
- lisi01 column=grade:, timestamp=1489747677402, value=201
- 1 row(s) in 0.0110 seconds
- */
- /**
- 因爲其原生帶有PrefixFilter這種對ROWKEY的前綴過濾查詢,所以想着實現的後綴查詢的過程當中,發現這一方面相對來講仍是空白。
- 所以,只能採用一些策略來實現,主要仍是採用正則表達式的方式。
- */
- public void HouZui(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new RowFilter(CompareOp.EQUAL,new RegexStringComparator(".*n01"));
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):020:0> scan 'scores', {FILTER => "RowFilter(=,'regexstring:.*n01')"}
- ROW COLUMN+CELL
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan01 column=grade:, timestamp=1498003593575, value=101
- 1 row(s) in 0.0080 seconds
- */
- /**
- 5,ColumnPrefixFilter
- */
- public void ColumnPrefixFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- byte[] prefix = Bytes.toBytes("ar");
- Filter filter = new ColumnPrefixFilter(prefix);
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):021:0> scan 'scores', {FILTER => "ColumnPrefixFilter('ar')"}
- 或者
- hbase(main):022:0> scan 'scores', {FILTER => org.apache.hadoop.hbase.filter.ColumnPrefixFilter.new(org.apache.hadoop.hbase.util.Bytes.toBytes('ar'))}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan02 column=course:art, timestamp=1498003601365, value=90
- 3 row(s) in 0.0140 seconds
- */
- /**
- 6,MultipleColumnPrefixFilter
- a,返回有此前綴的全部列,
- b,在byte[][]中定義全部須要的列前綴,只要知足其中一條約束就會被返回(ColumnPrefixFilter的增強版),
- */
- public void MultipleColumnPrefixFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- byte[][] prefix = {Bytes.toBytes("ar"),Bytes.toBytes("ma")};
- Filter filter = new MultipleColumnPrefixFilter(prefix);
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):023:0> scan 'scores', {FILTER => "MultipleColumnPrefixFilter('ar','ma')"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- lisi01 column=course:math, timestamp=1498003561726, value=89
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan02 column=course:art, timestamp=1498003601365, value=90
- zhangsan02 column=course:math, timestamp=1498003561726, value=66
- 3 row(s) in 0.0290 seconds
- */
- /**
- 7,ColumnCountGetFilter
- a,沒法再scan中使用,只能在Get中
- b,若設爲0,則沒法返回數據,設爲幾就按服務器中存儲位置取回幾列
- c,可用size()取到列數,觀察效果
- */
- public void columnCountGetFilter() throws Exception {
- Table table = conn.getTable(TableName.valueOf("scores"));
- Get get = new Get(Bytes.toBytes("zhangsan01"));
- get.setFilter(new ColumnCountGetFilter(2));
- Result result = table.get(get);
- //輸出結果size,觀察效果
- System.out.println(result.size());
- // byte[] value1 = result.getValue("course".getBytes(), "art".getBytes());
- // byte[] value2 = result.getValue("course".getBytes(), "math".getBytes());
- // System.out.println("course:art"+"-->"+new String(value1)+" "
- // +"course:math"+"-->"+new String(value2));
- }
- /*
- hbase(main):026:0> scan 'scores', {FILTER => "ColumnCountGetFilter(2)"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- lisi01 column=course:math, timestamp=1498003561726, value=89
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan02 column=course:art, timestamp=1498003601365, value=90
- zhangsan02 column=course:math, timestamp=1498003561726, value=66
- 3 row(s) in 0.0120 seconds
- */
- /**
- 8,ColumnPaginationFilter
- a,limit 表示返回列數
- b,offset 表示返回列的偏移量,若是爲0,則所有取出,若是爲1,則返回第二列及之後
- */
- public void ColumnPaginationFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new ColumnPaginationFilter(2,1);
- scan.setFilter(filter);
- // 用addFamily增長列族後,會只返回指定列族的數據
- scan.addFamily(Bytes.toBytes("course"));
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):031:0> scan 'scores',{FILTER=>org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(2,1)}
- 或者
- hbase(main):030:0> scan 'scores',{FILTER=> "ColumnPaginationFilter(2,1)"}
- ROW COLUMN+CELL
- lisi01 column=course:math, timestamp=1498003561726, value=89
- lisi01 column=grade:, timestamp=1498003561726, value=201
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan01 column=grade:, timestamp=1498003593575, value=101
- zhangsan02 column=course:math, timestamp=1498003561726, value=66
- zhangsan02 column=grade:, timestamp=1498003601365, value=102
- 3 row(s) in 0.0100 seconds
- */
- /**
- 9,ColumnRangeFilter
- 構造函數:
- ColumnRangeFilter(byte[] minColumn, boolean minColumnInclusive, byte[] maxColumn, boolean maxColumnInclusive)
- *可用於得到一個範圍的列,例如,若是你的一行中有百萬個列,可是你只但願查看列名爲bbbb到dddd的範圍
- *該過濾器能夠進行高效的列名內部掃描。(爲什麼是高效呢???由於列名是已經按字典排序好的)HBase-0.9.2 版本引入該功能。
- *一個列名是能夠出如今多個列族中的,該過濾器將返回全部列族中匹配的列
- */
- public void ColumnRangeFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new ColumnRangeFilter(Bytes.toBytes("a"),true, Bytes.toBytes("n"),true);
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):032:0> scan 'scores',{FILTER=> "ColumnRangeFilter('a',true,'n',true)"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- lisi01 column=course:math, timestamp=1498003561726, value=89
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan02 column=course:art, timestamp=1498003601365, value=90
- zhangsan02 column=course:math, timestamp=1498003561726, value=66
- 3 row(s) in 0.0140 seconds
- */
- /**
- 10, DependentColumnFilter (該過濾器有兩個參數:family和Qualifier,嘗試找到該列所在的每一行,
- 並返回該行具備相同時間戳的所有鍵值對。若是某一行不包含指定的列,則該行的任何鍵值對都不返回,
- 該過濾器還能夠有一個可選的布爾參數-若是爲true,從屬的列不返回;
- 該過濾器還能夠有兩個可選的參數--一個比較操做符和一個值比較器,用於family和Qualifier
- 的進一步檢查,若是從屬的列找到,其值還必須經過值檢查,而後就是時間戳必須考慮)
- */
- public void DependentColumnFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- // Filter filter = new DependentColumnFilter(Bytes.toBytes("course"), Bytes.toBytes("art"),false);
- // Filter filter = new DependentColumnFilter(Bytes.toBytes("course"), Bytes.toBytes("art"),true);
- Filter filter = new DependentColumnFilter(Bytes.toBytes("course"), Bytes.toBytes("art"),false,CompareOp.EQUAL,new BinaryComparator(Bytes.toBytes("90")));
- // Filter filter = new DependentColumnFilter(Bytes.toBytes("course"), Bytes.toBytes("art"),true,CompareOp.EQUAL,new BinaryComparator(Bytes.toBytes("90")));
- //上面這四種狀況輸出的for循環中的內容也不同,要作相應的修改,不然會報java.lang.NullPointerException
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):036:0> scan 'scores',{FILTER=> "DependentColumnFilter('course','art',false,=,'binary:90')"}
- ROW COLUMN+CELL
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan02 column=course:art, timestamp=1498003601365, value=90
- zhangsan02 column=grade:, timestamp=1498003601365, value=102
- 2 row(s) in 0.0160 seconds
- */
- /**
- 11,FirstKeyOnlyFilter
- 如名字所示,結果只返回每行的第一個值對
- */
- public void FirstKeyOnlyFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new FirstKeyOnlyFilter();
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):037:0> scan 'scores',{FILTER=> "FirstKeyOnlyFilter()"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan02 column=course:art, timestamp=1498003601365, value=90
- 3 row(s) in 0.0160 seconds
- */
- /**
- 12,FuzzyRowFilter
- 模糊row查詢
- pair中第一個參數爲模糊查詢的string
- 第二個參數爲byte[]其中裝與string位數相同的數值0或1,0表示該位必須與string中值相同,1表示能夠不一樣
- */
- public void FuzzyRowFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new FuzzyRowFilter( Arrays.asList(new Pair<byte[], byte[]>(Bytes.toBytes("zhangsan01"),
- new byte[] {0, 0, 0, 0 , 0, 0, 0, 0, 0, 1})));
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- 。。。。。
- */
- /**
- 13,InclusiveStopFilter
- 指定stopRow,程序在scan時從頭掃描所有返回,直到stopRow中止(stopRow這行也會返回,而後scan中止)
- */
- public void InclusiveStopFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new InclusiveStopFilter(Bytes.toBytes("zhangsan01"));
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):012:0> scan 'scores', {FILTER => "InclusiveStopFilter('zhangsan01')"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- lisi01 column=course:math, timestamp=1498003561726, value=89
- lisi01 column=grade:, timestamp=1498003561726, value=201
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan01 column=grade:, timestamp=1498003593575, value=101
- 2 row(s) in 0.0170 seconds
- */
- /**
- 14,KeyOnlyFilter
- 只取key值,size正常,說明value不是沒取而是在取的時候被重寫爲空(能打印,不是null)
- lenAsVal這個值沒大搞明白,當設爲false時打印爲空,若是設爲true時打印的將會是「口口口口」
- */
- public void KeyOnlyFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new KeyOnlyFilter(true);
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):016:0> scan 'scores', {FILTER => "KeyOnlyFilter(true)"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=\x00\x00\x00\x02
- lisi01 column=course:math, timestamp=1498003561726, value=\x00\x00\x00\x02
- lisi01 column=grade:, timestamp=1498003561726, value=\x00\x00\x00\x03
- zhangsan01 column=course:art, timestamp=1498003561726, value=\x00\x00\x00\x02
- zhangsan01 column=course:math, timestamp=1498003561726, value=\x00\x00\x00\x02
- zhangsan01 column=grade:, timestamp=1498003593575, value=\x00\x00\x00\x03
- zhangsan02 column=course:art, timestamp=1498003601365, value=\x00\x00\x00\x02
- zhangsan02 column=course:math, timestamp=1498003561726, value=\x00\x00\x00\x02
- zhangsan02 column=grade:, timestamp=1498003601365, value=\x00\x00\x00\x03
- 3 row(s) in 0.0320 seconds
- hbase(main):015:0> scan 'scores', {FILTER => "KeyOnlyFilter(false)"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=
- lisi01 column=course:math, timestamp=1498003561726, value=
- lisi01 column=grade:, timestamp=1498003561726, value=
- zhangsan01 column=course:art, timestamp=1498003561726, value=
- zhangsan01 column=course:math, timestamp=1498003561726, value=
- zhangsan01 column=grade:, timestamp=1498003593575, value=
- zhangsan02 column=course:art, timestamp=1498003601365, value=
- zhangsan02 column=course:math, timestamp=1498003561726, value=
- zhangsan02 column=grade:, timestamp=1498003601365, value=
- 3 row(s) in 0.0190 seconds
- */
- /**
- 15,PageFilter
- 取回XX條數據
- */
- public void PageFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new PageFilter(2);
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):017:0> scan 'scores', {FILTER => "PageFilter(2)"}
- ROW COLUMN+CELL
- lisi01 column=course:art, timestamp=1498003655021, value=89
- lisi01 column=course:math, timestamp=1498003561726, value=89
- lisi01 column=grade:, timestamp=1498003561726, value=201
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan01 column=grade:, timestamp=1498003593575, value=101
- 2 row(s) in 0.0130 seconds
- */
- /**
- 16,RandomRowFilter
- 參數小於0時一條查不出大於1值會返回全部,而想取隨機行的話有效區間爲0~1,值表明取到的概率
- */
- public void RandomRowFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new RandomRowFilter((float)0.5);
- //即便是0.5有時候也一條查不出來,有時候卻全出來了,是概率並非必定,那我就不知道這個具體有什麼實際運用了。。。根據rowkey隨機而不是根據列隨機
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- 。。。。。
- */
- /**
- 17,SingleColumnValueFilter和SingleColumnValueExcludeFilter
- 用來查找並返回指定條件的列的數據
- a,若是查找時沒有該列,兩種filter都會把該行全部數據返回
- b,若是查找時有該列,可是不符合條件,則該行全部列都不返回
- c,若是找到該列,而且符合條件,前者返回全部列,後者返回除該列之外的全部列
- */
- public void SingleColumnValueFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- //完整匹配字節數組
- // Filter filter = new SingleColumnValueFilter(Bytes.toBytes("course"), Bytes.toBytes("art"),CompareOp.EQUAL,new BinaryComparator(Bytes.toBytes("90")));
- //匹配正則表達式
- // Filter filter = new SingleColumnValueFilter(Bytes.toBytes("course"), Bytes.toBytes("art"),CompareOp.EQUAL,new RegexStringComparator("8"));
- //匹配是否包含子串,大小寫不敏感
- // Filter filter = new SingleColumnValueFilter(Bytes.toBytes("course"), Bytes.toBytes("art"),CompareOp.EQUAL,new SubstringComparator("9"));
- Filter filter = new SingleColumnValueExcludeFilter(Bytes.toBytes("course"), Bytes.toBytes("art"), CompareOp.EQUAL,new SubstringComparator("9"));
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):032:0> scan 'scores', {FILTER => "SingleColumnValueExcludeFilter('course','art',=,'substring:9')"}
- ROW COLUMN+CELL
- lisi01 column=course:math, timestamp=1498003561726, value=89
- lisi01 column=grade:, timestamp=1498003561726, value=201
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan01 column=grade:, timestamp=1498003593575, value=101
- zhangsan02 column=course:math, timestamp=1498003561726, value=66
- zhangsan02 column=grade:, timestamp=1498003601365, value=102
- 3 row(s) in 0.0150 seconds
- */
- /**
- 18,ValueFilter
- 按value全數據庫搜索(所有列的value均會被檢索)
- */
- public void ValueFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new ValueFilter(CompareOp.NOT_EQUAL,new BinaryComparator(Bytes.toBytes("102")));
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- NOT_EQUAL不知道該咋麼表示。。。。。
- */
- /**
- 19,SkipFilter
- 根據整行中的每一個列來作過濾,只要存在一列不知足條件,整行都被過濾掉。
- 例如,若是一行中的全部列表明的是不一樣物品的重量,則真實場景下這些數值都必須大於零,咱們但願將那些包含任意列值爲0的行都過濾掉。
- 在這個狀況下,咱們結合ValueFilter和SkipFilter共同實現該目的:
- scan.setFilter(new SkipFilter(new ValueFilter(CompareOp.NOT_EQUAL,new BinaryComparator(Bytes.toBytes(0))));
- */
- public void SkipFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new SkipFilter(new ValueFilter(CompareOp.NOT_EQUAL,new BinaryComparator(Bytes.toBytes("102"))));
- // Filter filter = new SkipFilter(new DependentColumnFilter(Bytes.toBytes("course"), Bytes.toBytes("art"),false,CompareOp.NOT_EQUAL,new BinaryComparator(Bytes.toBytes("90"))));
- //該過濾器須要配合其餘過濾器來使用
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- 不知道咋麼把兩個過濾器嵌套使用。。。。。
- */
- /**
- 20,TimestampsFilter
- a,按時間戳搜索數據庫
- b,需設定List<Long> 存放全部須要檢索的時間戳,
- */
- public void TimestampsFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- //ls中存放全部須要查找匹配的時間戳
- List<Long> ls = new ArrayList<Long>();
- ls.add((long)1498003561726L);
- ls.add((long)1498003601365L);
- //java語言的整型常量默認爲int型,聲明long型常量能夠後加」l「或」L「
- Filter filter = new TimestampsFilter(ls);
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- // for (Result result : scanner) {
- // if(new String(result.getRow()).equals("zhangsan01")){
- // System.out.println(new String(result.getRow())+" "
- // +"course:art"+"-->"+new String(result.getValue(Bytes.toBytes("course"), Bytes.toBytes("art")))+" "
- // +"course:math"+"-->"+new String(result.getValue(Bytes.toBytes("course"), Bytes.toBytes("math"))));
- // }else if(new String(result.getRow()).equals("zhangsan02")){
- // System.out.println(new String(result.getRow())+" "
- // +"course:art"+"-->"+new String(result.getValue(Bytes.toBytes("course"), Bytes.toBytes("art")))+" "
- // +"course:math"+"-->"+new String(result.getValue(Bytes.toBytes("course"), Bytes.toBytes("math")))+" "
- // +"grade:"+"-->"+new String(result.getValue(Bytes.toBytes("grade"), Bytes.toBytes(""))));
- // }else{
- // System.out.println(new String(result.getRow())+" "
- // +"course:math"+"-->"+new String(result.getValue(Bytes.toBytes("course"), Bytes.toBytes("math")))+" "
- // +"grade:"+"-->"+new String(result.getValue(Bytes.toBytes("grade"), Bytes.toBytes(""))));
- // }
- // }
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):039:0> scan 'scores', {FILTER => "TimestampsFilter(1498003561726,1498003601365)"}
- ROW COLUMN+CELL
- lisi01 column=course:math, timestamp=1498003561726, value=89
- lisi01 column=grade:, timestamp=1498003561726, value=201
- zhangsan01 column=course:art, timestamp=1498003561726, value=90
- zhangsan01 column=course:math, timestamp=1498003561726, value=99
- zhangsan02 column=course:art, timestamp=1498003601365, value=90
- zhangsan02 column=course:math, timestamp=1498003561726, value=66
- zhangsan02 column=grade:, timestamp=1498003601365, value=102
- 3 row(s) in 0.0160 seconds
- */
- /**
- 21,WhileMatchFilter
- 至關於while執行,直到不match就break了返回了。
- */
- public void WhileMatchFilter(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- Scan scan = new Scan();
- Filter filter = new WhileMatchFilter(new ValueFilter(CompareOp.NOT_EQUAL,new BinaryComparator(Bytes.toBytes("101"))));
- scan.setFilter(filter);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- 。。。。。
- */
- /**
- 22,FilterList
- 表明一個過濾器鏈,它能夠包含一組即將應用於目標數據集的過濾器,過濾器間具備「與」FilterList.Operator.MUST_PASS_ALL和「或」FilterList.Operator.MUST_PASS_ONE關係。
- 官網實例代碼,兩個「或」關係的過濾器的寫法:
- */
- public void FilterList(String tableName) throws Exception {
- Table table = conn.getTable(TableName.valueOf(tableName));
- FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ONE); //數據只要知足一組過濾器中的一個就能夠
- SingleColumnValueFilter filter1 = new SingleColumnValueFilter(Bytes.toBytes("course"), Bytes.toBytes("math"),CompareOp.EQUAL,new BinaryComparator(Bytes.toBytes("89")));
- list.addFilter(filter1);
- SingleColumnValueFilter filter2 = new SingleColumnValueFilter(Bytes.toBytes("course"), Bytes.toBytes("math"),CompareOp.EQUAL,new BinaryComparator(Bytes.toBytes("66")));
- list.addFilter(filter2);
- Scan scan = new Scan();
- scan.setFilter(list);
- ResultScanner scanner = table.getScanner(scan);
- for (Result r : scanner) {
- for (Cell cell : r.rawCells()) {
- System.out.println(
- "Rowkey-->"+Bytes.toString(r.getRow())+" "+
- "Familiy:Quilifier-->"+Bytes.toString(CellUtil.cloneQualifier(cell))+" "+
- "Value-->"+Bytes.toString(CellUtil.cloneValue(cell)));
- }
- }
- }
- /*
- hbase(main):009:0> scan 'scores', {FILTER => "PrefixFilter('zhang') OR QualifierFilter(>=,'binary:b')"}
- ROW COLUMN+CELL
- lisi01 column=course:math, timestamp=1489747666861, value=89
- lisi01 column=grade:, timestamp=1489747677402, value=201
- zhangsan01 column=course:art, timestamp=1489747593360, value=90
- zhangsan01 column=course:math, timestamp=1489747589255, value=99
- zhangsan01 column=grade:, timestamp=1489747598001, value=101
- zhangsan02 column=course:art, timestamp=1489747607561, value=60
- zhangsan02 column=course:math, timestamp=1489747602883, value=66
- zhangsan02 column=grade:, timestamp=1489747614601, value=102
- 3 row(s) in 0.0180 seconds
- */
- }
上面有幾個過濾器我沒有找到在 hbase shell 中對應的寫法,如果大家找到了請告訴我一聲,一起進步。
參考:
http://blog.csdn.net/blue__yeah/article/details/41040399
http://blog.csdn.net/liangtingac/article/details/40078637
http://blog.csdn.net/u010967382/article/details/37653177
http://blog.csdn.net/sparkexpert/article/details/51942354
from: https://blog.csdn.net/m0_37739193/article/details/73615016