使用PageFilter分頁效率比較低,因爲每次都須要從頭掃描前面的數據,直到掃描到所須要查的那一頁;可是查詢下一頁的時候,能夠直接利用上一頁獲得的rowkey做爲起始行來直接查出。
PageFilter定義的是每次scan最多得出多少條記錄(這個限制是在每一個RegionServer上分別生效的,因此客戶端實際收到的行數可能多於設定值,須要本身再截斷)。
下面看用PageFilter實現分頁的代碼(最好使用rowkey,不建議使用過濾器,過濾器效率過低;設計表的時候設計一個好的rowkey,能夠帶來好多便利的條件)。
代碼記錄:
package hbasepage;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;

/**
 * Pages through an HBase table with a {@link PageFilter} plus a remembered start row.
 *
 * <p>Every page is scanned with a filter of {@code pageSize + 1} rows: the first
 * {@code pageSize} rows form the page, and the extra look-ahead row (if any) is
 * remembered as the row key at which the following page starts.
 *
 * <p>The table handle and the paging cursor are static fields shared by all callers
 * and are accessed without any synchronization in this class.
 */
public class Pager {

    public static Configuration configuration;

    static {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        configuration.set("hbase.zookeeper.quorum", "192.168.10.24,192.168.10.29,192.168.10.64");
        configuration.set("hbase.master", "192.168.10.120:60000");
    }

    private String tableName;

    private static HTable hTable;

    /**
     * Row key at which the next page begins, kept as raw bytes so that binary row
     * keys are not damaged by a String round trip. {@code null} means "start of table".
     */
    private static byte[] startRow = null;

    /**
     * Opens the table that the static paging methods operate on.
     *
     * <p>If the table cannot be opened the failure is printed and the shared table
     * handle is left as it was; the paging methods then fail with
     * {@link IllegalStateException} when they need a table that was never opened.
     *
     * @param tableName name of the HBase table to page through
     */
    public Pager(String tableName) {
        this.tableName = tableName;
        try {
            // hTable is static: assign it directly rather than through `this`.
            hTable = new HTable(configuration, tableName.getBytes());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the page preceding {@code pageNum}, i.e. page {@code pageNum - 1}.
     *
     * <p>The table is scanned from its first row, exactly as {@link #getPage(int, int)}
     * does, so the same rows are printed along the way.
     *
     * @param pageNum  1-based number of the current page
     * @param pageSize number of rows per page
     * @return the rows of page {@code pageNum - 1}; an empty list when that page does
     *         not exist or the scan failed
     */
    public static List<Result> getLast(int pageNum, int pageSize) {
        return scanPages(pageNum - 1, pageSize);
    }

    /**
     * Fetches the page that follows the one most recently scanned. Meant to be called
     * after {@link #getPage(int, int)} or a previous {@code getNext}; when nothing has
     * been scanned yet the scan starts at the beginning of the table.
     *
     * @param pageSize number of rows per page
     * @return the rows of the next page; empty when the table has no further rows
     * @throws Exception if the scan fails (an {@link IOException} from the HBase client)
     */
    public static List<Result> getNext(int pageSize) throws Exception {
        Scan scan = new Scan();
        scan.setFilter(new PageFilter(pageSize + 1));
        if (startRow != null) {
            scan.setStartRow(startRow);
        }
        List<Result> rows = new ArrayList<>();
        scanPage(scan, pageSize, false, rows);
        return rows;
    }

    /**
     * Scans pages 1..{@code pageNum} from the start of the table, printing every row
     * key and cell on the way, and leaves the paging cursor positioned after the last
     * page scanned so that {@link #getNext(int)} continues from there.
     *
     * <p>Example: {@code pageNum = 3, pageSize = 10} scans three pages of ten rows.
     *
     * @param pageNum  1-based number of the last page to scan
     * @param pageSize number of rows per page
     */
    public static void getPage(int pageNum, int pageSize) {
        scanPages(pageNum, pageSize);
    }

    /**
     * Scans pages 1..{@code pageNum} from the table start and returns the rows of page
     * {@code pageNum}; returns an empty list if the table ends earlier or a scan fails.
     */
    private static List<Result> scanPages(int pageNum, int pageSize) {
        System.out.println("hahha");
        // One extra row is requested per page; it marks where the next page starts.
        Scan scan = new Scan();
        scan.setFilter(new PageFilter(pageSize + 1));

        for (int page = 1; page <= pageNum; page++) {
            List<Result> rows = new ArrayList<>();
            boolean more;
            try {
                more = scanPage(scan, pageSize, true, rows);
            } catch (IOException e) {
                e.printStackTrace();
                // Retrying would rescan the same rows under a later page number.
                break;
            }
            if (page == pageNum) {
                return rows;
            }
            if (!more) {
                // Table exhausted: without a look-ahead row the scan start never
                // advances, so continuing would only repeat this page.
                break;
            }
        }
        return new ArrayList<>();
    }

    /**
     * Scans one page starting at the scan's current start row, appends its rows to
     * {@code rows}, and advances both the scan and the paging cursor.
     *
     * @return {@code true} if a look-ahead row was seen, i.e. another page exists
     */
    private static boolean scanPage(Scan scan, int pageSize, boolean printCells, List<Result> rows)
            throws IOException {
        if (hTable == null) {
            throw new IllegalStateException("No table is open; construct a Pager first");
        }
        byte[] lastRow = null;
        int count = 0;
        // try-with-resources releases the server-side scanner on every exit path.
        try (ResultScanner scanner = hTable.getScanner(scan)) {
            for (Result r : scanner) {
                byte[] row = r.getRow();
                count++;
                if (count == pageSize + 1) {
                    // Look-ahead row: first row of the next page.
                    startRow = row;
                    scan.setStartRow(row);
                    System.out.println("startRow" + new String(row));
                    return true;
                }
                rows.add(r);
                lastRow = row;
                System.out.println(new String(row));
                if (printCells) {
                    // Dump every column of the row as qualifier/value pairs.
                    for (KeyValue keyValue : r.list()) {
                        System.out.println("列:" + new String(keyValue.getQualifier())
                                + "====值:" + new String(keyValue.getValue()));
                    }
                }
                System.out.println(count);
            }
        }
        if (lastRow != null) {
            // Scan start rows are inclusive; move just past the last row returned so
            // a following getNext() does not hand it out a second time.
            startRow = successorOf(lastRow);
        }
        return false;
    }

    /** Returns the smallest row key that sorts strictly after {@code row} (row + 0x00). */
    private static byte[] successorOf(byte[] row) {
        byte[] next = new byte[row.length + 1];
        System.arraycopy(row, 0, next, 0, row.length);
        return next;
    }
}
下面寫一點經常使用的Filter過濾器:
HBase爲篩選數據提供了一組過濾器,經過這些過濾器能夠在HBase中數據的多個維度(行,列,數據版本)上對數據進行篩選操做,也就是說過濾器最終可以篩選的數據可以細化到具體的一個存儲單元格上(由行鍵,列名,時間戳定位)。一般來講,經過行鍵,值來篩選數據的應用場景較多。
// OK: keeps every row whose key matches the comparator (here: equals "row1").
Filter rf = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("row1")));
// OK: keeps the rows whose key starts with the given prefix.
Filter pf = new PrefixFilter(Bytes.toBytes("row"));
// OK: returns all rows, but every value is empty.
Filter kof = new KeyOnlyFilter();
// OK: randomly picks a portion of the rows.
Filter rrf = new RandomRowFilter(0.8f);
// OK: includes the scan's upper bound (the stop row) in the results.
Filter isf = new InclusiveStopFilter(Bytes.toBytes("row1"));
// OK: keeps only the first cell of each row.
Filter fkof = new FirstKeyOnlyFilter();
// OK: keeps the columns whose qualifier starts with the given prefix.
Filter cpf = new ColumnPrefixFilter(Bytes.toBytes("qual1"));
// OK: keeps the individual cells whose value satisfies the condition.
Filter vf = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("ROW2_QUAL1"));
// OK: once a row turns out to have more columns than the configured maximum, the whole scan stops.
Filter ccf = new ColumnCountGetFilter(2);
// OK: tests the value of column colfam1:qual2 (NOT_EQUAL against the substring "BOGUS");
// see the SingleColumnValueFilter documentation for what the two setters below control.
SingleColumnValueFilter scvf = new SingleColumnValueFilter(
        Bytes.toBytes("colfam1"),
        Bytes.toBytes("qual2"),
        CompareFilter.CompareOp.NOT_EQUAL,
        new SubstringComparator("BOGUS"));
scvf.setFilterIfMissing(false);
scvf.setLatestVersionOnly(true);
// OK: when any one column of a row is filtered out, the whole row is dropped.
Filter skf = new SkipFilter(vf);
// OK: similar to takewhile in Python's itertools.
Filter wmf = new WhileMatchFilter(rf);
// OK: combines several filters; the operator selects AND (MUST_PASS_ALL) or OR (MUST_PASS_ONE).
List<Filter> filters = new ArrayList<Filter>();
filters.add(rf);
filters.add(vf);
FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
以上,是對於HBase內置的過濾器的部分總結。