因爲數據量很是大,客戶須要對過時的數據進行清理,例如:hbase表中有2017年與2016年的數據,如今須要將2016年的數據進行清理,即批量刪除操做。又由於hbase在刪除方面功能較弱,僅提供單行刪除功能。
本次操做的數據如圖1所示,能夠發現,表rowkey中含有日期時間,因此在批量刪除時採用rowkey的時間來過濾。java
代碼初版,以A表爲例,刪除2016年的數據,則在下面代碼中的args[0]設爲:2016便可。
package Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hbase.filter.SubstringComparator; import java.io.IOException; import java.util.ArrayList; import java.util.List; public class ClearDatafromHBase { private static Configuration config = HBaseConfiguration.create(); private static HTable tableEvent = null; /** * Perparing clear data */ public static List<Delete> getDeleteList(ResultScanner rs) { List<Delete> list = new ArrayList<Delete>(); try { for (Result result : rs) { Delete delete = new Delete(result.getRow()); list.add(delete); } } finally { rs.close(); } return list; } /** * Clear data from event */ public static void deleteRowkeyOfEvent(String string) { try { tableEvent = new HTable(config, "A"); RowFilter rf = new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator(string)); Scan scan = new Scan(); scan.setFilter(rf); ResultScanner scanner = tableEvent.getScanner(scan); List<Delete> list = getDeleteList(scanner); if (list.size() > 0) { tableEvent.delete(list); } } catch (IOException e) { e.printStackTrace(); } finally { if (null != tableEvent) { try { tableEvent.close(); } catch (IOException e) { e.printStackTrace(); } } } } public static void main(String[] args) throws IOException { ClearDatafromHBase dh = new ClearDatafromHBase(); String string = args[0]; dh.deleteRowkeyOfEvent(string); } }
代碼第二版,可以指定表名和時間,運行方法與初版相同,只是參數[0]表示表名,參數[1]表示時間。
package Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hbase.filter.SubstringComparator; import org.apache.hadoop.hbase.util.Bytes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; import java.util.List; /** * Created by lihao on 2017/8/9. * Clear the databases of timeout from hbase */ public class ClearDatafromHBase { private static String nsPrefix = "razor:"; private static Logger logger = LoggerFactory.getLogger(ClearDatafromHBase.class); private static HConnection hBaseConn; private static Configuration config = null; private static HTable tableEvent1 = null; private static HTableInterface table = null; // private static HTableInterface tableError = null; // private static HTableInterface tableClientdata = null; // private static HTableInterface tableUsinglog = null; static { try { config = HBaseConfiguration.create(); hBaseConn = HConnectionManager.createConnection(config); } catch (IOException e) { e.printStackTrace(); } } public static void init(String tableName) throws IOException { table = hBaseConn.getTable(tableName); TableName name = table.getName(); System.out.println(name); } /** * Perparing clear data */ public static List<Delete> getDeleteList(ResultScanner rs) { List<Delete> list = new ArrayList<Delete>(); try { for (Result result : rs) { Delete delete = new Delete(result.getRow()); list.add(delete); } } finally { rs.close(); } return list; } public static Scan getScannerByRowkey(String string) { Scan scan = new Scan(); RowFilter rf = new RowFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator(string)); 
scan.setFilter(rf); return scan; } public void clearData(Scan scan, HTableInterface table, String date) { ResultScanner resultScan = null; try { resultScan = table.getScanner(scan); List<Delete> list = getDeleteList(resultScan); if (list.size() > 0) { table.delete(list); } } catch (IOException e) { e.printStackTrace(); } finally { if (null != table) { try { table.close(); } catch (IOException e) { e.printStackTrace(); } } } } public void deleteOfHbase(String string) { clearData(getScannerByRowkey(string), table, string); } public static void close(HTableInterface table) { if (table != null) { try { table.close(); } catch (IOException e) { e.printStackTrace(); } } } public static void main(String[] args) { String tablename = nsPrefix+args[0]; String time = args[1]; if (args == null || args.length <= 0) { logger.error("輸入參數錯誤"); throw new RuntimeException("輸入參數錯誤"); } logger.info("開始清理數據"); try { init(tablename); ClearDatafromHBase dh = new ClearDatafromHBase(); dh.deleteOfHbase(time); logger.info("數據清理結束"); } catch (IOException e) { logger.error("清理數據失敗", e); e.printStackTrace(); } finally { close(table); } } }
如果rowkey中不含有時間標誌的話,能夠根據時間戳的範圍進行刪除。
/**
 * Deletes every row of {@code tableName} that has at least one cell whose
 * timestamp falls in the half-open range [minTime, maxTime). Useful when the
 * rowkey itself carries no date component.
 *
 * @param tableName name of the target table
 * @param minTime   inclusive lower bound (epoch millis)
 * @param maxTime   exclusive upper bound (epoch millis)
 */
public static void deleteTimeRange(String tableName, Long minTime, Long maxTime) {
    // FIX: try-with-resources replaces the original's manual close ladder;
    // table and connection are closed in reverse order even on failure.
    try (Connection connection = HBaseOperator.getHbaseConnection();
         Table table = connection.getTable(TableName.valueOf(tableName))) {
        Scan scan = new Scan();
        scan.setTimeRange(minTime, maxTime);  // server-side timestamp filter
        List<Delete> list;
        try (ResultScanner rs = table.getScanner(scan)) {
            list = getDeleteList(rs);
        }
        if (!list.isEmpty()) {
            table.delete(list);
        }
    } catch (Exception e) {
        // Broad catch kept: HBaseOperator.getHbaseConnection()'s checked
        // exception contract is not visible here — TODO narrow to IOException
        // if that is all it declares.
        e.printStackTrace();
    }
}

/**
 * Converts every row returned by the scanner into a {@link Delete}.
 * Always closes the scanner, even if iteration throws.
 */
private static List<Delete> getDeleteList(ResultScanner rs) {
    List<Delete> list = new ArrayList<>();
    try {
        for (Result r : rs) {
            list.add(new Delete(r.getRow()));
        }
    } finally {
        rs.close();
    }
    return list;
}
參考文獻:
hbase鏈接對錶的操作
hbase shell命令的使用
hbase filter的使用
hbase容災備份
hbase的mapreduce任務
本文僅工做之餘所作第一版,後期會進行修改及更新操做,若有轉載,請標明出處.