一、搭建環境
新建Java項目,添加的包有:
有關Hadoop的hadoop-core-0.20.204.0.jar
有關Hbase的hbase-0.90.4.jar、hbase-0.90.4-tests.jar以及Hbase資源包中lib目錄下的全部jar包
二、主要程序
- package com.wujintao.hbase.test;
-
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.List;
-
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.hbase.HBaseConfiguration;
- import org.apache.hadoop.hbase.HColumnDescriptor;
- import org.apache.hadoop.hbase.HTableDescriptor;
- import org.apache.hadoop.hbase.KeyValue;
- import org.apache.hadoop.hbase.MasterNotRunningException;
- import org.apache.hadoop.hbase.ZooKeeperConnectionException;
- import org.apache.hadoop.hbase.client.Delete;
- import org.apache.hadoop.hbase.client.Get;
- import org.apache.hadoop.hbase.client.HBaseAdmin;
- import org.apache.hadoop.hbase.client.HTable;
- import org.apache.hadoop.hbase.client.HTablePool;
- import org.apache.hadoop.hbase.client.Put;
- import org.apache.hadoop.hbase.client.Result;
- import org.apache.hadoop.hbase.client.ResultScanner;
- import org.apache.hadoop.hbase.client.Scan;
- import org.apache.hadoop.hbase.filter.Filter;
- import org.apache.hadoop.hbase.filter.FilterList;
- import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
- import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
- import org.apache.hadoop.hbase.util.Bytes;
-
- public class JinTaoTest {
-
- public static Configuration configuration;
- static {
- configuration = HBaseConfiguration.create();
- configuration.set("hbase.zookeeper.property.clientPort", "2181");
- configuration.set("hbase.zookeeper.quorum", "192.168.1.100");
- configuration.set("hbase.master", "192.168.1.100:600000");
- }
-
- public static void main(String[] args) {
- // createTable("wujintao");
- // insertData("wujintao");
- // QueryAll("wujintao");
- // QueryByCondition1("wujintao");
- // QueryByCondition2("wujintao");
- //QueryByCondition3("wujintao");
- //deleteRow("wujintao","abcdef");
- deleteByCondition("wujintao","abcdef");
- }
-
- /**
- * 建立表
- * @param tableName
- */
- public static void createTable(String tableName) {
- System.out.println("start create table ......");
- try {
- HBaseAdmin hBaseAdmin = new HBaseAdmin(configuration);
- if (hBaseAdmin.tableExists(tableName)) {// 若是存在要建立的表,那麼先刪除,再建立
- hBaseAdmin.disableTable(tableName);
- hBaseAdmin.deleteTable(tableName);
- System.out.println(tableName + " is exist,detele....");
- }
- HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
- tableDescriptor.addFamily(new HColumnDescriptor("column1"));
- tableDescriptor.addFamily(new HColumnDescriptor("column2"));
- tableDescriptor.addFamily(new HColumnDescriptor("column3"));
- hBaseAdmin.createTable(tableDescriptor);
- } catch (MasterNotRunningException e) {
- e.printStackTrace();
- } catch (ZooKeeperConnectionException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- System.out.println("end create table ......");
- }
-
- /**
- * 插入數據
- * @param tableName
- */
- public static void insertData(String tableName) {
- System.out.println("start insert data ......");
- HTablePool pool = new HTablePool(configuration, 1000);
- HTable table = (HTable) pool.getTable(tableName);
- Put put = new Put("112233bbbcccc".getBytes());// 一個PUT表明一行數據,再NEW一個PUT表示第二行數據,每行一個惟一的ROWKEY,此處rowkey爲put構造方法中傳入的值
- put.add("column1".getBytes(), null, "aaa".getBytes());// 本行數據的第一列
- put.add("column2".getBytes(), null, "bbb".getBytes());// 本行數據的第三列
- put.add("column3".getBytes(), null, "ccc".getBytes());// 本行數據的第三列
- try {
- table.put(put);
- } catch (IOException e) {
- e.printStackTrace();
- }
- System.out.println("end insert data ......");
- }
-
- /**
- * 刪除一張表
- * @param tableName
- */
- public static void dropTable(String tableName) {
- try {
- HBaseAdmin admin = new HBaseAdmin(configuration);
- admin.disableTable(tableName);
- admin.deleteTable(tableName);
- } catch (MasterNotRunningException e) {
- e.printStackTrace();
- } catch (ZooKeeperConnectionException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- }
- /**
- * 根據 rowkey刪除一條記錄
- * @param tablename
- * @param rowkey
- */
- public static void deleteRow(String tablename, String rowkey) {
- try {
- HTable table = new HTable(configuration, tablename);
- List list = new ArrayList();
- Delete d1 = new Delete(rowkey.getBytes());
- list.add(d1);
-
- table.delete(list);
- System.out.println("刪除行成功!");
-
- } catch (IOException e) {
- e.printStackTrace();
- }
-
-
- }
-
- /**
- * 組合條件刪除
- * @param tablename
- * @param rowkey
- */
- public static void deleteByCondition(String tablename, String rowkey) {
- //目前尚未發現有效的API可以實現 根據非rowkey的條件刪除 這個功能能,還有清空表所有數據的API操做
-
- }
-
-
- /**
- * 查詢全部數據
- * @param tableName
- */
- public static void QueryAll(String tableName) {
- HTablePool pool = new HTablePool(configuration, 1000);
- HTable table = (HTable) pool.getTable(tableName);
- try {
- ResultScanner rs = table.getScanner(new Scan());
- for (Result r : rs) {
- System.out.println("得到到rowkey:" + new String(r.getRow()));
- for (KeyValue keyValue : r.raw()) {
- System.out.println("列:" + new String(keyValue.getFamily())
- + "====值:" + new String(keyValue.getValue()));
- }
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- /**
- * 單條件查詢,根據rowkey查詢惟一一條記錄
- * @param tableName
- */
- public static void QueryByCondition1(String tableName) {
-
- HTablePool pool = new HTablePool(configuration, 1000);
- HTable table = (HTable) pool.getTable(tableName);
- try {
- Get scan = new Get("abcdef".getBytes());// 根據rowkey查詢
- Result r = table.get(scan);
- System.out.println("得到到rowkey:" + new String(r.getRow()));
- for (KeyValue keyValue : r.raw()) {
- System.out.println("列:" + new String(keyValue.getFamily())
- + "====值:" + new String(keyValue.getValue()));
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- /**
- * 單條件按查詢,查詢多條記錄
- * @param tableName
- */
- public static void QueryByCondition2(String tableName) {
-
- try {
- HTablePool pool = new HTablePool(configuration, 1000);
- HTable table = (HTable) pool.getTable(tableName);
- Filter filter = new SingleColumnValueFilter(Bytes
- .toBytes("column1"), null, CompareOp.EQUAL, Bytes
- .toBytes("aaa")); // 當列column1的值爲aaa時進行查詢
- Scan s = new Scan();
- s.setFilter(filter);
- ResultScanner rs = table.getScanner(s);
- for (Result r : rs) {
- System.out.println("得到到rowkey:" + new String(r.getRow()));
- for (KeyValue keyValue : r.raw()) {
- System.out.println("列:" + new String(keyValue.getFamily())
- + "====值:" + new String(keyValue.getValue()));
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- }
-
- /**
- * 組合條件查詢
- * @param tableName
- */
- public static void QueryByCondition3(String tableName) {
-
- try {
- HTablePool pool = new HTablePool(configuration, 1000);
- HTable table = (HTable) pool.getTable(tableName);
-
- List<Filter> filters = new ArrayList<Filter>();
-
- Filter filter1 = new SingleColumnValueFilter(Bytes
- .toBytes("column1"), null, CompareOp.EQUAL, Bytes
- .toBytes("aaa"));
- filters.add(filter1);
-
- Filter filter2 = new SingleColumnValueFilter(Bytes
- .toBytes("column2"), null, CompareOp.EQUAL, Bytes
- .toBytes("bbb"));
- filters.add(filter2);
-
- Filter filter3 = new SingleColumnValueFilter(Bytes
- .toBytes("column3"), null, CompareOp.EQUAL, Bytes
- .toBytes("ccc"));
- filters.add(filter3);
-
- FilterList filterList1 = new FilterList(filters);
-
- Scan scan = new Scan();
- scan.setFilter(filterList1);
- ResultScanner rs = table.getScanner(scan);
- for (Result r : rs) {
- System.out.println("得到到rowkey:" + new String(r.getRow()));
- for (KeyValue keyValue : r.raw()) {
- System.out.println("列:" + new String(keyValue.getFamily())
- + "====值:" + new String(keyValue.getValue()));
- }
- }
- rs.close();
-
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- }
-
- }
注意:可能你們沒看到更新數據的操做,其實更新的操做跟添加徹底一致,只不過添加時rowkey不存在,而更新時rowkey已經存在而且timestamp相同。另外,目前好像還沒辦法實現hbase數據的分頁查詢,不知道有沒有人知道怎麼作
HBase性能優化建議:
針對前面的代碼,有不少不足之處,在此我就不修改上面的代碼了,只是提出建議的地方,你們本身加上
1)配置
當你調用create方法時將會加載兩個配置文件:hbase-default.xml和hbase-site.xml,利用的是當前的java類路徑。代碼中configuration設置的這些配置將會覆蓋hbase-default.xml和hbase-site.xml中相同的配置;若是兩個配置文件都存在而且都設置了相同的屬性,則代碼中設置的值優先生效。
2)關於建表
public void createTable(HTableDescriptor desc)
HTableDescriptor 表明的是表的schema, 提供的方法中比較有用的有
setMaxFileSize,指定最大的region size
setMemStoreFlushSize 指定memstore flush到HDFS上的文件大小
增長family經過 addFamily方法
public void addFamily(final HColumnDescriptor family)
HColumnDescriptor表明的是column的schema,提供的方法比較經常使用的有
setTimeToLive:指定最大的TTL,單位是ms,過時數據會被自動刪除。
setInMemory:指定是否放在內存中,對小表有用,可用於提升效率。默認關閉
setBloomFilter:指定是否使用BloomFilter,可提升隨機查詢效率。默認關閉
setCompressionType:設定數據壓縮類型。默認無壓縮。
setMaxVersions:指定數據最大保存的版本個數。默認爲3。
注意的是,通常咱們不去setInMemory爲true,默認是關閉的
3)關於入庫
官方建議
table.setAutoFlush(false); //數據入庫以前先設置此項爲false
table.flushCommits();//入庫完成後,手動刷入數據(注:方法名爲flushCommits,並非setflushCommits)
注意:
在入庫過程當中,put.setWriteToWAL(true/false);
關於這一項若是不但願大量數據在存儲過程當中丟失,建議設置爲true,若是僅是在測試演練階段,爲了節省入庫時間建議設置爲false
4)關於獲取表實例
HTablePool pool = new HTablePool(configuration, Integer.MAX_VALUE);
HTable table = (HTable) pool.getTable(tableName);
建議用錶鏈接池的方式獲取表,具體池有什麼做用,我想用過數據庫鏈接池的同窗都知道,我就再也不重複
不建議使用new HTable(configuration,tableName);的方式獲取表
5)關於查詢
建議每一個查詢語句都放入try catch語句塊,而且finally中要進行關閉ResultScanner實例以及將不使用的表從新放入到HTablePool中的操做,具體作法以下
- public static void QueryAll(String tableName) {
- HTablePool pool = new HTablePool(configuration, Integer.MAX_VALUE);
- HTable table = null;
- ResultScanner rs = null;
- try {
- Scan scan = new Scan();
- table = (HTable) pool.getTable(tableName);
- rs = table.getScanner(scan);
- for (Result r : rs) {
- System.out.println("得到到rowkey:" + new String(r.getRow()));
- for (KeyValue keyValue : r.raw()) {
- System.out.println("列:" + new String(keyValue.getFamily())
- + "====值:" + new String(keyValue.getValue()));
- }
- }
- } catch (IOException e) {
- e.printStackTrace();
- }finally{
- rs.close();// 最後還得關閉
- pool.putTable(table); //實際應用過程當中,pool獲取實例的方式應該抽取爲單例模式的,不該在每一個方法都從新獲取一次(單例明白?就是抽取到專門獲取pool的邏輯類中,具體邏輯爲若是pool存在着直接使用,若是不存在則new)
- }
- }
因此,以上代碼有缺陷的地方,感興趣的同窗能夠針對優化建議做出相應修改
http://blog.csdn.net/ieicihc/article/details/10604129
http://javacrazyer.iteye.com/blog/1186881