處理非常龐大的表
行存儲:mysql、oracle 底層基於行存儲數據
列存儲:hbase 底層基於列存儲數據
==安裝hbase高可用集羣之前,首先要保證zookeeper和hadoop已經安裝完成==
hbase-1.1.5-bin.tar.gz
集羣的規劃
解壓安裝包
[root@uplooking01: /soft]: tar -zxvf hbase-1.1.5-bin.tar.gz -C /opt/
重命名
[root@uplooking01: /opt]: mv hbase-1.1.5/ hbase
配置環境變量
[root@uplooking01: /opt]: vim /etc/profile
#配置HBASE的環境變量
export HBASE_HOME=/opt/hbase
export PATH=$PATH:$HBASE_HOME/bin
配置hbase-env.sh
[root@uplooking01: /opt/hbase/conf]: vim hbase-env.sh
export JAVA_HOME=/opt/jdk
#不使用hbase自帶的zookeeper
export HBASE_MANAGES_ZK=false
export HBASE_CLASSPATH=/opt/hadoop/etc/hadoop
配置hbase-site.xml
[root@uplooking01: /opt/hbase/conf]: vim hbase-site.xml
<configuration>
  <!-- Root directory of HBase data on HDFS (ns1 is the HDFS nameservice) -->
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://ns1/hbase</value>
  </property>
  <!-- Local temporary directory -->
  <property>
    <name>hbase.tmp.dir</name>
    <value>/opt/hbase/tmp</value>
  </property>
  <!-- Run in fully distributed mode -->
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <!-- External ZooKeeper ensemble -->
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>uplooking03:2181,uplooking04:2181,uplooking05:2181</value>
  </property>
</configuration>
配置 regionservers
[root@uplooking01: /opt/hbase/conf]: vim regionservers
uplooking03
uplooking04
uplooking05
分發文件
[root@uplooking01: /opt]:
scp -r hbase uplooking02:/opt
scp -r hbase uplooking03:/opt
scp -r hbase uplooking04:/opt
scp -r hbase uplooking05:/opt
scp /etc/profile uplooking02:/etc/
scp /etc/profile uplooking03:/etc/
scp /etc/profile uplooking04:/etc/
scp /etc/profile uplooking05:/etc/
source /etc/profile(所有節點都要執行,使環境變量生效)
啓動hbase集羣
start-hbase.sh
單獨啓動master
[root@uplooking02:/] hbase-daemon.sh start master
==啓動hbase集羣必定要保證整個集羣的時間一致==
如果執行start-hbase.sh啓動集羣時,master節點可以啓動,但regionserver節點不能啓動,而單獨啓動regionserver(hbase-daemon.sh start regionserver)卻可以正常啓動,那麼就需要拷貝一個jar包:
將$HADOOP_HOME/share/hadoop/common/lib下的htrace-core-3.0.4.jar 複製到$HBASE_HOME/lib下
表(table)
行鍵(rowKey)
列簇(列族)columnFamily
列限定符(column Qualifier)
時間戳(version)
單元格(cell)
三級定位
Zookeeper
Master
RegionServer
Hlog
Region
Hstore
MemStore
StoreFile
Hfile
zookeeper(尋找元數據信息)
列出全部的命名空間(至關於mysql中的show databases)
列出指定命名空間下的全部表
建立命名空間
建立表
禁用表,因爲刪除表之前首先需要禁用表
啓用表
刪除表
添加數據
查詢數據
刪除數據
刪除一行數據
統計表的行數
建立Hbase表時指定列族的顯示版本數
修改Hbase表中的列族的顯示版本數
查詢指定版本數的數據
根據顯示的版本數,查詢出來想要版本的時間戳,根據時間戳找出具體值
<properties>
  <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  <!-- Single place to pin the HBase client/server version -->
  <hbase-version>1.1.5</hbase-version>
</properties>

<dependencies>
  <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>${hbase-version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>${hbase-version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-hbase-handler</artifactId>
    <version>2.1.0</version>
  </dependency>
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.12</version>
  </dependency>
</dependencies>
public class HbaseTest { //添加數據 @Test public void testPut() throws IOException { Configuration conf = HBaseConfiguration.create(); //指定zk的地址 conf.set("hbase.zookeeper.quorum", "uplooking03:2181,uplooking04:2181,uplooking05:2181"); Connection conn = ConnectionFactory.createConnection(conf); Table table = conn.getTable(TableName.valueOf("ns1:t1")); Put put = new Put(Bytes.toBytes("row001")); put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("name"), Bytes.toBytes("admin02")); table.put(put); } //刪除數據 @Test public void testDelete() throws IOException { Configuration conf = HBaseConfiguration.create(); //指定zk的地址 conf.set("hbase.zookeeper.quorum", "uplooking03:2181,uplooking04:2181,uplooking05:2181"); Connection conn = ConnectionFactory.createConnection(conf); Table table = conn.getTable(TableName.valueOf("ns1:t1")); Delete delete = new Delete(Bytes.toBytes("row001")); table.delete(delete); } //查詢數據 @Test public void testGet() throws IOException { Configuration conf = HBaseConfiguration.create(); //指定zk的地址 conf.set("hbase.zookeeper.quorum", "uplooking03:2181,uplooking04:2181,uplooking05:2181"); Connection conn = ConnectionFactory.createConnection(conf); Table table = conn.getTable(TableName.valueOf("ns1:t1")); Get get = new Get(Bytes.toBytes("row001")); Result result = table.get(get); String s = Bytes.toString(result.getValue(Bytes.toBytes("f1"),Bytes.toBytes("name"))); System.out.println(s); } }
public class HbaseAdminTest { private Connection connection; @Before public void init() throws Exception { Configuration conf = new Configuration(); conf.set("hbase.zookeeper.quorum", "uplooking03:2181,uplooking04:2181,uplooking05:2181"); connection = ConnectionFactory.createConnection(conf); } /** * 建立表 * * @throws Exception */ @Test public void testCreateTable() throws Exception { //獲取管理對象 Admin admin = connection.getAdmin(); HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("t2")); HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes("f1")); htd.addFamily(hcd); admin.createTable(htd); } /** * 列出全部的表 * @throws Exception */ @Test public void testListTableNames() throws Exception { //獲取管理對象 Admin admin = connection.getAdmin(); TableName[] tableNames = admin.listTableNames("ns1:.*"); for (TableName tableName : tableNames) { System.out.println(tableName); } } }
//查詢數據 @Test public void testScan() throws IOException { Configuration conf = HBaseConfiguration.create(); //指定zk的地址 conf.set("hbase.zookeeper.quorum", "uplooking03:2181,uplooking04:2181,uplooking05:2181"); Connection conn = ConnectionFactory.createConnection(conf); Table table = conn.getTable(TableName.valueOf("ns1:t1")); Scan scan = new Scan(); byte[] cf = Bytes.toBytes("f1"); byte[] column = Bytes.toBytes("name"); Filter filter = new SingleColumnValueFilter(cf, column, CompareFilter.CompareOp.EQUAL, Bytes.toBytes("admin123")); scan.setFilter(filter); //獲取包含多行數據的對象 ResultScanner resultScanner = table.getScanner(scan); for (Result result : resultScanner) { System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("f1"), Bytes.toBytes("age")))); } }
耗時約20分鐘
本身測試10分鐘
8800000ms,插入15851742條數據
/** * 百萬數據的插入 */ public class HbaseMiTest { private Connection connection; @Before public void init() throws Exception { Configuration conf = new Configuration(); conf.set("hbase.zookeeper.quorum", "uplooking03:2181,uplooking04:2181,uplooking05:2181"); connection = ConnectionFactory.createConnection(conf); } @Test public void test01() throws IOException { HTable table = (HTable) connection.getTable(TableName.valueOf("ns1:t1")); //不使用每一個put操做都刷出一次 table.setAutoFlush(false); long startTime = System.currentTimeMillis(); for (int i = 0; i < 1000000; i++) { Put put = new Put(Bytes.toBytes("row" + i)); //關閉預寫日誌,可是不建議使用,由於這樣作不安全 put.setWriteToWAL(false); put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("name"), Bytes.toBytes("admin" + i)); table.put(put); if (i % 100000 == 0) { table.flushCommits(); } } table.flushCommits(); long endTime = System.currentTimeMillis(); System.out.println("總耗時:" + (endTime - startTime) + "ms"); } }
大約耗時27s
本身測試,1分20秒 590/80=7.4倍
查詢一行是9秒
97602ms,插入15851742條數據 8800/175=50倍
split 'ns1:t1','row040'
move 'f6e6164514db53d660c5414df1f3864e','uplooking05,16020,1539222350164'
這是因爲行鍵類似、連續且數據量過大,造成單個region的數據量過大,進而影響讀寫效率
行鍵應該儘量隨機,不要出現連續的行鍵。
常見的行鍵設計有:手機號碼倒置+時間戳、隨機前綴+關係型數據庫中的主鍵等
因爲hbase提供的查詢功能非常low,所有關於hbase的查詢都只能通過rowkey進行,所以
在設計行鍵的時候,應該考慮將盡可能多的查詢條件放到rowkey中去,這樣形成的行鍵就成爲複合鍵
列族的設計:
cf1----->"columnFamily"
cf2----->"cf"
建議hbase表是高表,不建議寬表,由於寬表擁有的列族不少,操做並跨越的文件(HFile)就不少,效率會有相應影響,
反之建議使用高表,列族不宜過多(列族通常使用一個)。
在設計表的時候,各個列/列族名稱不宜過長,因爲hbase需要對這些數據在內存中做緩存、做索引,過長的名稱會佔用更多內存,所以建議不宜過長,以便能夠在內存中容納更多的數據。至於可讀性,由項目文檔搞定。
HbaseExplorer