Check cluster status:
status
Create a table:
# create 'table_name','column_family_name','column_family_name'
create 'table','cf1','cf2'
Delete a table: the table must first be set to disabled, and only then can it be dropped.
# disable 'table_name'
disable 'table'
# drop 'table_name'
drop 'table'
Add or modify a column family:
If the column family does not exist it is added; if it already exists, its settings are modified.
# alter 'table_name',{column family attributes}
alter 'table',{NAME=>'cf3',VERSIONS=>3,IN_MEMORY=>TRUE}
Delete a column family:
# alter 'table_name',{NAME=>'column_family_name',METHOD=>'delete'}
alter 'table',{NAME=>'cf3',METHOD=>'delete'}
Write data:
# put 'table_name','rowkey','column_family:column','value'
put 'table','1001','cf1:column1','value1'
Query data:
# Full-table scan; use with caution and remember to add limiting conditions when the data volume is large
# scan 'table_name'
scan 'table'
# Read a single record by rowkey
# get 'table_name','rowkey'
get 'table','1001'
# Query by timestamp
# get 'table_name','rowkey',{COLUMN=>'column_family:column',TIMESTAMP=>timestamp}
put 'table','1002','cf1:column2','value1'
put 'table','1002','cf1:column2','value2'
get 'table','1002',{COLUMN=>'cf1:column2',TIMESTAMP=>1548778946757}
# Query by version number
# get 'table_name','rowkey',{COLUMN=>'column_family:column',VERSIONS=>version_number}
get 'table','1002',{COLUMN=>'cf1:column2',VERSIONS=>2}
# Query with a filter, e.g. a value filter
# scan 'table_name',FILTER=>"FilterName(=,'match_mode:value')"
scan 'table',FILTER=>"ValueFilter(=,'binary:value1')"      # exact match
scan 'table',FILTER=>"ValueFilter(=,'substring:lue')"      # substring match (this mode seems a bit unreliable; some rows were not matched and I am not sure why)
scan 'table',FILTER=>"ColumnPrefixFilter('column2') AND ValueFilter(=,'binary:value1')"   # multiple conditions
Delete data:
# truncate 'table_name'
truncate 'table'
Note:
After the table is created successfully, it can be seen on HDFS.
desc 'table'
NAME: the name of the column family
BLOOMFILTER: controlled at row-level granularity; for details see https://www.cnblogs.com/cssdongl/p/6396630.html
VERSIONS: defaults to 1, meaning only one version of the data is kept
IN_MEMORY: whether this column family's blocks are cached with in-memory priority, so they stay in the block cache as long as possible (default false)
KEEP_DELETED_CELLS: whether deleted cells are retained and remain visible to reads of past versions (default false)
DATA_BLOCK_ENCODING: the data block encoding, e.g. NONE, PREFIX, DIFF or FAST_DIFF (default NONE)
TTL: defaults to 2147483647 seconds (roughly 68 years); the time-to-live of the column family's data, in seconds
COMPRESSION: sets the compression algorithm
MIN_VERSIONS: the minimum number of versions to store
BLOCKCACHE: block cache setting; if the data is mostly accessed sequentially or is rarely accessed, the column family's cache can be turned off (it is on by default)
BLOCKSIZE: sets the HFile block size (default 64 KB)
REPLICATION_SCOPE: 0 disables replication for this column family, 1 enables it (default 0)
For installing Thrift, refer to the official documentation: http://thrift.apache.org/docs/install/centos
Go into HBase's bin directory and start the Thrift service:
cd /usr/local/src/hbase-0.98.6-hadoop2/bin
./hbase-daemon.sh start thrift
jps
netstat -antup | grep 25310
Copy the Thrift Python module into the directory where the code will live:
cd /usr/local/src/thrift-0.8.0/lib/py/build/lib.linux-x86_64-2.7
mkdir /1_hbaseTest
cp -r thrift/ /1_hbaseTest
Download and unpack the HBase source package:
wget http://archive.apache.org/dist/hbase/hbase-0.98.6/hbase-0.98.6-src.tar.gz
tar -zxvf hbase-0.98.6-src.tar.gz
Generate a Python-loadable module from the Thrift definition file:
cd /1_hbaseTest/hbase-0.98.6/hbase-thrift/src/main/resources/org/apache/hadoop/hbase/thrift
thrift --gen py Hbase.thrift
Copy the generated module into the code directory:
cd gen-py/
cp -r hbase/ /1_hbaseTest/
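At this point a quick sanity check can confirm that the generated bindings and the Thrift server are reachable. A minimal sketch, assuming the Thrift service started above is listening on master:9090 (the default port):

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase

# open a buffered binary-protocol connection to the HBase Thrift server
transport = TTransport.TBufferedTransport(TSocket.TSocket('master', 9090))
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()
print client.getTableNames()   # should print the existing table names
transport.close()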
The code (create a table):
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *

transport = TSocket.TSocket('master',9090)
transport = TTransport.TBufferedTransport(transport)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()
#=============================================
base_info_contents = ColumnDescriptor(name='meta-data',maxVersions=1)
other_info_contents = ColumnDescriptor(name='flags',maxVersions=1)
client.createTable('new_table',[base_info_contents,other_info_contents])
print client.getTableNames()
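The ColumnDescriptor generated from Hbase.thrift also exposes several of the column-family attributes shown by desc. A sketch only: the field names (maxVersions, inMemory, blockCacheEnabled, timeToLive) come from the generated hbase/ttypes.py, and the table name and TTL value here are purely illustrative:

# sketch: create a column family with explicit attributes via the Thrift ColumnDescriptor
cf = ColumnDescriptor(name='meta-data',
                      maxVersions=3,           # VERSIONS
                      inMemory=True,           # IN_MEMORY
                      blockCacheEnabled=True,  # BLOCKCACHE
                      timeToLive=86400)        # TTL in seconds (illustrative value)
client.createTable('attr_table', [cf])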
The code (insert data):
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *

transport = TSocket.TSocket('master', 9090)
transport = TTransport.TBufferedTransport(transport)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()

tableName = 'new_table'
rowkey = '1001'
mutations = [Mutation(column="meta-data:name",value="python"), \
             Mutation(column="meta-data:tag",value="pop"), \
             Mutation(column="flags:is_valid",value="TRUE")]
client.mutateRow(tableName,rowkey,mutations,None)
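For bulk writes the generated client also has a batch call. A small sketch on top of the connection above, assuming BatchMutation and mutateRows from the stock Hbase.thrift definition; the row keys and values are only examples:

# sketch: write several rows in one round trip with mutateRows
batch = [BatchMutation(row='1002', mutations=[Mutation(column="meta-data:name", value="java")]),
         BatchMutation(row='1003', mutations=[Mutation(column="meta-data:name", value="scala")])]
client.mutateRows(tableName, batch, None)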
The code (read data):
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *

transport = TSocket.TSocket('master', 9090)
transport = TTransport.TBufferedTransport(transport)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()

table_name = "new_table"
rowkey = '1001'
result = client.getRow(table_name,rowkey,None)
for r in result:
    print "the row is ",r.row
    print "the name is ",r.columns.get("meta-data:name").value
    print "the flag is ",r.columns.get("flags:is_valid").value
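If only certain columns are needed, getRowWithColumns avoids fetching the whole row. A sketch reusing the connection and names above:

# sketch: fetch only the listed columns for one rowkey
result = client.getRowWithColumns(table_name, rowkey, ["meta-data:name", "flags:is_valid"], None)
for r in result:
    for col, cell in r.columns.items():
        print col, cell.value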
The code (scan data):
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *

transport = TSocket.TSocket('master', 9090)
transport = TTransport.TBufferedTransport(transport)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()

table_name = "new_table"
scan = TScan()
id = client.scannerOpenWithScan(table_name,scan,None)
result = client.scannerGetList(id,10)
for r in result:
    print '====='
    print 'the row is ' , r.row
    for k,v in r.columns.items():
        print "\t".join([k,v.value])
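The snippet above fetches at most 10 rows. To walk the whole table, the scanner can be read in a loop until it is exhausted and then released; a sketch using the same scannerGetList/scannerClose calls from the generated client:

# sketch: drain the scanner in batches, then close it and the transport
id = client.scannerOpenWithScan(table_name, TScan(), None)
while True:
    rows = client.scannerGetList(id, 100)   # up to 100 rows per round trip
    if not rows:
        break
    for r in rows:
        print r.row, dict((k, v.value) for k, v in r.columns.items())
client.scannerClose(id)
transport.close()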
mkdir mr_hbase
cd mr_hbase/
Prepare the data.
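map.py below splits each line on a tab and keeps only lines with exactly two fields, so input.data is assumed to consist of tab-separated rowkey/value pairs, for example (sample values only, fields separated by a single tab):

1001	python
1002	hadoop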
run.sh:
HADOOP_CMD="/usr/local/src/hadoop-2.6.5/bin/hadoop"
STREAM_JAR_PATH="/usr/local/src/hadoop-2.6.5/share/hadoop/tools/lib/hadoop-streaming-2.6.5.jar"

INPUT_FILE_PATH="/input.data"
OUTPUT_PATH="/output_hbase"

$HADOOP_CMD fs -rmr -skipTrash $OUTPUT_PATH

$HADOOP_CMD jar $STREAM_JAR_PATH \
    -input $INPUT_FILE_PATH \
    -output $OUTPUT_PATH \
    -mapper "python map.py mapper_func" \
    -file ./map.py \
    -file "./hbase.tgz" \
    -file "./thrift.tgz"
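Note: hbase.tgz and thrift.tgz here are assumed to be tar archives of the hbase/ and thrift/ Python module directories copied earlier (e.g. created with tar -zcf thrift.tgz thrift/), since map.py unpacks them on each task node before importing them.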
map.py:
#!/usr/bin/python
import os
import sys

os.system('tar zxvf hbase.tgz > /dev/null')
os.system('tar zxvf thrift.tgz > /dev/null')

reload(sys)
sys.setdefaultencoding('utf-8')
sys.path.append("./")

from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *

transport = TSocket.TSocket('master', 9090)
transport = TTransport.TBufferedTransport(transport)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()

tableName = "new_table"

def mapper_func():
    for line in sys.stdin:
        ss = line.strip().split('\t')
        if len(ss) != 2:
            continue
        key = ss[0].strip()
        val = ss[1].strip()
        rowkey = key
        mutations = [Mutation(column="meta-data:name",value=val), \
                     Mutation(column="flags:is_valid",value="TRUE")]
        client.mutateRow(tableName,rowkey,mutations,None)

if __name__ == "__main__":
    module = sys.modules[__name__]
    func = getattr(module, sys.argv[1])
    args = None
    if len(sys.argv) > 1:
        args = sys.argv[2:]
    func(*args)
# Preparation
create 'table_java','cf1'
put 'table_java','1001','cf1:name','java1'
put 'table_java','1002','cf1:name','java2'
put 'table_java','1001','cf1:age','25'
put 'table_java','1002','cf1:gender','man'

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class PutOne {
    public static final String tableName = "table_java";
    public static final String columnFamily = "cf1";
    public static Configuration conf = HBaseConfiguration.create();
    private static HTable table;

    public static void main(String[] argv){
        conf.set("hbase.master","192.168.119.10");
        conf.set("hbase.zookeeper.quorum","192.168.119.10,192.168.119.11,192.168.119.12");
        add();
    }

    private static void add() {
        addOneRecord(tableName, "ip=192.168.87.200-001", columnFamily, "ip", "192.168.87.101");
        addOneRecord(tableName, "ip=192.168.87.200-001", columnFamily, "userid", "1100");
        addOneRecord(tableName, "ip=192.168.87.200-002", columnFamily, "ip", "192.168.1.201");
        addOneRecord(tableName, "ip=192.168.87.200-002", columnFamily, "userid", "1200");
        addOneRecord(tableName, "ip=192.168.87.200-003", columnFamily, "ip", "192.168.3.201");
        addOneRecord(tableName, "ip=192.168.87.200-003", columnFamily, "userid", "1300");
    }

    private static void addOneRecord(String tableName, String rowKey, String columnFamily, String qualifier, String value) {
        try {
            table = new HTable(conf, tableName);
            Put put = new Put(Bytes.toBytes(rowKey));
            put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(qualifier), Bytes.toBytes(value));
            table.put(put);
            System.out.println("insert record " + rowKey + " to table " + tableName + " success");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Package it into a jar with Maven in IDEA, then upload it to the server and run it.
Problems you may run into:
(1) hbaseTest-1.0-SNAPSHOT.jar has no main manifest attribute
Solution: add the main class name to the MANIFEST.MF file under the jar's META-INF directory; for details see https://jingyan.baidu.com/article/db55b60990f6084ba30a2fb8.html
(2) NoClassDefFoundError: org/apache/hadoop/hbase/HBaseConfiguration
Solution: find the hadoop-env.sh file under the Hadoop installation directory and add: export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/usr/local/src/hbase-0.98.6-hadoop2/lib/*
(/usr/local/src/hbase-0.98.6-hadoop2 is your own HBase installation path)
(3)
On the HBase web UI I saw the following.
The cause was that I had set this table to disabled myself... I brought it on myself.
Solution: enable 'table_java'