python Hbase Thrift pycharm 及引入包

cp -r hbase/ /usr/lib/python2.7/site-packages/

官方示例

http://code.google.com/p/hbase-thrift/source/browse/trunk/python/test/tables.py
http://yannramin.com/2008/07/19/using-facebook-thrift-with-python-and-hbase/
http://wiki.apache.org/hadoop/Hbase/ThriftApi

將生成的hbase目錄copy到python的包下:
cp -r hbase /usr/lib/python2.4/site-packages/
3。啓動hbase和thrift服務:
./bin/start-hbase.sh
./bin/hbase-daemon.sh start thrift
好像須要源碼,我反正沒找到src目錄,忘記了  。。。。。。 忘記當初本身怎麼裝的了。
# --*-- coding:utf-8 --*--

import sys
import time

# 全部thirft編程都須要的
from thrift import Thrift
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
# Hbase的 客戶端代碼
from hbase import ttypes
from hbase.Hbase import Client, ColumnDescriptor, Mutation


# Open the connection to localhost:9090.
# NOTE(review): the original note described this as the HBase ZooKeeper
# address; the socket is handed straight to the Thrift client below, so it is
# presumably the HBase Thrift server endpoint -- confirm.
# The raw socket is wrapped in a buffered transport immediately because
# buffering is critical: raw sockets are very slow.
transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9090))

# The binary protocol encodes calls onto the transport; the client issues its
# calls through that protocol.
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Client(protocol)

# Nothing is sent until the transport has been opened.
transport.open()

# Name of the demo table that the rest of this script drops, recreates,
# writes to and scans.
t = 'tab2'


# List every table, but drop only the demo table so that the createTable call
# further down starts from a clean slate. Every other table is left untouched.
print('scanning tables ......')
for table in client.getTableNames():
    print('found:%s' % table)
    if table != t:
        continue
    # HBase only deletes disabled tables, so disable it first when needed;
    # a table that is already disabled is deleted directly.
    if client.isTableEnabled(table):
        print(' disabling table: %s' % table)
        client.disableTable(table)
    print('deleting table: %s' % table)
    client.deleteTable(table)


# Create the table.
# Two column families: 'entry:' keeps up to 10 versions per cell, 'unused:'
# is left at the ColumnDescriptor defaults.
columns = [
    ColumnDescriptor(name='entry:', maxVersions=10),
    ColumnDescriptor(name='unused:'),
]

try:
    print('creating table : % s' % t)
    client.createTable(t, columns)
except ttypes.AlreadyExists as ae:
    # Only "table already exists" is downgraded to a warning; any other
    # failure propagates instead of being silently reported as a warning.
    print('Warn:' + ae.message)


# Insert data.
# Two sample byte strings: the first is not valid UTF-8, the second is.
invalid = 'foo-\xfc\xa1\xa1\xa1\xa1\xa1'
valid = 'foo-\xE7\x94\x9F\xE3\x83\x93\xE3\x83\xBC\xE3\x83\xAB'

# Show what the first mutation looks like before anything is written.
print(str([Mutation(column='entry:foo', value=invalid)]))

# Each pair is (row key, cell value); every write goes to column 'entry:foo'.
for row_key, cell_value in (
    ('foo', invalid),  # non-UTF-8 bytes are fine as cell data
    ('', ''),          # empty row key together with an empty cell value
    (valid, valid),    # this row key is valid UTF-8
):
    mutations = [Mutation(column='entry:foo', value=cell_value)]
    client.mutateRow(t, row_key, mutations)


# Run a scanner over the rows we just created (full table scan, 'entry:'
# family only, starting from the empty row key).
print('starting scanner...')
scanner = client.scannerOpen(t, '', ['entry:'])
try:
    # scannerGet is polled until it returns a falsy result; the rows are
    # fetched and discarded -- add per-row handling inside the loop if needed.
    r = client.scannerGet(scanner)
    while r:
        r = client.scannerGet(scanner)
finally:
    # Always release the server-side scanner, even when a fetch fails.
    client.scannerClose(scanner)
print('scanner finished ')

# Range scan.
# Collect the column family names of the table; only the descriptors are
# needed, so iterate the values rather than (key, value) pairs.
columnNames = []
for desc in client.getColumnDescriptors(t).values():
    print('column with name: %s' % desc.name)
    print(desc)
    columnNames.append(desc.name + ':')

# Scan from start row '00020' up to stop row '00040' across all collected
# columns.
print('stating scanner...')
scanner = client.scannerOpenWithStop(t, '00020', '00040', columnNames)
try:
    # Rows are fetched and discarded; scannerGet is polled until it returns a
    # falsy result.
    r = client.scannerGet(scanner)
    while r:
        r = client.scannerGet(scanner)
finally:
    # Always release the server-side scanner, even when a fetch fails.
    client.scannerClose(scanner)
print('scanner finished')

# Close the socket.
transport.close()

 






 
 

 






# Now we can use Python to talk to HBase.
# -*- coding: utf-8 -*-
#!/usr/bin/python
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import ColumnDescriptor, Mutation, BatchMutation


class HbaseWriter(object):
    """Small demo client that writes to and scans one HBase table via Thrift.

    Constructor arguments: IP address, port, table name.
    """

    def __init__(self, address, port, table='user'):
        """Connect, create the table if it is missing, then write and read once.

        address -- host of the Thrift endpoint.
        port    -- port of the Thrift endpoint.
        table   -- name of the table to work on (default 'user').
        """
        self.tableName = table

        # Establish the connection to HBase.
        self.transport = TTransport.TBufferedTransport(
            TSocket.TSocket(address, port))
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        self.transport.open()

        tables = self.client.getTableNames()
        if self.tableName not in tables:
            print("not in tables")
            self.__createTable()

        # NOTE(review): the original snippet was collapsed onto a single line,
        # so whether these two calls were nested under the `if` above cannot
        # be told from the source; they are assumed to run unconditionally.
        self.write("hell,babay!!!")
        self.read()

    def __del__(self):
        """Close the transport when the object is collected."""
        # __init__ may have failed before the transport was assigned.
        transport = getattr(self, 'transport', None)
        if transport is not None:
            transport.close()

    def __createTable(self):
        """Create the table with the families person:, contents: and info:."""
        families = [
            ColumnDescriptor(name=family, maxVersions=1)
            for family in ("person:", "contents:", "info:")
        ]
        self.client.createTable(self.tableName, families)

    def write(self, content):
        """Store `content` in columns person: and info: of row "abc"."""
        row = "abc"
        mutations = [
            Mutation(column="person:", value=content),
            Mutation(column="info:", value=content),
        ]
        self.client.mutateRow(self.tableName, row, mutations)

    def read(self):
        """Scan the contents: family and return the "contents:" cell values.

        Rows that carry no cell under the exact key "contents:" are skipped.
        """
        scannerId = self.client.scannerOpen(self.tableName, "", ["contents:"])
        values = []
        try:
            # Assumes scannerGet returns a (possibly empty) list of rows, the
            # same way the `while r:` scan loops earlier in this file use it
            # -- confirm against the deployed Thrift API version.
            rows = self.client.scannerGet(scannerId)
            while rows:
                for row in rows:
                    cell = row.columns.get("contents:")
                    if cell is not None:
                        values.append(cell.value)
                rows = self.client.scannerGet(scannerId)
        finally:
            # Always release the server-side scanner, even when a fetch fails.
            self.client.scannerClose(scannerId)
        return values


if __name__ == "__main__":
    client = HbaseWriter("192.168.239.135", 9090, "person")

# Let's look at which methods the Thrift-generated code provides.
# The available methods are:
#   void enableTable(Bytes tableName)          -- enable the table
#   void disableTable(Bytes tableName)         -- disable the table
#   bool isTableEnabled(Bytes tableName)       -- check the table state
#   void compact(Bytes tableNameOrRegionName)
#   void majorCompact(Bytes tableNameOrRegionName)
#   getTableNames()
#   getColumnDescriptors(Text tableName)
#   getTableRegions(Text tableName)
#   void createTable(Text tableName, columnFamilies)
#   void deleteTable(Text tableName)
#   get(Text tableName, Text row, Text column)
#   getVer(Text tableName, Text row, Text column, i32 numVersions)
#   getVerTs(Text tableName, Text row, Text column, i64 timestamp, i32 numVersions)
#   getRow(Text tableName, Text row)
#   getRowWithColumns(Text tableName, Text row, columns)
#   getRowTs(Text tableName, Text row, i64 timestamp)
#   getRowWithColumnsTs(Text tableName, Text row, columns, i64 timestamp)
#   getRows(Text tableName, rows)
#   getRowsWithColumns(Text tableName, rows, columns)
#   getRowsTs(Text tableName, rows, i64 timestamp)
#   getRowsWithColumnsTs(Text tableName, rows, columns, i64 timestamp)
#   void mutateRow(Text tableName, Text row, mutations)
#   void mutateRowTs(Text tableName, Text row, mutations, i64 timestamp)
#   void mutateRows(Text tableName, rowBatches)
#   void mutateRowsTs(Text tableName, rowBatches, i64 timestamp)
#   i64 atomicIncrement(Text tableName, Text row, Text column, i64 value)
#   void deleteAll(Text tableName, Text row, Text column)
#   void deleteAllTs(Text tableName, Text row, Text column, i64 timestamp)
#   void deleteAllRow(Text tableName, Text row)
#   void deleteAllRowTs(Text tableName, Text row, i64 timestamp)
#   ScannerID scannerOpenWithScan(Text tableName, TScan scan)
#   ScannerID scannerOpen(Text tableName, Text startRow, columns)
#   ScannerID scannerOpenWithStop(Text tableName, Text startRow, Text stopRow, columns)
#   ScannerID scannerOpenWithPrefix(Text tableName, Text startAndPrefix, columns)
#   ScannerID scannerOpenTs(Text tableName, Text startRow, columns, i64 timestamp)
#   ScannerID scannerOpenWithStopTs(Text tableName, Text startRow, Text stopRow, columns, i64 timestamp)
#   scannerGet(ScannerID id)
#   scannerGetList(ScannerID id, i32 nbRows)
#   void scannerClose(ScannerID id)

 

 

 

http://blog.csdn.net/poechant/article/details/6618264

http://mmicky.blog.163.com/blog/static/150290154201311801519681/  按照這個配置python hbase開發環境

編程前切換到/usr/program/python/hbase   而後運行python編程

>>>from thrift.transport import TSocket
>>>from thrift.protocol import TBinaryProtocol
>>>from hbase import Hbase

都不報錯,可是到pycharm報錯,緣由是python默認搜索當前目錄。

到pycharm 須要把 /usr/program/python/hbase 添加到pycharm的path

操做步驟:File>>setting>>project interpreter>>python interpreter>>>paths>>>+ 把/usr/program/python/hbase 文件夾添加進去就行了。

__author__ = 'root'


from thrift.transport import TSocket
from thrift.protocol import TBinaryProtocol
from hbase import Hbase

# Minimal connectivity check: connect to localhost:9090 and print the names of
# all tables.
transport = TSocket.TSocket("localhost", 9090)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()
try:
    tabs = client.getTableNames()
    print(tabs)
finally:
    # Release the socket even when the call fails.
    transport.close()
相關文章
相關標籤/搜索