建一張Hive表映射HBase表
-- Hive external table mapped onto the HBase table LJKTEST.
-- ":key" binds the HBase row key to ID; column family "0" holds the rest.
CREATE EXTERNAL TABLE LJKTEST (
    ID      STRING,
    AGE     STRING,
    NAME    STRING,
    COMPANY STRING,
    SCHOOL  STRING
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,0:AGE,0:NAME,0:COMPANY,0:SCHOOL")
TBLPROPERTIES ("hbase.table.name" = "LJKTEST");
這邊使用COUNT(1)和COUNT(*)都不起作用,統計出來是0。只能使用COUNT(字段),應該跟映射機制有關係。
-- COUNT(column) works against the HBase-mapped table (counts non-NULL IDs).
SELECT COUNT(ID) FROM LJKTEST;
使用COUNT(1)統計(結果錯誤地返回0):
-- NOTE(review): COUNT(1)/COUNT(*) return 0 on this HBase-mapped table — appears
-- to be a quirk of the storage-handler mapping; use COUNT(column) instead.
SELECT COUNT(1) FROM LJKTEST;
使用HBase自帶的RowCounter MapReduce工具統計:
# Count rows of HBase table LJKTEST with the built-in RowCounter MapReduce job;
# `hbase classpath` puts the HBase client jars on the Hadoop classpath.
HADOOP_CLASSPATH=`hbase classpath` hadoop jar /usr/hdp/current/hbase-client/lib/hbase-server.jar rowcounter LJKTEST
pom文件引入依賴,<font color="red">必須加上exclusion,不然會報錯class "javax.servlet.FilterRegistration"'s signer information does not match signer information of other classes in the same package</font>
<!-- hbase-server: the servlet/jetty artifacts it pulls in clash with the ones
     on the classpath ("javax.servlet.FilterRegistration" signer mismatch),
     so they must be excluded. -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.1.2.2.5.0.0-1245</version>
    <exclusions>
        <exclusion>
            <artifactId>servlet-api</artifactId>
            <groupId>javax.servlet</groupId>
        </exclusion>
        <exclusion>
            <artifactId>jetty</artifactId>
            <groupId>org.mortbay.jetty</groupId>
        </exclusion>
        <exclusion>
            <artifactId>jetty-util</artifactId>
            <groupId>org.mortbay.jetty</groupId>
        </exclusion>
        <exclusion>
            <artifactId>servlet-api-2.5</artifactId>
            <groupId>org.mortbay.jetty</groupId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.6.2.2.5.0.0-1245</version>
</dependency>
寫Spark統計HBase代碼
/** Counts the rows of HBase table LJKTEST via a full-table scan with
  * Spark's newAPIHadoopRDD and HBase's TableInputFormat.
  * Fix: the SparkContext was never stopped — wrap in try/finally so the
  * local context is released even if the scan throws (only one local
  * SparkContext may exist per JVM). */
@Test
def sparkCountHBase(): Unit = {
  val sc = new SparkContext("local", "hbase-test")
  try {
    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, "LJKTEST")
    // One (rowkey, Result) pair per HBase row.
    val hbaseRDD = sc.newAPIHadoopRDD(
      conf,
      classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])
    val count = hbaseRDD.count()
    println(s"總共有 $count 條數據!")
  } finally {
    sc.stop()
  }
}
pom引入依賴
<!-- phoenix-spark needs the same servlet/jetty exclusions as hbase-server
     to avoid the FilterRegistration signer conflict. -->
<!-- https://mvnrepository.com/artifact/org.apache.phoenix/phoenix-spark -->
<dependency>
    <groupId>org.apache.phoenix</groupId>
    <artifactId>phoenix-spark</artifactId>
    <version>4.7.0-HBase-1.1</version>
    <exclusions>
        <exclusion>
            <artifactId>servlet-api</artifactId>
            <groupId>javax.servlet</groupId>
        </exclusion>
        <exclusion>
            <artifactId>jetty</artifactId>
            <groupId>org.mortbay.jetty</groupId>
        </exclusion>
        <exclusion>
            <artifactId>jetty-util</artifactId>
            <groupId>org.mortbay.jetty</groupId>
        </exclusion>
        <exclusion>
            <artifactId>servlet-api-2.5</artifactId>
            <groupId>org.mortbay.jetty</groupId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.6.2.2.5.0.0-1245</version>
</dependency>
Spark統計Phoenix代碼
/** Counts the rows of Phoenix table LJKTEST through the phoenix-spark
  * DataFrame reader (ZooKeeper quorum at dn1:2181).
  * Fixes: the SparkContext was never stopped (leaks the local context);
  * sqlContext.load(source, options) is deprecated since Spark 1.4 — use
  * the equivalent read.format(...).options(...).load(); dropped the
  * commented-out df.show(). */
@Test
def sparkCountPhoenix(): Unit = {
  val sc = new SparkContext("local", "phoenix-test")
  try {
    val sqlContext = new SQLContext(sc)
    val df = sqlContext.read
      .format("org.apache.phoenix.spark")
      .options(Map("table" -> "LJKTEST", "zkUrl" -> "dn1:2181"))
      .load()
    println(s"總共有 ${df.count} 條數據!")
  } finally {
    sc.stop()
  }
}