export JAVA_HOME=/usr/local/soft/jdk1.7.0
#export SPARK_MASTER_IP=hadoop-spark01
export SPARK_MASTER_WEBUI_PORT=8099
#export SPARK_MASTER_IP=localhost
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=2
export SPARK_WORKER_INSTANCES=2
export SPARK_WORKER_MEMORY=1g
#export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=FILESYSTEM -Dspark.deploy.recoveryDirectory=/nfs/spark/recovery"
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=hadoop-spark01:2181,hadoop-spark02:2181,hadoop-spark03:2181 -Dspark.deploy.zookeeper.dir=/home/data/spark/zkdir"   # this is the Spark HA configuration
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HIVE_CONF_DIR=$HIVE_HOME/conf
export SPARK_HOME=/usr/local/soft/spark-1.4.1-bin-hadoop-2.6.0
export SPARK_CLASSPATH=/usr/local/soft/sparkclasspath/mysql-connector-java-5.1.38-bin.jar:/usr/local/soft/sparkclasspath/hive-hbase-handler-1.2.1.jar:/usr/local/soft/sparkclasspath/hbase-common-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-client-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-protocol-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-server-1.1.2.jar:/usr/local/soft/sparkclasspath/protobuf-java-2.5.0.jar:/usr/local/soft/sparkclasspath/htrace-core-3.1.0-incubating.jar:/usr/local/soft/sparkclasspath/guava-12.0.1.jar:/usr/local/soft/sparkclasspath/hive-exec-1.2.1.jar
#export SPARK_LIBRARY_PATH=/usr/local/soft/hbase-1.1.2/lib
export SPARK_JAR=/usr/local/soft/spark-1.4.1-bin-hadoop-2.6.0/lib/spark-assembly-1.4.1-hadoop2.6.0.jar
export PATH=$SPARK_HOME/bin:$PATH
4. Copy spark-assembly-1.4.1-hadoop2.6.0.jar to the $HIVE_HOME/lib directory.
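A minimal shell sketch of this step, reusing the SPARK_HOME path and jar name from the spark-env.sh above:
cp /usr/local/soft/spark-1.4.1-bin-hadoop-2.6.0/lib/spark-assembly-1.4.1-hadoop2.6.0.jar $HIVE_HOME/lib/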
5. Modify hive-site.xml
<property>
<name>hive.metastore.uris</name>
<value>thrift://hadoop-spark01:9083</value>
<description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property>
<property>
<name>hive.server2.thrift.min.worker.threads</name>
<value>5</value>
<description>Minimum number of Thrift worker threads</description>
</property>
<property>
<name>hive.server2.thrift.max.worker.threads</name>
<value>500</value>
<description>Maximum number of Thrift worker threads</description>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>Port number of HiveServer2 Thrift interface. Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>hadoop-spark01</value>
<description>Bind host on which to run the HiveServer2 Thrift interface. Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST</description>
</property>
<property>
<name>spark.serializer</name>
<value>org.apache.spark.serializer.KryoSerializer</value>
</property>
<property>
<name>spark.eventLog.enabled</name>
<value>true</value>
</property>
<property>
<name>spark.eventLog.dir</name>
<value>hdfs://founder/sparklog/logs</value>
</property>
<property>
<name>spark.master</name>
<value>spark://hadoop-spark01:7077,hadoop-spark02:7077</value>
</property>
The following parameters also need to be configured in hive-site.xml (a sketch of the corresponding <property> entries follows the list):
1. hive.exec.local.scratchdir: /opt/hive-1.2/tmp
2. hive.downloaded.resources.dir: /opt/hive-1.2/resources
Configure the MySQL database:
1. javax.jdo.option.ConnectionPassword: 123456
2. javax.jdo.option.ConnectionURL: jdbc:mysql://hadoop-spark01:3306/hive_db
3. javax.jdo.option.ConnectionDriverName: com.mysql.jdbc.Driver
4. javax.jdo.option.ConnectionUserName: root
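In hive-site.xml these take the same <property> form as the entries above (a sketch; the values are the ones listed, adjust them to your environment):
<property>
<name>hive.exec.local.scratchdir</name>
<value>/opt/hive-1.2/tmp</value>
</property>
<property>
<name>hive.downloaded.resources.dir</name>
<value>/opt/hive-1.2/resources</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://hadoop-spark01:3306/hive_db</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>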
6. Startup
Start Spark:
./start-all.sh
On the backup-master node:
./start-master.sh
Start Hive:
./hive
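Put together, the sequence looks roughly like this (a sketch, assuming the standard script locations under $SPARK_HOME/sbin and $HIVE_HOME/bin):
cd $SPARK_HOME/sbin
./start-all.sh                              # starts the master and the workers
# on the backup-master node (standby master, fails over via the ZooKeeper HA config above):
cd $SPARK_HOME/sbin && ./start-master.sh
# start the Hive CLI:
cd $HIVE_HOME/bin && ./hive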
------------------------------------------------------------------------------------------------------------------------------------------------------
2. Connecting with beeline. This approach is more practical, because clients can connect via JDBC.
First of all, this does not require recompiling the Spark source code; the Spark build just needs Hive support.
1. Start Spark
2. Start the thrift server
cd $SPARK_HOME/sbin
./start-thriftserver.sh --master spark://hadoop-spark01:7077 --executor-memory 1g
3. Start the Hive metastore
hive --service metastore > metastore.log 2>&1 &
Connect with beeline:
beeline> !connect jdbc:hive2://hadoop-spark01:10000
0: jdbc:hive2://hadoop-spark01:10000> select count(*) from t_trackinfo;
+------+--+
| _c0 |
+------+--+
| 188 |
+------+--+
1 row selected (16.738 seconds)
A few points to note:
1. The data in my Hive tables was synchronized from HBase.
2. There is no need to recompile the Hive source code; downloading it from the Apache website is enough.
3. The usual approach is HiveServer2's thrift server, with client programs operating on Hive through JDBC. So there is no need to compile the source code; the corresponding configuration is enough (see the JDBC sketch below).
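A minimal Java sketch of such a JDBC client (an illustration, not from the original; the class name is mine, the hive-jdbc driver is assumed to be on the classpath, and the "default" database and empty credentials are assumptions - the host, port, and table come from the beeline session above):
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HiveJdbcExample {
    public static void main(String[] args) throws Exception {
        // Register the HiveServer2 JDBC driver (provided by the hive-jdbc jar).
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        // Connect to the thrift server started above; user/password depend on your auth setup.
        try (Connection conn = DriverManager.getConnection(
                "jdbc:hive2://hadoop-spark01:10000/default", "", "");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM t_trackinfo")) {
            while (rs.next()) {
                System.out.println("row count = " + rs.getLong(1));
            }
        }
    }
}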
The following settings are now deprecated; putting the equivalent configuration in the spark-defaults.conf file is enough (see the sketch after the warnings below):
SPARK_CLASSPATH was detected (set to '/usr/local/soft/sparkclasspath/mysql-connector-java-5.1.38-bin.jar:/usr/local/soft/sparkclasspath/hive-hbase-handler-1.2.1.jar:/usr/local/soft/sparkclasspath/hbase-common-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-client-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-protocol-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-server-1.1.2.jar:/usr/local/soft/sparkclasspath/protobuf-java-2.5.0.jar:/usr/local/soft/sparkclasspath/htrace-core-3.1.0-incubating.jar:/usr/local/soft/sparkclasspath/guava-12.0.1.jar:/usr/local/soft/sparkclasspath/hive-exec-1.2.1.jar').
This is deprecated in Spark 1.0+.
Please instead use:
- ./spark-submit with --driver-class-path to augment the driver classpath
- spark.executor.extraClassPath to augment the executor classpath
SPARK_WORKER_INSTANCES was detected (set to '2').
This is deprecated in Spark 1.0+.
Please instead use:
- ./spark-submit with --num-executors to specify the number of executors
- Or set SPARK_EXECUTOR_INSTANCES
- spark.executor.instances to configure the number of instances in the spark config.
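A spark-defaults.conf sketch of the replacement settings (an assumption on my part: spark.driver.extraClassPath is the property form of the --driver-class-path flag named in the warning, the jar list is the one from the old SPARK_CLASSPATH, and spark.executor.instances is the property the second warning points to):
spark.driver.extraClassPath    /usr/local/soft/sparkclasspath/mysql-connector-java-5.1.38-bin.jar:/usr/local/soft/sparkclasspath/hive-hbase-handler-1.2.1.jar:/usr/local/soft/sparkclasspath/hbase-common-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-client-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-protocol-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-server-1.1.2.jar:/usr/local/soft/sparkclasspath/protobuf-java-2.5.0.jar:/usr/local/soft/sparkclasspath/htrace-core-3.1.0-incubating.jar:/usr/local/soft/sparkclasspath/guava-12.0.1.jar:/usr/local/soft/sparkclasspath/hive-exec-1.2.1.jar
spark.executor.extraClassPath  /usr/local/soft/sparkclasspath/mysql-connector-java-5.1.38-bin.jar:/usr/local/soft/sparkclasspath/hive-hbase-handler-1.2.1.jar:/usr/local/soft/sparkclasspath/hbase-common-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-client-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-protocol-1.1.2.jar:/usr/local/soft/sparkclasspath/hbase-server-1.1.2.jar:/usr/local/soft/sparkclasspath/protobuf-java-2.5.0.jar:/usr/local/soft/sparkclasspath/htrace-core-3.1.0-incubating.jar:/usr/local/soft/sparkclasspath/guava-12.0.1.jar:/usr/local/soft/sparkclasspath/hive-exec-1.2.1.jar
spark.executor.instances       2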