# Download and unpack the Python 2.7.9 source tarball.
wget https://www.python.org/ftp/python/2.7.9/Python-2.7.9.tgz
tar -zxvf Python-2.7.9.tgz
cd Python-2.7.9

# Install into a dedicated prefix; this directory is what gets packaged later.
./configure --prefix=/home/tmp/python2.7.9
make && make install
3. 安裝需要用到的庫,以 pykafka 爲例
# Use -t to install into the packaged interpreter's site-packages directory
# instead of pip's default location.
pip install -t /home/tmp/python2.7.9/lib/python2.7/site-packages pykafka
4. 打包
# Build the archive from INSIDE the install directory so that bin/, lib/, ...
# sit at the archive root. The interpreter path given to Spark later
# (./python2.7.9/bin/python) depends on this layout; if you package it
# differently, adjust that path to match.
# The install directory is the --prefix passed to ./configure.
cd /home/tmp/python2.7.9
tar -zcf python2.7.9.tgz *
5. 上傳到 hdfs
# Upload the packaged interpreter archive to HDFS.
# NOTE(review): the spark-submit examples in this file read the archive from
# hdfs://DClusterNmg4/user/xxx/xxx/python2.7.9.tgz, not from /usr/jar/python --
# make the upload destination and spark.yarn.dist.archives agree.
hadoop fs -put python2.7.9.tgz /usr/jar/python
spark yarn client 模式
# Submit a PySpark job in YARN client mode using the packaged interpreter.
# spark.yarn.dist.archives points at the archive on HDFS; the part after '#'
# in the URI (python2.7.9) is the name used afterwards to refer to the
# archive, which is why spark.pyspark.python is ./python2.7.9/bin/python.
# Replace the '<yarn queue>' placeholder with a real queue name; it is quoted
# so the shell does not parse '<' as an input redirection.
# NOTE(review): in client mode the driver runs on the submitting host, so the
# relative interpreter path presumably has to resolve there as well (or
# spark.pyspark.driver.python must be set) -- confirm for your environment.
spark-submit \
  --queue '<yarn queue>' \
  --conf 'spark.yarn.dist.archives=hdfs://DClusterNmg4/user/xxx/xxx/python2.7.9.tgz#python2.7.9' \
  --conf spark.pyspark.python=./python2.7.9/bin/python \
  --deploy-mode client \
  --py-files xxxx-dependency.py \
  main.py
spark yarn cluster 模式
# Submit a PySpark job in YARN cluster mode using the packaged interpreter.
# spark.yarn.dist.archives points at the archive on HDFS; the part after '#'
# in the URI (python2.7.9) is the name used afterwards to refer to the
# archive, which is why PYSPARK_PYTHON is ./python2.7.9/bin/python.
# spark.yarn.appMasterEnv.PYSPARK_PYTHON sets PYSPARK_PYTHON in the YARN
# application master's environment.
# Replace the '<yarn queue>' placeholder with a real queue name; it is quoted
# so the shell does not parse '<' as an input redirection.
spark-submit \
  --queue '<yarn queue>' \
  --conf 'spark.yarn.dist.archives=hdfs://DClusterNmg4/user/xxx/xxx/python2.7.9.tgz#python2.7.9' \
  --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./python2.7.9/bin/python \
  --deploy-mode cluster \
  --py-files xxxx-dependency.py \
  main.py
hive udf 模式
-- Run these statements inside the Hive CLI (interactive prompts removed so
-- they can be pasted as-is).

-- Register the packaged Python distribution and the script as session resources.
ADD ARCHIVE /usr/python/anaconda2.tar.gz;
ADD FILE /usr/test.py;

-- Pipe the `data` column of test_a through test.py, run with the interpreter
-- from the archive; the script's output is read back as column min_num.
-- The interpreter is addressed through the archive's file name:
-- anaconda2.tar.gz/anaconda2/bin/python.
SELECT
  TRANSFORM(data)
  USING 'anaconda2.tar.gz/anaconda2/bin/python test.py'
  AS (min_num)
FROM test_a;