1.hdfs目錄配額
# Set a 10 TB space quota on the directory
hdfs dfsadmin -setSpaceQuota 10T /user/hive/warehouser/tmp
# Show quota and usage information for the directory
hdfs dfs -count -q -h /user/hive/warehouser/tmp
# Clear the space quota on the directory
hdfs dfsadmin -clrSpaceQuota /user/hive/warehouser/tmp
2.均衡併發
# Raise the per-DataNode balancer bandwidth cap to 1 GiB/s
hdfs dfsadmin -setBalancerBandwidth 1073741824
# Run the balancer in the background. JVM properties must be written as
# -Dkey=value with NO spaces around '=' — "-Dkey = value" is parsed as
# three separate arguments and the setting is silently ignored.
nohup hdfs balancer \
  -Ddfs.datanode.balance.max.concurrent.moves=10 \
  -Ddfs.balancer.dispatcherThreads=1024 \
  -Ddfs.datanode.balance.bandwidthPerSec=1073741824 &
#此配置用於限制容許Datanode平衡羣集的最大併發塊移動數
dfs.datanode.balance.max.concurrent.moves, default is 5
#帶寬
dfs.datanode.balance.bandwidthPerSec, default is 1048576 (=1MB/s)
dfsadmin -setBalancerBandwidth <bandwidth in bytes per second>
#mover線程數
dfs.balancer.moverThreads, default is 1000
#datanode傳輸的最大線程數
dfs.datanode.max.transfer.threads
修改dfs.datanode.max.transfer.threads=4096 (若是運行HBase的話建議爲16384),
指定用於在DataNode間傳輸block數據的最大線程數,老版本的對應參數爲dfs.datanode.max.xcievers。
#平衡策略,默認爲datanode
[-policy <policy>]
blockpool: Cluster is balanced if each pool in each node is balanced.
datanode: Cluster is balanced if each datanode is balanced.
#閾值
[-threshold <threshold>] [1.0, 100.0]
#包含列表
[-include [-f <hosts-file> | <comma-separated list of hosts>]]
#排除列表
[-exclude [-f <hosts-file> | <comma-separated list of hosts>]]
#最大移動數據大小
dfs.balancer.max-size-to-move, default is 10737418240 (=10GB)
#####################################################
問題背景與現象
當HDFS集羣各個DataNode存儲的數據不均衡時,須要使用hdfs balance功能,調整相關參數能夠提高balance性能。
操做步驟
修改以下參數:
dfs.datanode.balance.bandwidthPerSec=209715200
說明:
該參數限定每一個DataNode用來平衡數據時,佔用帶寬的上限;
這個參數的調整要看組網狀況,若是集羣負載較高,能夠改成209715200(200MB),若是集羣空閒,能夠改成1073741824(1G)。
dfs.datanode.max.transfer.threads = 8192
dfs.namenode.replication.max-streams=20
dfs.datanode.balance.max.concurrent.moves=30
3.find
# Delete *.dat files older than 22 days (original was missing the space in "$dir-mtime")
find "$dir" -mtime +22 -name "*.dat" -exec rm -f {} \;
# List files older than 22 days and page through the output
find "$dir" -type f -mtime +22 -exec ls -l {} \; | more
# Delete files older than 7 days; NUL-delimited so paths with spaces survive
find "$dir" -type f -mtime +7 -print0 | xargs -0 rm -f --
# Delete rotated hive logs older than 7 days
find "${dir3}" -mtime +7 -name "hive.log.*" -exec rm -f {} \;
4.hive
# Connect to HiveServer2 via beeline (no stray space inside the JDBC URL)
beeline -u "jdbc:hive2://xxx:10000" -n hive -p hive
# Inside the Hive session: switch the execution engine to Tez
set hive.execution.engine=tez;
# Start HiveServer2 and the metastore in the background
nohup hive --service hiveserver2 &
nohup hive --service metastore &
# -S silent mode (suppresses MR/Tez progress output), -e runs an inline HQL query
hive -S -e "select * from xxx"
# -f runs an HQL script file
hive -f test.hql
# Inside the Hive session: run an HQL script file
source test.hql
# Remove every package matching xxx, ignoring dependencies
# (original used literal single quotes instead of command substitution
# and was missing the ';' before done)
for f in $(rpm -qa | grep xxx); do rpm -e --nodeps "$f"; done
磁盤空間滿了,kill超時太長的job
cd hive/yarn/local1/usercache/hive/appcache
su yarn
yarn application -kill job名
5.修改sudo
# Edit a per-user sudoers drop-in (prefer: visudo -f /etc/sudoers.d/xxx to validate syntax)
vim /etc/sudoers.d/xxx
# Grant user xxx full sudo rights (password required)
xxx ALL=(ALL) ALL
# Grant user xxx full sudo rights with no password prompt
xxx ALL=(ALL) NOPASSWD: ALL
# Preserve the invoking user's environment variables (disables env_reset)
Defaults !env_reset
6.for
# Kill every process matching "xxx" and "2018". "grep -v grep" excludes the
# pipeline's own grep from the match; "ps aux" is the non-deprecated spelling
# of "ps -aux". Consider plain kill (SIGTERM) before resorting to -9.
for i in $(ps aux | grep -i "xxx" | grep -i "2018" | grep -v grep | awk '{print $2}'); do kill -9 "$i"; done
7.其餘
# Run refreshNodes as the hdfs user on every salt minion matching xxx*
salt "xxx*" cmd.run "su - hdfs -c \"hdfs dfsadmin -refreshNodes\""
# Loop skeleton over 1..12 (original was missing the ';' before do/done)
for i in $(seq 1 12); do :; done
# Largest first: sort numerically (desc) on the 2nd ':'-separated field, print top 80 keys
sort -rn -k 2 -t : test.txt | awk -F ":" '{print $1}' | head -n 80
# Count duplicate values of the 2nd ','-separated field (no need to cat into cut)
cut -d, -f2 xxx.txt | sort | uniq -c | sort -n