Zookeeper+HDFS+YARN監控

功能:

  1. 監控主機是否ping異常

  2. 監控主機所含ZOOKEEPER+HDFS+YARN服務是否down,若是出現down的狀況自動進行重啓



監控腳本

beh_serv_mon.sh

#!/bin/bash
#
# beh_serv_mon.sh - watchdog for ZooKeeper / HDFS / YARN daemons.
#
# For every host listed in $HD_MON_HOME/beh_service.ini the script
#   1. pings the host; when ping reports failure it raises
#      "<host>_network_error" and moves on to the next host;
#   2. lists the remote JVMs with jps and, for every configured service that
#      is not running, raises "<host>_<service>_miss" and starts the daemon
#      over ssh.
# Every alert is appended once to $HD_MON_LOG/hd_mon_log and then sent through
# $HD_MON_HOME/sms_send.sh to each number in phonelist.
#
# Config line format:  <host>:,<Service>,<Service>,...
# Lines containing '#' are ignored.
#
# Strict mode (set -eu) is deliberately not enabled: a failure on one host
# must not stop the checks for the remaining hosts.

# Phone numbers that receive SMS alerts; leave the list empty to disable SMS.
phonelist=(
1234567890X
)
# Directory holding this script, beh_service.ini and sms_send.sh.
HD_MON_HOME=/opt/beh/utility/crontab
# Directory of the alert log (hd_mon_log).
HD_MON_LOG=/opt/beh/utility/crontab
# JDK location; used to build the path of jps run on the monitored host.
JAVA_HOME=/opt/beh/core/jdk
# Hadoop and ZooKeeper locations. The restart commands reference
# $HADOOP_HOME / $ZOOKEEPER_HOME inside single quotes, so those are expanded
# by the remote shell after it sources /etc/profile, not from these values.
# shellcheck disable=SC2034
HADOOP_HOME=/opt/beh/core/hadoop
# shellcheck disable=SC2034
ZOOKEEPER_HOME=/opt/beh/core/zookeeper

#######################################
# Record one alert: append it once to the log, then text it to every number.
# Globals:   HD_MON_LOG, HD_MON_HOME, phonelist (read)
# Arguments: $1 - alert text
#######################################
raise_alert() {
  local msg=$1
  local phone
  # Logged outside the phone loop so the alert is recorded exactly once,
  # including when phonelist is empty.
  printf '%s\n' "$msg" >> "$HD_MON_LOG/hd_mon_log"
  for phone in "${phonelist[@]}"; do
    sh "$HD_MON_HOME/sms_send.sh" "$phone" "$msg"
  done
}

#######################################
# Print the remote command line that starts a service.
# Arguments: $1 - process name as shown by jps
# Outputs:   the command on stdout
# Returns:   0 if the service is known, 1 otherwise
#######################################
start_command_for() {
  # The variables must stay literal here; the remote shell expands them.
  # shellcheck disable=SC2016
  case "$1" in
    DFSZKFailoverController)
      printf '%s\n' 'source /etc/profile;$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc'
      ;;
    JournalNode)
      printf '%s\n' 'source /etc/profile;$HADOOP_HOME/sbin/hadoop-daemon.sh start journalnode'
      ;;
    NameNode)
      printf '%s\n' 'source /etc/profile;$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode'
      ;;
    QuorumPeerMain)
      printf '%s\n' 'source /etc/profile;$ZOOKEEPER_HOME/bin/zkServer.sh start'
      ;;
    ResourceManager)
      printf '%s\n' 'source /etc/profile;$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager'
      ;;
    JobHistoryServer)
      printf '%s\n' 'source /etc/profile;$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver'
      ;;
    ApplicationHistoryServer)
      printf '%s\n' 'source /etc/profile;$HADOOP_HOME/sbin/yarn-daemon.sh start timelineserver'
      ;;
    DataNode)
      printf '%s\n' 'source /etc/profile;$HADOOP_HOME/sbin/hadoop-daemon.sh start datanode'
      ;;
    NodeManager)
      printf '%s\n' 'source /etc/profile;$HADOOP_HOME/sbin/yarn-daemon.sh start nodemanager'
      ;;
    *)
      return 1
      ;;
  esac
}

#######################################
# Check one host: connectivity first, then each configured service.
# Globals:   JAVA_HOME (read)
# Arguments: $1 - host name
#            $2 - comma-separated service list (a leading comma is allowed)
# Outputs:   progress lines on stdout
#######################################
check_host() {
  local host=$1
  local svc_csv=$2
  local -a fields=()
  local -a wanted=()
  local item jps_out running svc start_cmd

  echo "${host}----------------"

  # ping's exit status is non-zero when no reply at all was received or the
  # name could not be resolved, so timeouts are caught as well as
  # "unreachable" answers.
  if ! ping -i 1 -c 3 "$host" > /dev/null 2>&1; then
    echo "${host}_network_error"
    raise_alert "${host}_network_error"
    return 0
  fi

  # Split the service list, dropping empty items and stray whitespace / CR.
  IFS=',' read -r -a fields <<< "$svc_csv"
  for item in "${fields[@]}"; do
    item=${item//[[:space:]]/}
    [[ -n "$item" ]] && wanted+=("$item")
  done

  # One jps call per host. If it cannot be run, say so instead of reporting
  # every service as missing and restarting daemons blindly.
  if ! jps_out=$(ssh "$host" "$JAVA_HOME/bin/jps"); then
    echo "${host}_jps_error"
    raise_alert "${host}_jps_error"
    return 0
  fi
  # Second column of jps output is the process (main class) name.
  running=$(awk '{print $2}' <<< "$jps_out")

  for svc in "${wanted[@]}"; do
    # Whole-line, fixed-string match; one or more instances count as running.
    if grep -qxF -- "$svc" <<< "$running"; then
      echo "1:include"
      continue
    fi
    echo "0:not_include"
    raise_alert "${host}_${svc}_miss"
    if start_cmd=$(start_command_for "$svc"); then
      ssh "$host" "$start_cmd"
    else
      echo "${host}_${svc}_not_found"
    fi
  done
  return 0
}

#######################################
# Walk the config file and check every listed host.
# Globals:   HD_MON_HOME (read)
# Returns:   1 when the config file cannot be read, 0 otherwise
#######################################
main() {
  local conf="$HD_MON_HOME/beh_service.ini"
  local -a entries=()
  local entry host svc_csv

  if [[ ! -r "$conf" ]]; then
    printf 'cannot read config file: %s\n' "$conf" >&2
    return 1
  fi
  # Load the file first so ssh inside the loop cannot consume the input.
  mapfile -t entries < "$conf"

  for entry in "${entries[@]}"; do
    # Any line containing '#' is treated as a comment.
    [[ "$entry" == *'#'* ]] && continue
    # Field 1 is the host, field 2 the service list; each line is parsed on
    # its own, so a host name never matches another host's line.
    IFS=':' read -r host svc_csv _ <<< "$entry"
    host=${host//[[:space:]]/}
    [[ -z "$host" ]] && continue
    check_host "$host" "$svc_csv"
  done
  return 0
}

main "$@"


配置文件

beh_service.ini

hadoop001:,NameNode,ResourceManager,DFSZKFailoverController,QuorumPeerMain,JournalNode,ApplicationHistoryServer
hadoop011:,NameNode,ResourceManager,DFSZKFailoverController,QuorumPeerMain,JournalNode,JobHistoryServer
hadoop012:,DataNode,NodeManager,QuorumPeerMain,JournalNode
hadoop013:,DataNode,NodeManager
hadoop014:,DataNode,NodeManager
hadoop015:,DataNode,NodeManager
hadoop016:,DataNode,NodeManager
hadoop017:,DataNode,NodeManager
hadoop018:,DataNode,NodeManager
hadoop023:,DataNode,NodeManager
hadoop024:,DataNode,NodeManager
hadoop025:,DataNode,NodeManager
hadoop026:,DataNode,NodeManager
hadoop027:,DataNode,NodeManager
hadoop028:,DataNode,NodeManager
hadoop030:,DataNode,NodeManager
hadoop031:,DataNode,NodeManager
hadoop032:,DataNode,NodeManager
hadoop033:,DataNode,NodeManager
hadoop034:,DataNode,NodeManager
hadoop035:,DataNode,NodeManager
hadoop021:,DataNode,NodeManager
hadoop022:,DataNode,NodeManager
hadoop053:,DataNode,NodeManager
hadoop054:,DataNode,NodeManager
hadoop055:,DataNode,NodeManager
hadoop056:,DataNode,NodeManager
hadoop057:,DataNode,NodeManager
hadoop058:,DataNode,NodeManager
hadoop059:,DataNode,NodeManager


短信腳本

備註:可自行修改成適應本地環境的其餘通知腳本


示例:sms_send.sh


# sms_send.sh - queue one SMS by inserting a row into
# NEWEBA.UNICOM_REPORT_SMS through sqlplus.
#
# Usage: sms_send.sh <phone-number> <message>
#
# Kept to plain sh syntax because the monitor invokes it as "sh sms_send.sh".

# Oracle database connection settings.
username=username
password=userpasswd
SID=sidname

# Insert one row for phone number $1 with message text $2.
# Returns 2 when an argument is missing, otherwise the sqlplus exit status.
send_sms() {
  if [ "$#" -lt 2 ]; then
    echo "usage: sms_send.sh <phone-number> <message>" >&2
    return 2
  fi
  V_NUMBER=$1      # argument 1: phone number
  # Argument 2: message text. Single quotes are doubled so the text cannot
  # terminate the SQL string literal it is placed in.
  V_STR=$(printf '%s' "$2" | sed "s/'/''/g")
  # Log in with CONNECT on stdin instead of on the command line, so the
  # password does not show up in the process list.
  sqlplus -s /nolog <<EOF
CONNECT $username/$password@$SID
INSERT INTO NEWEBA.UNICOM_REPORT_SMS
  (ROW_NO, MSISDN, MESSAGE, FLAG, SEND_TM)
      SELECT SQE_UNICOM_REPORT_SMS.NEXTVAL,
             $V_NUMBER,
             '$V_STR', 0, SYSDATE
FROM DUAL;
EOF
}

send_sms "$@"



crontab配置

備註:通常使用hadoop用戶啓動hadoop相關服務,而且相互之間配置了ssh互信。使用crontab來定時執行腳本。能夠選取2臺主機,其中一臺爲每小時整點執行,另外一臺爲每小時半點執行。這裏選取了hadoop001和hadoop002.

[hadoop@hadoop001 ~]$ crontab -l
0  * * * * /opt/beh/utility/crontab/beh_serv_mon.sh > /opt/beh/utility/crontab/beh_serv_mon.log 2>&1

[hadoop@hadoop002 ~]$ crontab -l
30 * * * * /opt/beh/utility/crontab/beh_serv_mon.sh > /opt/beh/utility/crontab/beh_serv_mon.log 2>&1

相關文章
相關標籤/搜索