在生產環境中,有客戶架構爲阿里雲線上環境與線下IDC需要內網互通,互聯採用阿里雲上的第三方深信服雲產品與線下IDC側Cisco防火牆通過ipsec打通實現,主要用於定時將阿里雲文件及數據備份至IDC。在生產應用中隧道會無端不定時中斷,聯繫深信服及思科售後排查均沒有結果,但是手動重啓阿里雲上的深信服設備後隧道立即恢復。在兩邊網絡工程師排查無果後,想到編寫監控腳本:如果隧道中斷,就利用python重啓深信服設備,從而恢復隧道;數據傳輸方面則延長timeout並使用斷點續傳。當網絡層面的異常無法解決時,換另外一種思路來解決問題。
2.1 編寫隧道監控腳本
因爲線上阿里雲側爲公有雲,且未配置EIP及NAT網關,ecs均採用前端公網SLB負責業務請求接入,其內部無法出公網,而隧道監控腳本需要把告警發送至微信,後續操作深信服也需要公網連通,所以在線下IDC側放置檢查及重啓腳本。
2.2 深信服重啓
- 利用Python編寫去操做深信服,web頁面模擬登陸,主要利用到了selenium模塊,logging來記錄日誌。
- 利用阿里雲ECS API來操做重啓深信服設備。
檢測隧道連通性,如果隧道中斷則告警至微信及釘釘,其次觸發深信服重啓腳本。
#!/bin/bash
# Tunnel watchdog: pings an intranet address across the IPsec tunnel in an
# endless loop and, the first time the ping fails, runs the Python script that
# reboots the Sangfor device. A lock file holds the "armed" state so a single
# outage triggers only one reboot.
#
# Lock file values:
#   1 - armed: the next ping failure triggers a reboot
#   0 - a reboot was already triggered; wait for a successful ping to re-arm

# Intranet address used as the tunnel health probe.
IP=10.10.10.2
dir="/sangfor/Shscripts/pdc/"
log="${dir}pingpdc.log"
lock="${dir}pdcping.lock"

mkdir -p "${dir}"
echo 1 > "${lock}"

while true
do
    # Log rotation: once the previous iteration's timestamp (${data}) reached
    # 23:59, move the live log into a directory named after the date.
    Time=$(date +%F)
    TIME="${Time} 23:59"
    archive="${dir}${Time}"
    if [ "${data}" == "${TIME}" ] && [ ! -d "${archive}" ]; then
        # The loop writes to pingpdc.log, so that is the file to archive
        # (rotating "pdcping.log" would never match the live log).
        mkdir -p "${archive}" && mv "${log}" "${archive}/${Time}-pingpdc.log"
    fi

    # Retention: drop archives older than 7 days. -mindepth 1 keeps find from
    # ever selecting ${dir} itself; -maxdepth 1 avoids descending into a
    # directory that was just removed.
    find "${dir}" -mindepth 1 -maxdepth 1 -mtime +7 -type d -exec rm -rf {} \;
    find "${dir}" -mtime +7 -name "*-pingpdc.log" -exec rm -rf {} \;

    data=$(date +%F' '%H:%M)
    data1=$(date +%F' '%H:%M:%S)
    echo "------------${data1}---------------" >> "${log}"
    ping -c 10 "${IP}" >> "${log}"
    rc=$?

    STAT=$(cat "${lock}")
    # NOTE(review): only exit status 1 is treated as "tunnel down", as in the
    # original script; other non-zero statuses are ignored - confirm intended.
    if [ "${rc}" -eq 1 ]; then
        if [ "${STAT}" -eq 1 ]; then
            /usr/local/python34/bin/python3 /sangfor/Pysangfor/sangfor_public.py
            echo 0 > "${lock}"
        fi
    else
        # Ping worked again: re-arm so the next outage triggers a reboot.
        if [ "${STAT}" -eq 0 ]; then
            echo 1 > "${lock}"
        fi
    fi
done
爲防止隧道檢測腳本異常,另外編寫監控該檢測腳本的腳本,配合定時任務來定時監控,如果異常,重新拉起。
#!/bin/bash
# Keeps pdc.sh alive: if no process is running it, start it in the background.
script="/sangfor/Shscripts/pdc/pdc.sh"

# Match on the full path so this script (checkpdc.sh, whose name also contains
# "pdc.sh") is not counted; pgrep never reports itself. Counting the output of
# "ps | grep pdc.sh" would include both and never drop below the threshold.
if ! pgrep -f "${script}" > /dev/null; then
    # "&" must appear literally in the command: when it is stored in a variable
    # it is passed as an ordinary argument and the job stays in the foreground.
    /usr/bin/nohup /bin/bash "${script}" > /dev/null 2>&1 &
fi
配合定時任務
# Run checkpdc.sh once every minute.
* * * * * /bin/bash /sangfor/Shscripts/pdc/checkpdc.sh
# Install build dependencies plus Xvfb and Firefox, which the Python script
# uses to run a browser on a virtual display.
yum -y install zlib-devel zlib readline-devel openssl-devel wget gcc-c++ Xvfb lrzsz firefox

# Build Python 3.4.5 from source into /usr/local/python34.
cd /tmp
wget -c https://www.python.org/ftp/python/3.4.5/Python-3.4.5.tgz
tar -zxvf Python-3.4.5.tgz
cd Python-3.4.5
./configure --prefix=/usr/local/python34
make && make install

# Single quotes keep $PATH literal so it is expanded at login time; double
# quotes would write the installer's current PATH into the profile script.
echo 'export PATH=$PATH:/usr/local/python34/bin' >/etc/profile.d/python34.sh
source /etc/profile.d/python34.sh
# Bootstrap pip for the Python 3.4 built above. The unversioned get-pip.py
# only supports current Python releases, so fetch the 3.4-specific script.
cd /tmp
wget https://bootstrap.pypa.io/pip/3.4/get-pip.py
python3 get-pip.py
# Install the browser-automation packages one at a time.
for pkg in selenium pyvirtualdisplay xvfbwrapper
do
    pip3 install "${pkg}"
done
# Download the geckodriver release and put the binary in /usr/bin.
gecko_version="v0.16.1"
gecko_tarball="geckodriver-${gecko_version}-linux64.tar.gz"

cd /tmp
wget -c "https://github.com/mozilla/geckodriver/releases/download/${gecko_version}/${gecko_tarball}"
tar zxvf "${gecko_tarball}"
cp geckodriver /usr/bin/
github地址
模擬web登陸操作深信服
# The delimiter is quoted so the shell writes the Python source verbatim
# (no variable, backtick or backslash expansion inside the heredoc).
cat > /sangfor/Pysangfor/sangfor_public.py <<'EOF'
#!/usr/bin/env python3
# -*- coding:UTF-8 -*-
# _author:kaliarch
"""Drive the Sangfor web console with Firefox on a virtual display.

Run as a script: kills leftover firefox/Xvfb processes, creates a dated log
file under ./publiclog, logs in to the console and walks through the reboot
page.
"""

import logging
import os
import time

from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.common.by import By


class Glp_SangFor:
    """Web-console session used to reboot the Sangfor device."""

    def __init__(self, logger):
        """Start the virtual display and Firefox.

        :param logger: logging.Logger that receives progress messages.
        """
        self.logger = logger
        self.logger.info("--------------start log----------------")
        self.browser = None
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        try:
            self.browser = webdriver.Firefox()
        except Exception:
            # Do not leave the virtual display running if Firefox cannot start.
            self.display.stop()
            self.display = None
            raise
        self.logger.info("start browser successfuly")
        # Placeholders: fill in the console URL and credentials.
        self.sangfor_url = "深信服公網url"
        self.username = '深信服登陸用戶名'
        self.password = '深信服登陸密碼'

    def close(self):
        """Quit the browser and stop the virtual display.

        Safe to call more than once. quit() is used instead of close() so the
        driver process is ended together with the window.
        """
        if self.browser is not None:
            try:
                self.browser.quit()
            except Exception:
                self.logger.exception("browser quit failed")
            self.browser = None
        if self.display is not None:
            try:
                self.display.stop()
            except Exception:
                self.logger.exception("display stop failed")
            self.display = None

    def login(self):
        """Open the console URL and submit the login form."""
        self.browser.get(self.sangfor_url)
        self.browser.implicitly_wait(5)
        self.browser.find_element(By.NAME, 'user').send_keys(self.username)
        self.browser.find_element(By.NAME, 'password').send_keys(self.password)
        self.browser.find_element(By.CLASS_NAME, 'buttons').click()
        self.browser.implicitly_wait(5)
        self.logger.info("loggin sangfor successfuly")

    def client_reboot(self):
        """Navigate to the reboot page and press the reboot button.

        :return: 0 when every step ran without raising, 1 when a step inside
                 the reboot sequence raised. The browser and display are shut
                 down in both cases.
        """
        self.browser.find_element(By.ID, "ext-gen111").click()
        print(self.browser.find_element(By.ID, "ext-gen111").text)
        self.browser.implicitly_wait(15)
        time.sleep(60)
        self.logger.info("switch mainiframe start")
        try:
            print(self.browser.find_element(By.LINK_TEXT, "重啓/重啓服務/關機").text)
            self.browser.find_element(By.LINK_TEXT, "重啓/重啓服務/關機").click()
            self.browser.implicitly_wait(3)
            self.browser.switch_to.frame("mainiframe")
            self.browser.implicitly_wait(8)
            time.sleep(10)
            self.browser.find_element(By.XPATH, "//button[@id='ext-gen19']").click()
            print(self.browser.find_element(By.XPATH, "//button[@id='ext-gen19']").text)
            self.browser.implicitly_wait(10)
            # NOTE(review): the click on ext-gen42 is disabled in the original
            # script; presumably it confirms the reboot - verify before enabling.
            #self.browser.find_element(By.XPATH, "//button[@id='ext-gen42']").click()
            print(self.browser.find_element(By.XPATH, "//button[@id='ext-gen42']").text)
        except Exception:
            # NOTE(review): the original logs "reboot successful" on this
            # exception path and returns 1; message kept as-is - confirm intent.
            self.logger.exception("reboot successful")
            self.close()
            return 1
        self.close()
        self.logger.info("browser close successful")
        self.logger.info("--------------end log----------------")
        return 0


class Glp_Log:
    """Creates the log directory/file name and a file-backed logger."""

    def __init__(self, filename):
        """:param filename: suffix appended to the date in the log file name."""
        self.filename = filename

    def createDir(self):
        """Ensure ./publiclog exists next to this script.

        :return: path of the log file, named '<YYYY-MM-DD>-<filename>' using
                 the current UTC date (time.gmtime).
        """
        _LOGDIR = os.path.join(os.path.dirname(__file__), 'publiclog')
        print(_LOGDIR)
        _TIME = time.strftime('%Y-%m-%d', time.gmtime()) + '-'
        _LOGNAME = _TIME + self.filename
        print(_LOGNAME)
        LOGFILENAME = os.path.join(_LOGDIR, _LOGNAME)
        print(LOGFILENAME)
        # exist_ok avoids a failure if the directory appears between a check
        # and the creation.
        os.makedirs(_LOGDIR, exist_ok=True)
        return LOGFILENAME

    def createlogger(self, logfilename):
        """Attach an INFO-level file handler to the root logger and return it.

        :param logfilename: path of the file the handler writes to.
        """
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        handler = logging.FileHandler(logfilename)
        handler.setLevel(logging.INFO)
        formater = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formater)
        logger.addHandler(handler)
        return logger


if __name__ == '__main__':
    # Clear out browser/display processes left behind by an earlier run.
    os.system("pkill firefox")
    os.system("pkill Xvfb")
    glploger = Glp_Log('public-***.log')
    logfilename = glploger.createDir()
    logger = glploger.createlogger(logfilename)
    sangfor_oper = Glp_SangFor(logger)
    try:
        sangfor_oper.login()
        sangfor_oper.client_reboot()
    finally:
        # Covers failures in login() and in the steps outside the try block
        # of client_reboot(); close() is a no-op when already closed.
        sangfor_oper.close()
EOF
通過阿里雲ECS API操作深信服設備
#!/usr/bin/env python3
# -*- coding:UTF-8 -*-
# _author:kaliarch
"""Reboot, start or stop the Sangfor ECS instance through the Alibaba Cloud ECS API.

Run as a script: creates a dated log file under ./publiclog and reboots the
configured instance.
"""

import logging
import os
import time

from aliyunsdkcore import client
from aliyunsdkecs.request.v20140526 import RebootInstanceRequest,StartInstanceRequest,StopInstanceRequest

# Placeholder instance ID taken from the original reboot/start calls.
# NOTE(review): the original stop call used a different (apparently redacted)
# ID; all operations now target one configurable instance - replace with the
# real ID.
DEFAULT_INSTANCE_ID = 'i-bpxxzx1rlsgvclq79au'


class ecsOper():
    """Thin wrapper around AcsClient for reboot/start/stop of one ECS instance."""

    def __init__(self, logger, instance_id=DEFAULT_INSTANCE_ID,
                 access_key_id='<accessKeyId>', access_secret='<accessSecret>',
                 region_id='cn-hangzhou'):
        """Create the API client.

        :param logger: logging.Logger that receives progress messages.
        :param instance_id: ECS instance every operation is sent to.
        :param access_key_id: Alibaba Cloud access key ID (placeholder default).
        :param access_secret: Alibaba Cloud access key secret (placeholder default).
        :param region_id: region of the instance.
        """
        self.clentoper = client.AcsClient(access_key_id, access_secret, region_id)
        self.logger = logger
        self.instance_id = instance_id
        self.logger.info("------------------------start reboot *** ecs of API log-------------")

    def _do_action(self, request, success_message, extra_params=()):
        """Fill in the common parameters, send the request and log the outcome.

        :param request: aliyunsdkecs request object to send.
        :param success_message: message logged after the call returns.
        :param extra_params: iterable of (name, value) query parameters.
        :return: the raw response from do_action_with_exception.
        :raises Exception: whatever the SDK call raises, after logging it.
        """
        request.set_accept_format('json')
        request.add_query_param('InstanceId', self.instance_id)
        for name, value in extra_params:
            request.add_query_param(name, value)
        try:
            response = self.clentoper.do_action_with_exception(request)
        except Exception:
            # Record the failure in the log file before propagating it.
            self.logger.exception("public ecs *** request failed")
            raise
        self.logger.info(success_message)
        self.logger.info(response)
        print(response)
        return response

    def reboot_instance(self):
        """Send RebootInstance for the configured instance."""
        return self._do_action(RebootInstanceRequest.RebootInstanceRequest(),
                               "public ecs *** reboot successful!")

    def start_instance(self):
        """Send StartInstance for the configured instance."""
        return self._do_action(StartInstanceRequest.StartInstanceRequest(),
                               "public ecs *** start successful!")

    def stop_instance(self):
        """Send StopInstance (ForceStop=false) for the configured instance."""
        return self._do_action(StopInstanceRequest.StopInstanceRequest(),
                               "public ecs *** stop successful!",
                               extra_params=(('ForceStop', 'false'),))

    def testlog(self):
        """Write a test line to the log."""
        self.logger.info("public test log")


class Glp_Log:
    """Creates the log directory/file name and a file-backed logger."""

    def __init__(self, filename):
        """:param filename: suffix appended to the date in the log file name."""
        self.filename = filename

    def createDir(self):
        """Ensure ./publiclog exists next to this script.

        :return: path of the log file, named '<YYYY-MM-DD>-<filename>' using
                 the current UTC date (time.gmtime).
        """
        _LOGDIR = os.path.join(os.path.dirname(__file__), 'publiclog')
        print(_LOGDIR)
        _TIME = time.strftime('%Y-%m-%d', time.gmtime()) + '-'
        _LOGNAME = _TIME + self.filename
        print(_LOGNAME)
        LOGFILENAME = os.path.join(_LOGDIR, _LOGNAME)
        print(LOGFILENAME)
        # exist_ok avoids a failure if the directory appears between a check
        # and the creation.
        os.makedirs(_LOGDIR, exist_ok=True)
        return LOGFILENAME

    def createlogger(self, logfilename):
        """Attach an INFO-level file handler to the root logger and return it.

        :param logfilename: path of the file the handler writes to.
        """
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        handler = logging.FileHandler(logfilename)
        handler.setLevel(logging.INFO)
        formater = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formater)
        logger.addHandler(handler)
        return logger


if __name__ == "__main__":
    glploger = Glp_Log('public-***.log')
    logfilename = glploger.createDir()
    logger = glploger.createlogger(logfilename)
    app = ecsOper(logger)
    app.reboot_instance()
查看檢查腳本日誌已經進行了切割,且保留7天的日誌,防止日誌過大佔用過多磁盤空間
微信告警信息
釘釘告警信息
查看python腳本深信服重啓日誌
這樣就簡單地實現了故障自愈,利用這一思路還可以配合客戶的很多業務,例如簡單的應用重啓等。