【python】簡單的服務器監控

需求

由於目前服務器規模較小,使用 zabbix、nagios 等開源監控系統的必要性並不高,再加上配置和維護需要花費時間成本,因此決定透過自己編寫的腳本,配合 saltstack 來處理。
監控原理很簡單:server 端負責處理監控信息,agent 端負責收集信息,並統一發送到 server 端。

服務器端

腳本目錄
├── weixin.py
├── __init__.py
└── main.py

main.py

#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
import time, socket, threading,json
from weixin import senddata,gettoken

def tcplink(sock, addr):
    """Serve one agent connection, passing every received chunk to handler().

    Intended to run in its own thread, one per accepted connection.

    Args:
        sock: Connected socket returned by accept().
        addr: (host, port) tuple of the peer; used only for logging.

    Returns:
        The last payload handed to handler(), or {} if the peer sent
        nothing before closing.
    """
    print('New Connection from %s:%s...' % addr)
    res = {}
    try:
        while True:
            # NOTE: each recv() chunk is treated as one complete message, so
            # a report is assumed to fit in a single 1024-byte read.
            data = sock.recv(1024)
            if data == b'exit' or not data:
                break
            res = data
            try:
                handler(res)
            except Exception as e:
                # One bad report must not kill the connection thread
                print(e)
    finally:
        # Always release the socket, even when recv() itself fails
        sock.close()
    print('Connection from %s:%s closed.' % addr)
    return res

# Error reporting
def report(data):
    """Join the alert lines into one message and send it out.

    The text is echoed to stdout, a token is obtained with gettoken() and
    the message is delivered with senddata().

    Args:
        data: Iterable of text lines that make up the alert.
    """
    # Every line, including the last one, is newline-terminated
    content = ''.join(line + "\n" for line in data)
    print(content)

    corpid = 'xxxxxxxxxxxx'
    corpsecret = 'xxxxxxxxxxxxxxxxx'

    msg = senddata(gettoken(corpid, corpsecret), content)
    print(msg)
    print(data)


# Handle an agent message and compare its metrics against the thresholds
def handler(res):
    """Parse one agent report and alert when a threshold is exceeded.

    Args:
        res: JSON text sent by an agent. ``type`` 1 is a resource report
            (ip, name, cpu_use, cpu_load, mem_use, disk_use); ``type`` 2 is
            a service report whose ``status`` is 1 when something is down.

    Returns:
        True if an alert was passed to report(); False otherwise, including
        malformed, incomplete or unrecognised messages.
    """
    try:
        data = json.loads(res)
    except (TypeError, ValueError) as e:
        print(e)
        print("Data type wrong.")
        return False
    if not isinstance(data, dict):
        # Valid JSON, but not the object the agents are expected to send
        print("Data type wrong.")
        return False

    m_type = data.get('type')

    # Server resource monitoring
    if m_type == 1:
        # (field, alert threshold) in the order they appear in the alert.
        # cpu_load could be improved by scaling it with the CPU core count.
        limits = (
            ('cpu_use', 95),
            ('cpu_load', 3),
            ('mem_use', 85),
            ('disk_use', 75),
        )
        try:
            ip = data['ip']
            name = data['name']
            raw = [data[field] for field, _ in limits]
            # Compare as numbers: on Python 2 a str always sorts above an
            # int, which would turn any string value into a false alarm.
            numeric = [float(value) for value in raw]
        except (KeyError, TypeError, ValueError) as e:
            print(e)
            print("Data type wrong.")
            return False

        message = ["ip: %s" % ip, "name: %s" % name]
        print("%s %s %s %s %s" % ((ip,) + tuple(raw)))
        for (field, limit), value, number in zip(limits, raw, numeric):
            if number > limit:
                message.append("%s: %s" % (field, value))

        # The first two entries are only the host identification
        if len(message) > 2:
            report(message)
            return True
        return False

    # Service monitoring
    if m_type == 2:
        print("service eyes...")
        print(data)
        if data.get("status") == 1:
            report(["oops some service down!", "message: %s" % data])
            return True
        return False

    return False

if __name__=="__main__":
    # Entry point: accept agent connections on TCP 9999, one thread each.
    print("Monitor Service Listening on 9999 port.")
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Allow an immediate restart while old connections are still in TIME_WAIT
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        s.bind(('0.0.0.0', 9999))
        s.listen(5)
        while True:
            sock, addr = s.accept()
            t = threading.Thread(target=tcplink, args=(sock, addr))
            # Daemon threads do not keep the process alive after Ctrl-C
            t.daemon = True
            t.start()
    except KeyboardInterrupt:
        print("Monitor Service stopped.")
    finally:
        s.close()

weixin.py

import requests
import json
import sys

def gettoken(corp_id, corp_secret):
    """Fetch an access token from the qyapi.weixin.qq.com gettoken endpoint.

    Args:
        corp_id: Value sent as the ``corpid`` query parameter.
        corp_secret: Value sent as the ``corpsecret`` query parameter.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response contains no ``access_token``.
    """
    gettoken_url = 'https://qyapi.weixin.qq.com/cgi-bin/gettoken'

    # Let requests URL-encode the credentials; the timeout keeps a stalled
    # connection from blocking the caller indefinitely.
    token_file = requests.get(
        gettoken_url,
        params={'corpid': corp_id, 'corpsecret': corp_secret},
        timeout=10,
    )
    token_file.raise_for_status()

    token_json = token_file.json()
    try:
        return token_json['access_token']
    except KeyError:
        # Show the body so the reason for the missing token is visible
        print("gettoken failed: %s" % token_json)
        raise


def senddata(access_token, content, touser="187xxxxxxxx|185xxxxxxxx",
             agentid="17", timeout=10):
    """Post a text message to the qyapi.weixin.qq.com message/send endpoint.

    Args:
        access_token: Token obtained from gettoken().
        content: Message text.
        touser: Recipients, separated by ``|``.
        agentid: Value sent as the ``agentid`` field.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    send_url = 'https://qyapi.weixin.qq.com/cgi-bin/message/send'

    send_values = {
        "touser": touser,
        "msgtype": "text",
        "agentid": agentid,
        "text": {"content": content},
        "safe": "0",
    }

    send_data = json.dumps(send_values, ensure_ascii=False)
    # On Python 2 json.dumps may return either text or a byte string here.
    # Encode only text, so bytes are never implicitly ASCII-decoded first.
    if isinstance(send_data, type(u'')):
        send_data = send_data.encode('utf-8')

    response = requests.post(
        send_url,
        params={'access_token': access_token},
        data=send_data,
        timeout=timeout,
    )
    return response.text


# Codec this module wants as the interpreter-wide default
default_encoding = 'utf-8'

# Switch the process-wide default encoding when it differs from the one above
if sys.getdefaultencoding() != default_encoding:

    # NOTE(review): reload() is used as a builtin, so this path presumably
    # only runs on Python 2 - confirm before porting.
    reload(sys)

    sys.setdefaultencoding(default_encoding)

客戶端 1

# monitor.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import division
import socket
import psutil
import os


# Collect host metrics
def getMonitor():
    """Collect host resource metrics and return them as a JSON string.

    Blocks for about five seconds while CPU utilisation is sampled.

    Returns:
        JSON text of an object with the keys ``type`` (always 1), ``ip``,
        ``name``, ``cpu_load`` (5-minute load average), ``cpu_use`` (mean
        per-core utilisation in percent), ``mem_use`` (percent of memory in
        use) and ``disk_use`` (percent of ``/`` in use).
    """
    # Imported locally because this script's header does not import json
    import json

    # Host information
    name = socket.getfqdn(socket.gethostname())
    ip = socket.gethostbyname(name)

    # Memory: report the share that is IN USE, so that a high value means
    # memory pressure (available/total would be the free share instead).
    mem = psutil.virtual_memory()
    mem_use = int((mem.total - mem.available) * 100.0 / mem.total)

    # CPU load: keep only the 5-minute average
    _, cpu_load, _ = os.getloadavg()

    # CPU utilisation: mean of the per-core percentages over a 5 s sample
    per_core = psutil.cpu_percent(interval=5, percpu=True)
    cpu_use = sum(per_core) / len(per_core)

    # Disk
    disk_use = psutil.disk_usage('/').percent

    data = {
        "type": 1,
        "ip": ip,
        "name": name,
        "cpu_load": cpu_load,
        "cpu_use": cpu_use,
        "mem_use": mem_use,
        "disk_use": disk_use,
    }
    # A real JSON encoder stays valid for values containing quotes or
    # non-ASCII text, which repr()-and-replace does not.
    payload = json.dumps(data)
    print(payload)
    return payload


# Collect first: sampling takes several seconds, and there is no reason to
# hold a connection to the server open while it runs.
data = getMonitor()

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    # Open the connection; a connect failure still propagates to the caller
    s.connect(('server_ip', 9999))
    try:
        # sendall() keeps writing until the whole payload has been sent,
        # whereas send() may stop after a partial write
        s.sendall(data)
    except Exception as e:
        print(e)
finally:
    s.close()

客戶端 2

#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import division
import socket
import os,commands,json

# Query the state of a system service managed through systemctl
def check_status(service_name):
    """Run ``sudo systemctl status`` for *service_name*, discarding stdout.

    Args:
        service_name: Unit name appended to the command line.

    Returns:
        The value returned by os.system() for the command.
    """
    command = 'sudo systemctl status ' + service_name + ' > /dev/null'
    return os.system(command)

# Services to monitor
service_lists = [
    'config.service',
    'xxx.service',
    'xxx.service',
]


def get_status(service_lists):
    """Check every service and summarise the result as a JSON string.

    Message types used in the ``type`` field:
        type == 1  hardware monitoring
        type == 2  service monitoring
        type == x  xxxxxx

    Args:
        service_lists: Iterable of service names passed to check_status().

    Returns:
        JSON text of an object with ``type`` 2 and ``status`` 1 if any check
        returned non-zero (0 otherwise); every failing service is added as
        a key with the value "down".
    """
    data = {"type": 2, "status": 0}
    for service in service_lists:
        if check_status(service) != 0:
            data[service] = "down"
            data["status"] = 1
    # A real JSON encoder stays valid for names containing quotes or
    # non-ASCII text, which repr()-and-replace does not.
    payload = json.dumps(data)
    print(payload)
    return payload

data = get_status(service_lists)

# Contact the server only when at least one service is reported down
if json.loads(data)["status"] == 1:
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Open the connection; a connect failure still propagates
        s.connect(('server_ip', 9999))
        try:
            # sendall() keeps writing until the whole payload has been sent,
            # whereas send() may stop after a partial write
            s.sendall(data)
        except Exception as e:
            print(e)
    finally:
        s.close()

運行方式

  • 客戶端
    在 saltstack 服務器上定時執行監控腳本

*/5 * * * * salt '*' cmd.script salt://scripts/monitor.py python_shell=true
*/5 * * * * salt '*' cmd.script salt://scripts/monitor_service_status.py python_shell=true

  • 服務器
    加入系統進程,偵聽 tcp 端口

相關文章
相關標籤/搜索