檢查resin的gc相關log的腳本(nagios插件版)-python

輸入:log_path-日誌位置(-p);begin_time-檢查最近多少分鐘內的日誌(單位爲分鐘,-t);full_gc-fullgc次數報警值(-f);min_gc_interval-gc最小間隔時間報警值(單位爲秒,-i);gc_count-gc次數報警值(-g)
輸出:當被檢查log中fullgc次數或gc次數大於等於報警值,或gc最小間隔時間小於等於報警值的時候,腳本退出狀態爲2,並將所需報警內容打印在stdout中。
稍做修改也能夠寫在crontab裏,做爲定時任務執行。
腳本檢測方法:
./check_gc.py -p/home/gc.log -f5 -g50 -i3 -t5
使用./check_gc.py -h能夠看到關於輸入的簡要幫助文檔

#! /usr/local/bin/python3

import datetime
import re
import sys
import optparse

def gen_time(minutedelta):
    """Return the local time `minutedelta` minutes ago as a timestamp string.

    The result uses the '%Y-%m-%dT%H:%M:%S' layout, i.e. the same
    second-resolution prefix that the GC log lines start with.
    """
    now = datetime.datetime.today()
    cutoff = now - datetime.timedelta(minutes=minutedelta)
    return cutoff.strftime('%Y-%m-%dT%H:%M:%S')

def sub_times(time1,time2):
    """Return `time1 - time2` in seconds as a float.

    Both arguments are '%Y-%m-%dT%H:%M:%S' strings; a string that does not
    match that layout makes `strptime` raise ValueError.
    """
    layout = '%Y-%m-%dT%H:%M:%S'
    later = datetime.datetime.strptime(time1, layout)
    earlier = datetime.datetime.strptime(time2, layout)
    elapsed = later - earlier
    return elapsed.total_seconds()

def change_to_data(time1):
    """Parse a '%Y-%m-%dT%H:%M:%S' string into a `datetime.datetime`.

    Raises ValueError (from `strptime`) when `time1` does not match the layout.
    """
    parse = datetime.datetime.strptime
    return parse(time1, '%Y-%m-%dT%H:%M:%S')

def check_log(log_path,examine_time):
    """Scan a GC log and summarise the entries at or after `examine_time`.

    Args:
        log_path: path of the GC log file to read.
        examine_time: '%Y-%m-%dT%H:%M:%S' string; only lines whose timestamp
            prefix compares (as a string) greater than or equal to it are
            counted.

    Returns:
        A dict with three keys:
        'full_gc' - number of counted lines containing '[Full GC';
        'min_gc_interval' - smallest non-zero gap, in seconds, between two
            consecutive counted lines (999999 when no such gap exists);
        'gc_count' - number of counted lines.

    Timestamps are truncated to whole seconds (everything before the first
    '.'), so two events logged within the same second yield a gap of 0,
    which is deliberately ignored when tracking the minimum.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%S'
    # Raw string: '\[' is not a valid escape in a normal string literal.
    full_gc_re = re.compile(r'\[Full GC')
    full_gc_count = 0
    gc_count = 0
    previous_stamp = None
    min_gc_interval = 999999
    with open(log_path) as log_file:
        for line in log_file:
            line = line.strip()
            time_line = line.split('.')[0]
            # Parse once; lines that do not start with a well-formed timestamp
            # are skipped, which guarantees every later comparison is between
            # values in the expected time format.
            try:
                current_stamp = datetime.datetime.strptime(time_line, stamp_format)
            except ValueError:
                continue
            if time_line < examine_time:
                continue
            if previous_stamp is not None:
                gc_interval = (current_stamp - previous_stamp).total_seconds()
                if gc_interval != 0 and gc_interval < min_gc_interval:
                    min_gc_interval = gc_interval
            previous_stamp = current_stamp
            gc_count += 1
            if full_gc_re.search(line):
                full_gc_count += 1
    return {
        'full_gc': full_gc_count,
        'min_gc_interval': min_gc_interval,
        'gc_count': gc_count,
    }

def check_result(report,full_gc,min_gc_interval,gc_count):
    """Compare a GC report against the alarm thresholds.

    Args:
        report: dict with the keys 'full_gc', 'min_gc_interval' and 'gc_count'.
        full_gc: alarm when report['full_gc'] is at least this value.
        min_gc_interval: alarm when report['min_gc_interval'] is at most this
            value.
        gc_count: alarm when report['gc_count'] is at least this value.

    A threshold of None (option not given on the command line) disables that
    particular check instead of raising TypeError on the comparison.

    When any enabled check trips, the report is printed via `display` and the
    process exits with status 2; otherwise the function returns None.
    """
    # These are deliberately simple alarm rules; they can be made more elaborate.
    alarm = (
        (full_gc is not None and full_gc <= report['full_gc'])
        or (min_gc_interval is not None
            and min_gc_interval >= report['min_gc_interval'])
        or (gc_count is not None and gc_count <= report['gc_count'])
    )
    if alarm:
        display(report)
        sys.exit(2)

def display(display_data):
    """Print the mapping on one line as 'key:value;' pairs, ordered by key."""
    pieces = [
        name + ':' + str(display_data[name]) + ';'
        for name in sorted(display_data)
    ]
    print(''.join(pieces))

def exec_check(begin_time,log_path,full_gc,min_gc_interval,gc_count):
    """Run one complete check of the GC log.

    Args:
        begin_time: how many minutes back from now the examined window starts.
        log_path: path of the GC log file to scan.
        full_gc, min_gc_interval, gc_count: alarm thresholds, passed on to
            `check_result` unchanged.

    The log is summarised with `check_log` and the summary is handed to
    `check_result`, which exits the process with status 2 on an alarm.
    """
    time_begin = gen_time(begin_time)
    # Use the log_path argument rather than the module-level `options`, so the
    # function also works when it is called from outside this script.
    result_dir = check_log(log_path, time_begin)
    check_result(result_dir, full_gc, min_gc_interval, gc_count)

def generate_arguments():
    """Parse the command line and return the optparse options object.

    The returned object carries log_path (-p), full_gc (-f), gc_count (-g),
    min_gc_interval (-i) and begin_time (-t). The log path and the look-back
    window are mandatory: when either is missing, the usage message and an
    error are printed and the process exits via `parser.error`, instead of
    failing later with a traceback.
    """
    # argparse is the recommended module; optparse is used here to stay
    # compatible with older Python versions.
    parser = optparse.OptionParser()
    parser.add_option('-p', action='store', type='string', dest='log_path',
                      help='gc_log_path')
    parser.add_option('-f', action='store', type='int', dest='full_gc',
                      help='full_gc apper times')
    parser.add_option('-g', action='store', type='int', dest='gc_count',
                      help='gc apper times')
    parser.add_option('-i', action='store', type='int', dest='min_gc_interval',
                      help='the interval of gc')
    parser.add_option('-t', action='store', type='int', dest='begin_time',
                      help='check log from this value to now')
    options, _ = parser.parse_args()
    if options.log_path is None:
        parser.error('-p (gc log path) is required')
    if options.begin_time is None:
        parser.error('-t (minutes to look back) is required')
    return options
    
if __name__ == '__main__':
    # Script entry point: parse the command-line options, then run the check.
    options = generate_arguments()
    exec_check(
        begin_time=options.begin_time,
        log_path=options.log_path,
        full_gc=options.full_gc,
        min_gc_interval=options.min_gc_interval,
        gc_count=options.gc_count,
    )

附:gc日誌格式:
2013-03-21T16:03:20.036+0800: 46930.500: [GC [PSYoungGen: 987599K->9744K(1013568K)] 1755375K->780204K(1800000K), 0.0511650 secs] [Times: user=0.12 sys=0.00, real=0.05 secs] 
2013-03-21T16:03:39.397+0800: 46949.860: [GC [PSYoungGen: 987856K->11887K(1013120K)] 1758316K->784317K(1799552K), 0.0581740 secs] [Times: user=0.11 sys=0.01, real=0.06 secs] 
2013-03-21T16:03:39.455+0800: 46949.919: [Full GC [PSYoungGen: 11887K->0K(1013120K)] [PSOldGen: 772429K->495896K(786432K)] 784317K->495896K(1799552K) [PSPermGen: 157441K->157441K(170560K)], 2.6535560 secs] [Times: user=2.66 sys=0.00, real=2.66 secs] 
2013-03-21T16:04:28.456+0800: 46998.919: [GC [PSYoungGen: 978112K->34038K(998016K)] 1474008K->539300K(1784448K), 0.0918470 secs] [Times: user=0.24 sys=0.00, real=0.09 secs] 
2013-03-21T16:04:49.390+0800: 47019.854: [GC [PSYoungGen: 998006K->11664K(1006272K)] 1503268K->541030K(1792704K), 0.0869980 secs] [Times: user=0.23 sys=0.00, real=0.08 secs]

相關文章
相關標籤/搜索