#!/usr/bin/env python3
# -*- coding: utf8 -*-
"""Summarise an nginx access log per client IP.

Environment the script was written on:
    OS:      Linux version 2.6.32-431.el6.x86_64
             (mockbuild@c6b8.bsys.dev.centos.org)
    Build:   gcc version 4.4.7 20120313 (Red Hat 4.4.7-4) (GCC)
    Release: 2.6.32-431.el6.x86_64
    Version: #1 SMP Fri Nov 22 03:15:09 UTC 2013
    cpuinfo: GenuineIntel Intel(R) Core(TM) i3 CPU M 380 @ 2.53GHz
    cpuinfo: Hz=2527.069 bogomips=5054.13
    cpuinfo: ProcessorChips=1 PhyscalCores=2
    cpuinfo: Hyperthreads =0 VirtualCPUs =2
    Python:  Python 3.5.1

Features:
    1. Reports the total number of requests and the total traffic.
    2. Reports per-IP counts for a fixed set of HTTP status codes.
    3. Can limit the report to a given number of entries.
    4. Can restrict the statistics to a recent time window.
"""

import fileinput
import re
import time
from collections import Counter
import math
import sys
from datetime import datetime, timedelta

# Default number of report rows; None means "show every IP".
records = None

# Status codes that get their own column, in display order.
STATUS_CODES = ('200', '302', '304', '403', '404', '500', '502', '503')

# One format string shared by the header, the separator and every data row.
ROW_FMT = '%-15s %-10s %-12s %-8s %-8s %-8s %-8s %-8s %-8s %-8s %-8s'

# Fields of a log line are separated by runs of whitespace and/or quotes.
_FIELD_SEP = re.compile(r'[\s"]+')

# "<number><unit>" time-window argument, e.g. 5d / 5h / 5m / 5s.
_WINDOW_RE = re.compile(r'^(\d+)([dhms])$')
_UNIT_TO_KWARG = {'d': 'days', 'h': 'hours', 'm': 'minutes', 's': 'seconds'}


def usage():
    """Print the command-line help and terminate the process.

    The exit status is 0, as it always has been, so existing shell callers
    see no change.
    """
    print('Usage: %s nginx_log_file [max_record_nums] [datetime]' % sys.argv[0])
    print('Usage: [max_record_nums] for int number. eg: 10 ')
    print('Usage: [datetime] for [5d | 5h | 5m | 5s] for [5 days | 5 hours | 5 minutes | 5 seconds]')
    print('eg: ./ngx.py /var/log/nginx/access.log [10] [5d | 5h | 5m | 5s]')
    sys.exit(0)


def tmstamp(spec=None):
    """Return the POSIX timestamp of the point ``spec`` ago.

    ``spec`` looks like ``5d``, ``5h``, ``5m`` or ``5s``.  When it is
    omitted the value is read from ``sys.argv[3]``; if that argument is
    absent, 0 is returned, which makes every log entry qualify.  A value
    that does not match the expected shape (or is too large to represent)
    prints the usage text and exits.
    """
    if spec is None:
        if len(sys.argv) <= 3:
            return 0
        spec = sys.argv[3]
    matched = _WINDOW_RE.match(spec)
    if matched is None:
        usage()
    amount, unit = matched.groups()
    try:
        delta = timedelta(**{_UNIT_TO_KWARG[unit]: float(amount)})
        return (datetime.now() - delta).timestamp()
    except (OverflowError, ValueError, OSError):
        # Absurdly large windows cannot be represented as a date.
        usage()


def convertBytes(bytes, lst=('B', 'KB', 'MB', 'GB', 'TB', 'PB')):
    """Format a byte count with a binary unit, e.g. ``1536`` -> ``'1.50 KB'``.

    ``bytes`` is the amount to format (the parameter name is kept for
    backward compatibility although it shadows the builtin) and ``lst`` is
    the sequence of unit labels, smallest first.  Values beyond the largest
    unit are expressed in that largest unit.  Zero and values below 1 are
    reported in the smallest unit instead of raising or picking a wrong one.
    """
    value = float(bytes)
    idx = 0
    # Repeated division by 1024 is exact in binary floating point, so unit
    # boundaries are not subject to the rounding of math.log().
    while value >= 1024 and idx < len(lst) - 1:
        value /= 1024
        idx += 1
    return '%.2f %s' % (value, lst[idx])


def _parse_line(line):
    """Return ``[ip, status, size, dtstamp]`` for a log line, or None.

    ``size`` stays a decimal string and ``dtstamp`` is the local-time POSIX
    timestamp of the request.  None is returned when the line has too few
    fields, an unparsable timestamp or a non-numeric size.
    """
    fields = _FIELD_SEP.split(line)
    if len(fields) < 10:
        return None
    ip, raw_time, status, size = fields[0], fields[3], fields[8], fields[9]
    if size == '-':
        # Some servers log "-" when no body was sent.
        size = '0'
    try:
        int(size)
        parsed = time.strptime(raw_time.lstrip('['), '%d/%b/%Y:%H:%M:%S')
        dtstamp = time.mktime(parsed)
    except (ValueError, OverflowError):
        return None
    return [ip, status, size, dtstamp]


def ngx(path=None):
    """Yield ``[ip, status, size, dtstamp]`` for each parsable log line.

    ``path`` defaults to ``sys.argv[1]``.  Malformed lines are skipped.  If
    the file cannot be read, the error is reported on stderr, the usage text
    is printed and the process exits.
    """
    if path is None:
        if len(sys.argv) < 2:
            usage()
        path = sys.argv[1]
    try:
        # A private FileInput avoids the module-global state of
        # fileinput.input(), which breaks if a previous run was abandoned.
        with fileinput.FileInput(files=path) as handle:
            for line in handle:
                entry = _parse_line(line)
                if entry is not None:
                    yield entry
    except (OSError, UnicodeError) as err:
        print('Cannot read %s: %s' % (path, err), file=sys.stderr)
        usage()


def _parse_args(argv):
    """Validate ``argv`` and return ``(max_rows, window_spec)``.

    Either element is None when the corresponding argument was not given.
    Invalid input prints the usage text and exits.
    """
    if len(argv) < 2 or len(argv) > 4:
        usage()
    limit = records
    window = None
    if len(argv) >= 3:
        # The row limit applies whether or not a time window follows it.
        try:
            limit = int(argv[2])
        except ValueError:
            usage()
        if limit < 0:
            usage()
    if len(argv) == 4:
        window = argv[3]
        if _WINDOW_RE.match(window) is None:
            usage()
    return limit, window


def _collect(entries, since):
    """Aggregate entries newer than ``since``.

    Returns ``(iptotal, ipsize, by_status, totsize)``: requests per IP,
    bytes per IP, a dict mapping each code in STATUS_CODES to a per-IP
    Counter, and the total number of bytes.
    """
    iptotal, ipsize = Counter(), Counter()
    by_status = {code: Counter() for code in STATUS_CODES}
    totsize = 0
    for ip, status, size, dtstamp in entries:
        if dtstamp <= since:
            continue
        nbytes = int(size)
        ipsize[ip] += nbytes
        totsize += nbytes
        iptotal[ip] += 1
        if status in by_status:
            by_status[status][ip] += 1
    return iptotal, ipsize, by_status, totsize


def _report(iptotal, ipsize, by_status, totsize, limit):
    """Print the totals and the per-IP table, busiest IPs first."""
    if not iptotal:
        print('Not found data!')
        return
    print("\nTotal traffic : %s Total request times : %d\n"
          % (convertBytes(totsize), sum(iptotal.values())))
    print(ROW_FMT % (('Ip', 'Times', 'Traffic') + STATUS_CODES))
    print(ROW_FMT % (('-' * 15, '-' * 10, '-' * 12) + ('-' * 8,) * len(STATUS_CODES)))
    for ip, hits in iptotal.most_common(limit):
        per_code = tuple(by_status[code][ip] for code in STATUS_CODES)
        print(ROW_FMT % ((ip, hits, convertBytes(ipsize[ip])) + per_code))


def main(argv=None):
    """Run the report for ``argv`` (defaults to ``sys.argv``)."""
    argv = sys.argv if argv is None else argv
    limit, window = _parse_args(argv)
    # Compute the cutoff once so it does not drift while the log is read.
    since = tmstamp(window) if window is not None else 0
    iptotal, ipsize, by_status, totsize = _collect(ngx(argv[1]), since)
    _report(iptotal, ipsize, by_status, totsize, limit)


if __name__ == '__main__':
    main()
# 效果圖: