貼一段 nginx日誌對應放入mysql的python工具

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
用於切分 nginx日誌
nginx日誌 格式:
log_format  access  '$remote_addr - $remote_user [$time_local] "$request" '
              '$status $body_bytes_sent "$http_referer" '
              '"$http_user_agent" $http_x_forwarded_for';
INSERT INTO `nginxlog` (`status`, `remote_user`, `http_referer`, `remote_addr`, `http_x_forwarded_for`, `hostname`, `request`, `request_type`, `http_user_agent`, `time_local`) VALUES ('2', '2', '2', '2', '2', '2', '2', '2', '2', '2')
table sql:
CREATE TABLE `nginxlog` (
  `status` int(4) DEFAULT NULL,
  `remote_user` varchar(20) DEFAULT NULL,
  `http_referer` text,
  `remote_addr` varchar(20) DEFAULT NULL,
  `http_x_forwarded_for` varchar(20) DEFAULT NULL,
  `hostname` varchar(50) DEFAULT NULL,
  `request` varchar(200) DEFAULT NULL,
  `request_type` varchar(10) DEFAULT NULL,
  `http_user_agent` varchar(200) DEFAULT NULL,
  `time_local` int(10) unsigned DEFAULT NULL,
  `server_id` int(5) unsigned DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8
"""
#--------------------------config----------------------------------------------------------------------
#config
mysql = {}
mysql['mhost'] = 'localhost'
mysql['muser'] = 'root'
mysql['mpwd'] = ''
mysql['mport'] = '3306'
mysql['mdbname'] = 'nginxlog'
 
#獲取當前執行目錄  工做目錄
ROOT = '/home/nginxweblog/app/'
#ftp 目錄  用於存放生產 原始 日誌
Sourcedatadir = '/home/nginxweblog/sourcedata/'
#log run
Logfile = ROOT+'nginxlog.log'
#--------------------------------import--------------------------------------------------------
import re,time,os
import MySQLdb
#--------------------------------Sublog--------------------------------------------------------
#分析 每行的 nginxlog 數據
class Sublog:
    def __init__(self, logline=None):
        if logline:
            self.logline = logline.strip()
        self.logdate = {}
        #self.run()
 
    def run(self):
        if self.logline:
            self.splitlog()
            self.getlog()
 
    def splitlog(self):
        # 按照空格切分日誌
        self.splitlogd = re.split(' ', self.logline)
        #print self.splitlogd
        return self.splitlogd
 
    def getlog(self):
        if len(self.splitlogd) >= 20:
            self.logdate['remote_addr'] = self.splitlogd[0]
            self.logdate['remote_user'] = self.splitlogd[2]
            self.logdate['time_local'] = time.mktime(time.strptime(self.splitlogd[3], '[%d/%b/%Y:%H:%M:%S'))
            self.logdate['request_type'] = self.splitlogd[5].replace('\"', '')
            self.logdate['request'] = self.splitlogd[6]
            self.logdate['status'] = self.splitlogd[8]
            self.logdate['http_referer'] = MySQLdb.escape_string(self.splitlogd[10].replace('\"', ''))
            self.logdate['http_x_forwarded_for'] = self.splitlogd[len(self.splitlogd)-1]
            self.get_http_user_agent()
            self.get_hostname()
 
    def get_http_user_agent(self):
        agent = re.compile(r'\"(.*?)\"')
        self.logdate['http_user_agent'] = MySQLdb.escape_string(agent.findall(self.logline)[2])
 
    def get_hostname(self):
        hostname_r = re.compile(r'http://(.*?)/')
        ishostname = hostname_r.findall(self.splitlogd[10])
        #print ishostname
        if len(ishostname) >= 1:
            self.logdate['hostname'] = ishostname[0]
        else:
            self.logdate['hostname'] = '-'
         
#--------------------------------insert_log---------------------------------------------
#   生成 csv 數據  |||  分割 數據  , 主要用於 mysql快速導入
class insert_log(Sublog):
    def __init__(self, logpath, serverid):
        Sublog.__init__(self)
        self.nowtime = time.strftime("%Y_%m_%d",time.localtime(time.time()))
        self.sunlogfile = "nginxlog_"+self.nowtime+'.csv'
        self.logpath = logpath
        self.serverid = serverid
        self.numbres = 0
 
        #self.Mysql_db_instance = Mysql_db()
 
    def createsubfile(self):
        self.opencsvfile()
        self.subfile = open(ROOT+'CSV/'+self.sunlogfile, 'a')
             
        olog = open(self.logpath)
        for i in olog:
     
            #self.logdate = {}
            self.logline = i.strip()
            self.run()
            if self.logdate:
                #print self.logdate
                self.createsql()
                #self.insertdb()
                #time.sleep(3)
                self.numbres = self.numbres + 1
        self.subfile.close()
        return [ROOT+'CSV/'+self.sunlogfile, self.sunlogfile, self.numbres]
 
    def insertdb(self):
        sql = "INSERT INTO `nginxlog` (`status`, `remote_user`, `http_referer`, `remote_addr`, `http_x_forwarded_for`, `hostname`, `request`, `request_type`, `http_user_agent`, `time_local`) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');" % (self.logdate['status'], self.logdate['remote_user'], self.logdate['http_referer'], self.logdate['remote_addr'], self.logdate['http_x_forwarded_for'], self.logdate['hostname'], self.logdate['request'], self.logdate['request_type'], self.logdate['http_user_agent'], self.logdate['time_local'])
        #print sql
        self.subfile.write(sql + '\n')
        #self.Mysql_db_instance.insertsql(sql)
        #self.Mysql_db_instance.commit()
    def createsql(self):
        c = "%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s|||%s" % (self.logdate['status'], self.logdate['remote_user'], self.logdate['http_referer'], self.logdate['remote_addr'], self.logdate['http_x_forwarded_for'], self.logdate['hostname'], self.logdate['request'], self.logdate['request_type'], self.logdate['http_user_agent'], self.logdate['time_local'],self.serverid)
        self.subfile.write(c + '\n')
    def opencsvfile(self):
        if os.path.isdir(ROOT+'CSV'):
            pass
        else:
            os.mkdir(ROOT+'CSV')
#--------------------------------function ---------------------------------------------
def load_mysql(csvfile):
    if mysql['mpwd']:
        os.system("""mysql -h%s -u%s -p%s -e  "LOAD DATA INFILE '%s' INTO TABLE nginxlog.nginxlog FIELDS TERMINATED BY '|||';" && rm -rf %s """ % (mysql['mhost'], mysql['muser'], mysql['mpwd'], csvfile, csvfile))
    else:
        os.system("""mysql -h%s -u%s -e  "LOAD DATA INFILE '%s' INTO TABLE nginxlog.nginxlog FIELDS TERMINATED BY '|||';" && rm -rf %s """ % (mysql['mhost'], mysql['muser'], csvfile, csvfile))
 
#索引處理 type = 0  刪除索引, 1建立索引
def mysqlindex(type):
    if mysql['mpwd']:
        ism = "mysql -h%s -u%s -p%s -e " % (mysql['mhost'], mysql['muser'], mysql['mpwd'])
    else:
        ism = "mysql -h%s -u%s -e  " % (mysql['mhost'], mysql['muser'])
    if type == 0:
        os.system(" %s 'alter table nginxlog.nginxlog drop index time_local' " % ism)
        os.system(" %s 'alter table nginxlog.nginxlog drop index hostname' " % ism)
        os.system(" %s 'alter table nginxlog.nginxlog drop index remote_addr' " % ism)
    elif type == 1:
        os.system(" %s 'alter table nginxlog.nginxlog add index time_local(time_local)' " % ism)
        os.system(" %s 'alter table nginxlog.nginxlog add index hostname(hostname)' " % ism)
        os.system(" %s 'alter table nginxlog.nginxlog add index remote_addr(remote_addr)' " % ism)
""" 日誌記錄函數 """
def write_logs(logconten):
    logfile_path = Logfile
    if logfile_path and logconten:
        log_write = open(logfile_path, 'a')
        log_write.write(logconten+'\n')
        log_write.close()
#
#--------------------------------運行---------------------------------------------
def run():
    list = os.listdir(Sourcedatadir)
    print 'start.......'
    write_logs('-----------%s start.....----%s-----------------' % ( time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), str(list)) ) 
    for line in list:
        if os.path.isfile(Sourcedatadir+line):
             
            s = re.split('_', line)
            if re.match(r'^\d.*$', s[0]):
                write_logs('time(%s) logfile(%s) start ....' % (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), line))
                insert_log_instance = insert_log(Sourcedatadir+line, s[0])
                gfiledata = insert_log_instance.createsubfile()
                load_mysql(gfiledata[0])
 
                os.remove(Sourcedatadir+line)
                write_logs('time(%s) logfile(%s) log numbers(%s) stop ....' % (time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time())), line, str(gfiledata[2])) )
                print Sourcedatadir+line+ ' OK ....... '
 
#--------------------------------  exece ---------------------------------------------
if __name__ == "__main__":
    #run_inotify(Sourcedatadir, run)
    run()
相關文章
相關標籤/搜索