class ActionHandler(object):
    """Analyse a trigger that reached its alert condition and run its actions.

    The handler looks up every action attached to the trigger, keeps a
    per-action / per-host alert counter and, once an action's interval has
    elapsed, dispatches the configured action operations (e.g. e-mail).
    """

    def __init__(self, trigger_data, alert_counter_dic):
        """Store the trigger payload and the shared alert counter.

        :param trigger_data: dict-like trigger payload; this class reads the
            keys 'trigger_id', 'host_id', 'service_item' and 'msg' from it.
        :param alert_counter_dic: mutable mapping shaped as
            {action_id: {host_id: {'counter': int, 'last_alert': float}}}.
            It is updated in place, so the caller presumably keeps it alive
            between handler instances.
        """
        self.trigger_data = trigger_data
        self.alert_counter_dic = alert_counter_dic

    def record_log(self, action_obj, action_operation, host_id, trigger_data):
        """Record an alert event in the database.

        :param action_obj: action that fired (currently not stored).
        :param action_operation: operation that was run (currently not stored).
        :param host_id: id of the host the alert is about.
        :param trigger_data: trigger payload, saved as the log content.
        """
        models.EventLog.objects.create(
            event_type=0,
            host_id=host_id,
            trigger_id=trigger_data.get('trigger_id'),
            log=trigger_data,
        )

    def action_email(self, action_obj, action_operation_obj, host_id, trigger_data):
        """Send the alert e-mail to every notifier of the operation.

        :param action_obj: action that fired this alert.
        :param action_operation_obj: operation describing whom to notify and
            the message body (``msg_format``).
        :param host_id: id of the host the alert is about.
        :param trigger_data: trigger payload used to build the subject line.
        """
        print("要發報警的數據:", self.alert_counter_dic[action_obj.id][host_id])
        print("action email:", action_operation_obj.action_type,
              action_operation_obj.notifiers, trigger_data)

        notifier_mail_list = [obj.email for obj in action_operation_obj.notifiers.all()]
        subject = '級別:%s -- 主機:%s -- 服務:%s' % (
            trigger_data.get('trigger_id'),
            trigger_data.get('host_id'),
            trigger_data.get('service_item'),
        )
        send_mail(
            subject,
            action_operation_obj.msg_format,
            settings.DEFAULT_FROM_EMAIL,
            notifier_mail_list,
        )

    def _action_applies_to_host(self, action, host_id):
        """Return True if *action* covers *host_id* via a host group or directly."""
        # An action may reference hosts and host groups directly even though
        # the trigger already reaches hosts through templates: one trigger can
        # be linked to several templates, so a firing trigger alone does not
        # tell which template's host caused it.
        for host_group in action.host_groups.select_related():
            for group_host in host_group.host_set.select_related():
                if group_host.id == host_id:
                    return True
        return any(host.id == host_id for host in action.hosts.select_related())

    def _update_alert_counter(self, action, host_id):
        """Create or advance the alert counter of (*action*, *host_id*)."""
        # 1. First time this action is seen: initialise its counter dict.
        if action.id not in self.alert_counter_dic:
            self.alert_counter_dic[action.id] = {}
        print("action, ", id(action))
        host_counters = self.alert_counter_dic[action.id]

        # 2. First time this host triggers this action: start counting now.
        #    An alert is not sent on every trigger event; it is sent only
        #    once the action's interval has elapsed.
        if host_id not in host_counters:
            host_counters[host_id] = {'counter': 0, 'last_alert': time.time()}
            return

        # 3. The alert interval has elapsed: bump the counter by one.
        elapsed = time.time() - host_counters[host_id]['last_alert']
        if elapsed >= action.interval:
            host_counters[host_id]['counter'] += 1
        else:
            print("沒達到alert interval時間,不報警", action.interval, elapsed)

    def _run_due_operations(self, action_obj, host_id):
        """Run the operations of *action_obj* if its alert interval has elapsed."""
        host_state = self.alert_counter_dic[action_obj.id][host_id]
        elapsed = time.time() - host_state['last_alert']
        if elapsed < action_obj.interval:
            return

        # 4. Time to alert.
        print("該報警了.......", elapsed, action_obj.interval)
        for action_operation in action_obj.operations.select_related().order_by('-step'):
            if action_operation.step > host_state['counter']:
                print("##################alert action:%s" % action_operation.action_type,
                      action_operation.notifiers)
                # Dispatch by naming convention: action_type 'email' -> action_email.
                action_func = getattr(self, 'action_%s' % action_operation.action_type)
                action_func(action_obj, action_operation, host_id, self.trigger_data)
                # Reset the reference time after alerting so the alert
                # interval is measured afresh.
                host_state['last_alert'] = time.time()
                self.record_log(action_obj, action_operation, host_id, self.trigger_data)

    def trigger_process(self):
        """Analyse the trigger payload and send alerts where due.

        Payloads without a 'trigger_id' are only printed. Otherwise every
        action attached to the trigger that covers the payload's host gets
        its counter updated exactly once, and its operations are run when
        the action's interval has elapsed since the last alert.

        :return: None
        """
        print('Action Processing'.center(50, '-'))
        trigger_id = self.trigger_data.get('trigger_id')

        if trigger_id is None:
            print(self.trigger_data)
            if self.trigger_data.get('msg'):
                # No trigger id: the message is meant for the administrator
                # (currently it is only printed).
                print(self.trigger_data.get('msg'))
            else:
                print("\033[41;1mInvalid trigger data %s\033[0m" % self.trigger_data)
            return

        # A regular trigger: evaluate its actions.
        print("\033[33;1m%s\033[0m" % self.trigger_data)
        host_id = self.trigger_data.get('host_id')
        trigger_obj = models.Trigger.objects.get(id=trigger_id)
        actions_set = trigger_obj.action_set.select_related()  # actions linked to this trigger
        print("actions_set:", actions_set)

        matched_action_list = set()
        for action in actions_set:
            # Update once per action, however many paths (several groups,
            # group plus direct link) lead to the host; otherwise a single
            # trigger event could bump the counter more than once.
            if self._action_applies_to_host(action, host_id):
                matched_action_list.add(action)
                self._update_alert_counter(action, host_id)

        print("alert_counter_dic:", self.alert_counter_dic)
        print("matched_action_list:", matched_action_list)

        for action_obj in matched_action_list:
            self._run_due_operations(action_obj, host_id)