(四) Prometheus 監控思科交換機---Alertmanager 郵件報警與報警展示

時間 2020-10-17

標籤 html docker 微信 ssh tcp ide 工具測試 lua rest 欄目 HTML 简体版

原文鏈接

Alertmanager 郵件報警與報警展示

Prometheus 監控思科交換機文檔完整地址：https://blog.51cto.com/liujingyu/category9.html
總體邏輯效果圖

修改 alertmanager.yml 配置郵件報警對象

[root@localhost alertmanager]# cat alertmanager.yml
global:
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.163.com:25'
  smtp_from: '***@163.com'
  smtp_auth_username: '***@163.com'
  smtp_auth_password: 'PASSWORD'

route:
#  group_by: ['alertname']
  group_wait: 10s
  group_interval: 1m
  repeat_interval: 1m  # 僅供測試：未恢復的報警每分鐘重發一次；生產環境建議調大（默認值爲 4h）
  receiver: 'jsb'

receivers:
- name: 'jsb'
  email_configs:
  - to: "TARGET_ADDRESS@163.com"

# 使用 Alertmanager 自帶的 amtool 工具檢查 alertmanager.yml 配置文件的書寫是否正確
[root@localhost alertmanager]# ./amtool check-config alertmanager.yml
Checking 'alertmanager.yml'  SUCCESS
Found:
 - global config
 - route
 - 0 inhibit rules
 - 1 receivers
 - 0 templates

[root@localhost alertmanager]# systemctl restart alertmanager
[root@localhost alertmanager]# netstat -tnlp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address           Foreign Address         State       PID/Program name
tcp        0      0 0.0.0.0:22              0.0.0.0:*               LISTEN      20451/sshd
tcp        0      0 127.0.0.1:25            0.0.0.0:*               LISTEN      20561/master
tcp6       0      0 :::22                   :::*                    LISTEN      20451/sshd
tcp6       0      0 :::3000                 :::*                    LISTEN      11761/docker-proxy
tcp6       0      0 ::1:25                  :::*                    LISTEN      20561/master
tcp6       0      0 :::9116                 :::*                    LISTEN      27273/snmp_exporter
tcp6       0      0 :::9090                 :::*                    LISTEN      25929/docker-proxy
tcp6       0      0 :::9093                 :::*                    LISTEN      4509/alertmanager
tcp6       0      0 :::9094                 :::*                    LISTEN      4509/alertmanager

配置 prometheus 接入 Alertmanager 報警對象

[root@e36188d4c068 prometheus]# cat prometheus.yml
···
# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
       - 192.168.202.239:9093  # Alertmanager 的 IP 地址和端口

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules/*.rules"
  # - "second_rules.yml"
···
[root@e36188d4c068 prometheus]# cat rules/test.rules
groups:
- name: test_rules
  rules:
  - alert: InstanceDown
    expr: up == 0
    for: 1m
    labels:
      severity: error
    annotations:
      summary: "Instance {{ $labels.instance }} has been down for 1 minute!!!"
      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute"
[root@e36188d4c068 prometheus]# cat rules/cpu.rules
groups:
- name: host
  rules:
  - alert: NodeCPUUsage
    annotations:
      description: "{{ $labels.instance }} CPU > 60% (The current value: {{ $value }})"
      summary: "Instance {{ $labels.instance }} CPU usage too high!!!"
    expr: avgBusy1 > 60
    for: 1m
    labels:
      severity: warning 

# 使用 promtool 檢查配置文件的書寫是否正確，然後重啓 Prometheus
[root@e36188d4c068 prometheus]# ./promtool check config prometheus.yml
Checking prometheus.yml
  SUCCESS: 2 rule files found

Checking rules/cpu.rules
  SUCCESS: 1 rules found

Checking rules/test.rules
  SUCCESS: 1 rules found

[root@e36188d4c068 prometheus]# systemctl restart prometheus