//檢查各個端口是否已放行
//部署各個模塊與應用
cd /usr/local/Prometheus_compose
vim docker-compose.yml

# docker-compose.yml: monitoring stack made of Prometheus, node-exporter,
# Grafana, Alertmanager, cAdvisor and grafana-reporter.
version: "3"
services:
  # Prometheus server; its configuration directory is bind-mounted from
  # ./prometheus and the config file is selected on the command line.
  prom:
    image: quay.io/prometheus/prometheus:latest
    container_name: prometheus
    volumes:
      - ./prometheus:/etc/prometheus
    command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus"
    ports:
      - "9090:9090"
    depends_on:
      - exporter
    environment:
      - TZ=Asia/Shanghai

  # node-exporter for the host running this stack.
  exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    hostname: cicd
    ports:
      - "9100:9100"
    environment:
      - TZ=Asia/Shanghai

  # Grafana; started after Prometheus.
  grafana:
    image: grafana/grafana
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      # Plain ASCII quotes only: any other quote character would become part
      # of the variable name/value.
      # Example password from the tutorial; replace it before real use.
      - "GF_SECURITY_ADMIN_PASSWORD=123123"
      - "GF_INSTALL_PLUGINS=alexanderzobnin-zabbix-app"
      - TZ=Asia/Shanghai
    restart: "always"
    volumes:
      - ./grafana:/etc/grafana/
      - ./grafana/conf/grafana.ini:/etc/grafana/grafana.ini
      - ./grafana/data:/var/lib/grafana:rw
      - ./grafana/plugins:/var/lib/grafana/plugins:rw
      - /etc/localtime:/etc/localtime
    depends_on:
      - prom

  # Alertmanager; config and data directories are bind-mounted from
  # ./alertmanager.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    ports:
      - "9093:9093"
    volumes:
      - "./alertmanager/config:/etc/alertmanager"
      - "./alertmanager/data:/alertmanager/data"
    command:
      - "--config.file=/etc/alertmanager/alertmanager.yml"
    environment:
      - TZ=Asia/Shanghai

  # cAdvisor; runs privileged with host paths mounted into the container.
  cadvisor:
    image: google/cadvisor
    container_name: cadvisors
    restart: always
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - "8080:8080"
    privileged: true
    environment:
      - TZ=Asia/Shanghai

  # grafana-reporter.
  # NOTE(review): "-ip" presumably points the reporter at the Grafana
  # instance; confirm whether a host:port value is required.
  grafana-reporter:
    image: izakmarais/grafana-reporter
    container_name: grafana_reporter
    ports:
      - "8686:8686"
    command: "-ip grafana.mitaiot.com"
    environment:
      - TZ=Asia/Shanghai
//編輯報警模塊的配置文件
cd /usr/local/Prometheus_compose/alertmanager/config
cat alertmanager.yml

# alertmanager.yml: send every alert by e-mail through the SMTP account below.
global:
  resolve_timeout: 5m
  # SMTP account used for outgoing alert mail.
  smtp_from: "123456789@sina.com"
  smtp_smarthost: "smtp.sina.com:587"
  smtp_auth_username: "123456789@sina.com"
  smtp_auth_password: "aabbccdd"
  smtp_require_tls: false
  smtp_hello: "sina.com"

# Single route: group by alert name and deliver to the "email" receiver.
route:
  group_by:
    - alertname
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m
  receiver: email

receivers:
  - name: email
    email_configs:
      # Recipient and HTML body are rendered from the named templates
      # "email.to" and "email.to.html".
      - to: '{{ template "email.to" . }}'
        html: '{{ template "email.to.html" . }}'
        send_resolved: true

# Inhibition: source alerts carry severity=critical, target alerts carry
# severity=warning, and the labels listed under "equal" must match.
inhibit_rules:
  - source_match:
      severity: critical
    target_match:
      severity: warning
    equal:
      - alertname
      - dev
      - instance

# Template files loaded by Alertmanager.
templates:
  - /etc/alertmanager/alertmanager-tmpl/email.tmpl
//編輯發送的郵件模板
cd /usr/local/Prometheus_compose/alertmanager/config/alertmanager-tmpl
cat email.tmpl

{{/* Named templates holding the sender and recipient addresses. */}}
{{ define "email.from" }}123456789@sina.com{{ end }}
{{ define "email.to" }}123456789@sina.com{{ end }}
{{/*
  HTML mail body: one section per alert in .Alerts.
  The time layout must be written with Go's reference time
  (2006-01-02 15:04:05); any other date is not interpreted as a layout and
  produces a garbled timestamp.
*/}}
{{ define "email.to.html" }}
{{ range .Alerts }}
=========start==========<br>
告警程序: prometheus_alert <br>
告警級別: {{ .Labels.severity }} 級 <br>
告警類型: {{ .Labels.alertname }} <br>
故障主機: {{ .Labels.instance }} <br>
告警主題: {{ .Annotations.summary }} <br>
告警詳情: {{ .Annotations.description }} <br>
觸發時間: {{ .StartsAt.Format "2006-01-02 15:04:05" }} <br>
=========end==========<br>
{{ end }}
{{ end }}

cd /usr/local/Prometheus_compose/grafana/conf
vim grafana.ini

# The configuration file is long; only the modified sections are shown.

# Anonymous access with the Viewer role in the "Main Org." organisation.
[auth.anonymous]
enabled = true
org_name = Main Org.
org_role = Viewer

# Outgoing mail settings.
[smtp]
enabled = true
host = smtp.sina.com:587
user = 123456789@sina.com
password = dc28ac6ec64af9c1
skip_verify = true
from_address = 123456789@sina.com
from_name = Grafana
ehlo_identity =
# Added: time zone setting. The comment sits on its own line so it cannot be
# read as part of the value.
# NOTE(review): recent Grafana versions document default_timezone under the
# [date_formats] section; confirm the correct section for your version.
default_timezone = Asia/Shanghai
//修改 Prometheus(普羅米修斯)的配置文件
cd /usr/local/Prometheus_compose/prometheus

# prometheus.yml: scrape targets, Alertmanager endpoint and rule file glob.
global:
  # Scrape targets every 5 seconds (the default is every 1 minute).
  scrape_interval: 5s
  # Evaluate rules every 5 seconds (the default is every 1 minute).
  evaluation_interval: 5s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['172.16.225.154:9090']

  # Job "node" combines file-based discovery with a static target list.
  # The static list mixes ports 9100, 8080/8085 and 9104, and all of them
  # carry job="node".
  - job_name: 'node'
    file_sd_configs:
      - files: ['/etc/prometheus/groups/nodegroups/*.json']
    static_configs:
      - targets:
          - '172.16.225.154:9100'
          - '172.16.225.156:9100'
          - '172.16.225.155:9100'
          - '172.16.225.157:9100'
          - '172.16.225.156:8085'
          - '172.16.225.154:8080'
          - '172.16.225.155:8085'
          - '172.16.225.157:8085'
          - '172.16.225.157:9104'

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '172.16.225.154:9093'

rule_files:
  - "/etc/prometheus/rules/*.yml"

//配置報警規則
cd /usr/local/Prometheus_compose/prometheus/rules

# Alerting rules. Label values are quoted so they stay strings.
groups:
  - name: node-up
    rules:
      # Fires when a target of job "node" has been down for 15 seconds.
      - alert: node-up
        expr: up{job="node"} == 0
        for: 15s
        labels:
          severity: "1"
          team: node
        annotations:
          summary: "{{ $labels.instance }} 已中止運行!"
          description: "{{ $labels.instance }} 檢測到異常中止!請重點關注!!!"

  - name: node-cpu
    rules:
      # Fires when CPU usage (100 minus the idle percentage) stays above 90
      # for 1 minute.
      - alert: node-cpu
        expr: 100 - ((avg by (instance,job,env)(irate(node_cpu_seconds_total{mode="idle"}[30s]))) *100) > 90
        for: 1m
        labels:
          severity: "1"
          team: node
          level: warning
        annotations:
          summary: "{{ $labels.instance }} CPU使用率超過 百分之90!"
          description: "{{ $labels.instance }} 檢測CPU連續1分鐘佔用率超出90%!請重點關注!!!"

  - name: node-mem
    rules:
      # Fires when memory usage is above 90%.
      # Usage is computed as total - free - buffers - cached.
      - alert: node-mem
        expr: ((node_memory_MemTotal_bytes -(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) )/node_memory_MemTotal_bytes ) * 100 > 90
        for: 5s
        labels:
          severity: "1"
          team: node
          level: warning
        annotations:
          summary: "{{ $labels.instance }} MEM使用率超過 百分之90!"
          # The description previously repeated the CPU rule's text; it now
          # reports memory usage.
          description: "{{ $labels.instance }} 檢測內存使用率超出90%!請重點關注!!!"

  - name: node-disk_used
    rules:
      # Fires when usage of an ext3/ext4/xfs filesystem stays above 90%
      # for 1 minute.
      - alert: node-disk_used
        expr: 100 - (node_filesystem_free_bytes{fstype=~"ext3|ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext3|ext4|xfs"} * 100) > 90
        for: 1m
        labels:
          severity: "1"
          team: node
          level: warning
        annotations:
          summary: "{{ $labels.instance }} 掛在分區使用率超過 百分之90!"
          description: "{{ $labels.instance }} 掛在分區使用率超出90%!請重點關注!!!"
#如需監控 MySQL、容器和主機信息,須要在被監控主機上部署 prom/node-exporter、cadvisor、prom/mysqld-exporter。
# docker-compose.yml for a monitored host: node-exporter, cAdvisor and
# mysqld-exporter.
version: "3"
services:
  # Host metrics, published on port 9100.
  exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    hostname: db01
    ports:
      - "9100:9100"

  # cAdvisor: container port 8080 is published on host port 8085.
  cadvisor:
    image: google/cadvisor
    container_name: cadvisor
    restart: always
    privileged: true
    ports:
      - "8085:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro

  # mysqld-exporter; the connection string is passed in DATA_SOURCE_NAME.
  mysqld-exporter:
    image: prom/mysqld-exporter
    container_name: mysql_exporter
    hostname: db01
    restart: always
    ports:
      - "9104:9104"
    environment:
      - 'DATA_SOURCE_NAME=root:0GXwwchW4rP@(172.16.225.157:3306)/'
      - TZ=Asia/Shanghai
導入模板,模板 ID 分別是:8919、7362
更多模塊鏈接訪問:點擊獲取監控模塊,只須要導入對應 ID 便可。