# Adjust the node-related parameters below to match your actual environment.
# rules.linux.yml
# Prometheus alerting rules for Linux hosts, based on node_exporter metrics.
# Every rule carries severity "warning"; thresholds, durations and the
# instance/device exclusion regexes are site-specific and may need tuning.
groups:
  - name: linux
    rules:
      # Fires when a scrape target has reported up == 0 for one minute.
      - alert: Node-Down  # alert name
        expr: up == 0
        for: 1m  # how long the condition must hold before the alert is sent
        labels:
          severity: warning
        annotations:  # human-readable text attached to the notification
          summary: "Node has been down"
          # Include the instance label so the message says which target is down.
          description: "{{ $labels.instance }} has been down"

      # Memory usage: 100 minus the available/total percentage, above 80.
      - alert: "內存使用率太高"
        expr: round(100- node_memory_MemAvailable_bytes/node_memory_MemTotal_bytes*100) > 80
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "內存使用率太高"
          description: "當前使用率{{ $value }}%"

      # CPU usage: 100 minus the idle percentage averaged per (instance, job),
      # above 80. Instances whose name matches bac-.* are excluded.
      # NOTE(review): Prometheus docs recommend rate() over irate() for
      # alerting expressions; kept as irate() here to preserve behavior.
      - alert: "CPU使用率太高"
        expr: round(100 - ((avg by (instance,job)(irate(node_cpu_seconds_total{mode="idle",instance!~'bac-.*'}[5m]))) *100)) > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "CPU使用率太高"
          description: "當前使用率{{ $value }}%"

      # Filesystem usage on ext4/xfs: 100 minus the available/size percentage,
      # above 80.
      - alert: "磁盤使用率太高"
        expr: round(100-100*(node_filesystem_avail_bytes{fstype=~"ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext4|xfs"})) > 80
        for: 15s
        labels:
          severity: warning
        annotations:
          summary: "磁盤使用率太高"
          description: "當前磁盤{{$labels.mountpoint}} 使用率{{ $value }}%"

      # Free space on ext4/xfs, converted from bytes by dividing by 1024 three
      # times, below 10. Excludes the "testnode" instance and /boot* mountpoints.
      - alert: "分區容量太低"
        expr: round(node_filesystem_avail_bytes{fstype=~"ext4|xfs",instance!~"testnode",mountpoint!~"/boot.*"}/1024/1024/1024) < 10
        for: 15s
        labels:
          severity: warning
        annotations:
          summary: "分區容量太低"
          description: "當前分區{{$labels.mountpoint}} 容量{{ $value }}GB"

      # Outbound network rate: per-second increase of transmitted bytes divided
      # by 1024, above 2048. Uses the transmit counter because the alert is
      # about outbound traffic (the receive counter measures inbound).
      # Excludes data.* instances and virtual/loopback devices.
      - alert: "網絡流出速率太高"
        expr: round(irate(node_network_transmit_bytes_total{instance!~"data.*",device!~'tap.*|veth.*|br.*|docker.*|vir.*|lo.*|vnet.*'}[1m])/1024) > 2048
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "網絡流出速率太高"
          description: "當前速率{{ $value }}KB/s"