For monitoring targets within a single zone, two Prometheus servers scrape the same group of targets.
After the change:
This change doubles the number of scrape requests each monitored target receives, but if one Prometheus server goes down, monitoring is unaffected.
This is a pyramid-style hierarchy, not a distributed one. Keep in mind that the federation scrape requests also put load on the Prometheus worker nodes, which needs to be taken into account.
For the setup above, prepare three Prometheus servers. Note that with this scheme, if one worker node goes down, the other worker nodes will not take over the failed worker's targets.
1. Environment preparation
192.168.31.151 (primary)
192.168.31.144 (worker)
192.168.31.82 (worker)
2. Deploy Prometheus
cd /usr/local
tar -xvf prometheus-2.8.0.linux-amd64.tar.gz
ln -s /usr/local/prometheus-2.8.0.linux-amd64 /usr/local/prometheus
cd /usr/local/prometheus
mkdir bin conf data
mv ./promtool bin
mv ./prometheus bin
mv ./prometheus.yml conf
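The tarball does not ship a service unit, so for testing, each of the three servers can be started by hand. A minimal invocation with the directory layout above (the --web.listen-address flag is only needed if you want something other than the default 0.0.0.0:9090):

cd /usr/local/prometheus
./bin/prometheus --config.file=conf/prometheus.yml --storage.tsdb.path=data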
3. Worker node configuration (192.168.31.144)
prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  external_labels:
    worker: 0

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules/*_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - 192.168.31.151:9090
          - 192.168.31.144:9090
          - 192.168.31.82:9090
    relabel_configs:
      - source_labels: [__address__]
        modulus: 2
        target_label: __tmp_hash
        action: hashmod
      - source_labels: [__tmp_hash]
        regex: ^0$
        action: keep

  - job_name: 'node_exporter'
    file_sd_configs:
      - files:
          - targets/nodes/*.json
        refresh_interval: 1m
    relabel_configs:
      - source_labels: [__address__]
        modulus: 2
        target_label: __tmp_hash
        action: hashmod
      - source_labels: [__tmp_hash]
        regex: ^0$
        action: keep

  - job_name: 'docker'
    file_sd_configs:
      - files:
          - targets/docker/*.json
        refresh_interval: 1m
    relabel_configs:
      - source_labels: [__address__]
        modulus: 2
        target_label: __tmp_hash
        action: hashmod
      - source_labels: [__tmp_hash]
        regex: ^0$
        action: keep

  - job_name: 'alertmanager'
    static_configs:
      - targets:
          - 192.168.31.151:9093
          - 192.168.31.144:9093
    relabel_configs:
      - source_labels: [__address__]
        modulus: 2
        target_label: __tmp_hash
        action: hashmod
      - source_labels: [__tmp_hash]
        regex: ^0$
        action: keep
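The two relabel steps are what shard the targets: hashmod hashes __address__ into __tmp_hash with a value in 0..modulus-1, and keep drops every target whose hash does not match this worker's slot (regex ^0$ here, ^1$ on the other worker). To predict which worker a given target lands on, the hash can be reproduced outside Prometheus. The sketch below mirrors the hashmod logic in the Prometheus 2.x source (MD5 of the joined source label values, folded into a uint64, then reduced modulo modulus); treat it as illustrative rather than authoritative:

package main

import (
	"crypto/md5"
	"encoding/binary"
	"fmt"
)

// hashmod approximates Prometheus's hashmod relabel action: MD5 the label
// value, fold the digest into a uint64, and take the modulus. Only the low
// 8 bytes of the digest contribute to the result.
func hashmod(value string, modulus uint64) uint64 {
	sum := md5.Sum([]byte(value))
	return binary.BigEndian.Uint64(sum[8:]) % modulus
}

func main() {
	// The three servers from the 'prometheus' job above; worker 0 keeps the
	// targets that hash to 0, worker 1 those that hash to 1.
	for _, target := range []string{
		"192.168.31.151:9090",
		"192.168.31.144:9090",
		"192.168.31.82:9090",
	} {
		fmt.Printf("%s -> worker %d\n", target, hashmod(target, 2))
	}
}

Because the hash depends only on __address__, the assignment is stable across restarts, which is also why a dead worker's targets are not picked up by the surviving one.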
Worker node configuration (192.168.31.82)
prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  external_labels:
    worker: 1

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules/*_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - 192.168.31.151:9090
          - 192.168.31.144:9090
          - 192.168.31.82:9090
    relabel_configs:
      - source_labels: [__address__]
        modulus: 2
        target_label: __tmp_hash
        action: hashmod
      - source_labels: [__tmp_hash]
        regex: ^1$
        action: keep

  - job_name: 'node_exporter'
    file_sd_configs:
      - files:
          - targets/nodes/*.json
        refresh_interval: 1m
    relabel_configs:
      - source_labels: [__address__]
        modulus: 2
        target_label: __tmp_hash
        action: hashmod
      - source_labels: [__tmp_hash]
        regex: ^1$
        action: keep

  - job_name: 'docker'
    file_sd_configs:
      - files:
          - targets/docker/*.json
        refresh_interval: 1m
    relabel_configs:
      - source_labels: [__address__]
        modulus: 2
        target_label: __tmp_hash
        action: hashmod
      - source_labels: [__tmp_hash]
        regex: ^1$
        action: keep

  - job_name: 'alertmanager'
    static_configs:
      - targets:
          - 192.168.31.151:9093
          - 192.168.31.144:9093
    relabel_configs:
      - source_labels: [__address__]
        modulus: 2
        target_label: __tmp_hash
        action: hashmod
      - source_labels: [__tmp_hash]
        regex: ^1$
        action: keep
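Both workers read the same file_sd target lists, so targets/nodes/*.json and targets/docker/*.json only need to be maintained once and synced to both machines; the hashmod/keep pair then decides which worker actually scrapes each entry. A hypothetical targets/nodes/nodes.json in the standard file_sd format (hosts and ports are placeholders for your node_exporter instances):

[{
  "targets": [
    "192.168.31.151:9100",
    "192.168.31.144:9100",
    "192.168.31.82:9100"
  ]
}]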
Primary node configuration (192.168.31.151)
prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 192.168.31.151:9093
            - 192.168.31.144:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules/*_alerts.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  - job_name: 'node_workers'
    file_sd_configs:
      - files:
          - 'targets/workers/*.json'
        refresh_interval: 5m
    honor_labels: true
    metrics_path: /federate
    params:
      'match[]':
        - '{__name__=~"^instance:.*"}'
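The match[] parameter only federates series whose names begin with instance:, the conventional prefix for instance-level recording rules, so the workers have to produce such series through the rules/*_rules.yml files they load. A minimal sketch of one such file on a worker (the rule name and expression are illustrative and assume node_exporter 0.16+ metric names):

groups:
  - name: node_rules
    rules:
      - record: instance:node_cpu:avg_rate5m
        expr: avg by (instance) (rate(node_cpu_seconds_total{mode!="idle"}[5m]))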
cat ./targets/workers/workers.json
[{ "targets": [ "192.168.31.144:9090", "192.168.31.82:9090" ] }]