接上一篇 快速搭建 Windows Kubernetes , 咱們發現原來在 Windows Kubernetes 會有一些與在 Linux 上使用不同的體驗,俗稱坑,例如 hostAliases。對於咱們但願真的把 Windows 放入生產,感受除了基本的 Pod、Volume、Service 、Log 之外,咱們還須要監控。通常來說咱們會用 Prometheus 來作監控,而後經過 Grafana 來展現,可是 Prometheus 的 Node Exporter 是爲 *nix 設計的,因此在 Windows 上咱們得本身想辦法了。在 Prometheus Node Exporter 裏推薦使用 WMI exporter ,感興趣的童鞋能夠去試試,本文主要仍是想從一個原始的角度去分析處理,來理解怎麼去寫一個 Prometheus 的採集程序。
Kubelet 在 <Node_IP>:10255/stats/summary
上 expose metrics, metrics-server 與 kubectl top
的數據也是來源於此,大體以下:{ "node": { "nodeName": "35598k8s9001", "startTime": "2018-08-26T07:25:08Z", "cpu": { "time": "2018-09-10T01:44:52Z", "usageCoreNanoSeconds": 8532520000000 }, "memory": { "time": "2018-09-10T01:44:52Z", "availableBytes": 14297423872, "usageBytes": 1978798080, "workingSetBytes": 734490624, "rssBytes": 0, "pageFaults": 0, "majorPageFaults": 0 }, "fs": { "time": "2018-09-10T01:44:52Z", "availableBytes": 15829303296, "capacityBytes": 32212250624, "usedBytes": 16382947328 }, "runtime": { "imageFs": { "time": "2018-09-10T01:44:53Z", "availableBytes": 15829303296, "capacityBytes": 32212250624, "usedBytes": 16382947328, "inodesUsed": 0 } } }, "pods": [ { "podRef": { "name": "stdlogserverwin-5fbcc5648d-ztqsq", "namespace": "default", "uid": "f461a0b4-ab36-11e8-93c4-0017fa0362de" }, "startTime": "2018-08-29T02:55:15Z", "containers": [ { "name": "stdlogserverwin", "startTime": "2018-08-29T02:56:24Z", "cpu": { "time": "2018-09-10T01:44:54Z", "usageCoreNanoSeconds": 749578125000 }, "memory": { "time": "2018-09-10T01:44:54Z", "workingSetBytes": 83255296 }, "rootfs": { "time": "2018-09-10T01:44:54Z", "availableBytes": 15829303296, "capacityBytes": 32212250624, "usedBytes": 0 }, "logs": { "time": "2018-09-10T01:44:53Z", "availableBytes": 15829303296, "capacityBytes": 32212250624, "usedBytes": 16382947328, "inodesUsed": 0 }, "userDefinedMetrics": null } ], "cpu": { "time": "2018-08-29T02:56:24Z", "usageNanoCores": 0, "usageCoreNanoSeconds": 749578125000 }, "memory": { "time": "2018-09-10T01:44:54Z", "availableBytes": 0, "usageBytes": 0, "workingSetBytes": 83255296, "rssBytes": 0, "pageFaults": 0, "majorPageFaults": 0 }, "volume": [ { "time": "2018-08-29T02:55:16Z", "availableBytes": 17378648064, "capacityBytes": 32212250624, "usedBytes": 14833602560, "inodesFree": 0, "inodes": 0, "inodesUsed": 0, "name": "default-token-wv5fc" } ], "ephemeral-storage": { "time": "2018-09-10T01:44:54Z", "availableBytes": 15829303296, "capacityBytes": 32212250624, "usedBytes": 
16382947328 } } ] }
class Node:
    """Node-level resource usage sampled from the kubelet summary API.

    NOTE(review): per the consuming script, ``cpu`` holds
    ``usageCoreNanoSeconds`` (cumulative) and ``memory`` holds
    ``usageBytes`` — confirm against the /stats/summary payload.
    """

    def __init__(self, name='', cpu=0, memory=0):
        # Defaults let callers build an empty Node and fill the fields in
        # afterwards, instead of the Node('', '', '') placeholder idiom.
        self.name = name
        self.cpu = cpu
        self.memory = memory

    def __repr__(self):
        return f"Node(name={self.name!r}, cpu={self.cpu!r}, memory={self.memory!r})"


class Pod:
    """Per-pod resource usage (CPU cumulative nanoseconds, working-set bytes)."""

    def __init__(self, name='', namespace='', cpu=0, memory=0):
        self.name = name
        self.namespace = namespace
        self.cpu = cpu
        self.memory = memory

    def __repr__(self):
        return (f"Pod(name={self.name!r}, namespace={self.namespace!r}, "
                f"cpu={self.cpu!r}, memory={self.memory!r})")


class Stats:
    """One snapshot of the summary endpoint: a Node plus its list of Pods."""

    def __init__(self, node=None, pods=None):
        self.node = node
        # Avoid a shared mutable default; each Stats gets its own list.
        self.pods = [] if pods is None else pods

    def __repr__(self):
        return f"Stats(node={self.node!r}, pods={self.pods!r})"
from urllib.request import urlopen
from stats import Node
from stats import Pod
from stats import Stats
import json
import asyncio
import prometheus_client as prom
import logging
import random


def getMetrics(url):
    """Fetch the kubelet /stats/summary endpoint and map it onto Stats.

    Returns a Stats instance carrying the node's cumulative CPU / memory
    usage plus one Pod entry per pod reported by the kubelet.
    """
    # Context manager guarantees the HTTP response is closed
    # (the original leaked the socket on every poll).
    with urlopen(url) as response:
        json_obj = json.loads(response.read().decode('utf-8'))

    # Map the raw JSON onto the stats objects defined in stats.py.
    node = Node('', '', '')
    node.name = json_obj['node']['nodeName']
    node.cpu = json_obj['node']['cpu']['usageCoreNanoSeconds']
    node.memory = json_obj['node']['memory']['usageBytes']

    pods_list = []
    for item in json_obj['pods']:
        pod = Pod('', '', '', '')
        pod.name = item['podRef']['name']
        pod.namespace = item['podRef']['namespace']
        pod.cpu = item['cpu']['usageCoreNanoSeconds']
        pod.memory = item['memory']['workingSetBytes']
        pods_list.append(pod)

    stats = Stats('', '')
    stats.node = node
    stats.pods = pods_list
    return stats


# Simple log line format.  Named log_format so the builtin `format`
# is not shadowed (the original used `format` as the variable name).
log_format = "%(asctime)s - %(levelname)s [%(name)s] %(threadName)s %(message)s"
logging.basicConfig(level=logging.INFO, format=log_format)

# Declare the exported metrics and their labels for later querying.
# Help strings fixed: g3/g4 previously carried copy-pasted node descriptions.
g1 = prom.Gauge('node_cpu_usageCoreNanoSeconds', 'CPU usage of the node',
                labelnames=['node_name'])
g2 = prom.Gauge('node_mem_usageBytes', 'Memory usage of the node',
                labelnames=['node_name'])
g3 = prom.Gauge('pod_cpu_usageCoreNanoSeconds', 'CPU usage of the pod',
                labelnames=['pod_name', 'pod_namespace'])
g4 = prom.Gauge('pod_mem_usageBytes', 'Memory usage of the pod',
                labelnames=['pod_name', 'pod_namespace'])


async def expose_stats(url):
    """Poll the kubelet once a second and refresh the exported gauges."""
    while True:
        stats = getMetrics(url)
        # Log the node-level sample as a liveness hint.
        logging.info("nodename: {} value {}".format(stats.node.name, stats.node.cpu))
        # Set the gauges Prometheus will poll next scrape.
        g1.labels(node_name=stats.node.name).set(stats.node.cpu)
        g2.labels(node_name=stats.node.name).set(stats.node.memory)
        for item in stats.pods:
            # BUGFIX: the original set the CPU gauge from item.memory and
            # the memory gauge from item.cpu — the values were swapped.
            g3.labels(pod_name=item.name, pod_namespace=item.namespace).set(item.cpu)
            g4.labels(pod_name=item.name, pod_namespace=item.namespace).set(item.memory)
        await asyncio.sleep(1)


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    # Start the HTTP server Prometheus will scrape for the gauges above.
    prom.start_http_server(8000)
    # Run one exporter per Windows node, or point the URL at a remote kubelet.
    url = 'http://localhost:10255/stats/summary'
    tasks = [loop.create_task(expose_stats(url))]
    try:
        loop.run_forever()
    except KeyboardInterrupt:
        pass
    finally:
        loop.close()
# Prometheus scrape job for the kubelet-stats exporter.
# prometheus_client's start_http_server serves the metrics on port 8000.
- job_name: python_app
  scrape_interval: 15s
  scrape_timeout: 10s
  metrics_path: /
  scheme: http
  static_configs:
    - targets:
        - localhost:8000
usageNanoCores 和 usageCoreNanoSeconds 怎麼換算成咱們一般理解的 CPU 使用百分比?