直接上部署配置文件
docker-compose.yml
# Monitoring stack: Prometheus, Grafana, node-exporter and Alertmanager.
# Every service joins the user-defined bridge network "monitor", so the
# containers can reach each other by service name.
version: "3"

networks:
  monitor:
    driver: bridge

services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      # Main configuration, TSDB storage and alert rule files from the host.
      - /data/monitor/prometheus.yml:/etc/prometheus/prometheus.yml
      - /data/monitor/prom_db:/prometheus
      - /data/monitor/prom_rules:/etc/prometheus/rules
    ports:
      - "9090:9090"
    networks:
      - monitor

  grafana:
    image: grafana/grafana
    container_name: grafana
    hostname: grafana
    restart: always
    volumes:
      - /data/monitor/ga_data:/var/lib/grafana
    environment:
      # Set GF_SECURITY_ADMIN_PASSWORD in the shell or in a .env file to keep
      # the real password out of this file; when it is unset the previous
      # hard-coded value is used, so existing deployments behave the same.
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin@123}
    ports:
      - "3000:3000"
    networks:
      - monitor

  node-exporter:
    image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    # Expose the host's root filesystem (read-only) and point the exporter at
    # it. Without this the filesystem collector only sees the container's own
    # mounts, so host mountpoints such as /logs, /backup or /home never show
    # up in node_filesystem_* metrics.
    command:
      - "--path.rootfs=/host"
    pid: host
    volumes:
      - /:/host:ro,rslave
    ports:
      - "9100:9100"
    networks:
      - monitor

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    # Added for consistency with the other services, which all set a hostname.
    hostname: alertmanager
    restart: always
    volumes:
      - /data/monitor/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"
    networks:
      - monitor
prometheus.yml
# Prometheus server configuration for the docker-compose monitoring stack.
global:
  scrape_interval: 60s
  evaluation_interval: 60s

scrape_configs:
  # Prometheus scraping itself; localhost is correct here because the target
  # lives in the same container.
  - job_name: "prom-server"
    static_configs:
      - targets: ["localhost:9090"]
        labels:
          instance: prometheus

  # node-exporter runs in its own container, so it has to be addressed by its
  # compose service name on the shared bridge network; "localhost" would
  # resolve to the Prometheus container itself.
  - job_name: "node_exporter"
    static_configs:
      - targets:
          - "node-exporter:9100"

alerting:
  alertmanagers:
    - static_configs:
        # Was "loclhost:9093": a misspelling, and localhost would not reach
        # the Alertmanager container anyway. Use the compose service name.
        - targets: ["alertmanager:9093"]

rule_files:
  # Load every rule file in the mounted rules directory, so the file name
  # on the host (rule.yml, rules.yml, ...) does not have to match exactly.
  - /etc/prometheus/rules/*.yml
alertmanager.yml
# Alertmanager configuration: a single route that e-mails every alert to one
# default receiver.
global:
  # Outgoing mail server and credentials (placeholders; fill in real values).
  smtp_smarthost: 'smtp.126.com:25'
  smtp_from: 'XXX@126.com'
  smtp_auth_username: 'XXXXX'
  smtp_auth_password: 'XXXXX'
  # TLS is not required for the SMTP connection.
  smtp_require_tls: false

route:
  receiver: default-receiver
  # Alerts that share these label values are batched into one notification.
  group_by:
    - alertname
    - cluster
    - service
  group_wait: 10s
  group_interval: 5m
  repeat_interval: 3h

receivers:
  - name: default-receiver
    email_configs:
      - to: 'battlescars@qq.com'
        require_tls: false
        # Also notify when the alert is resolved.
        send_resolved: true
rules.yml
# Prometheus alerting rules.
groups:
  - name: response-rule
    rules:
      # Fires when the used share of a watched mountpoint
      # (/, /logs, /backup, /home) is above 75 percent.
      - alert: NodeDiskUsageException
        expr: '(1 - (node_filesystem_free_bytes{mountpoint=~"/|/logs|/backup|/home"} / node_filesystem_size_bytes{mountpoint=~"/|/logs|/backup|/home"})) * 100 > 75'
        labels:
          ai_mon: node
        annotations:
          summary: '磁盘占用超标'
          description: '服务器{{$labels.instance}}磁盘{{$labels.device}}空间占用比例为{{$value}}%, 大于阈值75%'