feat: add automated backups and Prometheus/Grafana monitoring stack (Steps 17-18)
Some checks failed
Some checks failed
Backups: pg_dump scripts with daily/weekly rotation and Cloudflare R2 offsite sync. Monitoring: Prometheus, Grafana, node-exporter, cAdvisor in docker-compose; /metrics endpoint activated via prometheus_client. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -117,6 +117,94 @@ services:
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
# =========================================================================
|
||||
# MONITORING STACK
|
||||
# =========================================================================
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
restart: always
|
||||
profiles:
|
||||
- full
|
||||
ports:
|
||||
- "127.0.0.1:9090:9090"
|
||||
volumes:
|
||||
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||
- prometheus_data:/prometheus
|
||||
command:
|
||||
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||
- "--storage.tsdb.retention.time=15d"
|
||||
- "--storage.tsdb.retention.size=2GB"
|
||||
- "--web.enable-lifecycle"
|
||||
mem_limit: 256m
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -qO- http://localhost:9090/-/healthy || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
restart: always
|
||||
profiles:
|
||||
- full
|
||||
ports:
|
||||
- "127.0.0.1:3001:3000"
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
|
||||
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-changeme}
|
||||
GF_SERVER_ROOT_URL: ${GRAFANA_URL:-https://grafana.wizard.lu}
|
||||
volumes:
|
||||
- grafana_data:/var/lib/grafana
|
||||
- ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
- ./monitoring/grafana/provisioning/dashboards/json:/var/lib/grafana/dashboards:ro
|
||||
mem_limit: 192m
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -qO- http://localhost:3000/api/health || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
node-exporter:
|
||||
image: prom/node-exporter:latest
|
||||
restart: always
|
||||
profiles:
|
||||
- full
|
||||
ports:
|
||||
- "127.0.0.1:9100:9100"
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
command:
|
||||
- "--path.procfs=/host/proc"
|
||||
- "--path.sysfs=/host/sys"
|
||||
- "--path.rootfs=/rootfs"
|
||||
- "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
|
||||
mem_limit: 64m
|
||||
|
||||
cadvisor:
|
||||
image: gcr.io/cadvisor/cadvisor:latest
|
||||
restart: always
|
||||
profiles:
|
||||
- full
|
||||
ports:
|
||||
- "127.0.0.1:8080:8080"
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:ro
|
||||
- /sys:/sys:ro
|
||||
- /var/lib/docker/:/var/lib/docker:ro
|
||||
- /dev/disk/:/dev/disk:ro
|
||||
privileged: true
|
||||
devices:
|
||||
- /dev/kmsg
|
||||
mem_limit: 128m
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
name: orion_postgres_data
|
||||
prometheus_data:
|
||||
name: orion_prometheus_data
|
||||
grafana_data:
|
||||
name: orion_grafana_data
|
||||
|
||||
Reference in New Issue
Block a user