All checks were successful
- Prometheus alert rules (host, container, API, Celery, target-down)
- Alertmanager with email routing (critical 1h, warning 4h repeat)
- Docker network segmentation (frontend/backend/monitoring)
- Incident response runbook with 8 copy-paste runbooks
- Environment variables reference (55+ vars documented)
- Hetzner setup docs updated with Steps 19-24
- Launch readiness updated with Feb 2026 infrastructure status

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
54 lines
1.6 KiB
YAML
54 lines
1.6 KiB
YAML
# Prometheus configuration for Orion platform
# Docs: https://prometheus.io/docs/prometheus/latest/configuration/configuration/

# Global defaults; individual scrape jobs may override them.
global:
  # How often targets are scraped.
  scrape_interval: 15s
  # How often alerting/recording rules are evaluated.
  evaluation_interval: 15s

# ─── Alerting ────────────────────────────────────────────────────────────
# Alertmanager endpoint(s) that Prometheus sends firing alerts to.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["alertmanager:9093"]

# Rule files loaded by Prometheus and evaluated every `evaluation_interval`.
rule_files:
  - /etc/prometheus/alert.rules.yml

# ─── Scrape Configs ─────────────────────────────────────────────────────
# One job per scraped service. Every target carries a static `service`
# label that mirrors its job name. Jobs that do not set `metrics_path`
# rely on the Prometheus default path.
scrape_configs:
  # Orion API — /metrics endpoint (prometheus_client)
  - job_name: "orion-api"
    metrics_path: /metrics
    static_configs:
      - targets: ["api:8000"]
        labels:
          service: "orion-api"

  # Node Exporter — host-level CPU, RAM, disk metrics
  - job_name: "node-exporter"
    static_configs:
      - targets: ["node-exporter:9100"]
        labels:
          service: "node-exporter"

  # cAdvisor — per-container resource metrics
  - job_name: "cadvisor"
    static_configs:
      - targets: ["cadvisor:8080"]
        labels:
          service: "cadvisor"

  # Prometheus self-monitoring
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]
        labels:
          service: "prometheus"

  # Alertmanager
  - job_name: "alertmanager"
    static_configs:
      - targets: ["alertmanager:9093"]
        labels:
          service: "alertmanager"