feat(infra): add alerting, network segmentation, and ops docs (Steps 19-24)
All checks were successful
All checks were successful
- Prometheus alert rules (host, container, API, Celery, target-down) - Alertmanager with email routing (critical 1h, warning 4h repeat) - Docker network segmentation (frontend/backend/monitoring) - Incident response runbook with 8 copy-paste runbooks - Environment variables reference (55+ vars documented) - Hetzner setup docs updated with Steps 19-24 - Launch readiness updated with Feb 2026 infrastructure status Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
57
monitoring/alertmanager/alertmanager.yml
Normal file
57
monitoring/alertmanager/alertmanager.yml
Normal file
@@ -0,0 +1,57 @@
|
||||
# Alertmanager Configuration for Orion Platform
|
||||
# Docs: https://prometheus.io/docs/alerting/latest/configuration/
|
||||
|
||||
global:
  resolve_timeout: 5m

  # SMTP settings shared by the email receivers defined in this file.
  # Every value marked TODO is a placeholder and must be filled in
  # before notifications can be delivered.

  # TODO: Replace with your SMTP server
  smtp_smarthost: 'smtp.example.com:587'
  # TODO: Replace with your sender address
  smtp_from: 'alerts@wizard.lu'
  # TODO: Fill in SMTP username
  smtp_auth_username: ''
  # TODO: Fill in SMTP password
  smtp_auth_password: ''
  smtp_require_tls: true
|
||||
|
||||
route:
  # Group alerts by name and severity so each notification covers one
  # alertname/severity pair.
  group_by: ['alertname', 'severity']
  group_wait: 30s
  group_interval: 5m

  # Defaults applied to any alert that no child route below matches.
  repeat_interval: 4h
  receiver: 'email-warnings'

  routes:
    # Critical alerts: repeat every 1 hour.
    # `matchers` replaces the deprecated `match` key; the equality
    # matcher below selects exactly the same alerts.
    # NOTE(review): `matchers` needs Alertmanager 0.22 or newer — confirm
    # the deployed image version.
    - matchers:
        - 'severity = "critical"'
      receiver: 'email-critical'
      repeat_interval: 1h

    # Warning alerts: repeat every 4 hours.
    - matchers:
        - 'severity = "warning"'
      receiver: 'email-warnings'
      repeat_interval: 4h
|
||||
|
||||
receivers:
  # Target of the `severity: critical` route.
  - name: 'email-critical'
    email_configs:
      # TODO: Replace with your alert recipient
      - to: 'admin@wizard.lu'
        # Also send a mail when the alert clears.
        send_resolved: true
        headers:
          # alertname is one of the route's group_by labels, so it is
          # available on .GroupLabels.
          Subject: '[CRITICAL] Orion: {{ .GroupLabels.alertname }}'

  # Target of the warning route and of the root (default) route.
  - name: 'email-warnings'
    email_configs:
      # TODO: Replace with your alert recipient
      - to: 'admin@wizard.lu'
        # Also send a mail when the alert clears.
        send_resolved: true
        headers:
          Subject: '[WARNING] Orion: {{ .GroupLabels.alertname }}'
|
||||
|
||||
# Inhibition rules — suppress warnings when critical is already firing.
# A warning is muted only while a critical alert with the same
# `alertname` and `instance` label values is active.
# `source_matchers` / `target_matchers` replace the deprecated
# `source_match` / `target_match` keys; the selection is unchanged.
# NOTE(review): this rule has an effect only if the Prometheus rules emit
# the warning and critical variants under the same alertname — confirm
# against the alert rule files.
inhibit_rules:
  - source_matchers:
      - 'severity = "critical"'
    target_matchers:
      - 'severity = "warning"'
    equal: ['alertname', 'instance']
|
||||
Reference in New Issue
Block a user