diff --git a/.gitignore b/.gitignore
index 8e18b447..20f9ca3b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -192,3 +192,7 @@ exports/
 
 # Security audit (needs revamping)
 scripts/security-audit/
+
+# Alertmanager config is per-host (contains SMTP credentials) — ship
+# alertmanager.yml.example as the template, real file lives outside git.
+monitoring/alertmanager/alertmanager.yml
diff --git a/monitoring/alertmanager/alertmanager.yml b/monitoring/alertmanager/alertmanager.yml
deleted file mode 100644
index 0e405d4d..00000000
--- a/monitoring/alertmanager/alertmanager.yml
+++ /dev/null
@@ -1,58 +0,0 @@
-# Alertmanager Configuration for Orion Platform
-# Docs: https://prometheus.io/docs/alerting/latest/configuration/
-
-global:
-  resolve_timeout: 5m
-
-  # ─── SMTP Configuration (SendGrid) ──────────────────────────────────
-  # Sign up at sendgrid.com, create an API key, authenticate wizard.lu domain
-  # Username is literally the string "apikey", password is your SG.xxx API key
-  smtp_smarthost: 'smtp.sendgrid.net:587'  # SendGrid SMTP relay
-  smtp_from: 'alerts@wizard.lu'  # Must match authenticated domain
-  smtp_auth_username: 'apikey'  # Always "apikey" for SendGrid
-  smtp_auth_password: ''  # TODO: Paste your SG.xxx API key here
-  smtp_require_tls: true
-
-route:
-  # Group alerts by name and severity
-  group_by: ['alertname', 'severity']
-  group_wait: 30s
-  group_interval: 5m
-  repeat_interval: 4h
-  receiver: 'email-warnings'
-
-  routes:
-    # Critical alerts: repeat every 1 hour
-    - match:
-        severity: critical
-      receiver: 'email-critical'
-      repeat_interval: 1h
-
-    # Warning alerts: repeat every 4 hours
-    - match:
-        severity: warning
-      receiver: 'email-warnings'
-      repeat_interval: 4h
-
-receivers:
-  - name: 'email-critical'
-    email_configs:
-      - to: 'admin@wizard.lu'  # TODO: Replace with your alert recipient
-        send_resolved: true
-        headers:
-          Subject: '[CRITICAL] Orion: {{ .GroupLabels.alertname }}'
-
-  - name: 'email-warnings'
-    email_configs:
-      - to: 'admin@wizard.lu'  # TODO: Replace with your alert recipient
-        send_resolved: true
-        headers:
-          Subject: '[WARNING] Orion: {{ .GroupLabels.alertname }}'
-
-# Inhibition rules — suppress warnings when critical is already firing
-inhibit_rules:
-  - source_match:
-      severity: 'critical'
-    target_match:
-      severity: 'warning'
-    equal: ['alertname', 'instance']
diff --git a/monitoring/alertmanager/alertmanager.yml.example b/monitoring/alertmanager/alertmanager.yml.example
new file mode 100644
index 00000000..dc212c3e
--- /dev/null
+++ b/monitoring/alertmanager/alertmanager.yml.example
@@ -0,0 +1,75 @@
+# Alertmanager Configuration for Orion Platform — TEMPLATE
+# Docs: https://prometheus.io/docs/alerting/latest/configuration/
+#
+# This is the IN-REPO TEMPLATE. The real file on each host lives at
+# monitoring/alertmanager/alertmanager.yml (gitignored, never committed).
+# Copy this file to that path and fill in the CHANGEME values per
+# docs/deployment/hetzner-server-setup.md.
+
+global:
+  resolve_timeout: 5m
+
+  # ─── SMTP Configuration (mail1.myservices.hosting relay) ────────────
+  # Migrated from SendGrid to mail1.myservices.hosting in 2026 (TODO: record
+  # the exact date) — same SMTP backend the app uses (see /admin/settings).
+  #
+  # smtp_from is set to alerts@wizard.lu for inbox routing clarity. Most
+  # SMTP relays allow the From: header to differ from the authenticated
+  # user, BUT some require them to match. If you see "550 sender not
+  # authorized" in the alertmanager logs after a reload, either:
+  #   1. Configure alerts@wizard.lu as a send-as alias on the support@
+  #      mailbox in your mail hosting control panel, or
+  #   2. Change smtp_from to 'support@wizard.lu' (less clear in inbox).
+  smtp_smarthost: 'mail1.myservices.hosting:587'
+  smtp_from: 'alerts@wizard.lu'
+  smtp_auth_username: 'support@wizard.lu'
+  smtp_auth_password: 'CHANGEME'  # The /admin/settings SMTP password. NEVER commit a real value.
+  smtp_require_tls: true
+
+route:
+  # Group alerts by name and severity
+  group_by: ['alertname', 'severity']
+  group_wait: 30s
+  group_interval: 5m
+  repeat_interval: 4h
+  receiver: 'email-warnings'
+
+  routes:
+    # NOTE(review): `match` here and `source_match`/`target_match` below are
+    # deprecated upstream in favour of `matchers`; migrate once the deployed
+    # Alertmanager version is confirmed to support the new syntax.
+
+    # Critical alerts: repeat every 1 hour
+    - match:
+        severity: critical
+      receiver: 'email-critical'
+      repeat_interval: 1h
+
+    # Warning alerts: repeat every 4 hours
+    - match:
+        severity: warning
+      receiver: 'email-warnings'
+      repeat_interval: 4h
+
+receivers:
+  - name: 'email-critical'
+    email_configs:
+      - to: 'admin@wizard.lu'  # Recipient mailbox for critical alerts
+        send_resolved: true
+        headers:
+          Subject: '[CRITICAL] Orion: {{ .GroupLabels.alertname }}'
+
+  - name: 'email-warnings'
+    email_configs:
+      - to: 'admin@wizard.lu'  # Recipient mailbox for warning alerts
+        send_resolved: true
+        headers:
+          Subject: '[WARNING] Orion: {{ .GroupLabels.alertname }}'
+
+# Inhibition rules — suppress warnings when critical is already firing
+inhibit_rules:
+  - source_match:
+      severity: 'critical'
+    target_match:
+      severity: 'warning'
+    equal: ['alertname', 'instance']