# orion/monitoring/prometheus.yml

# Prometheus configuration for Orion platform
# Docs: https://prometheus.io/docs/prometheus/latest/configuration/configuration/

global:
  # Default interval between scrapes for every job that does not override it.
  scrape_interval: '15s'
  # Interval at which the rules loaded via rule_files are evaluated.
  evaluation_interval: '15s'

# ─── Alerting ────────────────────────────────────────────────────────────
alerting:
  # A single, statically configured Alertmanager endpoint.
  alertmanagers:
    - static_configs:
        - targets:
            - 'alertmanager:9093'

# Rule files loaded by Prometheus; evaluated every global.evaluation_interval.
rule_files:
  - '/etc/prometheus/alert.rules.yml'

# ─── Scrape Configs ─────────────────────────────────────────────────────
# Every job below uses a static target list and attaches a `service` label
# (matching the job name) to the series scraped from that target.
scrape_configs:
  # Orion API: application metrics served at /metrics (prometheus_client).
  - job_name: 'orion-api'
    metrics_path: '/metrics'
    static_configs:
      - targets:
          - 'api:8000'
        labels:
          service: 'orion-api'

  # Node Exporter: host-level metrics (CPU, RAM, disk).
  - job_name: 'node-exporter'
    static_configs:
      - targets:
          - 'node-exporter:9100'
        labels:
          service: 'node-exporter'

  # cAdvisor: resource metrics broken down per container.
  - job_name: 'cadvisor'
    static_configs:
      - targets:
          - 'cadvisor:8080'
        labels:
          service: 'cadvisor'

  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - 'localhost:9090'
        labels:
          service: 'prometheus'

  # Alertmanager: same host:port as the alerting target configured above.
  - job_name: 'alertmanager'
    static_configs:
      - targets:
          - 'alertmanager:9093'
        labels:
          service: 'alertmanager'