# orion/docker-compose.yml

# docker-compose.yml
services:
  # PostgreSQL 15 database. Data is kept in the named volume `postgres_data`;
  # ./init.sql is handed to the image's init directory.
  db:
    image: postgres:15
    restart: always
    environment:
      POSTGRES_DB: orion_db
      POSTGRES_USER: orion_user
      # NOTE(review): hard-coded credential. The same value is repeated in the
      # DATABASE_URL of the api and celery-worker services, so change all
      # three together.
      POSTGRES_PASSWORD: secure_password
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # Read-only: the container only needs to read the init script.
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    ports:
      # Publish on loopback only, like the monitoring services in this file.
      # Other containers reach the database as db:5432 on the backend network
      # and do not depend on this host mapping.
      - "127.0.0.1:5432:5432"
    healthcheck:
      # Healthy once the server accepts connections for orion_user/orion_db.
      test: ["CMD-SHELL", "pg_isready -U orion_user -d orion_db"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - backend

  # Redis 7. The other services address it as redis://redis:6379/0 on the
  # backend network; no password is configured in this file.
  redis:
    image: redis:7-alpine
    restart: always
    ports:
      # Host port 6380 avoids a conflict with a Redis running on the host.
      # Bound to loopback only: this instance has no authentication, so it
      # must not be reachable from other machines.
      - "127.0.0.1:6380:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - backend

  # Application HTTP API, built from the directory that holds this file.
  # Belongs to the `full` profile, so it only starts with:
  #   docker compose --profile full up -d
  api:
    build:
      context: .
    profiles: [full]
    restart: always
    env_file:
      - .env
    # Values set here take precedence over identically named keys from .env.
    environment:
      - 'DATABASE_URL=postgresql://orion_user:secure_password@db:5432/orion_db'
      - 'JWT_SECRET_KEY=${JWT_SECRET_KEY:-your-super-secret-key}'
      - 'REDIS_URL=redis://redis:6379/0'
      - 'USE_CELERY=true'
    # Wait until both backing services report healthy before starting.
    depends_on:
      db: {condition: service_healthy}
      redis: {condition: service_healthy}
    ports:
      - '8001:8000'  # Use 8001 to avoid conflict with local dev server
    volumes:
      - ./logs:/app/logs
      - ./uploads:/app/uploads
    healthcheck:
      test:
        - CMD
        - curl
        - '-f'
        - 'http://localhost:8000/health'
      interval: 30s
      timeout: 10s
      retries: 3
    networks: [frontend, backend, monitoring]

  # Celery worker for processing background tasks.
  # Consumes the `default`, `long_running` and `scheduled` queues.
  celery-worker:
    build: .
    restart: always
    profiles:
      - full  # Only start with: docker compose --profile full up -d
    command: celery -A app.core.celery_config worker --loglevel=info -Q default,long_running,scheduled
    env_file: .env
    environment:
      DATABASE_URL: postgresql://orion_user:secure_password@db:5432/orion_db
      REDIS_URL: redis://redis:6379/0
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - ./logs:/app/logs
      - ./exports:/app/exports
    healthcheck:
      # Ping only this container's own worker node. An untargeted
      # `inspect ping` is a broadcast: any other worker on the same broker can
      # answer for a dead one, and the command waits out the whole reply
      # window, leaving little margin under the 15s timeout below.
      # `$$` keeps Compose from interpolating; the shell inside the container
      # expands $HOSTNAME, which matches the worker's default node name
      # (celery@<hostname>) because the command above sets no `-n`.
      test:
        - CMD-SHELL
        - 'celery -A app.core.celery_config inspect ping -d celery@$$HOSTNAME --timeout 10 || exit 1'
      interval: 30s
      timeout: 15s
      retries: 3
    networks:
      - backend

  # Celery beat: runs the scheduler process for periodic tasks.
  # Only receives REDIS_URL and only waits for redis.
  celery-beat:
    build:
      context: .
    profiles: [full]  # Only start with: docker compose --profile full up -d
    restart: always
    command: celery -A app.core.celery_config beat --loglevel=info
    environment:
      - 'REDIS_URL=redis://redis:6379/0'
    depends_on:
      redis: {condition: service_healthy}
    # No health probe for this service; this also switches off any
    # HEALTHCHECK that the image itself might declare.
    healthcheck:
      disable: true
    networks: [backend]

  # Flower monitoring dashboard for Celery, protected by HTTP basic auth.
  flower:
    build: .
    restart: always
    profiles:
      - full  # Only start with: docker compose --profile full up -d
    command: celery -A app.core.celery_config flower --port=5555
    ports:
      - "5555:5555"
    environment:
      REDIS_URL: redis://redis:6379/0
      # Single user:password pair. Override FLOWER_USER / FLOWER_PASSWORD in
      # the environment; the fallback values are placeholders.
      FLOWER_BASIC_AUTH: ${FLOWER_USER:-admin}:${FLOWER_PASSWORD:-changeme}
    depends_on:
      redis:
        condition: service_healthy
    healthcheck:
      # Basic auth is always enabled above, so an anonymous request to `/`
      # is answered with 401 and `curl -f` fails, leaving the container
      # permanently unhealthy. Authenticate with the credentials already in
      # the container environment instead. `$$` defers expansion to the
      # shell inside the container. The response body is discarded.
      test:
        - CMD-SHELL
        - 'curl -fsS -o /dev/null -u "$$FLOWER_BASIC_AUTH" http://localhost:5555/ || exit 1'
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - backend

  # =========================================================================
  # MONITORING STACK
  # =========================================================================

  # Prometheus server. Configuration and alert rules are mounted read-only
  # from ./monitoring; time-series data lives in the `prometheus_data` volume.
  prometheus:
    image: prom/prometheus:latest
    profiles: [full]
    restart: always
    mem_limit: 256m
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      # Retention is capped both by age and by on-disk size.
      - '--storage.tsdb.retention.time=15d'
      - '--storage.tsdb.retention.size=2GB'
      - '--web.enable-lifecycle'
    ports:
      # Published on the host's loopback interface only.
      - '127.0.0.1:9090:9090'
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./monitoring/prometheus/alert.rules.yml:/etc/prometheus/alert.rules.yml:ro
      - prometheus_data:/prometheus
    healthcheck:
      test:
        - CMD-SHELL
        - 'wget -qO- http://localhost:9090/-/healthy || exit 1'
      interval: 30s
      timeout: 10s
      retries: 3
    networks: [monitoring]

  # Grafana dashboards. Provisioning files and dashboard JSON are mounted
  # read-only from ./monitoring/grafana; state lives in `grafana_data`.
  grafana:
    image: grafana/grafana:latest
    profiles: [full]
    restart: always
    mem_limit: 192m
    environment:
      # Each value can be overridden from the environment; the part after
      # `:-` is the fallback used when the variable is unset or empty.
      - 'GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}'
      - 'GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-changeme}'
      - 'GF_SERVER_ROOT_URL=${GRAFANA_URL:-https://grafana.wizard.lu}'
    ports:
      # Host port 3001, loopback only; the container listens on 3000.
      - '127.0.0.1:3001:3000'
    volumes:
      - grafana_data:/var/lib/grafana
      - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./monitoring/grafana/provisioning/dashboards/json:/var/lib/grafana/dashboards:ro
    healthcheck:
      test:
        - CMD-SHELL
        - 'wget -qO- http://localhost:3000/api/health || exit 1'
      interval: 30s
      timeout: 10s
      retries: 3
    networks: [monitoring]

  # Host metrics exporter. The host's /proc, /sys and / are mounted read-only
  # and the exporter is pointed at those mount points via its --path.* flags.
  node-exporter:
    image: prom/node-exporter:latest
    profiles: [full]
    restart: always
    mem_limit: 64m
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
      # `$$` is Compose's escape for a literal `$` (regex end-of-string).
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    ports:
      - '127.0.0.1:9100:9100'
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    networks: [monitoring]

  # cAdvisor container metrics. Runs privileged with read-only views of the
  # host root, /var/run, /sys, the Docker data directory and /dev/disk.
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    profiles: [full]
    restart: always
    mem_limit: 128m
    privileged: true
    devices:
      - /dev/kmsg
    ports:
      # Published on the host's loopback interface only.
      - '127.0.0.1:8080:8080'
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    networks: [monitoring]

  # Alertmanager. Its configuration is mounted read-only from
  # ./monitoring/alertmanager; no volume is mounted at the storage path.
  alertmanager:
    image: prom/alertmanager:latest
    profiles: [full]
    restart: always
    mem_limit: 32m
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    ports:
      # Published on the host's loopback interface only.
      - '127.0.0.1:9093:9093'
    volumes:
      - ./monitoring/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    healthcheck:
      test:
        - CMD-SHELL
        - 'wget -qO- http://localhost:9093/-/healthy || exit 1'
      interval: 30s
      timeout: 10s
      retries: 3
    networks: [monitoring]

# =========================================================================
# NETWORKS
# =========================================================================
# Explicit `name:` values pin the network names so they do not depend on the
# Compose project name.
networks:
  # db, redis, api and the Celery/Flower services.
  backend:
    name: orion_backend
  # Joined by api only.
  frontend:
    name: orion_frontend
  # api plus the Prometheus/Grafana/exporter/Alertmanager services.
  monitoring:
    name: orion_monitoring

# Named volumes with fixed names, independent of the Compose project name.
volumes:
  # Grafana state (/var/lib/grafana).
  grafana_data:
    name: orion_grafana_data
  # PostgreSQL data directory (/var/lib/postgresql/data).
  postgres_data:
    name: orion_postgres_data
  # Prometheus storage (/prometheus).
  prometheus_data:
    name: orion_prometheus_data