# docker-compose.yml
#
# Orion stack: Postgres + Redis backing services, FastAPI-style app with
# Celery worker/beat/Flower, and a Prometheus/Grafana monitoring stack.
# Everything except db/redis is behind the "full" profile:
#   docker compose --profile full up -d
#
# Secrets default to insecure placeholders — override POSTGRES_PASSWORD,
# REDIS_PASSWORD, JWT_SECRET_KEY, FLOWER_*, GRAFANA_* via .env in production.

services:
  db:
    image: postgres:15
    restart: always
    environment:
      POSTGRES_DB: orion_db
      POSTGRES_USER: orion_user
      # Parameterized (same default as before) so the password can be set
      # from .env instead of being hard-coded in the compose file.
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-secure_password}
    volumes:
      - postgres_data:/var/lib/postgresql/data
    mem_limit: 256m
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U orion_user -d orion_db"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - backend

  redis:
    image: redis:7-alpine
    restart: always
    command: redis-server --maxmemory 100mb --maxmemory-policy allkeys-lru --requirepass ${REDIS_PASSWORD:-changeme}
    mem_limit: 128m
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - backend

  api:
    build: .
    restart: always
    profiles:
      - full  # Only start with: docker compose --profile full up -d
    ports:
      - "127.0.0.1:8001:8000"  # Localhost only — Caddy reverse proxies to this
    env_file: .env
    environment:
      DATABASE_URL: postgresql://orion_user:${POSTGRES_PASSWORD:-secure_password}@db:5432/orion_db
      JWT_SECRET_KEY: ${JWT_SECRET_KEY:-your-super-secret-key}
      REDIS_URL: redis://:${REDIS_PASSWORD:-changeme}@redis:6379/0
      USE_CELERY: "true"
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - ./logs:/app/logs
      - ./uploads:/app/uploads
      - ./.build-info:/app/.build-info:ro
    mem_limit: 512m
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - frontend
      - backend
      - monitoring

  # Celery worker for processing background tasks
  celery-worker:
    build: .
    restart: always
    profiles:
      - full  # Only start with: docker compose --profile full up -d
    command: celery -A app.core.celery_config worker --loglevel=info -Q default,long_running,scheduled
    env_file: .env
    environment:
      DATABASE_URL: postgresql://orion_user:${POSTGRES_PASSWORD:-secure_password}@db:5432/orion_db
      REDIS_URL: redis://:${REDIS_PASSWORD:-changeme}@redis:6379/0
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - ./logs:/app/logs
      - ./exports:/app/exports
    mem_limit: 768m
    healthcheck:
      test: ["CMD-SHELL", "celery -A app.core.celery_config inspect ping --timeout 10 || exit 1"]
      interval: 30s
      timeout: 15s
      retries: 3
    networks:
      - backend

  # Celery beat for scheduled tasks
  celery-beat:
    build: .
    restart: always
    profiles:
      - full  # Only start with: docker compose --profile full up -d
    command: celery -A app.core.celery_config beat --loglevel=info --schedule=/tmp/celerybeat-schedule
    env_file: .env
    environment:
      DATABASE_URL: postgresql://orion_user:${POSTGRES_PASSWORD:-secure_password}@db:5432/orion_db
      REDIS_URL: redis://:${REDIS_PASSWORD:-changeme}@redis:6379/0
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_healthy
    mem_limit: 256m
    healthcheck:
      # Healthy if the schedule file was touched within the last 120s.
      # NOTE: `$` must be escaped as `$$` so Compose interpolation leaves the
      # shell command substitutions ($(date ...), $(stat ...)) intact —
      # unescaped `$(` is an invalid Compose interpolation pattern.
      test: ["CMD-SHELL", "test $$(( $$(date +%s) - $$(stat -c %Y /tmp/celerybeat-schedule 2>/dev/null || echo 0) )) -lt 120 || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    networks:
      - backend

  # Flower monitoring dashboard
  flower:
    build: .
    restart: always
    profiles:
      - full  # Only start with: docker compose --profile full up -d
    command: celery -A app.core.celery_config flower --port=5555
    ports:
      - "127.0.0.1:5555:5555"
    environment:
      REDIS_URL: redis://:${REDIS_PASSWORD:-changeme}@redis:6379/0
      FLOWER_BASIC_AUTH: ${FLOWER_USER:-admin}:${FLOWER_PASSWORD:-changeme}
    depends_on:
      redis:
        condition: service_healthy
    mem_limit: 192m
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:5555/healthcheck || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - backend

  # =========================================================================
  # MONITORING STACK
  # =========================================================================
  prometheus:
    image: prom/prometheus:latest
    restart: always
    profiles:
      - full
    ports:
      - "127.0.0.1:9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./monitoring/prometheus/alert.rules.yml:/etc/prometheus/alert.rules.yml:ro
      - prometheus_data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.retention.time=15d"
      - "--storage.tsdb.retention.size=2GB"
      - "--web.enable-lifecycle"
    mem_limit: 256m
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://localhost:9090/-/healthy || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - monitoring

  grafana:
    image: grafana/grafana:latest
    restart: always
    profiles:
      - full
    ports:
      - "127.0.0.1:3001:3000"
    environment:
      GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-changeme}
      GF_SERVER_ROOT_URL: ${GRAFANA_URL:-https://grafana.wizard.lu}
    volumes:
      - grafana_data:/var/lib/grafana
      - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./monitoring/grafana/provisioning/dashboards/json:/var/lib/grafana/dashboards:ro
    mem_limit: 192m
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://localhost:3000/api/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - monitoring

  node-exporter:
    image: prom/node-exporter:latest
    restart: always
    profiles:
      - full
    ports:
      - "127.0.0.1:9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - "--path.procfs=/host/proc"
      - "--path.sysfs=/host/sys"
      - "--path.rootfs=/rootfs"
      # `$$` escapes the regex `$` anchor from Compose interpolation.
      - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
    mem_limit: 32m
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://localhost:9100/metrics | grep -q 'node_exporter_build_info' || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - monitoring

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    restart: always
    profiles:
      - full
    ports:
      - "127.0.0.1:8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    privileged: true
    devices:
      - /dev/kmsg
    mem_limit: 192m
    networks:
      - monitoring

  redis-exporter:
    image: oliver006/redis_exporter:alpine
    restart: always
    profiles:
      - full
    ports:
      - "127.0.0.1:9121:9121"
    environment:
      REDIS_ADDR: redis://:${REDIS_PASSWORD:-changeme}@redis:6379
    depends_on:
      redis:
        condition: service_healthy
    mem_limit: 32m
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://localhost:9121/metrics | grep -q 'redis_up 1' || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - backend
      - monitoring

  alertmanager:
    image: prom/alertmanager:latest
    restart: always
    profiles:
      - full
    ports:
      - "127.0.0.1:9093:9093"
    volumes:
      - ./monitoring/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    command:
      - "--config.file=/etc/alertmanager/alertmanager.yml"
      - "--storage.path=/alertmanager"
    mem_limit: 32m
    healthcheck:
      test: ["CMD-SHELL", "wget -qO- http://localhost:9093/-/healthy || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - monitoring

# =========================================================================
# NETWORKS
# =========================================================================
networks:
  frontend:
    name: orion_frontend
  backend:
    name: orion_backend
  monitoring:
    name: orion_monitoring

volumes:
  postgres_data:
    name: orion_postgres_data
  prometheus_data:
    name: orion_prometheus_data
  grafana_data:
    name: orion_grafana_data