Files
orion/docker-compose.yml
Samir Boulahtit e61e02fb39
Some checks failed
CI / ruff (push) Successful in 11s
CI / pytest (push) Failing after 47m48s
CI / validate (push) Successful in 24s
CI / dependency-scanning (push) Successful in 31s
CI / docs (push) Has been skipped
CI / deploy (push) Has been skipped
fix(redis): configure maxmemory and eviction policy to prevent OOM
Redis had no maxmemory set, causing the Prometheus alert expression
(used/max) to evaluate to +Inf. Set maxmemory to 100mb with allkeys-lru
eviction policy, and guard the alert expression against division by zero.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 17:57:38 +01:00

294 lines
7.5 KiB
YAML

# docker-compose.yml
services:
# PostgreSQL 15 database (database orion_db, role orion_user).
# Containers on the backend network reach it as db:5432; the host port
# mapping below is only for tools running on the Docker host.
db:
  image: postgres:15
  restart: always
  environment:
    POSTGRES_DB: orion_db
    POSTGRES_USER: orion_user
    # NOTE(review): this password is committed in plain text and repeated in
    # the DATABASE_URL of the api and celery-worker services; change them
    # together.
    POSTGRES_PASSWORD: secure_password
  volumes:
    - postgres_data:/var/lib/postgresql/data
  ports:
    # Published on the loopback interface by default, matching the other
    # host-published admin ports in this file, so the database is not
    # reachable from other machines. Set POSTGRES_BIND_ADDRESS=0.0.0.0 to
    # restore publishing on every host interface.
    - "${POSTGRES_BIND_ADDRESS:-127.0.0.1}:5432:5432"
  mem_limit: 512m
  healthcheck:
    # Healthy once the server accepts connections for orion_user/orion_db;
    # api and celery-worker wait on this via depends_on.
    test: ["CMD-SHELL", "pg_isready -U orion_user -d orion_db"]
    interval: 30s
    timeout: 10s
    retries: 3
  networks:
    - backend
# Redis 7 (Alpine). The application services in this file reach it as
# redis://redis:6379 over the backend network.
redis:
  image: redis:7-alpine
  restart: always
  # Cap the dataset at 100mb (below the 128m container limit) and evict
  # least-recently-used keys from the whole keyspace once the cap is reached.
  command: redis-server --maxmemory 100mb --maxmemory-policy allkeys-lru
  ports:
    # Use 6380 to avoid conflict with host Redis.
    # No password is configured on this server, so the port is published on
    # the loopback interface by default, matching the other host-published
    # admin ports in this file. Set REDIS_BIND_ADDRESS=0.0.0.0 to restore
    # publishing on every host interface.
    - "${REDIS_BIND_ADDRESS:-127.0.0.1}:6380:6379"
  mem_limit: 128m
  healthcheck:
    # Services with depends_on: redis wait until this ping succeeds.
    test: ["CMD", "redis-cli", "ping"]
    interval: 30s
    timeout: 10s
    retries: 3
  networks:
    - backend
# Application API, built from the Dockerfile in this directory.
api:
  build: .
  profiles:
    - full  # opt-in only: docker compose --profile full up -d
  restart: always
  depends_on:
    # Start only after both backing stores report healthy.
    redis:
      condition: service_healthy
    db:
      condition: service_healthy
  env_file: .env
  environment:
    # Keys listed here are set explicitly in addition to whatever .env holds.
    USE_CELERY: 'true'
    REDIS_URL: redis://redis:6379/0
    DATABASE_URL: postgresql://orion_user:secure_password@db:5432/orion_db
    # NOTE(review): falls back to a placeholder secret when JWT_SECRET_KEY is
    # unset; make sure a real value is supplied outside local development.
    JWT_SECRET_KEY: '${JWT_SECRET_KEY:-your-super-secret-key}'
  ports:
    - '8001:8000'  # host port 8001 avoids a clash with a local dev server
  volumes:
    - ./logs:/app/logs
    - ./uploads:/app/uploads
  networks:
    - frontend
    - backend
    - monitoring
  mem_limit: 512m
  healthcheck:
    # Runs curl inside the container against the app's /health endpoint.
    test:
      - CMD
      - curl
      - '-f'
      - 'http://localhost:8000/health'
    retries: 3
    timeout: 10s
    interval: 30s
# Celery worker: runs background tasks from the default, long_running and
# scheduled queues, using the same image build as the API.
celery-worker:
  build: .
  profiles:
    - full  # opt-in only: docker compose --profile full up -d
  restart: always
  command:
    - celery
    - '-A'
    - app.core.celery_config
    - worker
    - '--loglevel=info'
    - '-Q'
    - default,long_running,scheduled
  depends_on:
    redis:
      condition: service_healthy
    db:
      condition: service_healthy
  env_file: .env
  environment:
    REDIS_URL: redis://redis:6379/0
    DATABASE_URL: postgresql://orion_user:secure_password@db:5432/orion_db
  volumes:
    - ./logs:/app/logs
    - ./exports:/app/exports
  networks:
    - backend
  mem_limit: 512m
  healthcheck:
    # The ping is given 10s to collect replies; the 15s check timeout leaves
    # headroom for CLI start-up.
    test:
      - CMD-SHELL
      - 'celery -A app.core.celery_config inspect ping --timeout 10 || exit 1'
    retries: 3
    timeout: 15s
    interval: 30s
# Celery beat: the scheduler process for periodic tasks.
celery-beat:
  build: .
  profiles:
    - full  # opt-in only: docker compose --profile full up -d
  restart: always
  command:
    - celery
    - '-A'
    - app.core.celery_config
    - beat
    - '--loglevel=info'
  depends_on:
    redis:
      condition: service_healthy
  environment:
    REDIS_URL: redis://redis:6379/0
  networks:
    - backend
  mem_limit: 256m
  # Container healthcheck is explicitly turned off for this service.
  healthcheck:
    disable: true
# Flower: web dashboard for Celery, served on port 5555.
flower:
  build: .
  profiles:
    - full  # opt-in only: docker compose --profile full up -d
  restart: always
  command:
    - celery
    - '-A'
    - app.core.celery_config
    - flower
    - '--port=5555'
  depends_on:
    redis:
      condition: service_healthy
  environment:
    REDIS_URL: redis://redis:6379/0
    # Basic-auth credentials as user:password; defaults to admin:changeme
    # when FLOWER_USER / FLOWER_PASSWORD are not set.
    FLOWER_BASIC_AUTH: '${FLOWER_USER:-admin}:${FLOWER_PASSWORD:-changeme}'
  ports:
    - '127.0.0.1:5555:5555'  # loopback only
  networks:
    - backend
  mem_limit: 256m
  healthcheck:
    test:
      - CMD-SHELL
      - 'curl -f http://localhost:5555/ || exit 1'
    retries: 3
    timeout: 10s
    interval: 30s
# =========================================================================
# MONITORING STACK
# =========================================================================
# Prometheus server, configured from ./monitoring/prometheus.yml with the
# alert rules file mounted next to it.
prometheus:
  image: prom/prometheus:latest
  profiles:
    - full
  restart: always
  command:
    - '--config.file=/etc/prometheus/prometheus.yml'
    # Keep at most 15 days or 2GB of samples.
    - '--storage.tsdb.retention.time=15d'
    - '--storage.tsdb.retention.size=2GB'
    # Enables the HTTP reload/shutdown endpoints.
    - '--web.enable-lifecycle'
  ports:
    - '127.0.0.1:9090:9090'  # loopback only
  volumes:
    - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    - ./monitoring/prometheus/alert.rules.yml:/etc/prometheus/alert.rules.yml:ro
    - prometheus_data:/prometheus
  networks:
    - monitoring
  mem_limit: 256m
  healthcheck:
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:9090/-/healthy || exit 1'
    retries: 3
    timeout: 10s
    interval: 30s
# Grafana, with provisioning and dashboard JSON mounted read-only from
# ./monitoring/grafana.
grafana:
  image: grafana/grafana:latest
  profiles:
    - full
  restart: always
  environment:
    # Admin login defaults to admin/changeme unless overridden.
    GF_SECURITY_ADMIN_USER: '${GRAFANA_ADMIN_USER:-admin}'
    GF_SECURITY_ADMIN_PASSWORD: '${GRAFANA_ADMIN_PASSWORD:-changeme}'
    GF_SERVER_ROOT_URL: '${GRAFANA_URL:-https://grafana.wizard.lu}'
  ports:
    - '127.0.0.1:3001:3000'  # loopback only; host port 3001
  volumes:
    - grafana_data:/var/lib/grafana
    - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
    - ./monitoring/grafana/provisioning/dashboards/json:/var/lib/grafana/dashboards:ro
  networks:
    - monitoring
  mem_limit: 192m
  healthcheck:
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:3000/api/health || exit 1'
    retries: 3
    timeout: 10s
    interval: 30s
# Node exporter: reads the host's /proc, /sys and root filesystem through
# read-only bind mounts.
node-exporter:
  image: prom/node-exporter:latest
  profiles:
    - full
  restart: always
  command:
    # Point the exporter at the bind-mounted host paths.
    - '--path.procfs=/host/proc'
    - '--path.sysfs=/host/sys'
    - '--path.rootfs=/rootfs'
    # "$$" is Compose's escape for a literal "$" in the regex.
    - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
  ports:
    - '127.0.0.1:9100:9100'  # loopback only
  volumes:
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    - /:/rootfs:ro
  networks:
    - monitoring
  mem_limit: 64m
# cAdvisor: runs privileged with read-only views of the host root, /sys,
# /var/run, the Docker data directory and /dev/disk, plus /dev/kmsg.
cadvisor:
  image: gcr.io/cadvisor/cadvisor:latest
  profiles:
    - full
  restart: always
  privileged: true
  devices:
    - /dev/kmsg
  ports:
    - '127.0.0.1:8080:8080'  # loopback only
  volumes:
    - /:/rootfs:ro
    - /var/run:/var/run:ro
    - /sys:/sys:ro
    - /var/lib/docker/:/var/lib/docker:ro
    - /dev/disk/:/dev/disk:ro
  networks:
    - monitoring
  mem_limit: 128m
# Redis exporter: attached to both networks so it can reach redis on
# backend while sitting on the monitoring network as well.
redis-exporter:
  image: oliver006/redis_exporter:latest
  profiles:
    - full
  restart: always
  depends_on:
    redis:
      condition: service_healthy
  environment:
    REDIS_ADDR: redis://redis:6379
  ports:
    - '127.0.0.1:9121:9121'  # loopback only
  networks:
    - backend
    - monitoring
  mem_limit: 32m
  healthcheck:
    # NOTE(review): CMD-SHELL needs a shell and wget inside the image.
    # Confirm the tag used above ships them; otherwise this check can never
    # pass and the container will always be reported unhealthy.
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:9121/health || exit 1'
    retries: 3
    timeout: 10s
    interval: 30s
# Alertmanager, configured from ./monitoring/alertmanager/alertmanager.yml.
alertmanager:
  image: prom/alertmanager:latest
  profiles:
    - full
  restart: always
  command:
    - '--config.file=/etc/alertmanager/alertmanager.yml'
    # NOTE(review): no volume in this file is mounted at /alertmanager;
    # confirm whether this state needs to survive container removal.
    - '--storage.path=/alertmanager'
  ports:
    - '127.0.0.1:9093:9093'  # loopback only
  volumes:
    - ./monitoring/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
  networks:
    - monitoring
  mem_limit: 32m
  healthcheck:
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:9093/-/healthy || exit 1'
    retries: 3
    timeout: 10s
    interval: 30s
# =========================================================================
# NETWORKS
# =========================================================================
# Each network gets a fixed name instead of a Compose-generated one.
networks:
  # Data stores, Celery services, the API and redis-exporter.
  backend:
    name: orion_backend
  # Joined only by the api service.
  frontend:
    name: orion_frontend
  # Prometheus, Grafana, Alertmanager, the exporters and the API.
  monitoring:
    name: orion_monitoring
# Named volumes with fixed names, one per stateful service.
volumes:
  # Grafana state (/var/lib/grafana).
  grafana_data:
    name: orion_grafana_data
  # PostgreSQL data directory (/var/lib/postgresql/data).
  postgres_data:
    name: orion_postgres_data
  # Prometheus data (/prometheus).
  prometheus_data:
    name: orion_prometheus_data