Some checks failed
Celery worker was OOM-killed (41 restarts) at 512MB with 4 concurrent workers. Reduce concurrency to 2, increase worker limit to 768MB, and reclaim memory from over-provisioned services (db 512→256, beat 256→128, flower 256→192). Total allocation stays within 4GB server budget. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
294 lines
7.5 KiB
YAML
294 lines
7.5 KiB
YAML
# docker-compose.yml
services:
db:
|
|
image: postgres:15
|
|
restart: always
|
|
environment:
|
|
POSTGRES_DB: orion_db
|
|
POSTGRES_USER: orion_user
|
|
POSTGRES_PASSWORD: secure_password
|
|
volumes:
|
|
- postgres_data:/var/lib/postgresql/data
|
|
ports:
|
|
- "5432:5432"
|
|
mem_limit: 256m
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "pg_isready -U orion_user -d orion_db"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
networks:
|
|
- backend
|
|
|
|
redis:
|
|
image: redis:7-alpine
|
|
restart: always
|
|
command: redis-server --maxmemory 100mb --maxmemory-policy allkeys-lru
|
|
ports:
|
|
- "6380:6379" # Use 6380 to avoid conflict with host Redis
|
|
mem_limit: 128m
|
|
healthcheck:
|
|
test: ["CMD", "redis-cli", "ping"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
networks:
|
|
- backend
|
|
|
|
api:
|
|
build: .
|
|
restart: always
|
|
profiles:
|
|
- full # Only start with: docker compose --profile full up -d
|
|
ports:
|
|
- "8001:8000" # Use 8001 to avoid conflict with local dev server
|
|
env_file: .env
|
|
environment:
|
|
DATABASE_URL: postgresql://orion_user:secure_password@db:5432/orion_db
|
|
JWT_SECRET_KEY: ${JWT_SECRET_KEY:-your-super-secret-key}
|
|
REDIS_URL: redis://redis:6379/0
|
|
USE_CELERY: "true"
|
|
depends_on:
|
|
db:
|
|
condition: service_healthy
|
|
redis:
|
|
condition: service_healthy
|
|
volumes:
|
|
- ./logs:/app/logs
|
|
- ./uploads:/app/uploads
|
|
mem_limit: 512m
|
|
healthcheck:
|
|
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
networks:
|
|
- frontend
|
|
- backend
|
|
- monitoring
|
|
|
|
# Celery worker for processing background tasks
|
|
celery-worker:
|
|
build: .
|
|
restart: always
|
|
profiles:
|
|
- full # Only start with: docker compose --profile full up -d
|
|
command: celery -A app.core.celery_config worker --loglevel=info -Q default,long_running,scheduled
|
|
env_file: .env
|
|
environment:
|
|
DATABASE_URL: postgresql://orion_user:secure_password@db:5432/orion_db
|
|
REDIS_URL: redis://redis:6379/0
|
|
depends_on:
|
|
db:
|
|
condition: service_healthy
|
|
redis:
|
|
condition: service_healthy
|
|
volumes:
|
|
- ./logs:/app/logs
|
|
- ./exports:/app/exports
|
|
mem_limit: 768m
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "celery -A app.core.celery_config inspect ping --timeout 10 || exit 1"]
|
|
interval: 30s
|
|
timeout: 15s
|
|
retries: 3
|
|
networks:
|
|
- backend
|
|
|
|
# Celery beat for scheduled tasks
|
|
celery-beat:
|
|
build: .
|
|
restart: always
|
|
profiles:
|
|
- full # Only start with: docker compose --profile full up -d
|
|
command: celery -A app.core.celery_config beat --loglevel=info
|
|
environment:
|
|
REDIS_URL: redis://redis:6379/0
|
|
depends_on:
|
|
redis:
|
|
condition: service_healthy
|
|
mem_limit: 128m
|
|
healthcheck:
|
|
disable: true
|
|
networks:
|
|
- backend
|
|
|
|
# Flower monitoring dashboard
|
|
flower:
|
|
build: .
|
|
restart: always
|
|
profiles:
|
|
- full # Only start with: docker compose --profile full up -d
|
|
command: celery -A app.core.celery_config flower --port=5555
|
|
ports:
|
|
- "127.0.0.1:5555:5555"
|
|
environment:
|
|
REDIS_URL: redis://redis:6379/0
|
|
FLOWER_BASIC_AUTH: ${FLOWER_USER:-admin}:${FLOWER_PASSWORD:-changeme}
|
|
depends_on:
|
|
redis:
|
|
condition: service_healthy
|
|
mem_limit: 192m
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "curl -f http://localhost:5555/ || exit 1"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
networks:
|
|
- backend
|
|
|
|
# =========================================================================
|
|
# MONITORING STACK
|
|
# =========================================================================
|
|
|
|
prometheus:
|
|
image: prom/prometheus:latest
|
|
restart: always
|
|
profiles:
|
|
- full
|
|
ports:
|
|
- "127.0.0.1:9090:9090"
|
|
volumes:
|
|
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
|
- ./monitoring/prometheus/alert.rules.yml:/etc/prometheus/alert.rules.yml:ro
|
|
- prometheus_data:/prometheus
|
|
command:
|
|
- "--config.file=/etc/prometheus/prometheus.yml"
|
|
- "--storage.tsdb.retention.time=15d"
|
|
- "--storage.tsdb.retention.size=2GB"
|
|
- "--web.enable-lifecycle"
|
|
mem_limit: 256m
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "wget -qO- http://localhost:9090/-/healthy || exit 1"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
networks:
|
|
- monitoring
|
|
|
|
grafana:
|
|
image: grafana/grafana:latest
|
|
restart: always
|
|
profiles:
|
|
- full
|
|
ports:
|
|
- "127.0.0.1:3001:3000"
|
|
environment:
|
|
GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
|
|
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-changeme}
|
|
GF_SERVER_ROOT_URL: ${GRAFANA_URL:-https://grafana.wizard.lu}
|
|
volumes:
|
|
- grafana_data:/var/lib/grafana
|
|
- ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
|
|
- ./monitoring/grafana/provisioning/dashboards/json:/var/lib/grafana/dashboards:ro
|
|
mem_limit: 192m
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "wget -qO- http://localhost:3000/api/health || exit 1"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
networks:
|
|
- monitoring
|
|
|
|
node-exporter:
|
|
image: prom/node-exporter:latest
|
|
restart: always
|
|
profiles:
|
|
- full
|
|
ports:
|
|
- "127.0.0.1:9100:9100"
|
|
volumes:
|
|
- /proc:/host/proc:ro
|
|
- /sys:/host/sys:ro
|
|
- /:/rootfs:ro
|
|
command:
|
|
- "--path.procfs=/host/proc"
|
|
- "--path.sysfs=/host/sys"
|
|
- "--path.rootfs=/rootfs"
|
|
- "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
|
|
mem_limit: 64m
|
|
networks:
|
|
- monitoring
|
|
|
|
cadvisor:
|
|
image: gcr.io/cadvisor/cadvisor:latest
|
|
restart: always
|
|
profiles:
|
|
- full
|
|
ports:
|
|
- "127.0.0.1:8080:8080"
|
|
volumes:
|
|
- /:/rootfs:ro
|
|
- /var/run:/var/run:ro
|
|
- /sys:/sys:ro
|
|
- /var/lib/docker/:/var/lib/docker:ro
|
|
- /dev/disk/:/dev/disk:ro
|
|
privileged: true
|
|
devices:
|
|
- /dev/kmsg
|
|
mem_limit: 128m
|
|
networks:
|
|
- monitoring
|
|
|
|
redis-exporter:
|
|
image: oliver006/redis_exporter:latest
|
|
restart: always
|
|
profiles:
|
|
- full
|
|
ports:
|
|
- "127.0.0.1:9121:9121"
|
|
environment:
|
|
REDIS_ADDR: redis://redis:6379
|
|
depends_on:
|
|
redis:
|
|
condition: service_healthy
|
|
mem_limit: 32m
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "wget -qO- http://localhost:9121/health || exit 1"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
networks:
|
|
- backend
|
|
- monitoring
|
|
|
|
alertmanager:
|
|
image: prom/alertmanager:latest
|
|
restart: always
|
|
profiles:
|
|
- full
|
|
ports:
|
|
- "127.0.0.1:9093:9093"
|
|
volumes:
|
|
- ./monitoring/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
|
|
command:
|
|
- "--config.file=/etc/alertmanager/alertmanager.yml"
|
|
- "--storage.path=/alertmanager"
|
|
mem_limit: 32m
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "wget -qO- http://localhost:9093/-/healthy || exit 1"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
networks:
|
|
- monitoring
|
|
|
|
# =========================================================================
# NETWORKS
# =========================================================================
networks:
  frontend:
    name: orion_frontend
  backend:
    name: orion_backend
  monitoring:
    name: orion_monitoring
# Named volumes — explicit names so they survive project renames.
volumes:
  postgres_data:
    name: orion_postgres_data
  prometheus_data:
    name: orion_prometheus_data
  grafana_data:
    name: orion_grafana_data