diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md index 0f36095c..16b60dc7 100644 --- a/docs/deployment/docker.md +++ b/docs/deployment/docker.md @@ -94,11 +94,10 @@ services: celery: build: . restart: always - command: celery -A app.celery worker --loglevel=info --concurrency=4 + command: celery -A app.core.celery_config worker --loglevel=info -Q default,long_running,scheduled --concurrency=2 environment: DATABASE_URL: postgresql://orion_user:${DB_PASSWORD}@db:5432/orion_db REDIS_URL: redis://redis:6379/0 - CELERY_BROKER_URL: redis://redis:6379/1 env_file: - .env depends_on: @@ -109,7 +108,7 @@ services: deploy: resources: limits: - memory: 512M + memory: 768M celery-beat: build: . @@ -148,7 +147,7 @@ services: redis: image: redis:7-alpine restart: always - command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru + command: redis-server --maxmemory 100mb --maxmemory-policy allkeys-lru volumes: - redis_data:/data healthcheck: diff --git a/docs/deployment/hetzner-server-setup.md b/docs/deployment/hetzner-server-setup.md index 12a2edea..c980639d 100644 --- a/docs/deployment/hetzner-server-setup.md +++ b/docs/deployment/hetzner-server-setup.md @@ -161,7 +161,7 @@ Complete step-by-step guide for deploying Orion on a Hetzner Cloud VPS. !!! 
success "Progress — 2026-02-17" **Launch readiness — fully deployed and verified (44/44 checks pass):** - - **Memory limits** on all 6 app containers (db: 512m, redis: 128m, api: 512m, celery-worker: 512m, celery-beat: 256m, flower: 256m) — beat/flower bumped from 128m after OOM kills + - **Memory limits** on all 6 app containers (db: 256m, redis: 128m, api: 512m, celery-worker: 768m, celery-beat: 128m, flower: 192m) — rebalanced after celery-worker OOM kills (concurrency reduced from 4 to 2) - **Flower port** restricted to localhost only (`127.0.0.1:5555:5555`) — access via Caddy reverse proxy - **Flower password** changed from default - **Infrastructure health checks** — `/health/ready` now checks PostgreSQL (`SELECT 1`) and Redis (`ping`) with individual check details and latency diff --git a/docs/deployment/infrastructure.md b/docs/deployment/infrastructure.md index 2c8cd0f3..a04ace7b 100644 --- a/docs/deployment/infrastructure.md +++ b/docs/deployment/infrastructure.md @@ -409,7 +409,7 @@ services: celery: build: . 
restart: always - command: celery -A app.celery worker --loglevel=info --concurrency=4 + command: celery -A app.core.celery_config worker --loglevel=info -Q default,long_running,scheduled --concurrency=2 environment: DATABASE_URL: postgresql://orion_user:${DB_PASSWORD}@db:5432/orion_db REDIS_URL: redis://redis:6379/0 @@ -676,7 +676,7 @@ find $BACKUP_DIR -name "*.sql.gz" -mtime +7 -delete **Configuration (`redis.conf`):** ```ini -maxmemory 256mb +maxmemory 100mb maxmemory-policy allkeys-lru appendonly yes appendfsync everysec diff --git a/docs/deployment/production.md b/docs/deployment/production.md index 1ae0d09f..d13bf53b 100644 --- a/docs/deployment/production.md +++ b/docs/deployment/production.md @@ -98,7 +98,7 @@ Group=orion WorkingDirectory=/home/orion/app Environment="PATH=/home/orion/app/.venv/bin" EnvironmentFile=/home/orion/app/.env -ExecStart=/home/orion/app/.venv/bin/celery -A app.core.celery_config worker --loglevel=info -Q default,long_running,scheduled --concurrency=4 +ExecStart=/home/orion/app/.venv/bin/celery -A app.core.celery_config worker --loglevel=info -Q default,long_running,scheduled --concurrency=2 Restart=always RestartSec=3 StandardOutput=journal diff --git a/docs/deployment/scaling-guide.md b/docs/deployment/scaling-guide.md index 43e2fd2c..66271634 100644 --- a/docs/deployment/scaling-guide.md +++ b/docs/deployment/scaling-guide.md @@ -18,21 +18,22 @@ Practical playbook for scaling Orion from a single CAX11 server to a multi-serve | Container | Limit | Purpose | |-----------|-------|---------| -| db | 512 MB | PostgreSQL 15 | -| redis | 128 MB | Task broker + cache | +| db | 256 MB | PostgreSQL 15 | +| redis | 128 MB | Task broker (maxmemory 100mb, allkeys-lru) | | api | 512 MB | FastAPI (Uvicorn) | -| celery-worker | 512 MB | Background tasks | -| celery-beat | 256 MB | Task scheduler | -| flower | 256 MB | Celery monitoring | -| **App subtotal** | **2,176 MB** | | +| celery-worker | 768 MB | Background tasks (concurrency=2) | +| celery-beat | 128 MB | Task scheduler | 
+| flower | 192 MB | Celery monitoring | +| **App subtotal** | **1,984 MB** | | | prometheus | 256 MB | Metrics (15-day retention) | | grafana | 192 MB | Dashboards | | node-exporter | 64 MB | Host metrics | | cadvisor | 128 MB | Container metrics | +| redis-exporter | 32 MB | Redis metrics | | alertmanager | 32 MB | Alert routing | -| **Monitoring subtotal** | **672 MB** | | -| **Total containers** | **2,848 MB** | | -| OS + Caddy + Gitea + CI | ~1,150 MB | Remaining headroom | +| **Monitoring subtotal** | **704 MB** | | +| **Total containers** | **2,688 MB** | | +| OS + Caddy + Gitea + CI | ~1,300 MB | Remaining headroom | --- @@ -164,14 +165,15 @@ Update `DATABASE_URL` in API and Celery to point to PgBouncer instead of `db` di ### 3. Redis Hardening -Set a `maxmemory` policy to prevent OOM: +Redis `maxmemory` is already configured in `docker-compose.yml`: ```yaml -# In docker-compose.yml, add command to redis service redis: command: redis-server --maxmemory 100mb --maxmemory-policy allkeys-lru ``` +If Redis memory usage grows beyond 80%, the `RedisHighMemoryUsage` alert fires. When that happens, increase `maxmemory` and the container `mem_limit` together, keeping `mem_limit` above `maxmemory` (currently 100mb within a 128 MB limit). + ### 4. Separate Database Server When the database needs its own resources (typically >50 stores):