From f631322b4e798abdb082757c915b66af63283982 Mon Sep 17 00:00:00 2001 From: Samir Boulahtit Date: Sun, 1 Mar 2026 22:15:35 +0100 Subject: [PATCH] fix(ops): rebalance container memory limits to prevent celery OOM kills MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Celery worker was OOM-killed (41 restarts) at 512MB with 4 concurrent workers. Reduce concurrency to 2, increase worker limit to 768MB, and reclaim memory from over-provisioned services (db 512→256, beat 256→128, flower 256→192). Total allocation stays within 4GB server budget. Co-Authored-By: Claude Opus 4.6 --- app/core/celery_config.py | 2 +- docker-compose.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/core/celery_config.py b/app/core/celery_config.py index ceb8064f..53adb750 100644 --- a/app/core/celery_config.py +++ b/app/core/celery_config.py @@ -91,7 +91,7 @@ celery_app.conf.update( task_soft_time_limit=25 * 60, # 25 minutes soft limit # Worker settings worker_prefetch_multiplier=1, # Disable prefetching for long tasks - worker_concurrency=4, # Number of concurrent workers + worker_concurrency=2, # Keep low on 4GB servers to avoid OOM # Result backend result_expires=86400, # Results expire after 24 hours # Retry policy diff --git a/docker-compose.yml b/docker-compose.yml index c2059abf..91198de2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,7 +11,7 @@ services: - postgres_data:/var/lib/postgresql/data ports: - "5432:5432" - mem_limit: 512m + mem_limit: 256m healthcheck: test: ["CMD-SHELL", "pg_isready -U orion_user -d orion_db"] interval: 30s @@ -86,7 +86,7 @@ services: volumes: - ./logs:/app/logs - ./exports:/app/exports - mem_limit: 512m + mem_limit: 768m healthcheck: test: ["CMD-SHELL", "celery -A app.core.celery_config inspect ping --timeout 10 || exit 1"] interval: 30s @@ -107,7 +107,7 @@ services: depends_on: redis: condition: service_healthy - mem_limit: 256m + mem_limit: 128m healthcheck: disable: true networks: @@ -128,7 +128,7 @@ services: depends_on: redis: condition: service_healthy - mem_limit: 256m + mem_limit: 192m healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:5555/ || exit 1"] interval: 30s