fix(ops): harden deploy/restore/verify scripts
Some checks failed
CI / validate (push) Has been cancelled
CI / dependency-scanning (push) Has been cancelled
CI / docs (push) Has been cancelled
CI / deploy (push) Has been cancelled
CI / ruff (push) Successful in 9s
CI / pytest (push) Has been cancelled

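- deploy.sh: wait for the database to pass pg_isready before running migrations; prune Docker images older than 72h once the health check passes
- restore.sh: add redis-exporter to the list of stopped containers; replace the fixed `sleep 5` with a pg_isready polling loop
- verify-server.sh: add redis-exporter to the expected containers; add Sentry and Redis exporter checks (sections 9–10) and renumber the later sections to 11–14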

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-28 00:23:14 +01:00
parent 35d1559162
commit 93a2d9baff
3 changed files with 85 additions and 15 deletions

View File

@@ -44,24 +44,46 @@ if ! $COMPOSE up -d --build; then
exit 2 exit 2
fi fi
# ── 3. Run database migrations ─────────────────────────────────────────────── # ── 3. Wait for DB to be healthy before running migrations ──────────────────
log "Waiting for database to be healthy …"
for i in $(seq 1 12); do
if $COMPOSE exec -T db pg_isready -U orion_user -d orion_db > /dev/null 2>&1; then
log "Database is ready (attempt $i/12)"
break
fi
if [ "$i" -eq 12 ]; then
log "ERROR: database not ready after 60s"
exit 3
fi
sleep 5
done
# ── 4. Run database migrations ───────────────────────────────────────────────
log "Running database migrations …" log "Running database migrations …"
if ! $COMPOSE exec -T -e PYTHONPATH=/app api python -m alembic upgrade heads; then if ! $COMPOSE exec -T -e PYTHONPATH=/app api python -m alembic upgrade heads; then
log "ERROR: alembic migration failed" log "ERROR: alembic migration failed"
exit 3 exit 3
fi fi
# ── 4. Health check with retries ───────────────────────────────────────────── # ── 5. Health check with retries ─────────────────────────────────────────────
log "Waiting for health check ($HEALTH_URL) …" log "Waiting for health check ($HEALTH_URL) …"
for i in $(seq 1 "$HEALTH_RETRIES"); do for i in $(seq 1 "$HEALTH_RETRIES"); do
if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
log "Health check passed (attempt $i/$HEALTH_RETRIES)" log "Health check passed (attempt $i/$HEALTH_RETRIES)"
log "Deploy complete." break
exit 0
fi fi
log "Health check attempt $i/$HEALTH_RETRIES failed, retrying in ${HEALTH_INTERVAL}s …" log "Health check attempt $i/$HEALTH_RETRIES failed, retrying in ${HEALTH_INTERVAL}s …"
sleep "$HEALTH_INTERVAL" sleep "$HEALTH_INTERVAL"
done done
log "ERROR: health check failed after $HEALTH_RETRIES attempts" if ! curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
exit 4 log "ERROR: health check failed after $HEALTH_RETRIES attempts"
exit 4
fi
# ── 6. Clean up old Docker images ───────────────────────────────────────────
log "Pruning unused Docker images …"
docker image prune -f --filter "until=72h" > /dev/null 2>&1 || true
log "Deploy complete."
exit 0

View File

@@ -46,10 +46,10 @@ restore_orion() {
log "=== Restoring Orion database ===" log "=== Restoring Orion database ==="
# Stop app containers (keep DB running) # Stop app containers (keep DB and Redis running)
log "Stopping Orion app containers..." log "Stopping Orion app containers..."
cd "${ORION_APP_DIR}" cd "${ORION_APP_DIR}"
docker compose --profile full stop api celery-worker celery-beat flower 2>/dev/null || true docker compose --profile full stop api celery-worker celery-beat flower redis-exporter 2>/dev/null || true
# Drop and recreate database # Drop and recreate database
log "Dropping and recreating ${db_name}..." log "Dropping and recreating ${db_name}..."
@@ -66,8 +66,19 @@ restore_orion() {
log "Running Alembic migrations..." log "Running Alembic migrations..."
docker compose --profile full start api 2>/dev/null || \ docker compose --profile full start api 2>/dev/null || \
docker compose --profile full up -d api docker compose --profile full up -d api
sleep 5 # Wait for API container to be ready
docker compose --profile full exec -e PYTHONPATH=/app api python -m alembic upgrade heads # Poll the database with pg_isready before running migrations (the API container itself is not checked)
log "Waiting for API container to be ready..."
for i in $(seq 1 12); do
if docker compose --profile full exec -T db pg_isready -U orion_user -d orion_db > /dev/null 2>&1; then
log "Database is ready (attempt $i/12)"
break
fi
[ "$i" -eq 12 ] && { log "WARNING: database may not be ready, attempting migration anyway"; }
sleep 5
done
docker compose --profile full exec -T -e PYTHONPATH=/app api python -m alembic upgrade heads
# Restart all # Restart all
log "Restarting all services..." log "Restarting all services..."

View File

@@ -226,7 +226,7 @@ if [ "$MODE" = "prod" ]; then
section "3. Docker Containers" section "3. Docker Containers"
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
EXPECTED_CONTAINERS="db redis api celery-worker celery-beat flower prometheus grafana node-exporter cadvisor alertmanager" EXPECTED_CONTAINERS="db redis api celery-worker celery-beat flower prometheus grafana node-exporter cadvisor alertmanager redis-exporter"
for name in $EXPECTED_CONTAINERS; do for name in $EXPECTED_CONTAINERS; do
container=$(docker compose --profile full -f "$ORION_DIR/docker-compose.yml" ps --format '{{.Name}}' 2>/dev/null | grep "$name" || true) container=$(docker compose --profile full -f "$ORION_DIR/docker-compose.yml" ps --format '{{.Name}}' 2>/dev/null | grep "$name" || true)
if [ -n "$container" ]; then if [ -n "$container" ]; then
@@ -338,7 +338,44 @@ if [ "$MODE" = "prod" ]; then
fi fi
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
section "9. DNS Resolution" section "9. Sentry"
# -----------------------------------------------------------------------
if [ -f "$ORION_DIR/.env" ]; then
SENTRY_DSN=$(grep -E '^SENTRY_DSN=' "$ORION_DIR/.env" 2>/dev/null | cut -d= -f2- || echo "")
if [ -n "$SENTRY_DSN" ] && [ "$SENTRY_DSN" != "None" ]; then
pass "SENTRY_DSN is configured"
else
warn "SENTRY_DSN not set — error tracking disabled"
fi
SENTRY_ENV=$(grep -E '^SENTRY_ENVIRONMENT=' "$ORION_DIR/.env" 2>/dev/null | cut -d= -f2- || echo "")
if [ "$SENTRY_ENV" = "production" ]; then
pass "SENTRY_ENVIRONMENT is 'production'"
elif [ -n "$SENTRY_ENV" ]; then
warn "SENTRY_ENVIRONMENT is '$SENTRY_ENV' (expected 'production')"
fi
fi
# -----------------------------------------------------------------------
section "10. Redis Exporter"
# -----------------------------------------------------------------------
redis_exporter_status=$(curl -s -o /dev/null -w '%{http_code}' http://localhost:9121/health 2>/dev/null || echo "000")
if [ "$redis_exporter_status" = "200" ]; then
pass "Redis exporter: accessible (HTTP 200)"
redis_up=$(curl -s http://localhost:9121/metrics 2>/dev/null | grep '^redis_up ' | awk '{print $2}' || echo "0")
if [ "$redis_up" = "1" ]; then
pass "Redis exporter: redis_up = 1"
else
fail "Redis exporter: redis_up = $redis_up (Redis unreachable)"
fi
else
fail "Redis exporter: HTTP $redis_exporter_status (expected 200)"
fi
# -----------------------------------------------------------------------
section "11. DNS Resolution"
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
EXPECTED_DOMAINS="wizard.lu api.wizard.lu git.wizard.lu grafana.wizard.lu flower.wizard.lu omsflow.lu rewardflow.lu" EXPECTED_DOMAINS="wizard.lu api.wizard.lu git.wizard.lu grafana.wizard.lu flower.wizard.lu omsflow.lu rewardflow.lu"
@@ -352,7 +389,7 @@ if [ "$MODE" = "prod" ]; then
done done
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
section "10. Health Endpoints" section "12. Health Endpoints"
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
HEALTH_URL="http://localhost:8001/health" HEALTH_URL="http://localhost:8001/health"
@@ -383,7 +420,7 @@ if [ "$MODE" = "prod" ]; then
fi fi
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
section "11. Prometheus Targets" section "13. Prometheus Targets"
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
targets=$(curl -s http://localhost:9090/api/v1/targets 2>/dev/null || echo "") targets=$(curl -s http://localhost:9090/api/v1/targets 2>/dev/null || echo "")
@@ -402,7 +439,7 @@ if [ "$MODE" = "prod" ]; then
fi fi
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
section "12. Grafana" section "14. Grafana"
# ----------------------------------------------------------------------- # -----------------------------------------------------------------------
grafana_status=$(curl -s -o /dev/null -w '%{http_code}' http://localhost:3001/api/health 2>/dev/null || echo "000") grafana_status=$(curl -s -o /dev/null -w '%{http_code}' http://localhost:3001/api/health 2>/dev/null || echo "000")