fix(ops): harden deploy/restore/verify scripts
Some checks failed
Some checks failed
- deploy.sh: add DB health wait before migrations, prune old Docker images
- restore.sh: add redis-exporter to stop list, replace sleep with DB health wait
- verify-server.sh: add redis-exporter to expected containers, add Sentry + Redis exporter checks

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -44,24 +44,46 @@ if ! $COMPOSE up -d --build; then
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# ── 3. Run database migrations ───────────────────────────────────────────────
|
||||
# ── 3. Wait for DB to be healthy before running migrations ──────────────────
|
||||
log "Waiting for database to be healthy …"
|
||||
for i in $(seq 1 12); do
|
||||
if $COMPOSE exec -T db pg_isready -U orion_user -d orion_db > /dev/null 2>&1; then
|
||||
log "Database is ready (attempt $i/12)"
|
||||
break
|
||||
fi
|
||||
if [ "$i" -eq 12 ]; then
|
||||
log "ERROR: database not ready after 60s"
|
||||
exit 3
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
|
||||
# ── 4. Run database migrations ───────────────────────────────────────────────
|
||||
log "Running database migrations …"
|
||||
if ! $COMPOSE exec -T -e PYTHONPATH=/app api python -m alembic upgrade heads; then
|
||||
log "ERROR: alembic migration failed"
|
||||
exit 3
|
||||
fi
|
||||
|
||||
# ── 4. Health check with retries ─────────────────────────────────────────────
|
||||
# ── 5. Health check with retries ─────────────────────────────────────────────
|
||||
log "Waiting for health check ($HEALTH_URL) …"
|
||||
for i in $(seq 1 "$HEALTH_RETRIES"); do
|
||||
if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
|
||||
log "Health check passed (attempt $i/$HEALTH_RETRIES)"
|
||||
log "Deploy complete."
|
||||
exit 0
|
||||
break
|
||||
fi
|
||||
log "Health check attempt $i/$HEALTH_RETRIES failed, retrying in ${HEALTH_INTERVAL}s …"
|
||||
sleep "$HEALTH_INTERVAL"
|
||||
done
|
||||
|
||||
if ! curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
|
||||
log "ERROR: health check failed after $HEALTH_RETRIES attempts"
|
||||
exit 4
|
||||
fi
|
||||
|
||||
# ── 6. Clean up old Docker images ───────────────────────────────────────────
|
||||
log "Pruning unused Docker images …"
|
||||
docker image prune -f --filter "until=72h" > /dev/null 2>&1 || true
|
||||
|
||||
log "Deploy complete."
|
||||
exit 0
|
||||
|
||||
@@ -46,10 +46,10 @@ restore_orion() {
|
||||
|
||||
log "=== Restoring Orion database ==="
|
||||
|
||||
# Stop app containers (keep DB running)
|
||||
# Stop app containers (keep DB and Redis running)
|
||||
log "Stopping Orion app containers..."
|
||||
cd "${ORION_APP_DIR}"
|
||||
docker compose --profile full stop api celery-worker celery-beat flower 2>/dev/null || true
|
||||
docker compose --profile full stop api celery-worker celery-beat flower redis-exporter 2>/dev/null || true
|
||||
|
||||
# Drop and recreate database
|
||||
log "Dropping and recreating ${db_name}..."
|
||||
@@ -66,8 +66,19 @@ restore_orion() {
|
||||
log "Running Alembic migrations..."
|
||||
docker compose --profile full start api 2>/dev/null || \
|
||||
docker compose --profile full up -d api
|
||||
sleep 5 # Wait for API container to be ready
|
||||
docker compose --profile full exec -e PYTHONPATH=/app api python -m alembic upgrade heads
|
||||
|
||||
# Wait for the database to accept connections (pg_isready on the db service) before running migrations
|
||||
log "Waiting for API container to be ready..."
|
||||
for i in $(seq 1 12); do
|
||||
if docker compose --profile full exec -T db pg_isready -U orion_user -d orion_db > /dev/null 2>&1; then
|
||||
log "Database is ready (attempt $i/12)"
|
||||
break
|
||||
fi
|
||||
[ "$i" -eq 12 ] && { log "WARNING: database may not be ready, attempting migration anyway"; }
|
||||
sleep 5
|
||||
done
|
||||
|
||||
docker compose --profile full exec -T -e PYTHONPATH=/app api python -m alembic upgrade heads
|
||||
|
||||
# Restart all
|
||||
log "Restarting all services..."
|
||||
|
||||
@@ -226,7 +226,7 @@ if [ "$MODE" = "prod" ]; then
|
||||
section "3. Docker Containers"
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
EXPECTED_CONTAINERS="db redis api celery-worker celery-beat flower prometheus grafana node-exporter cadvisor alertmanager"
|
||||
EXPECTED_CONTAINERS="db redis api celery-worker celery-beat flower prometheus grafana node-exporter cadvisor alertmanager redis-exporter"
|
||||
for name in $EXPECTED_CONTAINERS; do
|
||||
container=$(docker compose --profile full -f "$ORION_DIR/docker-compose.yml" ps --format '{{.Name}}' 2>/dev/null | grep "$name" || true)
|
||||
if [ -n "$container" ]; then
|
||||
@@ -338,7 +338,44 @@ if [ "$MODE" = "prod" ]; then
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
section "9. DNS Resolution"
|
||||
section "9. Sentry"
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
if [ -f "$ORION_DIR/.env" ]; then
|
||||
SENTRY_DSN=$(grep -E '^SENTRY_DSN=' "$ORION_DIR/.env" 2>/dev/null | cut -d= -f2- || echo "")
|
||||
if [ -n "$SENTRY_DSN" ] && [ "$SENTRY_DSN" != "None" ]; then
|
||||
pass "SENTRY_DSN is configured"
|
||||
else
|
||||
warn "SENTRY_DSN not set — error tracking disabled"
|
||||
fi
|
||||
|
||||
SENTRY_ENV=$(grep -E '^SENTRY_ENVIRONMENT=' "$ORION_DIR/.env" 2>/dev/null | cut -d= -f2- || echo "")
|
||||
if [ "$SENTRY_ENV" = "production" ]; then
|
||||
pass "SENTRY_ENVIRONMENT is 'production'"
|
||||
elif [ -n "$SENTRY_ENV" ]; then
|
||||
warn "SENTRY_ENVIRONMENT is '$SENTRY_ENV' (expected 'production')"
|
||||
fi
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
section "10. Redis Exporter"
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
redis_exporter_status=$(curl -s -o /dev/null -w '%{http_code}' http://localhost:9121/health 2>/dev/null || echo "000")
|
||||
if [ "$redis_exporter_status" = "200" ]; then
|
||||
pass "Redis exporter: accessible (HTTP 200)"
|
||||
redis_up=$(curl -s http://localhost:9121/metrics 2>/dev/null | grep '^redis_up ' | awk '{print $2}' || echo "0")
|
||||
if [ "$redis_up" = "1" ]; then
|
||||
pass "Redis exporter: redis_up = 1"
|
||||
else
|
||||
fail "Redis exporter: redis_up = $redis_up (Redis unreachable)"
|
||||
fi
|
||||
else
|
||||
fail "Redis exporter: HTTP $redis_exporter_status (expected 200)"
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
section "11. DNS Resolution"
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
EXPECTED_DOMAINS="wizard.lu api.wizard.lu git.wizard.lu grafana.wizard.lu flower.wizard.lu omsflow.lu rewardflow.lu"
|
||||
@@ -352,7 +389,7 @@ if [ "$MODE" = "prod" ]; then
|
||||
done
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
section "10. Health Endpoints"
|
||||
section "12. Health Endpoints"
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
HEALTH_URL="http://localhost:8001/health"
|
||||
@@ -383,7 +420,7 @@ if [ "$MODE" = "prod" ]; then
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
section "11. Prometheus Targets"
|
||||
section "13. Prometheus Targets"
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
targets=$(curl -s http://localhost:9090/api/v1/targets 2>/dev/null || echo "")
|
||||
@@ -402,7 +439,7 @@ if [ "$MODE" = "prod" ]; then
|
||||
fi
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
section "12. Grafana"
|
||||
section "14. Grafana"
|
||||
# -----------------------------------------------------------------------
|
||||
|
||||
grafana_status=$(curl -s -o /dev/null -w '%{http_code}' http://localhost:3001/api/health 2>/dev/null || echo "000")
|
||||
|
||||
Reference in New Issue
Block a user