orion/scripts/deploy.sh

#!/usr/bin/env bash
# =============================================================================
# Orion Production Deploy Script
# =============================================================================
# Usage: cd ~/apps/orion && bash scripts/deploy.sh
#
# Called by Gitea Actions CD pipeline (appleboy/ssh-action) or manually.
#
# Exit codes:
#   0 — success
#   1 — git pull failed
#   2 — docker compose build/up failed
#   3 — database not ready in time, or alembic migration failed
#   4 — health check failed
# =============================================================================

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Compose command line shared by every step. It is stored as a plain string and
# expanded unquoted later, so it must stay free of arguments containing spaces.
COMPOSE='docker compose -f docker-compose.yml --profile full'

# Post-deploy health probe: endpoint, maximum attempts, seconds between attempts.
HEALTH_URL='http://localhost:8001/health'
HEALTH_RETRIES=12
HEALTH_INTERVAL=5

# Print all arguments as one line on stdout, prefixed with "[deploy]" and the
# current local time (HH:MM:SS).
log() {
    printf '[deploy] %s %s\n' "$(date '+%H:%M:%S')" "$*"
}

# ── 1. Pull latest code (stash local changes like .env) ─────────────────────
# `git stash` exits 0 without creating an entry when there is nothing to save,
# so popping unconditionally could apply an older, unrelated stash entry to the
# working tree. Snapshot refs/stash before and after stashing to learn whether
# this run actually created an entry, and only pop in that case.
log "Stashing local changes …"
stash_before="$(git rev-parse --quiet --verify refs/stash 2>/dev/null || true)"
git stash --include-untracked --quiet || true
stash_after="$(git rev-parse --quiet --verify refs/stash 2>/dev/null || true)"

# Pop the stash entry created above, if there is one. Best-effort: a failed pop
# (e.g. a conflict with the pulled code) is reported but does not abort the
# deploy; git keeps the entry in the stash in that case.
restore_local_changes() {
    if [ -z "$stash_after" ] || [ "$stash_after" = "$stash_before" ]; then
        return 0
    fi
    if ! git stash pop --quiet; then
        log "WARNING: could not restore stashed local changes; inspect 'git stash list'"
    fi
}

log "Pulling latest code …"
if ! git pull --ff-only; then
    log "ERROR: git pull failed"
    restore_local_changes
    exit 1
fi

log "Restoring local changes …"
restore_local_changes

# ── 1b. Write build info ─────────────────────────────────────────────────────
# Record the deployed commit (8-character short hash) and the UTC deploy time
# as a one-line JSON object in .build-info.
log "Writing build info …"
# `|| true`: a failing lookup leaves the field empty instead of aborting the
# deploy under `set -e`.
build_commit="$(git rev-parse --short=8 HEAD)" || true
build_time="$(date -u +%Y-%m-%dT%H:%M:%SZ)" || true
printf '{"commit":"%s","deployed_at":"%s"}\n' "$build_commit" "$build_time" > .build-info

# ── 2. Rebuild and restart containers ────────────────────────────────────────
# $COMPOSE is left unquoted on purpose so it splits into the command and its
# arguments. Any failure of build/up ends the deploy with exit code 2.
log "Rebuilding containers …"
$COMPOSE up -d --build || {
    log "ERROR: docker compose up failed"
    exit 2
}

# ── 3. Wait for DB to be healthy before running migrations ──────────────────
# Poll pg_isready inside the db container: at most 12 probes, 5 seconds apart.
# Exits with code 3 if the 12th probe still fails.
log "Waiting for database to be healthy …"
attempt=1
until $COMPOSE exec -T db pg_isready -U orion_user -d orion_db > /dev/null 2>&1; do
    if [ "$attempt" -ge 12 ]; then
        log "ERROR: database not ready after 60s"
        exit 3
    fi
    sleep 5
    attempt=$((attempt + 1))
done
log "Database is ready (attempt $attempt/12)"

# ── 4. Run database migrations ───────────────────────────────────────────────
# Run `alembic upgrade heads` inside the api container with PYTHONPATH=/app.
# A non-zero status ends the deploy with exit code 3.
log "Running database migrations …"
migrate_status=0
$COMPOSE exec -T -e PYTHONPATH=/app api python -m alembic upgrade heads || migrate_status=$?
if [ "$migrate_status" -ne 0 ]; then
    log "ERROR: alembic migration failed"
    exit 3
fi

# ── 5. Health check with retries ─────────────────────────────────────────────
# Probe $HEALTH_URL up to $HEALTH_RETRIES times, waiting $HEALTH_INTERVAL
# seconds between attempts. The outcome is tracked in a flag, so a passing
# probe is never followed by a second, redundant request, and a failing run
# makes exactly $HEALTH_RETRIES attempts with no sleep after the last one.
# Exits with code 4 if no attempt succeeds.
log "Waiting for health check ($HEALTH_URL) …"
healthy=0
for ((attempt = 1; attempt <= HEALTH_RETRIES; attempt++)); do
    if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
        log "Health check passed (attempt $attempt/$HEALTH_RETRIES)"
        healthy=1
        break
    fi
    if [ "$attempt" -lt "$HEALTH_RETRIES" ]; then
        log "Health check attempt $attempt/$HEALTH_RETRIES failed, retrying in ${HEALTH_INTERVAL}s …"
        sleep "$HEALTH_INTERVAL"
    else
        # Last attempt: nothing left to retry, so do not sleep.
        log "Health check attempt $attempt/$HEALTH_RETRIES failed"
    fi
done

if [ "$healthy" -ne 1 ]; then
    log "ERROR: health check failed after $HEALTH_RETRIES attempts"
    exit 4
fi

# ── 6. Clean up old Docker images + build cache ─────────────────────────────
# Pruning images alone would let the CI build cache keep growing, so both are
# pruned: images older than 72h, build cache older than 168h (one week, kept so
# CI stays fast). Both steps are best-effort; their output and failures are
# deliberately ignored.
log "Pruning unused Docker images and build cache …"
docker image prune --force --filter "until=72h" &> /dev/null || true
docker builder prune --force --filter "until=168h" &> /dev/null || true

log "Deploy complete."
exit 0