#!/usr/bin/env bash
# verify-server.sh — Check Orion infrastructure health
#
# Automatically detects dev vs production from .env DEBUG flag.
# Override with: bash scripts/verify-server.sh --dev | --prod
#
# Optional environment:
#   ORION_DIR     production checkout to inspect (default: <home>/apps/orion)
#   HTTP_TIMEOUT  per-request curl timeout in seconds (default: 10)
#
# Exit status: 1 when at least one check failed, 0 otherwise.

# No `-e` on purpose: every check must run even when an earlier one fails.
set -uo pipefail

PASS=0
FAIL=0
WARN=0
HTTP_TIMEOUT="${HTTP_TIMEOUT:-10}"
COMPOSE_CONTAINERS="" # filled once in the production Docker section

# ---------------------------------------------------------------------------
# Reporting helpers — print one result line and bump the matching counter.
# ---------------------------------------------------------------------------
pass() {
  echo " [PASS] $1"
  PASS=$((PASS + 1))
}

fail() {
  echo " [FAIL] $1"
  FAIL=$((FAIL + 1))
}

warn() {
  echo " [WARN] $1"
  WARN=$((WARN + 1))
}

section() {
  echo ""
  echo "=== $1 ==="
}

# ---------------------------------------------------------------------------
# Generic helpers
# ---------------------------------------------------------------------------

# env_val KEY [FILE] — print the value of the first KEY= line in FILE
# (default: $ENV_FILE). A trailing carriage return and one pair of matching
# surrounding quotes are removed. Prints an empty line when the key or the
# file is missing.
env_val() {
  local key="$1" file="${2:-$ENV_FILE}" value
  value=$(grep -E "^${key}=" "$file" 2>/dev/null | head -n 1 | cut -d= -f2-) || true
  value="${value%$'\r'}"
  if [[ "$value" == \"*\" || "$value" == \'*\' ]]; then
    value="${value:1:${#value}-2}"
  fi
  printf '%s\n' "$value"
}

# url_authority URL — print the host[:port] part of URL: the scheme,
# everything up to the last "@" and everything from the first "/", "?" or "#"
# after that are removed.
url_authority() {
  local rest="${1#*://}"
  rest="${rest##*@}"
  printf '%s\n' "${rest%%[/?#]*}"
}

# url_host URL — print the host name of URL.
url_host() {
  local authority
  authority=$(url_authority "$1")
  printf '%s\n' "${authority%%:*}"
}

# url_port URL — print the explicit numeric port of URL, or nothing when the
# URL carries none (callers apply their own default).
url_port() {
  local authority port
  authority=$(url_authority "$1")
  if [[ "$authority" == *:* ]]; then
    port="${authority##*:}"
    if [[ "$port" =~ ^[0-9]+$ ]]; then
      printf '%s\n' "$port"
    fi
  fi
  return 0
}

# http_status URL — print the three-digit HTTP status for URL, or "000" when
# no response was received. curl already writes "000" through -w when the
# request fails, so its exit status is ignored rather than appending a
# second "000".
http_status() {
  local code
  code=$(curl -s -o /dev/null --max-time "$HTTP_TIMEOUT" -w '%{http_code}' "$1" 2>/dev/null) || true
  if [[ ! "$code" =~ ^[0-9]{3}$ ]]; then
    code="000"
  fi
  printf '%s\n' "$code"
}

# http_body URL — print the response body of URL; prints nothing on failure.
http_body() {
  curl -s --max-time "$HTTP_TIMEOUT" "$1" 2>/dev/null || true
}

# with_timeout SECONDS CMD [ARG...] — run CMD under timeout(1) when that tool
# is installed, otherwise run it unbounded.
with_timeout() {
  local seconds="$1"
  shift
  if command -v timeout &>/dev/null; then
    timeout "$seconds" "$@"
  else
    "$@"
  fi
}

# find_container SERVICE — print the first name in $COMPOSE_CONTAINERS that
# looks like "<project>-SERVICE-<index>"; prints nothing when none matches.
find_container() {
  grep -E "[-]${1}-[0-9]" <<<"$COMPOSE_CONTAINERS" | head -n 1 || true
}

# ---------------------------------------------------------------------------
# Detect environment
# ---------------------------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
ENV_FILE="$PROJECT_DIR/.env"

MODE=""
case "${1:-}" in
  --dev)
    MODE="dev"
    ;;
  --prod)
    MODE="prod"
    ;;
  *)
    # No .env found — assume production (server deployment)
    if [ -f "$ENV_FILE" ] && grep -qE '^DEBUG=True' "$ENV_FILE" 2>/dev/null; then
      MODE="dev"
    else
      MODE="prod"
    fi
    ;;
esac

echo "==========================================="
echo " Orion Infrastructure Check (${MODE})"
echo "==========================================="

# ===========================================================================
# DEVELOPMENT CHECKS
# ===========================================================================
if [ "$MODE" = "dev" ]; then

  # -----------------------------------------------------------------------
  section "1. .env Configuration"
  # -----------------------------------------------------------------------
  if [ -f "$ENV_FILE" ]; then
    pass ".env file exists"
  else
    fail ".env file not found — copy from .env.example"
  fi

  REQUIRED_KEYS="DATABASE_URL REDIS_URL JWT_SECRET_KEY ADMIN_EMAIL MAIN_DOMAIN"
  for key in $REQUIRED_KEYS; do
    val=$(env_val "$key")
    if [ -n "$val" ]; then
      pass "$key is set"
    else
      fail "$key is missing or empty"
    fi
  done

  # Check for stale wizamart references
  if grep -qiE 'wizamart' "$ENV_FILE" 2>/dev/null; then
    fail "Stale 'wizamart' references found in .env"
  else
    pass "No stale wizamart references"
  fi

  # -----------------------------------------------------------------------
  section "2. PostgreSQL"
  # -----------------------------------------------------------------------
  DB_URL=$(env_val "DATABASE_URL")
  if [ -n "$DB_URL" ]; then
    # Extract host and port from DATABASE_URL; the port defaults to 5432
    # when the URL does not name one.
    DB_HOST=$(url_host "$DB_URL")
    DB_PORT=$(url_port "$DB_URL")
    DB_PORT="${DB_PORT:-5432}"

    if command -v pg_isready &>/dev/null; then
      if pg_isready -h "$DB_HOST" -p "$DB_PORT" &>/dev/null; then
        pass "PostgreSQL reachable at $DB_HOST:$DB_PORT"
      else
        fail "PostgreSQL not reachable at $DB_HOST:$DB_PORT — start with: docker compose up -d db"
      fi
    elif (echo >/dev/tcp/"$DB_HOST"/"$DB_PORT") &>/dev/null; then
      # pg_isready unavailable: fall back to a plain TCP connect.
      pass "PostgreSQL port open at $DB_HOST:$DB_PORT"
    else
      fail "PostgreSQL not reachable at $DB_HOST:$DB_PORT — start with: docker compose up -d db"
    fi
  else
    fail "DATABASE_URL not set"
  fi

  # -----------------------------------------------------------------------
  section "3. Redis"
  # -----------------------------------------------------------------------
  REDIS_URL=$(env_val "REDIS_URL")
  if [ -n "$REDIS_URL" ]; then
    # Extract host and port from redis://host:port/db; the port defaults to
    # 6379 when the URL does not name one.
    REDIS_HOST=$(url_host "$REDIS_URL")
    REDIS_PORT=$(url_port "$REDIS_URL")
    REDIS_PORT="${REDIS_PORT:-6379}"

    if command -v redis-cli &>/dev/null; then
      if redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" ping &>/dev/null; then
        pass "Redis reachable at $REDIS_HOST:$REDIS_PORT"
      else
        fail "Redis not reachable at $REDIS_HOST:$REDIS_PORT — start with: docker compose up -d redis"
      fi
    elif (echo >/dev/tcp/"$REDIS_HOST"/"$REDIS_PORT") &>/dev/null; then
      # redis-cli unavailable: same TCP fallback as the PostgreSQL check.
      pass "Redis port open at $REDIS_HOST:$REDIS_PORT"
    else
      fail "Redis not reachable at $REDIS_HOST:$REDIS_PORT — start with: docker compose up -d redis"
    fi
  else
    fail "REDIS_URL not set"
  fi

  # -----------------------------------------------------------------------
  section "4. Dev Server Health"
  # -----------------------------------------------------------------------
  # Dev server runs on port 9999 (make dev)
  DEV_PORT=9999
  HEALTH_URL="http://localhost:$DEV_PORT/health"
  READY_URL="http://localhost:$DEV_PORT/health/ready"

  status=$(http_status "$HEALTH_URL")
  if [ "$status" = "200" ]; then
    pass "/health endpoint: HTTP 200 (port $DEV_PORT)"
  elif [ "$status" = "000" ]; then
    warn "Dev server not running on port $DEV_PORT — start with: make dev"
  else
    fail "/health endpoint: HTTP $status (port $DEV_PORT)"
  fi

  # Readiness is only meaningful once the server answered /health.
  if [ "$status" = "200" ]; then
    ready_response=$(http_body "$READY_URL")
    if grep -q '"healthy"' <<<"$ready_response"; then
      pass "/health/ready: healthy"
    else
      fail "/health/ready: not healthy"
    fi
  fi

  # -----------------------------------------------------------------------
  section "5. Migrations"
  # -----------------------------------------------------------------------
  if command -v python3 &>/dev/null; then
    # Decide on the exit status first so that a failed run whose error text
    # happens to contain "head" is not reported as a pass.
    if alembic_output=$(cd "$PROJECT_DIR" && python3 -m alembic current 2>&1); then
      if grep -q "head" <<<"$alembic_output"; then
        pass "Alembic migrations at head"
      else
        warn "Migrations may not be at head — run: make migrate-up"
      fi
    else
      fail "Could not check migration status"
    fi
  else
    warn "python3 not found, cannot check migrations"
  fi

fi

# ===========================================================================
# PRODUCTION CHECKS
# ===========================================================================
if [ "$MODE" = "prod" ]; then

  # When run with sudo, $HOME is /root. Use SUDO_USER's home instead, and
  # fall back to $HOME when the lookup returns nothing.
  if [ -n "${SUDO_USER:-}" ]; then
    REAL_HOME=$(getent passwd "$SUDO_USER" | cut -d: -f6)
    REAL_HOME="${REAL_HOME:-$HOME}"
  else
    REAL_HOME="$HOME"
  fi
  ORION_DIR="${ORION_DIR:-$REAL_HOME/apps/orion}"
  PROD_ENV_FILE="$ORION_DIR/.env"

  # -----------------------------------------------------------------------
  section "1. fail2ban"
  # -----------------------------------------------------------------------
  if systemctl is-active --quiet fail2ban; then
    pass "fail2ban service running"
  else
    fail "fail2ban service not running"
  fi

  if sudo fail2ban-client status sshd &>/dev/null; then
    pass "SSH jail active"
  else
    fail "SSH jail not active"
  fi

  if sudo fail2ban-client status caddy-auth &>/dev/null; then
    pass "Caddy auth jail active"
  else
    fail "Caddy auth jail not active — deploy /etc/fail2ban/jail.d/caddy.conf"
  fi

  # -----------------------------------------------------------------------
  section "2. Unattended Upgrades"
  # -----------------------------------------------------------------------
  # `dpkg -l` also succeeds for removed packages that left config files
  # behind, so ask for the exact install state instead.
  # shellcheck disable=SC2016 — ${Status} is a dpkg-query format field
  if dpkg-query -W -f='${Status}' unattended-upgrades 2>/dev/null |
    grep -q 'install ok installed'; then
    pass "unattended-upgrades package installed"
  else
    fail "unattended-upgrades not installed"
  fi

  if [ -f /etc/apt/apt.conf.d/20auto-upgrades ]; then
    if grep -q 'Unattended-Upgrade "1"' /etc/apt/apt.conf.d/20auto-upgrades; then
      pass "Automatic upgrades enabled"
    else
      fail "Automatic upgrades not enabled in 20auto-upgrades"
    fi
  else
    fail "/etc/apt/apt.conf.d/20auto-upgrades missing"
  fi

  # -----------------------------------------------------------------------
  section "3. Docker Containers"
  # -----------------------------------------------------------------------
  # List the compose containers once; both loops below search this snapshot.
  COMPOSE_CONTAINERS=$(docker compose --profile full --project-directory "$ORION_DIR" \
    ps --format '{{.Name}}' 2>/dev/null || true)

  EXPECTED_CONTAINERS="db redis api celery-worker celery-beat flower prometheus grafana node-exporter cadvisor alertmanager redis-exporter"
  for name in $EXPECTED_CONTAINERS; do
    container=$(find_container "$name")
    if [ -n "$container" ]; then
      state=$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null || echo "unknown")
      if [ "$state" = "running" ]; then
        pass "Container $name: running"
      else
        fail "Container $name: $state (expected running)"
      fi
    else
      fail "Container $name: not found"
    fi
  done

  # Check for healthy status on containers with healthchecks
  for name in db redis api celery-worker; do
    container=$(find_container "$name")
    if [ -n "$container" ]; then
      # A failing inspect (for instance no Health data) is reported as "none".
      health=$(docker inspect --format='{{.State.Health.Status}}' "$container" 2>/dev/null || echo "none")
      if [ "$health" = "healthy" ]; then
        pass "Container $name: healthy"
      elif [ "$health" = "none" ]; then
        warn "Container $name: no healthcheck configured"
      else
        fail "Container $name: $health (expected healthy)"
      fi
    fi
  done

  # -----------------------------------------------------------------------
  section "4. Caddy"
  # -----------------------------------------------------------------------
  if systemctl is-active --quiet caddy; then
    pass "Caddy service running"
  else
    fail "Caddy service not running"
  fi

  if [ -f /etc/caddy/Caddyfile ]; then
    pass "Caddyfile exists"
  else
    fail "Caddyfile not found"
  fi

  # -----------------------------------------------------------------------
  section "5. Backup Timer"
  # -----------------------------------------------------------------------
  if systemctl is-active --quiet orion-backup.timer; then
    pass "Backup timer active"
  else
    fail "Backup timer not active — enable with: sudo systemctl enable --now orion-backup.timer"
  fi

  LATEST_BACKUP=$(find "$REAL_HOME/backups/orion/daily/" -name "*.sql.gz" -mtime -2 2>/dev/null | head -n 1)
  if [ -n "$LATEST_BACKUP" ]; then
    pass "Recent backup found: $(basename "$LATEST_BACKUP")"
  else
    warn "No backup found from the last 2 days"
  fi

  # -----------------------------------------------------------------------
  section "6. Gitea Runner"
  # -----------------------------------------------------------------------
  if systemctl is-active --quiet gitea-runner; then
    pass "Gitea runner service running"
  else
    fail "Gitea runner service not running"
  fi

  # -----------------------------------------------------------------------
  section "7. SSL Certificates"
  # -----------------------------------------------------------------------
  DOMAINS="wizard.lu api.wizard.lu git.wizard.lu omsflow.lu rewardflow.lu"
  for domain in $DOMAINS; do
    # Bound the TLS handshake so one unreachable host cannot stall the run.
    expiry=$(echo |
      with_timeout 10 openssl s_client -servername "$domain" -connect "$domain":443 2>/dev/null |
      openssl x509 -noout -enddate 2>/dev/null |
      cut -d= -f2)
    if [ -z "$expiry" ]; then
      fail "SSL $domain: could not check certificate"
      continue
    fi

    # An unparsable date is its own failure, not an "expired" certificate.
    if ! expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null); then
      fail "SSL $domain: could not parse certificate expiry date '$expiry'"
      continue
    fi

    now_epoch=$(date +%s)
    seconds_left=$((expiry_epoch - now_epoch))
    days_left=$((seconds_left / 86400))
    # Compare seconds for the expired case: a certificate with less than a
    # full day left has days_left = 0 but is still valid.
    if [ "$seconds_left" -le 0 ]; then
      fail "SSL $domain: expired"
    elif [ "$days_left" -gt 14 ]; then
      pass "SSL $domain: valid ($days_left days remaining)"
    else
      warn "SSL $domain: expiring soon ($days_left days remaining)"
    fi
  done

  # -----------------------------------------------------------------------
  section "8. Flower Password"
  # -----------------------------------------------------------------------
  if [ -f "$PROD_ENV_FILE" ]; then
    FLOWER_PW=$(env_val "FLOWER_PASSWORD" "$PROD_ENV_FILE")
    if [ -z "$FLOWER_PW" ] || [ "$FLOWER_PW" = "changeme" ]; then
      fail "Flower password is default or empty — change FLOWER_PASSWORD in .env"
    else
      pass "Flower password changed from default"
    fi
  else
    warn ".env file not found at $ORION_DIR/.env"
  fi

  # -----------------------------------------------------------------------
  section "9. Sentry"
  # -----------------------------------------------------------------------
  if [ -f "$PROD_ENV_FILE" ]; then
    SENTRY_DSN=$(env_val "SENTRY_DSN" "$PROD_ENV_FILE")
    if [ -n "$SENTRY_DSN" ] && [ "$SENTRY_DSN" != "None" ]; then
      pass "SENTRY_DSN is configured"
    else
      warn "SENTRY_DSN not set — error tracking disabled"
    fi

    SENTRY_ENV=$(env_val "SENTRY_ENVIRONMENT" "$PROD_ENV_FILE")
    if [ "$SENTRY_ENV" = "production" ]; then
      pass "SENTRY_ENVIRONMENT is 'production'"
    elif [ -n "$SENTRY_ENV" ]; then
      warn "SENTRY_ENVIRONMENT is '$SENTRY_ENV' (expected 'production')"
    fi
  fi

  # -----------------------------------------------------------------------
  section "10. Redis Exporter"
  # -----------------------------------------------------------------------
  redis_exporter_status=$(http_status "http://localhost:9121/health")
  if [ "$redis_exporter_status" = "200" ]; then
    pass "Redis exporter: accessible (HTTP 200)"
    redis_up=$(http_body "http://localhost:9121/metrics" | grep '^redis_up ' | awk '{print $2}')
    redis_up="${redis_up:-0}" # metric line absent: treat as down
    if [ "$redis_up" = "1" ]; then
      pass "Redis exporter: redis_up = 1"
    else
      fail "Redis exporter: redis_up = $redis_up (Redis unreachable)"
    fi
  else
    fail "Redis exporter: HTTP $redis_exporter_status (expected 200)"
  fi

  # -----------------------------------------------------------------------
  section "11. DNS Resolution"
  # -----------------------------------------------------------------------
  EXPECTED_DOMAINS="wizard.lu api.wizard.lu git.wizard.lu grafana.wizard.lu flower.wizard.lu omsflow.lu rewardflow.lu"
  for domain in $EXPECTED_DOMAINS; do
    resolved=$(dig +short "$domain" A 2>/dev/null | head -n 1)
    if [ -n "$resolved" ]; then
      pass "DNS $domain: $resolved"
    else
      fail "DNS $domain: no A record found"
    fi
  done

  # -----------------------------------------------------------------------
  section "12. Health Endpoints"
  # -----------------------------------------------------------------------
  HEALTH_URL="http://localhost:8001/health"
  READY_URL="http://localhost:8001/health/ready"

  status=$(http_status "$HEALTH_URL")
  if [ "$status" = "200" ]; then
    pass "/health endpoint: HTTP 200"
  else
    fail "/health endpoint: HTTP $status"
  fi

  ready_response=$(http_body "$READY_URL")
  if grep -q '"healthy"' <<<"$ready_response"; then
    pass "/health/ready: healthy"
    if grep -q '"database"' <<<"$ready_response"; then
      pass "/health/ready: database check registered"
    else
      warn "/health/ready: database check not found"
    fi
    if grep -q '"redis"' <<<"$ready_response"; then
      pass "/health/ready: redis check registered"
    else
      warn "/health/ready: redis check not found"
    fi
  else
    fail "/health/ready: not healthy — $ready_response"
  fi

  # -----------------------------------------------------------------------
  section "13. Prometheus Targets"
  # -----------------------------------------------------------------------
  targets=$(http_body "http://localhost:9090/api/v1/targets")
  if [ -n "$targets" ]; then
    # Count occurrences, not lines: `grep -o | wc -l` rather than `grep -c`,
    # so several targets on one line of the response are all counted.
    up_count=$(grep -o '"health":"up"' <<<"$targets" | wc -l)
    down_count=$(grep -o '"health":"down"' <<<"$targets" | wc -l)
    if [ "$down_count" -eq 0 ] && [ "$up_count" -gt 0 ]; then
      pass "Prometheus: all $up_count targets up"
    elif [ "$down_count" -gt 0 ]; then
      fail "Prometheus: $down_count target(s) down ($up_count up)"
    else
      warn "Prometheus: no targets found"
    fi
  else
    fail "Prometheus: could not reach API at localhost:9090"
  fi

  # -----------------------------------------------------------------------
  section "14. Grafana"
  # -----------------------------------------------------------------------
  grafana_status=$(http_status "http://localhost:3001/api/health")
  if [ "$grafana_status" = "200" ]; then
    pass "Grafana: accessible (HTTP 200)"
  else
    fail "Grafana: HTTP $grafana_status (expected 200)"
  fi

fi

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
echo ""
echo "==========================================="
echo " PASS: $PASS | FAIL: $FAIL | WARN: $WARN"
echo "==========================================="

if [ "$FAIL" -gt 0 ]; then
  echo " Status: NOT READY — fix $FAIL issue(s) above"
  exit 1
elif [ "$WARN" -gt 0 ]; then
  echo " Status: READY (with $WARN warning(s))"
  exit 0
else
  echo " Status: FULLY READY"
  exit 0
fi