Files
orion/scripts/verify-server.sh
Samir Boulahtit 8d5c8a52e6
Some checks failed
CI / ruff (push) Successful in 10s
CI / validate (push) Has been cancelled
CI / dependency-scanning (push) Has been cancelled
CI / docs (push) Has been cancelled
CI / deploy (push) Has been cancelled
CI / pytest (push) Has been cancelled
fix(ops): exact container name matching in verify-server.sh
Use regex pattern [-]name-[0-9] to avoid redis matching redis-exporter.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 00:28:57 +01:00

479 lines
18 KiB
Bash
Executable File

#!/usr/bin/env bash
# verify-server.sh — Check Orion infrastructure health
# Automatically detects dev vs production from .env DEBUG flag.
# Override with: bash scripts/verify-server.sh --dev | --prod
# Unset variables are errors and a failing pipeline stage fails the whole
# pipeline. `-e` is deliberately absent: a failed check must not abort the run.
set -uo pipefail

# Running tallies, one per result level.
PASS=0
FAIL=0
WARN=0

# pass MESSAGE — print a [PASS] line and increment the PASS tally.
pass() {
  printf ' [PASS] %s\n' "$1"
  PASS=$((PASS + 1))
}

# fail MESSAGE — print a [FAIL] line and increment the FAIL tally.
fail() {
  printf ' [FAIL] %s\n' "$1"
  FAIL=$((FAIL + 1))
}

# warn MESSAGE — print a [WARN] line and increment the WARN tally.
warn() {
  printf ' [WARN] %s\n' "$1"
  WARN=$((WARN + 1))
}

# section TITLE — print a blank line followed by a "=== TITLE ===" heading.
section() {
  printf '\n=== %s ===\n' "$1"
}
# ---------------------------------------------------------------------------
# Detect environment
# ---------------------------------------------------------------------------
# Locate the project root (the parent of this script's directory) and its .env.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
ENV_FILE="$PROJECT_DIR/.env"

# Mode selection, in priority order:
#   1. an explicit --dev / --prod first argument
#   2. a line starting with DEBUG=True in .env  -> dev
#   3. anything else, including a missing .env  -> prod (server deployment)
MODE="prod"
case "${1:-}" in
  --dev)
    MODE="dev"
    ;;
  --prod)
    MODE="prod"
    ;;
  *)
    if [ -f "$ENV_FILE" ] && grep -qE '^DEBUG=True' "$ENV_FILE" 2>/dev/null; then
      MODE="dev"
    fi
    ;;
esac

printf '%s\n' "==========================================="
printf ' Orion Infrastructure Check (%s)\n' "$MODE"
printf '%s\n' "==========================================="
# Helper: read a value from .env
#
# env_val KEY — print the value of the first "KEY=..." line in $ENV_FILE.
# Everything after the first "=" is the value, so values may contain "=".
# One pair of matching surrounding quotes ("..." or '...') is removed, so
# that KEY="value" and KEY=value yield the same result for URL parsing and
# emptiness checks. Prints an empty line when the key or the file is missing.
env_val() {
  local raw
  raw=$(grep -E "^${1}=" "$ENV_FILE" 2>/dev/null | head -n 1 | cut -d= -f2-)
  case "$raw" in
    \"*\")
      raw="${raw#\"}"
      raw="${raw%\"}"
      ;;
    \'*\')
      raw="${raw#\'}"
      raw="${raw%\'}"
      ;;
  esac
  printf '%s\n' "$raw"
}
# ===========================================================================
# DEVELOPMENT CHECKS
# ===========================================================================
if [ "$MODE" = "dev" ]; then
# -----------------------------------------------------------------------
section "1. .env Configuration"
# -----------------------------------------------------------------------
if [ ! -f "$ENV_FILE" ]; then
  fail ".env file not found — copy from .env.example"
else
  pass ".env file exists"
fi

# Each of these keys must be present in .env with a non-empty value.
REQUIRED_KEYS="DATABASE_URL REDIS_URL JWT_SECRET_KEY ADMIN_EMAIL PLATFORM_DOMAIN"
for key in $REQUIRED_KEYS; do
  val=$(env_val "$key")
  if [ -z "$val" ]; then
    fail "$key is missing or empty"
    continue
  fi
  pass "$key is set"
done

# Check for stale wizamart references (case-insensitive, anywhere in the file)
if ! grep -qiE 'wizamart' "$ENV_FILE" 2>/dev/null; then
  pass "No stale wizamart references"
else
  fail "Stale 'wizamart' references found in .env"
fi
# -----------------------------------------------------------------------
section "2. PostgreSQL"
# -----------------------------------------------------------------------
DB_URL=$(env_val "DATABASE_URL")
if [ -n "$DB_URL" ]; then
  # Split DATABASE_URL (scheme://[user[:password]@]host[:port]/dbname) with
  # parameter expansion. A sed substitution that does not match prints its
  # input unchanged, so with sed a URL lacking an explicit port would put the
  # whole URL into DB_PORT and the 5432 default would never apply.
  db_rest="${DB_URL#*://}"            # drop "scheme://"
  db_rest="${db_rest##*@}"            # drop "user:password@" (up to the last @)
  db_hostport="${db_rest%%[/?]*}"     # drop "/dbname" and any query string
  DB_HOST="${db_hostport%%:*}"
  DB_PORT=""
  if [[ "$db_hostport" == *:* ]]; then
    DB_PORT="${db_hostport##*:}"
  fi
  # NOTE(review): bracketed IPv6 hosts ("[::1]:5432") are not handled.
  DB_HOST="${DB_HOST:-localhost}"
  DB_PORT="${DB_PORT:-5432}"

  if command -v pg_isready &>/dev/null; then
    if pg_isready -h "$DB_HOST" -p "$DB_PORT" &>/dev/null; then
      pass "PostgreSQL reachable at $DB_HOST:$DB_PORT"
    else
      fail "PostgreSQL not reachable at $DB_HOST:$DB_PORT — start with: docker compose up -d db"
    fi
  # Without pg_isready, fall back to a plain TCP connect through bash's
  # /dev/tcp redirection; the subshell contains the failed redirection.
  elif (echo > /dev/tcp/"$DB_HOST"/"$DB_PORT") &>/dev/null; then
    pass "PostgreSQL port open at $DB_HOST:$DB_PORT"
  else
    fail "PostgreSQL not reachable at $DB_HOST:$DB_PORT — start with: docker compose up -d db"
  fi
else
  fail "DATABASE_URL not set"
fi
# -----------------------------------------------------------------------
section "3. Redis"
# -----------------------------------------------------------------------
REDIS_URL=$(env_val "REDIS_URL")
if [ -n "$REDIS_URL" ]; then
  # Split REDIS_URL (scheme://[[user]:password@]host[:port][/db]) with
  # parameter expansion. A sed substitution that does not match prints its
  # input unchanged, so URLs without a port, or with credentials such as
  # "redis://:secret@host:6379/0", would leave the whole URL in the
  # host/port variables and the 6379 default would never apply.
  redis_rest="${REDIS_URL#*://}"            # drop "scheme://"
  redis_rest="${redis_rest##*@}"            # drop credentials (up to the last @)
  redis_hostport="${redis_rest%%[/?]*}"     # drop "/db" and any query string
  REDIS_HOST="${redis_hostport%%:*}"
  REDIS_PORT=""
  if [[ "$redis_hostport" == *:* ]]; then
    REDIS_PORT="${redis_hostport##*:}"
  fi
  REDIS_HOST="${REDIS_HOST:-localhost}"
  REDIS_PORT="${REDIS_PORT:-6379}"

  if command -v redis-cli &>/dev/null; then
    if redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" ping &>/dev/null; then
      pass "Redis reachable at $REDIS_HOST:$REDIS_PORT"
    else
      fail "Redis not reachable at $REDIS_HOST:$REDIS_PORT — start with: docker compose up -d redis"
    fi
  # Without redis-cli, fall back to a plain TCP connect (same approach as the
  # PostgreSQL check) instead of reporting a missing client as a Redis outage.
  elif (echo > /dev/tcp/"$REDIS_HOST"/"$REDIS_PORT") &>/dev/null; then
    pass "Redis port open at $REDIS_HOST:$REDIS_PORT"
  else
    fail "Redis not reachable at $REDIS_HOST:$REDIS_PORT — start with: docker compose up -d redis"
  fi
else
  fail "REDIS_URL not set"
fi
# -----------------------------------------------------------------------
section "4. Dev Server Health"
# -----------------------------------------------------------------------
# Dev server runs on port 9999 (make dev)
DEV_PORT=9999
HEALTH_URL="http://localhost:$DEV_PORT/health"
READY_URL="http://localhost:$DEV_PORT/health/ready"

# When no response is received curl itself writes "000" for %{http_code} and
# exits non-zero. Appending a fallback with `|| echo "000"` therefore
# produced "000000", which made the "dev server not running" branch
# unreachable. Keep curl's own output and default it only when curl printed
# nothing at all. --max-time stops a hung server from stalling the script.
status=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 "$HEALTH_URL" 2>/dev/null || true)
status="${status:-000}"

if [ "$status" = "200" ]; then
  pass "/health endpoint: HTTP 200 (port $DEV_PORT)"
  # Readiness is only queried once the basic health endpoint answers 200.
  ready_response=$(curl -s --max-time 10 "$READY_URL" 2>/dev/null || echo "")
  if [[ "$ready_response" == *'"healthy"'* ]]; then
    pass "/health/ready: healthy"
  else
    fail "/health/ready: not healthy"
  fi
elif [ "$status" = "000" ]; then
  # A server that is simply not started is a warning in dev, not a failure.
  warn "Dev server not running on port $DEV_PORT — start with: make dev"
else
  fail "/health endpoint: HTTP $status (port $DEV_PORT)"
fi
# -----------------------------------------------------------------------
section "5. Migrations"
# -----------------------------------------------------------------------
if ! command -v python3 &>/dev/null; then
  warn "python3 not found, cannot check migrations"
else
  # Run from the project root; a non-zero exit appends the marker "ERROR"
  # to whatever alembic printed.
  alembic_output=$(cd "$PROJECT_DIR" && python3 -m alembic current 2>&1 || echo "ERROR")
  # "head" is tested before "ERROR", so output containing both counts as
  # being at head.
  case "$alembic_output" in
    *head*)
      pass "Alembic migrations at head"
      ;;
    *ERROR*)
      fail "Could not check migration status"
      ;;
    *)
      warn "Migrations may not be at head — run: make migrate-up"
      ;;
  esac
fi
fi
# ===========================================================================
# PRODUCTION CHECKS
# ===========================================================================
if [ "$MODE" = "prod" ]; then
# Under sudo, $HOME is /root, so take the invoking user's home directory
# from the passwd database (field 6) instead.
if [ -z "${SUDO_USER:-}" ]; then
  REAL_HOME="$HOME"
else
  REAL_HOME=$(getent passwd "$SUDO_USER" | cut -d: -f6)
fi
# An ORION_DIR already set (and non-empty) in the environment takes precedence.
: "${ORION_DIR:=$REAL_HOME/apps/orion}"
# -----------------------------------------------------------------------
section "1. fail2ban"
# -----------------------------------------------------------------------
# Is the fail2ban unit active according to systemd?
if ! systemctl is-active --quiet fail2ban; then
  fail "fail2ban service not running"
else
  pass "fail2ban service running"
fi

# A jail counts as active when fail2ban-client can report its status.
if ! sudo fail2ban-client status sshd &>/dev/null; then
  fail "SSH jail not active"
else
  pass "SSH jail active"
fi

if ! sudo fail2ban-client status caddy-auth &>/dev/null; then
  fail "Caddy auth jail not active — deploy /etc/fail2ban/jail.d/caddy.conf"
else
  pass "Caddy auth jail active"
fi
# -----------------------------------------------------------------------
section "2. Unattended Upgrades"
# -----------------------------------------------------------------------
# `dpkg -l PKG` exits 0 for any package dpkg knows about, including one that
# was removed but whose config files remain. Read the Status field instead
# and require the "... ok installed" state.
ua_status=$(dpkg-query -W -f='${Status}' unattended-upgrades 2>/dev/null || true)
case "$ua_status" in
  *" ok installed")
    pass "unattended-upgrades package installed"
    ;;
  *)
    fail "unattended-upgrades not installed"
    ;;
esac

if [ -f /etc/apt/apt.conf.d/20auto-upgrades ]; then
  # Drop comment lines first so a commented-out setting does not count as
  # enabled.
  if grep -Ev '^[[:space:]]*(//|#)' /etc/apt/apt.conf.d/20auto-upgrades \
    | grep -q 'Unattended-Upgrade "1"'; then
    pass "Automatic upgrades enabled"
  else
    fail "Automatic upgrades not enabled in 20auto-upgrades"
  fi
else
  fail "/etc/apt/apt.conf.d/20auto-upgrades missing"
fi
# -----------------------------------------------------------------------
section "3. Docker Containers"
# -----------------------------------------------------------------------
EXPECTED_CONTAINERS="db redis api celery-worker celery-beat flower prometheus grafana node-exporter cadvisor alertmanager redis-exporter"

# List the compose project's container names once and reuse the list for
# every lookup below, instead of re-running `docker compose ps` per service.
compose_names=$(docker compose --profile full --project-directory "$ORION_DIR" ps --format '{{.Name}}' 2>/dev/null || true)

# compose_containers SERVICE — print the entries of $compose_names that
# belong to SERVICE, one per line (nothing when there is no match). The
# "-SERVICE-<digit>" pattern keeps "redis" from matching "redis-exporter".
compose_containers() {
  printf '%s\n' "$compose_names" | grep -E "[-]${1}-[0-9]" || true
}

for name in $EXPECTED_CONTAINERS; do
  matches=$(compose_containers "$name")
  if [ -z "$matches" ]; then
    fail "Container $name: not found"
    continue
  fi
  # A service can match several names (replicas). Inspect each one on its
  # own: a multi-line string passed to `docker inspect` as a single argument
  # is not a valid container name.
  while IFS= read -r container; do
    state=$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null) || state="unknown"
    if [ "$state" = "running" ]; then
      pass "Container $name: running"
    else
      fail "Container $name: $state (expected running)"
    fi
  done <<< "$matches"
done

# Check for healthy status on containers with healthchecks
for name in db redis api celery-worker; do
  matches=$(compose_containers "$name")
  # A missing container was already reported by the loop above.
  [ -n "$matches" ] || continue
  while IFS= read -r container; do
    # A failing inspect, or empty output, is treated as "no healthcheck".
    health=$(docker inspect --format='{{.State.Health.Status}}' "$container" 2>/dev/null) || health="none"
    health="${health:-none}"
    if [ "$health" = "healthy" ]; then
      pass "Container $name: healthy"
    elif [ "$health" = "none" ]; then
      warn "Container $name: no healthcheck configured"
    else
      fail "Container $name: $health (expected healthy)"
    fi
  done <<< "$matches"
done
# -----------------------------------------------------------------------
section "4. Caddy"
# -----------------------------------------------------------------------
# Is the caddy unit active according to systemd?
if ! systemctl is-active --quiet caddy; then
  fail "Caddy service not running"
else
  pass "Caddy service running"
fi

# The configuration file must exist as a regular file.
if [ ! -f /etc/caddy/Caddyfile ]; then
  fail "Caddyfile not found"
else
  pass "Caddyfile exists"
fi
# -----------------------------------------------------------------------
section "5. Backup Timer"
# -----------------------------------------------------------------------
if systemctl is-active --quiet orion-backup.timer; then
  pass "Backup timer active"
else
  fail "Backup timer not active — enable with: sudo systemctl enable --now orion-backup.timer"
fi

# Under sudo, $HOME is /root while the rest of the production checks use the
# invoking user's home (REAL_HOME). Search REAL_HOME's backup directory as
# well, so the result does not depend on whether the script was run via sudo.
backup_dirs=("$HOME/backups/orion/daily/")
if [ -n "${REAL_HOME:-}" ] && [ "$REAL_HOME" != "$HOME" ]; then
  backup_dirs=("$REAL_HOME/backups/orion/daily/" "${backup_dirs[@]}")
fi

# Any *.sql.gz modified within the last two days counts as a recent backup.
# find's errors (e.g. a missing directory) are deliberately silenced.
LATEST_BACKUP=$(find "${backup_dirs[@]}" -name "*.sql.gz" -mtime -2 2>/dev/null | head -1)
if [ -n "$LATEST_BACKUP" ]; then
  pass "Recent backup found: $(basename "$LATEST_BACKUP")"
else
  warn "No backup found from the last 2 days"
fi
# -----------------------------------------------------------------------
section "6. Gitea Runner"
# -----------------------------------------------------------------------
# Is the gitea-runner unit active according to systemd?
if ! systemctl is-active --quiet gitea-runner; then
  fail "Gitea runner service not running"
else
  pass "Gitea runner service running"
fi
# -----------------------------------------------------------------------
section "7. SSL Certificates"
# -----------------------------------------------------------------------
DOMAINS="wizard.lu api.wizard.lu git.wizard.lu omsflow.lu rewardflow.lu"
for domain in $DOMAINS; do
  # Fetch the served certificate (SNI set to the domain) and read its
  # notAfter date; `echo |` gives s_client an immediate EOF on stdin.
  expiry=$(echo | openssl s_client -servername "$domain" -connect "$domain":443 2>/dev/null | openssl x509 -noout -enddate 2>/dev/null | cut -d= -f2)
  if [ -z "$expiry" ]; then
    fail "SSL $domain: could not check certificate"
    continue
  fi

  # A date that cannot be parsed must not be turned into epoch 0, which
  # would be reported as an expired certificate.
  if ! expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null); then
    fail "SSL $domain: could not parse certificate expiry date ($expiry)"
    continue
  fi

  now_epoch=$(date +%s)
  seconds_left=$(( expiry_epoch - now_epoch ))
  days_left=$(( seconds_left / 86400 ))

  # Decide "expired" on seconds, not whole days: a certificate with less
  # than 24 hours left has days_left = 0 but is still valid.
  if [ "$seconds_left" -le 0 ]; then
    fail "SSL $domain: expired"
  elif [ "$days_left" -gt 14 ]; then
    pass "SSL $domain: valid ($days_left days remaining)"
  else
    warn "SSL $domain: expiring soon ($days_left days remaining)"
  fi
done
# -----------------------------------------------------------------------
section "8. Flower Password"
# -----------------------------------------------------------------------
if [ -f "$ORION_DIR/.env" ]; then
  FLOWER_PW=$(grep -E '^FLOWER_PASSWORD=' "$ORION_DIR/.env" 2>/dev/null | cut -d= -f2- || echo "")
  # Remove one pair of matching surrounding quotes so that
  # FLOWER_PASSWORD="changeme" and FLOWER_PASSWORD="" are still recognised
  # as default / empty instead of passing the check.
  case "$FLOWER_PW" in
    \"*\")
      FLOWER_PW="${FLOWER_PW#\"}"
      FLOWER_PW="${FLOWER_PW%\"}"
      ;;
    \'*\')
      FLOWER_PW="${FLOWER_PW#\'}"
      FLOWER_PW="${FLOWER_PW%\'}"
      ;;
  esac
  if [ -z "$FLOWER_PW" ] || [ "$FLOWER_PW" = "changeme" ]; then
    fail "Flower password is default or empty — change FLOWER_PASSWORD in .env"
  else
    pass "Flower password changed from default"
  fi
else
  warn ".env file not found at $ORION_DIR/.env"
fi
# -----------------------------------------------------------------------
section "9. Sentry"
# -----------------------------------------------------------------------
if [ -f "$ORION_DIR/.env" ]; then
  # sentry_env_val KEY — print the value of KEY from $ORION_DIR/.env with
  # one pair of matching surrounding quotes removed, so SENTRY_DSN="" counts
  # as unset and SENTRY_ENVIRONMENT="production" compares equal to production.
  sentry_env_val() {
    local raw
    raw=$(grep -E "^${1}=" "$ORION_DIR/.env" 2>/dev/null | cut -d= -f2- || echo "")
    case "$raw" in
      \"*\")
        raw="${raw#\"}"
        raw="${raw%\"}"
        ;;
      \'*\')
        raw="${raw#\'}"
        raw="${raw%\'}"
        ;;
    esac
    printf '%s\n' "$raw"
  }

  SENTRY_DSN=$(sentry_env_val SENTRY_DSN)
  # The literal string "None" is treated the same as an unset DSN.
  if [ -n "$SENTRY_DSN" ] && [ "$SENTRY_DSN" != "None" ]; then
    pass "SENTRY_DSN is configured"
  else
    warn "SENTRY_DSN not set — error tracking disabled"
  fi

  SENTRY_ENV=$(sentry_env_val SENTRY_ENVIRONMENT)
  # An unset SENTRY_ENVIRONMENT is not reported at all.
  if [ "$SENTRY_ENV" = "production" ]; then
    pass "SENTRY_ENVIRONMENT is 'production'"
  elif [ -n "$SENTRY_ENV" ]; then
    warn "SENTRY_ENVIRONMENT is '$SENTRY_ENV' (expected 'production')"
  fi
fi
# -----------------------------------------------------------------------
section "10. Redis Exporter"
# -----------------------------------------------------------------------
# curl writes "000" for %{http_code} itself when no response is received and
# then exits non-zero; an extra `|| echo "000"` turned that into "000000" in
# the failure message. Default the value only when curl printed nothing.
# --max-time stops an unresponsive exporter from stalling the script.
redis_exporter_status=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 http://localhost:9121/health 2>/dev/null || true)
redis_exporter_status="${redis_exporter_status:-000}"

if [ "$redis_exporter_status" = "200" ]; then
  pass "Redis exporter: accessible (HTTP 200)"
  # Take the value of the first "redis_up <value>" sample; anything missing
  # or unreadable counts as 0.
  redis_up=$(curl -s --max-time 10 http://localhost:9121/metrics 2>/dev/null | awk '$1 == "redis_up" {print $2; exit}' || true)
  redis_up="${redis_up:-0}"
  if [ "$redis_up" = "1" ]; then
    pass "Redis exporter: redis_up = 1"
  else
    fail "Redis exporter: redis_up = $redis_up (Redis unreachable)"
  fi
else
  fail "Redis exporter: HTTP $redis_exporter_status (expected 200)"
fi
# -----------------------------------------------------------------------
section "11. DNS Resolution"
# -----------------------------------------------------------------------
EXPECTED_DOMAINS="wizard.lu api.wizard.lu git.wizard.lu grafana.wizard.lu flower.wizard.lu omsflow.lu rewardflow.lu"
for domain in $EXPECTED_DOMAINS; do
  # Only the first line of dig's short answer is kept and reported.
  resolved=$(dig +short "$domain" A 2>/dev/null | head -1)
  if [ -z "$resolved" ]; then
    fail "DNS $domain: no A record found"
    continue
  fi
  pass "DNS $domain: $resolved"
done
# -----------------------------------------------------------------------
section "12. Health Endpoints"
# -----------------------------------------------------------------------
HEALTH_URL="http://localhost:8001/health"
READY_URL="http://localhost:8001/health/ready"

# curl writes "000" for %{http_code} itself when no response is received and
# then exits non-zero; an extra `|| echo "000"` turned that into "000000" in
# the failure message. Default the value only when curl printed nothing.
# --max-time stops a hung API from stalling the script.
status=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 "$HEALTH_URL" 2>/dev/null || true)
status="${status:-000}"
if [ "$status" = "200" ]; then
  pass "/health endpoint: HTTP 200"
else
  fail "/health endpoint: HTTP $status"
fi

# Readiness is queried regardless of the /health result above.
ready_response=$(curl -s --max-time 10 "$READY_URL" 2>/dev/null || echo "")
if [[ "$ready_response" == *'"healthy"'* ]]; then
  pass "/health/ready: healthy"
  # The response is only searched for the quoted check names; it is not
  # parsed as JSON.
  if [[ "$ready_response" == *'"database"'* ]]; then
    pass "/health/ready: database check registered"
  else
    warn "/health/ready: database check not found"
  fi
  if [[ "$ready_response" == *'"redis"'* ]]; then
    pass "/health/ready: redis check registered"
  else
    warn "/health/ready: redis check not found"
  fi
else
  fail "/health/ready: not healthy — $ready_response"
fi
# -----------------------------------------------------------------------
section "13. Prometheus Targets"
# -----------------------------------------------------------------------
targets=$(curl -s http://localhost:9090/api/v1/targets 2>/dev/null || echo "")
if [ -z "$targets" ]; then
  fail "Prometheus: could not reach API at localhost:9090"
else
  # Count occurrences, not lines: `grep -o` prints one line per match, so
  # this also works when the whole response sits on a single line.
  up_count=$(printf '%s\n' "$targets" | grep -o '"health":"up"' | wc -l)
  down_count=$(printf '%s\n' "$targets" | grep -o '"health":"down"' | wc -l)
  if [ "$down_count" -gt 0 ]; then
    fail "Prometheus: $down_count target(s) down ($up_count up)"
  elif [ "$up_count" -gt 0 ]; then
    pass "Prometheus: all $up_count targets up"
  else
    warn "Prometheus: no targets found"
  fi
fi
# -----------------------------------------------------------------------
section "14. Grafana"
# -----------------------------------------------------------------------
# curl writes "000" for %{http_code} itself when no response is received and
# then exits non-zero; an extra `|| echo "000"` turned that into "000000" in
# the failure message. Default the value only when curl printed nothing.
# --max-time stops an unresponsive Grafana from stalling the script.
grafana_status=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 http://localhost:3001/api/health 2>/dev/null || true)
grafana_status="${grafana_status:-000}"
if [ "$grafana_status" = "200" ]; then
  pass "Grafana: accessible (HTTP 200)"
else
  fail "Grafana: HTTP $grafana_status (expected 200)"
fi
fi
# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
# Print the tallies, then a one-line verdict. The exit status is 1 only when
# at least one check failed; warnings alone still exit 0.
printf '\n'
printf '%s\n' "==========================================="
printf ' PASS: %s | FAIL: %s | WARN: %s\n' "$PASS" "$FAIL" "$WARN"
printf '%s\n' "==========================================="

exit_code=0
if [ "$FAIL" -gt 0 ]; then
  verdict=" Status: NOT READY — fix $FAIL issue(s) above"
  exit_code=1
elif [ "$WARN" -gt 0 ]; then
  verdict=" Status: READY (with $WARN warning(s))"
else
  verdict=" Status: FULLY READY"
fi
printf '%s\n' "$verdict"
exit "$exit_code"