feat: add automated backups and Prometheus/Grafana monitoring stack (Steps 17-18)
Some checks failed
CI / dependency-scanning (push) Has been cancelled
CI / docs (push) Has been cancelled
CI / ruff (push) Successful in 7s
CI / validate (push) Has been cancelled
CI / deploy (push) Has been cancelled
CI / pytest (push) Has started running

Backups: pg_dump scripts with daily/weekly rotation and Cloudflare R2 offsite sync.
Monitoring: Prometheus, Grafana, node-exporter, cAdvisor in docker-compose; /metrics
endpoint activated via prometheus_client.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-14 22:40:08 +01:00
parent 488d5a6f0e
commit ef7187b508
15 changed files with 809 additions and 20 deletions

150
scripts/backup.sh Executable file
View File

@@ -0,0 +1,150 @@
#!/usr/bin/env bash
# scripts/backup.sh — Automated PostgreSQL backup for Orion and Gitea
#
# Usage:
# bash scripts/backup.sh # Local backup only
# bash scripts/backup.sh --upload # Local backup + sync to Cloudflare R2
#
# Cron / systemd timer: runs daily at 03:00
# On Sundays: copies daily backup to weekly/
# Retention: age-based (find -mtime) — daily backups older than 7 days and weekly copies older than 28 days are deleted
set -euo pipefail
# =============================================================================
# Configuration
# =============================================================================
BACKUP_ROOT="${HOME}/backups"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
DAY_OF_WEEK=$(date +%u) # 1=Monday, 7=Sunday
# Orion DB settings (from docker-compose.yml)
ORION_CONTAINER="orion-db-1"
ORION_DB="orion_db"
ORION_USER="orion_user"
# Gitea DB settings (from ~/gitea/docker-compose.yml)
GITEA_CONTAINER="gitea-db"
GITEA_DB="gitea"
GITEA_USER="gitea"
# R2 settings.
# Both variables are initialised up front (keeping any value already exported
# in the environment) so that later expansions never trip `set -u` when the
# .env file is missing. A non-empty value found in .env takes precedence.
ORION_APP_DIR="${HOME}/apps/orion"
R2_ACCOUNT_ID="${R2_ACCOUNT_ID:-}"
R2_BACKUP_BUCKET="${R2_BACKUP_BUCKET:-}"
if [ -f "${ORION_APP_DIR}/.env" ]; then
  # `|| true`: grep exits non-zero when the key is absent, which must not
  # abort the script under `set -e` / pipefail.
  env_value=$(grep -s '^R2_ACCOUNT_ID=' "${ORION_APP_DIR}/.env" | cut -d= -f2- || true)
  R2_ACCOUNT_ID="${env_value:-${R2_ACCOUNT_ID}}"
  env_value=$(grep -s '^R2_BACKUP_BUCKET=' "${ORION_APP_DIR}/.env" | cut -d= -f2- || true)
  R2_BACKUP_BUCKET="${env_value:-${R2_BACKUP_BUCKET}}"
  unset env_value
fi
R2_BACKUP_BUCKET="${R2_BACKUP_BUCKET:-orion-backups}"
# May contain an empty account ID; upload_to_r2 refuses to run in that case.
R2_ENDPOINT="https://${R2_ACCOUNT_ID}.r2.cloudflarestorage.com"
# Retention: DAILY_KEEP is an age in days, WEEKLY_KEEP an age in weeks
# (multiplied by 7 before being handed to rotate_backups).
DAILY_KEEP=7
WEEKLY_KEEP=4
# =============================================================================
# Functions
# =============================================================================
# Write a timestamped line of the form "[YYYY-MM-DD HH:MM:SS] message" to
# stdout. All arguments are joined into a single message.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
# Dump one PostgreSQL database from a running container into a gzip file.
#
# The dump is streamed into "<target_dir>/<filename>.tmp" and renamed to its
# final name only after the whole pipeline succeeded, so a failed or
# interrupted dump never leaves a truncated file under the final name.
# Detecting a failing pg_dump relies on `set -o pipefail` being active (it is
# enabled at the top of this script).
#
# Arguments:
#   $1 - container name passed to `docker exec`
#   $2 - database name
#   $3 - database user
#   $4 - target directory (created if missing)
#   $5 - output file name
# Returns: 0 on success, 1 if the dump could not be produced.
backup_database() {
  local container="$1"
  local db_name="$2"
  local db_user="$3"
  local target_dir="$4"
  local filename="$5"
  local target="${target_dir}/${filename}"
  local partial="${target}.tmp"
  local size

  mkdir -p "${target_dir}"
  log "Backing up ${db_name} from ${container}..."
  if docker exec "${container}" pg_dump -U "${db_user}" "${db_name}" | gzip > "${partial}" \
    && mv -f -- "${partial}" "${target}"; then
    size=$(du -h "${target}" | cut -f1)
    log " OK: ${filename} (${size})"
  else
    # Drop the incomplete dump so it cannot be mistaken for a usable backup.
    rm -f -- "${partial}"
    log " FAILED: ${db_name} backup"
    return 1
  fi
}
# Remove "*.sql.gz" files from a directory once `find -mtime +N` considers
# them older than the given number of days.
#
# Arguments:
#   $1 - directory to clean (a missing directory is skipped silently)
#   $2 - N for `find -mtime +N`
rotate_backups() {
  local backup_dir="$1"
  local max_age_days="$2"
  local stale_total

  [ -d "${backup_dir}" ] || return 0

  # Count first so the log line can report how many files were removed.
  stale_total=$(find "${backup_dir}" -name "*.sql.gz" -mtime +"${max_age_days}" 2>/dev/null | wc -l)
  [ "${stale_total}" -gt 0 ] || return 0

  find "${backup_dir}" -name "*.sql.gz" -mtime +"${max_age_days}" -delete
  log " Rotated: removed ${stale_total} old backups from ${backup_dir}"
}
# Mirror the local backup tree to the configured R2 bucket with
# `aws s3 sync`.
#
# Globals read: R2_ACCOUNT_ID, R2_BACKUP_BUCKET, R2_ENDPOINT, BACKUP_ROOT
# Returns: 0 on success, 1 if R2 is not configured or the sync failed.
upload_to_r2() {
  if [ -z "${R2_ACCOUNT_ID:-}" ]; then
    log "ERROR: R2_ACCOUNT_ID not set. Cannot upload."
    return 1
  fi
  log "Syncing backups to R2 bucket: ${R2_BACKUP_BUCKET}..."
  # The exit status is checked explicitly: this function is invoked as
  # `upload_to_r2 || ...`, and bash ignores `set -e` inside a function called
  # in that context, so an unchecked failure would fall through to the "OK"
  # message and be reported as success.
  # NOTE: --delete also removes remote objects that no longer exist locally
  # (this is how rotated backups disappear from the bucket).
  if ! aws s3 sync "${BACKUP_ROOT}/" "s3://${R2_BACKUP_BUCKET}/" \
    --endpoint-url "${R2_ENDPOINT}" \
    --profile r2 \
    --delete \
    --exclude "*.tmp"; then
    log " FAILED: R2 sync"
    return 1
  fi
  log " OK: R2 sync complete"
}
# =============================================================================
# Main
# =============================================================================
# Only the first argument is inspected; anything other than --upload means
# "local backup only".
UPLOAD=false
if [ "${1:-}" = "--upload" ]; then
  UPLOAD=true
fi
log "=== Orion Backup Started ==="
# Ensure backup directories exist
mkdir -p "${BACKUP_ROOT}/orion/"{daily,weekly}
mkdir -p "${BACKUP_ROOT}/gitea/"{daily,weekly}
# --- Daily backups ---
# Each dump is attempted independently; failures are counted rather than
# aborting, so one broken database does not prevent the other backup.
ERRORS=0
ORION_OK=true
GITEA_OK=true
backup_database "${ORION_CONTAINER}" "${ORION_DB}" "${ORION_USER}" \
  "${BACKUP_ROOT}/orion/daily" "orion_${TIMESTAMP}.sql.gz" || {
  ORION_OK=false
  ERRORS=$((ERRORS + 1))
}
backup_database "${GITEA_CONTAINER}" "${GITEA_DB}" "${GITEA_USER}" \
  "${BACKUP_ROOT}/gitea/daily" "gitea_${TIMESTAMP}.sql.gz" || {
  GITEA_OK=false
  ERRORS=$((ERRORS + 1))
}
# --- Weekly copies (Sunday) ---
# Only dumps that succeeded are promoted to weekly/, and a failing copy is
# reported and counted instead of being silently discarded.
if [ "${DAY_OF_WEEK}" -eq 7 ]; then
  log "Sunday: copying to weekly/"
  if [ "${ORION_OK}" = true ]; then
    if ! cp -f "${BACKUP_ROOT}/orion/daily/orion_${TIMESTAMP}.sql.gz" \
      "${BACKUP_ROOT}/orion/weekly/"; then
      log " FAILED: weekly copy of orion_${TIMESTAMP}.sql.gz"
      ERRORS=$((ERRORS + 1))
    fi
  fi
  if [ "${GITEA_OK}" = true ]; then
    if ! cp -f "${BACKUP_ROOT}/gitea/daily/gitea_${TIMESTAMP}.sql.gz" \
      "${BACKUP_ROOT}/gitea/weekly/"; then
      log " FAILED: weekly copy of gitea_${TIMESTAMP}.sql.gz"
      ERRORS=$((ERRORS + 1))
    fi
  fi
fi
# --- Rotation ---
log "Rotating old backups..."
rotate_backups "${BACKUP_ROOT}/orion/daily" "${DAILY_KEEP}"
rotate_backups "${BACKUP_ROOT}/gitea/daily" "${DAILY_KEEP}"
rotate_backups "${BACKUP_ROOT}/orion/weekly" $((WEEKLY_KEEP * 7))
rotate_backups "${BACKUP_ROOT}/gitea/weekly" $((WEEKLY_KEEP * 7))
# --- Optional R2 upload ---
if [ "${UPLOAD}" = true ]; then
  upload_to_r2 || ERRORS=$((ERRORS + 1))
fi
# --- Summary ---
# A non-zero exit status lets cron / systemd flag the run as failed.
if [ "${ERRORS}" -eq 0 ]; then
  log "=== Backup completed successfully ==="
else
  log "=== Backup completed with ${ERRORS} error(s) ==="
  exit 1
fi

152
scripts/restore.sh Executable file
View File

@@ -0,0 +1,152 @@
#!/usr/bin/env bash
# scripts/restore.sh — Database restore helper for Orion and Gitea
#
# Usage:
# bash scripts/restore.sh orion ~/backups/orion/daily/orion_20260214_030000.sql.gz
# bash scripts/restore.sh gitea ~/backups/gitea/daily/gitea_20260214_030000.sql.gz
#
# What it does:
# 1. Stops app containers (keeps DB running)
# 2. Drops and recreates the database
# 3. Restores from the .sql.gz backup
# 4. Runs Alembic migrations (Orion only)
# 5. Restarts all containers
# Strict mode: abort on errors, on unset variables and on failures anywhere
# in a pipeline.
set -o errexit -o nounset -o pipefail
# =============================================================================
# Configuration
# =============================================================================
# Directory restore_orion changes into before running docker compose.
ORION_APP_DIR="${HOME}/apps/orion"
# =============================================================================
# Functions
# =============================================================================
# Emit one log line on stdout, prefixed with the current local time in
# "[YYYY-MM-DD HH:MM:SS]" form. Arguments are joined into one message.
log() {
  local message="$*"
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${message}"
}
# Print the command-line help to stdout and terminate the script with
# exit status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 <target> <backup-file>" \
    "" \
    " target: 'orion' or 'gitea'" \
    " backup-file: path to .sql.gz file" \
    "" \
    "Examples:" \
    " $0 orion ~/backups/orion/daily/orion_20260214_030000.sql.gz" \
    " $0 gitea ~/backups/gitea/daily/gitea_20260214_030000.sql.gz"
  exit 1
}
# Restore the Orion database from a gzip-compressed SQL dump.
#
# Steps: validate the archive, stop the app containers, drop and recreate the
# database, load the dump, run Alembic migrations, then bring all services up.
#
# Arguments:
#   $1 - path to the .sql.gz backup (absolute or relative to the caller's cwd)
# Globals read: ORION_APP_DIR
# Side effects: changes the working directory to ORION_APP_DIR.
# Returns: 1 if the backup cannot be resolved/validated or the app directory
#          cannot be entered (nothing has been modified at that point).
restore_orion() {
  local backup_file="$1"
  local container="orion-db-1"
  local db_name="orion_db"
  local db_user="orion_user"
  local backup_dir

  log "=== Restoring Orion database ==="
  # Resolve the backup to an absolute path now: the working directory changes
  # below, and a relative path would otherwise stop resolving only after the
  # database has already been dropped.
  if ! backup_dir=$(CDPATH='' cd -- "$(dirname -- "${backup_file}")" && pwd); then
    log "ERROR: Cannot resolve backup file path: ${backup_file}"
    return 1
  fi
  backup_file="${backup_dir}/$(basename -- "${backup_file}")"
  # Check archive integrity before any destructive step.
  if ! gzip -t -- "${backup_file}"; then
    log "ERROR: Backup file is missing or not a valid gzip archive: ${backup_file}"
    return 1
  fi
  # Stop app containers (keep DB running)
  log "Stopping Orion app containers..."
  if ! cd "${ORION_APP_DIR}"; then
    log "ERROR: Cannot enter ${ORION_APP_DIR}"
    return 1
  fi
  # Best effort: the containers may not exist or may already be stopped.
  docker compose --profile full stop api celery-worker celery-beat flower 2>/dev/null || true
  # Drop and recreate database
  log "Dropping and recreating ${db_name}..."
  # Best effort: terminate remaining sessions so dropdb is not blocked.
  docker exec "${container}" psql -U "${db_user}" -d postgres -c \
    "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${db_name}' AND pid <> pg_backend_pid();" 2>/dev/null || true
  docker exec "${container}" dropdb -U "${db_user}" --if-exists "${db_name}"
  docker exec "${container}" createdb -U "${db_user}" "${db_name}"
  # Restore
  # NOTE(review): psql runs without ON_ERROR_STOP, so SQL errors inside the
  # dump do not abort the restore — confirm this is intended.
  log "Restoring from ${backup_file}..."
  gunzip -c "${backup_file}" | docker exec -i "${container}" psql -U "${db_user}" -d "${db_name}" --quiet
  # Run migrations
  log "Running Alembic migrations..."
  # `start` only works for an existing container; fall back to creating it.
  docker compose --profile full start api 2>/dev/null || \
    docker compose --profile full up -d api
  sleep 5 # Wait for API container to be ready
  docker compose --profile full exec -e PYTHONPATH=/app api python -m alembic upgrade heads
  # Restart all
  log "Restarting all services..."
  docker compose --profile full up -d
  log "=== Orion restore complete ==="
}
# Restore the Gitea database from a gzip-compressed SQL dump.
#
# Steps: validate the archive, stop the gitea container, drop and recreate
# the database, load the dump, then bring the compose project up again.
#
# Arguments:
#   $1 - path to the .sql.gz backup (absolute or relative to the caller's cwd)
# Side effects: changes the working directory to ${HOME}/gitea.
# Returns: 1 if the backup cannot be resolved/validated or the Gitea
#          directory cannot be entered (nothing has been modified then).
restore_gitea() {
  local backup_file="$1"
  local container="gitea-db"
  local db_name="gitea"
  local db_user="gitea"
  local gitea_dir="${HOME}/gitea"
  local backup_dir

  log "=== Restoring Gitea database ==="
  # Resolve the backup to an absolute path now: the working directory changes
  # below, and a relative path would otherwise stop resolving only after the
  # database has already been dropped.
  if ! backup_dir=$(CDPATH='' cd -- "$(dirname -- "${backup_file}")" && pwd); then
    log "ERROR: Cannot resolve backup file path: ${backup_file}"
    return 1
  fi
  backup_file="${backup_dir}/$(basename -- "${backup_file}")"
  # Check archive integrity before any destructive step.
  if ! gzip -t -- "${backup_file}"; then
    log "ERROR: Backup file is missing or not a valid gzip archive: ${backup_file}"
    return 1
  fi
  # Stop Gitea container (keep DB running)
  log "Stopping Gitea..."
  if ! cd "${gitea_dir}"; then
    log "ERROR: Cannot enter ${gitea_dir}"
    return 1
  fi
  # Best effort: the container may not exist or may already be stopped.
  docker compose stop gitea 2>/dev/null || true
  # Drop and recreate database
  log "Dropping and recreating ${db_name}..."
  # Best effort: terminate remaining sessions so dropdb is not blocked.
  docker exec "${container}" psql -U "${db_user}" -d postgres -c \
    "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${db_name}' AND pid <> pg_backend_pid();" 2>/dev/null || true
  docker exec "${container}" dropdb -U "${db_user}" --if-exists "${db_name}"
  docker exec "${container}" createdb -U "${db_user}" "${db_name}"
  # Restore
  # NOTE(review): psql runs without ON_ERROR_STOP, so SQL errors inside the
  # dump do not abort the restore — confirm this is intended.
  log "Restoring from ${backup_file}..."
  gunzip -c "${backup_file}" | docker exec -i "${container}" psql -U "${db_user}" -d "${db_name}" --quiet
  # Restart Gitea
  log "Restarting Gitea..."
  docker compose up -d
  log "=== Gitea restore complete ==="
}
# =============================================================================
# Main
# =============================================================================
if [ $# -lt 2 ]; then
  usage
fi
TARGET="$1"
BACKUP_FILE="$2"
# Validate the target first, so a mistyped target is rejected before the user
# is asked to confirm a destructive operation.
case "${TARGET}" in
  orion | gitea) ;;
  *)
    log "ERROR: Unknown target '${TARGET}'. Use 'orion' or 'gitea'."
    usage
    ;;
esac
# Validate backup file
if [ ! -f "${BACKUP_FILE}" ]; then
  log "ERROR: Backup file not found: ${BACKUP_FILE}"
  exit 1
fi
if [[ ! "${BACKUP_FILE}" == *.sql.gz ]]; then
  log "ERROR: Expected a .sql.gz file, got: ${BACKUP_FILE}"
  exit 1
fi
# Confirm: only a single "y" or "Y" proceeds; any other answer aborts.
log "WARNING: This will DROP and RECREATE the ${TARGET} database!"
log "Backup file: ${BACKUP_FILE}"
read -rp "Continue? (y/N) " confirm
if [[ "${confirm}" != [yY] ]]; then
  log "Aborted."
  exit 0
fi
# TARGET was validated above, so exactly one of these branches runs.
case "${TARGET}" in
  orion)
    restore_orion "${BACKUP_FILE}"
    ;;
  gitea)
    restore_gitea "${BACKUP_FILE}"
    ;;
esac