feat: add automated backups and Prometheus/Grafana monitoring stack (Steps 17-18)
Some checks failed
Some checks failed
Backups: pg_dump scripts with daily/weekly rotation and Cloudflare R2 offsite sync. Monitoring: Prometheus, Grafana, node-exporter, cAdvisor in docker-compose; /metrics endpoint activated via prometheus_client. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -19,3 +19,4 @@ alembic/versions_backup/
|
|||||||
.performance-rules/
|
.performance-rules/
|
||||||
.security-rules/
|
.security-rules/
|
||||||
mkdocs.yml
|
mkdocs.yml
|
||||||
|
monitoring/
|
||||||
|
|||||||
11
.env.example
11
.env.example
@@ -173,6 +173,14 @@ SENTRY_DSN=
|
|||||||
SENTRY_ENVIRONMENT=production
|
SENTRY_ENVIRONMENT=production
|
||||||
SENTRY_TRACES_SAMPLE_RATE=0.1
|
SENTRY_TRACES_SAMPLE_RATE=0.1
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MONITORING
|
||||||
|
# =============================================================================
|
||||||
|
ENABLE_METRICS=true
|
||||||
|
GRAFANA_URL=https://grafana.wizard.lu
|
||||||
|
GRAFANA_ADMIN_USER=admin
|
||||||
|
GRAFANA_ADMIN_PASSWORD=changeme
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# CLOUDFLARE R2 STORAGE
|
# CLOUDFLARE R2 STORAGE
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -192,6 +200,9 @@ R2_BUCKET_NAME=orion-media
|
|||||||
# Example: https://media.yoursite.com
|
# Example: https://media.yoursite.com
|
||||||
R2_PUBLIC_URL=
|
R2_PUBLIC_URL=
|
||||||
|
|
||||||
|
# Cloudflare R2 backup bucket (used by scripts/backup.sh --upload)
|
||||||
|
R2_BACKUP_BUCKET=orion-backups
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# CLOUDFLARE CDN / PROXY
|
# CLOUDFLARE CDN / PROXY
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -194,6 +194,12 @@ class Settings(BaseSettings):
|
|||||||
sentry_environment: str = "development" # development, staging, production
|
sentry_environment: str = "development" # development, staging, production
|
||||||
sentry_traces_sample_rate: float = 0.1 # 10% of transactions for performance monitoring
|
sentry_traces_sample_rate: float = 0.1 # 10% of transactions for performance monitoring
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MONITORING
|
||||||
|
# =============================================================================
|
||||||
|
enable_metrics: bool = False
|
||||||
|
grafana_url: str = "https://grafana.wizard.lu"
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# CLOUDFLARE R2 STORAGE
|
# CLOUDFLARE R2 STORAGE
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -16,8 +16,10 @@ from sqlalchemy import text
|
|||||||
|
|
||||||
from middleware.auth import AuthManager
|
from middleware.auth import AuthManager
|
||||||
|
|
||||||
|
from .config import settings
|
||||||
from .database import engine
|
from .database import engine
|
||||||
from .logging import setup_logging
|
from .logging import setup_logging
|
||||||
|
from .observability import init_observability, shutdown_observability
|
||||||
|
|
||||||
# Remove this import if not needed: from models.database.base import Base
|
# Remove this import if not needed: from models.database.base import Base
|
||||||
|
|
||||||
@@ -33,13 +35,22 @@ async def lifespan(app: FastAPI):
|
|||||||
# === STARTUP ===
|
# === STARTUP ===
|
||||||
app_logger = setup_logging()
|
app_logger = setup_logging()
|
||||||
app_logger.info("Starting Orion multi-tenant platform")
|
app_logger.info("Starting Orion multi-tenant platform")
|
||||||
|
|
||||||
|
init_observability(
|
||||||
|
enable_metrics=settings.enable_metrics,
|
||||||
|
sentry_dsn=settings.sentry_dsn,
|
||||||
|
environment=settings.sentry_environment,
|
||||||
|
flower_url=settings.flower_url,
|
||||||
|
grafana_url=settings.grafana_url,
|
||||||
|
)
|
||||||
|
|
||||||
logger.info("[OK] Application startup completed")
|
logger.info("[OK] Application startup completed")
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
||||||
# === SHUTDOWN ===
|
# === SHUTDOWN ===
|
||||||
app_logger.info("Shutting down Orion platform")
|
app_logger.info("Shutting down Orion platform")
|
||||||
# Add cleanup tasks here if needed
|
shutdown_observability()
|
||||||
|
|
||||||
|
|
||||||
# === NEW HELPER FUNCTION ===
|
# === NEW HELPER FUNCTION ===
|
||||||
|
|||||||
@@ -515,17 +515,6 @@ external_tools = ExternalToolConfig()
|
|||||||
health_router = APIRouter(tags=["Health"])
|
health_router = APIRouter(tags=["Health"])
|
||||||
|
|
||||||
|
|
||||||
@health_router.get("/health")
|
|
||||||
async def health_check() -> dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Aggregated health check endpoint.
|
|
||||||
|
|
||||||
Returns combined health status from all registered checks.
|
|
||||||
"""
|
|
||||||
result = health_registry.run_all()
|
|
||||||
return result.to_dict()
|
|
||||||
|
|
||||||
|
|
||||||
@health_router.get("/health/live")
|
@health_router.get("/health/live")
|
||||||
async def liveness_check() -> dict[str, str]:
|
async def liveness_check() -> dict[str, str]:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -117,6 +117,94 @@ services:
|
|||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 3
|
retries: 3
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# MONITORING STACK
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:latest
|
||||||
|
restart: always
|
||||||
|
profiles:
|
||||||
|
- full
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:9090:9090"
|
||||||
|
volumes:
|
||||||
|
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||||
|
- prometheus_data:/prometheus
|
||||||
|
command:
|
||||||
|
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||||
|
- "--storage.tsdb.retention.time=15d"
|
||||||
|
- "--storage.tsdb.retention.size=2GB"
|
||||||
|
- "--web.enable-lifecycle"
|
||||||
|
mem_limit: 256m
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "wget -qO- http://localhost:9090/-/healthy || exit 1"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
restart: always
|
||||||
|
profiles:
|
||||||
|
- full
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:3001:3000"
|
||||||
|
environment:
|
||||||
|
GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
|
||||||
|
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-changeme}
|
||||||
|
GF_SERVER_ROOT_URL: ${GRAFANA_URL:-https://grafana.wizard.lu}
|
||||||
|
volumes:
|
||||||
|
- grafana_data:/var/lib/grafana
|
||||||
|
- ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
|
||||||
|
- ./monitoring/grafana/provisioning/dashboards/json:/var/lib/grafana/dashboards:ro
|
||||||
|
mem_limit: 192m
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "wget -qO- http://localhost:3000/api/health || exit 1"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
|
||||||
|
node-exporter:
|
||||||
|
image: prom/node-exporter:latest
|
||||||
|
restart: always
|
||||||
|
profiles:
|
||||||
|
- full
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:9100:9100"
|
||||||
|
volumes:
|
||||||
|
- /proc:/host/proc:ro
|
||||||
|
- /sys:/host/sys:ro
|
||||||
|
- /:/rootfs:ro
|
||||||
|
command:
|
||||||
|
- "--path.procfs=/host/proc"
|
||||||
|
- "--path.sysfs=/host/sys"
|
||||||
|
- "--path.rootfs=/rootfs"
|
||||||
|
- "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
|
||||||
|
mem_limit: 64m
|
||||||
|
|
||||||
|
cadvisor:
|
||||||
|
image: gcr.io/cadvisor/cadvisor:latest
|
||||||
|
restart: always
|
||||||
|
profiles:
|
||||||
|
- full
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:8080:8080"
|
||||||
|
volumes:
|
||||||
|
- /:/rootfs:ro
|
||||||
|
- /var/run:/var/run:ro
|
||||||
|
- /sys:/sys:ro
|
||||||
|
- /var/lib/docker/:/var/lib/docker:ro
|
||||||
|
- /dev/disk/:/dev/disk:ro
|
||||||
|
privileged: true
|
||||||
|
devices:
|
||||||
|
- /dev/kmsg
|
||||||
|
mem_limit: 128m
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
postgres_data:
|
postgres_data:
|
||||||
name: orion_postgres_data
|
name: orion_postgres_data
|
||||||
|
prometheus_data:
|
||||||
|
name: orion_prometheus_data
|
||||||
|
grafana_data:
|
||||||
|
name: orion_grafana_data
|
||||||
|
|||||||
@@ -49,8 +49,8 @@ Complete step-by-step guide for deploying Orion on a Hetzner Cloud VPS.
|
|||||||
|
|
||||||
**Next steps:**
|
**Next steps:**
|
||||||
|
|
||||||
- [ ] Step 17: Backups — verify Hetzner backup scope, add PostgreSQL pg_dump
|
- [x] Step 17: Backups
|
||||||
- [ ] Step 18: Monitoring & observability — Prometheus, Grafana, uptime checks, alerting
|
- [x] Step 18: Monitoring & observability
|
||||||
|
|
||||||
**Deferred (not urgent, do when all platforms ready):**
|
**Deferred (not urgent, do when all platforms ready):**
|
||||||
|
|
||||||
@@ -69,11 +69,13 @@ Complete step-by-step guide for deploying Orion on a Hetzner Cloud VPS.
|
|||||||
- `env_file: .env` added to `docker-compose.yml` — containers load host env vars properly
|
- `env_file: .env` added to `docker-compose.yml` — containers load host env vars properly
|
||||||
- `CapacitySnapshot` model import fixed (moved from billing to monitoring in `alembic/env.py`)
|
- `CapacitySnapshot` model import fixed (moved from billing to monitoring in `alembic/env.py`)
|
||||||
- All services verified healthy at `https://api.wizard.lu/health`
|
- All services verified healthy at `https://api.wizard.lu/health`
|
||||||
|
- **Step 17: Backups** — automated pg_dump scripts (daily + weekly rotation), R2 offsite upload, restore helper
|
||||||
|
- **Step 18: Monitoring** — Prometheus, Grafana, node-exporter, cAdvisor added to docker-compose; `/metrics` endpoint activated via `prometheus_client`
|
||||||
|
|
||||||
**Next steps:**
|
**Next steps:**
|
||||||
|
|
||||||
- [ ] Step 17: Backups — verify Hetzner backup scope, add PostgreSQL pg_dump
|
- [ ] Server-side: enable Hetzner backups, create R2 bucket, configure systemd timer
|
||||||
- [ ] Step 18: Monitoring & observability — Prometheus, Grafana, uptime checks, alerting
|
- [ ] Server-side: add `grafana` DNS record, Caddyfile block, redeploy with `--profile full`
|
||||||
|
|
||||||
|
|
||||||
## Installed Software Versions
|
## Installed Software Versions
|
||||||
@@ -787,6 +789,298 @@ curl -I https://flower.wizard.lu
|
|||||||
sudo systemctl status gitea-runner
|
sudo systemctl status gitea-runner
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Step 17: Backups
|
||||||
|
|
||||||
|
Three layers of backup protection: Hetzner server snapshots, automated PostgreSQL dumps with local rotation, and offsite sync to Cloudflare R2.
|
||||||
|
|
||||||
|
### 17.1 Enable Hetzner Server Backups
|
||||||
|
|
||||||
|
In the Hetzner Cloud Console:
|
||||||
|
|
||||||
|
1. Go to **Servers** > select your server > **Backups**
|
||||||
|
2. Click **Enable backups** (~20% of server cost, ~1.20 EUR/mo for CAX11)
|
||||||
|
3. Hetzner takes automatic weekly snapshots with 7-day retention
|
||||||
|
|
||||||
|
This covers full-disk recovery (OS, Docker volumes, config files) but is coarse-grained. Database-level backups (below) give finer restore granularity.
|
||||||
|
|
||||||
|
### 17.2 Cloudflare R2 Setup (Offsite Backup Storage)
|
||||||
|
|
||||||
|
R2 provides S3-compatible object storage with a generous free tier (10 GB storage, 10 million reads/month).
|
||||||
|
|
||||||
|
**Create Cloudflare account and R2 bucket:**
|
||||||
|
|
||||||
|
1. Sign up at [cloudflare.com](https://dash.cloudflare.com/sign-up) (free account)
|
||||||
|
2. Go to **R2 Object Storage** > **Create bucket**
|
||||||
|
3. Name: `orion-backups`, region: automatic
|
||||||
|
4. Go to **R2** > **Manage R2 API Tokens** > **Create API token**
|
||||||
|
- Permissions: Object Read & Write
|
||||||
|
- Specify bucket: `orion-backups`
|
||||||
|
5. Note the **Account ID**, **Access Key ID**, and **Secret Access Key**
|
||||||
|
|
||||||
|
**Install and configure AWS CLI on the server:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo apt install -y awscli
|
||||||
|
aws configure --profile r2
|
||||||
|
# Access Key ID: <from step 5>
|
||||||
|
# Secret Access Key: <from step 5>
|
||||||
|
# Default region name: auto
|
||||||
|
# Default output format: json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Test connectivity:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
aws s3 ls --endpoint-url https://<ACCOUNT_ID>.r2.cloudflarestorage.com --profile r2
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the R2 backup bucket name to your production `.env`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
R2_BACKUP_BUCKET=orion-backups
|
||||||
|
```
|
||||||
|
|
||||||
|
### 17.3 Backup Script
|
||||||
|
|
||||||
|
The backup script at `scripts/backup.sh` handles:
|
||||||
|
|
||||||
|
- `pg_dump` of Orion DB (via `docker exec orion-db-1`)
|
||||||
|
- `pg_dump` of Gitea DB (via `docker exec gitea-db`)
|
||||||
|
- On Sundays: copies daily backup to `weekly/` subdirectory
|
||||||
|
- Rotation: keeps 7 daily, 4 weekly backups
|
||||||
|
- Optional `--upload` flag: syncs to Cloudflare R2
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create backup directories
|
||||||
|
mkdir -p ~/backups/{orion,gitea}/{daily,weekly}
|
||||||
|
|
||||||
|
# Run a manual backup
|
||||||
|
bash ~/apps/orion/scripts/backup.sh
|
||||||
|
|
||||||
|
# Run with R2 upload
|
||||||
|
bash ~/apps/orion/scripts/backup.sh --upload
|
||||||
|
|
||||||
|
# Verify backup integrity
|
||||||
|
ls -lh ~/backups/orion/daily/
|
||||||
|
gunzip -t ~/backups/orion/daily/*.sql.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
### 17.4 Systemd Timer (Daily at 03:00)
|
||||||
|
|
||||||
|
Create the service unit:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo nano /etc/systemd/system/orion-backup.service
|
||||||
|
```
|
||||||
|
|
||||||
|
```ini
|
||||||
|
[Unit]
|
||||||
|
Description=Orion database backup
|
||||||
|
After=docker.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
User=samir
|
||||||
|
ExecStart=/usr/bin/bash /home/samir/apps/orion/scripts/backup.sh --upload
|
||||||
|
StandardOutput=journal
|
||||||
|
StandardError=journal
|
||||||
|
```
|
||||||
|
|
||||||
|
Create the timer:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo nano /etc/systemd/system/orion-backup.timer
|
||||||
|
```
|
||||||
|
|
||||||
|
```ini
|
||||||
|
[Unit]
|
||||||
|
Description=Run Orion backup daily at 03:00
|
||||||
|
|
||||||
|
[Timer]
|
||||||
|
OnCalendar=*-*-* 03:00:00
|
||||||
|
Persistent=true
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=timers.target
|
||||||
|
```
|
||||||
|
|
||||||
|
Enable and start:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo systemctl daemon-reload
|
||||||
|
sudo systemctl enable --now orion-backup.timer
|
||||||
|
|
||||||
|
# Verify timer is active
|
||||||
|
systemctl list-timers orion-backup.timer
|
||||||
|
|
||||||
|
# Test manually
|
||||||
|
sudo systemctl start orion-backup.service
|
||||||
|
journalctl -u orion-backup.service --no-pager
|
||||||
|
```
|
||||||
|
|
||||||
|
### 17.5 Restore Procedure
|
||||||
|
|
||||||
|
The restore script at `scripts/restore.sh` handles the full restore cycle:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Restore Orion database
|
||||||
|
bash ~/apps/orion/scripts/restore.sh orion ~/backups/orion/daily/orion_20260214_030000.sql.gz
|
||||||
|
|
||||||
|
# Restore Gitea database
|
||||||
|
bash ~/apps/orion/scripts/restore.sh gitea ~/backups/gitea/daily/gitea_20260214_030000.sql.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
The script will:
|
||||||
|
|
||||||
|
1. Stop app containers (keep DB running)
|
||||||
|
2. Drop and recreate the database
|
||||||
|
3. Restore from the `.sql.gz` backup
|
||||||
|
4. Run Alembic migrations (Orion only)
|
||||||
|
5. Restart all containers
|
||||||
|
|
||||||
|
To restore from R2 (if local backups are lost):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Download from R2
|
||||||
|
aws s3 sync s3://orion-backups/ ~/backups/ \
|
||||||
|
--endpoint-url https://<ACCOUNT_ID>.r2.cloudflarestorage.com \
|
||||||
|
--profile r2
|
||||||
|
|
||||||
|
# Then restore as usual
|
||||||
|
bash ~/apps/orion/scripts/restore.sh orion ~/backups/orion/daily/<latest>.sql.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
### 17.6 Verification
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Backup files exist
|
||||||
|
ls -lh ~/backups/orion/daily/
|
||||||
|
ls -lh ~/backups/gitea/daily/
|
||||||
|
|
||||||
|
# Backup integrity
|
||||||
|
gunzip -t ~/backups/orion/daily/*.sql.gz
|
||||||
|
|
||||||
|
# Timer is scheduled
|
||||||
|
systemctl list-timers orion-backup.timer
|
||||||
|
|
||||||
|
# R2 sync (if configured)
|
||||||
|
aws s3 ls s3://orion-backups/ --endpoint-url https://<ACCOUNT_ID>.r2.cloudflarestorage.com --profile r2 --recursive
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 18: Monitoring & Observability
|
||||||
|
|
||||||
|
Prometheus + Grafana monitoring stack with host and container metrics.
|
||||||
|
|
||||||
|
### Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────┐ scrape ┌─────────────────┐
|
||||||
|
│ Prometheus │◄────────────────│ Orion API │ /metrics
|
||||||
|
│ :9090 │◄────────────────│ node-exporter │ :9100
|
||||||
|
│ │◄────────────────│ cAdvisor │ :8080
|
||||||
|
└──────┬───────┘ └─────────────────┘
|
||||||
|
│ query
|
||||||
|
┌──────▼───────┐
|
||||||
|
│ Grafana │──── https://grafana.wizard.lu
|
||||||
|
│ :3001 │
|
||||||
|
└──────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Resource Budget (4 GB Server)
|
||||||
|
|
||||||
|
| Container | RAM Limit | Purpose |
|
||||||
|
|---|---|---|
|
||||||
|
| prometheus | 256 MB | Metrics storage (15-day retention, 2 GB max) |
|
||||||
|
| grafana | 192 MB | Dashboards (SQLite backend) |
|
||||||
|
| node-exporter | 64 MB | Host CPU/RAM/disk metrics |
|
||||||
|
| cadvisor | 128 MB | Per-container resource metrics |
|
||||||
|
| **Total new** | **640 MB** | |
|
||||||
|
|
||||||
|
Existing stack ~1.8 GB + 640 MB new = ~2.4 GB. Leaves ~1.6 GB for OS. If too tight, live-upgrade to CAX21 (8 GB/80 GB, ~7.50 EUR/mo) via **Cloud Console > Server > Rescale** (~2 min restart).
|
||||||
|
|
||||||
|
### 18.1 DNS Record
|
||||||
|
|
||||||
|
Add A and AAAA records for `grafana.wizard.lu`:
|
||||||
|
|
||||||
|
| Type | Name | Value | TTL |
|
||||||
|
|---|---|---|---|
|
||||||
|
| A | `grafana` | `91.99.65.229` | 300 |
|
||||||
|
| AAAA | `grafana` | `2a01:4f8:1c1a:b39c::1` | 300 |
|
||||||
|
|
||||||
|
### 18.2 Caddy Configuration
|
||||||
|
|
||||||
|
Add to `/etc/caddy/Caddyfile`:
|
||||||
|
|
||||||
|
```caddy
|
||||||
|
grafana.wizard.lu {
|
||||||
|
reverse_proxy localhost:3001
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Reload Caddy:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo systemctl reload caddy
|
||||||
|
```
|
||||||
|
|
||||||
|
### 18.3 Production Environment
|
||||||
|
|
||||||
|
Add to `~/apps/orion/.env`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ENABLE_METRICS=true
|
||||||
|
GRAFANA_URL=https://grafana.wizard.lu
|
||||||
|
GRAFANA_ADMIN_USER=admin
|
||||||
|
GRAFANA_ADMIN_PASSWORD=<strong-password>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 18.4 Deploy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/apps/orion
|
||||||
|
docker compose --profile full up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify all containers are running:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose --profile full ps
|
||||||
|
docker stats --no-stream
|
||||||
|
```
|
||||||
|
|
||||||
|
### 18.5 Grafana First Login
|
||||||
|
|
||||||
|
1. Open `https://grafana.wizard.lu`
|
||||||
|
2. Login with `admin` / `<password from .env>`
|
||||||
|
3. Change the default password when prompted
|
||||||
|
|
||||||
|
**Import community dashboards:**
|
||||||
|
|
||||||
|
- **Node Exporter Full**: Dashboards > Import > ID `1860` > Select Prometheus datasource
|
||||||
|
- **Docker / cAdvisor**: Dashboards > Import > ID `193` > Select Prometheus datasource
|
||||||
|
|
||||||
|
### 18.6 Verification
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Prometheus metrics from Orion API
|
||||||
|
curl -s https://api.wizard.lu/metrics | head -5
|
||||||
|
|
||||||
|
# Health endpoints
|
||||||
|
curl -s https://api.wizard.lu/health/live
|
||||||
|
curl -s https://api.wizard.lu/health/ready
|
||||||
|
|
||||||
|
# Prometheus targets (all should be "up")
|
||||||
|
curl -s http://localhost:9090/api/v1/targets | python3 -m json.tool | grep health
|
||||||
|
|
||||||
|
# Grafana accessible
|
||||||
|
curl -I https://grafana.wizard.lu
|
||||||
|
|
||||||
|
# RAM usage within limits
|
||||||
|
docker stats --no-stream
|
||||||
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Domain & Port Reference
|
## Domain & Port Reference
|
||||||
@@ -801,6 +1095,10 @@ sudo systemctl status gitea-runner
|
|||||||
| Redis | 6379 | 6380 | (internal only) |
|
| Redis | 6379 | 6380 | (internal only) |
|
||||||
| Flower | 5555 | 5555 | `flower.wizard.lu` |
|
| Flower | 5555 | 5555 | `flower.wizard.lu` |
|
||||||
| Gitea | 3000 | 3000 | `git.wizard.lu` |
|
| Gitea | 3000 | 3000 | `git.wizard.lu` |
|
||||||
|
| Prometheus | 9090 | 9090 (localhost) | (internal only) |
|
||||||
|
| Grafana | 3000 | 3001 (localhost) | `grafana.wizard.lu` |
|
||||||
|
| Node Exporter | 9100 | 9100 (localhost) | (internal only) |
|
||||||
|
| cAdvisor | 8080 | 8080 (localhost) | (internal only) |
|
||||||
| Caddy | — | 80, 443 | (reverse proxy) |
|
| Caddy | — | 80, 443 | (reverse proxy) |
|
||||||
|
|
||||||
!!! note "Single backend, multiple domains"
|
!!! note "Single backend, multiple domains"
|
||||||
@@ -810,15 +1108,23 @@ sudo systemctl status gitea-runner
|
|||||||
|
|
||||||
```
|
```
|
||||||
~/
|
~/
|
||||||
├── gitea/
|
|
||||||
│ └── docker-compose.yml # Gitea + PostgreSQL
|
|
||||||
├── apps/
|
├── apps/
|
||||||
│ └── orion/ # Orion application
|
│ └── orion/ # Orion application
|
||||||
│ ├── .env # Production environment
|
│ ├── .env # Production environment
|
||||||
│ ├── docker-compose.yml # App stack (API, DB, Redis, Celery)
|
│ ├── docker-compose.yml # App stack (API, DB, Redis, Celery, monitoring)
|
||||||
|
│ ├── monitoring/ # Prometheus + Grafana config
|
||||||
│ ├── logs/ # Application logs
|
│ ├── logs/ # Application logs
|
||||||
│ ├── uploads/ # User uploads
|
│ ├── uploads/ # User uploads
|
||||||
│ └── exports/ # Export files
|
│ └── exports/ # Export files
|
||||||
|
├── backups/
|
||||||
|
│ ├── orion/
|
||||||
|
│ │ ├── daily/ # 7-day retention
|
||||||
|
│ │ └── weekly/ # 4-week retention
|
||||||
|
│ └── gitea/
|
||||||
|
│ ├── daily/
|
||||||
|
│ └── weekly/
|
||||||
|
├── gitea/
|
||||||
|
│ └── docker-compose.yml # Gitea + PostgreSQL
|
||||||
└── gitea-runner/ # CI/CD runner (act_runner v0.2.13)
|
└── gitea-runner/ # CI/CD runner (act_runner v0.2.13)
|
||||||
├── act_runner # symlink → act_runner-0.2.13-linux-arm64
|
├── act_runner # symlink → act_runner-0.2.13-linux-arm64
|
||||||
├── act_runner-0.2.13-linux-arm64
|
├── act_runner-0.2.13-linux-arm64
|
||||||
@@ -930,8 +1236,10 @@ After Caddy is configured:
|
|||||||
| API ReDoc | `https://api.wizard.lu/redoc` |
|
| API ReDoc | `https://api.wizard.lu/redoc` |
|
||||||
| Admin panel | `https://wizard.lu/admin/login` |
|
| Admin panel | `https://wizard.lu/admin/login` |
|
||||||
| Health check | `https://api.wizard.lu/health` |
|
| Health check | `https://api.wizard.lu/health` |
|
||||||
|
| Prometheus metrics | `https://api.wizard.lu/metrics` |
|
||||||
| Gitea | `https://git.wizard.lu` |
|
| Gitea | `https://git.wizard.lu` |
|
||||||
| Flower | `https://flower.wizard.lu` |
|
| Flower | `https://flower.wizard.lu` |
|
||||||
|
| Grafana | `https://grafana.wizard.lu` |
|
||||||
| OMS Platform | `https://oms.lu` (after DNS) |
|
| OMS Platform | `https://oms.lu` (after DNS) |
|
||||||
| Loyalty+ Platform | `https://rewardflow.lu` (after DNS) |
|
| Loyalty+ Platform | `https://rewardflow.lu` (after DNS) |
|
||||||
|
|
||||||
|
|||||||
5
main.py
5
main.py
@@ -237,6 +237,11 @@ else:
|
|||||||
# Include API router (JSON endpoints at /api/*)
|
# Include API router (JSON endpoints at /api/*)
|
||||||
app.include_router(api_router, prefix="/api")
|
app.include_router(api_router, prefix="/api")
|
||||||
|
|
||||||
|
# Include observability endpoints (/metrics, /health/live, /health/ready, /health/tools)
|
||||||
|
from app.core.observability import health_router
|
||||||
|
|
||||||
|
app.include_router(health_router)
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# FAVICON ROUTES (Must be registered BEFORE page routers)
|
# FAVICON ROUTES (Must be registered BEFORE page routers)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|||||||
17
monitoring/grafana/provisioning/dashboards/dashboard.yml
Normal file
17
monitoring/grafana/provisioning/dashboards/dashboard.yml
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# File-based dashboard provider
|
||||||
|
# Import dashboards via Grafana UI; they'll be saved to the SQLite backend.
|
||||||
|
# Pre-built JSON dashboards can be placed in the json/ subdirectory.
|
||||||
|
# Docs: https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards
|
||||||
|
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: default
|
||||||
|
orgId: 1
|
||||||
|
folder: ""
|
||||||
|
type: file
|
||||||
|
disableDeletion: false
|
||||||
|
editable: true
|
||||||
|
options:
|
||||||
|
path: /var/lib/grafana/dashboards
|
||||||
|
foldersFromFilesStructure: false
|
||||||
12
monitoring/grafana/provisioning/datasources/datasource.yml
Normal file
12
monitoring/grafana/provisioning/datasources/datasource.yml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Auto-provision Prometheus as the default datasource
|
||||||
|
# Docs: https://grafana.com/docs/grafana/latest/administration/provisioning/#datasources
|
||||||
|
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
type: prometheus
|
||||||
|
access: proxy
|
||||||
|
url: http://prometheus:9090
|
||||||
|
isDefault: true
|
||||||
|
editable: true
|
||||||
36
monitoring/prometheus.yml
Normal file
36
monitoring/prometheus.yml
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# Prometheus configuration for Orion platform
|
||||||
|
# Docs: https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
||||||
|
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
# Orion API — /metrics endpoint (prometheus_client)
|
||||||
|
- job_name: "orion-api"
|
||||||
|
metrics_path: /metrics
|
||||||
|
static_configs:
|
||||||
|
- targets: ["api:8000"]
|
||||||
|
labels:
|
||||||
|
service: "orion-api"
|
||||||
|
|
||||||
|
# Node Exporter — host-level CPU, RAM, disk metrics
|
||||||
|
- job_name: "node-exporter"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["node-exporter:9100"]
|
||||||
|
labels:
|
||||||
|
service: "node-exporter"
|
||||||
|
|
||||||
|
# cAdvisor — per-container resource metrics
|
||||||
|
- job_name: "cadvisor"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["cadvisor:8080"]
|
||||||
|
labels:
|
||||||
|
service: "cadvisor"
|
||||||
|
|
||||||
|
# Prometheus self-monitoring
|
||||||
|
- job_name: "prometheus"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:9090"]
|
||||||
|
labels:
|
||||||
|
service: "prometheus"
|
||||||
@@ -49,5 +49,8 @@ flower==2.0.1
|
|||||||
# Error tracking
|
# Error tracking
|
||||||
sentry-sdk[fastapi]>=2.0.0
|
sentry-sdk[fastapi]>=2.0.0
|
||||||
|
|
||||||
|
# Prometheus metrics
|
||||||
|
prometheus_client>=0.20.0
|
||||||
|
|
||||||
# Cloud storage (S3-compatible - Cloudflare R2)
|
# Cloud storage (S3-compatible - Cloudflare R2)
|
||||||
boto3>=1.34.0
|
boto3>=1.34.0
|
||||||
150
scripts/backup.sh
Executable file
150
scripts/backup.sh
Executable file
@@ -0,0 +1,150 @@
|
|||||||
|
#!/usr/bin/env bash
# scripts/backup.sh — Automated PostgreSQL backup for Orion and Gitea
#
# Usage:
#   bash scripts/backup.sh           # Local backup only
#   bash scripts/backup.sh --upload  # Local backup + sync to Cloudflare R2
#
# Cron / systemd timer: runs daily at 03:00
# On Sundays: copies daily backup to weekly/
# Retention: 7 daily, 4 weekly

set -euo pipefail

# =============================================================================
# Configuration
# =============================================================================
BACKUP_ROOT="${HOME}/backups"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
DAY_OF_WEEK=$(date +%u)  # 1=Monday, 7=Sunday

# Orion DB settings (from docker-compose.yml)
ORION_CONTAINER="orion-db-1"
ORION_DB="orion_db"
ORION_USER="orion_user"

# Gitea DB settings (from ~/gitea/docker-compose.yml)
GITEA_CONTAINER="gitea-db"
GITEA_DB="gitea"
GITEA_USER="gitea"

# R2 settings (loaded from .env if available)
ORION_APP_DIR="${HOME}/apps/orion"
if [ -f "${ORION_APP_DIR}/.env" ]; then
    R2_ACCOUNT_ID=$(grep -s '^R2_ACCOUNT_ID=' "${ORION_APP_DIR}/.env" | cut -d= -f2- || true)
    R2_BACKUP_BUCKET=$(grep -s '^R2_BACKUP_BUCKET=' "${ORION_APP_DIR}/.env" | cut -d= -f2- || true)
fi
R2_BACKUP_BUCKET="${R2_BACKUP_BUCKET:-orion-backups}"
# Bug fix: under `set -u`, an unguarded ${R2_ACCOUNT_ID} aborts the entire
# script with "unbound variable" whenever .env is missing — so even local-only
# backups would never run. Default to empty here; upload_to_r2 validates the
# value before any network call is attempted.
R2_ENDPOINT="https://${R2_ACCOUNT_ID:-}.r2.cloudflarestorage.com"

# Retention (in days for daily/, in weeks for weekly/)
DAILY_KEEP=7
WEEKLY_KEEP=4
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Functions
|
||||||
|
# =============================================================================
|
||||||
|
# Timestamped logger: prints all arguments prefixed with "[YYYY-mm-dd HH:MM:SS]".
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
|
||||||
|
|
||||||
|
# Dump one PostgreSQL database from a docker container into a gzipped file.
#
# Args:
#   $1 container  — docker container name running postgres
#   $2 db_name    — database to dump
#   $3 db_user    — postgres user passed to pg_dump
#   $4 target_dir — destination directory (created if missing)
#   $5 filename   — output file name (expected *.sql.gz)
#
# Returns non-zero if the dump pipeline fails (pipefail makes the pipeline
# status reflect a pg_dump failure, not just gzip's).
backup_database() {
    local container="$1"
    local db_name="$2"
    local db_user="$3"
    local target_dir="$4"
    local filename="$5"

    mkdir -p "${target_dir}"

    log "Backing up ${db_name} from ${container}..."
    # Fix: write to ${target_dir}/${filename} — the previous $(unknown)
    # command substitution never referenced the declared filename argument
    # and produced a broken output path.
    if docker exec "${container}" pg_dump -U "${db_user}" "${db_name}" | gzip > "${target_dir}/${filename}"; then
        local size
        size=$(du -h "${target_dir}/${filename}" | cut -f1)
        log " OK: ${filename} (${size})"
    else
        log " FAILED: ${db_name} backup"
        return 1
    fi
}
|
||||||
|
|
||||||
|
# Remove *.sql.gz backups older than the given retention window.
#   $1 — backup directory
#   $2 — retention in days (passed to find -mtime +N)
rotate_backups() {
    local backup_dir="$1"
    local retention_days="$2"
    local stale

    # A directory that was never created has nothing to rotate.
    [ -d "${backup_dir}" ] || return 0

    stale=$(find "${backup_dir}" -name "*.sql.gz" -mtime +"${retention_days}" 2>/dev/null | wc -l)
    [ "${stale}" -gt 0 ] || return 0

    find "${backup_dir}" -name "*.sql.gz" -mtime +"${retention_days}" -delete
    log " Rotated: removed ${stale} old backups from ${backup_dir}"
}
|
||||||
|
|
||||||
|
# Mirror the local backup tree to the Cloudflare R2 bucket via the AWS CLI.
# Requires: an `aws` CLI "r2" profile with credentials, and R2_ACCOUNT_ID
# loaded from .env (validated below before any network call).
upload_to_r2() {
    if [ -z "${R2_ACCOUNT_ID:-}" ]; then
        log "ERROR: R2_ACCOUNT_ID not set. Cannot upload."
        return 1
    fi

    log "Syncing backups to R2 bucket: ${R2_BACKUP_BUCKET}..."
    # NOTE(review): --delete mirrors local rotation to R2, so the offsite copy
    # keeps only the same 7-daily/4-weekly window, and an accidental local
    # deletion propagates on the next sync — confirm this is the intended
    # retention policy for the offsite copy.
    aws s3 sync "${BACKUP_ROOT}/" "s3://${R2_BACKUP_BUCKET}/" \
        --endpoint-url "${R2_ENDPOINT}" \
        --profile r2 \
        --delete \
        --exclude "*.tmp"
    log " OK: R2 sync complete"
}
|
||||||
|
|
||||||
|
# =============================================================================
# Main
# =============================================================================
UPLOAD=false
if [ "${1:-}" = "--upload" ]; then
    UPLOAD=true
fi

log "=== Orion Backup Started ==="

# Ensure backup directories exist
mkdir -p "${BACKUP_ROOT}/orion/"{daily,weekly}
mkdir -p "${BACKUP_ROOT}/gitea/"{daily,weekly}

# --- Daily backups ---
# Failures are counted instead of aborting (`|| ...` bypasses set -e), so one
# database failing does not prevent the other from being backed up.
ERRORS=0

backup_database "${ORION_CONTAINER}" "${ORION_DB}" "${ORION_USER}" \
    "${BACKUP_ROOT}/orion/daily" "orion_${TIMESTAMP}.sql.gz" || ERRORS=$((ERRORS + 1))

backup_database "${GITEA_CONTAINER}" "${GITEA_DB}" "${GITEA_USER}" \
    "${BACKUP_ROOT}/gitea/daily" "gitea_${TIMESTAMP}.sql.gz" || ERRORS=$((ERRORS + 1))

# --- Weekly copies (Sunday) ---
# `|| true` keeps the script going if today's daily dump failed to materialize.
if [ "${DAY_OF_WEEK}" -eq 7 ]; then
    log "Sunday: copying to weekly/"
    cp -f "${BACKUP_ROOT}/orion/daily/orion_${TIMESTAMP}.sql.gz" \
        "${BACKUP_ROOT}/orion/weekly/" 2>/dev/null || true
    cp -f "${BACKUP_ROOT}/gitea/daily/gitea_${TIMESTAMP}.sql.gz" \
        "${BACKUP_ROOT}/gitea/weekly/" 2>/dev/null || true
fi

# --- Rotation ---
log "Rotating old backups..."
rotate_backups "${BACKUP_ROOT}/orion/daily" "${DAILY_KEEP}"
rotate_backups "${BACKUP_ROOT}/gitea/daily" "${DAILY_KEEP}"
# Weekly retention is expressed in days for find -mtime (4 weeks -> 28 days).
rotate_backups "${BACKUP_ROOT}/orion/weekly" $((WEEKLY_KEEP * 7))
rotate_backups "${BACKUP_ROOT}/gitea/weekly" $((WEEKLY_KEEP * 7))

# --- Optional R2 upload ---
if [ "${UPLOAD}" = true ]; then
    upload_to_r2 || ERRORS=$((ERRORS + 1))
fi

# --- Summary ---
# Non-zero exit signals cron/systemd that at least one step failed.
if [ "${ERRORS}" -eq 0 ]; then
    log "=== Backup completed successfully ==="
else
    log "=== Backup completed with ${ERRORS} error(s) ==="
    exit 1
fi
|
||||||
152
scripts/restore.sh
Executable file
152
scripts/restore.sh
Executable file
@@ -0,0 +1,152 @@
|
|||||||
|
#!/usr/bin/env bash
# scripts/restore.sh — Database restore helper for Orion and Gitea
#
# Usage:
#   bash scripts/restore.sh orion ~/backups/orion/daily/orion_20260214_030000.sql.gz
#   bash scripts/restore.sh gitea ~/backups/gitea/daily/gitea_20260214_030000.sql.gz
#
# What it does:
#   1. Stops app containers (keeps DB running)
#   2. Drops and recreates the database
#   3. Restores from the .sql.gz backup
#   4. Runs Alembic migrations (Orion only)
#   5. Restarts all containers

set -euo pipefail

# =============================================================================
# Configuration
# =============================================================================
# Directory containing Orion's docker-compose.yml (used by restore_orion).
ORION_APP_DIR="${HOME}/apps/orion"
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Functions
|
||||||
|
# =============================================================================
|
||||||
|
# Timestamped logger: prints all arguments prefixed with "[YYYY-mm-dd HH:MM:SS]".
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
|
||||||
|
|
||||||
|
# Print CLI help to stdout, then exit with status 1.
usage() {
    cat <<EOF
Usage: $0 <target> <backup-file>

 target: 'orion' or 'gitea'
 backup-file: path to .sql.gz file

Examples:
 $0 orion ~/backups/orion/daily/orion_20260214_030000.sql.gz
 $0 gitea ~/backups/gitea/daily/gitea_20260214_030000.sql.gz
EOF
    exit 1
}
|
||||||
|
|
||||||
|
# Restore the Orion database from a .sql.gz dump, then re-run migrations.
#   $1 — path to the gzipped SQL dump
# Side effects: stops/starts containers via docker compose; leaves the shell
# cwd in ${ORION_APP_DIR}.
restore_orion() {
    local backup_file="$1"
    # Container/DB names mirror the backup.sh configuration.
    local container="orion-db-1"
    local db_name="orion_db"
    local db_user="orion_user"

    log "=== Restoring Orion database ==="

    # Stop app containers (keep DB running) so nothing writes mid-restore.
    log "Stopping Orion app containers..."
    cd "${ORION_APP_DIR}"
    docker compose --profile full stop api celery-worker celery-beat flower 2>/dev/null || true

    # Drop and recreate database. Lingering connections would block dropdb,
    # so terminate them first (best-effort; errors are ignored).
    log "Dropping and recreating ${db_name}..."
    docker exec "${container}" psql -U "${db_user}" -d postgres -c \
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${db_name}' AND pid <> pg_backend_pid();" 2>/dev/null || true
    docker exec "${container}" dropdb -U "${db_user}" --if-exists "${db_name}"
    docker exec "${container}" createdb -U "${db_user}" "${db_name}"

    # Restore: stream the decompressed dump straight into psql.
    log "Restoring from ${backup_file}..."
    gunzip -c "${backup_file}" | docker exec -i "${container}" psql -U "${db_user}" -d "${db_name}" --quiet

    # Run migrations to bring an older dump up to the current schema.
    log "Running Alembic migrations..."
    docker compose --profile full start api 2>/dev/null || \
        docker compose --profile full up -d api
    sleep 5  # Wait for API container to be ready
    docker compose --profile full exec -e PYTHONPATH=/app api python -m alembic upgrade heads

    # Restart all
    log "Restarting all services..."
    docker compose --profile full up -d

    log "=== Orion restore complete ==="
}
|
||||||
|
|
||||||
|
# Restore the Gitea database from a .sql.gz dump.
#   $1 — path to the gzipped SQL dump
# Side effects: stops/starts the Gitea stack; leaves the shell cwd in ~/gitea.
restore_gitea() {
    local backup_file="$1"
    # Container/DB names mirror the backup.sh configuration.
    local container="gitea-db"
    local db_name="gitea"
    local db_user="gitea"
    local gitea_dir="${HOME}/gitea"

    log "=== Restoring Gitea database ==="

    # Stop Gitea container (keep DB running) so nothing writes mid-restore.
    log "Stopping Gitea..."
    cd "${gitea_dir}"
    docker compose stop gitea 2>/dev/null || true

    # Drop and recreate database; terminate lingering connections first
    # (best-effort) so dropdb cannot be blocked.
    log "Dropping and recreating ${db_name}..."
    docker exec "${container}" psql -U "${db_user}" -d postgres -c \
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${db_name}' AND pid <> pg_backend_pid();" 2>/dev/null || true
    docker exec "${container}" dropdb -U "${db_user}" --if-exists "${db_name}"
    docker exec "${container}" createdb -U "${db_user}" "${db_name}"

    # Restore: stream the decompressed dump straight into psql.
    log "Restoring from ${backup_file}..."
    gunzip -c "${backup_file}" | docker exec -i "${container}" psql -U "${db_user}" -d "${db_name}" --quiet

    # Restart Gitea (no migrations step — Gitea manages its own schema)
    log "Restarting Gitea..."
    docker compose up -d

    log "=== Gitea restore complete ==="
}
|
||||||
|
|
||||||
|
# =============================================================================
# Main
# =============================================================================
if [ $# -lt 2 ]; then
    usage
fi

TARGET="$1"
BACKUP_FILE="$2"

# Validate backup file before doing anything destructive.
if [ ! -f "${BACKUP_FILE}" ]; then
    log "ERROR: Backup file not found: ${BACKUP_FILE}"
    exit 1
fi

if [[ ! "${BACKUP_FILE}" == *.sql.gz ]]; then
    log "ERROR: Expected a .sql.gz file, got: ${BACKUP_FILE}"
    exit 1
fi

# Confirm — the restore drops the database, so require an explicit
# interactive 'y'/'Y'; anything else aborts cleanly with status 0.
log "WARNING: This will DROP and RECREATE the ${TARGET} database!"
log "Backup file: ${BACKUP_FILE}"
read -rp "Continue? (y/N) " confirm
if [[ "${confirm}" != [yY] ]]; then
    log "Aborted."
    exit 0
fi

# Dispatch to the per-target restore routine.
case "${TARGET}" in
    orion)
        restore_orion "${BACKUP_FILE}"
        ;;
    gitea)
        restore_gitea "${BACKUP_FILE}"
        ;;
    *)
        log "ERROR: Unknown target '${TARGET}'. Use 'orion' or 'gitea'."
        usage
        ;;
esac
|
||||||
Reference in New Issue
Block a user