feat: add automated backups and Prometheus/Grafana monitoring stack (Steps 17-18)
Some checks failed
Some checks failed
Backups: pg_dump scripts with daily/weekly rotation and Cloudflare R2 offsite sync.
Monitoring: Prometheus, Grafana, node-exporter, and cAdvisor in docker-compose; /metrics endpoint activated via prometheus_client.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -194,6 +194,12 @@ class Settings(BaseSettings):
|
||||
sentry_environment: str = "development" # development, staging, production
|
||||
sentry_traces_sample_rate: float = 0.1 # 10% of transactions for performance monitoring
|
||||
|
||||
# =============================================================================
|
||||
# MONITORING
|
||||
# =============================================================================
|
||||
enable_metrics: bool = False
|
||||
grafana_url: str = "https://grafana.wizard.lu"
|
||||
|
||||
# =============================================================================
|
||||
# CLOUDFLARE R2 STORAGE
|
||||
# =============================================================================
|
||||
|
||||
@@ -16,8 +16,10 @@ from sqlalchemy import text
|
||||
|
||||
from middleware.auth import AuthManager
|
||||
|
||||
from .config import settings
|
||||
from .database import engine
|
||||
from .logging import setup_logging
|
||||
from .observability import init_observability, shutdown_observability
|
||||
|
||||
# Remove this import if not needed: from models.database.base import Base
|
||||
|
||||
@@ -33,13 +35,22 @@ async def lifespan(app: FastAPI):
|
||||
# === STARTUP ===
|
||||
app_logger = setup_logging()
|
||||
app_logger.info("Starting Orion multi-tenant platform")
|
||||
|
||||
init_observability(
|
||||
enable_metrics=settings.enable_metrics,
|
||||
sentry_dsn=settings.sentry_dsn,
|
||||
environment=settings.sentry_environment,
|
||||
flower_url=settings.flower_url,
|
||||
grafana_url=settings.grafana_url,
|
||||
)
|
||||
|
||||
logger.info("[OK] Application startup completed")
|
||||
|
||||
yield
|
||||
|
||||
# === SHUTDOWN ===
|
||||
app_logger.info("Shutting down Orion platform")
|
||||
# Add cleanup tasks here if needed
|
||||
shutdown_observability()
|
||||
|
||||
|
||||
# === NEW HELPER FUNCTION ===
|
||||
|
||||
@@ -515,17 +515,6 @@ external_tools = ExternalToolConfig()
|
||||
# Router that the health endpoints below are registered on; tagged "Health".
health_router = APIRouter(tags=["Health"])
|
||||
|
||||
|
||||
@health_router.get("/health")
async def health_check() -> dict[str, Any]:
    """
    Aggregated health check endpoint.

    Runs every check known to ``health_registry`` via ``run_all()`` and
    returns the combined result converted to a dictionary.
    """
    # Run all registered checks and serialize the aggregate in one step.
    return health_registry.run_all().to_dict()
|
||||
|
||||
|
||||
@health_router.get("/health/live")
|
||||
async def liveness_check() -> dict[str, str]:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user