refactor: migrate templates and static files to self-contained modules
Templates Migration: - Migrate admin templates to modules (tenancy, billing, monitoring, marketplace, etc.) - Migrate vendor templates to modules (tenancy, billing, orders, messaging, etc.) - Migrate storefront templates to modules (catalog, customers, orders, cart, checkout, cms) - Migrate public templates to modules (billing, marketplace, cms) - Keep shared templates in app/templates/ (base.html, errors/, partials/, macros/) - Migrate letzshop partials to marketplace module Static Files Migration: - Migrate admin JS to modules: tenancy (23 files), core (5 files), monitoring (1 file) - Migrate vendor JS to modules: tenancy (4 files), core (2 files) - Migrate shared JS: vendor-selector.js to core, media-picker.js to cms - Migrate storefront JS: storefront-layout.js to core - Keep framework JS in static/ (api-client, utils, money, icons, log-config, lib/) - Update all template references to use module_static paths Naming Consistency: - Rename static/platform/ to static/public/ - Rename app/templates/platform/ to app/templates/public/ - Update all extends and static references Documentation: - Update module-system.md with shared templates documentation - Update frontend-structure.md with new module JS organization Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -5,12 +5,30 @@ Monitoring module services.
|
||||
This module contains the canonical implementations of monitoring-related services.
|
||||
"""
|
||||
|
||||
# Re-export the monitoring service singletons and their classes so callers can
# import them from the package root instead of the individual modules.
from app.modules.monitoring.services.admin_audit_service import (
    admin_audit_service,
    AdminAuditService,
)
from app.modules.monitoring.services.background_tasks_service import (
    background_tasks_service,
    BackgroundTasksService,
)
from app.modules.monitoring.services.log_service import (
    log_service,
    LogService,
)
from app.modules.monitoring.services.platform_health_service import (
    platform_health_service,
    PlatformHealthService,
)

# Public API: one module-level singleton plus its class per monitoring service.
__all__ = [
    "admin_audit_service",
    "AdminAuditService",
    "background_tasks_service",
    "BackgroundTasksService",
    "log_service",
    "LogService",
    "platform_health_service",
    "PlatformHealthService",
]
|
||||
|
||||
234
app/modules/monitoring/services/admin_audit_service.py
Normal file
234
app/modules/monitoring/services/admin_audit_service.py
Normal file
@@ -0,0 +1,234 @@
|
||||
# app/modules/monitoring/services/admin_audit_service.py
|
||||
"""
|
||||
Admin audit service for tracking admin actions.
|
||||
|
||||
This module provides functions for:
|
||||
- Logging admin actions
|
||||
- Querying audit logs
|
||||
- Generating audit reports
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import and_
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.modules.tenancy.exceptions import AdminOperationException
|
||||
from models.database.admin import AdminAuditLog
|
||||
from models.database.user import User
|
||||
from models.schema.admin import AdminAuditLogFilters, AdminAuditLogResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AdminAuditService:
    """Service for recording and querying the admin audit trail."""

    @staticmethod
    def _build_filter_conditions(filters: AdminAuditLogFilters) -> list:
        """
        Translate filter criteria into SQLAlchemy conditions.

        Shared by get_audit_logs() and get_audit_logs_count() so the list
        query and the count query can never drift apart.
        """
        conditions = []

        if filters.admin_user_id:
            conditions.append(AdminAuditLog.admin_user_id == filters.admin_user_id)

        if filters.action:
            conditions.append(AdminAuditLog.action.ilike(f"%{filters.action}%"))

        if filters.target_type:
            conditions.append(AdminAuditLog.target_type == filters.target_type)

        if filters.date_from:
            conditions.append(AdminAuditLog.created_at >= filters.date_from)

        if filters.date_to:
            conditions.append(AdminAuditLog.created_at <= filters.date_to)

        return conditions

    @staticmethod
    def _to_response(log: AdminAuditLog) -> AdminAuditLogResponse:
        """
        Map an ORM audit-log row to its response schema.

        Single source of truth for the mapping; previously duplicated between
        get_audit_logs() and get_actions_by_target(), with the latter
        inconsistently omitting user_agent/request_id.
        """
        return AdminAuditLogResponse(
            id=log.id,
            admin_user_id=log.admin_user_id,
            admin_username=log.admin_user.username if log.admin_user else None,
            action=log.action,
            target_type=log.target_type,
            target_id=log.target_id,
            details=log.details,
            ip_address=log.ip_address,
            user_agent=log.user_agent,
            request_id=log.request_id,
            created_at=log.created_at,
        )

    def log_action(
        self,
        db: Session,
        admin_user_id: int,
        action: str,
        target_type: str,
        target_id: str,
        details: dict[str, Any] | None = None,
        ip_address: str | None = None,
        user_agent: str | None = None,
        request_id: str | None = None,
    ) -> AdminAuditLog | None:
        """
        Log an admin action to the audit trail.

        Args:
            db: Database session
            admin_user_id: ID of the admin performing the action
            action: Action performed (e.g., 'create_vendor', 'delete_user')
            target_type: Type of target (e.g., 'vendor', 'user')
            target_id: ID of the target entity
            details: Additional context about the action
            ip_address: IP address of the admin
            user_agent: User agent string
            request_id: Request ID for correlation

        Returns:
            Created AdminAuditLog instance, or None if logging failed.
        """
        try:
            audit_log = AdminAuditLog(
                admin_user_id=admin_user_id,
                action=action,
                # Coerce to str so callers may pass ints (column is text).
                target_id=str(target_id),
                target_type=target_type,
                details=details or {},
                ip_address=ip_address,
                user_agent=user_agent,
                request_id=request_id,
            )

            # flush + refresh so the generated PK/timestamps are populated
            # without committing the caller's transaction.
            db.add(audit_log)
            db.flush()
            db.refresh(audit_log)

            logger.info(
                f"Admin action logged: {action} on {target_type}:{target_id} "
                f"by admin {admin_user_id}"
            )

            return audit_log

        except Exception as e:
            logger.error(f"Failed to log admin action: {str(e)}")
            # Don't raise exception - audit logging should not break operations
            return None

    def get_audit_logs(
        self, db: Session, filters: AdminAuditLogFilters
    ) -> list[AdminAuditLogResponse]:
        """
        Get filtered admin audit logs with pagination.

        Args:
            db: Database session
            filters: Filter criteria for audit logs

        Returns:
            List of audit log responses, newest first.

        Raises:
            AdminOperationException: if the database query fails.
        """
        try:
            # Join User so admin_username can be resolved in the response.
            query = db.query(AdminAuditLog).join(
                User, AdminAuditLog.admin_user_id == User.id
            )

            conditions = self._build_filter_conditions(filters)
            if conditions:
                query = query.filter(and_(*conditions))

            logs = (
                query.order_by(AdminAuditLog.created_at.desc())
                .offset(filters.skip)
                .limit(filters.limit)
                .all()
            )

            return [self._to_response(log) for log in logs]

        except Exception as e:
            logger.error(f"Failed to retrieve audit logs: {str(e)}")
            raise AdminOperationException(
                operation="get_audit_logs", reason="Database query failed"
            )

    def get_audit_logs_count(self, db: Session, filters: AdminAuditLogFilters) -> int:
        """Get total count of audit logs matching filters (0 on query failure)."""
        try:
            query = db.query(AdminAuditLog)

            # Same conditions as get_audit_logs, built by the shared helper.
            conditions = self._build_filter_conditions(filters)
            if conditions:
                query = query.filter(and_(*conditions))

            return query.count()

        except Exception as e:
            logger.error(f"Failed to count audit logs: {str(e)}")
            return 0

    def get_recent_actions_by_admin(
        self, db: Session, admin_user_id: int, limit: int = 10
    ) -> list[AdminAuditLogResponse]:
        """Get recent actions by a specific admin."""
        filters = AdminAuditLogFilters(admin_user_id=admin_user_id, limit=limit)
        return self.get_audit_logs(db, filters)

    def get_actions_by_target(
        self, db: Session, target_type: str, target_id: str, limit: int = 50
    ) -> list[AdminAuditLogResponse]:
        """
        Get all actions performed on a specific target, newest first.

        Returns an empty list on query failure (best-effort, like the
        original implementation).
        """
        try:
            logs = (
                db.query(AdminAuditLog)
                .filter(
                    and_(
                        AdminAuditLog.target_type == target_type,
                        AdminAuditLog.target_id == str(target_id),
                    )
                )
                .order_by(AdminAuditLog.created_at.desc())
                .limit(limit)
                .all()
            )

            # Consistency fix: now includes user_agent/request_id like
            # get_audit_logs does (previously omitted here).
            return [self._to_response(log) for log in logs]

        except Exception as e:
            logger.error(f"Failed to get actions by target: {str(e)}")
            return []


# Create service instance
admin_audit_service = AdminAuditService()
|
||||
387
app/modules/monitoring/services/log_service.py
Normal file
387
app/modules/monitoring/services/log_service.py
Normal file
@@ -0,0 +1,387 @@
|
||||
# app/modules/monitoring/services/log_service.py
|
||||
"""
|
||||
Log management service for viewing and managing application logs.
|
||||
|
||||
This module provides functions for:
|
||||
- Querying database logs with filters
|
||||
- Reading file logs
|
||||
- Log statistics and analytics
|
||||
- Log retention and cleanup
|
||||
- Downloading log files
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import and_, func, or_
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import settings
|
||||
from app.exceptions import ResourceNotFoundException
|
||||
from app.modules.tenancy.exceptions import AdminOperationException
|
||||
from models.database.admin import ApplicationLog
|
||||
from models.schema.admin import (
|
||||
ApplicationLogFilters,
|
||||
ApplicationLogListResponse,
|
||||
ApplicationLogResponse,
|
||||
FileLogResponse,
|
||||
LogStatistics,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LogService:
    """Service for managing application logs (database rows and log files)."""

    def get_database_logs(
        self, db: Session, filters: ApplicationLogFilters
    ) -> ApplicationLogListResponse:
        """
        Get logs from database with filtering and pagination.

        Args:
            db: Database session
            filters: Filter criteria

        Returns:
            Paginated list of logs, newest first.

        Raises:
            AdminOperationException: if the database query fails.
        """
        try:
            query = db.query(ApplicationLog)

            # Apply filters
            conditions = []

            if filters.level:
                # Levels are stored upper-case; normalize caller input.
                conditions.append(ApplicationLog.level == filters.level.upper())

            if filters.logger_name:
                conditions.append(
                    ApplicationLog.logger_name.like(f"%{filters.logger_name}%")
                )

            if filters.module:
                conditions.append(ApplicationLog.module.like(f"%{filters.module}%"))

            if filters.user_id:
                conditions.append(ApplicationLog.user_id == filters.user_id)

            if filters.vendor_id:
                conditions.append(ApplicationLog.vendor_id == filters.vendor_id)

            if filters.date_from:
                conditions.append(ApplicationLog.timestamp >= filters.date_from)

            if filters.date_to:
                conditions.append(ApplicationLog.timestamp <= filters.date_to)

            if filters.search:
                # Free-text search over both the message and any captured
                # exception message.
                search_pattern = f"%{filters.search}%"
                conditions.append(
                    or_(
                        ApplicationLog.message.like(search_pattern),
                        ApplicationLog.exception_message.like(search_pattern),
                    )
                )

            if conditions:
                query = query.filter(and_(*conditions))

            # Get total count before pagination is applied.
            total = query.count()

            # Apply pagination and sorting
            logs = (
                query.order_by(ApplicationLog.timestamp.desc())
                .offset(filters.skip)
                .limit(filters.limit)
                .all()
            )

            return ApplicationLogListResponse(
                logs=[ApplicationLogResponse.model_validate(log) for log in logs],
                total=total,
                skip=filters.skip,
                limit=filters.limit,
            )

        except Exception as e:
            logger.error(f"Failed to get database logs: {e}")
            raise AdminOperationException(
                operation="get_database_logs", reason=f"Database query failed: {str(e)}"
            )

    def get_log_statistics(self, db: Session, days: int = 7) -> LogStatistics:
        """
        Get statistics about logs from the last N days.

        Args:
            db: Database session
            days: Number of days to analyze

        Returns:
            Log statistics

        Raises:
            AdminOperationException: if the database query fails.
        """
        try:
            cutoff_date = datetime.now(UTC) - timedelta(days=days)

            # One GROUP BY query yields the per-level counts; the total and
            # the individual level counts are derived from it instead of
            # issuing four additional COUNT queries (same values, fewer
            # round-trips).
            by_level_raw = (
                db.query(ApplicationLog.level, func.count(ApplicationLog.id))
                .filter(ApplicationLog.timestamp >= cutoff_date)
                .group_by(ApplicationLog.level)
                .all()
            )
            by_level = dict(by_level_raw)

            total_count = sum(by_level.values())
            warning_count = by_level.get("WARNING", 0)
            error_count = by_level.get("ERROR", 0)
            critical_count = by_level.get("CRITICAL", 0)

            # Count by module (top 10)
            by_module_raw = (
                db.query(ApplicationLog.module, func.count(ApplicationLog.id))
                .filter(ApplicationLog.timestamp >= cutoff_date)
                .filter(ApplicationLog.module.isnot(None))
                .group_by(ApplicationLog.module)
                .order_by(func.count(ApplicationLog.id).desc())
                .limit(10)
                .all()
            )
            by_module = dict(by_module_raw)

            # Recent errors (last 5)
            recent_errors = (
                db.query(ApplicationLog)
                .filter(
                    and_(
                        ApplicationLog.timestamp >= cutoff_date,
                        ApplicationLog.level.in_(["ERROR", "CRITICAL"]),
                    )
                )
                .order_by(ApplicationLog.timestamp.desc())
                .limit(5)
                .all()
            )

            return LogStatistics(
                total_count=total_count or 0,
                warning_count=warning_count or 0,
                error_count=error_count or 0,
                critical_count=critical_count or 0,
                by_level=by_level,
                by_module=by_module,
                recent_errors=[
                    ApplicationLogResponse.model_validate(log) for log in recent_errors
                ],
            )

        except Exception as e:
            logger.error(f"Failed to get log statistics: {e}")
            raise AdminOperationException(
                operation="get_log_statistics",
                reason=f"Database query failed: {str(e)}",
            )

    def get_file_logs(
        self, filename: str = "app.log", lines: int = 500
    ) -> FileLogResponse:
        """
        Read logs from file.

        Args:
            filename: Log filename (default: app.log); must be a bare
                basename inside the configured log directory.
            lines: Number of lines to return from end of file

        Returns:
            File log content

        Raises:
            ResourceNotFoundException: if the file does not exist or the
                filename is not a plain basename.
            AdminOperationException: if the file cannot be read.
        """
        try:
            # Determine log file path
            log_file_path = settings.log_file
            if log_file_path:
                log_file = Path(log_file_path)
            else:
                log_file = Path("logs") / "app.log"

            # Allow reading backup files
            if filename != "app.log":
                # Security fix: filename is caller-supplied; reject anything
                # containing path components so it cannot escape the log
                # directory (e.g. "../../etc/passwd"). Path(...).name strips
                # directory parts, so a clean basename is unchanged by it.
                if not filename or Path(filename).name != filename:
                    raise ResourceNotFoundException(
                        resource_type="log_file", identifier=filename
                    )
                log_file = log_file.parent / filename

            if not log_file.exists():
                raise ResourceNotFoundException(
                    resource_type="log_file", identifier=str(log_file)
                )

            # Get file stats
            stat = log_file.stat()

            # Read last N lines. NOTE: loads the whole file into memory;
            # acceptable for size-rotated log files.
            with open(log_file, encoding="utf-8", errors="replace") as f:
                all_lines = f.readlines()
                log_lines = all_lines[-lines:] if len(all_lines) > lines else all_lines

            return FileLogResponse(
                filename=log_file.name,
                size_bytes=stat.st_size,
                last_modified=datetime.fromtimestamp(stat.st_mtime, tz=UTC),
                lines=[line.rstrip("\n") for line in log_lines],
                total_lines=len(all_lines),
            )

        except ResourceNotFoundException:
            raise
        except Exception as e:
            logger.error(f"Failed to read log file: {e}")
            raise AdminOperationException(
                operation="get_file_logs", reason=f"File read failed: {str(e)}"
            )

    def list_log_files(self) -> list[dict]:
        """
        List all available log files.

        Returns:
            List of log file info (name, size, modified date), newest first.

        Raises:
            AdminOperationException: if the log directory cannot be read.
        """
        try:
            # Determine log directory
            log_file_path = settings.log_file
            if log_file_path:
                log_dir = Path(log_file_path).parent
            else:
                log_dir = Path("logs")

            if not log_dir.exists():
                return []

            files = []
            # "*.log*" also matches rotated backups like app.log.1 / app.log.gz.
            for log_file in log_dir.glob("*.log*"):
                if log_file.is_file():
                    stat = log_file.stat()
                    files.append(
                        {
                            "filename": log_file.name,
                            "size_bytes": stat.st_size,
                            "size_mb": round(stat.st_size / (1024 * 1024), 2),
                            "last_modified": datetime.fromtimestamp(
                                stat.st_mtime, tz=UTC
                            ).isoformat(),
                        }
                    )

            # Sort by modified date (newest first)
            files.sort(key=lambda x: x["last_modified"], reverse=True)

            return files

        except Exception as e:
            logger.error(f"Failed to list log files: {e}")
            raise AdminOperationException(
                operation="list_log_files", reason=f"Directory read failed: {str(e)}"
            )

    def cleanup_old_logs(self, db: Session, retention_days: int) -> int:
        """
        Delete logs older than retention period from database.

        Args:
            db: Database session
            retention_days: Days to retain logs

        Returns:
            Number of logs deleted

        Raises:
            AdminOperationException: if the delete fails (session is rolled back).
        """
        try:
            cutoff_date = datetime.now(UTC) - timedelta(days=retention_days)

            deleted_count = (
                db.query(ApplicationLog)
                .filter(ApplicationLog.timestamp < cutoff_date)
                .delete()
            )

            db.commit()

            logger.info(
                f"Cleaned up {deleted_count} logs older than {retention_days} days"
            )

            return deleted_count

        except Exception as e:
            db.rollback()
            logger.error(f"Failed to cleanup old logs: {e}")
            raise AdminOperationException(
                operation="cleanup_old_logs",
                reason=f"Delete operation failed: {str(e)}",
            )

    def delete_log(self, db: Session, log_id: int) -> str:
        """
        Delete a specific log entry.

        Raises:
            ResourceNotFoundException: if no log row has the given id.
            AdminOperationException: if the delete fails (session is rolled back).
        """
        try:
            log_entry = (
                db.query(ApplicationLog).filter(ApplicationLog.id == log_id).first()
            )

            if not log_entry:
                raise ResourceNotFoundException(
                    resource_type="log", identifier=str(log_id)
                )

            db.delete(log_entry)
            db.commit()

            return f"Log entry {log_id} deleted successfully"

        except ResourceNotFoundException:
            raise
        except Exception as e:
            db.rollback()
            logger.error(f"Failed to delete log {log_id}: {e}")
            raise AdminOperationException(
                operation="delete_log", reason=f"Delete operation failed: {str(e)}"
            )


# Create service instance
log_service = LogService()
|
||||
525
app/modules/monitoring/services/platform_health_service.py
Normal file
525
app/modules/monitoring/services/platform_health_service.py
Normal file
@@ -0,0 +1,525 @@
|
||||
# app/modules/monitoring/services/platform_health_service.py
|
||||
"""
|
||||
Platform health and capacity monitoring service.
|
||||
|
||||
Provides:
|
||||
- System resource metrics (CPU, memory, disk)
|
||||
- Database metrics and statistics
|
||||
- Capacity threshold calculations
|
||||
- Scaling recommendations
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
import psutil
|
||||
from sqlalchemy import func, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.modules.core.services.image_service import image_service
|
||||
from app.modules.inventory.models import Inventory
|
||||
from app.modules.orders.models import Order
|
||||
from app.modules.catalog.models import Product
|
||||
from models.database.vendor import Vendor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Thresholds Configuration
|
||||
# ============================================================================
|
||||
|
||||
# Per-metric capacity thresholds: "warning" and "critical" mark escalating
# alert levels and "limit" is the hard design capacity. Counts are absolute
# (products), sizes are in GB/MB, and *_percent entries are 0-100.
CAPACITY_THRESHOLDS = {
    "products_total": {
        "warning": 400_000,
        "critical": 475_000,
        "limit": 500_000,
    },
    "storage_gb": {
        "warning": 800,
        "critical": 950,
        "limit": 1000,
    },
    "db_size_mb": {
        "warning": 20_000,
        "critical": 24_000,
        "limit": 25_000,
    },
    "disk_percent": {
        "warning": 70,
        "critical": 85,
        "limit": 100,
    },
    "memory_percent": {
        "warning": 75,
        "critical": 90,
        "limit": 100,
    },
    "cpu_percent": {
        "warning": 70,
        "critical": 85,
        "limit": 100,
    },
}

# Infrastructure sizing ladder, ordered smallest to largest. None means the
# tier has no upper bound (Enterprise is open-ended).
INFRASTRUCTURE_TIERS = [
    {"name": "Starter", "max_clients": 50, "max_products": 10_000},
    {"name": "Small", "max_clients": 100, "max_products": 30_000},
    {"name": "Medium", "max_clients": 300, "max_products": 100_000},
    {"name": "Large", "max_clients": 500, "max_products": 250_000},
    {"name": "Scale", "max_clients": 1000, "max_products": 500_000},
    {"name": "Enterprise", "max_clients": None, "max_products": None},
]
|
||||
|
||||
|
||||
class PlatformHealthService:
|
||||
"""Service for platform health and capacity monitoring."""
|
||||
|
||||
def get_system_metrics(self) -> dict:
|
||||
"""Get current system resource metrics."""
|
||||
cpu_percent = psutil.cpu_percent(interval=0.1)
|
||||
memory = psutil.virtual_memory()
|
||||
disk = psutil.disk_usage("/")
|
||||
|
||||
return {
|
||||
"cpu_percent": cpu_percent,
|
||||
"memory_percent": memory.percent,
|
||||
"memory_used_gb": round(memory.used / (1024**3), 2),
|
||||
"memory_total_gb": round(memory.total / (1024**3), 2),
|
||||
"disk_percent": disk.percent,
|
||||
"disk_used_gb": round(disk.used / (1024**3), 2),
|
||||
"disk_total_gb": round(disk.total / (1024**3), 2),
|
||||
}
|
||||
|
||||
def get_database_metrics(self, db: Session) -> dict:
|
||||
"""Get database statistics."""
|
||||
products_count = db.query(func.count(Product.id)).scalar() or 0
|
||||
orders_count = db.query(func.count(Order.id)).scalar() or 0
|
||||
vendors_count = db.query(func.count(Vendor.id)).scalar() or 0
|
||||
inventory_count = db.query(func.count(Inventory.id)).scalar() or 0
|
||||
|
||||
db_size = self._get_database_size(db)
|
||||
|
||||
return {
|
||||
"size_mb": db_size,
|
||||
"products_count": products_count,
|
||||
"orders_count": orders_count,
|
||||
"vendors_count": vendors_count,
|
||||
"inventory_count": inventory_count,
|
||||
}
|
||||
|
||||
def get_image_storage_metrics(self) -> dict:
|
||||
"""Get image storage statistics."""
|
||||
stats = image_service.get_storage_stats()
|
||||
return {
|
||||
"total_files": stats["total_files"],
|
||||
"total_size_mb": stats["total_size_mb"],
|
||||
"total_size_gb": stats["total_size_gb"],
|
||||
"max_files_per_dir": stats["max_files_per_dir"],
|
||||
"products_estimated": stats["products_estimated"],
|
||||
}
|
||||
|
||||
def get_capacity_metrics(self, db: Session) -> dict:
|
||||
"""Get capacity-focused metrics for planning."""
|
||||
# Products total
|
||||
products_total = db.query(func.count(Product.id)).scalar() or 0
|
||||
|
||||
# Products by vendor
|
||||
vendor_counts = (
|
||||
db.query(Vendor.name, func.count(Product.id))
|
||||
.join(Product, Vendor.id == Product.vendor_id)
|
||||
.group_by(Vendor.name)
|
||||
.all()
|
||||
)
|
||||
products_by_vendor = {name or "Unknown": count for name, count in vendor_counts}
|
||||
|
||||
# Image storage
|
||||
image_stats = image_service.get_storage_stats()
|
||||
|
||||
# Database size
|
||||
db_size = self._get_database_size(db)
|
||||
|
||||
# Orders this month
|
||||
start_of_month = datetime.utcnow().replace(day=1, hour=0, minute=0, second=0)
|
||||
orders_this_month = (
|
||||
db.query(func.count(Order.id))
|
||||
.filter(Order.created_at >= start_of_month)
|
||||
.scalar()
|
||||
or 0
|
||||
)
|
||||
|
||||
# Active vendors
|
||||
active_vendors = (
|
||||
db.query(func.count(Vendor.id))
|
||||
.filter(Vendor.is_active == True) # noqa: E712
|
||||
.scalar()
|
||||
or 0
|
||||
)
|
||||
|
||||
return {
|
||||
"products_total": products_total,
|
||||
"products_by_vendor": products_by_vendor,
|
||||
"images_total": image_stats["total_files"],
|
||||
"storage_used_gb": image_stats["total_size_gb"],
|
||||
"database_size_mb": db_size,
|
||||
"orders_this_month": orders_this_month,
|
||||
"active_vendors": active_vendors,
|
||||
}
|
||||
|
||||
def get_subscription_capacity(self, db: Session) -> dict:
|
||||
"""
|
||||
Calculate theoretical capacity based on all vendor subscriptions.
|
||||
|
||||
Returns aggregated limits and current usage for capacity planning.
|
||||
"""
|
||||
from app.modules.billing.models import VendorSubscription
|
||||
from models.database.vendor import VendorUser
|
||||
|
||||
# Get all active subscriptions with their limits
|
||||
subscriptions = (
|
||||
db.query(VendorSubscription)
|
||||
.filter(VendorSubscription.status.in_(["active", "trial"]))
|
||||
.all()
|
||||
)
|
||||
|
||||
# Aggregate theoretical limits
|
||||
total_products_limit = 0
|
||||
total_orders_limit = 0
|
||||
total_team_limit = 0
|
||||
unlimited_products = 0
|
||||
unlimited_orders = 0
|
||||
unlimited_team = 0
|
||||
|
||||
tier_distribution = {}
|
||||
|
||||
for sub in subscriptions:
|
||||
# Track tier distribution
|
||||
tier = sub.tier or "unknown"
|
||||
tier_distribution[tier] = tier_distribution.get(tier, 0) + 1
|
||||
|
||||
# Aggregate limits
|
||||
if sub.products_limit is None:
|
||||
unlimited_products += 1
|
||||
else:
|
||||
total_products_limit += sub.products_limit
|
||||
|
||||
if sub.orders_limit is None:
|
||||
unlimited_orders += 1
|
||||
else:
|
||||
total_orders_limit += sub.orders_limit
|
||||
|
||||
if sub.team_members_limit is None:
|
||||
unlimited_team += 1
|
||||
else:
|
||||
total_team_limit += sub.team_members_limit
|
||||
|
||||
# Get actual usage
|
||||
actual_products = db.query(func.count(Product.id)).scalar() or 0
|
||||
actual_team = (
|
||||
db.query(func.count(VendorUser.id))
|
||||
.filter(VendorUser.is_active == True) # noqa: E712
|
||||
.scalar()
|
||||
or 0
|
||||
)
|
||||
|
||||
# Orders this period (aggregate across all subscriptions)
|
||||
total_orders_used = sum(s.orders_this_period for s in subscriptions)
|
||||
|
||||
def calc_utilization(actual: int, limit: int, unlimited: int) -> dict:
|
||||
if unlimited > 0:
|
||||
# Some subscriptions have unlimited - can't calculate true %
|
||||
return {
|
||||
"actual": actual,
|
||||
"theoretical_limit": limit,
|
||||
"unlimited_count": unlimited,
|
||||
"utilization_percent": None,
|
||||
"has_unlimited": True,
|
||||
}
|
||||
elif limit > 0:
|
||||
return {
|
||||
"actual": actual,
|
||||
"theoretical_limit": limit,
|
||||
"unlimited_count": 0,
|
||||
"utilization_percent": round((actual / limit) * 100, 1),
|
||||
"headroom": limit - actual,
|
||||
"has_unlimited": False,
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"actual": actual,
|
||||
"theoretical_limit": 0,
|
||||
"unlimited_count": 0,
|
||||
"utilization_percent": 0,
|
||||
"has_unlimited": False,
|
||||
}
|
||||
|
||||
return {
|
||||
"total_subscriptions": len(subscriptions),
|
||||
"tier_distribution": tier_distribution,
|
||||
"products": calc_utilization(actual_products, total_products_limit, unlimited_products),
|
||||
"orders_monthly": calc_utilization(total_orders_used, total_orders_limit, unlimited_orders),
|
||||
"team_members": calc_utilization(actual_team, total_team_limit, unlimited_team),
|
||||
}
|
||||
|
||||
def get_full_health_report(self, db: Session) -> dict:
|
||||
"""Get comprehensive platform health report."""
|
||||
# System metrics
|
||||
system = self.get_system_metrics()
|
||||
|
||||
# Database metrics
|
||||
database = self.get_database_metrics(db)
|
||||
|
||||
# Image storage metrics
|
||||
image_storage = self.get_image_storage_metrics()
|
||||
|
||||
# Subscription capacity
|
||||
subscription_capacity = self.get_subscription_capacity(db)
|
||||
|
||||
# Calculate thresholds
|
||||
thresholds = self._calculate_thresholds(system, database, image_storage)
|
||||
|
||||
# Generate recommendations
|
||||
recommendations = self._generate_recommendations(thresholds, database)
|
||||
|
||||
# Determine infrastructure tier
|
||||
tier, next_trigger = self._determine_tier(
|
||||
database["vendors_count"], database["products_count"]
|
||||
)
|
||||
|
||||
# Overall status
|
||||
overall_status = self._determine_overall_status(thresholds)
|
||||
|
||||
return {
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"overall_status": overall_status,
|
||||
"system": system,
|
||||
"database": database,
|
||||
"image_storage": image_storage,
|
||||
"subscription_capacity": subscription_capacity,
|
||||
"thresholds": thresholds,
|
||||
"recommendations": recommendations,
|
||||
"infrastructure_tier": tier,
|
||||
"next_tier_trigger": next_trigger,
|
||||
}
|
||||
|
||||
def _get_database_size(self, db: Session) -> float:
|
||||
"""Get database size in MB."""
|
||||
try:
|
||||
result = db.execute(text("SELECT pg_database_size(current_database())"))
|
||||
row = result.fetchone()
|
||||
if row:
|
||||
return round(row[0] / (1024 * 1024), 2)
|
||||
except Exception:
|
||||
logger.warning("Failed to get database size")
|
||||
return 0.0
|
||||
|
||||
return 0.0
|
||||
|
||||
def _calculate_thresholds(
    self, system: dict, database: dict, image_storage: dict
) -> list[dict]:
    """Calculate threshold status for each capacity metric.

    Args:
        system: System metrics (expects ``disk_percent``, ``memory_percent``,
            ``cpu_percent``).
        database: Database metrics (expects ``products_count``, ``size_mb``).
        image_storage: Image storage metrics (expects ``total_size_gb``).

    Returns:
        One threshold record per metric (see ``_create_threshold``),
        in fixed display order.
    """
    # (CAPACITY_THRESHOLDS key, display name, current value) — one row per
    # metric replaces six copy-pasted stanzas; order matches the report UI.
    metrics = [
        ("products_total", "Products", database["products_count"]),
        ("storage_gb", "Image Storage (GB)", image_storage["total_size_gb"]),
        ("db_size_mb", "Database (MB)", database["size_mb"]),
        ("disk_percent", "Disk Usage (%)", system["disk_percent"]),
        ("memory_percent", "Memory Usage (%)", system["memory_percent"]),
        ("cpu_percent", "CPU Usage (%)", system["cpu_percent"]),
    ]

    return [
        self._create_threshold(
            name,
            current,
            CAPACITY_THRESHOLDS[key]["warning"],
            CAPACITY_THRESHOLDS[key]["critical"],
            CAPACITY_THRESHOLDS[key]["limit"],
        )
        for key, name, current in metrics
    ]
def _create_threshold(
|
||||
self, name: str, current: float, warning: float, critical: float, limit: float
|
||||
) -> dict:
|
||||
"""Create a threshold status object."""
|
||||
percent_used = (current / limit) * 100 if limit > 0 else 0
|
||||
|
||||
if current >= critical:
|
||||
status = "critical"
|
||||
elif current >= warning:
|
||||
status = "warning"
|
||||
else:
|
||||
status = "ok"
|
||||
|
||||
return {
|
||||
"name": name,
|
||||
"current": current,
|
||||
"warning": warning,
|
||||
"critical": critical,
|
||||
"limit": limit,
|
||||
"status": status,
|
||||
"percent_used": round(percent_used, 1),
|
||||
}
|
||||
|
||||
def _generate_recommendations(
|
||||
self, thresholds: list[dict], database: dict
|
||||
) -> list[dict]:
|
||||
"""Generate scaling recommendations based on thresholds."""
|
||||
recommendations = []
|
||||
|
||||
for threshold in thresholds:
|
||||
if threshold["status"] == "critical":
|
||||
recommendations.append(
|
||||
{
|
||||
"priority": "critical",
|
||||
"title": f"{threshold['name']} at critical level",
|
||||
"description": (
|
||||
f"Currently at {threshold['percent_used']:.0f}% of capacity "
|
||||
f"({threshold['current']:.0f} of {threshold['limit']:.0f})"
|
||||
),
|
||||
"action": "Immediate scaling or cleanup required",
|
||||
}
|
||||
)
|
||||
elif threshold["status"] == "warning":
|
||||
recommendations.append(
|
||||
{
|
||||
"priority": "warning",
|
||||
"title": f"{threshold['name']} approaching limit",
|
||||
"description": (
|
||||
f"Currently at {threshold['percent_used']:.0f}% of capacity "
|
||||
f"({threshold['current']:.0f} of {threshold['limit']:.0f})"
|
||||
),
|
||||
"action": "Plan scaling in the next 2-4 weeks",
|
||||
}
|
||||
)
|
||||
|
||||
# Add tier-based recommendations
|
||||
if database["vendors_count"] > 0:
|
||||
tier, next_trigger = self._determine_tier(
|
||||
database["vendors_count"], database["products_count"]
|
||||
)
|
||||
if next_trigger:
|
||||
recommendations.append(
|
||||
{
|
||||
"priority": "info",
|
||||
"title": f"Current tier: {tier}",
|
||||
"description": next_trigger,
|
||||
"action": "Review capacity planning documentation",
|
||||
}
|
||||
)
|
||||
|
||||
# If no issues, add positive status
|
||||
if not recommendations:
|
||||
recommendations.append(
|
||||
{
|
||||
"priority": "info",
|
||||
"title": "All systems healthy",
|
||||
"description": "No capacity concerns at this time",
|
||||
"action": None,
|
||||
}
|
||||
)
|
||||
|
||||
return recommendations
|
||||
|
||||
def _determine_tier(self, vendors: int, products: int) -> tuple[str, str | None]:
    """Determine current infrastructure tier and next trigger.

    Scans INFRASTRUCTURE_TIERS in order and stops at the first tier that
    accommodates the current vendor/product counts; emits a trigger note
    once usage passes 70% of the matched tier's caps.
    """
    current_tier = "Starter"
    next_trigger = None

    for idx, tier in enumerate(INFRASTRUCTURE_TIERS):
        cap_vendors = tier["max_clients"]
        cap_products = tier["max_products"]

        # A tier without a client cap is unlimited and always matches.
        if cap_vendors is None:
            current_tier = tier["name"]
            break

        # Counts exceed this tier — keep looking at larger tiers.
        if vendors > cap_vendors or products > cap_products:
            continue

        current_tier = tier["name"]

        # Warn when usage nears this tier's caps and a larger tier exists.
        if idx + 1 < len(INFRASTRUCTURE_TIERS):
            upcoming = INFRASTRUCTURE_TIERS[idx + 1]
            pct_vendors = (vendors / cap_vendors) * 100
            pct_products = (products / cap_products) * 100
            if pct_vendors > 70 or pct_products > 70:
                next_trigger = (
                    f"Approaching {upcoming['name']} tier "
                    f"(vendors: {pct_vendors:.0f}%, products: {pct_products:.0f}%)"
                )
        break

    return current_tier, next_trigger
def _determine_overall_status(self, thresholds: list[dict]) -> str:
|
||||
"""Determine overall platform status."""
|
||||
statuses = [t["status"] for t in thresholds]
|
||||
|
||||
if "critical" in statuses:
|
||||
return "critical"
|
||||
elif "warning" in statuses:
|
||||
return "degraded"
|
||||
else:
|
||||
return "healthy"
|
||||
|
||||
|
||||
# Create service instance
|
||||
platform_health_service = PlatformHealthService()
|
||||
Reference in New Issue
Block a user