diff --git a/app/api/v1/admin/__init__.py b/app/api/v1/admin/__init__.py index 66b62094..f9f16e6b 100644 --- a/app/api/v1/admin/__init__.py +++ b/app/api/v1/admin/__init__.py @@ -33,6 +33,7 @@ from . import ( content_pages, customers, dashboard, + images, inventory, letzshop, logs, @@ -42,6 +43,7 @@ from . import ( notifications, order_item_exceptions, orders, + platform_health, products, settings, tests, @@ -162,6 +164,14 @@ router.include_router(messages.router, tags=["admin-messages"]) # Include log management endpoints router.include_router(logs.router, tags=["admin-logs"]) +# Include image management endpoints +router.include_router(images.router, tags=["admin-images"]) + +# Include platform health endpoints +router.include_router( + platform_health.router, prefix="/platform", tags=["admin-platform-health"] +) + # ============================================================================ # Code Quality & Architecture diff --git a/app/api/v1/admin/images.py b/app/api/v1/admin/images.py new file mode 100644 index 00000000..99a0cdf3 --- /dev/null +++ b/app/api/v1/admin/images.py @@ -0,0 +1,121 @@ +# app/api/v1/admin/images.py +""" +Admin image management endpoints. 
+
+Provides:
+- Image upload with automatic processing
+- Image deletion
+- Storage statistics
+"""
+
+import logging
+
+from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
+
+from app.api.deps import get_current_admin_api
+from app.services.image_service import image_service
+from models.database.user import User
+from models.schema.image import (
+    ImageDeleteResponse,
+    ImageStorageStats,
+    ImageUploadResponse,
+)
+
+router = APIRouter(prefix="/images")
+logger = logging.getLogger(__name__)
+
+# Maximum upload size (10MB)
+MAX_UPLOAD_SIZE = 10 * 1024 * 1024
+
+
+@router.post("/upload", response_model=ImageUploadResponse)
+async def upload_image(
+    file: UploadFile = File(...),
+    vendor_id: int = Form(...),
+    product_id: int | None = Form(None),
+    current_admin: User = Depends(get_current_admin_api),
+):
+    """Upload and process an image.
+
+    The image will be:
+    - Converted to WebP format
+    - Resized to multiple variants (original, 800px, 200px)
+    - Stored in a sharded directory structure
+
+    Args:
+        file: Image file to upload
+        vendor_id: Vendor ID for the image
+        product_id: Optional product ID
+
+    Returns:
+        Image URLs and metadata. When the image service rejects the file with
+        a ValueError, the response has ``success=False`` and the error text.
+
+    Raises:
+        HTTPException: 400 if the declared content type is not an image,
+            413 if the file exceeds MAX_UPLOAD_SIZE, 500 on unexpected errors.
+    """
+    # Validate content type first so a non-image upload is rejected before
+    # any of its body is read.
+    if not file.content_type or not file.content_type.startswith("image/"):
+        raise HTTPException(
+            status_code=400,
+            detail="Invalid file type. Only images are allowed.",
+        )
+
+    # Validate file size. Read at most one byte past the limit: that is enough
+    # to detect an oversized upload without buffering the whole body in memory.
+    content = await file.read(MAX_UPLOAD_SIZE + 1)
+    if len(content) > MAX_UPLOAD_SIZE:
+        raise HTTPException(
+            status_code=413,
+            detail=f"File too large. Maximum size: {MAX_UPLOAD_SIZE // (1024*1024)}MB",
+        )
+
+    try:
+        result = image_service.upload_product_image(
+            file_content=content,
+            filename=file.filename or "image.jpg",
+            vendor_id=vendor_id,
+            product_id=product_id,
+        )
+
+        logger.info(f"Image uploaded: {result['id']} for vendor {vendor_id}")
+
+        return ImageUploadResponse(success=True, image=result)
+
+    except ValueError as e:
+        # Validation/processing failures are reported in the response body.
+        logger.warning(f"Image upload failed: {e}")
+        return ImageUploadResponse(success=False, error=str(e))
+
+    except Exception as e:
+        # logger.exception keeps the traceback, which logger.error dropped.
+        logger.exception(f"Image upload error: {e}")
+        raise HTTPException(status_code=500, detail="Failed to process image") from e
+
+
+@router.delete("/{image_hash}", response_model=ImageDeleteResponse)
+async def delete_image(
+    image_hash: str,
+    current_admin: User = Depends(get_current_admin_api),
+):
+    """Delete an image and all its variants.
+
+    Args:
+        image_hash: The image ID/hash
+
+    Returns:
+        Deletion status (``success=False`` when no variant file was found)
+    """
+    deleted = image_service.delete_product_image(image_hash)
+
+    if deleted:
+        logger.info(f"Image deleted: {image_hash}")
+        return ImageDeleteResponse(success=True, message="Image deleted successfully")
+    else:
+        return ImageDeleteResponse(success=False, message="Image not found")
+
+
+@router.get("/stats", response_model=ImageStorageStats)
+async def get_storage_stats(
+    current_admin: User = Depends(get_current_admin_api),
+):
+    """Get image storage statistics.
+
+    Returns:
+        Storage metrics including file counts, sizes, and directory info
+    """
+    stats = image_service.get_storage_stats()
+    return ImageStorageStats(**stats)
diff --git a/app/api/v1/admin/platform_health.py b/app/api/v1/admin/platform_health.py
new file mode 100644
index 00000000..fd2d2b25
--- /dev/null
+++ b/app/api/v1/admin/platform_health.py
@@ -0,0 +1,532 @@
+# app/api/v1/admin/platform_health.py
+"""
+Platform health and capacity monitoring endpoints.
+
+Provides:
+- Overall platform health status
+- Capacity metrics and thresholds
+- Scaling recommendations
+"""
+
+# NOTE(review): `os` and `platform` do not appear to be used in this module,
+# and `psutil` is a third-party package listed among the stdlib imports.
+import logging
+import os
+import platform
+import psutil
+from datetime import datetime
+
+from fastapi import APIRouter, Depends
+from pydantic import BaseModel
+from sqlalchemy import func, text
+from sqlalchemy.orm import Session
+
+from app.api.deps import get_current_admin_api
+from app.core.database import get_db
+from app.services.image_service import image_service
+from models.database.inventory import Inventory
+from models.database.order import Order
+from models.database.product import Product
+from models.database.user import User
+from models.database.vendor import Vendor
+
+router = APIRouter()
+logger = logging.getLogger(__name__)
+
+
+# ============================================================================
+# Schemas
+# ============================================================================
+
+
+class SystemMetrics(BaseModel):
+    """System resource metrics (populated from psutil by _get_system_metrics)."""
+
+    cpu_percent: float
+    memory_percent: float
+    memory_used_gb: float
+    memory_total_gb: float
+    disk_percent: float  # usage of the "/" filesystem
+    disk_used_gb: float
+    disk_total_gb: float
+
+
+class DatabaseMetrics(BaseModel):
+    """Database metrics: total size plus row counts of the main tables."""
+
+    size_mb: float  # 0.0 when the size could not be determined
+    products_count: int
+    orders_count: int
+    vendors_count: int
+    inventory_count: int
+
+
+class ImageStorageMetrics(BaseModel):
+    """Image storage metrics (subset of image_service.get_storage_stats())."""
+
+    total_files: int
+    total_size_mb: float
+    total_size_gb: float
+    max_files_per_dir: int
+    products_estimated: int
+
+
+class CapacityThreshold(BaseModel):
+    """Capacity threshold status for one monitored metric."""
+
+    name: str
+    current: float
+    warning: float
+    critical: float
+    limit: float
+    status: str  # ok, warning, critical
+    percent_used: float  # current as a percentage of limit, 1 decimal place
+
+
+class ScalingRecommendation(BaseModel):
+    """Scaling recommendation."""
+
+    priority: str  # info, warning, critical
+    title: str
+    description: str
+    action: str | None = None
+
+
+class PlatformHealthResponse(BaseModel):
+    """Complete platform health response."""
+
+    timestamp: str  # ISO-8601 string built from datetime.utcnow()
+    overall_status: str  # healthy, degraded, critical
+    system: SystemMetrics
+    database: DatabaseMetrics
+    image_storage: ImageStorageMetrics
+    thresholds: list[CapacityThreshold]
+    recommendations: list[ScalingRecommendation]
+    infrastructure_tier: str
+    next_tier_trigger: str | None = None
+
+
+class CapacityMetricsResponse(BaseModel):
+    """Capacity-focused metrics."""
+
+    products_total: int
+    products_by_vendor: dict[str, int]  # keyed by vendor name
+    images_total: int
+    storage_used_gb: float
+    database_size_mb: float
+    orders_this_month: int
+    active_vendors: int
+
+
+# ============================================================================
+# Thresholds Configuration
+# ============================================================================
+
+# Per-metric warning/critical/limit values, read by _calculate_thresholds.
+# Units follow the key name: counts, GB, MB, or percent.
+CAPACITY_THRESHOLDS = {
+    "products_total": {
+        "warning": 400_000,
+        "critical": 475_000,
+        "limit": 500_000,
+    },
+    "storage_gb": {
+        "warning": 800,
+        "critical": 950,
+        "limit": 1000,
+    },
+    "db_size_mb": {
+        "warning": 20_000,
+        "critical": 24_000,
+        "limit": 25_000,
+    },
+    "disk_percent": {
+        "warning": 70,
+        "critical": 85,
+        "limit": 100,
+    },
+    "memory_percent": {
+        "warning": 75,
+        "critical": 90,
+        "limit": 100,
+    },
+    "cpu_percent": {
+        "warning": 70,
+        "critical": 85,
+        "limit": 100,
+    },
+}
+
+# Ordered from smallest to largest; _determine_tier picks the first tier whose
+# limits are not exceeded. None means "no upper bound" (the catch-all tier).
+INFRASTRUCTURE_TIERS = [
+    {"name": "Starter", "max_clients": 50, "max_products": 10_000},
+    {"name": "Small", "max_clients": 100, "max_products": 30_000},
+    {"name": "Medium", "max_clients": 300, "max_products": 100_000},
+    {"name": "Large", "max_clients": 500, "max_products": 250_000},
+    {"name": "Scale", "max_clients": 1000, "max_products": 500_000},
+    {"name": "Enterprise", "max_clients": None, "max_products": None},
+]
+
+
+# ============================================================================
+# Endpoints
+# ============================================================================
+
+
+@router.get("/health", response_model=PlatformHealthResponse)
+async def
get_platform_health(
+    db: Session = Depends(get_db),
+    current_admin: User = Depends(get_current_admin_api),
+):
+    """Build the full platform health report.
+
+    Gathers host metrics, database counts/size and image storage statistics,
+    then derives threshold statuses, recommendations, the infrastructure tier
+    and an overall status from them.
+
+    Returns:
+        PlatformHealthResponse stamped with the current UTC time.
+    """
+    # NOTE(review): the helpers below do blocking work (psutil sampling,
+    # synchronous DB queries, a directory walk) inside an async handler.
+    system = _get_system_metrics()
+    database = _get_database_metrics(db)
+
+    # Copy only the storage fields that the response schema exposes.
+    storage_stats = image_service.get_storage_stats()
+    storage_fields = (
+        "total_files",
+        "total_size_mb",
+        "total_size_gb",
+        "max_files_per_dir",
+        "products_estimated",
+    )
+    image_storage = ImageStorageMetrics(
+        **{field: storage_stats[field] for field in storage_fields}
+    )
+
+    thresholds = _calculate_thresholds(system, database, image_storage)
+    recommendations = _generate_recommendations(thresholds, database)
+    tier_name, upgrade_hint = _determine_tier(
+        database.vendors_count, database.products_count
+    )
+    overall_status = _determine_overall_status(thresholds)
+
+    return PlatformHealthResponse(
+        timestamp=datetime.utcnow().isoformat(),
+        overall_status=overall_status,
+        system=system,
+        database=database,
+        image_storage=image_storage,
+        thresholds=thresholds,
+        recommendations=recommendations,
+        infrastructure_tier=tier_name,
+        next_tier_trigger=upgrade_hint,
+    )
+
+
+@router.get("/capacity", response_model=CapacityMetricsResponse)
+async def get_capacity_metrics(
+    db: Session = Depends(get_db),
+    current_admin: User = Depends(get_current_admin_api),
+):
+    """Get capacity-focused metrics for planning."""
+    # Products total
+    products_total = db.query(func.count(Product.id)).scalar() or 0
+
+    # Products by vendor
+    vendor_counts = (
+        db.query(Vendor.name, func.count(Product.id))
+        .join(Product, Vendor.id == Product.vendor_id)
+        .group_by(Vendor.name)
+        .all()
+    )
+    products_by_vendor = {name or "Unknown": count for name,
count in vendor_counts}
+
+    # Image storage
+    image_stats = image_service.get_storage_stats()
+
+    # Orders this month. microsecond=0 matters: without it the cutoff keeps
+    # the current microseconds and orders created in the first fraction of a
+    # second of the month are excluded.
+    start_of_month = datetime.utcnow().replace(
+        day=1, hour=0, minute=0, second=0, microsecond=0
+    )
+    orders_this_month = (
+        db.query(func.count(Order.id))
+        .filter(Order.created_at >= start_of_month)
+        .scalar()
+        or 0
+    )
+
+    # Active vendors
+    active_vendors = db.query(func.count(Vendor.id)).filter(Vendor.is_active == True).scalar() or 0  # noqa: E712
+
+    # Database size. Done last on purpose: the size probe issues raw,
+    # backend-specific SQL, so run it only after all ORM queries above have
+    # completed and a failing probe cannot affect them.
+    db_size = _get_database_size(db)
+
+    return CapacityMetricsResponse(
+        products_total=products_total,
+        products_by_vendor=products_by_vendor,
+        images_total=image_stats["total_files"],
+        storage_used_gb=image_stats["total_size_gb"],
+        database_size_mb=db_size,
+        orders_this_month=orders_this_month,
+        active_vendors=active_vendors,
+    )
+
+
+# ============================================================================
+# Helper Functions
+# ============================================================================
+
+
+def _get_system_metrics() -> SystemMetrics:
+    """Get current system resource metrics.
+
+    CPU usage is sampled over a 0.1 s interval (this call blocks for that
+    long); disk usage is reported for the "/" filesystem. Sizes are in GiB
+    rounded to 2 decimals.
+    """
+    bytes_per_gb = 1024**3
+    cpu_percent = psutil.cpu_percent(interval=0.1)
+    memory = psutil.virtual_memory()
+    disk = psutil.disk_usage("/")
+
+    return SystemMetrics(
+        cpu_percent=cpu_percent,
+        memory_percent=memory.percent,
+        memory_used_gb=round(memory.used / bytes_per_gb, 2),
+        memory_total_gb=round(memory.total / bytes_per_gb, 2),
+        disk_percent=disk.percent,
+        disk_used_gb=round(disk.used / bytes_per_gb, 2),
+        disk_total_gb=round(disk.total / bytes_per_gb, 2),
+    )
+
+
+def _get_database_metrics(db: Session) -> DatabaseMetrics:
+    """Get database statistics."""
+    products_count = db.query(func.count(Product.id)).scalar() or 0
+    orders_count = db.query(func.count(Order.id)).scalar() or 0
+    vendors_count = db.query(func.count(Vendor.id)).scalar() or 0
+    inventory_count = db.query(func.count(Inventory.id)).scalar() or 0
+
+    db_size = _get_database_size(db)
+
+    return DatabaseMetrics(
+        size_mb=db_size,
+        products_count=products_count,
+        orders_count=orders_count,
+        vendors_count=vendors_count,
+        inventory_count=inventory_count,
+    )
+
+
+def _get_database_size(db: Session) -> float:
+    """Get database size in MB.
+
+    The size query is chosen from the session's dialect instead of probing
+    SQLite first and PostgreSQL second. On PostgreSQL the failing SQLite
+    probe aborted the current transaction, so the PostgreSQL probe (and any
+    later query on the same session) failed as well.
+
+    Args:
+        db: Session to run the size query on.
+
+    Returns:
+        Size in MB rounded to 2 decimals, or 0.0 when the backend is not
+        SQLite/PostgreSQL or the query fails. On failure the session is
+        rolled back so it stays usable for subsequent queries.
+    """
+    size_queries = {
+        "sqlite": "SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()",
+        "postgresql": "SELECT pg_database_size(current_database())",
+    }
+
+    try:
+        query = size_queries.get(db.get_bind().dialect.name)
+        if query is None:
+            return 0.0
+        row = db.execute(text(query)).fetchone()
+    except Exception:
+        # Best effort: report "unknown" rather than failing the whole
+        # endpoint, but leave a trace and clear the failed transaction.
+        logger.warning("Could not determine database size", exc_info=True)
+        db.rollback()
+        return 0.0
+
+    if row and row[0] is not None:
+        return round(row[0] / (1024 * 1024), 2)
+    return 0.0
+
+
+def _calculate_thresholds(
+    system: SystemMetrics,
+    database: DatabaseMetrics,
+    image_storage: ImageStorageMetrics,
+) -> list[CapacityThreshold]:
+    """Calculate threshold status for each metric."""
+    thresholds = []
+
+    # Products threshold
+    products_config = CAPACITY_THRESHOLDS["products_total"]
+    thresholds.append(
+        _create_threshold(
+            "Products",
+            database.products_count,
+            products_config["warning"],
+            products_config["critical"],
+            products_config["limit"],
+        )
+    )
+
+    # Storage threshold
+    storage_config = CAPACITY_THRESHOLDS["storage_gb"]
+    thresholds.append(
+        _create_threshold(
+            "Image Storage (GB)",
+            image_storage.total_size_gb,
+            storage_config["warning"],
+            storage_config["critical"],
+            storage_config["limit"],
+        )
+    )
+
+    # Database size threshold
+    db_config = CAPACITY_THRESHOLDS["db_size_mb"]
+    thresholds.append(
+        _create_threshold(
+            "Database (MB)",
+            database.size_mb,
+            db_config["warning"],
+            db_config["critical"],
+            db_config["limit"],
+        )
+    )
+
+    # Disk threshold
+    disk_config = CAPACITY_THRESHOLDS["disk_percent"]
+    thresholds.append(
+        _create_threshold(
+            "Disk Usage (%)",
+            system.disk_percent,
+            disk_config["warning"],
+            disk_config["critical"],
+            disk_config["limit"],
+        )
+    )
+
+    # Memory threshold
+    memory_config =
CAPACITY_THRESHOLDS["memory_percent"]
+    thresholds.append(
+        _create_threshold(
+            "Memory Usage (%)",
+            system.memory_percent,
+            memory_config["warning"],
+            memory_config["critical"],
+            memory_config["limit"],
+        )
+    )
+
+    # CPU threshold
+    cpu_config = CAPACITY_THRESHOLDS["cpu_percent"]
+    thresholds.append(
+        _create_threshold(
+            "CPU Usage (%)",
+            system.cpu_percent,
+            cpu_config["warning"],
+            cpu_config["critical"],
+            cpu_config["limit"],
+        )
+    )
+
+    return thresholds
+
+
+def _create_threshold(
+    name: str, current: float, warning: float, critical: float, limit: float
+) -> CapacityThreshold:
+    """Classify one metric against its warning/critical levels.
+
+    The status is "critical" at or above ``critical``, "warning" at or above
+    ``warning``, otherwise "ok". ``percent_used`` is ``current`` relative to
+    ``limit`` (0 when the limit is not positive), rounded to one decimal.
+    """
+    usage_pct = 0
+    if limit > 0:
+        usage_pct = (current / limit) * 100
+
+    level = (
+        "critical"
+        if current >= critical
+        else "warning" if current >= warning else "ok"
+    )
+
+    return CapacityThreshold(
+        name=name,
+        current=current,
+        warning=warning,
+        critical=critical,
+        limit=limit,
+        status=level,
+        percent_used=round(usage_pct, 1),
+    )
+
+
+def _generate_recommendations(
+    thresholds: list[CapacityThreshold], database: DatabaseMetrics
+) -> list[ScalingRecommendation]:
+    """Generate scaling recommendations based on thresholds."""
+    recommendations = []
+
+    for threshold in thresholds:
+        if threshold.status == "critical":
+            recommendations.append(
+                ScalingRecommendation(
+                    priority="critical",
+                    title=f"{threshold.name} at critical level",
+                    description=f"Currently at {threshold.percent_used:.0f}% of capacity ({threshold.current:.0f} of {threshold.limit:.0f})",
+                    action="Immediate scaling or cleanup required",
+                )
+            )
+        elif threshold.status == "warning":
+            recommendations.append(
+                ScalingRecommendation(
+                    priority="warning",
+                    title=f"{threshold.name} approaching limit",
+                    description=f"Currently at {threshold.percent_used:.0f}% of capacity ({threshold.current:.0f} of {threshold.limit:.0f})",
+                    action="Plan scaling in the next 2-4 weeks",
+                )
+            )
+
+    # Add tier-based recommendations
+    if
database.vendors_count > 0:
+        tier, next_trigger = _determine_tier(database.vendors_count, database.products_count)
+        if next_trigger:
+            recommendations.append(
+                ScalingRecommendation(
+                    priority="info",
+                    title=f"Current tier: {tier}",
+                    description=next_trigger,
+                    action="Review capacity planning documentation",
+                )
+            )
+
+    # If no issues, add positive status
+    if not recommendations:
+        recommendations.append(
+            ScalingRecommendation(
+                priority="info",
+                title="All systems healthy",
+                description="No capacity concerns at this time",
+                action=None,
+            )
+        )
+
+    return recommendations
+
+
+def _determine_tier(vendors: int, products: int) -> tuple[str, str | None]:
+    """Pick the infrastructure tier and an optional "approaching next tier" hint.
+
+    Walks INFRASTRUCTURE_TIERS in order and returns the first tier whose
+    vendor and product limits are both respected; the unbounded tier matches
+    anything. A hint is returned when either usage exceeds 70% of the matched
+    tier's limit. Falls back to ("Starter", None) if no tier matches.
+    """
+    last_index = len(INFRASTRUCTURE_TIERS) - 1
+
+    for position, tier in enumerate(INFRASTRUCTURE_TIERS):
+        client_cap = tier["max_clients"]
+        product_cap = tier["max_products"]
+
+        # Unbounded tier: always matches, never has a next tier.
+        if client_cap is None:
+            return tier["name"], None
+
+        # Over either limit: try the next (larger) tier.
+        if vendors > client_cap or products > product_cap:
+            continue
+
+        if position >= last_index:
+            return tier["name"], None
+
+        vendor_percent = (vendors / client_cap) * 100
+        product_percent = (products / product_cap) * 100
+        if vendor_percent > 70 or product_percent > 70:
+            upcoming = INFRASTRUCTURE_TIERS[position + 1]
+            hint = (
+                f"Approaching {upcoming['name']} tier "
+                f"(vendors: {vendor_percent:.0f}%, products: {product_percent:.0f}%)"
+            )
+            return tier["name"], hint
+        return tier["name"], None
+
+    return "Starter", None
+
+
+def _determine_overall_status(thresholds: list[CapacityThreshold]) -> str:
+    """Collapse all threshold statuses into healthy / degraded / critical."""
+    seen = {item.status for item in thresholds}
+
+    if "critical" in seen:
+        return "critical"
+    if "warning" in seen:
+        return "degraded"
+    return "healthy"
diff --git a/app/routes/admin_pages.py b/app/routes/admin_pages.py
index 4343a660..f97724ae 100644
--- a/app/routes/admin_pages.py
+++ b/app/routes/admin_pages.py
@@ -1200,3
+1200,27 @@ async def admin_code_quality_violation_detail(
             "violation_id": violation_id,
         },
     )
+
+
+# ============================================================================
+# PLATFORM HEALTH & MONITORING ROUTES
+# ============================================================================
+
+
+@router.get("/platform-health", response_class=HTMLResponse, include_in_schema=False)
+async def admin_platform_health(
+    request: Request,
+    current_user: User = Depends(get_current_admin_from_cookie_or_header),
+    db: Session = Depends(get_db),
+):
+    """
+    Render platform health monitoring page.
+    Shows system metrics, capacity thresholds, and scaling recommendations.
+
+    Only the request and the authenticated user are passed to the template.
+    """
+    # NOTE(review): `db` is injected but not used by this handler.
+    return templates.TemplateResponse(
+        "admin/platform-health.html",
+        {
+            "request": request,
+            "user": current_user,
+        },
+    )
diff --git a/app/services/image_service.py b/app/services/image_service.py
new file mode 100644
index 00000000..e840ff3f
--- /dev/null
+++ b/app/services/image_service.py
@@ -0,0 +1,285 @@
+# app/services/image_service.py
+"""
+Image upload and management service.
+
+Provides:
+- Image upload with automatic optimization
+- WebP conversion
+- Multiple size variant generation
+- Sharded directory structure for performance
+"""
+
+# NOTE(review): `shutil` does not appear to be used in this module.
+import hashlib
+import logging
+import os
+import shutil
+from datetime import datetime
+from io import BytesIO
+from pathlib import Path
+
+from PIL import Image
+
+logger = logging.getLogger(__name__)
+
+
+class ImageService:
+    """Service for image upload and management."""
+
+    # Supported image formats
+    ALLOWED_EXTENSIONS = {"jpg", "jpeg", "png", "gif", "webp"}
+
+    # Size variants to generate. Keys are used in file names
+    # ("<hash>_<key>.webp") and as keys of the returned URL dict.
+    SIZES = {
+        "original": None,  # No max dimension, just optimize
+        "800": 800,  # Medium size for product cards
+        "200": 200,  # Thumbnail for grids
+    }
+
+    # Quality settings
+    QUALITY = 85
+    MAX_DIMENSION = 2000  # Max dimension for original
+
+    def __init__(self, upload_dir: str = "static/uploads"):
+        """Initialize image service.
+
+        Args:
+            upload_dir: Base directory for uploads (relative to project root)
+        """
+        self.upload_dir = Path(upload_dir)
+        self.products_dir = self.upload_dir / "products"
+
+        # Ensure directories exist
+        self.products_dir.mkdir(parents=True, exist_ok=True)
+
+    def upload_product_image(
+        self,
+        file_content: bytes,
+        filename: str,
+        vendor_id: int,
+        product_id: int | None = None,
+    ) -> dict:
+        """Upload and process a product image.
+
+        Args:
+            file_content: Raw file bytes
+            filename: Original filename
+            vendor_id: Vendor ID for path generation
+            product_id: Optional product ID
+
+        Returns:
+            Dict with image info and URLs
+
+        Raises:
+            ValueError: If the extension is not allowed, no free image id
+                could be allocated, or the image cannot be processed.
+        """
+        # Validate file extension
+        ext = self._get_extension(filename)
+        if ext not in self.ALLOWED_EXTENSIONS:
+            raise ValueError(f"Invalid file type: {ext}. Allowed: {self.ALLOWED_EXTENSIONS}")
+
+        # Generate an id and its sharded directory. Ids are only 8 hex
+        # characters (32 bits), so check for an existing image with the same
+        # id and retry instead of silently overwriting its files.
+        for _attempt in range(5):
+            image_hash = self._generate_hash(vendor_id, product_id, filename)
+            shard_path = self._get_shard_path(image_hash)
+            full_dir = self.products_dir / shard_path
+            id_taken = any(
+                (full_dir / f"{image_hash}_{size_name}.webp").exists()
+                for size_name in self.SIZES
+            )
+            if not id_taken:
+                break
+        else:
+            raise ValueError("Could not allocate a unique image id, please retry")
+
+        full_dir.mkdir(parents=True, exist_ok=True)
+
+        # Load and process image
+        try:
+            img = Image.open(BytesIO(file_content))
+
+            # Convert to RGB if necessary (for PNG with alpha)
+            if img.mode in ("RGBA", "P"):
+                img = img.convert("RGB")
+
+            # Get original dimensions
+            original_width, original_height = img.size
+
+            # Process and save variants
+            urls = {}
+            total_size = 0
+
+            for size_name, max_dim in self.SIZES.items():
+                processed_img = self._resize_image(img.copy(), max_dim)
+                file_path = full_dir / f"{image_hash}_{size_name}.webp"
+
+                # Save as WebP
+                processed_img.save(file_path, "WEBP", quality=self.QUALITY)
+
+                # Track size
+                file_size = file_path.stat().st_size
+                total_size += file_size
+
+                # Generate URL path (relative to static)
+                url_path = f"/static/uploads/products/{shard_path}/{image_hash}_{size_name}.webp"
+                urls[size_name] = url_path
+
+                logger.debug(f"Saved {size_name}: {file_path} ({file_size}
bytes)")
+
+            logger.info(
+                f"Uploaded image {image_hash} for vendor {vendor_id}: "
+                f"{len(urls)} variants, {total_size} bytes total"
+            )
+
+            return {
+                "id": image_hash,
+                "urls": urls,
+                "size_bytes": total_size,
+                "dimensions": {
+                    "width": original_width,
+                    "height": original_height,
+                },
+                "path": str(shard_path),
+            }
+
+        except Exception as e:
+            # Keep the traceback in the log and chain the original error so
+            # the root cause is not lost behind the generic ValueError.
+            logger.exception(f"Failed to process image: {e}")
+            raise ValueError(f"Failed to process image: {e}") from e
+
+    def delete_product_image(self, image_hash: str) -> bool:
+        """Delete all variants of a product image.
+
+        Args:
+            image_hash: The image hash/ID (8 lowercase hex characters)
+
+        Returns:
+            True if deleted, False if not found or if the id is malformed
+        """
+        # The id comes from the request URL and is used to build filesystem
+        # paths. Accept only the exact shape of generated ids so values such
+        # as "...." cannot point outside the products directory.
+        if len(image_hash) != 8 or any(
+            ch not in "0123456789abcdef" for ch in image_hash
+        ):
+            logger.warning(f"Rejected malformed image id: {image_hash!r}")
+            return False
+
+        shard_path = self._get_shard_path(image_hash)
+        full_dir = self.products_dir / shard_path
+
+        if not full_dir.exists():
+            return False
+
+        deleted = False
+        for size_name in self.SIZES:
+            file_path = full_dir / f"{image_hash}_{size_name}.webp"
+            if file_path.exists():
+                file_path.unlink()
+                deleted = True
+                logger.debug(f"Deleted: {file_path}")
+
+        # Clean up empty directories
+        self._cleanup_empty_dirs(full_dir)
+
+        if deleted:
+            logger.info(f"Deleted image {image_hash}")
+
+        return deleted
+
+    def get_storage_stats(self) -> dict:
+        """Get storage statistics.
+
+        Returns:
+            Dict with storage metrics
+        """
+        total_files = 0
+        total_size = 0
+        max_files_per_dir = 0
+        dir_count = 0
+
+        # Only .webp files are counted; every stored variant is WebP.
+        for root, dirs, files in os.walk(self.products_dir):
+            webp_files = [f for f in files if f.endswith(".webp")]
+            file_count = len(webp_files)
+            total_files += file_count
+
+            if file_count > 0:
+                dir_count += 1
+                max_files_per_dir = max(max_files_per_dir, file_count)
+
+            for f in webp_files:
+                file_path = Path(root) / f
+                total_size += file_path.stat().st_size
+
+        # Calculate average files per directory
+        avg_files_per_dir = total_files / dir_count if dir_count > 0 else 0
+
+        return {
+            "total_files": total_files,
+            "total_size_bytes": total_size,
+            "total_size_mb": round(total_size / (1024 * 1024), 2),
+            "total_size_gb": round(total_size / (1024 * 1024 * 1024), 3),
+            "directory_count": dir_count,
+            "max_files_per_dir": max_files_per_dir,
+            "avg_files_per_dir": round(avg_files_per_dir, 1),
+            "products_estimated": total_files // 3,  # 3 variants per image
+        }
+
+    def _generate_hash(
+        self, vendor_id: int, product_id: int | None, filename: str
+    ) -> str:
+        """Generate unique hash for image.
+
+        A random nonce is mixed in because the timestamp alone is not
+        unique: two uploads with the same vendor, product and filename
+        within one clock tick would otherwise get the same id.
+
+        Args:
+            vendor_id: Vendor ID
+            product_id: Product ID (optional)
+            filename: Original filename
+
+        Returns:
+            8-character hex hash
+        """
+        timestamp = datetime.utcnow().isoformat()
+        nonce = os.urandom(8).hex()
+        content = f"{vendor_id}:{product_id}:{timestamp}:{filename}:{nonce}"
+        # MD5 is used only to derive a short identifier, not for security.
+        return hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()[:8]
+
+    def _get_shard_path(self, image_hash: str) -> str:
+        """Get sharded directory path from hash.
+
+        Uses first 4 characters to create 2-level directory structure.
+        This creates 256 possible directories at each level.
+
+        Args:
+            image_hash: 8-character hash
+
+        Returns:
+            Path like "0a/1b"
+        """
+        return f"{image_hash[:2]}/{image_hash[2:4]}"
+
+    def _get_extension(self, filename: str) -> str:
+        """Get lowercase file extension."""
+        return filename.rsplit(".", 1)[-1].lower() if "."
in filename else ""
+
+    def _resize_image(self, img: Image.Image, max_dimension: int | None) -> Image.Image:
+        """Resize image while maintaining aspect ratio.
+
+        Images that already fit within the limit are returned unchanged
+        (no upscaling).
+
+        Args:
+            img: PIL Image
+            max_dimension: Maximum width or height (None = use MAX_DIMENSION)
+
+        Returns:
+            Resized PIL Image
+        """
+        if max_dimension is None:
+            max_dimension = self.MAX_DIMENSION
+
+        width, height = img.size
+
+        # Only resize if larger than max
+        if width <= max_dimension and height <= max_dimension:
+            return img
+
+        # Calculate new dimensions maintaining aspect ratio. Clamp the short
+        # side to at least 1px: for extreme aspect ratios (e.g. 5000x1) the
+        # scaled value truncates to 0, which Image.resize rejects.
+        if width > height:
+            new_width = max_dimension
+            new_height = max(1, int(height * (max_dimension / width)))
+        else:
+            new_height = max_dimension
+            new_width = max(1, int(width * (max_dimension / height)))
+
+        return img.resize((new_width, new_height), Image.Resampling.LANCZOS)
+
+    def _cleanup_empty_dirs(self, dir_path: Path):
+        """Remove empty directories up the tree.
+
+        Starts at ``dir_path`` and walks towards ``products_dir``, removing
+        each directory that is empty. Never touches ``products_dir`` itself
+        or anything outside it. Errors are ignored (best-effort cleanup).
+        """
+        try:
+            root = self.products_dir.resolve()
+            current = dir_path.resolve()
+
+            # Stop at the products root, and refuse to operate on paths that
+            # are not located below it.
+            while current != root and root in current.parents:
+                if current.exists() and not any(current.iterdir()):
+                    current.rmdir()
+                    current = current.parent
+                else:
+                    break
+        except OSError:
+            pass  # Directory not empty or other error
+
+
+# Create service instance
+image_service = ImageService()
diff --git a/app/templates/admin/partials/sidebar.html b/app/templates/admin/partials/sidebar.html
index bdd5fc69..4c52ea21 100644
--- a/app/templates/admin/partials/sidebar.html
+++ b/app/templates/admin/partials/sidebar.html
@@ -111,6 +111,7 @@
 {{ section_header('Platform Health', 'platformHealth') }}
 {% call section_content('platformHealth') %}
+ {{ menu_item('platform-health', '/admin/platform-health', 'chart-bar', 'Capacity Monitor') }}
 {{ menu_item('testing', '/admin/testing', 'beaker', 'Testing Hub') }}
 {{ menu_item('code-quality', '/admin/code-quality', 'shield-check', 'Code Quality') }}
 {% endcall %}
diff --git a/app/templates/admin/platform-health.html b/app/templates/admin/platform-health.html
new file mode 100644
index
00000000..946b2105 --- /dev/null +++ b/app/templates/admin/platform-health.html @@ -0,0 +1,275 @@ +{# app/templates/admin/platform-health.html #} +{% extends "admin/base.html" %} +{% from 'shared/macros/alerts.html' import loading_state, error_state %} +{% from 'shared/macros/headers.html' import page_header %} + +{% block title %}Platform Health{% endblock %} + +{% block alpine_data %}adminPlatformHealth(){% endblock %} + +{% block content %} +{% call page_header("Platform Health", subtitle="System metrics, capacity monitoring, and scaling recommendations") %} + +{% endcall %} + +{{ loading_state('Loading platform health...') }} + +{{ error_state('Error loading platform health') }} + + +
+ +
+
+ +
+ + + Infrastructure Tier: + +
+
+ +
+ + +
+ +
+
+
+ +
+
+

Products

+

+
+
+
+ + +
+
+
+ +
+
+

Image Storage

+

+
+
+
+ + +
+
+
+ +
+
+

Database

+

+
+
+
+ + +
+
+
+ +
+
+

Vendors

+

+
+
+
+
+ + +
+ +
+

System Resources

+
+ +
+
+ CPU + +
+
+
+
+
+ + +
+
+ Memory + + + + +
+
+
+
+
+ + +
+
+ Disk + + + + +
+
+
+
+
+
+
+ + +
+

Capacity Thresholds

+
+ +
+
+
+ + +
+

Scaling Recommendations

+
+ +
+
+ + +
+

Related Resources

+ +
+
+{% endblock %} + +{% block extra_scripts %} + +{% endblock %} diff --git a/docs/architecture/capacity-planning.md b/docs/architecture/capacity-planning.md new file mode 100644 index 00000000..e11b2ea5 --- /dev/null +++ b/docs/architecture/capacity-planning.md @@ -0,0 +1,454 @@ +# Capacity Planning & Infrastructure Sizing + +This document provides comprehensive capacity planning guidelines for the Wizamart platform, including resource requirements, scaling thresholds, and monitoring recommendations. + +> **Related:** [Pricing Strategy](../marketing/pricing.md) for tier definitions and limits + +--- + +## Tier Resource Allocations + +Based on our [pricing tiers](../marketing/pricing.md), here are the expected resource requirements per client: + +| Metric | Essential (€49) | Professional (€99) | Business (€199) | Enterprise (€399+) | +|--------|-----------------|--------------------|-----------------|--------------------| +| Products | 200 | 500 | 2,000 | Unlimited | +| Images per product | 3 | 5 | 8 | 10+ | +| Orders per month | 100 | 500 | 2,000 | Unlimited | +| SKU variants | 1.2x | 1.5x | 2x | 3x | +| Team members | 1 | 3 | 10 | Unlimited | +| API requests/day | 1,000 | 5,000 | 20,000 | Unlimited | + +--- + +## Scale Projections + +### Target: 1,000 Business Clients (€149/month tier) + +This represents our primary growth target. 
Here's the infrastructure impact: + +| Resource | Calculation | Total | +|----------|-------------|-------| +| **Products** | 1,000 clients × 500 products | **500,000** | +| **Product Translations** | 500,000 × 4 languages | **2,000,000 rows** | +| **Images (files)** | 500,000 × 5 images × 3 sizes | **7,500,000 files** | +| **Image Storage** | 7.5M files × 200KB avg | **1.5 TB** | +| **Database Size** | Products + translations + orders + indexes | **15-25 GB** | +| **Monthly Orders** | 1,000 clients × 300 orders | **300,000 orders** | +| **Order Items** | 300,000 × 2.5 avg items | **750,000 items/month** | +| **Monthly API Requests** | 1,000 × 10,000 req/day × 30 | **300M requests** | + +### Multi-Tier Mix (Realistic Scenario) + +More realistic distribution across tiers: + +| Tier | Clients | Products Each | Total Products | Monthly Orders | +|------|---------|---------------|----------------|----------------| +| Essential | 500 | 100 | 50,000 | 50,000 | +| Professional | 300 | 300 | 90,000 | 150,000 | +| Business | 150 | 1,000 | 150,000 | 300,000 | +| Enterprise | 50 | 3,000 | 150,000 | 200,000 | +| **Total** | **1,000** | - | **440,000** | **700,000** | + +--- + +## Server Sizing Recommendations + +### Infrastructure Tiers + +| Scale | Clients | vCPU | RAM | Storage | Database | Monthly Cost | +|-------|---------|------|-----|---------|----------|--------------| +| **Starter** | 1-50 | 2 | 4GB | 100GB SSD | SQLite | €20-40 | +| **Small** | 50-100 | 4 | 8GB | 250GB SSD | PostgreSQL | €60-100 | +| **Medium** | 100-300 | 4 | 16GB | 500GB SSD | PostgreSQL | €100-180 | +| **Large** | 300-500 | 8 | 32GB | 1TB SSD | PostgreSQL + Redis | €250-400 | +| **Scale** | 500-1000 | 16 | 64GB | 2TB SSD + CDN | PostgreSQL + Redis | €500-900 | +| **Enterprise** | 1000+ | 32+ | 128GB+ | 4TB+ + CDN | PostgreSQL cluster | €1,500+ | + +### Recommended Configurations + +#### Starter (1-50 clients) +``` +Single Server Setup: +- Hetzner CX22 or similar (2 vCPU, 4GB RAM) +- 100GB SSD 
storage +- SQLite database +- nginx for static files + reverse proxy +- Estimated cost: €20-40/month +``` + +#### Small-Medium (50-300 clients) +``` +Two-Server Setup: +- App Server: 4 vCPU, 8-16GB RAM +- Database: Managed PostgreSQL (basic tier) +- Storage: Local SSD + backup +- Optional: Redis for sessions/caching +- Estimated cost: €80-180/month +``` + +#### Large (300-1000 clients) +``` +Multi-Component Setup: +- Load Balancer: nginx or cloud LB +- App Servers: 2-4 × (4 vCPU, 8GB RAM) +- Database: Managed PostgreSQL (production tier) +- Cache: Redis (managed or self-hosted) +- Storage: Object storage (S3-compatible) + CDN +- Estimated cost: €400-900/month +``` + +#### Enterprise (1000+ clients) +``` +Full Production Setup: +- CDN: Cloudflare or similar +- Load Balancer: Cloud-native with health checks +- App Servers: 4-8 × (4 vCPU, 16GB RAM) with auto-scaling +- Database: PostgreSQL with read replicas +- Cache: Redis cluster +- Storage: S3 + CloudFront or equivalent +- Monitoring: Prometheus + Grafana +- Estimated cost: €1,500+/month +``` + +--- + +## Image Storage Architecture + +### Capacity Calculations + +| Image Size (optimized) | Files per 25GB | Files per 100GB | Files per 1TB | +|------------------------|----------------|-----------------|---------------| +| 100KB (thumbnails) | 250,000 | 1,000,000 | 10,000,000 | +| 200KB (web-ready) | 125,000 | 500,000 | 5,000,000 | +| 300KB (high quality) | 83,000 | 333,000 | 3,330,000 | +| 500KB (original) | 50,000 | 200,000 | 2,000,000 | + +### Image Sizes Generated + +Each uploaded image generates 3 variants: + +| Variant | Dimensions | Typical Size | Use Case | +|---------|------------|--------------|----------| +| `thumb` | 200×200 | 10-20KB | List views, grids | +| `medium` | 800×800 | 80-150KB | Product cards, previews | +| `original` | As uploaded | 200-500KB | Detail views, zoom | + +**Storage per product:** ~600KB (with 3 sizes for main image + 2 additional images) + +### Directory Structure (Sharded) + +To 
prevent filesystem performance degradation, images are stored in a sharded directory structure: + +``` +/uploads/ + └── products/ + ├── 00/ # First 2 chars of hash + │ ├── 1a/ # Next 2 chars + │ │ ├── 001a2b3c_original.webp + │ │ ├── 001a2b3c_800.webp + │ │ └── 001a2b3c_200.webp + │ └── 2b/ + │ └── ... + ├── 01/ + └── ... +``` + +This structure ensures: +- Maximum ~256 subdirectories per level +- ~16 files per leaf directory at 1M total images +- Fast filesystem operations even at scale + +### Performance Thresholds + +| Files per Directory | Performance | Required Action | +|---------------------|-------------|-----------------| +| < 10,000 | Excellent | None | +| 10,000 - 100,000 | Good | Monitor, plan sharding | +| 100,000 - 500,000 | Degraded | **Implement sharding** | +| > 500,000 | Poor | **Migrate to object storage** | + +--- + +## Database Performance + +### Table Size Guidelines + +| Rows | Query Time | Status | +|------|------------|--------| +| < 10,000 | < 1ms | Excellent | +| 10,000 - 100,000 | 1-10ms | Good | +| 100,000 - 1,000,000 | 10-50ms | **Add indexes, optimize queries** | +| 1,000,000 - 10,000,000 | 50-200ms | **Consider partitioning** | +| > 10,000,000 | Variable | **Sharding or dedicated DB** | + +### Critical Indexes + +Ensure these indexes exist at scale: + +```sql +-- Products +CREATE INDEX idx_product_vendor_active ON products(vendor_id, is_active); +CREATE INDEX idx_product_gtin ON products(gtin); +CREATE INDEX idx_product_vendor_sku ON products(vendor_id, vendor_sku); + +-- Orders +CREATE INDEX idx_order_vendor_status ON orders(vendor_id, status); +CREATE INDEX idx_order_created ON orders(created_at DESC); +CREATE INDEX idx_order_customer ON orders(customer_id); + +-- Inventory +CREATE INDEX idx_inventory_product_location ON inventory(product_id, warehouse, bin_location); +CREATE INDEX idx_inventory_vendor ON inventory(vendor_id); +``` + +### Database Size Estimates + +| Component | Size per 100K Products | Size per 1M
Products | +|-----------|------------------------|----------------------| +| Products table | 100 MB | 1 GB | +| Translations (4 langs) | 400 MB | 4 GB | +| Orders (1 year) | 500 MB | 5 GB | +| Order items | 200 MB | 2 GB | +| Inventory | 50 MB | 500 MB | +| Indexes | 300 MB | 3 GB | +| **Total** | **~1.5 GB** | **~15 GB** | + +--- + +## Bandwidth & Network + +### Monthly Bandwidth Estimates (1000 clients) + +| Traffic Type | Calculation | Monthly Volume | +|--------------|-------------|----------------| +| Image views | 500K products × 10 views/month × 500KB | **2.5 TB** | +| API requests | 10K req/client/day × 1000 × 30 × 2KB | **600 GB** | +| Static assets | CSS/JS cached, minimal | **50 GB** | +| **Total Egress** | | **~3 TB/month** | + +### Bandwidth Costs (Approximate) + +| Provider | First 1TB | Additional per TB | +|----------|-----------|-------------------| +| Hetzner | Included | €1/TB | +| AWS | $90 | $85/TB | +| DigitalOcean | 1TB free | $10/TB | +| Cloudflare | Unlimited (CDN) | Free | + +**Recommendation:** Use Cloudflare for image CDN to eliminate egress costs.
+ +--- + +## Scaling Triggers & Thresholds + +### When to Scale Up + +| Metric | Warning | Critical | Action | +|--------|---------|----------|--------| +| CPU Usage | > 70% avg | > 85% avg | Add app server | +| Memory Usage | > 75% | > 90% | Upgrade RAM or add server | +| Disk Usage | > 70% | > 85% | Expand storage | +| DB Query Time (p95) | > 100ms | > 500ms | Optimize queries, add indexes | +| API Response Time (p95) | > 500ms | > 2000ms | Scale horizontally | +| DB Connections | > 80% max | > 95% max | Add connection pooling | +| Error Rate | > 1% | > 5% | Investigate and fix | + +### Architecture Transition Points + +``` +STARTER → SMALL (50 clients) +├── Trigger: SQLite becomes bottleneck +├── Action: Migrate to PostgreSQL +└── Cost increase: +€40-60/month + +SMALL → MEDIUM (100 clients) +├── Trigger: Single server at 70%+ CPU +├── Action: Separate DB server +└── Cost increase: +€50-80/month + +MEDIUM → LARGE (300 clients) +├── Trigger: Need for caching, higher availability +├── Action: Add Redis, consider multiple app servers +└── Cost increase: +€150-200/month + +LARGE → SCALE (500 clients) +├── Trigger: Storage >500GB, high traffic +├── Action: Object storage + CDN, load balancing +└── Cost increase: +€200-400/month + +SCALE → ENTERPRISE (1000+ clients) +├── Trigger: High availability requirements, SLA +├── Action: Full redundancy, read replicas, auto-scaling +└── Cost increase: +€600-1000/month +``` + +--- + +## Monitoring Requirements + +### Essential Metrics + +Track these metrics for capacity planning: + +#### Infrastructure +- CPU utilization (per server) +- Memory utilization +- Disk I/O and usage +- Network throughput + +#### Application +- Request latency (p50, p95, p99) +- Request rate (per endpoint) +- Error rate by type +- Active sessions + +#### Database +- Query execution time +- Connection pool usage +- Table sizes +- Index usage + +#### Business +- Active clients +- Products per client +- Orders per day +- API calls per client + +### 
Monitoring Dashboard + +The admin platform includes a **Capacity Monitoring** page at `/admin/platform-health` with: + +1. **Current Usage** - Real-time resource utilization +2. **Growth Trends** - Historical charts for planning +3. **Threshold Alerts** - Warning and critical indicators +4. **Scaling Recommendations** - Automated suggestions + +See the [Platform Health Monitoring](#platform-health-monitoring) section below. + +--- + +## Cost Analysis + +### Infrastructure Cost per Client + +| Scale | Clients | Monthly Infra | Cost/Client | +|-------|---------|---------------|-------------| +| Starter | 25 | €30 | €1.20 | +| Small | 75 | €80 | €1.07 | +| Medium | 200 | €150 | €0.75 | +| Large | 400 | €350 | €0.88 | +| Scale | 800 | €700 | €0.88 | +| Enterprise | 1500 | €1,800 | €1.20 | + +### Revenue vs Infrastructure Cost + +At 1,000 Business tier clients (€149/month): + +| Item | Monthly | +|------|---------| +| **Revenue** | €149,000 | +| Infrastructure | €700-900 | +| Support (est.) | €3,000 | +| Development (est.) | €5,000 | +| **Gross Margin** (after all costs listed above) | **~94%** | + +--- + +## Disaster Recovery + +### Backup Strategy by Scale + +| Scale | Database Backup | File Backup | RTO | RPO | +|-------|----------------|-------------|-----|-----| +| Starter | Daily SQLite copy | Daily rsync | 4h | 24h | +| Small | Daily pg_dump | Daily sync | 2h | 12h | +| Medium | Managed backups | S3 versioning | 1h | 6h | +| Large | Point-in-time | S3 + cross-region | 30m | 1h | +| Enterprise | Streaming replicas | Multi-region | 5m | 5m | + +--- + +## Platform Health Monitoring + +The admin dashboard includes a dedicated capacity monitoring page that tracks: + +### Metrics Displayed + +1. **Client Growth** + - Total active clients + - New clients this month + - Churn rate + +2. **Resource Usage** + - Total products across all vendors + - Total images stored + - Database size + - Storage usage + +3.
**Performance Indicators** + - Average API response time + - Database query latency + - Error rate + +4. **Threshold Status** + - Current infrastructure tier + - Distance to next threshold + - Recommended actions + +### Alert Configuration + +Configure alerts for proactive scaling: + +```python +CAPACITY_THRESHOLDS = { + "products_total": { + "warning": 400_000, # 80% of 500K + "critical": 475_000, # 95% of 500K + }, + "storage_gb": { + "warning": 800, # 80% of 1TB + "critical": 950, + }, + "db_size_gb": { + "warning": 20, + "critical": 24, + }, + "avg_response_ms": { + "warning": 200, + "critical": 500, + }, +} +``` + +--- + +## Quick Reference + +### TL;DR Sizing Guide + +| Clients | Server | RAM | Storage | Database | Monthly Cost | +|---------|--------|-----|---------|----------|--------------| +| 1-50 | 2 vCPU | 4GB | 100GB | SQLite | €30 | +| 50-100 | 4 vCPU | 8GB | 250GB | PostgreSQL | €80 | +| 100-300 | 4 vCPU | 16GB | 500GB | PostgreSQL | €150 | +| 300-500 | 8 vCPU | 32GB | 1TB | PostgreSQL + Redis | €350 | +| 500-1000 | 16 vCPU | 64GB | 2TB + CDN | PostgreSQL + Redis | €700 | +| 1000+ | 32+ vCPU | 128GB+ | 4TB+ + CDN | PostgreSQL cluster | €1,500+ | + +### Key Formulas + +``` +Storage (GB) = (Products × Images × 3 sizes × 200KB) / 1,000,000 +DB Size (GB) = Products × 0.00003 + Orders × 0.00002 +Bandwidth (TB/mo) = Products × Daily Views × 500KB × 30 / 1,000,000,000 +``` + +--- + +## See Also + +- [Pricing Strategy](../marketing/pricing.md) - Tier definitions and limits +- [Multi-Tenant Architecture](multi-tenant.md) - How client isolation works +- [Background Tasks](background-tasks.md) - Task queue scaling +- [Production Deployment](../deployment/production.md) - Deployment guidelines diff --git a/docs/marketing/pricing.md b/docs/marketing/pricing.md index 6d7daddf..4c055f05 100644 --- a/docs/marketing/pricing.md +++ b/docs/marketing/pricing.md @@ -6,6 +6,8 @@ A focused Order Management System built specifically for Luxembourg e-commerce. 
Works alongside Letzshop, not instead of it. Provides the operational tools Letzshop lacks: real inventory, correct invoicing, customer ownership. +> **Infrastructure Planning:** See [Capacity Planning](../architecture/capacity-planning.md) for resource requirements, server sizing, and scaling guidelines per tier. + --- ## Market Context diff --git a/docs/operations/capacity-monitoring.md b/docs/operations/capacity-monitoring.md new file mode 100644 index 00000000..f46625f5 --- /dev/null +++ b/docs/operations/capacity-monitoring.md @@ -0,0 +1,121 @@ +# Capacity Monitoring + +Detailed guide for monitoring and managing platform capacity. + +## Overview + +The Capacity Monitoring page (`/admin/platform-health/capacity`) provides insights into resource consumption and helps plan infrastructure scaling. + +## Key Metrics + +### Client Metrics + +| Metric | Description | Threshold Indicator | +|--------|-------------|---------------------| +| Active Clients | Vendors with activity in last 30 days | Scale planning | +| Total Products | Sum across all vendors | Storage/DB sizing | +| Products per Client | Average products per vendor | Tier compliance | +| Monthly Orders | Order volume this month | Performance impact | + +### Storage Metrics + +| Metric | Description | Warning | Critical | +|--------|-------------|---------|----------| +| Image Files | Total files in storage | 80% of limit | 95% of limit | +| Image Storage (GB) | Total size in gigabytes | 80% of disk | 95% of disk | +| Database Size (GB) | PostgreSQL data size | 80% of allocation | 95% of allocation | +| Backup Size (GB) | Latest backup size | Informational | N/A | + +### Performance Metrics + +| Metric | Good | Warning | Critical | +|--------|------|---------|----------| +| Avg Response Time | < 100ms | 100-300ms | > 300ms | +| DB Query Time (p95) | < 50ms | 50-200ms | > 200ms | +| Cache Hit Rate | > 90% | 70-90% | < 70% | +| Connection Pool Usage | < 70% | 70-90% | > 90% | + +## Scaling Recommendations + 
+The system provides automatic scaling recommendations based on current usage: + +### Example Recommendations + +``` +Current Infrastructure: MEDIUM (100-300 clients) +Current Usage: 85% of capacity + +Recommendations: +1. [WARNING] Approaching product limit (420K of 500K) + → Consider upgrading to LARGE tier + +2. [INFO] Database size growing 5GB/month + → Plan storage expansion in 3 months + +3. [OK] API response times within normal range + → No action needed +``` + +## Threshold Configuration + +Edit thresholds in the admin settings or via environment: + +```python +# Capacity thresholds (can be configured per deployment) +CAPACITY_THRESHOLDS = { + # Products + "products_total": { + "warning": 400_000, + "critical": 475_000, + "limit": 500_000, + }, + # Storage (GB) + "storage_gb": { + "warning": 800, + "critical": 950, + "limit": 1000, + }, + # Database (GB) + "db_size_gb": { + "warning": 20, + "critical": 24, + "limit": 25, + }, + # Monthly orders + "monthly_orders": { + "warning": 250_000, + "critical": 280_000, + "limit": 300_000, + }, +} +``` + +## Historical Trends + +View growth trends to plan ahead: + +- **30-day growth rate**: Products, storage, clients +- **Projected capacity date**: When limits will be reached +- **Seasonal patterns**: Order volume fluctuations + +## Alerts + +Capacity alerts trigger when: + +1. **Warning (Yellow)**: 80% of any threshold +2. **Critical (Red)**: 95% of any threshold +3. 
**Exceeded**: 100%+ of threshold (immediate action) + +## Export Reports + +Generate capacity reports for planning: + +- **Weekly summary**: PDF or CSV +- **Monthly capacity report**: Detailed analysis +- **Projection report**: 3/6/12 month forecasts + +## Related Documentation + +- [Capacity Planning](../architecture/capacity-planning.md) - Full sizing guide +- [Platform Health](platform-health.md) - Real-time health monitoring +- [Image Storage](image-storage.md) - Image system details diff --git a/docs/operations/image-storage.md b/docs/operations/image-storage.md new file mode 100644 index 00000000..55256313 --- /dev/null +++ b/docs/operations/image-storage.md @@ -0,0 +1,246 @@ +# Image Storage System + +Documentation for the platform's image storage and management system. + +## Overview + +The Wizamart platform uses a self-hosted image storage system with: + +- **Sharded directory structure** for filesystem performance +- **Automatic WebP conversion** for optimization +- **Multiple size variants** for different use cases +- **CDN-ready architecture** for scaling + +## Storage Architecture + +### Directory Structure + +Images are stored in a sharded directory structure to prevent filesystem performance degradation: + +``` +/static/uploads/ + └── products/ + ├── 00/ # First 2 chars of hash + │ ├── 1a/ # Next 2 chars + │ │ ├── 001a2b3c_original.webp + │ │ ├── 001a2b3c_800.webp + │ │ └── 001a2b3c_200.webp + │ └── 2b/ + │ └── ... + ├── 01/ + │ └── ... + └── ff/ + └── ... 
+``` + +### Hash Generation + +The file hash is generated from: +```python +hash = md5(f"{vendor_id}:{product_id}:{timestamp}:{original_filename}")[:8] +``` + +This ensures: +- Unique file paths +- Even distribution across directories +- Predictable file locations + +## Image Variants + +Each uploaded image generates multiple variants: + +| Variant | Max Dimensions | Format | Use Case | +|---------|---------------|--------|----------| +| `original` | As uploaded (max 2000px) | WebP | Detail view, zoom | +| `800` | 800×800 | WebP | Product cards | +| `200` | 200×200 | WebP | Thumbnails, grids | + +### Size Estimates + +| Original Size | After Processing | Storage per Image | +|---------------|------------------|-------------------| +| 2MB JPEG | ~200KB (original) + 80KB (800) + 15KB (200) | ~295KB | +| 500KB JPEG | ~150KB (original) + 60KB (800) + 12KB (200) | ~222KB | +| 100KB JPEG | ~80KB (original) + 40KB (800) + 10KB (200) | ~130KB | + +**Average: ~200KB per image (all variants)** + +## Upload Process + +### API Endpoint + +```http +POST /api/v1/admin/images/upload +Content-Type: multipart/form-data + +file: +vendor_id: 123 +product_id: 456 (optional, for product images) +type: product|category|banner +``` + +### Response + +```json +{ + "success": true, + "image": { + "id": "001a2b3c", + "urls": { + "original": "/uploads/products/00/1a/001a2b3c_original.webp", + "medium": "/uploads/products/00/1a/001a2b3c_800.webp", + "thumb": "/uploads/products/00/1a/001a2b3c_200.webp" + }, + "size_bytes": 295000, + "dimensions": { + "width": 1200, + "height": 1200 + } + } +} +``` + +## Configuration + +### Environment Variables + +```bash +# Image storage configuration +IMAGE_UPLOAD_DIR=/var/www/uploads +IMAGE_MAX_SIZE_MB=10 +IMAGE_ALLOWED_TYPES=jpg,jpeg,png,gif,webp +IMAGE_QUALITY=85 +IMAGE_MAX_DIMENSION=2000 +``` + +### Python Configuration + +```python +# app/core/config.py +class ImageSettings: + UPLOAD_DIR: str = "/static/uploads" + MAX_SIZE_MB: int = 10 + ALLOWED_TYPES: 
list = ["jpg", "jpeg", "png", "gif", "webp"] + QUALITY: int = 85 + MAX_DIMENSION: int = 2000 + + # Generated sizes + SIZES: dict = { + "original": None, # No resize, just optimize + "medium": 800, + "thumb": 200, + } +``` + +## Performance Guidelines + +### Filesystem Limits + +| Files per Directory | Status | Action | +|---------------------|--------|--------| +| < 10,000 | OK | None needed | +| 10,000 - 50,000 | Monitor | Plan migration | +| 50,000 - 100,000 | Warning | Increase sharding depth | +| > 100,000 | Critical | Migrate to object storage | + +### Capacity Planning + +| Products | Images (5/product) | Total Files (3 sizes) | Storage | +|----------|--------------------|-----------------------|---------| +| 10,000 | 50,000 | 150,000 | 30 GB | +| 50,000 | 250,000 | 750,000 | 150 GB | +| 100,000 | 500,000 | 1,500,000 | 300 GB | +| 500,000 | 2,500,000 | 7,500,000 | 1.5 TB | + +## CDN Integration + +For production deployments, configure a CDN for image delivery: + +### Cloudflare (Recommended) + +1. Set up Cloudflare for your domain +2. 
Configure page rules for `/uploads/*`: + - Cache Level: Cache Everything + - Edge Cache TTL: 1 month + - Browser Cache TTL: 1 week + +### nginx Configuration + +```nginx +location /uploads/ { + alias /var/www/uploads/; + expires 30d; + add_header Cache-Control "public, immutable"; + add_header X-Content-Type-Options nosniff; + + # WebP fallback for older browsers + location ~ \.(jpg|jpeg|png)$ { + try_files $uri$webp_suffix $uri =404; + } +} +``` + +## Maintenance + +### Cleanup Orphaned Images + +Remove images not referenced by any product: + +```bash +# Run via admin CLI +python -m scripts.cleanup_orphaned_images --dry-run +python -m scripts.cleanup_orphaned_images --execute +``` + +### Regenerate Variants + +If image quality settings change: + +```bash +# Regenerate all variants for a vendor +python -m scripts.regenerate_images --vendor-id 123 + +# Regenerate all variants (use with caution) +python -m scripts.regenerate_images --all +``` + +## Monitoring + +### Metrics to Track + +- Total file count +- Storage used (GB) +- Files per directory (max) +- Upload success rate +- Average processing time + +### Health Checks + +The platform health page includes image storage metrics: + +- Current file count +- Storage usage +- Directory distribution +- Processing queue status + +## Troubleshooting + +### Common Issues + +**Upload fails with "File too large"** +- Check `IMAGE_MAX_SIZE_MB` setting +- Verify nginx `client_max_body_size` + +**Images not displaying** +- Check file permissions (should be readable by web server) +- Verify URL paths match actual file locations + +**Slow uploads** +- Check disk I/O performance +- Consider async processing queue + +## Related Documentation + +- [Capacity Planning](../architecture/capacity-planning.md) +- [Platform Health](platform-health.md) +- [Capacity Monitoring](capacity-monitoring.md) diff --git a/docs/operations/platform-health.md b/docs/operations/platform-health.md new file mode 100644 index 00000000..18b65247 --- 
/dev/null +++ b/docs/operations/platform-health.md @@ -0,0 +1,92 @@ +# Platform Health Monitoring + +This guide covers the platform health monitoring features available in the admin dashboard. + +## Overview + +The Platform Health page (`/admin/platform-health`) provides real-time visibility into system performance, resource usage, and capacity thresholds. + +## Accessing Platform Health + +Navigate to **Admin > Platform Health** in the sidebar, or go directly to `/admin/platform-health`. + +## Dashboard Sections + +### 1. System Overview + +Quick glance at overall platform status: + +| Indicator | Green | Yellow | Red | +|-----------|-------|--------|-----| +| API Response Time | < 100ms | 100-500ms | > 500ms | +| Error Rate | < 0.1% | 0.1-1% | > 1% | +| Database Health | Connected | Slow queries | Disconnected | +| Storage | < 70% | 70-85% | > 85% | + +### 2. Resource Usage + +Real-time metrics: + +- **CPU Usage**: Current and 24h average +- **Memory Usage**: Used vs available +- **Disk Usage**: Storage consumption with trend +- **Network**: Inbound/outbound throughput + +### 3. Capacity Metrics + +Track growth toward scaling thresholds: + +- **Total Products**: Count across all vendors +- **Total Images**: Files stored in image system +- **Database Size**: Current size vs recommended max +- **Active Clients**: Monthly active vendor accounts + +### 4. 
Performance Trends + +Historical charts (7-day, 30-day): + +- API response times (p50, p95, p99) +- Request volume by endpoint +- Database query latency +- Error rate over time + +## Alert Configuration + +### Threshold Alerts + +Configure alerts for proactive monitoring: + +```python +# In app/core/config.py +HEALTH_THRESHOLDS = { + "cpu_percent": {"warning": 70, "critical": 85}, + "memory_percent": {"warning": 75, "critical": 90}, + "disk_percent": {"warning": 70, "critical": 85}, + "response_time_ms": {"warning": 200, "critical": 500}, + "error_rate_percent": {"warning": 1.0, "critical": 5.0}, +} +``` + +### Notification Channels + +Alerts can be sent via: +- Email to admin users +- Slack webhook (if configured) +- Dashboard notifications + +## Related Pages + +- [Capacity Monitoring](capacity-monitoring.md) - Detailed capacity metrics +- [Image Storage](image-storage.md) - Image system management +- [Capacity Planning](../architecture/capacity-planning.md) - Infrastructure sizing guide + +## API Endpoints + +The platform health page uses these admin API endpoints: + +| Endpoint | Description | +|----------|-------------| +| `GET /api/v1/admin/platform/health` | Overall health status | +| `GET /api/v1/admin/platform/metrics` | Current metrics | +| `GET /api/v1/admin/platform/metrics/history` | Historical data | +| `GET /api/v1/admin/platform/capacity` | Capacity usage | diff --git a/mkdocs.yml b/mkdocs.yml index b6fe4a68..708eb062 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -41,6 +41,7 @@ nav: - Frontend Structure: architecture/frontend-structure.md - Models Structure: architecture/models-structure.md - Background Tasks: architecture/background-tasks.md + - Capacity Planning: architecture/capacity-planning.md - API Consolidation: - Proposal: architecture/api-consolidation-proposal.md - Migration Status: architecture/api-migration-status.md @@ -186,6 +187,14 @@ nav: - Environment Variables: deployment/environment.md - Stripe Integration: 
deployment/stripe-integration.md + # ============================================ + # OPERATIONS (Platform Health & Monitoring) + # ============================================ + - Operations: + - Platform Health: operations/platform-health.md + - Capacity Monitoring: operations/capacity-monitoring.md + - Image Storage: operations/image-storage.md + # ============================================ # FEATURES # ============================================ diff --git a/models/schema/image.py b/models/schema/image.py new file mode 100644 index 00000000..c0c61d27 --- /dev/null +++ b/models/schema/image.py @@ -0,0 +1,46 @@ +# models/schema/image.py +""" +Pydantic schemas for image operations. +""" + +from pydantic import BaseModel + + +class ImageUrls(BaseModel): + """URLs for image variants.""" + + original: str + medium: str | None = None # 800px variant + thumb: str | None = None # 200px variant + + # Allow arbitrary keys for flexibility + class Config: + extra = "allow" + + +class ImageUploadResponse(BaseModel): + """Response from image upload.""" + + success: bool + image: dict | None = None + error: str | None = None + + +class ImageDeleteResponse(BaseModel): + """Response from image deletion.""" + + success: bool + message: str + + +class ImageStorageStats(BaseModel): + """Image storage statistics.""" + + total_files: int + total_size_bytes: int + total_size_mb: float + total_size_gb: float + directory_count: int + max_files_per_dir: int + avg_files_per_dir: float + products_estimated: int diff --git a/requirements.txt b/requirements.txt index bd6055e3..948b4cd1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,6 +22,12 @@ python-multipart==0.0.20 pandas==2.2.3 requests==2.32.3 +# Image processing +Pillow>=10.0.0 + +# System monitoring +psutil>=5.9.0 + # PDF generation weasyprint==62.3 diff --git a/static/admin/js/platform-health.js b/static/admin/js/platform-health.js new file mode 100644 index 00000000..ae0f4830 --- /dev/null +++ 
b/static/admin/js/platform-health.js @@ -0,0 +1,128 @@ +// static/admin/js/platform-health.js +/** + * Admin platform health monitoring page logic + * Displays system metrics, capacity thresholds, and scaling recommendations + */ + +const adminPlatformHealthLog = window.LogConfig.loggers.adminPlatformHealth || + window.LogConfig.createLogger('adminPlatformHealth', false); + +adminPlatformHealthLog.info('Loading...'); + +function adminPlatformHealth() { + adminPlatformHealthLog.info('adminPlatformHealth() called'); + + return { + // Inherit base layout state + ...data(), + + // Set page identifier + currentPage: 'platform-health', + + // Loading states + loading: true, + error: '', + + // Health data + health: null, + + // Auto-refresh interval (30 seconds) + refreshInterval: null, + + async init() { + adminPlatformHealthLog.info('Platform Health init() called'); + + // Guard against multiple initialization + if (window._adminPlatformHealthInitialized) { + adminPlatformHealthLog.warn('Already initialized, skipping'); + return; + } + window._adminPlatformHealthInitialized = true; + + // Load initial data + await this.loadHealth(); + + // Set up auto-refresh every 30 seconds + this.refreshInterval = setInterval(() => { + this.loadHealth(); + }, 30000); + + adminPlatformHealthLog.info('Platform Health initialization complete'); + }, + + /** + * Clean up on component destroy + */ + destroy() { + if (this.refreshInterval) { + clearInterval(this.refreshInterval); + this.refreshInterval = null; + } + }, + + /** + * Load platform health data + */ + async loadHealth() { + this.loading = true; + this.error = ''; + + try { + const response = await apiClient.get('/admin/platform/health'); + this.health = response; + + adminPlatformHealthLog.info('Loaded health data:', { + status: response.overall_status, + tier: response.infrastructure_tier + }); + } catch (error) { + adminPlatformHealthLog.error('Failed to load health:', error); + this.error = error.message || 'Failed to load 
platform health'; + } finally { + this.loading = false; + } + }, + + /** + * Manual refresh + */ + async refresh() { + await this.loadHealth(); + }, + + /** + * Format number with locale + */ + formatNumber(num) { + if (num === null || num === undefined) return '0'; + if (typeof num === 'number' && num % 1 !== 0) { + return num.toFixed(2); + } + return new Intl.NumberFormat('en-US').format(num); + }, + + /** + * Format storage size + */ + formatStorage(gb) { + if (gb === null || gb === undefined) return '0 GB'; + if (gb < 1) { + return (gb * 1024).toFixed(0) + ' MB'; + } + return gb.toFixed(2) + ' GB'; + }, + + /** + * Format timestamp + */ + formatTime(timestamp) { + if (!timestamp) return 'Unknown'; + try { + const date = new Date(timestamp); + return date.toLocaleTimeString(); + } catch (e) { + return 'Unknown'; + } + } + }; +}