diff --git a/app/api/v1/admin/__init__.py b/app/api/v1/admin/__init__.py
index 66b62094..f9f16e6b 100644
--- a/app/api/v1/admin/__init__.py
+++ b/app/api/v1/admin/__init__.py
@@ -33,6 +33,7 @@ from . import (
content_pages,
customers,
dashboard,
+ images,
inventory,
letzshop,
logs,
@@ -42,6 +43,7 @@ from . import (
notifications,
order_item_exceptions,
orders,
+ platform_health,
products,
settings,
tests,
@@ -162,6 +164,14 @@ router.include_router(messages.router, tags=["admin-messages"])
# Include log management endpoints
router.include_router(logs.router, tags=["admin-logs"])
+# Include image management endpoints
+router.include_router(images.router, tags=["admin-images"])
+
+# Include platform health endpoints
+router.include_router(
+ platform_health.router, prefix="/platform", tags=["admin-platform-health"]
+)
+
# ============================================================================
# Code Quality & Architecture
diff --git a/app/api/v1/admin/images.py b/app/api/v1/admin/images.py
new file mode 100644
index 00000000..99a0cdf3
--- /dev/null
+++ b/app/api/v1/admin/images.py
@@ -0,0 +1,121 @@
+# app/api/v1/admin/images.py
+"""
+Admin image management endpoints.
+
+Provides:
+- Image upload with automatic processing
+- Image deletion
+- Storage statistics
+"""
+
+import logging
+
+from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
+
+from app.api.deps import get_current_admin_api
+from app.services.image_service import image_service
+from models.database.user import User
+from models.schema.image import (
+ ImageDeleteResponse,
+ ImageStorageStats,
+ ImageUploadResponse,
+)
+
+router = APIRouter(prefix="/images")
+logger = logging.getLogger(__name__)
+
+# Maximum upload size (10MB)
+MAX_UPLOAD_SIZE = 10 * 1024 * 1024
+
+
+@router.post("/upload", response_model=ImageUploadResponse)
+async def upload_image(
+    file: UploadFile = File(...),
+    vendor_id: int = Form(...),
+    product_id: int | None = Form(None),
+    current_admin: User = Depends(get_current_admin_api),
+):
+    """Upload and process an image.
+
+    The image will be:
+    - Converted to WebP format
+    - Resized to multiple variants (original, 800px, 200px)
+    - Stored in a sharded directory structure
+
+    Args:
+        file: Image file to upload
+        vendor_id: Vendor ID for the image
+        product_id: Optional product ID
+
+    Returns:
+        Image URLs and metadata
+    """
+    # Validate file size
+    content = await file.read()
+    if len(content) > MAX_UPLOAD_SIZE:
+        raise HTTPException(
+            status_code=413,
+            detail=f"File too large. Maximum size: {MAX_UPLOAD_SIZE // (1024*1024)}MB",
+        )
+
+    # Validate content type
+    if not file.content_type or not file.content_type.startswith("image/"):
+        raise HTTPException(
+            status_code=400,
+            detail="Invalid file type. Only images are allowed.",
+        )
+
+    try:
+        result = image_service.upload_product_image(
+            file_content=content,
+            filename=file.filename or "image.jpg",
+            vendor_id=vendor_id,
+            product_id=product_id,
+        )
+
+        logger.info(f"Image uploaded: {result['id']} for vendor {vendor_id}")
+
+        return ImageUploadResponse(success=True, image=result)
+
+    except ValueError as e:
+        logger.warning(f"Image upload failed: {e}")
+        return ImageUploadResponse(success=False, error=str(e))
+
+    except Exception as e:
+        logger.error(f"Image upload error: {e}")
+        raise HTTPException(status_code=500, detail="Failed to process image") from e
+
+
+@router.delete("/{image_hash}", response_model=ImageDeleteResponse)
+async def delete_image(
+ image_hash: str,
+ current_admin: User = Depends(get_current_admin_api),
+):
+ """Delete an image and all its variants.
+
+ Args:
+ image_hash: The image ID/hash
+
+ Returns:
+ Deletion status
+ """
+ deleted = image_service.delete_product_image(image_hash)
+
+ if deleted:
+ logger.info(f"Image deleted: {image_hash}")
+ return ImageDeleteResponse(success=True, message="Image deleted successfully")
+ else:
+ return ImageDeleteResponse(success=False, message="Image not found")
+
+
+@router.get("/stats", response_model=ImageStorageStats)
+async def get_storage_stats(
+ current_admin: User = Depends(get_current_admin_api),
+):
+ """Get image storage statistics.
+
+ Returns:
+ Storage metrics including file counts, sizes, and directory info
+ """
+ stats = image_service.get_storage_stats()
+ return ImageStorageStats(**stats)
diff --git a/app/api/v1/admin/platform_health.py b/app/api/v1/admin/platform_health.py
new file mode 100644
index 00000000..fd2d2b25
--- /dev/null
+++ b/app/api/v1/admin/platform_health.py
@@ -0,0 +1,532 @@
+# app/api/v1/admin/platform_health.py
+"""
+Platform health and capacity monitoring endpoints.
+
+Provides:
+- Overall platform health status
+- Capacity metrics and thresholds
+- Scaling recommendations
+"""
+
+import logging
+import os
+import platform
+import psutil
+from datetime import datetime
+
+from fastapi import APIRouter, Depends
+from pydantic import BaseModel
+from sqlalchemy import func, text
+from sqlalchemy.orm import Session
+
+from app.api.deps import get_current_admin_api
+from app.core.database import get_db
+from app.services.image_service import image_service
+from models.database.inventory import Inventory
+from models.database.order import Order
+from models.database.product import Product
+from models.database.user import User
+from models.database.vendor import Vendor
+
+router = APIRouter()
+logger = logging.getLogger(__name__)
+
+
+# ============================================================================
+# Schemas
+# ============================================================================
+
+
+class SystemMetrics(BaseModel):
+ """System resource metrics."""
+
+ cpu_percent: float
+ memory_percent: float
+ memory_used_gb: float
+ memory_total_gb: float
+ disk_percent: float
+ disk_used_gb: float
+ disk_total_gb: float
+
+
+class DatabaseMetrics(BaseModel):
+ """Database metrics."""
+
+ size_mb: float
+ products_count: int
+ orders_count: int
+ vendors_count: int
+ inventory_count: int
+
+
+class ImageStorageMetrics(BaseModel):
+ """Image storage metrics."""
+
+ total_files: int
+ total_size_mb: float
+ total_size_gb: float
+ max_files_per_dir: int
+ products_estimated: int
+
+
+class CapacityThreshold(BaseModel):
+ """Capacity threshold status."""
+
+ name: str
+ current: float
+ warning: float
+ critical: float
+ limit: float
+ status: str # ok, warning, critical
+ percent_used: float
+
+
+class ScalingRecommendation(BaseModel):
+ """Scaling recommendation."""
+
+ priority: str # info, warning, critical
+ title: str
+ description: str
+ action: str | None = None
+
+
+class PlatformHealthResponse(BaseModel):
+ """Complete platform health response."""
+
+ timestamp: str
+ overall_status: str # healthy, degraded, critical
+ system: SystemMetrics
+ database: DatabaseMetrics
+ image_storage: ImageStorageMetrics
+ thresholds: list[CapacityThreshold]
+ recommendations: list[ScalingRecommendation]
+ infrastructure_tier: str
+ next_tier_trigger: str | None = None
+
+
+class CapacityMetricsResponse(BaseModel):
+ """Capacity-focused metrics."""
+
+ products_total: int
+ products_by_vendor: dict[str, int]
+ images_total: int
+ storage_used_gb: float
+ database_size_mb: float
+ orders_this_month: int
+ active_vendors: int
+
+
+# ============================================================================
+# Thresholds Configuration
+# ============================================================================
+
+CAPACITY_THRESHOLDS = {
+ "products_total": {
+ "warning": 400_000,
+ "critical": 475_000,
+ "limit": 500_000,
+ },
+ "storage_gb": {
+ "warning": 800,
+ "critical": 950,
+ "limit": 1000,
+ },
+ "db_size_mb": {
+ "warning": 20_000,
+ "critical": 24_000,
+ "limit": 25_000,
+ },
+ "disk_percent": {
+ "warning": 70,
+ "critical": 85,
+ "limit": 100,
+ },
+ "memory_percent": {
+ "warning": 75,
+ "critical": 90,
+ "limit": 100,
+ },
+ "cpu_percent": {
+ "warning": 70,
+ "critical": 85,
+ "limit": 100,
+ },
+}
+
+INFRASTRUCTURE_TIERS = [
+ {"name": "Starter", "max_clients": 50, "max_products": 10_000},
+ {"name": "Small", "max_clients": 100, "max_products": 30_000},
+ {"name": "Medium", "max_clients": 300, "max_products": 100_000},
+ {"name": "Large", "max_clients": 500, "max_products": 250_000},
+ {"name": "Scale", "max_clients": 1000, "max_products": 500_000},
+ {"name": "Enterprise", "max_clients": None, "max_products": None},
+]
+
+
+# ============================================================================
+# Endpoints
+# ============================================================================
+
+
+@router.get("/health", response_model=PlatformHealthResponse)
+async def get_platform_health(
+ db: Session = Depends(get_db),
+ current_admin: User = Depends(get_current_admin_api),
+):
+ """Get comprehensive platform health status.
+
+ Returns system metrics, database stats, storage info, and recommendations.
+ """
+ # System metrics
+ system = _get_system_metrics()
+
+ # Database metrics
+ database = _get_database_metrics(db)
+
+ # Image storage metrics
+ image_stats = image_service.get_storage_stats()
+ image_storage = ImageStorageMetrics(
+ total_files=image_stats["total_files"],
+ total_size_mb=image_stats["total_size_mb"],
+ total_size_gb=image_stats["total_size_gb"],
+ max_files_per_dir=image_stats["max_files_per_dir"],
+ products_estimated=image_stats["products_estimated"],
+ )
+
+ # Calculate thresholds
+ thresholds = _calculate_thresholds(system, database, image_storage)
+
+ # Generate recommendations
+ recommendations = _generate_recommendations(thresholds, database)
+
+ # Determine infrastructure tier
+ tier, next_trigger = _determine_tier(database.vendors_count, database.products_count)
+
+ # Overall status
+ overall_status = _determine_overall_status(thresholds)
+
+ return PlatformHealthResponse(
+ timestamp=datetime.utcnow().isoformat(),
+ overall_status=overall_status,
+ system=system,
+ database=database,
+ image_storage=image_storage,
+ thresholds=thresholds,
+ recommendations=recommendations,
+ infrastructure_tier=tier,
+ next_tier_trigger=next_trigger,
+ )
+
+
+@router.get("/capacity", response_model=CapacityMetricsResponse)
+async def get_capacity_metrics(
+    db: Session = Depends(get_db),
+    current_admin: User = Depends(get_current_admin_api),
+):
+    """Get capacity-focused metrics for planning."""
+    # Products total
+    products_total = db.query(func.count(Product.id)).scalar() or 0
+
+    # Products by vendor
+    vendor_counts = (
+        db.query(Vendor.name, func.count(Product.id))
+        .join(Product, Vendor.id == Product.vendor_id)
+        .group_by(Vendor.name)
+        .all()
+    )
+    products_by_vendor = {name or "Unknown": count for name, count in vendor_counts}
+
+    # Image storage
+    image_stats = image_service.get_storage_stats()
+
+    # Database size (approximate for SQLite)
+    db_size = _get_database_size(db)
+
+    # Orders this month (zero microseconds too, so the boundary is exactly midnight)
+    start_of_month = datetime.utcnow().replace(day=1, hour=0, minute=0, second=0, microsecond=0)
+    orders_this_month = (
+        db.query(func.count(Order.id))
+        .filter(Order.created_at >= start_of_month)
+        .scalar()
+        or 0
+    )
+
+    # Active vendors
+    active_vendors = db.query(func.count(Vendor.id)).filter(Vendor.is_active == True).scalar() or 0  # noqa: E712
+
+    return CapacityMetricsResponse(
+        products_total=products_total,
+        products_by_vendor=products_by_vendor,
+        images_total=image_stats["total_files"],
+        storage_used_gb=image_stats["total_size_gb"],
+        database_size_mb=db_size,
+        orders_this_month=orders_this_month,
+        active_vendors=active_vendors,
+    )
+
+
+# ============================================================================
+# Helper Functions
+# ============================================================================
+
+
+def _get_system_metrics() -> SystemMetrics:
+ """Get current system resource metrics."""
+ cpu_percent = psutil.cpu_percent(interval=0.1)
+ memory = psutil.virtual_memory()
+ disk = psutil.disk_usage("/")
+
+ return SystemMetrics(
+ cpu_percent=cpu_percent,
+ memory_percent=memory.percent,
+ memory_used_gb=round(memory.used / (1024**3), 2),
+ memory_total_gb=round(memory.total / (1024**3), 2),
+ disk_percent=disk.percent,
+ disk_used_gb=round(disk.used / (1024**3), 2),
+ disk_total_gb=round(disk.total / (1024**3), 2),
+ )
+
+
+def _get_database_metrics(db: Session) -> DatabaseMetrics:
+ """Get database statistics."""
+ products_count = db.query(func.count(Product.id)).scalar() or 0
+ orders_count = db.query(func.count(Order.id)).scalar() or 0
+ vendors_count = db.query(func.count(Vendor.id)).scalar() or 0
+ inventory_count = db.query(func.count(Inventory.id)).scalar() or 0
+
+ db_size = _get_database_size(db)
+
+ return DatabaseMetrics(
+ size_mb=db_size,
+ products_count=products_count,
+ orders_count=orders_count,
+ vendors_count=vendors_count,
+ inventory_count=inventory_count,
+ )
+
+
+def _get_database_size(db: Session) -> float:
+ """Get database size in MB."""
+ try:
+ # Try SQLite approach
+ result = db.execute(text("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()"))
+ row = result.fetchone()
+ if row:
+ return round(row[0] / (1024 * 1024), 2)
+ except Exception:
+ pass
+
+ try:
+ # Try PostgreSQL approach
+ result = db.execute(text("SELECT pg_database_size(current_database())"))
+ row = result.fetchone()
+ if row:
+ return round(row[0] / (1024 * 1024), 2)
+ except Exception:
+ pass
+
+ return 0.0
+
+
+def _calculate_thresholds(
+ system: SystemMetrics,
+ database: DatabaseMetrics,
+ image_storage: ImageStorageMetrics,
+) -> list[CapacityThreshold]:
+ """Calculate threshold status for each metric."""
+ thresholds = []
+
+ # Products threshold
+ products_config = CAPACITY_THRESHOLDS["products_total"]
+ thresholds.append(
+ _create_threshold(
+ "Products",
+ database.products_count,
+ products_config["warning"],
+ products_config["critical"],
+ products_config["limit"],
+ )
+ )
+
+ # Storage threshold
+ storage_config = CAPACITY_THRESHOLDS["storage_gb"]
+ thresholds.append(
+ _create_threshold(
+ "Image Storage (GB)",
+ image_storage.total_size_gb,
+ storage_config["warning"],
+ storage_config["critical"],
+ storage_config["limit"],
+ )
+ )
+
+ # Database size threshold
+ db_config = CAPACITY_THRESHOLDS["db_size_mb"]
+ thresholds.append(
+ _create_threshold(
+ "Database (MB)",
+ database.size_mb,
+ db_config["warning"],
+ db_config["critical"],
+ db_config["limit"],
+ )
+ )
+
+ # Disk threshold
+ disk_config = CAPACITY_THRESHOLDS["disk_percent"]
+ thresholds.append(
+ _create_threshold(
+ "Disk Usage (%)",
+ system.disk_percent,
+ disk_config["warning"],
+ disk_config["critical"],
+ disk_config["limit"],
+ )
+ )
+
+ # Memory threshold
+ memory_config = CAPACITY_THRESHOLDS["memory_percent"]
+ thresholds.append(
+ _create_threshold(
+ "Memory Usage (%)",
+ system.memory_percent,
+ memory_config["warning"],
+ memory_config["critical"],
+ memory_config["limit"],
+ )
+ )
+
+ # CPU threshold
+ cpu_config = CAPACITY_THRESHOLDS["cpu_percent"]
+ thresholds.append(
+ _create_threshold(
+ "CPU Usage (%)",
+ system.cpu_percent,
+ cpu_config["warning"],
+ cpu_config["critical"],
+ cpu_config["limit"],
+ )
+ )
+
+ return thresholds
+
+
+def _create_threshold(
+ name: str, current: float, warning: float, critical: float, limit: float
+) -> CapacityThreshold:
+ """Create a threshold status object."""
+ percent_used = (current / limit) * 100 if limit > 0 else 0
+
+ if current >= critical:
+ status = "critical"
+ elif current >= warning:
+ status = "warning"
+ else:
+ status = "ok"
+
+ return CapacityThreshold(
+ name=name,
+ current=current,
+ warning=warning,
+ critical=critical,
+ limit=limit,
+ status=status,
+ percent_used=round(percent_used, 1),
+ )
+
+
+def _generate_recommendations(
+ thresholds: list[CapacityThreshold], database: DatabaseMetrics
+) -> list[ScalingRecommendation]:
+ """Generate scaling recommendations based on thresholds."""
+ recommendations = []
+
+ for threshold in thresholds:
+ if threshold.status == "critical":
+ recommendations.append(
+ ScalingRecommendation(
+ priority="critical",
+ title=f"{threshold.name} at critical level",
+ description=f"Currently at {threshold.percent_used:.0f}% of capacity ({threshold.current:.0f} of {threshold.limit:.0f})",
+ action="Immediate scaling or cleanup required",
+ )
+ )
+ elif threshold.status == "warning":
+ recommendations.append(
+ ScalingRecommendation(
+ priority="warning",
+ title=f"{threshold.name} approaching limit",
+ description=f"Currently at {threshold.percent_used:.0f}% of capacity ({threshold.current:.0f} of {threshold.limit:.0f})",
+ action="Plan scaling in the next 2-4 weeks",
+ )
+ )
+
+ # Add tier-based recommendations
+ if database.vendors_count > 0:
+ tier, next_trigger = _determine_tier(database.vendors_count, database.products_count)
+ if next_trigger:
+ recommendations.append(
+ ScalingRecommendation(
+ priority="info",
+ title=f"Current tier: {tier}",
+ description=next_trigger,
+ action="Review capacity planning documentation",
+ )
+ )
+
+ # If no issues, add positive status
+ if not recommendations:
+ recommendations.append(
+ ScalingRecommendation(
+ priority="info",
+ title="All systems healthy",
+ description="No capacity concerns at this time",
+ action=None,
+ )
+ )
+
+ return recommendations
+
+
+def _determine_tier(vendors: int, products: int) -> tuple[str, str | None]:
+ """Determine current infrastructure tier and next trigger."""
+ current_tier = "Starter"
+ next_trigger = None
+
+ for i, tier in enumerate(INFRASTRUCTURE_TIERS):
+ max_clients = tier["max_clients"]
+ max_products = tier["max_products"]
+
+ if max_clients is None:
+ current_tier = tier["name"]
+ break
+
+ if vendors <= max_clients and products <= max_products:
+ current_tier = tier["name"]
+
+ # Check proximity to next tier
+ if i < len(INFRASTRUCTURE_TIERS) - 1:
+ next_tier = INFRASTRUCTURE_TIERS[i + 1]
+ vendor_percent = (vendors / max_clients) * 100
+ product_percent = (products / max_products) * 100
+
+ if vendor_percent > 70 or product_percent > 70:
+ next_trigger = (
+ f"Approaching {next_tier['name']} tier "
+ f"(vendors: {vendor_percent:.0f}%, products: {product_percent:.0f}%)"
+ )
+ break
+
+ return current_tier, next_trigger
+
+
+def _determine_overall_status(thresholds: list[CapacityThreshold]) -> str:
+ """Determine overall platform status."""
+ statuses = [t.status for t in thresholds]
+
+ if "critical" in statuses:
+ return "critical"
+ elif "warning" in statuses:
+ return "degraded"
+ else:
+ return "healthy"
diff --git a/app/routes/admin_pages.py b/app/routes/admin_pages.py
index 4343a660..f97724ae 100644
--- a/app/routes/admin_pages.py
+++ b/app/routes/admin_pages.py
@@ -1200,3 +1200,27 @@ async def admin_code_quality_violation_detail(
"violation_id": violation_id,
},
)
+
+
+# ============================================================================
+# PLATFORM HEALTH & MONITORING ROUTES
+# ============================================================================
+
+
+@router.get("/platform-health", response_class=HTMLResponse, include_in_schema=False)
+async def admin_platform_health(
+ request: Request,
+ current_user: User = Depends(get_current_admin_from_cookie_or_header),
+ db: Session = Depends(get_db),
+):
+ """
+ Render platform health monitoring page.
+ Shows system metrics, capacity thresholds, and scaling recommendations.
+ """
+ return templates.TemplateResponse(
+ "admin/platform-health.html",
+ {
+ "request": request,
+ "user": current_user,
+ },
+ )
diff --git a/app/services/image_service.py b/app/services/image_service.py
new file mode 100644
index 00000000..e840ff3f
--- /dev/null
+++ b/app/services/image_service.py
@@ -0,0 +1,285 @@
+# app/services/image_service.py
+"""
+Image upload and management service.
+
+Provides:
+- Image upload with automatic optimization
+- WebP conversion
+- Multiple size variant generation
+- Sharded directory structure for performance
+"""
+
+import hashlib
+import logging
+import os
+import shutil
+from datetime import datetime
+from io import BytesIO
+from pathlib import Path
+
+from PIL import Image
+
+logger = logging.getLogger(__name__)
+
+
+class ImageService:
+ """Service for image upload and management."""
+
+ # Supported image formats
+ ALLOWED_EXTENSIONS = {"jpg", "jpeg", "png", "gif", "webp"}
+
+ # Size variants to generate
+ SIZES = {
+ "original": None, # No max dimension, just optimize
+ "800": 800, # Medium size for product cards
+ "200": 200, # Thumbnail for grids
+ }
+
+ # Quality settings
+ QUALITY = 85
+ MAX_DIMENSION = 2000 # Max dimension for original
+
+ def __init__(self, upload_dir: str = "static/uploads"):
+ """Initialize image service.
+
+ Args:
+ upload_dir: Base directory for uploads (relative to project root)
+ """
+ self.upload_dir = Path(upload_dir)
+ self.products_dir = self.upload_dir / "products"
+
+ # Ensure directories exist
+ self.products_dir.mkdir(parents=True, exist_ok=True)
+
+ def upload_product_image(
+ self,
+ file_content: bytes,
+ filename: str,
+ vendor_id: int,
+ product_id: int | None = None,
+ ) -> dict:
+ """Upload and process a product image.
+
+ Args:
+ file_content: Raw file bytes
+ filename: Original filename
+ vendor_id: Vendor ID for path generation
+ product_id: Optional product ID
+
+ Returns:
+ Dict with image info and URLs
+ """
+ # Validate file extension
+ ext = self._get_extension(filename)
+ if ext not in self.ALLOWED_EXTENSIONS:
+ raise ValueError(f"Invalid file type: {ext}. Allowed: {self.ALLOWED_EXTENSIONS}")
+
+ # Generate unique hash for this image
+ image_hash = self._generate_hash(vendor_id, product_id, filename)
+
+ # Determine sharded directory path
+ shard_path = self._get_shard_path(image_hash)
+ full_dir = self.products_dir / shard_path
+ full_dir.mkdir(parents=True, exist_ok=True)
+
+ # Load and process image
+ try:
+ img = Image.open(BytesIO(file_content))
+
+ # Convert to RGB if necessary (for PNG with alpha)
+ if img.mode in ("RGBA", "P"):
+ img = img.convert("RGB")
+
+ # Get original dimensions
+ original_width, original_height = img.size
+
+ # Process and save variants
+ urls = {}
+ total_size = 0
+
+ for size_name, max_dim in self.SIZES.items():
+ processed_img = self._resize_image(img.copy(), max_dim)
+ file_path = full_dir / f"{image_hash}_{size_name}.webp"
+
+ # Save as WebP
+ processed_img.save(file_path, "WEBP", quality=self.QUALITY)
+
+ # Track size
+ file_size = file_path.stat().st_size
+ total_size += file_size
+
+ # Generate URL path (relative to static)
+ url_path = f"/static/uploads/products/{shard_path}/{image_hash}_{size_name}.webp"
+ urls[size_name] = url_path
+
+ logger.debug(f"Saved {size_name}: {file_path} ({file_size} bytes)")
+
+ logger.info(
+ f"Uploaded image {image_hash} for vendor {vendor_id}: "
+ f"{len(urls)} variants, {total_size} bytes total"
+ )
+
+ return {
+ "id": image_hash,
+ "urls": urls,
+ "size_bytes": total_size,
+ "dimensions": {
+ "width": original_width,
+ "height": original_height,
+ },
+ "path": str(shard_path),
+ }
+
+ except Exception as e:
+ logger.error(f"Failed to process image: {e}")
+ raise ValueError(f"Failed to process image: {e}")
+
+ def delete_product_image(self, image_hash: str) -> bool:
+ """Delete all variants of a product image.
+
+ Args:
+ image_hash: The image hash/ID
+
+ Returns:
+ True if deleted, False if not found
+ """
+ shard_path = self._get_shard_path(image_hash)
+ full_dir = self.products_dir / shard_path
+
+ if not full_dir.exists():
+ return False
+
+ deleted = False
+ for size_name in self.SIZES:
+ file_path = full_dir / f"{image_hash}_{size_name}.webp"
+ if file_path.exists():
+ file_path.unlink()
+ deleted = True
+ logger.debug(f"Deleted: {file_path}")
+
+ # Clean up empty directories
+ self._cleanup_empty_dirs(full_dir)
+
+ if deleted:
+ logger.info(f"Deleted image {image_hash}")
+
+ return deleted
+
+ def get_storage_stats(self) -> dict:
+ """Get storage statistics.
+
+ Returns:
+ Dict with storage metrics
+ """
+ total_files = 0
+ total_size = 0
+ max_files_per_dir = 0
+ dir_count = 0
+
+ for root, dirs, files in os.walk(self.products_dir):
+ webp_files = [f for f in files if f.endswith(".webp")]
+ file_count = len(webp_files)
+ total_files += file_count
+
+ if file_count > 0:
+ dir_count += 1
+ max_files_per_dir = max(max_files_per_dir, file_count)
+
+ for f in webp_files:
+ file_path = Path(root) / f
+ total_size += file_path.stat().st_size
+
+ # Calculate average files per directory
+ avg_files_per_dir = total_files / dir_count if dir_count > 0 else 0
+
+ return {
+ "total_files": total_files,
+ "total_size_bytes": total_size,
+ "total_size_mb": round(total_size / (1024 * 1024), 2),
+ "total_size_gb": round(total_size / (1024 * 1024 * 1024), 3),
+ "directory_count": dir_count,
+ "max_files_per_dir": max_files_per_dir,
+ "avg_files_per_dir": round(avg_files_per_dir, 1),
+ "products_estimated": total_files // 3, # 3 variants per image
+ }
+
+    def _generate_hash(
+        self, vendor_id: int, product_id: int | None, filename: str
+    ) -> str:
+        """Generate unique hash for image.
+
+        Args:
+            vendor_id: Vendor ID
+            product_id: Product ID (optional)
+            filename: Original filename
+
+        Returns:
+            8-character hex hash
+        """
+        timestamp = datetime.utcnow().isoformat()
+        content = f"{vendor_id}:{product_id}:{timestamp}:{filename}"
+        return hashlib.md5(content.encode()).hexdigest()[:8]
+
+ def _get_shard_path(self, image_hash: str) -> str:
+ """Get sharded directory path from hash.
+
+ Uses first 4 characters to create 2-level directory structure.
+ This creates 256 possible directories at each level.
+
+ Args:
+ image_hash: 8-character hash
+
+ Returns:
+ Path like "0a/1b"
+ """
+ return f"{image_hash[:2]}/{image_hash[2:4]}"
+
+ def _get_extension(self, filename: str) -> str:
+ """Get lowercase file extension."""
+ return filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
+
+ def _resize_image(self, img: Image.Image, max_dimension: int | None) -> Image.Image:
+ """Resize image while maintaining aspect ratio.
+
+ Args:
+ img: PIL Image
+ max_dimension: Maximum width or height (None = use MAX_DIMENSION)
+
+ Returns:
+ Resized PIL Image
+ """
+ if max_dimension is None:
+ max_dimension = self.MAX_DIMENSION
+
+ width, height = img.size
+
+ # Only resize if larger than max
+ if width <= max_dimension and height <= max_dimension:
+ return img
+
+ # Calculate new dimensions maintaining aspect ratio
+ if width > height:
+ new_width = max_dimension
+ new_height = int(height * (max_dimension / width))
+ else:
+ new_height = max_dimension
+ new_width = int(width * (max_dimension / height))
+
+ return img.resize((new_width, new_height), Image.Resampling.LANCZOS)
+
+ def _cleanup_empty_dirs(self, dir_path: Path):
+ """Remove empty directories up the tree."""
+ try:
+ # Try to remove the directory and its parents if empty
+ while dir_path != self.products_dir:
+ if dir_path.exists() and not any(dir_path.iterdir()):
+ dir_path.rmdir()
+ dir_path = dir_path.parent
+ else:
+ break
+ except OSError:
+ pass # Directory not empty or other error
+
+
+# Create service instance
+image_service = ImageService()
diff --git a/app/templates/admin/partials/sidebar.html b/app/templates/admin/partials/sidebar.html
index bdd5fc69..4c52ea21 100644
--- a/app/templates/admin/partials/sidebar.html
+++ b/app/templates/admin/partials/sidebar.html
@@ -111,6 +111,7 @@
{{ section_header('Platform Health', 'platformHealth') }}
{% call section_content('platformHealth') %}
+ {{ menu_item('platform-health', '/admin/platform-health', 'chart-bar', 'Capacity Monitor') }}
{{ menu_item('testing', '/admin/testing', 'beaker', 'Testing Hub') }}
{{ menu_item('code-quality', '/admin/code-quality', 'shield-check', 'Code Quality') }}
{% endcall %}
diff --git a/app/templates/admin/platform-health.html b/app/templates/admin/platform-health.html
new file mode 100644
index 00000000..946b2105
--- /dev/null
+++ b/app/templates/admin/platform-health.html
@@ -0,0 +1,275 @@
+{# app/templates/admin/platform-health.html #}
+{% extends "admin/base.html" %}
+{% from 'shared/macros/alerts.html' import loading_state, error_state %}
+{% from 'shared/macros/headers.html' import page_header %}
+
+{% block title %}Platform Health{% endblock %}
+
+{% block alpine_data %}adminPlatformHealth(){% endblock %}
+
+{% block content %}
+{% call page_header("Platform Health", subtitle="System metrics, capacity monitoring, and scaling recommendations") %}
+
+{% endcall %}
+
+{{ loading_state('Loading platform health...') }}
+
+{{ error_state('Error loading platform health') }}
+
+
+
+
+
+
+
+
+
+
+ Infrastructure Tier:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Capacity Thresholds
+
+
+
+
+
+
+
Scaling Recommendations
+
+
+
+
+
+
+{% endblock %}
+
+{% block extra_scripts %}
+
+{% endblock %}
diff --git a/docs/architecture/capacity-planning.md b/docs/architecture/capacity-planning.md
new file mode 100644
index 00000000..e11b2ea5
--- /dev/null
+++ b/docs/architecture/capacity-planning.md
@@ -0,0 +1,454 @@
+# Capacity Planning & Infrastructure Sizing
+
+This document provides comprehensive capacity planning guidelines for the Wizamart platform, including resource requirements, scaling thresholds, and monitoring recommendations.
+
+> **Related:** [Pricing Strategy](../marketing/pricing.md) for tier definitions and limits
+
+---
+
+## Tier Resource Allocations
+
+Based on our [pricing tiers](../marketing/pricing.md), here are the expected resource requirements per client:
+
+| Metric | Essential (€49) | Professional (€99) | Business (€199) | Enterprise (€399+) |
+|--------|-----------------|--------------------|-----------------|--------------------|
+| Products | 200 | 500 | 2,000 | Unlimited |
+| Images per product | 3 | 5 | 8 | 10+ |
+| Orders per month | 100 | 500 | 2,000 | Unlimited |
+| SKU variants | 1.2x | 1.5x | 2x | 3x |
+| Team members | 1 | 3 | 10 | Unlimited |
+| API requests/day | 1,000 | 5,000 | 20,000 | Unlimited |
+
+---
+
+## Scale Projections
+
+### Target: 1,000 Business Clients (€199/month tier)
+
+This represents our primary growth target. Here's the infrastructure impact:
+
+| Resource | Calculation | Total |
+|----------|-------------|-------|
+| **Products** | 1,000 clients × 500 products | **500,000** |
+| **Product Translations** | 500,000 × 4 languages | **2,000,000 rows** |
+| **Images (files)** | 500,000 × 5 images × 3 sizes | **7,500,000 files** |
+| **Image Storage** | 7.5M files × 200KB avg | **1.5 TB** |
+| **Database Size** | Products + translations + orders + indexes | **15-25 GB** |
+| **Monthly Orders** | 1,000 clients × 300 orders | **300,000 orders** |
+| **Order Items** | 300,000 × 2.5 avg items | **750,000 items/month** |
+| **Monthly API Requests** | 1,000 × 10,000 req/day × 30 | **300M requests** |
+
+### Multi-Tier Mix (Realistic Scenario)
+
+More realistic distribution across tiers:
+
+| Tier | Clients | Products Each | Total Products | Monthly Orders |
+|------|---------|---------------|----------------|----------------|
+| Essential | 500 | 100 | 50,000 | 50,000 |
+| Professional | 300 | 300 | 90,000 | 150,000 |
+| Business | 150 | 1,000 | 150,000 | 300,000 |
+| Enterprise | 50 | 3,000 | 150,000 | 200,000 |
+| **Total** | **1,000** | - | **440,000** | **700,000** |
+
+---
+
+## Server Sizing Recommendations
+
+### Infrastructure Tiers
+
+| Scale | Clients | vCPU | RAM | Storage | Database | Monthly Cost |
+|-------|---------|------|-----|---------|----------|--------------|
+| **Starter** | 1-50 | 2 | 4GB | 100GB SSD | SQLite | €20-40 |
+| **Small** | 50-100 | 4 | 8GB | 250GB SSD | PostgreSQL | €60-100 |
+| **Medium** | 100-300 | 4 | 16GB | 500GB SSD | PostgreSQL | €100-180 |
+| **Large** | 300-500 | 8 | 32GB | 1TB SSD | PostgreSQL + Redis | €250-400 |
+| **Scale** | 500-1000 | 16 | 64GB | 2TB SSD + CDN | PostgreSQL + Redis | €500-900 |
+| **Enterprise** | 1000+ | 32+ | 128GB+ | 4TB+ + CDN | PostgreSQL cluster | €1,500+ |
+
+### Recommended Configurations
+
+#### Starter (1-50 clients)
+```
+Single Server Setup:
+- Hetzner CX22 or similar (2 vCPU, 4GB RAM)
+- 100GB SSD storage
+- SQLite database
+- nginx for static files + reverse proxy
+- Estimated cost: €20-40/month
+```
+
+#### Small-Medium (50-300 clients)
+```
+Two-Server Setup:
+- App Server: 4 vCPU, 8-16GB RAM
+- Database: Managed PostgreSQL (basic tier)
+- Storage: Local SSD + backup
+- Optional: Redis for sessions/caching
+- Estimated cost: €80-180/month
+```
+
+#### Large (300-1000 clients)
+```
+Multi-Component Setup:
+- Load Balancer: nginx or cloud LB
+- App Servers: 2-4 × (4 vCPU, 8GB RAM)
+- Database: Managed PostgreSQL (production tier)
+- Cache: Redis (managed or self-hosted)
+- Storage: Object storage (S3-compatible) + CDN
+- Estimated cost: €400-900/month
+```
+
+#### Enterprise (1000+ clients)
+```
+Full Production Setup:
+- CDN: Cloudflare or similar
+- Load Balancer: Cloud-native with health checks
+- App Servers: 4-8 × (4 vCPU, 16GB RAM) with auto-scaling
+- Database: PostgreSQL with read replicas
+- Cache: Redis cluster
+- Storage: S3 + CloudFront or equivalent
+- Monitoring: Prometheus + Grafana
+- Estimated cost: €1,500+/month
+```
+
+---
+
+## Image Storage Architecture
+
+### Capacity Calculations
+
+| Image Size (optimized) | Files per 25GB | Files per 100GB | Files per 1TB |
+|------------------------|----------------|-----------------|---------------|
+| 100KB (thumbnails) | 250,000 | 1,000,000 | 10,000,000 |
+| 200KB (web-ready) | 125,000 | 500,000 | 5,000,000 |
+| 300KB (high quality) | 83,000 | 333,000 | 3,330,000 |
+| 500KB (original) | 50,000 | 200,000 | 2,000,000 |
+
+### Image Sizes Generated
+
+Each uploaded image generates 3 variants:
+
+| Variant | Dimensions | Typical Size | Use Case |
+|---------|------------|--------------|----------|
+| `thumb` | 200×200 | 10-20KB | List views, grids |
+| `medium` | 800×800 | 80-150KB | Product cards, previews |
+| `original` | As uploaded | 200-500KB | Detail views, zoom |
+
+**Storage per product:** ~600KB (with 3 sizes for main image + 2 additional images)
+
+### Directory Structure (Sharded)
+
+To prevent filesystem performance degradation, images are stored in a sharded directory structure:
+
+```
+/uploads/
+ └── products/
+ ├── 00/ # First 2 chars of hash
+ │ ├── 1a/ # Next 2 chars
+ │ │ ├── 001a2b3c_original.webp
+ │ │ ├── 001a2b3c_800.webp
+ │ │ └── 001a2b3c_200.webp
+ │ └── 2b/
+ │ └── ...
+ ├── 01/
+ └── ...
+```
+
+This structure ensures:
+- Maximum ~256 subdirectories per level
+- ~16 files per leaf directory at 1M total images
+- Fast filesystem operations even at scale
+
+### Performance Thresholds
+
+| Files per Directory | Performance | Required Action |
+|---------------------|-------------|-----------------|
+| < 10,000 | Excellent | None |
+| 10,000 - 100,000 | Good | Monitor, plan sharding |
+| 100,000 - 500,000 | Degraded | **Implement sharding** |
+| > 500,000 | Poor | **Migrate to object storage** |
+
+---
+
+## Database Performance
+
+### Table Size Guidelines
+
+| Rows | Query Time | Status |
+|------|------------|--------|
+| < 10,000 | < 1ms | Excellent |
+| 10,000 - 100,000 | 1-10ms | Good |
+| 100,000 - 1,000,000 | 10-50ms | **Add indexes, optimize queries** |
+| 1,000,000 - 10,000,000 | 50-200ms | **Consider partitioning** |
+| > 10,000,000 | Variable | **Sharding or dedicated DB** |
+
+### Critical Indexes
+
+Ensure these indexes exist at scale:
+
+```sql
+-- Products
+CREATE INDEX idx_product_vendor_active ON products(vendor_id, is_active);
+CREATE INDEX idx_product_gtin ON products(gtin);
+CREATE INDEX idx_product_vendor_sku ON products(vendor_id, vendor_sku);
+
+-- Orders
+CREATE INDEX idx_order_vendor_status ON orders(vendor_id, status);
+CREATE INDEX idx_order_created ON orders(created_at DESC);
+CREATE INDEX idx_order_customer ON orders(customer_id);
+
+-- Inventory
+CREATE INDEX idx_inventory_product_location ON inventory(product_id, warehouse, bin_location);
+CREATE INDEX idx_inventory_vendor ON inventory(vendor_id);
+```
+
+### Database Size Estimates
+
+| Component | Size per 100K Products | Size per 1M Products |
+|-----------|------------------------|----------------------|
+| Products table | 100 MB | 1 GB |
+| Translations (4 langs) | 400 MB | 4 GB |
+| Orders (1 year) | 500 MB | 5 GB |
+| Order items | 200 MB | 2 GB |
+| Inventory | 50 MB | 500 MB |
+| Indexes | 300 MB | 3 GB |
+| **Total** | **~1.5 GB** | **~15 GB** |
+
+---
+
+## Bandwidth & Network
+
+### Monthly Bandwidth Estimates (1000 clients)
+
+| Traffic Type | Calculation | Monthly Volume |
+|--------------|-------------|----------------|
+| Image views | 500K products × 10 views × 500KB | **2.5 TB** |
+| API requests | 10K req/client/day × 1000 × 30 × 2KB | **600 GB** |
+| Static assets | CSS/JS cached, minimal | **50 GB** |
+| **Total Egress** | | **~3 TB/month** |
+
+### Bandwidth Costs (Approximate)
+
+| Provider | First 1TB | Additional per TB |
+|----------|-----------|-------------------|
+| Hetzner | Included | €1/TB |
+| AWS | $90 | $85/TB |
+| DigitalOcean | 1TB free | $10/TB |
+| Cloudflare | Unlimited (CDN) | Free |
+
+**Recommendation:** Use Cloudflare for image CDN to eliminate egress costs.
+
+---
+
+## Scaling Triggers & Thresholds
+
+### When to Scale Up
+
+| Metric | Warning | Critical | Action |
+|--------|---------|----------|--------|
+| CPU Usage | > 70% avg | > 85% avg | Add app server |
+| Memory Usage | > 75% | > 90% | Upgrade RAM or add server |
+| Disk Usage | > 70% | > 85% | Expand storage |
+| DB Query Time (p95) | > 100ms | > 500ms | Optimize queries, add indexes |
+| API Response Time (p95) | > 500ms | > 2000ms | Scale horizontally |
+| DB Connections | > 80% max | > 95% max | Add connection pooling |
+| Error Rate | > 1% | > 5% | Investigate and fix |
+
+### Architecture Transition Points
+
+```
+STARTER → SMALL (50 clients)
+├── Trigger: SQLite becomes bottleneck
+├── Action: Migrate to PostgreSQL
+└── Cost increase: +€40-60/month
+
+SMALL → MEDIUM (100 clients)
+├── Trigger: Single server at 70%+ CPU
+├── Action: Separate DB server
+└── Cost increase: +€50-80/month
+
+MEDIUM → LARGE (300 clients)
+├── Trigger: Need for caching, higher availability
+├── Action: Add Redis, consider multiple app servers
+└── Cost increase: +€150-200/month
+
+LARGE → SCALE (500 clients)
+├── Trigger: Storage >500GB, high traffic
+├── Action: Object storage + CDN, load balancing
+└── Cost increase: +€200-400/month
+
+SCALE → ENTERPRISE (1000+ clients)
+├── Trigger: High availability requirements, SLA
+├── Action: Full redundancy, read replicas, auto-scaling
+└── Cost increase: +€600-1000/month
+```
+
+---
+
+## Monitoring Requirements
+
+### Essential Metrics
+
+Track these metrics for capacity planning:
+
+#### Infrastructure
+- CPU utilization (per server)
+- Memory utilization
+- Disk I/O and usage
+- Network throughput
+
+#### Application
+- Request latency (p50, p95, p99)
+- Request rate (per endpoint)
+- Error rate by type
+- Active sessions
+
+#### Database
+- Query execution time
+- Connection pool usage
+- Table sizes
+- Index usage
+
+#### Business
+- Active clients
+- Products per client
+- Orders per day
+- API calls per client
+
+### Monitoring Dashboard
+
+The admin platform includes a **Capacity Monitoring** page at `/admin/platform-health` with:
+
+1. **Current Usage** - Real-time resource utilization
+2. **Growth Trends** - Historical charts for planning
+3. **Threshold Alerts** - Warning and critical indicators
+4. **Scaling Recommendations** - Automated suggestions
+
+See [Platform Health Monitoring](#platform-health-monitoring) section below.
+
+---
+
+## Cost Analysis
+
+### Infrastructure Cost per Client
+
+| Scale | Clients | Monthly Infra | Cost/Client |
+|-------|---------|---------------|-------------|
+| Starter | 25 | €30 | €1.20 |
+| Small | 75 | €80 | €1.07 |
+| Medium | 200 | €150 | €0.75 |
+| Large | 400 | €350 | €0.88 |
+| Scale | 800 | €700 | €0.88 |
+| Enterprise | 1500 | €1,800 | €1.20 |
+
+### Revenue vs Infrastructure Cost
+
+At 1,000 Business tier clients (€199/month):
+
+| Item | Monthly |
+|------|---------|
+| **Revenue** | €199,000 |
+| Infrastructure | €700-900 |
+| Support (est.) | €3,000 |
+| Development (est.) | €5,000 |
+| **Gross Margin** | **~96%** |
+
+---
+
+## Disaster Recovery
+
+### Backup Strategy by Scale
+
+| Scale | Database Backup | File Backup | RTO | RPO |
+|-------|----------------|-------------|-----|-----|
+| Starter | Daily SQLite copy | Daily rsync | 4h | 24h |
+| Small | Daily pg_dump | Daily sync | 2h | 12h |
+| Medium | Managed backups | S3 versioning | 1h | 6h |
+| Large | Point-in-time | S3 + cross-region | 30m | 1h |
+| Enterprise | Streaming replicas | Multi-region | 5m | 5m |
+
+---
+
+## Platform Health Monitoring
+
+The admin dashboard includes a dedicated capacity monitoring page that tracks:
+
+### Metrics Displayed
+
+1. **Client Growth**
+ - Total active clients
+ - New clients this month
+ - Churn rate
+
+2. **Resource Usage**
+ - Total products across all vendors
+ - Total images stored
+ - Database size
+ - Storage usage
+
+3. **Performance Indicators**
+ - Average API response time
+ - Database query latency
+ - Error rate
+
+4. **Threshold Status**
+ - Current infrastructure tier
+ - Distance to next threshold
+ - Recommended actions
+
+### Alert Configuration
+
+Configure alerts for proactive scaling:
+
+```python
+CAPACITY_THRESHOLDS = {
+ "products_total": {
+ "warning": 400_000, # 80% of 500K
+ "critical": 475_000, # 95% of 500K
+ },
+ "storage_gb": {
+ "warning": 800, # 80% of 1TB
+ "critical": 950,
+ },
+ "db_size_gb": {
+ "warning": 20,
+ "critical": 24,
+ },
+ "avg_response_ms": {
+ "warning": 200,
+ "critical": 500,
+ },
+}
+```
+
+---
+
+## Quick Reference
+
+### TL;DR Sizing Guide
+
+| Clients | Server | RAM | Storage | Database | Monthly Cost |
+|---------|--------|-----|---------|----------|--------------|
+| 1-50 | 2 vCPU | 4GB | 100GB | SQLite | €30 |
+| 50-100 | 4 vCPU | 8GB | 250GB | PostgreSQL | €80 |
+| 100-300 | 4 vCPU | 16GB | 500GB | PostgreSQL | €150 |
+| 300-500 | 8 vCPU | 32GB | 1TB | PostgreSQL + Redis | €350 |
+| 500-1000 | 16 vCPU | 64GB | 2TB + CDN | PostgreSQL + Redis | €700 |
+| 1000+ | 32+ vCPU | 128GB+ | 4TB+ + CDN | PostgreSQL cluster | €1,500+ |
+
+### Key Formulas
+
+```
+Storage (GB) = (Products × Images × 3 sizes × 200KB) / 1,000,000
+DB Size (GB) = Products × 0.00003 + Orders × 0.00002
+Bandwidth (TB/mo) = Products × Daily Views × 500KB × 30 / 1,000,000,000
+```
+
+---
+
+## See Also
+
+- [Pricing Strategy](../marketing/pricing.md) - Tier definitions and limits
+- [Multi-Tenant Architecture](multi-tenant.md) - How client isolation works
+- [Background Tasks](background-tasks.md) - Task queue scaling
+- [Production Deployment](../deployment/production.md) - Deployment guidelines
diff --git a/docs/marketing/pricing.md b/docs/marketing/pricing.md
index 6d7daddf..4c055f05 100644
--- a/docs/marketing/pricing.md
+++ b/docs/marketing/pricing.md
@@ -6,6 +6,8 @@
A focused Order Management System built specifically for Luxembourg e-commerce. Works alongside Letzshop, not instead of it. Provides the operational tools Letzshop lacks: real inventory, correct invoicing, customer ownership.
+> **Infrastructure Planning:** See [Capacity Planning](../architecture/capacity-planning.md) for resource requirements, server sizing, and scaling guidelines per tier.
+
---
## Market Context
diff --git a/docs/operations/capacity-monitoring.md b/docs/operations/capacity-monitoring.md
new file mode 100644
index 00000000..f46625f5
--- /dev/null
+++ b/docs/operations/capacity-monitoring.md
@@ -0,0 +1,121 @@
+# Capacity Monitoring
+
+Detailed guide for monitoring and managing platform capacity.
+
+## Overview
+
+The Capacity Monitoring page (`/admin/platform-health`) provides insights into resource consumption and helps plan infrastructure scaling.
+
+## Key Metrics
+
+### Client Metrics
+
+| Metric | Description | Threshold Indicator |
+|--------|-------------|---------------------|
+| Active Clients | Vendors with activity in last 30 days | Scale planning |
+| Total Products | Sum across all vendors | Storage/DB sizing |
+| Products per Client | Average products per vendor | Tier compliance |
+| Monthly Orders | Order volume this month | Performance impact |
+
+### Storage Metrics
+
+| Metric | Description | Warning | Critical |
+|--------|-------------|---------|----------|
+| Image Files | Total files in storage | 80% of limit | 95% of limit |
+| Image Storage (GB) | Total size in gigabytes | 80% of disk | 95% of disk |
+| Database Size (GB) | PostgreSQL data size | 80% of allocation | 95% of allocation |
+| Backup Size (GB) | Latest backup size | Informational | N/A |
+
+### Performance Metrics
+
+| Metric | Good | Warning | Critical |
+|--------|------|---------|----------|
+| Avg Response Time | < 100ms | 100-300ms | > 300ms |
+| DB Query Time (p95) | < 50ms | 50-200ms | > 200ms |
+| Cache Hit Rate | > 90% | 70-90% | < 70% |
+| Connection Pool Usage | < 70% | 70-90% | > 90% |
+
+## Scaling Recommendations
+
+The system provides automatic scaling recommendations based on current usage:
+
+### Example Recommendations
+
+```
+Current Infrastructure: MEDIUM (100-300 clients)
+Current Usage: 85% of capacity
+
+Recommendations:
+1. [WARNING] Approaching product limit (420K of 500K)
+ → Consider upgrading to LARGE tier
+
+2. [INFO] Database size growing 5GB/month
+ → Plan storage expansion in 3 months
+
+3. [OK] API response times within normal range
+ → No action needed
+```
+
+## Threshold Configuration
+
+Edit thresholds in the admin settings or via environment:
+
+```python
+# Capacity thresholds (can be configured per deployment)
+CAPACITY_THRESHOLDS = {
+ # Products
+ "products_total": {
+ "warning": 400_000,
+ "critical": 475_000,
+ "limit": 500_000,
+ },
+ # Storage (GB)
+ "storage_gb": {
+ "warning": 800,
+ "critical": 950,
+ "limit": 1000,
+ },
+ # Database (GB)
+ "db_size_gb": {
+ "warning": 20,
+ "critical": 24,
+ "limit": 25,
+ },
+ # Monthly orders
+ "monthly_orders": {
+ "warning": 250_000,
+ "critical": 280_000,
+ "limit": 300_000,
+ },
+}
+```
+
+## Historical Trends
+
+View growth trends to plan ahead:
+
+- **30-day growth rate**: Products, storage, clients
+- **Projected capacity date**: When limits will be reached
+- **Seasonal patterns**: Order volume fluctuations
+
+## Alerts
+
+Capacity alerts trigger when:
+
+1. **Warning (Yellow)**: 80% of any threshold
+2. **Critical (Red)**: 95% of any threshold
+3. **Exceeded**: 100%+ of threshold (immediate action)
+
+## Export Reports
+
+Generate capacity reports for planning:
+
+- **Weekly summary**: PDF or CSV
+- **Monthly capacity report**: Detailed analysis
+- **Projection report**: 3/6/12 month forecasts
+
+## Related Documentation
+
+- [Capacity Planning](../architecture/capacity-planning.md) - Full sizing guide
+- [Platform Health](platform-health.md) - Real-time health monitoring
+- [Image Storage](image-storage.md) - Image system details
diff --git a/docs/operations/image-storage.md b/docs/operations/image-storage.md
new file mode 100644
index 00000000..55256313
--- /dev/null
+++ b/docs/operations/image-storage.md
@@ -0,0 +1,246 @@
+# Image Storage System
+
+Documentation for the platform's image storage and management system.
+
+## Overview
+
+The Wizamart platform uses a self-hosted image storage system with:
+
+- **Sharded directory structure** for filesystem performance
+- **Automatic WebP conversion** for optimization
+- **Multiple size variants** for different use cases
+- **CDN-ready architecture** for scaling
+
+## Storage Architecture
+
+### Directory Structure
+
+Images are stored in a sharded directory structure to prevent filesystem performance degradation:
+
+```
+/static/uploads/
+ └── products/
+ ├── 00/ # First 2 chars of hash
+ │ ├── 1a/ # Next 2 chars
+ │ │ ├── 001a2b3c_original.webp
+ │ │ ├── 001a2b3c_800.webp
+ │ │ └── 001a2b3c_200.webp
+ │ └── 2b/
+ │ └── ...
+ ├── 01/
+ │ └── ...
+ └── ff/
+ └── ...
+```
+
+### Hash Generation
+
+The file hash is generated from:
+```python
+hash = md5(f"{vendor_id}:{product_id}:{timestamp}:{original_filename}")[:8]
+```
+
+This ensures:
+- Unique file paths
+- Even distribution across directories
+- Predictable file locations
+
+## Image Variants
+
+Each uploaded image generates multiple variants:
+
+| Variant | Max Dimensions | Format | Use Case |
+|---------|---------------|--------|----------|
+| `original` | As uploaded (max 2000px) | WebP | Detail view, zoom |
+| `800` | 800×800 | WebP | Product cards |
+| `200` | 200×200 | WebP | Thumbnails, grids |
+
+### Size Estimates
+
+| Original Size | After Processing | Storage per Image |
+|---------------|------------------|-------------------|
+| 2MB JPEG | ~200KB (original) + 80KB (800) + 15KB (200) | ~295KB |
+| 500KB JPEG | ~150KB (original) + 60KB (800) + 12KB (200) | ~222KB |
+| 100KB JPEG | ~80KB (original) + 40KB (800) + 10KB (200) | ~130KB |
+
+**Average: ~200KB per image (all variants)**
+
+## Upload Process
+
+### API Endpoint
+
+```http
+POST /api/v1/admin/images/upload
+Content-Type: multipart/form-data
+
+file:
+vendor_id: 123
+product_id: 456 (optional, for product images)
+type: product|category|banner
+```
+
+### Response
+
+```json
+{
+ "success": true,
+ "image": {
+ "id": "001a2b3c",
+ "urls": {
+ "original": "/uploads/products/00/1a/001a2b3c_original.webp",
+ "medium": "/uploads/products/00/1a/001a2b3c_800.webp",
+ "thumb": "/uploads/products/00/1a/001a2b3c_200.webp"
+ },
+ "size_bytes": 295000,
+ "dimensions": {
+ "width": 1200,
+ "height": 1200
+ }
+ }
+}
+```
+
+## Configuration
+
+### Environment Variables
+
+```bash
+# Image storage configuration
+IMAGE_UPLOAD_DIR=/var/www/uploads
+IMAGE_MAX_SIZE_MB=10
+IMAGE_ALLOWED_TYPES=jpg,jpeg,png,gif,webp
+IMAGE_QUALITY=85
+IMAGE_MAX_DIMENSION=2000
+```
+
+### Python Configuration
+
+```python
+# app/core/config.py
+class ImageSettings:
+ UPLOAD_DIR: str = "/static/uploads"
+ MAX_SIZE_MB: int = 10
+ ALLOWED_TYPES: list = ["jpg", "jpeg", "png", "gif", "webp"]
+ QUALITY: int = 85
+ MAX_DIMENSION: int = 2000
+
+ # Generated sizes
+ SIZES: dict = {
+ "original": None, # No resize, just optimize
+ "medium": 800,
+ "thumb": 200,
+ }
+```
+
+## Performance Guidelines
+
+### Filesystem Limits
+
+| Files per Directory | Status | Action |
+|---------------------|--------|--------|
+| < 10,000 | OK | None needed |
+| 10,000 - 50,000 | Monitor | Plan migration |
+| 50,000 - 100,000 | Warning | Increase sharding depth |
+| > 100,000 | Critical | Migrate to object storage |
+
+### Capacity Planning
+
+| Products | Images (5/product) | Total Files (3 sizes) | Storage |
+|----------|--------------------|-----------------------|---------|
+| 10,000 | 50,000 | 150,000 | 30 GB |
+| 50,000 | 250,000 | 750,000 | 150 GB |
+| 100,000 | 500,000 | 1,500,000 | 300 GB |
+| 500,000 | 2,500,000 | 7,500,000 | 1.5 TB |
+
+## CDN Integration
+
+For production deployments, configure a CDN for image delivery:
+
+### Cloudflare (Recommended)
+
+1. Set up Cloudflare for your domain
+2. Configure page rules for `/uploads/*`:
+ - Cache Level: Cache Everything
+ - Edge Cache TTL: 1 month
+ - Browser Cache TTL: 1 week
+
+### nginx Configuration
+
+```nginx
+location /uploads/ {
+ alias /var/www/uploads/;
+ expires 30d;
+ add_header Cache-Control "public, immutable";
+ add_header X-Content-Type-Options nosniff;
+
+    # WebP fallback for older browsers (requires a "map $http_accept $webp_suffix" block in the http{} context)
+ location ~ \.(jpg|jpeg|png)$ {
+ try_files $uri$webp_suffix $uri =404;
+ }
+}
+```
+
+## Maintenance
+
+### Cleanup Orphaned Images
+
+Remove images not referenced by any product:
+
+```bash
+# Run via admin CLI
+python -m scripts.cleanup_orphaned_images --dry-run
+python -m scripts.cleanup_orphaned_images --execute
+```
+
+### Regenerate Variants
+
+If image quality settings change:
+
+```bash
+# Regenerate all variants for a vendor
+python -m scripts.regenerate_images --vendor-id 123
+
+# Regenerate all variants (use with caution)
+python -m scripts.regenerate_images --all
+```
+
+## Monitoring
+
+### Metrics to Track
+
+- Total file count
+- Storage used (GB)
+- Files per directory (max)
+- Upload success rate
+- Average processing time
+
+### Health Checks
+
+The platform health page includes image storage metrics:
+
+- Current file count
+- Storage usage
+- Directory distribution
+- Processing queue status
+
+## Troubleshooting
+
+### Common Issues
+
+**Upload fails with "File too large"**
+- Check `IMAGE_MAX_SIZE_MB` setting
+- Verify nginx `client_max_body_size`
+
+**Images not displaying**
+- Check file permissions (should be readable by web server)
+- Verify URL paths match actual file locations
+
+**Slow uploads**
+- Check disk I/O performance
+- Consider async processing queue
+
+## Related Documentation
+
+- [Capacity Planning](../architecture/capacity-planning.md)
+- [Platform Health](platform-health.md)
+- [Capacity Monitoring](capacity-monitoring.md)
diff --git a/docs/operations/platform-health.md b/docs/operations/platform-health.md
new file mode 100644
index 00000000..18b65247
--- /dev/null
+++ b/docs/operations/platform-health.md
@@ -0,0 +1,92 @@
+# Platform Health Monitoring
+
+This guide covers the platform health monitoring features available in the admin dashboard.
+
+## Overview
+
+The Platform Health page (`/admin/platform-health`) provides real-time visibility into system performance, resource usage, and capacity thresholds.
+
+## Accessing Platform Health
+
+Navigate to **Admin > Platform Health** in the sidebar, or go directly to `/admin/platform-health`.
+
+## Dashboard Sections
+
+### 1. System Overview
+
+Quick glance at overall platform status:
+
+| Indicator | Green | Yellow | Red |
+|-----------|-------|--------|-----|
+| API Response Time | < 100ms | 100-500ms | > 500ms |
+| Error Rate | < 0.1% | 0.1-1% | > 1% |
+| Database Health | Connected | Slow queries | Disconnected |
+| Storage | < 70% | 70-85% | > 85% |
+
+### 2. Resource Usage
+
+Real-time metrics:
+
+- **CPU Usage**: Current and 24h average
+- **Memory Usage**: Used vs available
+- **Disk Usage**: Storage consumption with trend
+- **Network**: Inbound/outbound throughput
+
+### 3. Capacity Metrics
+
+Track growth toward scaling thresholds:
+
+- **Total Products**: Count across all vendors
+- **Total Images**: Files stored in image system
+- **Database Size**: Current size vs recommended max
+- **Active Clients**: Monthly active vendor accounts
+
+### 4. Performance Trends
+
+Historical charts (7-day, 30-day):
+
+- API response times (p50, p95, p99)
+- Request volume by endpoint
+- Database query latency
+- Error rate over time
+
+## Alert Configuration
+
+### Threshold Alerts
+
+Configure alerts for proactive monitoring:
+
+```python
+# In app/core/config.py
+HEALTH_THRESHOLDS = {
+ "cpu_percent": {"warning": 70, "critical": 85},
+ "memory_percent": {"warning": 75, "critical": 90},
+ "disk_percent": {"warning": 70, "critical": 85},
+ "response_time_ms": {"warning": 200, "critical": 500},
+ "error_rate_percent": {"warning": 1.0, "critical": 5.0},
+}
+```
+
+### Notification Channels
+
+Alerts can be sent via:
+- Email to admin users
+- Slack webhook (if configured)
+- Dashboard notifications
+
+## Related Pages
+
+- [Capacity Monitoring](capacity-monitoring.md) - Detailed capacity metrics
+- [Image Storage](image-storage.md) - Image system management
+- [Capacity Planning](../architecture/capacity-planning.md) - Infrastructure sizing guide
+
+## API Endpoints
+
+The platform health page uses these admin API endpoints:
+
+| Endpoint | Description |
+|----------|-------------|
+| `GET /api/v1/admin/platform/health` | Overall health status |
+| `GET /api/v1/admin/platform/metrics` | Current metrics |
+| `GET /api/v1/admin/platform/metrics/history` | Historical data |
+| `GET /api/v1/admin/platform/capacity` | Capacity usage |
diff --git a/mkdocs.yml b/mkdocs.yml
index b6fe4a68..708eb062 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -41,6 +41,7 @@ nav:
- Frontend Structure: architecture/frontend-structure.md
- Models Structure: architecture/models-structure.md
- Background Tasks: architecture/background-tasks.md
+ - Capacity Planning: architecture/capacity-planning.md
- API Consolidation:
- Proposal: architecture/api-consolidation-proposal.md
- Migration Status: architecture/api-migration-status.md
@@ -186,6 +187,14 @@ nav:
- Environment Variables: deployment/environment.md
- Stripe Integration: deployment/stripe-integration.md
+ # ============================================
+ # OPERATIONS (Platform Health & Monitoring)
+ # ============================================
+ - Operations:
+ - Platform Health: operations/platform-health.md
+ - Capacity Monitoring: operations/capacity-monitoring.md
+ - Image Storage: operations/image-storage.md
+
# ============================================
# FEATURES
# ============================================
diff --git a/models/schema/image.py b/models/schema/image.py
new file mode 100644
index 00000000..c0c61d27
--- /dev/null
+++ b/models/schema/image.py
@@ -0,0 +1,46 @@
+# models/schema/image.py
+"""
+Pydantic schemas for image operations.
+"""
+
+from pydantic import BaseModel
+
+
+class ImageUrls(BaseModel):
+ """URLs for image variants."""
+
+ original: str
+ medium: str | None = None # 800px variant
+ thumb: str | None = None # 200px variant
+
+ # Allow arbitrary keys for flexibility
+ class Config:
+ extra = "allow"
+
+
+class ImageUploadResponse(BaseModel):
+ """Response from image upload."""
+
+ success: bool
+ image: dict | None = None
+ error: str | None = None
+
+
+class ImageDeleteResponse(BaseModel):
+ """Response from image deletion."""
+
+ success: bool
+ message: str
+
+
+class ImageStorageStats(BaseModel):
+ """Image storage statistics."""
+
+ total_files: int
+ total_size_bytes: int
+ total_size_mb: float
+ total_size_gb: float
+ directory_count: int
+ max_files_per_dir: int
+ avg_files_per_dir: float
+ products_estimated: int
diff --git a/requirements.txt b/requirements.txt
index bd6055e3..948b4cd1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,6 +22,12 @@ python-multipart==0.0.20
pandas==2.2.3
requests==2.32.3
+# Image processing
+Pillow>=10.0.0
+
+# System monitoring
+psutil>=5.9.0
+
# PDF generation
weasyprint==62.3
diff --git a/static/admin/js/platform-health.js b/static/admin/js/platform-health.js
new file mode 100644
index 00000000..ae0f4830
--- /dev/null
+++ b/static/admin/js/platform-health.js
@@ -0,0 +1,128 @@
+// static/admin/js/platform-health.js
+/**
+ * Admin platform health monitoring page logic
+ * Displays system metrics, capacity thresholds, and scaling recommendations
+ */
+
// Reuse a pre-registered logger when available, otherwise create one on
// the fly. NOTE(review): second argument `false` presumably disables
// verbose/debug output -- confirm against LogConfig.createLogger.
const adminPlatformHealthLog = window.LogConfig.loggers.adminPlatformHealth ||
    window.LogConfig.createLogger('adminPlatformHealth', false);

adminPlatformHealthLog.info('Loading...');
+
/**
 * Alpine-style component factory for the admin platform-health page.
 * Extends the base layout state (`data()`) with health metrics loading,
 * a 30-second auto-refresh loop, and display formatters.
 */
function adminPlatformHealth() {
    adminPlatformHealthLog.info('adminPlatformHealth() called');

    return {
        // Inherit base layout state
        ...data(),

        // Page identifier used by the shared layout
        currentPage: 'platform-health',

        // Loading states
        loading: true,
        error: '',

        // Raw health payload from GET /admin/platform/health
        health: null,

        // Auto-refresh timer handle (30-second interval)
        refreshInterval: null,

        /**
         * One-time initialization: loads data and starts auto-refresh.
         * Guarded by a window flag so a double mount does not start two
         * timers.
         */
        async init() {
            adminPlatformHealthLog.info('Platform Health init() called');

            // Guard against multiple initialization
            if (window._adminPlatformHealthInitialized) {
                adminPlatformHealthLog.warn('Already initialized, skipping');
                return;
            }
            window._adminPlatformHealthInitialized = true;

            // Load initial data
            await this.loadHealth();

            // Set up auto-refresh every 30 seconds
            this.refreshInterval = setInterval(() => {
                this.loadHealth();
            }, 30000);

            adminPlatformHealthLog.info('Platform Health initialization complete');
        },

        /**
         * Clean up on component destroy.
         * FIX: also reset the global init guard -- previously it stayed
         * set after teardown, so a re-mounted component skipped init()
         * and auto-refresh never restarted.
         */
        destroy() {
            if (this.refreshInterval) {
                clearInterval(this.refreshInterval);
                this.refreshInterval = null;
            }
            window._adminPlatformHealthInitialized = false;
        },

        /**
         * Load platform health data from the API and store it on
         * `this.health`; populates `this.error` on failure.
         */
        async loadHealth() {
            this.loading = true;
            this.error = '';

            try {
                const response = await apiClient.get('/admin/platform/health');
                this.health = response;

                adminPlatformHealthLog.info('Loaded health data:', {
                    status: response.overall_status,
                    tier: response.infrastructure_tier
                });
            } catch (error) {
                adminPlatformHealthLog.error('Failed to load health:', error);
                this.error = error.message || 'Failed to load platform health';
            } finally {
                this.loading = false;
            }
        },

        /**
         * Manual refresh (same as one auto-refresh tick).
         */
        async refresh() {
            await this.loadHealth();
        },

        /**
         * Format a number for display: fractional numbers get two fixed
         * decimals, integers get locale thousands grouping, null/undefined
         * become '0'.
         */
        formatNumber(num) {
            if (num === null || num === undefined) return '0';
            if (typeof num === 'number' && num % 1 !== 0) {
                return num.toFixed(2);
            }
            return new Intl.NumberFormat('en-US').format(num);
        },

        /**
         * Format a storage size given in GB; values below 1 GB are shown
         * in whole MB, null/undefined become '0 GB'.
         */
        formatStorage(gb) {
            if (gb === null || gb === undefined) return '0 GB';
            if (gb < 1) {
                return (gb * 1024).toFixed(0) + ' MB';
            }
            return gb.toFixed(2) + ' GB';
        },

        /**
         * Format a timestamp as a locale time string; returns 'Unknown'
         * for missing or unparseable input.
         * FIX: `new Date(bad)` never throws -- the old try/catch was dead
         * code and invalid timestamps rendered the literal string
         * "Invalid Date". Detect that case via a NaN `getTime()` instead.
         */
        formatTime(timestamp) {
            if (!timestamp) return 'Unknown';
            const date = new Date(timestamp);
            return Number.isNaN(date.getTime())
                ? 'Unknown'
                : date.toLocaleTimeString();
        }
    };
}