# app/services/platform_health_service.py
"""
Platform health and capacity monitoring service.

Provides:
- System resource metrics (CPU, memory, disk)
- Database metrics and statistics
- Capacity threshold calculations
- Scaling recommendations
"""

import logging
from datetime import datetime

import psutil
from sqlalchemy import func, text
from sqlalchemy.orm import Session

from app.services.image_service import image_service
from models.database.inventory import Inventory
from models.database.order import Order
from models.database.product import Product
from models.database.vendor import Vendor

logger = logging.getLogger(__name__)

# ============================================================================
# Thresholds Configuration
# ============================================================================

# For each monitored metric: the value at which status becomes "warning",
# the value at which it becomes "critical", and the hard limit used as the
# denominator of "percent_used".
CAPACITY_THRESHOLDS = {
    "products_total": {
        "warning": 400_000,
        "critical": 475_000,
        "limit": 500_000,
    },
    "storage_gb": {
        "warning": 800,
        "critical": 950,
        "limit": 1000,
    },
    "db_size_mb": {
        "warning": 20_000,
        "critical": 24_000,
        "limit": 25_000,
    },
    "disk_percent": {
        "warning": 70,
        "critical": 85,
        "limit": 100,
    },
    "memory_percent": {
        "warning": 75,
        "critical": 90,
        "limit": 100,
    },
    "cpu_percent": {
        "warning": 70,
        "critical": 85,
        "limit": 100,
    },
}

# Ordered smallest to largest; a tier with ``None`` limits is unbounded and
# acts as the catch-all for anything that exceeds every bounded tier.
INFRASTRUCTURE_TIERS = [
    {"name": "Starter", "max_clients": 50, "max_products": 10_000},
    {"name": "Small", "max_clients": 100, "max_products": 30_000},
    {"name": "Medium", "max_clients": 300, "max_products": 100_000},
    {"name": "Large", "max_clients": 500, "max_products": 250_000},
    {"name": "Scale", "max_clients": 1000, "max_products": 500_000},
    {"name": "Enterprise", "max_clients": None, "max_products": None},
]

# Size-in-bytes query per SQLAlchemy dialect name. Selecting by dialect avoids
# sending a statement the backend cannot parse: on PostgreSQL a failed
# statement aborts the surrounding transaction, which would make every later
# query on the same session fail as well.
_DB_SIZE_QUERIES = {
    "sqlite": (
        "SELECT page_count * page_size as size "
        "FROM pragma_page_count(), pragma_page_size()"
    ),
    "postgresql": "SELECT pg_database_size(current_database())",
}

# (display name, CAPACITY_THRESHOLDS key, metrics source, key within source),
# in the order the thresholds are reported.
_THRESHOLD_SOURCES = (
    ("Products", "products_total", "database", "products_count"),
    ("Image Storage (GB)", "storage_gb", "image_storage", "total_size_gb"),
    ("Database (MB)", "db_size_mb", "database", "size_mb"),
    ("Disk Usage (%)", "disk_percent", "system", "disk_percent"),
    ("Memory Usage (%)", "memory_percent", "system", "memory_percent"),
    ("CPU Usage (%)", "cpu_percent", "system", "cpu_percent"),
)


class PlatformHealthService:
    """Service for platform health and capacity monitoring."""

    def get_system_metrics(self) -> dict:
        """Get current system resource metrics.

        Returns:
            Dict with CPU percent, memory percent/used/total and disk
            percent/used/total for the root filesystem (sizes in GB,
            rounded to two decimals).
        """
        # interval=0.1 makes psutil sample CPU usage over a 0.1 s window.
        cpu_percent = psutil.cpu_percent(interval=0.1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage("/")
        gib = 1024**3

        return {
            "cpu_percent": cpu_percent,
            "memory_percent": memory.percent,
            "memory_used_gb": round(memory.used / gib, 2),
            "memory_total_gb": round(memory.total / gib, 2),
            "disk_percent": disk.percent,
            "disk_used_gb": round(disk.used / gib, 2),
            "disk_total_gb": round(disk.total / gib, 2),
        }

    def get_database_metrics(self, db: Session) -> dict:
        """Get database statistics.

        Args:
            db: Session used to run the count and size queries.

        Returns:
            Dict with the database size in MB and row counts for products,
            orders, vendors and inventory (0 when a count yields no value).
        """
        products_count = db.query(func.count(Product.id)).scalar() or 0
        orders_count = db.query(func.count(Order.id)).scalar() or 0
        vendors_count = db.query(func.count(Vendor.id)).scalar() or 0
        inventory_count = db.query(func.count(Inventory.id)).scalar() or 0

        db_size = self._get_database_size(db)

        return {
            "size_mb": db_size,
            "products_count": products_count,
            "orders_count": orders_count,
            "vendors_count": vendors_count,
            "inventory_count": inventory_count,
        }

    def get_image_storage_metrics(self) -> dict:
        """Get image storage statistics.

        Returns:
            The subset of ``image_service.get_storage_stats()`` that the
            health report exposes.
        """
        stats = image_service.get_storage_stats()
        exposed_keys = (
            "total_files",
            "total_size_mb",
            "total_size_gb",
            "max_files_per_dir",
            "products_estimated",
        )
        return {key: stats[key] for key in exposed_keys}

    def get_capacity_metrics(self, db: Session) -> dict:
        """Get capacity-focused metrics for planning.

        Args:
            db: Session used to run the queries.

        Returns:
            Dict with total products, products per vendor name, image count
            and storage used, database size, orders created since the start
            of the current month, and the number of active vendors.
        """
        # Products total
        products_total = db.query(func.count(Product.id)).scalar() or 0

        # Products by vendor (inner join: vendors without products are omitted)
        vendor_counts = (
            db.query(Vendor.name, func.count(Product.id))
            .join(Product, Vendor.id == Product.vendor_id)
            .group_by(Vendor.name)
            .all()
        )
        products_by_vendor = {name or "Unknown": count for name, count in vendor_counts}

        # Image storage
        image_stats = image_service.get_storage_stats()

        # Database size
        db_size = self._get_database_size(db)

        # Orders this month. microsecond must be zeroed as well, otherwise
        # orders created in the first fraction of a second of the month are
        # excluded by the >= comparison.
        # NOTE(review): utcnow() yields a naive datetime; this presumes
        # Order.created_at stores naive UTC values - confirm against the model.
        start_of_month = datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0
        )
        orders_this_month = (
            db.query(func.count(Order.id))
            .filter(Order.created_at >= start_of_month)
            .scalar()
            or 0
        )

        # Active vendors
        active_vendors = (
            db.query(func.count(Vendor.id))
            .filter(Vendor.is_active == True)  # noqa: E712
            .scalar()
            or 0
        )

        return {
            "products_total": products_total,
            "products_by_vendor": products_by_vendor,
            "images_total": image_stats["total_files"],
            "storage_used_gb": image_stats["total_size_gb"],
            "database_size_mb": db_size,
            "orders_this_month": orders_this_month,
            "active_vendors": active_vendors,
        }

    def get_subscription_capacity(self, db: Session) -> dict:
        """
        Calculate theoretical capacity based on all vendor subscriptions.

        Returns aggregated limits and current usage for capacity planning.

        Only subscriptions whose status is "active" or "trial" are counted.
        A limit of ``None`` on a subscription is treated as unlimited; as soon
        as one subscription is unlimited for a resource, the utilization
        percentage for that resource is reported as ``None``.

        Args:
            db: Session used to run the queries.

        Returns:
            Dict with the subscription count, a tier -> count distribution,
            and one utilization dict each for products, monthly orders and
            team members. The "headroom" key is only present when a finite,
            non-zero limit exists.
        """
        # Imported locally, as in the original module layout.
        from models.database.subscription import VendorSubscription
        from models.database.vendor import VendorUser

        # Get all active subscriptions with their limits
        subscriptions = (
            db.query(VendorSubscription)
            .filter(VendorSubscription.status.in_(["active", "trial"]))
            .all()
        )

        # Aggregate theoretical limits
        total_products_limit = 0
        total_orders_limit = 0
        total_team_limit = 0
        unlimited_products = 0
        unlimited_orders = 0
        unlimited_team = 0

        tier_distribution = {}

        for sub in subscriptions:
            # Track tier distribution
            tier = sub.tier or "unknown"
            tier_distribution[tier] = tier_distribution.get(tier, 0) + 1

            # Aggregate limits
            if sub.products_limit is None:
                unlimited_products += 1
            else:
                total_products_limit += sub.products_limit

            if sub.orders_limit is None:
                unlimited_orders += 1
            else:
                total_orders_limit += sub.orders_limit

            if sub.team_members_limit is None:
                unlimited_team += 1
            else:
                total_team_limit += sub.team_members_limit

        # Get actual usage
        actual_products = db.query(func.count(Product.id)).scalar() or 0
        actual_team = (
            db.query(func.count(VendorUser.id))
            .filter(VendorUser.is_active == True)  # noqa: E712
            .scalar()
            or 0
        )

        # Orders this period (aggregate across all subscriptions). A missing
        # (None) counter is counted as 0 instead of raising TypeError in sum().
        total_orders_used = sum(s.orders_this_period or 0 for s in subscriptions)

        def calc_utilization(actual: int, limit: int, unlimited: int) -> dict:
            """Build the utilization dict for one resource."""
            if unlimited > 0:
                # Some subscriptions have unlimited - can't calculate true %
                return {
                    "actual": actual,
                    "theoretical_limit": limit,
                    "unlimited_count": unlimited,
                    "utilization_percent": None,
                    "has_unlimited": True,
                }
            if limit > 0:
                return {
                    "actual": actual,
                    "theoretical_limit": limit,
                    "unlimited_count": 0,
                    "utilization_percent": round((actual / limit) * 100, 1),
                    "headroom": limit - actual,
                    "has_unlimited": False,
                }
            # No subscriptions contributed a limit: nothing to divide by.
            return {
                "actual": actual,
                "theoretical_limit": 0,
                "unlimited_count": 0,
                "utilization_percent": 0,
                "has_unlimited": False,
            }

        return {
            "total_subscriptions": len(subscriptions),
            "tier_distribution": tier_distribution,
            "products": calc_utilization(
                actual_products, total_products_limit, unlimited_products
            ),
            "orders_monthly": calc_utilization(
                total_orders_used, total_orders_limit, unlimited_orders
            ),
            "team_members": calc_utilization(
                actual_team, total_team_limit, unlimited_team
            ),
        }

    def get_full_health_report(self, db: Session) -> dict:
        """Get comprehensive platform health report.

        Args:
            db: Session used for all database-backed metrics.

        Returns:
            Dict combining system, database, image storage and subscription
            metrics with the derived thresholds, recommendations,
            infrastructure tier and overall status, plus an ISO-8601
            timestamp taken from ``datetime.utcnow()`` (naive UTC).
        """
        # System metrics
        system = self.get_system_metrics()

        # Database metrics
        database = self.get_database_metrics(db)

        # Image storage metrics
        image_storage = self.get_image_storage_metrics()

        # Subscription capacity
        subscription_capacity = self.get_subscription_capacity(db)

        # Calculate thresholds
        thresholds = self._calculate_thresholds(system, database, image_storage)

        # Generate recommendations
        recommendations = self._generate_recommendations(thresholds, database)

        # Determine infrastructure tier
        tier, next_trigger = self._determine_tier(
            database["vendors_count"], database["products_count"]
        )

        # Overall status
        overall_status = self._determine_overall_status(thresholds)

        return {
            "timestamp": datetime.utcnow().isoformat(),
            "overall_status": overall_status,
            "system": system,
            "database": database,
            "image_storage": image_storage,
            "subscription_capacity": subscription_capacity,
            "thresholds": thresholds,
            "recommendations": recommendations,
            "infrastructure_tier": tier,
            "next_tier_trigger": next_trigger,
        }

    def _get_database_size(self, db: Session) -> float:
        """Get database size in MB.

        The size query is chosen from the dialect of the session's bind
        (SQLite or PostgreSQL) rather than by trial and error, so no
        statement that is invalid for the backend is ever executed.

        Args:
            db: Session whose bind identifies the backend and runs the query.

        Returns:
            Size in MB rounded to two decimals, or 0.0 when the dialect is
            not supported or the size cannot be determined. This is a
            best-effort metric: failures are logged, never raised.
        """
        # Local import keeps the module's top-level import block untouched.
        from sqlalchemy.exc import SQLAlchemyError

        try:
            dialect_name = db.get_bind().dialect.name
        except SQLAlchemyError:
            logger.warning(
                "Could not determine database dialect for size query", exc_info=True
            )
            return 0.0

        size_query = _DB_SIZE_QUERIES.get(dialect_name)
        if size_query is None:
            logger.debug(
                "Database size not supported for dialect %r", dialect_name
            )
            return 0.0

        try:
            row = db.execute(text(size_query)).fetchone()
        except SQLAlchemyError:
            logger.warning(
                "Database size query failed for dialect %r",
                dialect_name,
                exc_info=True,
            )
            return 0.0

        if row and row[0] is not None:
            return round(row[0] / (1024 * 1024), 2)
        return 0.0

    def _calculate_thresholds(
        self, system: dict, database: dict, image_storage: dict
    ) -> list[dict]:
        """Calculate threshold status for each metric.

        Args:
            system: Result of ``get_system_metrics``.
            database: Result of ``get_database_metrics``.
            image_storage: Result of ``get_image_storage_metrics``.

        Returns:
            One threshold dict per monitored metric, in the fixed order
            products, image storage, database size, disk, memory, CPU.
        """
        sources = {
            "system": system,
            "database": database,
            "image_storage": image_storage,
        }

        thresholds = []
        for label, config_key, source_name, metric_key in _THRESHOLD_SOURCES:
            config = CAPACITY_THRESHOLDS[config_key]
            thresholds.append(
                self._create_threshold(
                    label,
                    sources[source_name][metric_key],
                    config["warning"],
                    config["critical"],
                    config["limit"],
                )
            )
        return thresholds

    def _create_threshold(
        self, name: str, current: float, warning: float, critical: float, limit: float
    ) -> dict:
        """Create a threshold status object.

        Args:
            name: Display name of the metric.
            current: Current value of the metric.
            warning: Value at or above which the status is "warning".
            critical: Value at or above which the status is "critical".
            limit: Hard limit; used to compute ``percent_used``.

        Returns:
            Dict echoing the inputs plus ``status`` ("ok", "warning" or
            "critical") and ``percent_used`` rounded to one decimal (0 when
            ``limit`` is not positive).
        """
        percent_used = (current / limit) * 100 if limit > 0 else 0

        if current >= critical:
            status = "critical"
        elif current >= warning:
            status = "warning"
        else:
            status = "ok"

        return {
            "name": name,
            "current": current,
            "warning": warning,
            "critical": critical,
            "limit": limit,
            "status": status,
            "percent_used": round(percent_used, 1),
        }

    def _generate_recommendations(
        self, thresholds: list[dict], database: dict
    ) -> list[dict]:
        """Generate scaling recommendations based on thresholds.

        Args:
            thresholds: Threshold dicts as built by ``_create_threshold``.
            database: Result of ``get_database_metrics``; supplies the vendor
                and product counts for the tier-based recommendation.

        Returns:
            One recommendation per threshold in "critical" or "warning"
            state, an optional informational entry when the platform is
            approaching the next infrastructure tier, or a single
            "All systems healthy" entry when nothing else was produced.
        """
        recommendations = []

        for threshold in thresholds:
            status = threshold["status"]
            if status == "critical":
                title = f"{threshold['name']} at critical level"
                action = "Immediate scaling or cleanup required"
            elif status == "warning":
                title = f"{threshold['name']} approaching limit"
                action = "Plan scaling in the next 2-4 weeks"
            else:
                continue

            recommendations.append(
                {
                    # The priority label is the threshold status itself.
                    "priority": status,
                    "title": title,
                    "description": (
                        f"Currently at {threshold['percent_used']:.0f}% of capacity "
                        f"({threshold['current']:.0f} of {threshold['limit']:.0f})"
                    ),
                    "action": action,
                }
            )

        # Add tier-based recommendations
        if database["vendors_count"] > 0:
            tier, next_trigger = self._determine_tier(
                database["vendors_count"], database["products_count"]
            )
            if next_trigger:
                recommendations.append(
                    {
                        "priority": "info",
                        "title": f"Current tier: {tier}",
                        "description": next_trigger,
                        "action": "Review capacity planning documentation",
                    }
                )

        # If no issues, add positive status
        if not recommendations:
            recommendations.append(
                {
                    "priority": "info",
                    "title": "All systems healthy",
                    "description": "No capacity concerns at this time",
                    "action": None,
                }
            )

        return recommendations

    def _determine_tier(self, vendors: int, products: int) -> tuple[str, str | None]:
        """Determine current infrastructure tier and next trigger.

        Walks ``INFRASTRUCTURE_TIERS`` in order and picks the first tier
        whose vendor and product limits both accommodate the given counts;
        the unbounded tier matches anything that exceeds all bounded tiers.

        Args:
            vendors: Number of vendors.
            products: Number of products.

        Returns:
            ``(tier_name, next_trigger)`` where ``next_trigger`` is a message
            when either count exceeds 70% of the matched tier's limit and a
            larger tier exists, otherwise ``None``.
        """
        current_tier = "Starter"
        next_trigger = None

        for i, tier in enumerate(INFRASTRUCTURE_TIERS):
            max_clients = tier["max_clients"]
            max_products = tier["max_products"]

            if max_clients is None:
                # Unbounded tier: nothing larger to approach.
                current_tier = tier["name"]
                break

            if vendors <= max_clients and products <= max_products:
                current_tier = tier["name"]

                # Check proximity to next tier
                if i < len(INFRASTRUCTURE_TIERS) - 1:
                    next_tier = INFRASTRUCTURE_TIERS[i + 1]
                    vendor_percent = (vendors / max_clients) * 100
                    product_percent = (products / max_products) * 100

                    if vendor_percent > 70 or product_percent > 70:
                        next_trigger = (
                            f"Approaching {next_tier['name']} tier "
                            f"(vendors: {vendor_percent:.0f}%, products: {product_percent:.0f}%)"
                        )
                break

        return current_tier, next_trigger

    def _determine_overall_status(self, thresholds: list[dict]) -> str:
        """Determine overall platform status.

        Args:
            thresholds: Threshold dicts as built by ``_create_threshold``.

        Returns:
            "critical" if any threshold is critical, otherwise "degraded" if
            any threshold is in warning state, otherwise "healthy".
        """
        statuses = {t["status"] for t in thresholds}

        if "critical" in statuses:
            return "critical"
        if "warning" in statuses:
            return "degraded"
        return "healthy"


# Create service instance
platform_health_service = PlatformHealthService()