orion/app/services/platform_health_service.py

# app/services/platform_health_service.py
"""
Platform health and capacity monitoring service.

Provides:
- System resource metrics (CPU, memory, disk)
- Database metrics and statistics
- Capacity threshold calculations
- Scaling recommendations
"""

import logging
from datetime import datetime

import psutil
from sqlalchemy import func, text
from sqlalchemy.orm import Session

from app.services.image_service import image_service
from models.database.inventory import Inventory
from models.database.order import Order
from models.database.product import Product
from models.database.vendor import Vendor

logger = logging.getLogger(__name__)


# ============================================================================
# Thresholds Configuration
# ============================================================================

# Per-metric capacity bands, read by PlatformHealthService._calculate_thresholds.
# For each metric, _create_threshold reports:
#   - "critical" when the current value is >= "critical"
#   - "warning"  when the current value is >= "warning"
#   - "ok"       otherwise
# "limit" is the denominator used for the reported percent_used.
CAPACITY_THRESHOLDS = {
    # Compared against the total product row count.
    "products_total": {
        "warning": 400_000,
        "critical": 475_000,
        "limit": 500_000,
    },
    # Compared against the image storage size in GB.
    "storage_gb": {
        "warning": 800,
        "critical": 950,
        "limit": 1000,
    },
    # Compared against the database size in MB.
    "db_size_mb": {
        "warning": 20_000,
        "critical": 24_000,
        "limit": 25_000,
    },
    # The three *_percent metrics are compared against psutil percentages.
    "disk_percent": {
        "warning": 70,
        "critical": 85,
        "limit": 100,
    },
    "memory_percent": {
        "warning": 75,
        "critical": 90,
        "limit": 100,
    },
    "cpu_percent": {
        "warning": 70,
        "critical": 85,
        "limit": 100,
    },
}

# Infrastructure tiers, ordered from smallest to largest.
# PlatformHealthService._determine_tier walks this list in order and selects the
# first tier whose max_clients and max_products both accommodate the current
# vendor and product counts, so the ordering is significant.
# A tier whose max_clients is None is treated as the catch-all final tier.
INFRASTRUCTURE_TIERS = [
    {"name": "Starter", "max_clients": 50, "max_products": 10_000},
    {"name": "Small", "max_clients": 100, "max_products": 30_000},
    {"name": "Medium", "max_clients": 300, "max_products": 100_000},
    {"name": "Large", "max_clients": 500, "max_products": 250_000},
    {"name": "Scale", "max_clients": 1000, "max_products": 500_000},
    {"name": "Enterprise", "max_clients": None, "max_products": None},
]


class PlatformHealthService:
    """Service for platform health and capacity monitoring."""

    def get_system_metrics(self) -> dict:
        """Sample CPU, memory and root-disk usage via psutil.

        Returns:
            Dict with ``cpu_percent``, ``memory_percent``, ``memory_used_gb``,
            ``memory_total_gb``, ``disk_percent``, ``disk_used_gb`` and
            ``disk_total_gb``. The ``*_gb`` values are byte counts divided by
            1024**3 and rounded to two decimals.
        """
        bytes_per_gb = 1024**3

        def as_gb(byte_count: float) -> float:
            return round(byte_count / bytes_per_gb, 2)

        # CPU load is measured over a 0.1 second sampling interval.
        cpu_load = psutil.cpu_percent(interval=0.1)
        ram = psutil.virtual_memory()
        root_disk = psutil.disk_usage("/")

        metrics = {"cpu_percent": cpu_load}
        metrics["memory_percent"] = ram.percent
        metrics["memory_used_gb"] = as_gb(ram.used)
        metrics["memory_total_gb"] = as_gb(ram.total)
        metrics["disk_percent"] = root_disk.percent
        metrics["disk_used_gb"] = as_gb(root_disk.used)
        metrics["disk_total_gb"] = as_gb(root_disk.total)
        return metrics

    def get_database_metrics(self, db: Session) -> dict:
        """Collect row counts for the core tables plus the database size.

        Args:
            db: SQLAlchemy session used for the count queries.

        Returns:
            Dict with ``size_mb`` and the ``products_count``, ``orders_count``,
            ``vendors_count`` and ``inventory_count`` row counts. A count that
            comes back falsy (e.g. ``None``) is reported as ``0``.
        """

        def count_rows(model) -> int:
            return db.query(func.count(model.id)).scalar() or 0

        # Counts are queried first, in this order, then the size lookup runs.
        row_counts = {
            "products_count": count_rows(Product),
            "orders_count": count_rows(Order),
            "vendors_count": count_rows(Vendor),
            "inventory_count": count_rows(Inventory),
        }
        size_in_mb = self._get_database_size(db)

        return {"size_mb": size_in_mb, **row_counts}

    def get_image_storage_metrics(self) -> dict:
        """Return the subset of image-service storage stats exposed by this service.

        Returns:
            Dict with ``total_files``, ``total_size_mb``, ``total_size_gb``,
            ``max_files_per_dir`` and ``products_estimated``, copied from
            ``image_service.get_storage_stats()``.
        """
        exported_keys = (
            "total_files",
            "total_size_mb",
            "total_size_gb",
            "max_files_per_dir",
            "products_estimated",
        )
        storage = image_service.get_storage_stats()
        return {key: storage[key] for key in exported_keys}

    def get_capacity_metrics(self, db: Session) -> dict:
        """Get capacity-focused metrics for planning.

        Args:
            db: SQLAlchemy session used for all count queries.

        Returns:
            Dict with ``products_total``, ``products_by_vendor`` (vendor name ->
            product count), ``images_total``, ``storage_used_gb``,
            ``database_size_mb``, ``orders_this_month`` and ``active_vendors``.
        """
        # Products total
        products_total = db.query(func.count(Product.id)).scalar() or 0

        # Products by vendor
        vendor_counts = (
            db.query(Vendor.name, func.count(Product.id))
            .join(Product, Vendor.id == Product.vendor_id)
            .group_by(Vendor.name)
            .all()
        )
        # Every falsy name (NULL, empty string) is reported as "Unknown".
        # Accumulate so that several such groups add up instead of the last
        # one silently overwriting the others.
        products_by_vendor: dict = {}
        for name, count in vendor_counts:
            label = name or "Unknown"
            products_by_vendor[label] = products_by_vendor.get(label, 0) + count

        # Image storage
        image_stats = image_service.get_storage_stats()

        # Database size
        db_size = self._get_database_size(db)

        # Orders this month. The microsecond must be reset too, otherwise the
        # boundary keeps the current sub-second offset and orders created in the
        # first fraction of a second of the month are excluded.
        start_of_month = datetime.utcnow().replace(
            day=1, hour=0, minute=0, second=0, microsecond=0
        )
        orders_this_month = (
            db.query(func.count(Order.id))
            .filter(Order.created_at >= start_of_month)
            .scalar()
            or 0
        )

        # Active vendors
        active_vendors = (
            db.query(func.count(Vendor.id))
            .filter(Vendor.is_active == True)  # noqa: E712
            .scalar()
            or 0
        )

        return {
            "products_total": products_total,
            "products_by_vendor": products_by_vendor,
            "images_total": image_stats["total_files"],
            "storage_used_gb": image_stats["total_size_gb"],
            "database_size_mb": db_size,
            "orders_this_month": orders_this_month,
            "active_vendors": active_vendors,
        }

    def get_subscription_capacity(self, db: Session) -> dict:
        """
        Aggregate subscription limits and compare them with current usage.

        Only subscriptions whose status is "active" or "trial" are considered.
        A limit of ``None`` on a subscription is counted as "unlimited" for that
        resource and excluded from the summed limit.

        Returns:
            Dict with ``total_subscriptions``, ``tier_distribution`` (tier ->
            number of subscriptions) and one utilization dict each for
            ``products``, ``orders_monthly`` and ``team_members``.
        """
        from models.database.subscription import VendorSubscription
        from models.database.vendor import VendorUser

        live_subs = (
            db.query(VendorSubscription)
            .filter(VendorSubscription.status.in_(["active", "trial"]))
            .all()
        )

        # Per resource: [summed finite limit, number of unlimited subscriptions]
        products_cap = [0, 0]
        orders_cap = [0, 0]
        team_cap = [0, 0]
        tier_distribution = {}

        def absorb(bucket: list, limit_value) -> None:
            # None means "no limit" for this subscription.
            if limit_value is None:
                bucket[1] += 1
            else:
                bucket[0] += limit_value

        for subscription in live_subs:
            tier_name = subscription.tier or "unknown"
            tier_distribution[tier_name] = tier_distribution.get(tier_name, 0) + 1

            absorb(products_cap, subscription.products_limit)
            absorb(orders_cap, subscription.orders_limit)
            absorb(team_cap, subscription.team_members_limit)

        # Current usage
        products_in_use = db.query(func.count(Product.id)).scalar() or 0
        team_in_use = (
            db.query(func.count(VendorUser.id))
            .filter(VendorUser.is_active == True)  # noqa: E712
            .scalar()
            or 0
        )
        orders_in_use = sum(item.orders_this_period for item in live_subs)

        def summarize(actual: int, limit: int, unlimited: int) -> dict:
            # With any unlimited subscription a true percentage is undefined.
            if unlimited > 0:
                return {
                    "actual": actual,
                    "theoretical_limit": limit,
                    "unlimited_count": unlimited,
                    "utilization_percent": None,
                    "has_unlimited": True,
                }

            # No finite limit at all: report zero utilization.
            if not limit > 0:
                return {
                    "actual": actual,
                    "theoretical_limit": 0,
                    "unlimited_count": 0,
                    "utilization_percent": 0,
                    "has_unlimited": False,
                }

            return {
                "actual": actual,
                "theoretical_limit": limit,
                "unlimited_count": 0,
                "utilization_percent": round((actual / limit) * 100, 1),
                "headroom": limit - actual,
                "has_unlimited": False,
            }

        return {
            "total_subscriptions": len(live_subs),
            "tier_distribution": tier_distribution,
            "products": summarize(products_in_use, *products_cap),
            "orders_monthly": summarize(orders_in_use, *orders_cap),
            "team_members": summarize(team_in_use, *team_cap),
        }

    def get_full_health_report(self, db: Session) -> dict:
        """Assemble the complete platform health report.

        Gathers system, database, image-storage and subscription metrics, then
        derives threshold states, recommendations, the infrastructure tier and
        an overall status from them.

        Args:
            db: SQLAlchemy session passed to the database-backed collectors.

        Returns:
            Dict with ``timestamp`` (ISO string from ``datetime.utcnow()``),
            ``overall_status``, ``system``, ``database``, ``image_storage``,
            ``subscription_capacity``, ``thresholds``, ``recommendations``,
            ``infrastructure_tier`` and ``next_tier_trigger``.
        """
        # Raw metrics
        system_metrics = self.get_system_metrics()
        db_metrics = self.get_database_metrics(db)
        storage_metrics = self.get_image_storage_metrics()
        subscription_metrics = self.get_subscription_capacity(db)

        # Derived views
        threshold_states = self._calculate_thresholds(
            system_metrics, db_metrics, storage_metrics
        )
        advice = self._generate_recommendations(threshold_states, db_metrics)
        tier_name, tier_trigger = self._determine_tier(
            db_metrics["vendors_count"], db_metrics["products_count"]
        )
        status = self._determine_overall_status(threshold_states)

        # The timestamp is taken last, once everything has been collected.
        report = {
            "timestamp": datetime.utcnow().isoformat(),
            "overall_status": status,
        }
        report.update(
            system=system_metrics,
            database=db_metrics,
            image_storage=storage_metrics,
            subscription_capacity=subscription_metrics,
            thresholds=threshold_states,
            recommendations=advice,
            infrastructure_tier=tier_name,
            next_tier_trigger=tier_trigger,
        )
        return report

    def _get_database_size(self, db: Session) -> float:
        """Get database size in MB.

        The size query is chosen from the dialect of the session's bind rather
        than by trying SQLite SQL first and falling back. On PostgreSQL a
        failed statement aborts the surrounding transaction, so probing with
        SQLite-only SQL would make the PostgreSQL query, and any later query on
        the same session, fail as well.

        Args:
            db: SQLAlchemy session whose bind identifies the database backend.

        Returns:
            Size in megabytes rounded to two decimals, or ``0.0`` when the
            dialect is not supported, the lookup fails, or no size is returned.
        """
        size_queries = {
            "sqlite": (
                "SELECT page_count * page_size as size "
                "FROM pragma_page_count(), pragma_page_size()"
            ),
            "postgresql": "SELECT pg_database_size(current_database())",
        }

        # Best effort throughout: a size lookup must never break the report.
        try:
            dialect_name = db.get_bind().dialect.name
        except Exception:
            logger.warning(
                "Could not determine database dialect for size lookup",
                exc_info=True,
            )
            return 0.0

        size_query = size_queries.get(dialect_name)
        if size_query is None:
            logger.debug(
                "Database size lookup is not supported for dialect %s",
                dialect_name,
            )
            return 0.0

        try:
            row = db.execute(text(size_query)).fetchone()
        except Exception:
            logger.warning(
                "Database size query failed for dialect %s",
                dialect_name,
                exc_info=True,
            )
            return 0.0

        if row is None or row[0] is None:
            return 0.0
        return round(row[0] / (1024 * 1024), 2)

    def _calculate_thresholds(
        self, system: dict, database: dict, image_storage: dict
    ) -> list[dict]:
        """Build one threshold status entry per monitored metric.

        Args:
            system: Metrics dict providing ``disk_percent``, ``memory_percent``
                and ``cpu_percent``.
            database: Metrics dict providing ``products_count`` and ``size_mb``.
            image_storage: Metrics dict providing ``total_size_gb``.

        Returns:
            List of threshold dicts (see ``_create_threshold``) in the order:
            products, image storage, database, disk, memory, CPU.
        """
        # (display name, metrics dict, key in that dict, CAPACITY_THRESHOLDS key)
        monitored = (
            ("Products", database, "products_count", "products_total"),
            ("Image Storage (GB)", image_storage, "total_size_gb", "storage_gb"),
            ("Database (MB)", database, "size_mb", "db_size_mb"),
            ("Disk Usage (%)", system, "disk_percent", "disk_percent"),
            ("Memory Usage (%)", system, "memory_percent", "memory_percent"),
            ("CPU Usage (%)", system, "cpu_percent", "cpu_percent"),
        )

        results = []
        for label, metrics, metric_key, config_key in monitored:
            bounds = CAPACITY_THRESHOLDS[config_key]
            entry = self._create_threshold(
                label,
                metrics[metric_key],
                bounds["warning"],
                bounds["critical"],
                bounds["limit"],
            )
            results.append(entry)

        return results

    def _create_threshold(
        self, name: str, current: float, warning: float, critical: float, limit: float
    ) -> dict:
        """Create a threshold status object."""
        percent_used = (current / limit) * 100 if limit > 0 else 0

        if current >= critical:
            status = "critical"
        elif current >= warning:
            status = "warning"
        else:
            status = "ok"

        return {
            "name": name,
            "current": current,
            "warning": warning,
            "critical": critical,
            "limit": limit,
            "status": status,
            "percent_used": round(percent_used, 1),
        }

    def _generate_recommendations(
        self, thresholds: list[dict], database: dict
    ) -> list[dict]:
        """Generate scaling recommendations based on thresholds."""
        recommendations = []

        for threshold in thresholds:
            if threshold["status"] == "critical":
                recommendations.append(
                    {
                        "priority": "critical",
                        "title": f"{threshold['name']} at critical level",
                        "description": (
                            f"Currently at {threshold['percent_used']:.0f}% of capacity "
                            f"({threshold['current']:.0f} of {threshold['limit']:.0f})"
                        ),
                        "action": "Immediate scaling or cleanup required",
                    }
                )
            elif threshold["status"] == "warning":
                recommendations.append(
                    {
                        "priority": "warning",
                        "title": f"{threshold['name']} approaching limit",
                        "description": (
                            f"Currently at {threshold['percent_used']:.0f}% of capacity "
                            f"({threshold['current']:.0f} of {threshold['limit']:.0f})"
                        ),
                        "action": "Plan scaling in the next 2-4 weeks",
                    }
                )

        # Add tier-based recommendations
        if database["vendors_count"] > 0:
            tier, next_trigger = self._determine_tier(
                database["vendors_count"], database["products_count"]
            )
            if next_trigger:
                recommendations.append(
                    {
                        "priority": "info",
                        "title": f"Current tier: {tier}",
                        "description": next_trigger,
                        "action": "Review capacity planning documentation",
                    }
                )

        # If no issues, add positive status
        if not recommendations:
            recommendations.append(
                {
                    "priority": "info",
                    "title": "All systems healthy",
                    "description": "No capacity concerns at this time",
                    "action": None,
                }
            )

        return recommendations

    def _determine_tier(self, vendors: int, products: int) -> tuple[str, str | None]:
        """Determine current infrastructure tier and next trigger."""
        current_tier = "Starter"
        next_trigger = None

        for i, tier in enumerate(INFRASTRUCTURE_TIERS):
            max_clients = tier["max_clients"]
            max_products = tier["max_products"]

            if max_clients is None:
                current_tier = tier["name"]
                break

            if vendors <= max_clients and products <= max_products:
                current_tier = tier["name"]

                # Check proximity to next tier
                if i < len(INFRASTRUCTURE_TIERS) - 1:
                    next_tier = INFRASTRUCTURE_TIERS[i + 1]
                    vendor_percent = (vendors / max_clients) * 100
                    product_percent = (products / max_products) * 100

                    if vendor_percent > 70 or product_percent > 70:
                        next_trigger = (
                            f"Approaching {next_tier['name']} tier "
                            f"(vendors: {vendor_percent:.0f}%, products: {product_percent:.0f}%)"
                        )
                break

        return current_tier, next_trigger

    def _determine_overall_status(self, thresholds: list[dict]) -> str:
        """Determine overall platform status."""
        statuses = [t["status"] for t in thresholds]

        if "critical" in statuses:
            return "critical"
        elif "warning" in statuses:
            return "degraded"
        else:
            return "healthy"


# Module-level service instance, created once at import time.
# PlatformHealthService defines no __init__ and stores no instance attributes
# in this file, so the instance carries no per-request state of its own.
platform_health_service = PlatformHealthService()