Phase 6 - Database-driven tiers:
- Update subscription_service to query database first with legacy fallback
- Add get_tier_info() db parameter and _get_tier_from_legacy() method

Phase 7 - Platform health integration:
- Add get_subscription_capacity() for theoretical vs actual capacity
- Include subscription capacity in full health report

Phase 8 - Background subscription tasks:
- Add reset_period_counters() for billing period resets
- Add check_trial_expirations() for trial management
- Add sync_stripe_status() for Stripe synchronization
- Add cleanup_stale_subscriptions() for maintenance
- Add capture_capacity_snapshot() for daily metrics

Phase 10 - Capacity planning & forecasting:
- Add CapacitySnapshot model for historical tracking
- Create capacity_forecast_service with growth trends
- Add /subscription-capacity, /trends, /recommendations endpoints
- Add /snapshot endpoint for manual captures

Also includes billing API enhancements from phase 4:
- Add upcoming-invoice, change-tier, addon purchase/cancel endpoints
- Add UsageSummary schema for billing page
- Enhance billing.js with addon management functions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
540 lines
18 KiB
Python
540 lines
18 KiB
Python
# app/services/platform_health_service.py
|
|
"""
|
|
Platform health and capacity monitoring service.
|
|
|
|
Provides:
|
|
- System resource metrics (CPU, memory, disk)
|
|
- Database metrics and statistics
|
|
- Capacity threshold calculations
|
|
- Scaling recommendations
|
|
"""
|
|
|
|
import logging
|
|
from datetime import datetime
|
|
|
|
import psutil
|
|
from sqlalchemy import func, text
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.services.image_service import image_service
|
|
from models.database.inventory import Inventory
|
|
from models.database.order import Order
|
|
from models.database.product import Product
|
|
from models.database.vendor import Vendor
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ============================================================================
|
|
# Thresholds Configuration
|
|
# ============================================================================
|
|
|
|
# Capacity thresholds per metric. A reading at or above "warning" / "critical"
# is given that status by PlatformHealthService._create_threshold; "limit" is
# the denominator used there to compute the percent-used figure.
CAPACITY_THRESHOLDS = {
    "products_total": {"warning": 400_000, "critical": 475_000, "limit": 500_000},
    "storage_gb": {"warning": 800, "critical": 950, "limit": 1000},
    "db_size_mb": {"warning": 20_000, "critical": 24_000, "limit": 25_000},
    "disk_percent": {"warning": 70, "critical": 85, "limit": 100},
    "memory_percent": {"warning": 75, "critical": 90, "limit": 100},
    "cpu_percent": {"warning": 70, "critical": 85, "limit": 100},
}
|
|
|
|
# Infrastructure tiers, ordered from smallest to largest.
# PlatformHealthService._determine_tier walks this list in order and picks the
# first tier whose limits are not exceeded; ``None`` limits mark the open-ended
# top tier, which always matches.
INFRASTRUCTURE_TIERS = [
    {"name": "Starter", "max_clients": 50, "max_products": 10_000},
    {"name": "Small", "max_clients": 100, "max_products": 30_000},
    {"name": "Medium", "max_clients": 300, "max_products": 100_000},
    {"name": "Large", "max_clients": 500, "max_products": 250_000},
    {"name": "Scale", "max_clients": 1000, "max_products": 500_000},
    {"name": "Enterprise", "max_clients": None, "max_products": None},
]
|
|
|
|
|
|
class PlatformHealthService:
    """Service for platform health and capacity monitoring.

    The class keeps no state of its own: each public method gathers live
    figures (from ``psutil``, the image service, or the SQLAlchemy session
    it is given) and returns plain dicts.
    """

    # Dialect name -> SQL whose single row holds the database size in bytes
    # in its first column.
    _DB_SIZE_QUERIES = {
        "sqlite": (
            "SELECT page_count * page_size as size "
            "FROM pragma_page_count(), pragma_page_size()"
        ),
        "postgresql": "SELECT pg_database_size(current_database())",
    }

    # Threshold status -> (title suffix, suggested action) used when turning
    # a non-"ok" threshold into a recommendation.
    _RECOMMENDATION_TEMPLATES = {
        "critical": ("at critical level", "Immediate scaling or cleanup required"),
        "warning": ("approaching limit", "Plan scaling in the next 2-4 weeks"),
    }

    # ------------------------------------------------------------------
    # Public metrics
    # ------------------------------------------------------------------

    def get_system_metrics(self) -> dict:
        """Return a snapshot of host CPU, memory and root-disk usage.

        CPU usage is sampled over a 0.1 s interval, so the call blocks for
        about that long. Byte counts are divided by 1024**3 and rounded to
        two decimals for the ``*_gb`` keys.
        """
        cpu_percent = psutil.cpu_percent(interval=0.1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage("/")
        bytes_per_gb = 1024**3

        return {
            "cpu_percent": cpu_percent,
            "memory_percent": memory.percent,
            "memory_used_gb": round(memory.used / bytes_per_gb, 2),
            "memory_total_gb": round(memory.total / bytes_per_gb, 2),
            "disk_percent": disk.percent,
            "disk_used_gb": round(disk.used / bytes_per_gb, 2),
            "disk_total_gb": round(disk.total / bytes_per_gb, 2),
        }

    def get_database_metrics(self, db: Session) -> dict:
        """Return row counts for the main tables plus the database size in MB.

        Args:
            db: SQLAlchemy session used for all queries.

        Returns:
            Dict with ``size_mb`` and ``products_count`` / ``orders_count`` /
            ``vendors_count`` / ``inventory_count``. Counts default to 0 when
            the query yields nothing.
        """
        row_counts = {
            key: db.query(func.count(model.id)).scalar() or 0
            for key, model in (
                ("products_count", Product),
                ("orders_count", Order),
                ("vendors_count", Vendor),
                ("inventory_count", Inventory),
            )
        }
        return {"size_mb": self._get_database_size(db), **row_counts}

    def get_image_storage_metrics(self) -> dict:
        """Return the subset of image-service storage stats used in reports.

        Raises:
            KeyError: if the image service omits one of the expected keys.
        """
        stats = image_service.get_storage_stats()
        wanted = (
            "total_files",
            "total_size_mb",
            "total_size_gb",
            "max_files_per_dir",
            "products_estimated",
        )
        return {key: stats[key] for key in wanted}

    def get_capacity_metrics(self, db: Session) -> dict:
        """Return capacity-focused metrics for planning.

        Args:
            db: SQLAlchemy session used for all queries.

        Returns:
            Dict with total products, products per vendor name, image count
            and storage (GB), database size (MB), orders created since the
            start of the current UTC month, and the number of active vendors.
        """
        products_total = db.query(func.count(Product.id)).scalar() or 0

        # Inner join: vendors without products do not appear, and vendors that
        # share a name collapse into one entry because grouping is by name.
        per_vendor_rows = (
            db.query(Vendor.name, func.count(Product.id))
            .join(Product, Vendor.id == Product.vendor_id)
            .group_by(Vendor.name)
            .all()
        )
        products_by_vendor = {
            name or "Unknown": count for name, count in per_vendor_rows
        }

        image_stats = image_service.get_storage_stats()
        db_size = self._get_database_size(db)

        # Naive UTC timestamp, matching the naive value the original compared
        # against Order.created_at.
        month_start = self._start_of_month(datetime.utcnow())
        orders_this_month = (
            db.query(func.count(Order.id))
            .filter(Order.created_at >= month_start)
            .scalar()
            or 0
        )

        active_vendors = (
            db.query(func.count(Vendor.id))
            .filter(Vendor.is_active == True)  # noqa: E712
            .scalar()
            or 0
        )

        return {
            "products_total": products_total,
            "products_by_vendor": products_by_vendor,
            "images_total": image_stats["total_files"],
            "storage_used_gb": image_stats["total_size_gb"],
            "database_size_mb": db_size,
            "orders_this_month": orders_this_month,
            "active_vendors": active_vendors,
        }

    def get_subscription_capacity(self, db: Session) -> dict:
        """Calculate theoretical capacity based on all vendor subscriptions.

        Sums the product / order / team-member limits of every subscription
        whose status is "active" or "trial" and compares them with actual
        usage. A limit of ``None`` is treated as unlimited and counted
        separately instead of being summed.

        Args:
            db: SQLAlchemy session used for all queries.

        Returns:
            Dict with ``total_subscriptions``, ``tier_distribution`` and one
            utilization dict each for ``products``, ``orders_monthly`` and
            ``team_members`` (see ``_utilization`` for their shape).
        """
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid an import cycle - TODO confirm).
        from models.database.subscription import VendorSubscription
        from models.database.vendor import VendorUser

        subscriptions = (
            db.query(VendorSubscription)
            .filter(VendorSubscription.status.in_(["active", "trial"]))
            .all()
        )

        tier_distribution = {}
        for sub in subscriptions:
            tier = sub.tier or "unknown"
            tier_distribution[tier] = tier_distribution.get(tier, 0) + 1

        products_limit, products_unlimited = self._aggregate_limits(
            sub.products_limit for sub in subscriptions
        )
        orders_limit, orders_unlimited = self._aggregate_limits(
            sub.orders_limit for sub in subscriptions
        )
        team_limit, team_unlimited = self._aggregate_limits(
            sub.team_members_limit for sub in subscriptions
        )

        actual_products = db.query(func.count(Product.id)).scalar() or 0
        actual_team = (
            db.query(func.count(VendorUser.id))
            .filter(VendorUser.is_active == True)  # noqa: E712
            .scalar()
            or 0
        )
        # A NULL counter is treated as zero so one unset row cannot make the
        # whole report fail with a TypeError.
        orders_used = sum(sub.orders_this_period or 0 for sub in subscriptions)

        return {
            "total_subscriptions": len(subscriptions),
            "tier_distribution": tier_distribution,
            "products": self._utilization(
                actual_products, products_limit, products_unlimited
            ),
            "orders_monthly": self._utilization(
                orders_used, orders_limit, orders_unlimited
            ),
            "team_members": self._utilization(
                actual_team, team_limit, team_unlimited
            ),
        }

    def get_full_health_report(self, db: Session) -> dict:
        """Get comprehensive platform health report.

        Combines system, database, image-storage and subscription-capacity
        metrics with the derived thresholds, recommendations, infrastructure
        tier and overall status.

        Args:
            db: SQLAlchemy session used for all queries.

        Returns:
            Dict containing every section above plus a naive-UTC ISO-8601
            ``timestamp``.
        """
        system = self.get_system_metrics()
        database = self.get_database_metrics(db)
        image_storage = self.get_image_storage_metrics()
        subscription_capacity = self.get_subscription_capacity(db)

        thresholds = self._calculate_thresholds(system, database, image_storage)
        recommendations = self._generate_recommendations(thresholds, database)
        tier, next_trigger = self._determine_tier(
            database["vendors_count"], database["products_count"]
        )

        return {
            "timestamp": datetime.utcnow().isoformat(),
            "overall_status": self._determine_overall_status(thresholds),
            "system": system,
            "database": database,
            "image_storage": image_storage,
            "subscription_capacity": subscription_capacity,
            "thresholds": thresholds,
            "recommendations": recommendations,
            "infrastructure_tier": tier,
            "next_tier_trigger": next_trigger,
        }

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _start_of_month(moment: datetime) -> datetime:
        """Return midnight on the first day of ``moment``'s month.

        Microseconds are zeroed as well, so rows stamped within the first
        fraction of a second of the month are not excluded by a ``>=`` filter.
        """
        return moment.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

    @staticmethod
    def _aggregate_limits(limits) -> tuple[int, int]:
        """Return ``(sum of finite limits, count of unlimited entries)``.

        Args:
            limits: iterable of per-subscription limits; ``None`` means
                unlimited.
        """
        total = 0
        unlimited = 0
        for limit in limits:
            if limit is None:
                unlimited += 1
            else:
                total += limit
        return total, unlimited

    @staticmethod
    def _utilization(actual: int, limit: int, unlimited: int) -> dict:
        """Describe usage against an aggregated limit.

        Args:
            actual: current usage.
            limit: sum of all finite limits.
            unlimited: number of subscriptions with no limit.

        Returns:
            Dict with ``actual``, ``theoretical_limit``, ``unlimited_count``,
            ``utilization_percent`` and ``has_unlimited``. ``headroom`` is
            present only when every subscription is limited and ``limit`` is
            positive. ``utilization_percent`` is ``None`` when any
            subscription is unlimited (no meaningful percentage exists) and
            0 when there is no limit at all.
        """
        if unlimited > 0:
            return {
                "actual": actual,
                "theoretical_limit": limit,
                "unlimited_count": unlimited,
                "utilization_percent": None,
                "has_unlimited": True,
            }
        if limit > 0:
            return {
                "actual": actual,
                "theoretical_limit": limit,
                "unlimited_count": 0,
                "utilization_percent": round((actual / limit) * 100, 1),
                "headroom": limit - actual,
                "has_unlimited": False,
            }
        return {
            "actual": actual,
            "theoretical_limit": 0,
            "unlimited_count": 0,
            "utilization_percent": 0,
            "has_unlimited": False,
        }

    def _get_database_size(self, db: Session) -> float:
        """Return the database size in MB, or 0.0 when it cannot be determined.

        The size query is chosen from the session's dialect instead of probing
        with SQLite syntax first. On PostgreSQL a failed statement aborts the
        open transaction, so a failed probe would also make the fallback query
        and every later query on the same session fail.

        Best-effort: unsupported dialects and query errors yield 0.0, and
        errors are logged rather than raised.
        """
        try:
            dialect_name = db.get_bind().dialect.name
            sql = self._DB_SIZE_QUERIES.get(dialect_name)
            if sql is None:
                logger.debug(
                    "Database size is not supported for dialect %r", dialect_name
                )
                return 0.0
            row = db.execute(text(sql)).fetchone()
        except Exception:
            # Health reporting must not fail because the size is unavailable.
            logger.warning("Could not determine database size", exc_info=True)
            return 0.0

        if not row or row[0] is None:
            return 0.0
        return round(row[0] / (1024 * 1024), 2)

    def _calculate_thresholds(
        self, system: dict, database: dict, image_storage: dict
    ) -> list[dict]:
        """Calculate threshold status for each monitored metric.

        Args:
            system: result of ``get_system_metrics``.
            database: result of ``get_database_metrics``.
            image_storage: result of ``get_image_storage_metrics``.

        Returns:
            One threshold dict per metric, in fixed order: products, image
            storage, database size, disk, memory, CPU.
        """
        # (display name, CAPACITY_THRESHOLDS key, current reading)
        readings = (
            ("Products", "products_total", database["products_count"]),
            ("Image Storage (GB)", "storage_gb", image_storage["total_size_gb"]),
            ("Database (MB)", "db_size_mb", database["size_mb"]),
            ("Disk Usage (%)", "disk_percent", system["disk_percent"]),
            ("Memory Usage (%)", "memory_percent", system["memory_percent"]),
            ("CPU Usage (%)", "cpu_percent", system["cpu_percent"]),
        )

        thresholds = []
        for label, config_key, current in readings:
            config = CAPACITY_THRESHOLDS[config_key]
            thresholds.append(
                self._create_threshold(
                    label,
                    current,
                    config["warning"],
                    config["critical"],
                    config["limit"],
                )
            )
        return thresholds

    def _create_threshold(
        self, name: str, current: float, warning: float, critical: float, limit: float
    ) -> dict:
        """Create a threshold status object.

        Args:
            name: display name of the metric.
            current: current reading.
            warning: reading at or above which the status is "warning".
            critical: reading at or above which the status is "critical".
            limit: capacity used as the denominator for ``percent_used``.

        Returns:
            Dict with the inputs plus ``status`` ("ok" / "warning" /
            "critical") and ``percent_used`` rounded to one decimal (0 when
            ``limit`` is not positive).
        """
        if current >= critical:
            status = "critical"
        elif current >= warning:
            status = "warning"
        else:
            status = "ok"

        percent_used = (current / limit) * 100 if limit > 0 else 0

        return {
            "name": name,
            "current": current,
            "warning": warning,
            "critical": critical,
            "limit": limit,
            "status": status,
            "percent_used": round(percent_used, 1),
        }

    def _generate_recommendations(
        self, thresholds: list[dict], database: dict
    ) -> list[dict]:
        """Generate scaling recommendations based on thresholds.

        Args:
            thresholds: threshold dicts as produced by ``_create_threshold``.
            database: dict providing ``vendors_count`` and ``products_count``.

        Returns:
            One entry per warning/critical threshold, then an "info" entry
            when the platform is close to the next infrastructure tier. If
            nothing else was produced, a single "All systems healthy" entry.
        """
        recommendations = []

        for threshold in thresholds:
            template = self._RECOMMENDATION_TEMPLATES.get(threshold["status"])
            if template is None:
                # "ok" thresholds produce no recommendation.
                continue
            title_suffix, action = template
            recommendations.append(
                {
                    "priority": threshold["status"],
                    "title": f"{threshold['name']} {title_suffix}",
                    "description": (
                        f"Currently at {threshold['percent_used']:.0f}% of capacity "
                        f"({threshold['current']:.0f} of {threshold['limit']:.0f})"
                    ),
                    "action": action,
                }
            )

        # Tier advice only makes sense once at least one vendor exists.
        if database["vendors_count"] > 0:
            tier, next_trigger = self._determine_tier(
                database["vendors_count"], database["products_count"]
            )
            if next_trigger:
                recommendations.append(
                    {
                        "priority": "info",
                        "title": f"Current tier: {tier}",
                        "description": next_trigger,
                        "action": "Review capacity planning documentation",
                    }
                )

        if not recommendations:
            recommendations.append(
                {
                    "priority": "info",
                    "title": "All systems healthy",
                    "description": "No capacity concerns at this time",
                    "action": None,
                }
            )

        return recommendations

    def _determine_tier(self, vendors: int, products: int) -> tuple[str, str | None]:
        """Determine current infrastructure tier and next trigger.

        Walks ``INFRASTRUCTURE_TIERS`` in order and selects the first tier
        whose vendor and product limits are both not exceeded. A tier with
        ``max_clients`` of ``None`` always matches.

        Args:
            vendors: current number of vendors.
            products: current number of products.

        Returns:
            ``(tier name, trigger message)``. The message is set only when
            usage of the selected tier exceeds 70% on either dimension and a
            larger tier exists; otherwise it is ``None``. Falls back to
            ``("Starter", None)`` if no tier matches.
        """
        last_index = len(INFRASTRUCTURE_TIERS) - 1

        for index, tier in enumerate(INFRASTRUCTURE_TIERS):
            max_clients = tier["max_clients"]
            max_products = tier["max_products"]

            if max_clients is None:
                # Open-ended top tier: nothing larger to approach.
                return tier["name"], None

            if not (vendors <= max_clients and products <= max_products):
                continue

            next_trigger = None
            if index < last_index:
                next_tier = INFRASTRUCTURE_TIERS[index + 1]
                vendor_percent = (vendors / max_clients) * 100
                product_percent = (products / max_products) * 100

                if vendor_percent > 70 or product_percent > 70:
                    next_trigger = (
                        f"Approaching {next_tier['name']} tier "
                        f"(vendors: {vendor_percent:.0f}%, products: {product_percent:.0f}%)"
                    )
            return tier["name"], next_trigger

        return "Starter", None

    def _determine_overall_status(self, thresholds: list[dict]) -> str:
        """Determine overall platform status.

        Returns "critical" if any threshold is critical, "degraded" if any is
        at warning level, and "healthy" otherwise.
        """
        statuses = {threshold["status"] for threshold in thresholds}

        if "critical" in statuses:
            return "critical"
        if "warning" in statuses:
            return "degraded"
        return "healthy"
|
|
|
|
|
|
# Create service instance
|
|
# Built once at import time; PlatformHealthService defines no __init__ and its
# methods assign no instance attributes, so this object carries no state.
platform_health_service = PlatformHealthService()
|