refactor: move capacity_forecast_service from billing to monitoring
Some checks failed
CI / ruff (push) Failing after 8s
CI / pytest (push) Successful in 36m5s
CI / architecture (push) Successful in 11s
CI / dependency-scanning (push) Successful in 27s
CI / docs (push) Has been skipped
CI / audit (push) Successful in 8s

Resolves the billing (core) → monitoring (optional) architecture violation
by moving CapacityForecastService to the monitoring module where it belongs.

- Create BillingMetricsProvider to expose subscription counts via stats_aggregator
- Move CapacitySnapshot model from billing to monitoring
- Replace direct MerchantSubscription queries with stats_aggregator calls
- Fix middleware test mocks to cover StoreDomain/MerchantDomain fallback chains

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-13 20:58:22 +01:00
parent 7c43d6f4a2
commit 9c27fa02b0
16 changed files with 234 additions and 102 deletions

View File

@@ -9,6 +9,10 @@ from app.modules.monitoring.services.admin_audit_service import (
AdminAuditService,
admin_audit_service,
)
from app.modules.monitoring.services.capacity_forecast_service import (
CapacityForecastService,
capacity_forecast_service,
)
from app.modules.monitoring.services.background_tasks_service import (
BackgroundTasksService,
background_tasks_service,
@@ -25,6 +29,8 @@ from app.modules.monitoring.services.platform_health_service import (
__all__ = [
"admin_audit_service",
"AdminAuditService",
"capacity_forecast_service",
"CapacityForecastService",
"background_tasks_service",
"BackgroundTasksService",
"log_service",

View File

@@ -0,0 +1,317 @@
# app/modules/monitoring/services/capacity_forecast_service.py
"""
Capacity forecasting service for growth trends and scaling recommendations.
Provides:
- Historical capacity trend analysis
- Growth rate calculations
- Days-until-threshold projections
- Scaling recommendations based on growth patterns
"""
import logging
from datetime import UTC, datetime, timedelta
from decimal import Decimal
from sqlalchemy import func
from sqlalchemy.orm import Session
from app.modules.contracts.metrics import MetricsContext
from app.modules.core.services.stats_aggregator import stats_aggregator
from app.modules.monitoring.models.capacity_snapshot import CapacitySnapshot
from app.modules.tenancy.models import Platform, Store, StoreUser
logger = logging.getLogger(__name__)
# Scaling thresholds based on capacity-planning.md
INFRASTRUCTURE_SCALING = [
{"name": "Starter", "max_stores": 50, "max_products": 10_000, "cost_monthly": 30},
{"name": "Small", "max_stores": 100, "max_products": 30_000, "cost_monthly": 80},
{"name": "Medium", "max_stores": 300, "max_products": 100_000, "cost_monthly": 150},
{"name": "Large", "max_stores": 500, "max_products": 250_000, "cost_monthly": 350},
{"name": "Scale", "max_stores": 1000, "max_products": 500_000, "cost_monthly": 700},
{"name": "Enterprise", "max_stores": None, "max_products": None, "cost_monthly": 1500},
]
class CapacityForecastService:
"""Service for capacity forecasting and trend analysis."""
def capture_daily_snapshot(self, db: Session) -> CapacitySnapshot:
"""
Capture a daily snapshot of platform capacity metrics.
Should be called by a daily background job.
"""
from app.modules.cms.services.media_service import media_service
from app.modules.monitoring.services.platform_health_service import (
platform_health_service,
)
now = datetime.now(UTC)
today = now.replace(hour=0, minute=0, second=0, microsecond=0)
# Check if snapshot already exists for today
existing = (
db.query(CapacitySnapshot)
.filter(CapacitySnapshot.snapshot_date == today)
.first()
)
if existing:
logger.info(f"Snapshot already exists for {today}")
return existing
# Gather metrics
total_stores = db.query(func.count(Store.id)).scalar() or 0
active_stores = (
db.query(func.count(Store.id))
.filter(Store.is_active == True) # noqa: E712
.scalar()
or 0
)
# Resource metrics via provider pattern (avoids cross-module imports)
start_of_month = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
platform = db.query(Platform).first()
platform_id = platform.id if platform else 1
stats = stats_aggregator.get_admin_stats_flat(
db, platform_id,
context=MetricsContext(date_from=start_of_month),
)
# Subscription metrics via stats aggregator (avoids billing → monitoring violation)
total_subs = stats.get("billing.total_subscriptions", 0)
active_subs = stats.get("billing.active_subscriptions", 0)
trial_stores = stats.get("billing.trial_subscriptions", 0)
total_products = stats.get("catalog.total_products", 0)
total_team = (
db.query(func.count(StoreUser.id))
.filter(StoreUser.is_active == True) # noqa: E712
.scalar()
or 0
)
# Orders this month (from stats aggregator)
total_orders = stats.get("orders.in_period", 0)
# Storage metrics
try:
image_stats = media_service.get_storage_stats(db)
storage_gb = image_stats.get("total_size_gb", 0)
except Exception:
storage_gb = 0
try:
db_size = platform_health_service._get_database_size(db)
except Exception:
db_size = 0
# Theoretical capacity from subscriptions
capacity = platform_health_service.get_subscription_capacity(db)
theoretical_products = capacity["products"].get("theoretical_limit", 0)
theoretical_orders = capacity["orders_monthly"].get("theoretical_limit", 0)
theoretical_team = capacity["team_members"].get("theoretical_limit", 0)
# Tier distribution
tier_distribution = capacity.get("tier_distribution", {})
# Create snapshot
snapshot = CapacitySnapshot(
snapshot_date=today,
total_stores=total_stores,
active_stores=active_stores,
trial_stores=trial_stores,
total_subscriptions=total_subs,
active_subscriptions=active_subs,
total_products=total_products,
total_orders_month=total_orders,
total_team_members=total_team,
storage_used_gb=Decimal(str(storage_gb)),
db_size_mb=Decimal(str(db_size)),
theoretical_products_limit=theoretical_products,
theoretical_orders_limit=theoretical_orders,
theoretical_team_limit=theoretical_team,
tier_distribution=tier_distribution,
)
db.add(snapshot)
db.flush()
db.refresh(snapshot)
logger.info(f"Captured capacity snapshot for {today}")
return snapshot
def get_growth_trends(self, db: Session, days: int = 30) -> dict:
"""
Calculate growth trends over the specified period.
Returns growth rates and projections for key metrics.
"""
now = datetime.now(UTC)
start_date = now - timedelta(days=days)
# Get snapshots for the period
snapshots = (
db.query(CapacitySnapshot)
.filter(CapacitySnapshot.snapshot_date >= start_date)
.order_by(CapacitySnapshot.snapshot_date)
.all()
)
if len(snapshots) < 2:
return {
"period_days": days,
"snapshots_available": len(snapshots),
"trends": {},
"message": "Insufficient data for trend analysis",
}
first = snapshots[0]
last = snapshots[-1]
period_days = (last.snapshot_date - first.snapshot_date).days or 1
def calc_growth(metric: str) -> dict:
start_val = getattr(first, metric) or 0
end_val = getattr(last, metric) or 0
change = end_val - start_val
if start_val > 0:
growth_rate = (change / start_val) * 100
daily_rate = growth_rate / period_days
monthly_rate = daily_rate * 30
else:
growth_rate = 0 if end_val == 0 else 100
daily_rate = 0
monthly_rate = 0
return {
"start_value": start_val,
"current_value": end_val,
"change": change,
"growth_rate_percent": round(growth_rate, 2),
"daily_growth_rate": round(daily_rate, 3),
"monthly_projection": round(end_val * (1 + monthly_rate / 100), 0),
}
trends = {
"stores": calc_growth("active_stores"),
"products": calc_growth("total_products"),
"orders": calc_growth("total_orders_month"),
"team_members": calc_growth("total_team_members"),
"storage_gb": {
"start_value": float(first.storage_used_gb or 0),
"current_value": float(last.storage_used_gb or 0),
"change": float((last.storage_used_gb or 0) - (first.storage_used_gb or 0)),
},
}
return {
"period_days": period_days,
"snapshots_available": len(snapshots),
"start_date": first.snapshot_date.isoformat(),
"end_date": last.snapshot_date.isoformat(),
"trends": trends,
}
def get_scaling_recommendations(self, db: Session) -> list[dict]:
"""
Generate scaling recommendations based on current capacity and growth.
Returns prioritized list of recommendations.
"""
from app.modules.monitoring.services.platform_health_service import (
platform_health_service,
)
recommendations = []
# Get current capacity
capacity = platform_health_service.get_subscription_capacity(db)
health = platform_health_service.get_full_health_report(db)
trends = self.get_growth_trends(db, days=30)
# Check product capacity
products = capacity["products"]
if products.get("utilization_percent") and products["utilization_percent"] > 80:
recommendations.append({
"category": "capacity",
"severity": "warning",
"title": "Product capacity approaching limit",
"description": f"Currently at {products['utilization_percent']:.0f}% of theoretical product capacity",
"action": "Consider upgrading store tiers or adding capacity",
})
# Check infrastructure tier
current_tier = health.get("infrastructure_tier", {})
next_trigger = health.get("next_tier_trigger")
if next_trigger:
recommendations.append({
"category": "infrastructure",
"severity": "info",
"title": f"Current tier: {current_tier.get('name', 'Unknown')}",
"description": f"Next upgrade trigger: {next_trigger}",
"action": "Monitor growth and plan for infrastructure scaling",
})
# Check growth rate
if trends.get("trends"):
store_growth = trends["trends"].get("stores", {})
if store_growth.get("monthly_projection", 0) > 0:
monthly_rate = store_growth.get("growth_rate_percent", 0)
if monthly_rate > 20:
recommendations.append({
"category": "growth",
"severity": "info",
"title": "High store growth rate",
"description": f"Store base growing at {monthly_rate:.1f}% over last 30 days",
"action": "Ensure infrastructure can scale to meet demand",
})
# Check storage
storage_percent = health.get("image_storage", {}).get("total_size_gb", 0)
if storage_percent > 800: # 80% of 1TB
recommendations.append({
"category": "storage",
"severity": "warning",
"title": "Storage usage high",
"description": f"Image storage at {storage_percent:.1f} GB",
"action": "Plan for storage expansion or implement cleanup policies",
})
# Sort by severity
severity_order = {"critical": 0, "warning": 1, "info": 2}
recommendations.sort(key=lambda r: severity_order.get(r["severity"], 3))
return recommendations
def get_days_until_threshold(
self, db: Session, metric: str, threshold: int
) -> int | None:
"""
Calculate days until a metric reaches a threshold based on current growth.
Returns None if insufficient data or no growth.
"""
trends = self.get_growth_trends(db, days=30)
if not trends.get("trends") or metric not in trends["trends"]:
return None
metric_data = trends["trends"][metric]
current = metric_data.get("current_value", 0)
daily_rate = metric_data.get("daily_growth_rate", 0)
if daily_rate <= 0 or current >= threshold:
return None
remaining = threshold - current
days = remaining / (current * daily_rate / 100) if current > 0 else None
return int(days) if days else None
# Singleton instance
capacity_forecast_service = CapacityForecastService()