# app/modules/prospecting/routes/api/admin_enrichment.py
"""
Admin API routes for enrichment/scanning pipeline.

NOTE: Batch routes MUST be defined before /{prospect_id} routes.
FastAPI matches routes in definition order, and {prospect_id} would
catch "batch" as a string before trying to parse it as int → 422.
"""

import logging
import time

from fastapi import APIRouter, Depends, Path, Query
from fastapi.responses import HTMLResponse
from sqlalchemy.orm import Session

from app.api.deps import get_current_admin_api
from app.core.database import get_db
from app.modules.prospecting.config import config as prospecting_config
from app.modules.prospecting.models import JobType
from app.modules.prospecting.schemas.enrichment import (
    ContactScrapeResponse,
    FullEnrichmentResponse,
    HttpCheckBatchItem,
    HttpCheckBatchResponse,
    HttpCheckResult,
    ScanBatchResponse,
    ScanSingleResponse,
    ScoreComputeBatchResponse,
)
from app.modules.prospecting.schemas.security_audit import (
    SecurityAuditSingleResponse,
)
from app.modules.prospecting.services.enrichment_service import enrichment_service
from app.modules.prospecting.services.prospect_service import prospect_service
from app.modules.prospecting.services.scoring_service import scoring_service
from app.modules.prospecting.services.security_audit_service import (
    security_audit_service,
)
from app.modules.prospecting.services.security_report_service import (
    security_report_service,
)
from app.modules.prospecting.services.stats_service import stats_service
from app.modules.tenancy.schemas.auth import UserContext

router = APIRouter(prefix="/enrichment")
logger = logging.getLogger(__name__)


def _batch_delay():
    """Delay between prospects in batch scans to avoid rate limiting."""
    if prospecting_config.batch_delay_seconds > 0:
        time.sleep(prospecting_config.batch_delay_seconds)


def _run_scan_batch(
    db: Session,
    job_type: JobType,
    fetch_pending,
    scan_one,
    limit: int,
) -> ScanBatchResponse:
    """Run one scan step over a batch of pending prospects.

    Shared body of the batch endpoints that report a processed/successful
    pair. The sequence is: create a stats job, fetch the pending prospects,
    scan each one (pausing between prospects), complete the job, commit.

    Args:
        db: Session used for the stats job, the scans and the final commit.
        job_type: Job type passed to ``stats_service.create_job``.
        fetch_pending: Callable invoked as ``fetch_pending(db, limit=limit)``
            that returns the prospects to process.
        scan_one: Callable invoked as ``scan_one(db, prospect)``; a truthy
            return value counts the prospect as successful.
        limit: Maximum number of prospects requested from ``fetch_pending``.

    Returns:
        ScanBatchResponse with ``processed`` set to the number of prospects
        fetched and ``successful`` set to the number of truthy scan results.
    """
    job = stats_service.create_job(db, job_type)
    prospects = fetch_pending(db, limit=limit)

    successful = 0
    last_index = len(prospects) - 1
    for index, prospect in enumerate(prospects):
        if scan_one(db, prospect):
            successful += 1
        # No delay after the final prospect: there is nothing left to throttle.
        if index < last_index:
            _batch_delay()

    stats_service.complete_job(job, processed=len(prospects))
    db.commit()
    return ScanBatchResponse(processed=len(prospects), successful=successful)


# ── Batch endpoints (must be before /{prospect_id} routes) ──────────────────


@router.post("/http-check/batch", response_model=HttpCheckBatchResponse)
def http_check_batch(
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run HTTP check for pending prospects."""
    # Kept separate from _run_scan_batch: this endpoint returns one result
    # item per prospect instead of a success counter.
    job = stats_service.create_job(db, JobType.HTTP_CHECK)
    prospects = prospect_service.get_pending_http_check(db, limit=limit)

    results = []
    last_index = len(prospects) - 1
    for index, prospect in enumerate(prospects):
        result = enrichment_service.check_http(db, prospect)
        results.append(HttpCheckBatchItem(domain=prospect.domain_name, **result))
        if index < last_index:
            _batch_delay()

    stats_service.complete_job(job, processed=len(results))
    db.commit()
    return HttpCheckBatchResponse(processed=len(results), results=results)


@router.post("/tech-scan/batch", response_model=ScanBatchResponse)
def tech_scan_batch(
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run tech scan for pending prospects."""
    return _run_scan_batch(
        db,
        JobType.TECH_SCAN,
        prospect_service.get_pending_tech_scan,
        enrichment_service.scan_tech_stack,
        limit,
    )


@router.post("/performance/batch", response_model=ScanBatchResponse)
def performance_scan_batch(
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run performance scan for pending prospects."""
    return _run_scan_batch(
        db,
        JobType.PERFORMANCE_SCAN,
        prospect_service.get_pending_performance_scan,
        enrichment_service.scan_performance,
        limit,
    )


@router.post("/contacts/batch", response_model=ScanBatchResponse)
def contact_scrape_batch(
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Scrape contacts for pending prospects."""
    return _run_scan_batch(
        db,
        JobType.CONTACT_SCRAPE,
        prospect_service.get_pending_contact_scrape,
        enrichment_service.scrape_contacts,
        limit,
    )


@router.post("/content-scrape/batch", response_model=ScanBatchResponse)
def content_scrape_batch(
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Scrape page content for pending prospects."""
    return _run_scan_batch(
        db,
        JobType.CONTENT_SCRAPE,
        prospect_service.get_pending_content_scrape,
        enrichment_service.scrape_content,
        limit,
    )


@router.post("/security-audit/batch", response_model=ScanBatchResponse)
def security_audit_batch(
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run security audit for pending prospects."""
    return _run_scan_batch(
        db,
        JobType.SECURITY_AUDIT,
        prospect_service.get_pending_security_audit,
        security_audit_service.run_audit,
        limit,
    )


@router.post("/score-compute/batch", response_model=ScoreComputeBatchResponse)
def compute_scores_batch(
    limit: int = Query(500, ge=1, le=5000),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Compute or recompute scores for all prospects."""
    job = stats_service.create_job(db, JobType.SCORE_COMPUTE)
    count = scoring_service.compute_all(db, limit=limit)
    stats_service.complete_job(job, processed=count)
    db.commit()
    return ScoreComputeBatchResponse(scored=count)


# ── Report endpoints ────────────────────────────────────────────────────────


@router.get("/security-audit/report/{prospect_id}", response_class=HTMLResponse)
def security_audit_report(
    prospect_id: int = Path(...),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Generate branded HTML security audit report."""
    prospect = prospect_service.get_by_id(db, prospect_id)
    if not prospect.security_audit:
        # NOTE(review): function-local import kept as found; presumably it
        # avoids an import cycle — confirm before hoisting to module level.
        from app.exceptions.base import ResourceNotFoundException

        raise ResourceNotFoundException("SecurityAudit", str(prospect_id))

    html = security_report_service.generate_html_report(
        audit=prospect.security_audit,
        domain=prospect.domain_name,
    )
    return HTMLResponse(content=html)


# ── Single-prospect endpoints ───────────────────────────────────────────────


@router.post("/http-check/{prospect_id}", response_model=HttpCheckResult)
def http_check_single(
    prospect_id: int = Path(...),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run HTTP connectivity check for a single prospect."""
    prospect = prospect_service.get_by_id(db, prospect_id)
    result = enrichment_service.check_http(db, prospect)
    db.commit()
    return HttpCheckResult(**result)


@router.post("/tech-scan/{prospect_id}", response_model=ScanSingleResponse)
def tech_scan_single(
    prospect_id: int = Path(...),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run technology scan for a single prospect."""
    prospect = prospect_service.get_by_id(db, prospect_id)
    profile = enrichment_service.scan_tech_stack(db, prospect)
    db.commit()
    return ScanSingleResponse(domain=prospect.domain_name, profile=profile is not None)


@router.post("/performance/{prospect_id}", response_model=ScanSingleResponse)
def performance_scan_single(
    prospect_id: int = Path(...),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run PageSpeed audit for a single prospect."""
    prospect = prospect_service.get_by_id(db, prospect_id)
    profile = enrichment_service.scan_performance(db, prospect)
    db.commit()
    return ScanSingleResponse(domain=prospect.domain_name, profile=profile is not None)


@router.post("/contacts/{prospect_id}", response_model=ContactScrapeResponse)
def scrape_contacts_single(
    prospect_id: int = Path(...),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Scrape contacts for a single prospect."""
    prospect = prospect_service.get_by_id(db, prospect_id)
    contacts = enrichment_service.scrape_contacts(db, prospect)
    db.commit()
    return ContactScrapeResponse(
        domain=prospect.domain_name,
        contacts_found=len(contacts),
    )


@router.post("/security-audit/{prospect_id}", response_model=SecurityAuditSingleResponse)
def security_audit_single(
    prospect_id: int = Path(...),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run security audit for a single prospect."""
    prospect = prospect_service.get_by_id(db, prospect_id)
    audit = security_audit_service.run_audit(db, prospect)
    db.commit()

    # A falsy audit result is reported as score 0, grade "F", no findings.
    findings_count = 0
    if audit:
        findings_count = (
            audit.findings_count_critical
            + audit.findings_count_high
            + audit.findings_count_medium
            + audit.findings_count_low
        )

    return SecurityAuditSingleResponse(
        domain=prospect.domain_name,
        score=audit.score if audit else 0,
        grade=audit.grade if audit else "F",
        findings_count=findings_count,
    )


@router.post("/content-scrape/{prospect_id}", response_model=ScanSingleResponse)
def content_scrape_single(
    prospect_id: int = Path(...),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Scrape page content for a single prospect."""
    prospect = prospect_service.get_by_id(db, prospect_id)
    result = enrichment_service.scrape_content(db, prospect)
    db.commit()
    return ScanSingleResponse(domain=prospect.domain_name, profile=result is not None)


@router.post("/full/{prospect_id}", response_model=FullEnrichmentResponse)
def full_enrichment(
    prospect_id: int = Path(...),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run full enrichment pipeline for a single prospect."""
    prospect = prospect_service.get_by_id(db, prospect_id)

    # Step 1: HTTP check
    enrichment_service.check_http(db, prospect)

    # prospect.has_website is deliberately re-read before every later step
    # rather than cached once, so each step sees the flag's current value.

    # Step 2: Tech scan (if has website)
    tech_profile = None
    if prospect.has_website:
        tech_profile = enrichment_service.scan_tech_stack(db, prospect)

    # Step 3: Performance scan (if has website)
    perf_profile = None
    if prospect.has_website:
        perf_profile = enrichment_service.scan_performance(db, prospect)

    # Step 4: Contact scrape (if has website)
    contacts = []
    if prospect.has_website:
        contacts = enrichment_service.scrape_contacts(db, prospect)

    # Step 5: Content scrape (if has website)
    if prospect.has_website:
        enrichment_service.scrape_content(db, prospect)

    # Step 6: Security audit (if has website)
    if prospect.has_website:
        security_audit_service.run_audit(db, prospect)

    # Step 7: Compute score
    # Reload the prospect from the session before scoring.
    db.refresh(prospect)
    score = scoring_service.compute_score(db, prospect)
    db.commit()

    return FullEnrichmentResponse(
        domain=prospect.domain_name,
        has_website=prospect.has_website,
        tech_scanned=tech_profile is not None,
        perf_scanned=perf_profile is not None,
        contacts_found=len(contacts),
        score=score.score,
        lead_tier=score.lead_tier,
    )