fix(prospecting): fix scan-jobs batch endpoints and add job tracking
- Reorder routes: batch endpoints before /{prospect_id} to fix FastAPI
route matching (was parsing "batch" as prospect_id → 422)
- Add scan job tracking via stats_service.create_job/complete_job so
the scan-jobs table gets populated after each batch run
- Add contact scrape batch endpoint (POST /contacts/batch) with
get_pending_contact_scrape query
- Fix scan-jobs.js: explicit route map instead of naive replace
- Normalize domain_name on create/update (strip protocol, www, slash)
- Add domain_name to ProspectUpdate schema
- Add proposal for contact scraper enum + regex fixes
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,10 @@
|
||||
# app/modules/prospecting/routes/api/admin_enrichment.py
|
||||
"""
|
||||
Admin API routes for enrichment/scanning pipeline.
|
||||
|
||||
NOTE: Batch routes MUST be defined before /{prospect_id} routes.
FastAPI matches routes in definition order, so a /{prospect_id} route
declared first would capture the literal path segment "batch", fail to
parse it as an int, and respond with 422.
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -10,6 +14,7 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import get_current_admin_api
|
||||
from app.core.database import get_db
|
||||
from app.modules.prospecting.models import JobType
|
||||
from app.modules.prospecting.schemas.enrichment import (
|
||||
ContactScrapeResponse,
|
||||
FullEnrichmentResponse,
|
||||
@@ -23,12 +28,108 @@ from app.modules.prospecting.schemas.enrichment import (
|
||||
from app.modules.prospecting.services.enrichment_service import enrichment_service
|
||||
from app.modules.prospecting.services.prospect_service import prospect_service
|
||||
from app.modules.prospecting.services.scoring_service import scoring_service
|
||||
from app.modules.prospecting.services.stats_service import stats_service
|
||||
from app.modules.tenancy.schemas.auth import UserContext
|
||||
|
||||
router = APIRouter(prefix="/enrichment")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Batch endpoints (must be before /{prospect_id} routes) ──────────────────
|
||||
|
||||
|
||||
@router.post("/http-check/batch", response_model=HttpCheckBatchResponse)
def http_check_batch(
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run HTTP check for pending prospects.

    Creates an HTTP_CHECK job through ``stats_service``, checks up to
    ``limit`` pending prospects, reports the number of results to
    ``stats_service.complete_job`` and commits the session.

    Returns:
        HttpCheckBatchResponse carrying the processed count and one
        HttpCheckBatchItem per checked prospect.
    """
    tracked_job = stats_service.create_job(db, JobType.HTTP_CHECK)
    pending = prospect_service.get_pending_http_check(db, limit=limit)
    # The ``**`` unpacking is listed first so the check runs before
    # ``domain_name`` is read, one prospect at a time.
    items = [
        HttpCheckBatchItem(
            **enrichment_service.check_http(db, target),
            domain=target.domain_name,
        )
        for target in pending
    ]
    total = len(items)
    stats_service.complete_job(tracked_job, processed=total)
    db.commit()
    return HttpCheckBatchResponse(processed=total, results=items)
|
||||
|
||||
|
||||
@router.post("/tech-scan/batch", response_model=ScanBatchResponse)
def tech_scan_batch(
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run tech scan for pending prospects.

    Creates a TECH_SCAN job through ``stats_service``, scans up to
    ``limit`` pending prospects, reports the number of prospects handled
    to ``stats_service.complete_job`` and commits the session.

    Returns:
        ScanBatchResponse with the number of prospects processed and the
        number of scans that produced a truthy result.
    """
    tracked_job = stats_service.create_job(db, JobType.TECH_SCAN)
    pending = prospect_service.get_pending_tech_scan(db, limit=limit)
    # Only scans returning a truthy value count as successful.
    succeeded = sum(
        1 for target in pending if enrichment_service.scan_tech_stack(db, target)
    )
    total = len(pending)
    stats_service.complete_job(tracked_job, processed=total)
    db.commit()
    return ScanBatchResponse(processed=total, successful=succeeded)
|
||||
|
||||
|
||||
@router.post("/performance/batch", response_model=ScanBatchResponse)
def performance_scan_batch(
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run performance scan for pending prospects.

    Creates a PERFORMANCE_SCAN job through ``stats_service``, scans up to
    ``limit`` pending prospects, reports the number of prospects handled
    to ``stats_service.complete_job`` and commits the session.

    Returns:
        ScanBatchResponse with the number of prospects processed and the
        number of scans that produced a truthy result.
    """
    tracked_job = stats_service.create_job(db, JobType.PERFORMANCE_SCAN)
    pending = prospect_service.get_pending_performance_scan(db, limit=limit)
    # Only scans returning a truthy value count as successful.
    succeeded = sum(
        1 for target in pending if enrichment_service.scan_performance(db, target)
    )
    total = len(pending)
    stats_service.complete_job(tracked_job, processed=total)
    db.commit()
    return ScanBatchResponse(processed=total, successful=succeeded)
|
||||
|
||||
|
||||
@router.post("/contacts/batch", response_model=ScanBatchResponse)
def contact_scrape_batch(
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Scrape contacts for pending prospects.

    Creates a CONTACT_SCRAPE job through ``stats_service``, scrapes up to
    ``limit`` pending prospects, reports the number of prospects handled
    to ``stats_service.complete_job`` and commits the session.

    Returns:
        ScanBatchResponse with the number of prospects processed and the
        number of scrapes that returned a truthy (non-empty) result.
    """
    tracked_job = stats_service.create_job(db, JobType.CONTACT_SCRAPE)
    pending = prospect_service.get_pending_contact_scrape(db, limit=limit)
    # A prospect counts as successful only when the scrape yields something truthy.
    succeeded = sum(
        1 for target in pending if enrichment_service.scrape_contacts(db, target)
    )
    total = len(pending)
    stats_service.complete_job(tracked_job, processed=total)
    db.commit()
    return ScanBatchResponse(processed=total, successful=succeeded)
|
||||
|
||||
|
||||
@router.post("/score-compute/batch", response_model=ScoreComputeBatchResponse)
def compute_scores_batch(
    limit: int = Query(500, ge=1, le=5000),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Compute or recompute scores for all prospects.

    Creates a SCORE_COMPUTE job through ``stats_service``, delegates the
    scoring to ``scoring_service.compute_all`` with the given ``limit``,
    reports the returned count to ``stats_service.complete_job`` and
    commits the session.

    Returns:
        ScoreComputeBatchResponse whose ``scored`` field is the count
        returned by the scoring service.
    """
    tracked_job = stats_service.create_job(db, JobType.SCORE_COMPUTE)
    scored_total = scoring_service.compute_all(db, limit=limit)
    stats_service.complete_job(tracked_job, processed=scored_total)
    db.commit()
    return ScoreComputeBatchResponse(scored=scored_total)
|
||||
|
||||
|
||||
# ── Single-prospect endpoints ───────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/http-check/{prospect_id}", response_model=HttpCheckResult)
|
||||
def http_check_single(
|
||||
prospect_id: int = Path(...),
|
||||
@@ -42,22 +143,6 @@ def http_check_single(
|
||||
return HttpCheckResult(**result)
|
||||
|
||||
|
||||
@router.post("/http-check/batch", response_model=HttpCheckBatchResponse)
def http_check_batch(
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run HTTP check for pending prospects.

    Checks up to ``limit`` pending prospects and commits the session.

    Returns:
        HttpCheckBatchResponse carrying the processed count and one
        HttpCheckBatchItem per checked prospect.
    """
    pending = prospect_service.get_pending_http_check(db, limit=limit)
    # The ``**`` unpacking is listed first so the check runs before
    # ``domain_name`` is read, one prospect at a time.
    items = [
        HttpCheckBatchItem(
            **enrichment_service.check_http(db, target),
            domain=target.domain_name,
        )
        for target in pending
    ]
    db.commit()
    return HttpCheckBatchResponse(processed=len(items), results=items)
|
||||
|
||||
|
||||
@router.post("/tech-scan/{prospect_id}", response_model=ScanSingleResponse)
|
||||
def tech_scan_single(
|
||||
prospect_id: int = Path(...),
|
||||
@@ -71,23 +156,6 @@ def tech_scan_single(
|
||||
return ScanSingleResponse(domain=prospect.domain_name, profile=profile is not None)
|
||||
|
||||
|
||||
@router.post("/tech-scan/batch", response_model=ScanBatchResponse)
def tech_scan_batch(
    limit: int = Query(100, ge=1, le=500),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run tech scan for pending prospects.

    Scans up to ``limit`` pending prospects and commits the session.

    Returns:
        ScanBatchResponse with the number of prospects processed and the
        number of scans that produced a truthy result.
    """
    pending = prospect_service.get_pending_tech_scan(db, limit=limit)
    # Only scans returning a truthy value count as successful.
    succeeded = sum(
        1 for target in pending if enrichment_service.scan_tech_stack(db, target)
    )
    db.commit()
    return ScanBatchResponse(processed=len(pending), successful=succeeded)
|
||||
|
||||
|
||||
@router.post("/performance/{prospect_id}", response_model=ScanSingleResponse)
|
||||
def performance_scan_single(
|
||||
prospect_id: int = Path(...),
|
||||
@@ -101,23 +169,6 @@ def performance_scan_single(
|
||||
return ScanSingleResponse(domain=prospect.domain_name, profile=profile is not None)
|
||||
|
||||
|
||||
@router.post("/performance/batch", response_model=ScanBatchResponse)
def performance_scan_batch(
    limit: int = Query(50, ge=1, le=200),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Run performance scan for pending prospects.

    Scans up to ``limit`` pending prospects and commits the session.

    Returns:
        ScanBatchResponse with the number of prospects processed and the
        number of scans that produced a truthy result.
    """
    pending = prospect_service.get_pending_performance_scan(db, limit=limit)
    # Only scans returning a truthy value count as successful.
    succeeded = sum(
        1 for target in pending if enrichment_service.scan_performance(db, target)
    )
    db.commit()
    return ScanBatchResponse(processed=len(pending), successful=succeeded)
|
||||
|
||||
|
||||
@router.post("/contacts/{prospect_id}", response_model=ContactScrapeResponse)
|
||||
def scrape_contacts_single(
|
||||
prospect_id: int = Path(...),
|
||||
@@ -172,15 +223,3 @@ def full_enrichment(
|
||||
score=score.score,
|
||||
lead_tier=score.lead_tier,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/score-compute/batch", response_model=ScoreComputeBatchResponse)
def compute_scores_batch(
    limit: int = Query(500, ge=1, le=5000),
    db: Session = Depends(get_db),
    current_admin: UserContext = Depends(get_current_admin_api),
):
    """Compute or recompute scores for all prospects.

    Delegates the scoring to ``scoring_service.compute_all`` with the
    given ``limit`` and commits the session.

    Returns:
        ScoreComputeBatchResponse whose ``scored`` field is the count
        returned by the scoring service.
    """
    scored_total = scoring_service.compute_all(db, limit=limit)
    db.commit()
    return ScoreComputeBatchResponse(scored=scored_total)
|
||||
|
||||
Reference in New Issue
Block a user