fix(prospecting): fix scan-jobs batch endpoints and add job tracking

- Reorder routes: batch endpoints before /{prospect_id} to fix FastAPI
  route matching (was parsing "batch" as prospect_id → 422)
- Add scan job tracking via stats_service.create_job/complete_job so
  the scan-jobs table gets populated after each batch run
- Add contact scrape batch endpoint (POST /contacts/batch) with
  get_pending_contact_scrape query
- Fix scan-jobs.js: explicit route map instead of naive replace
- Normalize domain_name on create/update (strip protocol, www, slash)
- Add domain_name to ProspectUpdate schema
- Add proposal for contact scraper enum + regex fixes

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-29 23:31:33 +02:00
parent 95f0eac079
commit f310363f7c
7 changed files with 208 additions and 63 deletions

View File

@@ -94,10 +94,22 @@ class ProspectService:
return prospects, total
@staticmethod
def _normalize_domain(domain: str) -> str:
    """Return ``domain`` in a canonical bare-host form.

    Surrounding whitespace is trimmed, the value is lower-cased, a leading
    ``https://`` / ``http://`` scheme and a leading ``www.`` label are
    removed, and any trailing slashes are dropped. Anything else in the
    string (path, port, query) is left as-is.

    Args:
        domain: Raw domain or URL-like string.

    Returns:
        The normalized string; empty when nothing remains after stripping.
    """
    # DNS names are case-insensitive, and the bulk-import path lower-cases
    # its input after calling this helper. Folding case here keeps the
    # create/update paths on the same canonical form, so duplicate checks
    # and stored values do not differ by letter case alone.
    domain = domain.strip().lower()
    for scheme in ("https://", "http://"):
        if domain.startswith(scheme):
            domain = domain[len(scheme):]
    if domain.startswith("www."):
        domain = domain[len("www."):]
    return domain.rstrip("/")
def create(self, db: Session, data: dict, captured_by_user_id: int | None = None) -> Prospect:
channel = data.get("channel", "digital")
if channel == "digital" and data.get("domain_name"):
data["domain_name"] = self._normalize_domain(data["domain_name"])
existing = self.get_by_domain(db, data["domain_name"])
if existing:
raise DuplicateDomainException(data["domain_name"])
@@ -148,7 +160,7 @@ class ProspectService:
skipped = 0
new_prospects = []
for name in domain_names:
name = name.strip().lower()
name = self._normalize_domain(name).lower()
if not name:
continue
existing = self.get_by_domain(db, name)
@@ -171,6 +183,9 @@ class ProspectService:
def update(self, db: Session, prospect_id: int, data: dict) -> Prospect:
prospect = self.get_by_id(db, prospect_id)
if "domain_name" in data and data["domain_name"] is not None:
prospect.domain_name = self._normalize_domain(data["domain_name"])
for field in ["business_name", "status", "source", "address", "city", "postal_code", "notes"]:
if field in data and data[field] is not None:
setattr(prospect, field, data[field])
@@ -225,6 +240,17 @@ class ProspectService:
.all()
)
def get_pending_contact_scrape(self, db: Session, limit: int = 100) -> list[Prospect]:
    """Fetch prospects that have a website but no recorded contact scrape.

    Selects ``Prospect`` rows where ``has_website`` is true and
    ``last_contact_scrape_at`` is NULL. No ordering is applied, so which
    rows are returned when more than ``limit`` match is up to the database.

    Args:
        db: Session used to run the query.
        limit: Maximum number of rows to return.

    Returns:
        The matching prospects, at most ``limit`` of them.
    """
    query = db.query(Prospect)
    has_site = Prospect.has_website.is_(True)
    never_scraped = Prospect.last_contact_scrape_at.is_(None)
    pending = query.filter(has_site, never_scraped)
    return pending.limit(limit).all()
def count_by_status(self, db: Session) -> dict[str, int]:
    """Count prospects grouped by their status.

    Args:
        db: Session used to run the aggregate query.

    Returns:
        A mapping from status key to the number of prospects with that
        status. The key is ``status.value`` when the status object exposes
        a ``value`` attribute, otherwise ``str(status)``.
    """
    rows = (
        db.query(Prospect.status, func.count(Prospect.id))  # noqa: SVC-005 - prospecting is platform-scoped, not store-scoped
        .group_by(Prospect.status)
        .all()
    )
    counts = {}
    for status, count in rows:
        # Statuses with a ``.value`` are keyed by it; anything else by str().
        if hasattr(status, "value"):
            key = status.value
        else:
            key = str(status)
        counts[key] = count
    return counts