feat(prospecting): add batch delay + fix Celery error_message field
- Add PROSPECTING_BATCH_DELAY_SECONDS config (default 1.0s) — polite delay between prospects in batch scans to avoid rate limiting
- Apply delay to all 5 batch API endpoints and all Celery tasks
- Fix Celery tasks: error_message → error_log (matches model field)
- Add batch-scanning.md docs with rate limiting guide, scaling estimates for 70k+ URL imports, and pipeline order recommendations

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,9 +4,11 @@ Celery tasks for batch prospect scanning and enrichment.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from app.core.celery_config import celery_app
|
||||
from app.modules.prospecting.config import config as prospecting_config
|
||||
from app.modules.prospecting.models import ProspectScanJob
|
||||
from app.modules.task_base import ModuleTask
|
||||
|
||||
@@ -53,6 +55,8 @@ def batch_http_check(self, job_id: int, limit: int = 100):
|
||||
job.processed_items = processed
|
||||
if processed % 10 == 0:
|
||||
db.flush()
|
||||
if processed < len(prospects):
|
||||
time.sleep(prospecting_config.batch_delay_seconds)
|
||||
|
||||
job.status = "completed"
|
||||
job.completed_at = datetime.now(UTC)
|
||||
@@ -61,7 +65,7 @@ def batch_http_check(self, job_id: int, limit: int = 100):
|
||||
except Exception as e:
|
||||
logger.error("batch_http_check job %d failed: %s", job_id, e, exc_info=True)
|
||||
job.status = "failed"
|
||||
job.error_message = str(e)[:500]
|
||||
job.error_log = str(e)[:500]
|
||||
job.completed_at = datetime.now(UTC)
|
||||
db.commit() # SVC-006 - persist failure status
|
||||
raise
|
||||
@@ -110,6 +114,8 @@ def batch_tech_scan(self, job_id: int, limit: int = 100):
|
||||
job.processed_items = processed
|
||||
if processed % 10 == 0:
|
||||
db.flush()
|
||||
if processed < len(prospects):
|
||||
time.sleep(prospecting_config.batch_delay_seconds)
|
||||
|
||||
job.status = "completed"
|
||||
job.completed_at = datetime.now(UTC)
|
||||
@@ -118,7 +124,7 @@ def batch_tech_scan(self, job_id: int, limit: int = 100):
|
||||
except Exception as e:
|
||||
logger.error("batch_tech_scan job %d failed: %s", job_id, e, exc_info=True)
|
||||
job.status = "failed"
|
||||
job.error_message = str(e)[:500]
|
||||
job.error_log = str(e)[:500]
|
||||
job.completed_at = datetime.now(UTC)
|
||||
db.commit() # SVC-006 - persist failure status
|
||||
raise
|
||||
@@ -167,6 +173,8 @@ def batch_performance_scan(self, job_id: int, limit: int = 50):
|
||||
job.processed_items = processed
|
||||
if processed % 5 == 0:
|
||||
db.flush()
|
||||
if processed < len(prospects):
|
||||
time.sleep(prospecting_config.batch_delay_seconds)
|
||||
|
||||
job.status = "completed"
|
||||
job.completed_at = datetime.now(UTC)
|
||||
@@ -175,7 +183,7 @@ def batch_performance_scan(self, job_id: int, limit: int = 50):
|
||||
except Exception as e:
|
||||
logger.error("batch_performance_scan job %d failed: %s", job_id, e, exc_info=True)
|
||||
job.status = "failed"
|
||||
job.error_message = str(e)[:500]
|
||||
job.error_log = str(e)[:500]
|
||||
job.completed_at = datetime.now(UTC)
|
||||
db.commit() # SVC-006 - persist failure status
|
||||
raise
|
||||
@@ -223,6 +231,8 @@ def batch_contact_scrape(self, job_id: int, limit: int = 100):
|
||||
job.processed_items = processed
|
||||
if processed % 10 == 0:
|
||||
db.flush()
|
||||
if processed < len(prospects):
|
||||
time.sleep(prospecting_config.batch_delay_seconds)
|
||||
|
||||
job.status = "completed"
|
||||
job.completed_at = datetime.now(UTC)
|
||||
@@ -231,7 +241,7 @@ def batch_contact_scrape(self, job_id: int, limit: int = 100):
|
||||
except Exception as e:
|
||||
logger.error("batch_contact_scrape job %d failed: %s", job_id, e, exc_info=True)
|
||||
job.status = "failed"
|
||||
job.error_message = str(e)[:500]
|
||||
job.error_log = str(e)[:500]
|
||||
job.completed_at = datetime.now(UTC)
|
||||
db.commit() # SVC-006 - persist failure status
|
||||
raise
|
||||
@@ -270,7 +280,7 @@ def batch_score_compute(self, job_id: int, limit: int = 500):
|
||||
except Exception as e:
|
||||
logger.error("batch_score_compute job %d failed: %s", job_id, e, exc_info=True)
|
||||
job.status = "failed"
|
||||
job.error_message = str(e)[:500]
|
||||
job.error_log = str(e)[:500]
|
||||
job.completed_at = datetime.now(UTC)
|
||||
db.commit() # SVC-006 - persist failure status
|
||||
raise
|
||||
@@ -331,7 +341,7 @@ def full_enrichment(self, job_id: int, prospect_id: int):
|
||||
except Exception as e:
|
||||
logger.error("full_enrichment job %d failed: %s", job_id, e, exc_info=True)
|
||||
job.status = "failed"
|
||||
job.error_message = str(e)[:500]
|
||||
job.error_log = str(e)[:500]
|
||||
job.completed_at = datetime.now(UTC)
|
||||
db.commit() # SVC-006 - persist failure status
|
||||
raise
|
||||
|
||||
Reference in New Issue
Block a user