feat(prospecting): add complete prospecting module for lead discovery and scoring
Some checks failed
CI / pytest (push) Failing after 48m31s
CI / docs (push) Has been skipped
CI / deploy (push) Has been skipped
CI / ruff (push) Successful in 11s
CI / validate (push) Successful in 23s
CI / dependency-scanning (push) Successful in 28s

Migrates scanning pipeline from marketing-.lu-domains app into Orion module.
Supports digital (domain scan) and offline (manual capture) lead channels
with enrichment, scoring, campaign management, and interaction tracking.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-28 00:59:47 +01:00
parent a709adaee8
commit 6d6eba75bf
79 changed files with 7551 additions and 0 deletions

View File

@@ -0,0 +1,47 @@
# app/modules/prospecting/models/__init__.py
from app.modules.prospecting.models.campaign import (
CampaignChannel,
CampaignSend,
CampaignSendStatus,
CampaignTemplate,
LeadType,
)
from app.modules.prospecting.models.interaction import (
InteractionOutcome,
InteractionType,
ProspectInteraction,
)
from app.modules.prospecting.models.performance_profile import (
ProspectPerformanceProfile,
)
from app.modules.prospecting.models.prospect import (
Prospect,
ProspectChannel,
ProspectStatus,
)
from app.modules.prospecting.models.prospect_contact import ContactType, ProspectContact
from app.modules.prospecting.models.prospect_score import ProspectScore
from app.modules.prospecting.models.scan_job import JobStatus, JobType, ProspectScanJob
from app.modules.prospecting.models.tech_profile import ProspectTechProfile
# Public names re-exported by the prospecting models package.
__all__ = [
    # prospect
    "Prospect",
    "ProspectChannel",
    "ProspectStatus",
    # tech_profile / performance_profile / prospect_score
    "ProspectTechProfile",
    "ProspectPerformanceProfile",
    "ProspectScore",
    # prospect_contact
    "ProspectContact",
    "ContactType",
    # scan_job
    "ProspectScanJob",
    "JobType",
    "JobStatus",
    # interaction
    "ProspectInteraction",
    "InteractionType",
    "InteractionOutcome",
    # campaign
    "CampaignTemplate",
    "CampaignSend",
    "CampaignChannel",
    "CampaignSendStatus",
    "LeadType",
]

View File

@@ -0,0 +1,83 @@
# app/modules/prospecting/models/campaign.py
"""
Campaign templates and send tracking.
Templates are tailored by lead type (no_website, bad_website, etc.)
with support for multiple languages and delivery channels.
"""
import enum
from sqlalchemy import (
Boolean,
Column,
DateTime,
Enum,
ForeignKey,
Integer,
String,
Text,
)
from app.core.database import Base
from models.database.base import TimestampMixin
class LeadType(str, enum.Enum):
    """Lead categories that campaign templates are tailored to.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    NO_WEBSITE = "no_website"
    BAD_WEBSITE = "bad_website"
    GMAIL_ONLY = "gmail_only"
    SECURITY_ISSUES = "security_issues"
    PERFORMANCE_ISSUES = "performance_issues"
    OUTDATED_CMS = "outdated_cms"
    # Catch-all value for leads that fit none of the categories above
    # (presumed from the name -- confirm with the template authors).
    GENERAL = "general"
class CampaignChannel(str, enum.Enum):
    """Delivery channels a campaign template or send can use.

    Members are also ``str`` instances and compare equal to their values.
    """

    EMAIL = "email"
    LETTER = "letter"
    PHONE_SCRIPT = "phone_script"
class CampaignSendStatus(str, enum.Enum):
    """Status values recorded on a ``CampaignSend`` row.

    Members are also ``str`` instances and compare equal to their values.
    """

    DRAFT = "draft"
    SENT = "sent"
    DELIVERED = "delivered"
    OPENED = "opened"
    BOUNCED = "bounced"
    REPLIED = "replied"
class CampaignTemplate(Base, TimestampMixin):
    """Reusable campaign message template.

    A template is keyed by lead type, delivery channel and language. The
    subject is optional; the body is mandatory.
    """

    __tablename__ = "campaign_templates"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String(255), nullable=False)

    # Targeting: which leads, through which channel, in which language.
    lead_type = Column(Enum(LeadType), nullable=False)
    channel = Column(
        Enum(CampaignChannel),
        nullable=False,
        default=CampaignChannel.EMAIL,
    )
    language = Column(String(5), nullable=False, default="fr")

    # Content
    subject_template = Column(String(500), nullable=True)
    body_template = Column(Text, nullable=False)

    is_active = Column(Boolean, nullable=False, default=True)
class CampaignSend(Base, TimestampMixin):
    """One campaign message addressed to a single prospect.

    Stores the rendered subject/body alongside the delivery status so the
    record stays meaningful even if the originating template is removed.
    """

    __tablename__ = "campaign_sends"

    id = Column(Integer, primary_key=True, index=True)

    # The template link is nulled at database level when the template is
    # deleted; the send row itself is kept.
    template_id = Column(
        Integer,
        ForeignKey("campaign_templates.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Sends are removed at database level together with their prospect.
    prospect_id = Column(
        Integer,
        ForeignKey("prospects.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    channel = Column(Enum(CampaignChannel), nullable=False)
    rendered_subject = Column(String(500), nullable=True)
    rendered_body = Column(Text, nullable=True)
    status = Column(
        Enum(CampaignSendStatus),
        nullable=False,
        default=CampaignSendStatus.DRAFT,
    )

    sent_at = Column(DateTime, nullable=True)
    # Plain integer: no foreign key to a users table is declared here.
    sent_by_user_id = Column(Integer, nullable=True)

View File

@@ -0,0 +1,54 @@
# app/modules/prospecting/models/interaction.py
"""
Interaction tracking for prospect follow-ups.
Logs all touchpoints: calls, emails, meetings, visits, notes.
"""
import enum
from sqlalchemy import Column, Date, Enum, ForeignKey, Integer, String, Text
from sqlalchemy.orm import relationship
from app.core.database import Base
from models.database.base import TimestampMixin
class InteractionType(str, enum.Enum):
    """Kinds of touchpoint that can be logged against a prospect.

    Members are also ``str`` instances and compare equal to their values.
    """

    NOTE = "note"
    CALL = "call"
    EMAIL_SENT = "email_sent"
    EMAIL_RECEIVED = "email_received"
    MEETING = "meeting"
    VISIT = "visit"
    SMS = "sms"
    PROPOSAL_SENT = "proposal_sent"
class InteractionOutcome(str, enum.Enum):
    """Possible results recorded for a logged interaction.

    Members are also ``str`` instances and compare equal to their values.
    """

    POSITIVE = "positive"
    NEUTRAL = "neutral"
    NEGATIVE = "negative"
    NO_ANSWER = "no_answer"
class ProspectInteraction(Base, TimestampMixin):
    """A single logged touchpoint with a prospect.

    Records what happened (type, subject, notes, outcome) and, optionally,
    the follow-up that should come next.
    """

    __tablename__ = "prospect_interactions"

    id = Column(Integer, primary_key=True, index=True)
    # Interactions are removed at database level together with their prospect.
    prospect_id = Column(
        Integer,
        ForeignKey("prospects.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # What happened
    interaction_type = Column(Enum(InteractionType), nullable=False)
    subject = Column(String(255), nullable=True)
    notes = Column(Text, nullable=True)
    outcome = Column(Enum(InteractionOutcome), nullable=True)

    # Planned follow-up
    next_action = Column(String(255), nullable=True)
    next_action_date = Column(Date, nullable=True)

    # Required author id; stored as a plain integer (no foreign key declared).
    created_by_user_id = Column(Integer, nullable=False)

    # Relationships
    prospect = relationship("Prospect", back_populates="interactions")

View File

@@ -0,0 +1,64 @@
# app/modules/prospecting/models/performance_profile.py
"""
Performance profile for a prospect's website.
Stores Lighthouse audit results including Core Web Vitals,
mobile-friendliness, and asset size analysis.
"""
from sqlalchemy import Boolean, Column, Float, ForeignKey, Integer, String, Text
from sqlalchemy.orm import relationship
from app.core.database import Base
from models.database.base import TimestampMixin
class ProspectPerformanceProfile(Base, TimestampMixin):
    """Website performance audit (PageSpeed Insights / Lighthouse) for a prospect.

    At most one profile exists per prospect: ``prospect_id`` is unique.
    Every measurement column is nullable.
    """

    __tablename__ = "prospect_performance_profiles"

    id = Column(Integer, primary_key=True, index=True)
    prospect_id = Column(
        Integer,
        ForeignKey("prospects.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )

    # Lighthouse category scores (0-100)
    performance_score = Column(Integer, nullable=True)
    accessibility_score = Column(Integer, nullable=True)
    best_practices_score = Column(Integer, nullable=True)
    seo_score = Column(Integer, nullable=True)

    # Timing and layout metrics (includes the Core Web Vitals LCP and CLS)
    first_contentful_paint_ms = Column(Integer, nullable=True)
    largest_contentful_paint_ms = Column(Integer, nullable=True)
    total_blocking_time_ms = Column(Integer, nullable=True)
    cumulative_layout_shift = Column(Float, nullable=True)
    # NOTE(review): no ``_ms`` suffix, unlike its siblings -- presumably
    # milliseconds as well; confirm against the scanner that fills it.
    speed_index = Column(Integer, nullable=True)
    time_to_interactive_ms = Column(Integer, nullable=True)

    # Mobile checks
    is_mobile_friendly = Column(Boolean, nullable=True)
    viewport_configured = Column(Boolean, nullable=True)
    font_size_ok = Column(Boolean, nullable=True)
    tap_targets_ok = Column(Boolean, nullable=True)

    # Transfer sizes, in bytes
    total_bytes = Column(Integer, nullable=True)
    html_bytes = Column(Integer, nullable=True)
    css_bytes = Column(Integer, nullable=True)
    js_bytes = Column(Integer, nullable=True)
    image_bytes = Column(Integer, nullable=True)
    font_bytes = Column(Integer, nullable=True)

    # Number of requests
    total_requests = Column(Integer, nullable=True)
    js_requests = Column(Integer, nullable=True)
    css_requests = Column(Integer, nullable=True)
    image_requests = Column(Integer, nullable=True)

    # Raw scan data
    lighthouse_json = Column(Text, nullable=True)  # JSON string
    scan_strategy = Column(String(20), nullable=True)  # mobile or desktop
    scan_error = Column(Text, nullable=True)

    # Relationships
    prospect = relationship("Prospect", back_populates="performance_profile")

View File

@@ -0,0 +1,82 @@
# app/modules/prospecting/models/prospect.py
"""
Prospect model - core entity for lead discovery.
Supports two channels:
- digital: discovered via domain scanning (.lu domains)
- offline: manually captured (street encounters, networking)
"""
import enum
from sqlalchemy import Boolean, Column, DateTime, Enum, Float, Integer, String, Text
from sqlalchemy.orm import relationship
from app.core.database import Base
from models.database.base import TimestampMixin
class ProspectChannel(str, enum.Enum):
    """How a prospect entered the system.

    Members are also ``str`` instances and compare equal to their values.
    """

    # Discovered via domain scanning.
    DIGITAL = "digital"
    # Captured manually (street encounters, networking).
    OFFLINE = "offline"
class ProspectStatus(str, enum.Enum):
    """Status values stored in ``Prospect.status``.

    Members are also ``str`` instances and compare equal to their values.
    """

    PENDING = "pending"
    ACTIVE = "active"
    INACTIVE = "inactive"
    PARKED = "parked"
    ERROR = "error"
    CONTACTED = "contacted"
    CONVERTED = "converted"
class Prospect(Base, TimestampMixin):
    """A business prospect (potential client).

    Digital prospects mainly use the domain / website columns, while
    offline prospects mainly use the location and capture columns. Related
    profiles, score, contacts and interactions are deleted together with
    the prospect (``all, delete-orphan`` cascade).
    """

    __tablename__ = "prospects"

    id = Column(Integer, primary_key=True, index=True)
    channel = Column(
        Enum(ProspectChannel),
        nullable=False,
        default=ProspectChannel.DIGITAL,
    )
    business_name = Column(String(255), nullable=True)
    domain_name = Column(String(255), nullable=True, unique=True, index=True)
    status = Column(
        Enum(ProspectStatus),
        nullable=False,
        default=ProspectStatus.PENDING,
    )
    source = Column(String(100), nullable=True)

    # Website status (digital channel)
    has_website = Column(Boolean, nullable=True)
    uses_https = Column(Boolean, nullable=True)
    http_status_code = Column(Integer, nullable=True)
    redirect_url = Column(Text, nullable=True)

    # Location (offline channel)
    address = Column(String(500), nullable=True)
    city = Column(String(100), nullable=True)
    postal_code = Column(String(10), nullable=True)
    country = Column(String(2), nullable=False, default="LU")

    # Notes and metadata
    notes = Column(Text, nullable=True)
    tags = Column(Text, nullable=True)  # JSON string of tags

    # Capture info
    captured_by_user_id = Column(Integer, nullable=True)
    location_lat = Column(Float, nullable=True)
    location_lng = Column(Float, nullable=True)

    # Timestamp of the most recent run of each scan type
    last_http_check_at = Column(DateTime, nullable=True)
    last_tech_scan_at = Column(DateTime, nullable=True)
    last_perf_scan_at = Column(DateTime, nullable=True)
    last_contact_scrape_at = Column(DateTime, nullable=True)

    # Relationships: one-to-one
    tech_profile = relationship(
        "ProspectTechProfile",
        back_populates="prospect",
        uselist=False,
        cascade="all, delete-orphan",
    )
    performance_profile = relationship(
        "ProspectPerformanceProfile",
        back_populates="prospect",
        uselist=False,
        cascade="all, delete-orphan",
    )
    score = relationship(
        "ProspectScore",
        back_populates="prospect",
        uselist=False,
        cascade="all, delete-orphan",
    )

    # Relationships: one-to-many
    contacts = relationship(
        "ProspectContact",
        back_populates="prospect",
        cascade="all, delete-orphan",
    )
    interactions = relationship(
        "ProspectInteraction",
        back_populates="prospect",
        cascade="all, delete-orphan",
    )

    @property
    def display_name(self) -> str:
        """Return the best available human-readable label.

        Preference order: business name, then domain name, then a
        placeholder built from the primary key.
        """
        label = self.business_name
        if not label:
            label = self.domain_name
        if not label:
            label = f"Prospect #{self.id}"
        return label

View File

@@ -0,0 +1,44 @@
# app/modules/prospecting/models/prospect_contact.py
"""
Contact information for a prospect.
Supports both auto-scraped (digital) and manually entered (offline) contacts.
"""
import enum
from sqlalchemy import Boolean, Column, Enum, ForeignKey, Integer, String, Text
from sqlalchemy.orm import relationship
from app.core.database import Base
from models.database.base import TimestampMixin
class ContactType(str, enum.Enum):
    """Kinds of contact detail stored in ``ProspectContact.contact_type``.

    Members are also ``str`` instances and compare equal to their values.
    """

    EMAIL = "email"
    PHONE = "phone"
    ADDRESS = "address"
    SOCIAL = "social"
    FORM = "form"
class ProspectContact(Base, TimestampMixin):
    """One contact detail (email, phone, ...) belonging to a prospect.

    Used for both auto-scraped and manually entered contacts; the
    ``source_*`` columns describe where a contact was found.
    """

    __tablename__ = "prospect_contacts"

    id = Column(Integer, primary_key=True, index=True)
    # Contacts are removed at database level together with their prospect.
    prospect_id = Column(
        Integer,
        ForeignKey("prospects.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    contact_type = Column(Enum(ContactType), nullable=False)
    value = Column(String(500), nullable=False)
    label = Column(String(100), nullable=True)  # e.g., "info", "sales", "main"

    # Provenance
    source_url = Column(Text, nullable=True)  # Page where contact was found
    source_element = Column(String(100), nullable=True)  # e.g., "mailto", "tel", "contact-form"

    # Validation state
    is_validated = Column(Boolean, nullable=False, default=False)
    validation_error = Column(Text, nullable=True)

    is_primary = Column(Boolean, nullable=False, default=False)

    # Relationships
    prospect = relationship("Prospect", back_populates="contacts")

View File

@@ -0,0 +1,46 @@
# app/modules/prospecting/models/prospect_score.py
"""
Opportunity score for a prospect.
Scoring algorithm: 0-100 total
- Technical Health: max 40pts
- Modernity: max 25pts
- Business Value: max 25pts
- Engagement: max 10pts
"""
from sqlalchemy import Column, ForeignKey, Integer, String, Text
from sqlalchemy.orm import relationship
from app.core.database import Base
from models.database.base import TimestampMixin
class ProspectScore(Base, TimestampMixin):
    """Opportunity score derived from analysing a prospect.

    At most one score row exists per prospect: ``prospect_id`` is unique.
    The overall score and each component default to 0.
    """

    __tablename__ = "prospect_scores"

    id = Column(Integer, primary_key=True, index=True)
    prospect_id = Column(
        Integer,
        ForeignKey("prospects.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )

    # Overall score (indexed)
    score = Column(Integer, nullable=False, default=0, index=True)

    # Component scores
    technical_health_score = Column(Integer, nullable=False, default=0)  # max 40
    modernity_score = Column(Integer, nullable=False, default=0)  # max 25
    business_value_score = Column(Integer, nullable=False, default=0)  # max 25
    engagement_score = Column(Integer, nullable=False, default=0)  # max 10

    # Detailed breakdown, serialized as JSON text
    reason_flags = Column(Text, nullable=True)  # JSON array of flag strings
    score_breakdown = Column(Text, nullable=True)  # JSON dict of flag -> points

    # Lead tier classification:
    # top_priority, quick_win, strategic, low_priority
    lead_tier = Column(String(20), nullable=True, index=True)
    notes = Column(Text, nullable=True)

    # Relationships
    prospect = relationship("Prospect", back_populates="score")

View File

@@ -0,0 +1,61 @@
# app/modules/prospecting/models/scan_job.py
"""
Scan job tracking for batch enrichment operations.
"""
import enum
from sqlalchemy import Column, DateTime, Enum, Integer, String, Text
from app.core.database import Base
from models.database.base import TimestampMixin
class JobType(str, enum.Enum):
    """Kinds of batch operation tracked by ``ProspectScanJob``.

    Members are also ``str`` instances and compare equal to their values.
    """

    IMPORT = "import"
    HTTP_CHECK = "http_check"
    TECH_SCAN = "tech_scan"
    PERFORMANCE_SCAN = "performance_scan"
    CONTACT_SCRAPE = "contact_scrape"
    SCORE_COMPUTE = "score_compute"
    FULL_ENRICHMENT = "full_enrichment"
    SECURITY_AUDIT = "security_audit"
class JobStatus(str, enum.Enum):
    """Status values stored in ``ProspectScanJob.status``.

    Members are also ``str`` instances and compare equal to their values.
    """

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class ProspectScanJob(Base, TimestampMixin):
    """Tracks one batch scanning / enrichment operation.

    Holds the job type and status, item counters, start/finish timestamps
    and free-form JSON text for configuration and results.
    """

    __tablename__ = "prospect_scan_jobs"

    id = Column(Integer, primary_key=True, index=True)
    job_type = Column(Enum(JobType), nullable=False)
    status = Column(Enum(JobStatus), nullable=False, default=JobStatus.PENDING)

    # Item counters (all default to 0 when the row is inserted)
    total_items = Column(Integer, nullable=False, default=0)
    processed_items = Column(Integer, nullable=False, default=0)
    failed_items = Column(Integer, nullable=False, default=0)
    skipped_items = Column(Integer, nullable=False, default=0)

    started_at = Column(DateTime, nullable=True)
    completed_at = Column(DateTime, nullable=True)

    config = Column(Text, nullable=True)  # JSON string
    result_summary = Column(Text, nullable=True)  # JSON string
    error_log = Column(Text, nullable=True)
    source_file = Column(String(500), nullable=True)
    # Presumably the id of the Celery task running this job -- confirm
    # against the code that enqueues it.
    celery_task_id = Column(String(255), nullable=True)

    @property
    def progress_percent(self) -> float:
        """Return processed items as a percentage of the total.

        Returns:
            The percentage rounded to one decimal place, or ``0.0`` when
            the total is zero or not yet set.
        """
        # Column defaults are only applied at INSERT time, so on an instance
        # that has not been flushed yet both counters are still None.
        # Treat "unset" as 0 instead of raising TypeError on the division.
        total = self.total_items or 0
        if not total:
            return 0.0
        processed = self.processed_items or 0
        return round(processed / total * 100, 1)

View File

@@ -0,0 +1,51 @@
# app/modules/prospecting/models/tech_profile.py
"""
Technology profile for a prospect's website.
Stores CMS, server, framework, analytics, and other
technology detection results from website scanning.
"""
from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, String, Text
from sqlalchemy.orm import relationship
from app.core.database import Base
from models.database.base import TimestampMixin
class ProspectTechProfile(Base, TimestampMixin):
    """Technologies detected on a prospect's website.

    At most one profile exists per prospect: ``prospect_id`` is unique.
    Every detection column is nullable.
    """

    __tablename__ = "prospect_tech_profiles"

    id = Column(Integer, primary_key=True, index=True)
    prospect_id = Column(
        Integer,
        ForeignKey("prospects.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
    )

    # CMS detection
    cms = Column(String(100), nullable=True)
    cms_version = Column(String(50), nullable=True)

    # Server and hosting
    server = Column(String(100), nullable=True)
    server_version = Column(String(50), nullable=True)
    hosting_provider = Column(String(100), nullable=True)
    cdn = Column(String(100), nullable=True)

    # SSL certificate
    has_valid_cert = Column(Boolean, nullable=True)
    cert_issuer = Column(String(200), nullable=True)
    cert_expires_at = Column(DateTime, nullable=True)

    # Frontend
    js_framework = Column(String(100), nullable=True)
    analytics = Column(String(200), nullable=True)
    tag_manager = Column(String(100), nullable=True)
    ecommerce_platform = Column(String(100), nullable=True)

    # Raw scan data
    tech_stack_json = Column(Text, nullable=True)  # JSON string
    scan_source = Column(String(50), nullable=True)
    scan_error = Column(Text, nullable=True)

    # Relationships
    prospect = relationship("Prospect", back_populates="tech_profile")