feat(prospecting): add complete prospecting module for lead discovery and scoring
Some checks failed
CI / pytest (push) Failing after 48m31s
CI / docs (push) Has been skipped
CI / deploy (push) Has been skipped
CI / ruff (push) Successful in 11s
CI / validate (push) Successful in 23s
CI / dependency-scanning (push) Successful in 28s

Migrates the scanning pipeline from the marketing-.lu-domains app into an Orion module.
Supports digital (domain scan) and offline (manual capture) lead channels
with enrichment, scoring, campaign management, and interaction tracking.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-28 00:59:47 +01:00
parent a709adaee8
commit 6d6eba75bf
79 changed files with 7551 additions and 0 deletions

View File

@@ -0,0 +1 @@
# app/modules/prospecting/services/__init__.py

View File

@@ -0,0 +1,190 @@
# app/modules/prospecting/services/campaign_service.py
"""
Campaign management service.
Handles campaign template CRUD, rendering with prospect data,
and send tracking.
"""
import json
import logging
from datetime import UTC, datetime
from sqlalchemy.orm import Session
from app.modules.prospecting.exceptions import (
CampaignRenderException,
CampaignTemplateNotFoundException,
)
from app.modules.prospecting.models import (
CampaignSend,
CampaignSendStatus,
CampaignTemplate,
Prospect,
)
from app.modules.prospecting.services.prospect_service import prospect_service
logger = logging.getLogger(__name__)
class CampaignService:
"""Service for campaign template management and sending."""
# --- Template CRUD ---
def get_templates(
self,
db: Session,
*,
lead_type: str | None = None,
active_only: bool = False,
) -> list[CampaignTemplate]:
query = db.query(CampaignTemplate)
if lead_type:
query = query.filter(CampaignTemplate.lead_type == lead_type)
if active_only:
query = query.filter(CampaignTemplate.is_active.is_(True))
return query.order_by(CampaignTemplate.lead_type, CampaignTemplate.name).all()
def get_template_by_id(self, db: Session, template_id: int) -> CampaignTemplate:
template = db.query(CampaignTemplate).filter(CampaignTemplate.id == template_id).first()
if not template:
raise CampaignTemplateNotFoundException(str(template_id))
return template
def create_template(self, db: Session, data: dict) -> CampaignTemplate:
template = CampaignTemplate(
name=data["name"],
lead_type=data["lead_type"],
channel=data.get("channel", "email"),
language=data.get("language", "fr"),
subject_template=data.get("subject_template"),
body_template=data["body_template"],
is_active=data.get("is_active", True),
)
db.add(template)
db.commit()
db.refresh(template)
return template
def update_template(self, db: Session, template_id: int, data: dict) -> CampaignTemplate:
template = self.get_template_by_id(db, template_id)
for field in ["name", "lead_type", "channel", "language", "subject_template", "body_template", "is_active"]:
if field in data and data[field] is not None:
setattr(template, field, data[field])
db.commit()
db.refresh(template)
return template
def delete_template(self, db: Session, template_id: int) -> bool:
template = self.get_template_by_id(db, template_id)
db.delete(template)
db.commit()
return True
# --- Rendering ---
def render_campaign(self, db: Session, template_id: int, prospect_id: int) -> dict:
"""Render a campaign template with prospect data."""
template = self.get_template_by_id(db, template_id)
prospect = prospect_service.get_by_id(db, prospect_id)
placeholders = self._build_placeholders(prospect)
try:
rendered_subject = None
if template.subject_template:
rendered_subject = template.subject_template.format(**placeholders)
rendered_body = template.body_template.format(**placeholders)
except KeyError as e:
raise CampaignRenderException(template_id, f"Missing placeholder: {e}")
return {
"subject": rendered_subject,
"body": rendered_body,
}
# --- Sending ---
def send_campaign(
self,
db: Session,
template_id: int,
prospect_ids: list[int],
sent_by_user_id: int,
) -> list[CampaignSend]:
"""Create campaign send records for prospects."""
template = self.get_template_by_id(db, template_id)
sends = []
for pid in prospect_ids:
prospect = prospect_service.get_by_id(db, pid)
placeholders = self._build_placeholders(prospect)
try:
rendered_subject = None
if template.subject_template:
rendered_subject = template.subject_template.format(**placeholders)
rendered_body = template.body_template.format(**placeholders)
except KeyError:
rendered_body = template.body_template
rendered_subject = template.subject_template
send = CampaignSend(
template_id=template_id,
prospect_id=pid,
channel=template.channel,
rendered_subject=rendered_subject,
rendered_body=rendered_body,
status=CampaignSendStatus.SENT,
sent_at=datetime.now(UTC),
sent_by_user_id=sent_by_user_id,
)
db.add(send)
sends.append(send)
db.commit()
logger.info("Sent campaign %d to %d prospects", template_id, len(prospect_ids))
return sends
def get_sends(
self,
db: Session,
*,
prospect_id: int | None = None,
template_id: int | None = None,
) -> list[CampaignSend]:
query = db.query(CampaignSend)
if prospect_id:
query = query.filter(CampaignSend.prospect_id == prospect_id)
if template_id:
query = query.filter(CampaignSend.template_id == template_id)
return query.order_by(CampaignSend.created_at.desc()).all()
def _build_placeholders(self, prospect: Prospect) -> dict:
"""Build template placeholder values from prospect data."""
contacts = prospect.contacts or []
primary_email = next((c.value for c in contacts if c.contact_type == "email"), "")
primary_phone = next((c.value for c in contacts if c.contact_type == "phone"), "")
reason_flags = []
if prospect.score and prospect.score.reason_flags:
try:
reason_flags = json.loads(prospect.score.reason_flags)
except (json.JSONDecodeError, TypeError):
pass
issues_text = ", ".join(f.replace("_", " ") for f in reason_flags)
return {
"business_name": prospect.business_name or prospect.domain_name or "",
"domain": prospect.domain_name or "",
"score": str(prospect.score.score) if prospect.score else "",
"issues": issues_text,
"primary_email": primary_email,
"primary_phone": primary_phone,
"city": prospect.city or "Luxembourg",
}
campaign_service = CampaignService()

View File

@@ -0,0 +1,369 @@
# app/modules/prospecting/services/enrichment_service.py
"""
Enrichment service for prospect scanning pipeline.
Migrated from marketing-.lu-domains/app/services/enrichment_service.py.
Performs passive HTTP checks, technology detection, performance audits,
and contact scraping for digital prospects.
Uses `requests` (sync) to match Orion's tech stack.
"""
import logging
import re
import socket
import ssl
from datetime import UTC, datetime
import requests
from sqlalchemy.orm import Session
from app.modules.prospecting.config import config
from app.modules.prospecting.models import (
Prospect,
ProspectContact,
ProspectPerformanceProfile,
ProspectTechProfile,
)
logger = logging.getLogger(__name__)
# CMS detection patterns.
# Maps a CMS identifier to regex fragments that EnrichmentService._detect_cms
# searches for in the fetched page HTML. Detection returns the first CMS (in
# dict order) with any matching fragment, so the order of entries matters.
CMS_PATTERNS = {
    "wordpress": [r"wp-content", r"wp-includes", r"wordpress"],
    "drupal": [r"drupal", r"sites/default", r"sites/all"],
    "joomla": [r"/media/jui/", r"joomla", r"/components/com_"],
    "shopify": [r"cdn\.shopify\.com", r"shopify"],
    "wix": [r"wix\.com", r"wixstatic\.com", r"parastorage\.com"],
    "squarespace": [r"squarespace\.com", r"sqsp\.com"],
    "webflow": [r"webflow\.com", r"webflow\.io"],
    "typo3": [r"typo3", r"/typo3conf/"],
    "prestashop": [r"prestashop", r"/modules/ps_"],
    "magento": [r"magento", r"mage/", r"/static/version"],
}
# JavaScript framework patterns, used by EnrichmentService._detect_js_framework.
# As with the CMS table, the first framework in dict order that matches wins.
JS_FRAMEWORK_PATTERNS = {
    "react": [r"react", r"__NEXT_DATA__", r"_next/"],
    "vue": [r"vue\.js", r"vue\.min\.js", r"__vue__"],
    "angular": [r"angular", r"ng-version"],
    "jquery": [r"jquery"],
    "alpine": [r"alpine\.js", r"alpinejs"],
}
# Analytics tool patterns, used by EnrichmentService._detect_analytics.
# Unlike the tables above, every matching tool is reported (comma-joined).
ANALYTICS_PATTERNS = {
    "google_analytics": [r"google-analytics\.com", r"gtag/js", r"ga\.js"],
    "google_tag_manager": [r"googletagmanager\.com", r"gtm\.js"],
    "matomo": [r"matomo", r"piwik"],
    "facebook_pixel": [r"facebook\.net/en_US/fbevents"],
}
class EnrichmentService:
"""Service for prospect enrichment via passive scanning."""
def check_http(self, db: Session, prospect: Prospect) -> dict:
"""Check HTTP connectivity for a prospect's domain."""
result = {
"has_website": False,
"uses_https": False,
"http_status_code": None,
"redirect_url": None,
"error": None,
}
domain = prospect.domain_name
if not domain:
result["error"] = "No domain name"
return result
# Try HTTPS first, then HTTP
for scheme in ["https", "http"]:
try:
url = f"{scheme}://{domain}"
response = requests.get(
url,
timeout=config.http_timeout,
allow_redirects=True,
verify=False, # noqa: SEC047 passive scan, not sending sensitive data
)
result["has_website"] = True
result["uses_https"] = scheme == "https"
result["http_status_code"] = response.status_code
if response.url != url:
result["redirect_url"] = str(response.url)
break
except requests.exceptions.Timeout:
result["error"] = f"Timeout on {scheme}"
except requests.exceptions.RequestException as e:
result["error"] = str(e)
if scheme == "https":
continue
break
# Update prospect
prospect.has_website = result["has_website"]
prospect.uses_https = result["uses_https"]
prospect.http_status_code = result["http_status_code"]
prospect.redirect_url = result["redirect_url"]
prospect.last_http_check_at = datetime.now(UTC)
if result["has_website"]:
prospect.status = "active"
db.commit()
return result
def scan_tech_stack(self, db: Session, prospect: Prospect) -> ProspectTechProfile | None:
"""Scan technology stack from prospect's website HTML."""
domain = prospect.domain_name
if not domain or not prospect.has_website:
return None
scheme = "https" if prospect.uses_https else "http"
url = f"{scheme}://{domain}"
try:
response = requests.get(
url,
timeout=config.http_timeout,
allow_redirects=True,
verify=False, # noqa: SEC047 passive scan, not sending sensitive data
)
html = response.text.lower()
headers = dict(response.headers)
cms = self._detect_cms(html)
js_framework = self._detect_js_framework(html)
analytics = self._detect_analytics(html)
server = headers.get("Server", "").split("/")[0] if "Server" in headers else None
server_version = None
if server and "/" in headers.get("Server", ""):
server_version = headers["Server"].split("/", 1)[1].strip()
# SSL certificate check
has_valid_cert = None
cert_issuer = None
cert_expires_at = None
if prospect.uses_https:
try:
ctx = ssl.create_default_context()
with ctx.wrap_socket(
socket.create_connection((domain, 443), timeout=5),
server_hostname=domain,
) as sock:
cert = sock.getpeercert()
has_valid_cert = True
cert_issuer = dict(x[0] for x in cert.get("issuer", [()])).get("organizationName")
not_after = cert.get("notAfter")
if not_after:
cert_expires_at = datetime.strptime(not_after, "%b %d %H:%M:%S %Y %Z")
except Exception:
has_valid_cert = False
# Upsert tech profile
profile = prospect.tech_profile
if not profile:
profile = ProspectTechProfile(prospect_id=prospect.id)
db.add(profile)
profile.cms = cms
profile.server = server
profile.server_version = server_version
profile.js_framework = js_framework
profile.analytics = analytics
profile.has_valid_cert = has_valid_cert
profile.cert_issuer = cert_issuer
profile.cert_expires_at = cert_expires_at
profile.scan_source = "basic_http"
prospect.last_tech_scan_at = datetime.now(UTC)
db.commit()
return profile
except Exception as e:
logger.error("Tech scan failed for %s: %s", domain, e)
if prospect.tech_profile:
prospect.tech_profile.scan_error = str(e)
prospect.last_tech_scan_at = datetime.now(UTC)
db.commit()
return None
def scan_performance(self, db: Session, prospect: Prospect) -> ProspectPerformanceProfile | None:
"""Run PageSpeed Insights audit for a prospect's website."""
domain = prospect.domain_name
if not domain or not prospect.has_website:
return None
scheme = "https" if prospect.uses_https else "http"
url = f"{scheme}://{domain}"
api_url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
params = {
"url": url,
"strategy": "mobile",
"category": ["performance", "accessibility", "best-practices", "seo"],
}
if config.pagespeed_api_key:
params["key"] = config.pagespeed_api_key
try:
response = requests.get(api_url, params=params, timeout=60)
data = response.json()
lighthouse = data.get("lighthouseResult", {})
categories = lighthouse.get("categories", {})
audits = lighthouse.get("audits", {})
perf_score = int((categories.get("performance", {}).get("score") or 0) * 100)
accessibility = int((categories.get("accessibility", {}).get("score") or 0) * 100)
best_practices = int((categories.get("best-practices", {}).get("score") or 0) * 100)
seo = int((categories.get("seo", {}).get("score") or 0) * 100)
# Upsert performance profile
profile = prospect.performance_profile
if not profile:
profile = ProspectPerformanceProfile(prospect_id=prospect.id)
db.add(profile)
profile.performance_score = perf_score
profile.accessibility_score = accessibility
profile.best_practices_score = best_practices
profile.seo_score = seo
# Core Web Vitals
fcp = audits.get("first-contentful-paint", {}).get("numericValue")
profile.first_contentful_paint_ms = int(fcp) if fcp else None
lcp = audits.get("largest-contentful-paint", {}).get("numericValue")
profile.largest_contentful_paint_ms = int(lcp) if lcp else None
tbt = audits.get("total-blocking-time", {}).get("numericValue")
profile.total_blocking_time_ms = int(tbt) if tbt else None
cls_val = audits.get("cumulative-layout-shift", {}).get("numericValue")
profile.cumulative_layout_shift = cls_val
si = audits.get("speed-index", {}).get("numericValue")
profile.speed_index = int(si) if si else None
tti = audits.get("interactive", {}).get("numericValue")
profile.time_to_interactive_ms = int(tti) if tti else None
# Mobile-friendly check
viewport = audits.get("viewport", {}).get("score")
profile.viewport_configured = viewport == 1 if viewport is not None else None
profile.is_mobile_friendly = profile.viewport_configured
profile.scan_strategy = "mobile"
prospect.last_perf_scan_at = datetime.now(UTC)
db.commit()
return profile
except Exception as e:
logger.error("Performance scan failed for %s: %s", domain, e)
prospect.last_perf_scan_at = datetime.now(UTC)
db.commit()
return None
def scrape_contacts(self, db: Session, prospect: Prospect) -> list[ProspectContact]:
"""Scrape email and phone contacts from prospect's website."""
domain = prospect.domain_name
if not domain or not prospect.has_website:
return []
scheme = "https" if prospect.uses_https else "http"
base_url = f"{scheme}://{domain}"
paths = ["", "/contact", "/kontakt", "/impressum", "/about"]
email_pattern = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
phone_pattern = re.compile(r"(?:\+352|00352)?[\s.-]?\d{2,3}[\s.-]?\d{2,3}[\s.-]?\d{2,3}")
false_positive_domains = {"example.com", "email.com", "domain.com", "wordpress.org", "w3.org", "schema.org"}
found_emails = set()
found_phones = set()
contacts = []
session = requests.Session()
session.verify = False # noqa: SEC047 passive scan, not sending sensitive data
session.headers.update({"User-Agent": "Mozilla/5.0 (compatible; OrionBot/1.0)"})
for path in paths:
try:
url = base_url + path
response = session.get(url, timeout=config.http_timeout, allow_redirects=True)
if response.status_code != 200:
continue
html = response.text
for email in email_pattern.findall(html):
email_domain = email.split("@")[1].lower()
if email_domain not in false_positive_domains and email not in found_emails:
found_emails.add(email)
contacts.append(ProspectContact(
prospect_id=prospect.id,
contact_type="email",
value=email.lower(),
source_url=url,
source_element="regex",
))
for phone in phone_pattern.findall(html):
phone_clean = re.sub(r"[\s.-]", "", phone)
if len(phone_clean) >= 8 and phone_clean not in found_phones:
found_phones.add(phone_clean)
contacts.append(ProspectContact(
prospect_id=prospect.id,
contact_type="phone",
value=phone_clean,
source_url=url,
source_element="regex",
))
except Exception as e:
logger.debug("Contact scrape failed for %s%s: %s", domain, path, e)
session.close()
# Save contacts (replace existing auto-scraped ones)
db.query(ProspectContact).filter(
ProspectContact.prospect_id == prospect.id,
ProspectContact.source_element == "regex",
).delete()
for contact in contacts:
db.add(contact)
# Mark first email and phone as primary
if contacts:
for c in contacts:
if c.contact_type == "email":
c.is_primary = True
break
for c in contacts:
if c.contact_type == "phone":
c.is_primary = True
break
prospect.last_contact_scrape_at = datetime.now(UTC)
db.commit()
return contacts
def _detect_cms(self, html: str) -> str | None:
for cms, patterns in CMS_PATTERNS.items():
for pattern in patterns:
if re.search(pattern, html):
return cms
return None
def _detect_js_framework(self, html: str) -> str | None:
for framework, patterns in JS_FRAMEWORK_PATTERNS.items():
for pattern in patterns:
if re.search(pattern, html):
return framework
return None
def _detect_analytics(self, html: str) -> str | None:
found = []
for tool, patterns in ANALYTICS_PATTERNS.items():
for pattern in patterns:
if re.search(pattern, html):
found.append(tool)
break
return ",".join(found) if found else None
enrichment_service = EnrichmentService()

View File

@@ -0,0 +1,77 @@
# app/modules/prospecting/services/interaction_service.py
"""
Interaction tracking service.
Manages logging of all touchpoints with prospects:
calls, emails, meetings, visits, notes, etc.
"""
import logging
from datetime import date
from sqlalchemy.orm import Session
from app.modules.prospecting.models import ProspectInteraction
logger = logging.getLogger(__name__)
class InteractionService:
    """Records and queries prospect touchpoints (calls, emails, meetings, ...)."""

    # Optional keys copied from the input dict; absent keys are stored as None.
    _OPTIONAL_KEYS = ("subject", "notes", "outcome", "next_action", "next_action_date")

    def create(
        self,
        db: Session,
        prospect_id: int,
        user_id: int,
        data: dict,
    ) -> ProspectInteraction:
        """Persist a new interaction for a prospect and return it.

        ``data`` must contain ``interaction_type``; the other supported keys
        are optional. The row is committed and refreshed before returning.
        """
        kind = data["interaction_type"]
        details = {key: data.get(key) for key in self._OPTIONAL_KEYS}
        entry = ProspectInteraction(
            prospect_id=prospect_id,
            interaction_type=kind,
            created_by_user_id=user_id,
            **details,
        )
        db.add(entry)
        db.commit()
        db.refresh(entry)
        logger.info("Interaction logged for prospect %d: %s", prospect_id, kind)
        return entry

    def get_for_prospect(
        self,
        db: Session,
        prospect_id: int,
    ) -> list[ProspectInteraction]:
        """Return every interaction of one prospect, most recent first."""
        newest_first = ProspectInteraction.created_at.desc()
        query = db.query(ProspectInteraction).filter(
            ProspectInteraction.prospect_id == prospect_id
        )
        return query.order_by(newest_first).all()

    def get_upcoming_actions(
        self,
        db: Session,
        *,
        before_date: date | None = None,
    ) -> list[ProspectInteraction]:
        """Return interactions that carry a follow-up action, earliest due first.

        Only rows with both ``next_action`` and ``next_action_date`` set are
        included; ``before_date`` additionally limits them to actions due on
        or before that date.
        """
        criteria = [
            ProspectInteraction.next_action.isnot(None),
            ProspectInteraction.next_action_date.isnot(None),
        ]
        if before_date:
            criteria.append(ProspectInteraction.next_action_date <= before_date)
        due_order = ProspectInteraction.next_action_date.asc()
        return db.query(ProspectInteraction).filter(*criteria).order_by(due_order).all()


interaction_service = InteractionService()

View File

@@ -0,0 +1,153 @@
# app/modules/prospecting/services/lead_service.py
"""
Lead filtering and export service.
Provides filtered views of scored prospects and CSV export capabilities.
"""
import csv
import io
import json
import logging
from sqlalchemy.orm import Session, joinedload
from app.modules.prospecting.models import (
Prospect,
ProspectScore,
)
logger = logging.getLogger(__name__)
class LeadService:
    """Service for lead retrieval and export.

    A lead is a prospect that has a score row; results are plain dicts so
    they can be serialised or written to CSV directly.
    """

    def get_leads(
        self,
        db: Session,
        *,
        page: int = 1,
        per_page: int = 20,
        min_score: int = 0,
        max_score: int = 100,
        lead_tier: str | None = None,
        channel: str | None = None,
        has_email: bool | None = None,
        has_phone: bool | None = None,
        reason_flag: str | None = None,
    ) -> tuple[list[dict], int]:
        """Get filtered leads with scores.

        All filters, including contact availability, are applied in SQL
        before pagination, so each page holds up to ``per_page`` leads and
        the returned total matches the filtered set.

        Args:
            db: Active database session.
            page: 1-based page number; values below 1 are treated as 1.
            per_page: Page size.
            min_score / max_score: Inclusive score bounds.
            lead_tier: Exact lead tier to match.
            channel: Prospect channel to match.
            has_email / has_phone: ``True`` requires, ``False`` excludes
                prospects with a contact of that type; ``None`` ignores it.
            reason_flag: Exact reason flag that must be present.

        Returns:
            ``(leads, total)``: lead dicts ordered by score descending and
            the number of prospects matching the filters.
        """
        query = (
            db.query(Prospect)
            .join(ProspectScore)
            .options(
                joinedload(Prospect.score),
                joinedload(Prospect.contacts),
            )
            .filter(
                ProspectScore.score >= min_score,
                ProspectScore.score <= max_score,
            )
        )
        if lead_tier:
            query = query.filter(ProspectScore.lead_tier == lead_tier)
        if channel:
            query = query.filter(Prospect.channel == channel)
        if reason_flag:
            # Flags are stored as a JSON list of strings; matching the quoted
            # form keeps "slow" from also matching "very_slow". autoescape
            # stops "_" in flag names from acting as a LIKE wildcard.
            query = query.filter(
                ProspectScore.reason_flags.contains(json.dumps(reason_flag), autoescape=True)
            )
        for wanted, contact_type in ((has_email, "email"), (has_phone, "phone")):
            if wanted is None:
                continue
            has_contact = Prospect.contacts.any(contact_type=contact_type)
            query = query.filter(has_contact if wanted else ~has_contact)

        total = query.count()
        prospects = (
            query.order_by(ProspectScore.score.desc())
            .offset(max(page - 1, 0) * per_page)
            .limit(per_page)
            .all()
        )

        leads = []
        for p in prospects:
            contacts = p.contacts or []
            leads.append({
                "id": p.id,
                "business_name": p.business_name,
                "domain_name": p.domain_name,
                "channel": str(p.channel.value) if p.channel else None,
                "score": p.score.score if p.score else 0,
                "lead_tier": p.score.lead_tier if p.score else None,
                "reason_flags": self._parse_reason_flags(p.score.reason_flags if p.score else None),
                "primary_email": self._pick_contact(contacts, "email"),
                "primary_phone": self._pick_contact(contacts, "phone"),
            })
        return leads, total

    def get_top_priority(self, db: Session, limit: int = 50) -> list[dict]:
        """Return up to ``limit`` leads scoring 70 or more."""
        leads, _ = self.get_leads(db, min_score=70, per_page=limit)
        return leads

    def get_quick_wins(self, db: Session, limit: int = 50) -> list[dict]:
        """Return up to ``limit`` leads scoring between 50 and 69."""
        leads, _ = self.get_leads(db, min_score=50, max_score=69, per_page=limit)
        return leads

    def export_csv(
        self,
        db: Session,
        *,
        min_score: int = 0,
        lead_tier: str | None = None,
        channel: str | None = None,
        limit: int = 1000,
    ) -> str:
        """Export leads to CSV string.

        Writes a header row followed by at most ``limit`` leads, highest
        score first. Missing values are written as empty cells and reason
        flags are joined with ``"; "``.
        """
        leads, _ = self.get_leads(
            db,
            min_score=min_score,
            lead_tier=lead_tier,
            channel=channel,
            per_page=limit,
        )
        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerow([
            "Domain", "Business Name", "Channel", "Score", "Tier",
            "Issues", "Email", "Phone",
        ])
        writer.writerows(
            [
                lead["domain_name"] or "",
                lead["business_name"] or "",
                lead["channel"] or "",
                lead["score"],
                lead["lead_tier"] or "",
                "; ".join(str(flag) for flag in lead["reason_flags"]),
                lead["primary_email"] or "",
                lead["primary_phone"] or "",
            ]
            for lead in leads
        )
        return output.getvalue()

    # --- Helpers ---

    @staticmethod
    def _pick_contact(contacts: list, contact_type: str) -> str | None:
        """Return the primary contact value of a type, else the first, else ``None``."""
        of_type = [c for c in contacts if c.contact_type == contact_type]
        primary = next((c.value for c in of_type if c.is_primary), None)
        if primary:
            return primary
        return next((c.value for c in of_type), None)

    @staticmethod
    def _parse_reason_flags(raw: str | None) -> list:
        """Decode stored reason flags; anything but a JSON list yields ``[]``."""
        if not raw:
            return []
        try:
            decoded = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return []
        return decoded if isinstance(decoded, list) else []


lead_service = LeadService()

View File

@@ -0,0 +1,235 @@
# app/modules/prospecting/services/prospect_service.py
"""
Prospect CRUD service.
Manages creation, retrieval, update, and deletion of prospects.
Supports both digital (domain scan) and offline (manual capture) channels.
"""
import json
import logging
from sqlalchemy import func, or_
from sqlalchemy.orm import Session, joinedload
from app.modules.prospecting.exceptions import (
DuplicateDomainException,
ProspectNotFoundException,
)
from app.modules.prospecting.models import (
Prospect,
ProspectChannel,
ProspectContact,
ProspectScore,
ProspectStatus,
)
logger = logging.getLogger(__name__)
class ProspectService:
    """Service for prospect CRUD operations.

    Domain names are treated case-insensitively: they are trimmed and
    lowercased before being stored or looked up.
    """

    # Prospect attributes that ``update`` may overwrite (tags handled apart).
    _UPDATABLE_FIELDS = ("business_name", "status", "source", "address", "city", "postal_code", "notes")

    @staticmethod
    def _normalize_domain(domain_name: str | None) -> str | None:
        """Trim and lowercase a domain; blank or missing input gives ``None``."""
        if not domain_name:
            return None
        return domain_name.strip().lower() or None

    @staticmethod
    def _serialize_tags(tags):
        """Encode a tag list as JSON text; pass any other value through."""
        return json.dumps(tags) if isinstance(tags, list) else tags

    def get_by_id(self, db: Session, prospect_id: int) -> Prospect:
        """Return a prospect with profiles, score and contacts eagerly loaded.

        Raises:
            ProspectNotFoundException: No prospect has that id.
        """
        prospect = (
            db.query(Prospect)
            .options(
                joinedload(Prospect.tech_profile),
                joinedload(Prospect.performance_profile),
                joinedload(Prospect.score),
                joinedload(Prospect.contacts),
            )
            .filter(Prospect.id == prospect_id)
            .first()
        )
        if not prospect:
            raise ProspectNotFoundException(str(prospect_id))
        return prospect

    def get_by_domain(self, db: Session, domain_name: str) -> Prospect | None:
        """Return the prospect for a domain (case-insensitive), or ``None``."""
        normalized = self._normalize_domain(domain_name)
        if normalized is None:
            # Without this guard the filter would become "domain IS NULL" and
            # return an arbitrary prospect that has no domain.
            return None
        return db.query(Prospect).filter(func.lower(Prospect.domain_name) == normalized).first()

    def get_all(
        self,
        db: Session,
        *,
        page: int = 1,
        per_page: int = 20,
        search: str | None = None,
        channel: str | None = None,
        status: str | None = None,
        tier: str | None = None,
        city: str | None = None,
        has_email: bool | None = None,
        has_phone: bool | None = None,
    ) -> tuple[list[Prospect], int]:
        """Return one page of prospects, newest first, plus the total count.

        Args:
            db: Active database session.
            page: 1-based page number; values below 1 are treated as 1.
            per_page: Page size.
            search: Case-insensitive substring of domain or business name.
            channel / status: Exact matches on the prospect.
            tier: Lead tier of the prospect's score.
            city: Case-insensitive substring of the city.
            has_email / has_phone: ``True`` requires, ``False`` excludes
                prospects with a contact of that type; ``None`` ignores it.
        """
        query = db.query(Prospect).options(
            joinedload(Prospect.score),
            joinedload(Prospect.contacts),
        )
        if search:
            query = query.filter(
                or_(
                    Prospect.domain_name.ilike(f"%{search}%"),
                    Prospect.business_name.ilike(f"%{search}%"),
                )
            )
        if channel:
            query = query.filter(Prospect.channel == channel)
        if status:
            query = query.filter(Prospect.status == status)
        if city:
            query = query.filter(Prospect.city.ilike(f"%{city}%"))
        if tier:
            query = query.join(ProspectScore).filter(ProspectScore.lead_tier == tier)
        for wanted, contact_type in ((has_email, "email"), (has_phone, "phone")):
            if wanted is None:
                continue
            has_contact = Prospect.contacts.any(ProspectContact.contact_type == contact_type)
            query = query.filter(has_contact if wanted else ~has_contact)

        total = query.count()
        prospects = (
            query.order_by(Prospect.created_at.desc())
            .offset(max(page - 1, 0) * per_page)
            .limit(per_page)
            .all()
        )
        return prospects, total

    def create(self, db: Session, data: dict, captured_by_user_id: int | None = None) -> Prospect:
        """Create a prospect, with optional inline contacts, and commit it.

        ``channel`` defaults to ``"digital"``. For digital prospects the
        normalised domain must not exist yet.

        Raises:
            DuplicateDomainException: A digital prospect with that domain exists.
            ValueError: ``channel`` is not a valid ``ProspectChannel`` value.
        """
        channel = data.get("channel", "digital")
        domain = self._normalize_domain(data.get("domain_name"))
        if channel == "digital" and domain and self.get_by_domain(db, domain):
            raise DuplicateDomainException(domain)

        prospect = Prospect(
            channel=ProspectChannel(channel),
            business_name=data.get("business_name"),
            domain_name=domain,
            status=ProspectStatus.PENDING,
            source=data.get("source", "domain_scan" if channel == "digital" else "manual"),
            address=data.get("address"),
            city=data.get("city"),
            postal_code=data.get("postal_code"),
            country=data.get("country", "LU"),
            notes=data.get("notes"),
            tags=self._serialize_tags(data.get("tags")),
            captured_by_user_id=captured_by_user_id,
            location_lat=data.get("location_lat"),
            location_lng=data.get("location_lng"),
        )
        db.add(prospect)
        # Flush so prospect.id is available for the contact rows below.
        db.flush()

        # Create inline contacts if provided
        for c in data.get("contacts", []):
            db.add(ProspectContact(
                prospect_id=prospect.id,
                contact_type=c["contact_type"],
                value=c["value"],
                label=c.get("label"),
                is_primary=c.get("is_primary", False),
            ))
        db.commit()
        db.refresh(prospect)
        logger.info("Created prospect: %s (channel=%s)", prospect.display_name, channel)
        return prospect

    def create_bulk(self, db: Session, domain_names: list[str], source: str = "csv_import") -> tuple[int, int]:
        """Create digital prospects for the given domains.

        Blank entries are ignored. Domains that already exist, or that are
        repeated within ``domain_names``, count as skipped.

        Returns:
            ``(created, skipped)`` counts.
        """
        created = 0
        skipped = 0
        # Tracks domains added in this batch, so repeats are caught even when
        # the session does not autoflush pending rows before each lookup.
        seen: set[str] = set()
        for raw_name in domain_names:
            name = self._normalize_domain(raw_name)
            if not name:
                continue
            if name in seen or self.get_by_domain(db, name):
                skipped += 1
                continue
            seen.add(name)
            db.add(Prospect(
                channel=ProspectChannel.DIGITAL,
                domain_name=name,
                source=source,
            ))
            created += 1
        db.commit()
        logger.info("Bulk import: %d created, %d skipped", created, skipped)
        return created, skipped

    def update(self, db: Session, prospect_id: int, data: dict) -> Prospect:
        """Apply the non-``None`` values in ``data`` to a prospect.

        ``tags`` is written whenever the key is present (a list is stored as
        JSON text); the other fields are skipped when absent or ``None``.

        Raises:
            ProspectNotFoundException: No prospect has that id.
        """
        prospect = self.get_by_id(db, prospect_id)
        for field in self._UPDATABLE_FIELDS:
            value = data.get(field)
            if value is not None:
                setattr(prospect, field, value)
        if "tags" in data:
            prospect.tags = self._serialize_tags(data["tags"])
        db.commit()
        db.refresh(prospect)
        return prospect

    def delete(self, db: Session, prospect_id: int) -> bool:
        """Delete a prospect and return ``True``.

        Raises:
            ProspectNotFoundException: No prospect has that id.
        """
        prospect = self.get_by_id(db, prospect_id)
        db.delete(prospect)
        db.commit()
        logger.info("Deleted prospect: %d", prospect_id)
        return True

    def get_pending_http_check(self, db: Session, limit: int = 100) -> list[Prospect]:
        """Return digital prospects with a domain that were never HTTP-checked."""
        return (
            db.query(Prospect)
            .filter(
                Prospect.channel == ProspectChannel.DIGITAL,
                Prospect.domain_name.isnot(None),
                Prospect.last_http_check_at.is_(None),
            )
            .limit(limit)
            .all()
        )

    def get_pending_tech_scan(self, db: Session, limit: int = 100) -> list[Prospect]:
        """Return prospects with a website that never had a tech scan."""
        return (
            db.query(Prospect)
            .filter(
                Prospect.has_website.is_(True),
                Prospect.last_tech_scan_at.is_(None),
            )
            .limit(limit)
            .all()
        )

    def get_pending_performance_scan(self, db: Session, limit: int = 100) -> list[Prospect]:
        """Return prospects with a website that never had a performance scan."""
        return (
            db.query(Prospect)
            .filter(
                Prospect.has_website.is_(True),
                Prospect.last_perf_scan_at.is_(None),
            )
            .limit(limit)
            .all()
        )

    def count_by_status(self, db: Session) -> dict[str, int]:
        """Return the number of prospects per status, keyed by status value."""
        results = db.query(Prospect.status, func.count(Prospect.id)).group_by(Prospect.status).all()
        return {status.value if hasattr(status, "value") else str(status): count for status, count in results}

    def count_by_channel(self, db: Session) -> dict[str, int]:
        """Return the number of prospects per channel, keyed by channel value."""
        results = db.query(Prospect.channel, func.count(Prospect.id)).group_by(Prospect.channel).all()
        return {channel.value if hasattr(channel, "value") else str(channel): count for channel, count in results}


prospect_service = ProspectService()

View File

@@ -0,0 +1,253 @@
# app/modules/prospecting/services/scoring_service.py
"""
Opportunity scoring service.
Migrated from marketing-.lu-domains/app/services/scoring_service.py.
Scores prospects on a 0-100 scale across 4 categories:
- Technical Health (max 40pts)
- Modernity (max 25pts)
- Business Value (max 25pts)
- Engagement (max 10pts)
Extended for offline leads with additional scoring factors.
"""
import json
import logging
from sqlalchemy.orm import Session
from app.modules.prospecting.models import (
Prospect,
ProspectChannel,
ProspectScore,
)
logger = logging.getLogger(__name__)
# Outdated CMS list.
# ScoringService._score_digital compares the lowercased detected CMS against
# this set and awards the "outdated_cms" modernity points on a match.
OUTDATED_CMS = {"drupal", "joomla", "typo3"}
class ScoringService:
"""Service for computing opportunity scores."""
def compute_score(self, db: Session, prospect: Prospect) -> ProspectScore:
"""Compute or update the opportunity score for a prospect."""
tech_health = 0
modernity = 0
business_value = 0
engagement = 0
reason_flags = []
breakdown = {}
if prospect.channel == ProspectChannel.OFFLINE:
# Offline lead scoring
tech_health, modernity, business_value, engagement, reason_flags, breakdown = (
self._score_offline(prospect)
)
else:
# Digital lead scoring
tech_health, modernity, business_value, engagement, reason_flags, breakdown = (
self._score_digital(prospect)
)
total = min(tech_health + modernity + business_value + engagement, 100)
# Determine lead tier
if total >= 70:
lead_tier = "top_priority"
elif total >= 50:
lead_tier = "quick_win"
elif total >= 30:
lead_tier = "strategic"
else:
lead_tier = "low_priority"
# Upsert score
score = prospect.score
if not score:
score = ProspectScore(prospect_id=prospect.id)
db.add(score)
score.score = total
score.technical_health_score = tech_health
score.modernity_score = modernity
score.business_value_score = business_value
score.engagement_score = engagement
score.reason_flags = json.dumps(reason_flags)
score.score_breakdown = json.dumps(breakdown)
score.lead_tier = lead_tier
db.commit()
logger.info("Scored prospect %d: %d (%s)", prospect.id, total, lead_tier)
return score
def compute_all(self, db: Session, limit: int | None = None) -> int:
"""Compute scores for all prospects. Returns count of scored prospects."""
query = db.query(Prospect)
if limit:
query = query.limit(limit)
count = 0
for prospect in query.all():
self.compute_score(db, prospect)
count += 1
return count
def _score_digital(self, prospect: Prospect) -> tuple:
"""Score a digital (domain-scanned) prospect."""
tech_health = 0
modernity = 0
business_value = 0
engagement = 0
flags = []
breakdown = {}
# === TECHNICAL HEALTH (max 40) ===
if not prospect.uses_https:
tech_health += 15
flags.append("no_ssl")
breakdown["no_ssl"] = 15
perf = prospect.performance_profile
if perf and perf.performance_score is not None:
if perf.performance_score < 30:
tech_health += 15
flags.append("very_slow")
breakdown["very_slow"] = 15
elif perf.performance_score < 50:
tech_health += 10
flags.append("slow")
breakdown["slow"] = 10
elif perf.performance_score < 70:
tech_health += 5
flags.append("moderate_speed")
breakdown["moderate_speed"] = 5
if perf.is_mobile_friendly is False:
tech_health += 10
flags.append("not_mobile_friendly")
breakdown["not_mobile_friendly"] = 10
tech_health = min(tech_health, 40)
# === MODERNITY (max 25) ===
tp = prospect.tech_profile
if tp:
if tp.cms and tp.cms.lower() in OUTDATED_CMS:
modernity += 15
flags.append("outdated_cms")
breakdown["outdated_cms"] = 15
elif tp.cms is None and prospect.has_website:
modernity += 5
flags.append("unknown_cms")
breakdown["unknown_cms"] = 5
if tp.js_framework and tp.js_framework.lower() == "jquery":
modernity += 5
flags.append("legacy_js")
breakdown["legacy_js"] = 5
if not tp.analytics:
modernity += 5
flags.append("no_analytics")
breakdown["no_analytics"] = 5
modernity = min(modernity, 25)
# === BUSINESS VALUE (max 25) ===
if prospect.has_website:
business_value += 10
breakdown["has_website"] = 10
if tp and tp.ecommerce_platform:
business_value += 10
breakdown["has_ecommerce"] = 10
if prospect.domain_name and len(prospect.domain_name) <= 15:
business_value += 5
breakdown["short_domain"] = 5
business_value = min(business_value, 25)
# === ENGAGEMENT (max 10) ===
contacts = prospect.contacts or []
if contacts:
engagement += 5
flags.append("has_contacts")
breakdown["has_contacts"] = 5
has_email = any(c.contact_type == "email" for c in contacts)
has_phone = any(c.contact_type == "phone" for c in contacts)
if has_email:
engagement += 3
flags.append("has_email")
breakdown["has_email"] = 3
if has_phone:
engagement += 2
flags.append("has_phone")
breakdown["has_phone"] = 2
engagement = min(engagement, 10)
return tech_health, modernity, business_value, engagement, flags, breakdown
def _score_offline(self, prospect: Prospect) -> tuple:
    """Compute the score components for an offline (manually captured) prospect.

    Reads ``has_website``, ``contacts`` and ``source`` from the prospect.

    Returns:
        A 6-tuple ``(tech_health, modernity, business_value, engagement,
        flags, breakdown)`` where ``flags`` is a list of reason strings and
        ``breakdown`` maps a reason key to the points it contributed.
        Modernity is capped at 25 and engagement at 10.
    """
    flags = []
    breakdown = {}

    # A lead with no website at all is treated as a high-opportunity lead.
    if prospect.has_website:
        tech_health, modernity, business_value = 0, 0, 0
    else:
        tech_health, modernity, business_value = 30, 20, 20
        flags.append("no_website")
        breakdown.update(
            no_website_tech=30,
            no_website_mod=20,
            no_website_biz=20,
        )

    contacts = prospect.contacts or []

    # A gmail address among the email contacts adds to modernity; only the
    # first match counts.
    for contact in contacts:
        if contact.contact_type == "email" and "@gmail." in contact.value.lower():
            modernity += 10
            flags.append("uses_gmail")
            breakdown["uses_gmail"] = 10
            break
    modernity = min(modernity, 25)

    # Leads met in person are considered warm.
    engagement = 0
    if prospect.source in ("street", "networking_event", "referral"):
        engagement += 5
        flags.append("met_in_person")
        breakdown["met_in_person"] = 5

    # Reachability: points for having at least one email / phone contact.
    reachability = (
        ("email", "has_email", 3),
        ("phone", "has_phone", 2),
    )
    for contact_type, reason, points in reachability:
        if any(entry.contact_type == contact_type for entry in contacts):
            engagement += points
            flags.append(reason)
            breakdown[reason] = points
    engagement = min(engagement, 10)

    return tech_health, modernity, business_value, engagement, flags, breakdown
# Module-level ScoringService instance, created once when this module is imported.
scoring_service = ScoringService()

View File

@@ -0,0 +1,99 @@
# app/modules/prospecting/services/stats_service.py
"""
Statistics service for the prospecting dashboard.
"""
import logging
from sqlalchemy import func
from sqlalchemy.orm import Session
from app.modules.prospecting.models import (
Prospect,
ProspectChannel,
ProspectScanJob,
ProspectScore,
)
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
class StatsService:
    """Service for dashboard statistics and reporting."""

    def get_overview(self, db: Session) -> dict:
        """Get overview statistics for the dashboard.

        Args:
            db: Session used to run the aggregate queries.

        Returns:
            A dict with the keys ``total_prospects``, ``digital_count``,
            ``offline_count``, ``with_website``, ``with_https``, ``scored``,
            ``avg_score`` (rounded to one decimal, or ``None`` when no score
            rows exist), ``top_priority``, ``leads_by_tier`` and
            ``common_issues``.
        """
        total = self._count_prospects(db)
        digital = self._count_prospects(
            db, Prospect.channel == ProspectChannel.DIGITAL
        )
        offline = self._count_prospects(
            db, Prospect.channel == ProspectChannel.OFFLINE
        )
        with_website = self._count_prospects(db, Prospect.has_website.is_(True))
        with_https = self._count_prospects(db, Prospect.uses_https.is_(True))

        scored = db.query(func.count(ProspectScore.id)).scalar() or 0
        avg_raw = db.query(func.avg(ProspectScore.score)).scalar()
        # Compare against None explicitly: a real average of 0 must be
        # reported as 0.0, not as "no data". float() normalises whatever
        # numeric type the driver hands back (NOTE(review): some backends
        # appear to return Decimal for AVG - confirm).
        avg_score = round(float(avg_raw), 1) if avg_raw is not None else None

        # Leads by tier; rows with an empty/NULL tier are left out.
        tier_rows = (
            db.query(ProspectScore.lead_tier, func.count(ProspectScore.id))
            .group_by(ProspectScore.lead_tier)
            .all()
        )
        leads_by_tier = {tier: count for tier, count in tier_rows if tier}

        return {
            "total_prospects": total,
            "digital_count": digital,
            "offline_count": offline,
            "with_website": with_website,
            "with_https": with_https,
            "scored": scored,
            "avg_score": avg_score,
            "top_priority": leads_by_tier.get("top_priority", 0),
            "leads_by_tier": leads_by_tier,
            "common_issues": self._get_common_issues(db),
        }

    def get_scan_jobs(
        self,
        db: Session,
        *,
        page: int = 1,
        per_page: int = 20,
        status: str | None = None,
    ) -> tuple[list[ProspectScanJob], int]:
        """Get paginated scan jobs, newest first.

        Args:
            db: Session used to run the query.
            page: 1-based page number; values below 1 are treated as 1.
            per_page: Page size; values below 1 are treated as 1.
            status: When given, only jobs with this status are returned.

        Returns:
            ``(jobs, total)`` where ``total`` is the number of matching jobs
            before pagination is applied.
        """
        # Clamp so a bad query parameter cannot produce a negative
        # OFFSET/LIMIT.
        page = max(page, 1)
        per_page = max(per_page, 1)

        query = db.query(ProspectScanJob)
        if status:
            query = query.filter(ProspectScanJob.status == status)

        total = query.count()
        jobs = (
            query.order_by(ProspectScanJob.created_at.desc())
            .offset((page - 1) * per_page)
            .limit(per_page)
            .all()
        )
        return jobs, total

    def _get_common_issues(self, db: Session) -> list[dict]:
        """Return the ten most frequent reason flags across scored prospects.

        Each non-null ``reason_flags`` value is expected to hold a JSON array
        of flag strings. Values that are not valid JSON, or that do not
        decode to a list, are skipped; non-string entries inside a list are
        ignored.

        Returns:
            Up to ten ``{"flag": ..., "count": ...}`` dicts, most frequent
            first.
        """
        # Local imports keep this block self-contained.
        import json
        from collections import Counter

        rows = (
            db.query(ProspectScore.reason_flags)
            .filter(ProspectScore.reason_flags.isnot(None))
            .all()
        )

        flag_counts: Counter[str] = Counter()
        for (raw_flags,) in rows:
            flags = raw_flags
            # Decode only textual values; a value that is already a list is
            # used as-is instead of being discarded.
            if isinstance(raw_flags, (str, bytes, bytearray)):
                try:
                    flags = json.loads(raw_flags)
                except ValueError:  # includes json.JSONDecodeError
                    continue
            # A JSON string or object would otherwise be iterated
            # character-by-character / key-by-key and pollute the counts.
            if not isinstance(flags, list):
                continue
            flag_counts.update(flag for flag in flags if isinstance(flag, str))

        return [
            {"flag": flag, "count": count}
            for flag, count in flag_counts.most_common(10)
        ]

    @staticmethod
    def _count_prospects(db: Session, *criteria) -> int:
        """Count prospects matching all ``criteria`` (all prospects if none)."""
        query = db.query(func.count(Prospect.id))
        if criteria:
            query = query.filter(*criteria)
        return query.scalar() or 0
# Module-level StatsService instance, created once when this module is imported.
stats_service = StatsService()