Files
orion/app/modules/prospecting/static/admin/js/scan-jobs.js
Samir Boulahtit 1828ac85eb feat(prospecting): add content scraping for POC builder (Workstream 3A)
- New scrape_content() method in enrichment_service: extracts meta
  description, H1/H2 headings, paragraphs, images (filtered for size),
  social links, service items, and detected languages using BeautifulSoup
- Scans 6 pages per prospect: /, /about, /a-propos, /services,
  /nos-services, /contact
- Results stored as JSON in prospect.scraped_content_json
- New endpoints: POST /content-scrape/{id} and /content-scrape/batch
- Added to full_enrichment pipeline (Step 5, before security audit)
- CONTENT_SCRAPE job type for scan-jobs tracking
- "Content Scrape" batch button on scan-jobs page
- Add beautifulsoup4 to requirements.txt

Tested on batirenovation-strasbourg.fr: extracted 30 headings,
21 paragraphs, 13 images.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-01 22:26:56 +02:00

140 lines
4.9 KiB
JavaScript

// static/admin/js/scan-jobs.js
// Module-level logger for this page, created under the
// 'prospecting-scan-jobs' name; scanJobs() uses it for info/error messages.
const jobsLog = window.LogConfig.createLogger('prospecting-scan-jobs');
/**
 * Build the state object for the prospecting "scan jobs" admin page.
 *
 * The returned object merges the shared base state produced by `data()`
 * (defined outside this block) with the job list, pagination state,
 * batch-job actions and a few display helpers.
 * NOTE(review): presumably consumed as an Alpine.js `x-data` factory — confirm.
 *
 * @returns {object} Component state, getters and methods.
 */
function scanJobs() {
  /**
   * Read `table[key]` only when `key` is an own property of `table`.
   * A plain `table[key]` would also resolve inherited names such as
   * 'constructor' or 'toString' to (truthy) functions.
   */
  const lookupOwn = (table, key) =>
    Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;

  return {
    ...data(),
    currentPage: 'scan-jobs',
    jobs: [],
    loading: true,
    error: null,
    pagination: { page: 1, per_page: 20, total: 0, pages: 0 },

    /**
     * One-time page initialisation: loads the 'prospecting' i18n module,
     * picks up the rows-per-page setting when `window.PlatformSettings`
     * exists, then fetches the first page of jobs.
     *
     * The `window._scanJobsInit` flag turns every call after the first
     * into a no-op (apart from the i18n load, which runs before the guard).
     */
    async init() {
      await I18n.loadModule('prospecting');
      if (window._scanJobsInit) return;
      window._scanJobsInit = true;
      if (window.PlatformSettings) {
        try {
          this.pagination.per_page = await window.PlatformSettings.getRowsPerPage();
        } catch (err) {
          // The init flag is already set, so bailing out here would leave the
          // page stuck in its loading state with no retry. Keep the default
          // page size and carry on.
          jobsLog.error('Failed to load rows-per-page setting', err);
        }
      }
      jobsLog.info('Scan jobs initializing');
      await this.loadJobs();
    },

    /**
     * Fetch the current page of jobs and update `jobs` and `pagination`.
     * On failure the error message is stored in `error` and logged;
     * `loading` is always reset to false afterwards.
     */
    async loadJobs() {
      this.loading = true;
      this.error = null;
      try {
        const params = new URLSearchParams({
          page: this.pagination.page,
          per_page: this.pagination.per_page,
        });
        const response = await apiClient.get(`/admin/prospecting/stats/jobs?${params}`);
        this.jobs = response.items || [];
        this.pagination.total = response.total || 0;
        this.pagination.pages = response.pages || 0;
      } catch (err) {
        this.error = err.message;
        jobsLog.error('Failed to load jobs', err);
      } finally {
        this.loading = false;
      }
    },

    // Maps button job types to actual API route segments
    batchRoutes: {
      'http_check': 'http-check',
      'tech_scan': 'tech-scan',
      'performance_scan': 'performance',
      'contact_scrape': 'contacts',
      'content_scrape': 'content-scrape',
      'security_audit': 'security-audit',
      'score_compute': 'score-compute',
    },

    /**
     * Start a batch enrichment job.
     *
     * @param {string} jobType - One of the keys of `batchRoutes`.
     * Unknown job types (including inherited property names such as
     * 'constructor') only show an error toast; no request is sent.
     */
    async startBatchJob(jobType) {
      const route = lookupOwn(this.batchRoutes, jobType);
      if (!route) {
        Utils.showToast(`Unknown job type: ${jobType}`, 'error');
        return;
      }
      try {
        await apiClient.post(`/admin/prospecting/enrichment/${route}/batch`);
        Utils.showToast(`${jobType.replace(/_/g, ' ')} batch started`, 'success');
        // Refresh the list after 2s — presumably to give the backend time to
        // register the new job; confirm against the API behaviour.
        setTimeout(() => this.loadJobs(), 2000);
      } catch (err) {
        Utils.showToast(`Failed: ${err.message}`, 'error');
      }
    },

    /** 1-based index of the first row on the current page (0 when empty). */
    get startIndex() {
      if (this.pagination.total === 0) return 0;
      return (this.pagination.page - 1) * this.pagination.per_page + 1;
    },

    /** 1-based index of the last row on the current page, capped at `total`. */
    get endIndex() {
      const end = this.pagination.page * this.pagination.per_page;
      return Math.min(end, this.pagination.total);
    },

    /** Total number of pages as last reported by the API. */
    get totalPages() {
      return this.pagination.pages;
    },

    /**
     * Page entries for the pager: every page when there are at most 7,
     * otherwise the first page, a window of one page either side of the
     * current page, and the last page, with '...' marking each gap.
     */
    get pageNumbers() {
      const total = this.totalPages;
      const current = this.pagination.page;
      const pages = [];

      if (total <= 7) {
        for (let i = 1; i <= total; i++) pages.push(i);
        return pages;
      }

      pages.push(1);
      if (current > 3) pages.push('...');
      const windowStart = Math.max(2, current - 1);
      const windowEnd = Math.min(total - 1, current + 1);
      for (let i = windowStart; i <= windowEnd; i++) pages.push(i);
      if (current < total - 2) pages.push('...');
      pages.push(total);
      return pages;
    },

    /**
     * Jump to `page` and reload. Ellipsis entries and out-of-range page
     * numbers are ignored.
     */
    goToPage(page) {
      if (page === '...' || page < 1 || page > this.totalPages) return;
      this.pagination.page = page;
      this.loadJobs();
    },

    /** Advance one page and reload, unless already on the last page. */
    nextPage() {
      if (this.pagination.page < this.totalPages) {
        this.pagination.page++;
        this.loadJobs();
      }
    },

    /** Go back one page and reload, unless already on the first page. */
    previousPage() {
      if (this.pagination.page > 1) {
        this.pagination.page--;
        this.loadJobs();
      }
    },

    /**
     * CSS classes for a job status badge.
     *
     * @param {string} status - Job status key.
     * @returns {string} Class list; unknown statuses use the 'pending' style.
     */
    jobStatusClass(status) {
      const classes = {
        pending: 'text-yellow-700 bg-yellow-100 dark:text-yellow-100 dark:bg-yellow-700',
        running: 'text-blue-700 bg-blue-100 dark:text-blue-100 dark:bg-blue-700',
        completed: 'text-green-700 bg-green-100 dark:text-green-100 dark:bg-green-700',
        failed: 'text-red-700 bg-red-100 dark:text-red-100 dark:bg-red-700',
        cancelled: 'text-gray-700 bg-gray-100 dark:text-gray-100 dark:bg-gray-700',
      };
      return lookupOwn(classes, status) || classes.pending;
    },

    /**
     * Human-readable duration of a job.
     *
     * @param {object} job - Reads `started_at` and `completed_at`.
     * @returns {string} '—' when the job has not started or a timestamp
     *   cannot be parsed; otherwise 'Ns' below one minute or 'Mm Ns'.
     *   Jobs without `completed_at` are measured up to the current time.
     */
    formatDuration(job) {
      if (!job.started_at) return '—';
      const startMs = new Date(job.started_at).getTime();
      const endMs = job.completed_at ? new Date(job.completed_at).getTime() : Date.now();
      // Unparsable timestamps would otherwise render as "NaNm NaNs".
      if (Number.isNaN(startMs) || Number.isNaN(endMs)) return '—';
      // Clamp at zero: an end before the start (e.g. client clock behind the
      // server for a running job) must not show a negative duration.
      const seconds = Math.max(0, Math.round((endMs - startMs) / 1000));
      if (seconds < 60) return `${seconds}s`;
      const mins = Math.floor(seconds / 60);
      const secs = seconds % 60;
      return `${mins}m ${secs}s`;
    },
  };
}