Files
orion/app/modules/prospecting/templates/prospecting/admin/scan-jobs.html
Samir Boulahtit 1828ac85eb feat(prospecting): add content scraping for POC builder (Workstream 3A)
- New scrape_content() method in enrichment_service: extracts meta
  description, H1/H2 headings, paragraphs, images (filtered for size),
  social links, service items, and detected languages using BeautifulSoup
- Scans 6 pages per prospect: /, /about, /a-propos, /services,
  /nos-services, /contact
- Results stored as JSON in prospect.scraped_content_json
- New endpoints: POST /content-scrape/{id} and /content-scrape/batch
- Added to full_enrichment pipeline (Step 5, before security audit)
- CONTENT_SCRAPE job type for scan-jobs tracking
- "Content Scrape" batch button on scan-jobs page
- Add beautifulsoup4 to requirements.txt

Tested on batirenovation-strasbourg.fr: extracted 30 headings,
21 paragraphs, 13 images.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-01 22:26:56 +02:00

103 lines
6.3 KiB
HTML

{% extends "admin/base.html" %}
{% from 'shared/macros/headers.html' import page_header %}
{% from 'shared/macros/alerts.html' import loading_state, error_state %}
{% from 'shared/macros/tables.html' import table_wrapper, table_header, table_empty_state %}
{% from 'shared/macros/pagination.html' import pagination %}
{# Overrides the parent template's `title` block with this page's name. #}
{% block title %}Scan Jobs{% endblock title %}
{# Alpine component expression for this page; scanJobs() is presumably defined in admin/js/scan-jobs.js (loaded in extra_scripts) — confirm. #}
{% block alpine_data %}scanJobs(){% endblock alpine_data %}
{% block content %}
{# Page heading, rendered through the shared page_header macro imported from shared/macros/headers.html. #}
{{ page_header('Scan Jobs') }}
<!-- Quick Actions -->
{#
  Batch-job launcher buttons, rendered from a single list so that adding a new
  job type is a one-line change instead of a copy-pasted <button>.

  Each entry is: (job type passed to startBatchJob(), visible label,
  icon name passed to $icon(), background + hover colour classes).

  The colour classes are spelled out in full (not built from a colour name)
  so that tools which scan templates for class names can still find them.
#}
{% set batch_job_buttons = [
  ('http_check', 'HTTP Check', 'globe', 'bg-blue-600 hover:bg-blue-700'),
  ('tech_scan', 'Tech Scan', 'code', 'bg-green-600 hover:bg-green-700'),
  ('performance_scan', 'Performance Scan', 'chart-bar', 'bg-orange-600 hover:bg-orange-700'),
  ('contact_scrape', 'Contact Scrape', 'mail', 'bg-purple-600 hover:bg-purple-700'),
  ('content_scrape', 'Content Scrape', 'document-text', 'bg-teal-600 hover:bg-teal-700'),
  ('security_audit', 'Security Audit', 'shield-check', 'bg-yellow-600 hover:bg-yellow-700'),
  ('score_compute', 'Compute Scores', 'cursor-click', 'bg-red-600 hover:bg-red-700'),
] %}
<div class="mb-6 p-4 bg-white rounded-lg shadow-xs dark:bg-gray-800">
  <div class="flex flex-wrap gap-3">
    {% for job_type, label, icon, color_classes in batch_job_buttons %}
    <button type="button" @click="startBatchJob('{{ job_type }}')"
            class="inline-flex items-center px-4 py-2 text-sm font-medium leading-5 text-white transition-colors duration-150 {{ color_classes }} border border-transparent rounded-lg focus:outline-none">
      {# The icon is decorative (the button has a text label), so hide it from assistive technology. #}
      <span x-html="$icon('{{ icon }}', 'w-4 h-4 mr-2')" aria-hidden="true"></span>
      {{ label }}
    </button>
    {% endfor %}
  </div>
</div>
{# Loading and error placeholders from the shared alert macros. The jobs table below is shown only when both `loading` and `error` are falsy; these macros presumably key off the same Alpine state — confirm in shared/macros/alerts.html. #}
{{ loading_state('Loading scan jobs...') }}
{{ error_state('Error loading scan jobs') }}
<!-- Jobs Table -->
{# One row per entry in the Alpine `jobs` array; hidden while `loading` or `error` is truthy. #}
<div x-show="!loading && !error">
  {% call table_wrapper() %}
    {{ table_header(['Job Type', 'Status', 'Progress', 'Started', 'Duration', 'Result']) }}
    <tbody class="bg-white divide-y dark:divide-gray-700 dark:bg-gray-800">
      {{ table_empty_state(6, title='No scan jobs found', x_message="'Run a batch job above to get started'", show_condition='jobs.length === 0', icon='code') }}
      <template x-for="job in jobs" :key="job.id">
        <tr class="text-gray-700 dark:text-gray-400 hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors">
          {# Guard against a missing job_type so one malformed record cannot throw while rendering the row. #}
          <td class="px-4 py-3 text-sm font-semibold capitalize" x-text="(job.job_type || '').replace(/_/g, ' ')"></td>
          <td class="px-4 py-3">
            <span class="px-2.5 py-0.5 text-xs font-medium rounded-full"
                  :class="jobStatusClass(job.status)"
                  x-text="job.status"></span>
          </td>
          <td class="px-4 py-3">
            <div class="flex items-center space-x-2">
              {# progress_percent is clamped to 0-100 so an out-of-range value cannot overflow the track; the same value is exposed to assistive technology via the progressbar role. #}
              <div class="w-24 h-2 bg-gray-200 rounded-full dark:bg-gray-700"
                   role="progressbar" aria-label="Job progress" aria-valuemin="0" aria-valuemax="100"
                   :aria-valuenow="Math.min(100, Math.max(0, Number(job.progress_percent) || 0))">
                <div class="h-2 rounded-full transition-all duration-300"
                     :class="job.status === 'completed' ? 'bg-green-500' : job.status === 'failed' ? 'bg-red-500' : 'bg-blue-500'"
                     :style="'width: ' + Math.min(100, Math.max(0, Number(job.progress_percent) || 0)) + '%'"></div>
              </div>
              {# Fall back to 0 so missing counters render as "0/0" rather than "undefined/undefined". #}
              <span class="text-xs text-gray-500" x-text="(job.processed_items || 0) + '/' + (job.total_items || 0)"></span>
            </div>
          </td>
          <td class="px-4 py-3 text-xs" x-text="job.started_at ? new Date(job.started_at).toLocaleString() : '—'"></td>
          <td class="px-4 py-3 text-xs" x-text="formatDuration(job)"></td>
          <td class="px-4 py-3 text-xs">
            {# Shows failed/skipped counts when present; "OK" only for completed jobs with neither. #}
            <div class="flex items-center space-x-2">
              <span x-show="job.failed_items > 0" class="text-red-600 dark:text-red-400" x-text="job.failed_items + ' failed'"></span>
              <span x-show="job.skipped_items > 0" class="text-yellow-600 dark:text-yellow-400" x-text="job.skipped_items + ' skipped'"></span>
              <span x-show="!job.failed_items && !job.skipped_items && job.status === 'completed'" class="text-green-600 dark:text-green-400">OK</span>
            </div>
          </td>
        </tr>
      </template>
    </tbody>
  {% endcall %}
  {{ pagination() }}
</div>
{% endblock %}
{% block extra_scripts %}
{# Page-specific script served from the prospecting module's static files; deferred so it runs after the document is parsed. #}
<script src="{{ url_for('prospecting_static', path='admin/js/scan-jobs.js') }}" defer></script>
{% endblock extra_scripts %}