From 26b3dc9e3b7ec76510e1bef73be1a1aaf13d228b Mon Sep 17 00:00:00 2001 From: Samir Boulahtit Date: Sun, 21 Dec 2025 20:57:47 +0100 Subject: [PATCH] feat: add unified code quality dashboard with multiple validators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add validator_type field to scans and violations (architecture, security, performance) - Create security validator with SEC-xxx rules - Create performance validator with PERF-xxx rules - Add base validator class for shared functionality - Add validate_all.py script to run all validators - Update code quality service with validator type filtering - Add validator type tabs to dashboard UI - Add validator type filter to violations list - Update stats response with per-validator breakdown - Add security and performance rules documentation - Add chat-bubble icons to icon library 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .performance-rules/_main.yaml | 66 ++ .performance-rules/api.yaml | 135 ++++ .performance-rules/async.yaml | 142 ++++ .performance-rules/caching.yaml | 125 +++ .performance-rules/database.yaml | 223 ++++++ .performance-rules/frontend.yaml | 177 +++++ .performance-rules/memory.yaml | 156 ++++ .security-rules/_main.yaml | 66 ++ .security-rules/api_security.yaml | 66 ++ .security-rules/audit.yaml | 131 ++++ .security-rules/authentication.yaml | 70 ++ .security-rules/cryptography.yaml | 72 ++ .security-rules/data_protection.yaml | 67 ++ .security-rules/injection.yaml | 70 ++ ...d8e9_add_validator_type_to_code_quality.py | 95 +++ app/services/code_quality_service.py | 476 +++++++++--- .../admin/code-quality-violations.html | 41 +- docs/development/performance-rules.md | 570 ++++++++++++++ docs/development/security-rules.md | 560 ++++++++++++++ mkdocs.yml | 3 + models/schema/stats.py | 19 + scripts/base_validator.py | 465 ++++++++++++ scripts/validate_all.py | 218 ++++++ scripts/validate_performance.py | 
648 ++++++++++++++++ scripts/validate_security.py | 718 ++++++++++++++++++ static/admin/js/code-quality-violations.js | 4 + static/shared/js/icons.js | 6 +- 27 files changed, 5270 insertions(+), 119 deletions(-) create mode 100644 .performance-rules/_main.yaml create mode 100644 .performance-rules/api.yaml create mode 100644 .performance-rules/async.yaml create mode 100644 .performance-rules/caching.yaml create mode 100644 .performance-rules/database.yaml create mode 100644 .performance-rules/frontend.yaml create mode 100644 .performance-rules/memory.yaml create mode 100644 .security-rules/_main.yaml create mode 100644 .security-rules/api_security.yaml create mode 100644 .security-rules/audit.yaml create mode 100644 .security-rules/authentication.yaml create mode 100644 .security-rules/cryptography.yaml create mode 100644 .security-rules/data_protection.yaml create mode 100644 .security-rules/injection.yaml create mode 100644 alembic/versions/f4a5b6c7d8e9_add_validator_type_to_code_quality.py create mode 100644 docs/development/performance-rules.md create mode 100644 docs/development/security-rules.md create mode 100755 scripts/base_validator.py create mode 100755 scripts/validate_all.py create mode 100755 scripts/validate_performance.py create mode 100755 scripts/validate_security.py diff --git a/.performance-rules/_main.yaml b/.performance-rules/_main.yaml new file mode 100644 index 00000000..abad34f7 --- /dev/null +++ b/.performance-rules/_main.yaml @@ -0,0 +1,66 @@ +# Performance Rules Configuration +# ================================ +# Performance-focused validation rules for the codebase. +# Run with: python scripts/validate_performance.py + +version: "1.0" +project: "letzshop-product-import" + +description: | + Performance validation rules to detect inefficient patterns and ensure + optimal performance across the application. 
+ +principles: + - name: "Minimize Database Queries" + description: "Reduce N+1 queries and optimize data fetching" + - name: "Efficient Data Structures" + description: "Use appropriate data structures for the task" + - name: "Lazy Loading" + description: "Load data only when needed" + - name: "Caching Strategy" + description: "Cache expensive computations and frequent queries" + - name: "Async I/O" + description: "Use async for I/O-bound operations" + +includes: + - database.yaml + - caching.yaml + - api.yaml + - async.yaml + - memory.yaml + - frontend.yaml + +severity_levels: + error: + description: "Critical performance issue that must be fixed" + exit_code: 1 + warning: + description: "Performance concern that should be addressed" + exit_code: 0 + info: + description: "Performance optimization recommendation" + exit_code: 0 + +ignore: + files: + - "**/test_*.py" + - "**/tests/**" + - "**/*_test.py" + - "**/conftest.py" + - "**/migrations/**" + - "**/.venv/**" + - "**/venv/**" + - "**/node_modules/**" + - "**/site/**" + - "**/scripts/**" + - "**/__pycache__/**" + - "**/*.pyc" + patterns: + # Allow patterns in test files + - file: "**/tests/**" + pattern: ".*" + reason: "Test files may have different performance requirements" + # Allow patterns in scripts + - file: "**/scripts/**" + pattern: "\\.all\\(\\)" + reason: "Scripts may need to process all records" diff --git a/.performance-rules/api.yaml b/.performance-rules/api.yaml new file mode 100644 index 00000000..41e49327 --- /dev/null +++ b/.performance-rules/api.yaml @@ -0,0 +1,135 @@ +# API Performance Rules +# ===================== + +api_rules: + - id: "PERF-026" + name: "Pagination required for list endpoints" + severity: error + description: | + All list endpoints must support pagination. 
+ Unbounded lists cause performance issues: + - Memory exhaustion + - Slow response times + - Database locks + file_pattern: "**/api/**/*.py" + anti_patterns: + - '@router\\.get\\([^)]*\\)\\s*\\n(?:(?!limit|skip|offset|page).)*def\\s+\\w+.*:\\s*\\n(?:(?!limit|skip|offset|page).)*return.*\\.all\\(\\)' + required_patterns: + - "limit|skip|offset|page" + example_bad: | + @router.get("/products") + def list_products(db: Session): + return db.query(Product).all() + example_good: | + @router.get("/products") + def list_products( + skip: int = 0, + limit: int = Query(default=20, le=100), + db: Session = Depends(get_db) + ): + return db.query(Product).offset(skip).limit(limit).all() + + - id: "PERF-027" + name: "Reasonable default page sizes" + severity: warning + description: | + Default page sizes should be reasonable: + - Default: 20-50 items + - Maximum: 100-200 items + + Very large page sizes negate pagination benefits. + file_pattern: "**/api/**/*.py" + anti_patterns: + - 'limit.*=.*Query\\([^)]*default\\s*=\\s*[5-9]\\d{2,}' + - 'limit.*=.*Query\\([^)]*le\\s*=\\s*[1-9]\\d{3,}' + example_bad: | + limit: int = Query(default=500, le=10000) + example_good: | + limit: int = Query(default=20, ge=1, le=100) + + - id: "PERF-028" + name: "Response compression" + severity: info + description: | + Enable response compression for large responses: + - GZip or Brotli + - Significant bandwidth savings + - Faster load times + file_pattern: "**/main.py|**/app.py" + suggested_patterns: + - "GZipMiddleware|BrotliMiddleware|compress" + + - id: "PERF-029" + name: "Efficient serialization" + severity: info + description: | + Use Pydantic's response_model for efficient serialization. + Avoid manual dict conversion. 
+ file_pattern: "**/api/**/*.py" + anti_patterns: + - 'return\\s+\\{[^}]*for\\s+\\w+\\s+in' + - 'return\\s+\\[\\{.*for.*in.*\\]' + suggested_patterns: + - "response_model" + + - id: "PERF-030" + name: "Avoid redundant queries in response" + severity: warning + description: | + Don't trigger lazy-loaded relationships during serialization. + Use eager loading or carefully control serialization. + file_pattern: "**/api/**/*.py" + + - id: "PERF-031" + name: "Streaming for large responses" + severity: info + description: | + Use streaming responses for large data: + - File downloads + - Large exports (CSV, JSON) + - Real-time data feeds + file_pattern: "**/api/**/*.py" + suggested_patterns: + - "StreamingResponse|yield|generator" + + - id: "PERF-032" + name: "Conditional requests support" + severity: info + description: | + Support conditional requests to reduce bandwidth: + - ETag validation + - If-None-Match handling + - 304 Not Modified responses + file_pattern: "**/api/**/*.py" + suggested_patterns: + - "ETag|If-None-Match|304" + + - id: "PERF-033" + name: "Field selection support" + severity: info + description: | + Allow clients to request only needed fields. + Reduces response size and serialization cost. + file_pattern: "**/api/**/*.py" + suggested_patterns: + - "fields|include|exclude|sparse" + + - id: "PERF-034" + name: "Avoid deep nesting in responses" + severity: info + description: | + Deeply nested responses are slow to serialize. + Consider flattening or using links instead. 
+ file_pattern: "**/api/**/*.py" + + - id: "PERF-035" + name: "Endpoint response time monitoring" + severity: info + description: | + Monitor API response times: + - Set SLA targets + - Alert on degradation + - Track percentiles (p50, p95, p99) + file_pattern: "**/main.py|**/middleware*.py" + suggested_patterns: + - "prometheus|metrics|timing|latency" diff --git a/.performance-rules/async.yaml b/.performance-rules/async.yaml new file mode 100644 index 00000000..26e79a8e --- /dev/null +++ b/.performance-rules/async.yaml @@ -0,0 +1,142 @@ +# Async & Concurrency Performance Rules +# ===================================== + +async_rules: + - id: "PERF-036" + name: "Async for I/O operations" + severity: info + description: | + Use async for I/O-bound operations: + - Database queries (with async driver) + - External API calls + - File operations + - Network requests + file_pattern: "**/api/**/*.py|**/service*.py" + suggested_patterns: + - "async def|await|asyncio" + + - id: "PERF-037" + name: "Parallel independent operations" + severity: warning + description: | + Multiple independent async operations should run in parallel. + Use asyncio.gather() instead of sequential awaits. 
+ file_pattern: "**/*.py" + anti_patterns: + - 'await\\s+\\w+\\([^)]*\\)\\s*\\n\\s*await\\s+\\w+\\([^)]*\\)\\s*\\n\\s*await\\s+\\w+\\(' + suggested_patterns: + - "asyncio\\.gather|asyncio\\.create_task" + example_bad: | + user = await get_user(user_id) + orders = await get_orders(user_id) + preferences = await get_preferences(user_id) + example_good: | + user, orders, preferences = await asyncio.gather( + get_user(user_id), + get_orders(user_id), + get_preferences(user_id) + ) + + - id: "PERF-038" + name: "Background tasks for slow operations" + severity: warning + description: | + Operations taking > 500ms should run in background: + - Email sending + - Report generation + - External API syncs + - File processing + file_pattern: "**/api/**/*.py" + suggested_patterns: + - "BackgroundTasks|background_task|celery|rq|dramatiq" + + - id: "PERF-039" + name: "Connection pooling for HTTP clients" + severity: warning + description: | + HTTP clients should reuse connections. + Create client once, not per request. + file_pattern: "**/*client*.py|**/service*.py" + anti_patterns: + - 'def\\s+\\w+\\([^)]*\\):\\s*\\n[^}]*requests\\.get\\(' + - 'httpx\\.get\\(' + - 'aiohttp\\.request\\(' + suggested_patterns: + - "httpx\\.AsyncClient|aiohttp\\.ClientSession|requests\\.Session" + example_bad: | + def fetch_data(url): + response = requests.get(url) # New connection each time + example_good: | + # Use a session (connection pool) + async with httpx.AsyncClient() as client: + response = await client.get(url) + + - id: "PERF-040" + name: "Timeout configuration" + severity: error + description: | + All external calls must have timeouts. + Without timeouts, requests can hang indefinitely. 
+ file_pattern: "**/*client*.py|**/service*.py" + context_patterns: + - "requests|httpx|aiohttp|urllib" + required_patterns: + - "timeout" + example_bad: | + response = requests.get(url) + example_good: | + response = requests.get(url, timeout=30) + + - id: "PERF-041" + name: "Connection pool limits" + severity: info + description: | + Configure appropriate connection pool limits: + - max_connections: Total connections + - max_keepalive_connections: Idle connections + - keepalive_expiry: Time before closing idle + file_pattern: "**/*client*.py" + suggested_patterns: + - "max_connections|pool_connections|pool_maxsize" + + - id: "PERF-042" + name: "Retry with backoff" + severity: info + description: | + External calls should retry with exponential backoff. + Prevents cascade failures and respects rate limits. + file_pattern: "**/*client*.py|**/service*.py" + suggested_patterns: + - "retry|backoff|tenacity|Retry" + + - id: "PERF-043" + name: "Circuit breaker pattern" + severity: info + description: | + Use circuit breaker for unreliable external services. + Prevents repeated failures from slowing down the system. 
+ file_pattern: "**/*client*.py" + suggested_patterns: + - "circuit_breaker|CircuitBreaker|pybreaker" + + - id: "PERF-044" + name: "Task queues for heavy processing" + severity: info + description: | + Heavy processing should use task queues: + - Celery + - RQ (Redis Queue) + - Dramatiq + - Huey + file_pattern: "**/tasks/**/*.py" + suggested_patterns: + - "celery|rq|dramatiq|huey|@task" + + - id: "PERF-045" + name: "Worker pool sizing" + severity: info + description: | + Size worker pools appropriately: + - CPU-bound: Number of cores + - I/O-bound: Higher multiplier (2-4x cores) + - Memory-constrained: Based on available RAM diff --git a/.performance-rules/caching.yaml b/.performance-rules/caching.yaml new file mode 100644 index 00000000..603c905e --- /dev/null +++ b/.performance-rules/caching.yaml @@ -0,0 +1,125 @@ +# Caching Performance Rules +# ========================= + +caching_rules: + - id: "PERF-016" + name: "Cache expensive computations" + severity: info + description: | + Computationally expensive operations should be cached: + - Complex aggregations + - External API results + - Template rendering + - Data transformations + file_pattern: "**/service*.py" + suggested_patterns: + - "@cache|@lru_cache|@cached|redis|memcache" + + - id: "PERF-017" + name: "Cache key includes tenant context" + severity: warning + description: | + Multi-tenant cache keys must include vendor_id. + Otherwise, cached data may leak between tenants. 
+ file_pattern: "**/*cache*.py|**/service*.py" + context_patterns: + - "cache|@cached|redis" + required_patterns: + - "vendor_id|tenant" + example_bad: | + @cache.memoize() + def get_products(): + return db.query(Product).all() + example_good: | + @cache.memoize() + def get_products(vendor_id: int): + return db.query(Product).filter_by(vendor_id=vendor_id).all() + + - id: "PERF-018" + name: "Cache TTL configuration" + severity: info + description: | + Cache entries should have appropriate TTL: + - Short TTL (1-5 min): Frequently changing data + - Medium TTL (5-60 min): Semi-static data + - Long TTL (1+ hour): Reference data + file_pattern: "**/*cache*.py" + suggested_patterns: + - "ttl|expire|timeout" + + - id: "PERF-019" + name: "Cache invalidation strategy" + severity: warning + description: | + Define cache invalidation strategy: + - Time-based (TTL) + - Event-based (on data change) + - Manual (admin action) + + Without invalidation, stale data may be served. + file_pattern: "**/*cache*.py|**/service*.py" + suggested_patterns: + - "invalidate|delete|clear|purge" + + - id: "PERF-020" + name: "Response caching headers" + severity: info + description: | + API responses can use HTTP caching headers: + - Cache-Control for browser/CDN caching + - ETag for conditional requests + - Last-Modified for validation + file_pattern: "**/api/**/*.py" + suggested_patterns: + - "Cache-Control|ETag|Last-Modified" + + - id: "PERF-021" + name: "Query result caching" + severity: info + description: | + Frequently accessed, rarely changed data should be cached: + - User preferences + - Configuration settings + - Static reference data + file_pattern: "**/service*.py" + + - id: "PERF-022" + name: "Session-level caching" + severity: info + description: | + Use SQLAlchemy's identity map for request-scoped caching. + Avoid re-fetching the same entity within a request. 
+ file_pattern: "**/service*.py" + + - id: "PERF-023" + name: "Distributed cache for scalability" + severity: info + description: | + For multi-instance deployments, use distributed cache: + - Redis + - Memcached + - Database-backed cache + + Local caches don't work across instances. + file_pattern: "**/config*.py" + suggested_patterns: + - "redis|memcache|CACHE_TYPE" + + - id: "PERF-024" + name: "Cache warming strategy" + severity: info + description: | + Pre-warm cache for predictable high-traffic patterns: + - On application startup + - Before marketing campaigns + - After cache flush + + - id: "PERF-025" + name: "Monitor cache hit rates" + severity: info + description: | + Track cache performance: + - Hit rate (should be > 80%) + - Miss penalty (time saved) + - Memory usage + - Eviction rate diff --git a/.performance-rules/database.yaml b/.performance-rules/database.yaml new file mode 100644 index 00000000..fc71acce --- /dev/null +++ b/.performance-rules/database.yaml @@ -0,0 +1,223 @@ +# Database Performance Rules +# ========================== + +database_rules: + - id: "PERF-001" + name: "N+1 query detection" + severity: warning + description: | + Accessing relationships in loops causes N+1 queries. + For each item in a list, a separate query is executed. + + Solutions: + - joinedload(): Eager load with JOIN + - selectinload(): Eager load with IN clause + - subqueryload(): Eager load with subquery + file_pattern: "**/service*.py|**/api/**/*.py" + anti_patterns: + - 'for\s+\w+\s+in\s+\w+\.all\(\):\s*\n[^}]*\.\w+\.\w+' + suggested_patterns: + - "joinedload|selectinload|subqueryload" + example_bad: | + orders = db.query(Order).all() + for order in orders: + customer_name = order.customer.name # N+1 query! 
+ example_good: | + orders = db.query(Order).options( + joinedload(Order.customer) + ).all() + for order in orders: + customer_name = order.customer.name # Already loaded + + - id: "PERF-002" + name: "Eager loading for known relationships" + severity: info + description: | + When you always need related data, use eager loading + to reduce the number of database round trips. + file_pattern: "**/service*.py" + suggested_patterns: + - "joinedload|selectinload|subqueryload" + + - id: "PERF-003" + name: "Query result limiting" + severity: warning + description: | + All list queries should have pagination or limits. + Unbounded queries can cause memory issues and slow responses. + file_pattern: "**/service*.py|**/api/**/*.py" + anti_patterns: + - '\\.all\\(\\)(?![^\\n]*limit|[^\\n]*\\[:)' + exclude_patterns: + - "# noqa: PERF-003" + - "# bounded query" + - ".filter(" + suggested_patterns: + - "limit|offset|skip|paginate" + example_bad: | + all_products = db.query(Product).all() + example_good: | + products = db.query(Product).limit(100).all() + # Or with pagination + products = db.query(Product).offset(skip).limit(limit).all() + + - id: "PERF-004" + name: "Index usage for filtered columns" + severity: info + description: | + Columns frequently used in WHERE clauses should have indexes: + - Foreign keys (vendor_id, customer_id) + - Status fields + - Date fields used for filtering + - Boolean flags used for filtering + file_pattern: "**/models/database/*.py" + suggested_patterns: + - "index=True|Index\\(" + + - id: "PERF-005" + name: "Select only needed columns" + severity: info + description: | + For large tables, select only the columns you need. + Use .with_entities() or load_only() to reduce data transfer. 
+ file_pattern: "**/service*.py" + suggested_patterns: + - "with_entities|load_only|defer" + example_good: | + # Only load id and name columns + products = db.query(Product).options( + load_only(Product.id, Product.name) + ).all() + + - id: "PERF-006" + name: "Bulk operations for multiple records" + severity: warning + description: | + Use bulk operations instead of individual operations in loops: + - bulk_insert_mappings() for inserts + - bulk_update_mappings() for updates + - add_all() for ORM inserts + file_pattern: "**/service*.py" + anti_patterns: + - 'for\\s+\\w+\\s+in\\s+\\w+:\\s*\\n[^}]*db\\.add\\s*\\(' + - 'for\\s+\\w+\\s+in\\s+\\w+:\\s*\\n[^}]*\\.save\\s*\\(' + suggested_patterns: + - "bulk_insert_mappings|bulk_update_mappings|add_all" + example_bad: | + for item in items: + product = Product(**item) + db.add(product) + example_good: | + products = [Product(**item) for item in items] + db.add_all(products) + + - id: "PERF-007" + name: "Connection pool configuration" + severity: info + description: | + Configure database connection pool for optimal performance: + - pool_size: Number of persistent connections + - max_overflow: Additional connections allowed + - pool_pre_ping: Check connection health + - pool_recycle: Recycle connections periodically + file_pattern: "**/database.py|**/config*.py" + suggested_patterns: + - "pool_size|pool_pre_ping|pool_recycle|max_overflow" + + - id: "PERF-008" + name: "Use EXISTS for existence checks" + severity: info + description: | + Use EXISTS or .first() is not None instead of count() > 0. + EXISTS stops at first match, count() scans all matches. 
+ file_pattern: "**/service*.py" + anti_patterns: + - '\\.count\\(\\)\\s*>\\s*0' + - '\\.count\\(\\)\\s*>=\\s*1' + - '\\.count\\(\\)\\s*!=\\s*0' + suggested_patterns: + - "exists\\(\\)|scalar\\(exists" + example_bad: | + if db.query(Order).filter_by(customer_id=id).count() > 0: + example_good: | + exists_query = db.query(exists().where(Order.customer_id == id)) + if db.scalar(exists_query): + + - id: "PERF-009" + name: "Batch updates instead of loops" + severity: warning + description: | + Use .update() with filters instead of updating in a loop. + One UPDATE statement is faster than N individual updates. + file_pattern: "**/service*.py" + anti_patterns: + - 'for\\s+\\w+\\s+in\\s+\\w+:\\s*\\n[^}]*\\w+\\.\\w+\\s*=' + suggested_patterns: + - "\\.update\\(\\{" + example_bad: | + for product in products: + product.is_active = False + db.add(product) + example_good: | + db.query(Product).filter( + Product.id.in_(product_ids) + ).update({"is_active": False}, synchronize_session=False) + + - id: "PERF-010" + name: "Avoid SELECT * patterns" + severity: info + description: | + When you only need specific columns, don't load entire rows. + This reduces memory usage and network transfer. 
+ file_pattern: "**/service*.py" + + - id: "PERF-011" + name: "Use appropriate join strategies" + severity: info + description: | + Choose the right join strategy: + - joinedload: Few related items, always needed + - selectinload: Many related items, always needed + - subqueryload: Complex queries, many related items + - lazyload: Rarely accessed relationships + file_pattern: "**/service*.py" + + - id: "PERF-012" + name: "Transaction scope optimization" + severity: warning + description: | + Keep transactions short and focused: + - Don't hold transactions during I/O + - Commit after bulk operations + - Use read-only transactions when possible + file_pattern: "**/service*.py" + + - id: "PERF-013" + name: "Query result caching" + severity: info + description: | + Consider caching for: + - Frequently accessed, rarely changed data + - Configuration tables + - Reference data (categories, statuses) + file_pattern: "**/service*.py" + suggested_patterns: + - "@cache|@lru_cache|redis|memcache" + + - id: "PERF-014" + name: "Composite indexes for multi-column filters" + severity: info + description: | + Queries filtering on multiple columns benefit from composite indexes. + Order columns by selectivity (most selective first). + file_pattern: "**/models/database/*.py" + suggested_patterns: + - "Index\\([^)]*,[^)]*\\)" + + - id: "PERF-015" + name: "Avoid correlated subqueries" + severity: info + description: | + Correlated subqueries execute once per row. + Use JOINs or window functions instead when possible. + file_pattern: "**/service*.py" diff --git a/.performance-rules/frontend.yaml b/.performance-rules/frontend.yaml new file mode 100644 index 00000000..0d9845b7 --- /dev/null +++ b/.performance-rules/frontend.yaml @@ -0,0 +1,177 @@ +# Frontend Performance Rules +# ========================== + +frontend_rules: + - id: "PERF-056" + name: "Debounce search inputs" + severity: warning + description: | + Search inputs should debounce API calls. + Recommended: 300-500ms delay. 
+ + Prevents excessive API calls while user is typing. + file_pattern: "**/*.js" + context_patterns: + - "search|filter|query" + anti_patterns: + - '@input=".*search.*fetch' + - '@keyup=".*search.*fetch' + suggested_patterns: + - "debounce|setTimeout.*search|\\$watch.*search" + example_bad: | + + example_good: | + + // With: debouncedSearch = debounce(searchProducts, 300) + + - id: "PERF-057" + name: "Lazy load off-screen content" + severity: info + description: | + Defer loading of off-screen content: + - Modals + - Tabs (inactive) + - Below-the-fold content + - Images + file_pattern: "**/*.html" + suggested_patterns: + - 'loading="lazy"|x-intersect|x-show|x-if' + + - id: "PERF-058" + name: "Image optimization" + severity: warning + description: | + Images should be optimized: + - Use appropriate formats (WebP, AVIF) + - Serve responsive sizes + - Lazy load off-screen images + - Use CDN for static assets + file_pattern: "**/*.html" + required_patterns: + - 'loading="lazy"|srcset|x-intersect' + example_good: | + Product + + - id: "PERF-059" + name: "Minimize Alpine.js watchers" + severity: info + description: | + Excessive $watch calls impact performance. + Use computed properties or event handlers instead. + file_pattern: "**/*.js" + anti_patterns: + - '\\$watch\\([^)]+\\).*\\$watch\\([^)]+\\).*\\$watch\\(' + + - id: "PERF-060" + name: "Virtual scrolling for long lists" + severity: info + description: | + Lists with 100+ items should use virtual scrolling. + Only render visible items in the viewport. 
+ file_pattern: "**/*.html|**/*.js" + suggested_patterns: + - "virtual-scroll|x-intersect|IntersectionObserver" + + - id: "PERF-061" + name: "Minimize bundle size" + severity: info + description: | + Reduce JavaScript bundle size: + - Import only needed modules + - Use tree-shaking + - Split code by route + file_pattern: "**/*.js" + + - id: "PERF-062" + name: "Reasonable polling intervals" + severity: warning + description: | + Polling should be >= 10 seconds for non-critical updates. + Lower intervals waste bandwidth and server resources. + file_pattern: "**/*.js" + anti_patterns: + - 'setInterval\\s*\\([^,]+,\\s*[1-9]\\d{0,3}\\s*\\)' + exclude_patterns: + - "// real-time required" + example_bad: | + setInterval(fetchUpdates, 1000); // Every second + example_good: | + setInterval(fetchUpdates, 30000); // Every 30 seconds + + - id: "PERF-063" + name: "CSS containment" + severity: info + description: | + Use CSS containment for complex layouts. + Limits rendering scope for better performance. + file_pattern: "**/*.css|**/*.html" + suggested_patterns: + - "contain:|content-visibility" + + - id: "PERF-064" + name: "Avoid layout thrashing" + severity: warning + description: | + Don't interleave DOM reads and writes. + Batch reads first, then writes. + file_pattern: "**/*.js" + anti_patterns: + - 'offsetHeight.*style\\.|style\\..*offsetHeight' + + - id: "PERF-065" + name: "Use CSS animations over JavaScript" + severity: info + description: | + CSS animations are hardware-accelerated. + Use CSS for simple animations, JS for complex ones. + file_pattern: "**/*.js" + suggested_patterns: + - "transition|animation|transform" + + - id: "PERF-066" + name: "Preload critical resources" + severity: info + description: | + Preload critical CSS, fonts, and above-the-fold images. + Reduces perceived load time. 
+ file_pattern: "**/*.html" + suggested_patterns: + - 'rel="preload"|rel="prefetch"|rel="preconnect"' + + - id: "PERF-067" + name: "Defer non-critical JavaScript" + severity: info + description: | + Non-critical JavaScript should be deferred. + Allows page rendering to complete first. + file_pattern: "**/*.html" + suggested_patterns: + - 'defer|async' + + - id: "PERF-068" + name: "Minimize DOM nodes" + severity: info + description: | + Excessive DOM nodes slow rendering. + Target: < 1500 nodes, depth < 32, children < 60 + file_pattern: "**/*.html" + + - id: "PERF-069" + name: "Efficient event handlers" + severity: info + description: | + Use event delegation for repeated elements. + Add listener to parent, not each child. + file_pattern: "**/*.js" + suggested_patterns: + - "@click.delegate|event.target.closest" + + - id: "PERF-070" + name: "Cache DOM queries" + severity: info + description: | + Store DOM element references instead of re-querying. + Each querySelector has performance cost. + file_pattern: "**/*.js" + anti_patterns: + - 'document\\.querySelector\\([^)]+\\).*document\\.querySelector\\(' diff --git a/.performance-rules/memory.yaml b/.performance-rules/memory.yaml new file mode 100644 index 00000000..8a22748b --- /dev/null +++ b/.performance-rules/memory.yaml @@ -0,0 +1,156 @@ +# Memory Management Performance Rules +# ==================================== + +memory_rules: + - id: "PERF-046" + name: "Generators for large datasets" + severity: warning + description: | + Use generators/iterators for processing large datasets. + Avoids loading everything into memory at once. 
+ file_pattern: "**/service*.py" + anti_patterns: + - '\\.all\\(\\).*for\\s+\\w+\\s+in' + suggested_patterns: + - "yield|yield_per|iter" + example_bad: | + products = db.query(Product).all() # Loads all into memory + for product in products: + process(product) + example_good: | + for product in db.query(Product).yield_per(100): + process(product) + + - id: "PERF-047" + name: "Stream large file uploads" + severity: warning + description: | + Large files should be streamed to disk, not held in memory. + Use SpooledTemporaryFile or direct disk writing. + file_pattern: "**/upload*.py|**/attachment*.py" + suggested_patterns: + - "SpooledTemporaryFile|chunk|stream" + example_bad: | + content = await file.read() # Entire file in memory + with open(path, 'wb') as f: + f.write(content) + example_good: | + with open(path, 'wb') as f: + while chunk := await file.read(8192): + f.write(chunk) + + - id: "PERF-048" + name: "Chunked processing for imports" + severity: warning + description: | + Bulk imports should process in chunks: + - Read in batches + - Commit in batches + - Report progress periodically + file_pattern: "**/import*.py|**/csv*.py" + required_patterns: + - "chunk|batch|yield" + example_bad: | + rows = list(csv_reader) # All rows in memory + for row in rows: + process(row) + example_good: | + def process_in_chunks(reader, chunk_size=1000): + chunk = [] + for row in reader: + chunk.append(row) + if len(chunk) >= chunk_size: + yield chunk + chunk = [] + if chunk: + yield chunk + + - id: "PERF-049" + name: "Context managers for resources" + severity: error + description: | + Use context managers for file operations. + Ensures resources are properly released. 
+ file_pattern: "**/*.py" + anti_patterns: + - 'f\\s*=\\s*open\\s*\\([^)]+\\)(?!\\s*#.*context)' + - '^(?!.*with).*open\\s*\\([^)]+\\)\\s*$' + exclude_patterns: + - "# noqa: PERF-049" + - "with open" + example_bad: | + f = open('file.txt') + content = f.read() + f.close() # May not run if exception + example_good: | + with open('file.txt') as f: + content = f.read() + + - id: "PERF-050" + name: "Limit in-memory collections" + severity: info + description: | + Avoid building large lists in memory. + Use generators, itertools, or database pagination. + file_pattern: "**/service*.py" + anti_patterns: + - '\\[.*for.*in.*\\](?!.*\\[:\\d+\\])' + + - id: "PERF-051" + name: "String concatenation efficiency" + severity: info + description: | + For many string concatenations, use join() or StringIO. + Repeated += creates many intermediate strings. + file_pattern: "**/*.py" + anti_patterns: + - 'for.*:\\s*\\n[^}]*\\+=' + suggested_patterns: + - "\\.join\\(|StringIO" + example_bad: | + result = "" + for item in items: + result += str(item) + example_good: | + result = "".join(str(item) for item in items) + + - id: "PERF-052" + name: "Efficient data structures" + severity: info + description: | + Choose appropriate data structures: + - set for membership testing + - dict for key-value lookup + - deque for queue operations + - defaultdict for grouping + file_pattern: "**/*.py" + + - id: "PERF-053" + name: "Object pooling for expensive objects" + severity: info + description: | + Reuse expensive-to-create objects: + - Database connections + - HTTP clients + - Template engines + file_pattern: "**/*.py" + + - id: "PERF-054" + name: "Weak references for caches" + severity: info + description: | + Use weak references for large object caches. + Allows garbage collection when memory is needed. 
+ file_pattern: "**/*cache*.py" + suggested_patterns: + - "WeakValueDictionary|WeakKeyDictionary|weakref" + + - id: "PERF-055" + name: "Slots for frequently instantiated classes" + severity: info + description: | + Use __slots__ for classes with many instances. + Reduces memory footprint per instance. + file_pattern: "**/models/**/*.py" + suggested_patterns: + - "__slots__" diff --git a/.security-rules/_main.yaml b/.security-rules/_main.yaml new file mode 100644 index 00000000..d25b0211 --- /dev/null +++ b/.security-rules/_main.yaml @@ -0,0 +1,66 @@ +# Security Rules Configuration +# ============================ +# Security-focused validation rules for the codebase. +# Run with: python scripts/validate_security.py + +version: "1.0" +project: "letzshop-product-import" + +description: | + Security validation rules to detect common vulnerabilities and ensure + secure coding practices across the application. + +principles: + - name: "Defense in Depth" + description: "Multiple layers of security controls" + - name: "Least Privilege" + description: "Minimal access rights for users and processes" + - name: "Secure by Default" + description: "Secure configurations out of the box" + - name: "Fail Securely" + description: "Errors should not compromise security" + - name: "Input Validation" + description: "Never trust user input" + +includes: + - authentication.yaml + - injection.yaml + - data_protection.yaml + - api_security.yaml + - cryptography.yaml + - audit.yaml + +severity_levels: + error: + description: "Critical security vulnerability that must be fixed" + exit_code: 1 + warning: + description: "Security concern that should be addressed" + exit_code: 0 + info: + description: "Security best practice recommendation" + exit_code: 0 + +ignore: + files: + - "**/test_*.py" + - "**/tests/**" + - "**/*_test.py" + - "**/conftest.py" + - "**/migrations/**" + - "**/.venv/**" + - "**/venv/**" + - "**/node_modules/**" + - "**/site/**" + - "**/scripts/**" + - "**/__pycache__/**" + 
- "**/*.pyc" + patterns: + # Allow test credentials in test files + - file: "**/tests/**" + pattern: "password.*=.*test" + reason: "Test fixtures use dummy credentials" + # Allow example patterns in documentation + - file: "**/docs/**" + pattern: ".*" + reason: "Documentation examples" diff --git a/.security-rules/api_security.yaml b/.security-rules/api_security.yaml new file mode 100644 index 00000000..c5ceb95a --- /dev/null +++ b/.security-rules/api_security.yaml @@ -0,0 +1,66 @@ +# API Security Rules +# ================== + +api_security_rules: + - id: SEC-031 + name: CORS origin validation + severity: error + description: > + CORS must not allow all origins in production. + Specify allowed origins explicitly. + + - id: SEC-032 + name: Rate limiting on sensitive endpoints + severity: warning + description: > + Auth, password reset, and payment endpoints need rate limiting. + + - id: SEC-033 + name: Security headers + severity: warning + description: > + Configure security headers like X-Content-Type-Options, + X-Frame-Options, Content-Security-Policy. + + - id: SEC-034 + name: HTTPS enforcement + severity: error + description: > + External URLs must use HTTPS. + HTTP is only acceptable for localhost. + + - id: SEC-035 + name: Request size limits + severity: warning + description: > + Limit request body size to prevent DoS attacks. + + - id: SEC-036 + name: Input validation with Pydantic + severity: warning + description: > + All API inputs should be validated using Pydantic models. + + - id: SEC-037 + name: API versioning + severity: info + description: > + APIs should be versioned for security update isolation. + + - id: SEC-038 + name: Method restrictions + severity: warning + description: > + Endpoints should only allow necessary HTTP methods. + + - id: SEC-039 + name: Authentication bypass prevention + severity: error + description: > + Ensure authentication cannot be bypassed. 
+ + - id: SEC-040 + name: Timeout configuration + severity: warning + description: > + All external calls must have timeouts configured. diff --git a/.security-rules/audit.yaml b/.security-rules/audit.yaml new file mode 100644 index 00000000..6df9ffa0 --- /dev/null +++ b/.security-rules/audit.yaml @@ -0,0 +1,131 @@ +# Audit & Logging Rules +# ===================== + +audit_rules: + - id: "SEC-051" + name: "Authentication event logging" + severity: warning + description: | + Log authentication events: + - Successful logins (with user ID, IP) + - Failed login attempts (with IP, reason) + - Logouts + - Password changes + - Password reset requests + file_pattern: "**/auth*.py|**/login*.py" + required_patterns: + - "log" + suggested_patterns: + - 'logger\.(info|warning).*login|auth|password' + + - id: "SEC-052" + name: "Admin action audit trail" + severity: warning + description: | + All admin operations should be logged with: + - Admin user ID + - Action performed + - Target resource + - Timestamp + - IP address + file_pattern: "**/admin/**/*.py" + required_patterns: + - "log" + suggested_patterns: + - "logger|audit" + + - id: "SEC-053" + name: "Data modification logging" + severity: info + description: | + Log create/update/delete on sensitive data: + - User accounts + - Roles and permissions + - Financial transactions + - Configuration changes + file_pattern: "**/service*.py" + + - id: "SEC-054" + name: "Security event logging" + severity: warning + description: | + Log security-relevant events: + - Authorization failures + - Input validation failures + - Rate limit triggers + - Suspicious activity patterns + file_pattern: "**/*.py" + context_patterns: + - "unauthorized|forbidden|rate_limit|suspicious" + suggested_patterns: + - "logger\\.warning|logger\\.error" + + - id: "SEC-055" + name: "Log injection prevention" + severity: warning + description: | + Sanitize user input before logging. + Newlines and control characters can corrupt logs. 
+ file_pattern: "**/*.py" + anti_patterns: + - 'logger\.[a-z]+\(.*request\..*\)' + suggested_patterns: + - "sanitize|escape|repr\\(" + example_bad: | + logger.info(f"User search: {request.query}") + example_good: | + logger.info(f"User search: {request.query!r}") # repr escapes + + - id: "SEC-056" + name: "Centralized logging" + severity: info + description: | + Use centralized logging for: + - Correlation across services + - Tamper-evident storage + - Retention management + - Alerting capabilities + + - id: "SEC-057" + name: "Log level appropriateness" + severity: info + description: | + Use appropriate log levels: + - ERROR: Security failures requiring attention + - WARNING: Suspicious activity, failed auth + - INFO: Successful security events + - DEBUG: Never log sensitive data even at debug + + - id: "SEC-058" + name: "Structured logging format" + severity: info + description: | + Use structured logging (JSON) for: + - Easy parsing + - Consistent fields + - Searchability + suggested_patterns: + - "structlog|json_formatter|extra={" + + - id: "SEC-059" + name: "Audit log integrity" + severity: info + description: | + Protect audit logs from tampering: + - Append-only storage + - Cryptographic chaining + - Separate access controls + + - id: "SEC-060" + name: "Privacy-aware logging" + severity: warning + description: | + Comply with data protection regulations: + - No PII in logs without consent + - Log retention limits + - Right to deletion support + file_pattern: "**/*.py" + anti_patterns: + - 'log.*email(?!.*@.*sanitized)' + - 'log.*phone' + - 'log.*address(?!.*ip)' diff --git a/.security-rules/authentication.yaml b/.security-rules/authentication.yaml new file mode 100644 index 00000000..7a8cdfeb --- /dev/null +++ b/.security-rules/authentication.yaml @@ -0,0 +1,70 @@ +# Authentication Security Rules +# ============================= + +authentication_rules: + - id: SEC-001 + name: No hardcoded credentials + severity: error + description: > + Credentials must 
never be hardcoded in source code. + Use environment variables or secret management. + + - id: SEC-002 + name: JWT expiry enforcement + severity: error + description: > + All JWT tokens must have expiration claims. + Access tokens should expire in 15-60 minutes. + + - id: SEC-003 + name: Password hashing required + severity: error + description: > + Passwords must be hashed using bcrypt, argon2, or scrypt. + Never store or compare passwords in plain text. + + - id: SEC-004 + name: Session regeneration after auth + severity: warning + description: > + Session IDs should be regenerated after authentication + to prevent session fixation attacks. + + - id: SEC-005 + name: Brute force protection + severity: warning + description: > + Login endpoints should implement rate limiting + or account lockout after failed attempts. + + - id: SEC-006 + name: Secure password reset + severity: warning + description: > + Password reset tokens must be cryptographically random, + expire within 1 hour, and be single-use. + + - id: SEC-007 + name: Authentication on sensitive endpoints + severity: error + description: > + All endpoints except public ones must require authentication. + + - id: SEC-008 + name: Token in Authorization header + severity: warning + description: > + JWT tokens should be sent in Authorization header, + not in URL parameters. + + - id: SEC-009 + name: Logout invalidates tokens + severity: warning + description: > + Logout should invalidate or blacklist tokens. + + - id: SEC-010 + name: Multi-factor authentication support + severity: info + description: > + Consider implementing MFA for sensitive operations. 
diff --git a/.security-rules/cryptography.yaml b/.security-rules/cryptography.yaml new file mode 100644 index 00000000..c59ce7f6 --- /dev/null +++ b/.security-rules/cryptography.yaml @@ -0,0 +1,72 @@ +# Cryptography Rules +# ================== + +cryptography_rules: + - id: SEC-041 + name: Strong hashing algorithms + severity: error + description: > + Use bcrypt, argon2, scrypt for passwords. + Use SHA-256 or stronger for general hashing. + Never use MD5 or SHA1. + + - id: SEC-042 + name: Secure random generation + severity: error + description: > + Use the secrets module for security-sensitive randomness. + Never use random module for tokens or keys. + + - id: SEC-043 + name: No hardcoded encryption keys + severity: error + description: > + Encryption keys must come from environment variables + or secret management services. + + - id: SEC-044 + name: Strong encryption algorithms + severity: error + description: > + Use AES-256 or ChaCha20. Never use DES, 3DES, or RC4. + + - id: SEC-045 + name: Proper IV/nonce usage + severity: error + description: > + Encryption IVs and nonces must be randomly generated + and unique per encryption. + + - id: SEC-046 + name: TLS version requirements + severity: warning + description: > + Enforce TLS 1.2 or higher. + Disable SSLv2, SSLv3, TLS 1.0, TLS 1.1. + + - id: SEC-047 + name: Certificate verification + severity: error + description: > + Always verify SSL certificates. + Never disable verification in production. + + - id: SEC-048 + name: Key derivation for passwords + severity: warning + description: > + When deriving encryption keys from passwords, + use PBKDF2 with 100K+ iterations, Argon2, or scrypt. + + - id: SEC-049 + name: Secure key storage + severity: info + description: > + Encryption keys should be stored in environment variables, + secret management, or HSMs. + + - id: SEC-050 + name: Key rotation support + severity: info + description: > + Implement key rotation with multiple key versions. 
diff --git a/.security-rules/data_protection.yaml b/.security-rules/data_protection.yaml new file mode 100644 index 00000000..2cc184fb --- /dev/null +++ b/.security-rules/data_protection.yaml @@ -0,0 +1,67 @@ +# Data Protection Rules +# ===================== + +data_protection_rules: + - id: SEC-021 + name: PII logging prevention + severity: error + description: > + Never log passwords, tokens, credit cards, or sensitive PII. + + - id: SEC-022 + name: Sensitive data in URLs + severity: error + description: > + Sensitive data should not appear in URL query parameters. + Use POST body or headers instead. + + - id: SEC-023 + name: Mass assignment prevention + severity: warning + description: > + Use explicit field assignment, not **kwargs from user input. + + - id: SEC-024 + name: Error message information leakage + severity: error + description: > + Error messages should not reveal internal details. + No stack traces to users. + + - id: SEC-025 + name: Secure cookie settings + severity: error + description: > + Cookies must have Secure, HttpOnly, SameSite attributes. + + - id: SEC-026 + name: Encryption for sensitive data at rest + severity: info + description: > + Consider encrypting sensitive data stored in the database. + + - id: SEC-027 + name: Data retention limits + severity: info + description: > + Implement data retention policies. + + - id: SEC-028 + name: Response data filtering + severity: warning + description: > + API responses should not include sensitive internal fields. + Use Pydantic response models. + + - id: SEC-029 + name: File upload validation + severity: error + description: > + Validate uploaded files by extension AND content type. + Limit file size. + + - id: SEC-030 + name: Backup encryption + severity: info + description: > + Database backups should be encrypted. 
diff --git a/.security-rules/injection.yaml b/.security-rules/injection.yaml new file mode 100644 index 00000000..cfc6384d --- /dev/null +++ b/.security-rules/injection.yaml @@ -0,0 +1,70 @@ +# Injection Prevention Rules +# ========================== + +injection_rules: + - id: SEC-011 + name: No raw SQL queries + severity: error + description: > + Use SQLAlchemy ORM or parameterized queries only. + Never concatenate user input into SQL strings. + + - id: SEC-012 + name: No shell command injection + severity: error + description: > + Never use shell=True with subprocess. + Use subprocess with list arguments. + + - id: SEC-013 + name: No code execution + severity: error + description: > + Never use eval() or exec() with user input. + + - id: SEC-014 + name: Path traversal prevention + severity: error + description: > + Validate file paths to prevent directory traversal. + Use secure_filename() for uploads. + + - id: SEC-015 + name: XSS prevention in templates + severity: error + description: > + Use safe output methods in templates. + Prefer x-text over x-html. + + - id: SEC-016 + name: LDAP injection prevention + severity: error + description: > + Escape special characters in LDAP queries. + + - id: SEC-017 + name: XML external entity prevention + severity: error + description: > + Disable external entities when parsing XML. + Use defusedxml. + + - id: SEC-018 + name: Template injection prevention + severity: error + description: > + Never render user input as template code. + + - id: SEC-019 + name: SSRF prevention + severity: warning + description: > + Validate URLs before making external requests. + Whitelist allowed domains. + + - id: SEC-020 + name: Deserialization safety + severity: error + description: > + Never deserialize untrusted data with pickle. + Use yaml.safe_load() instead of yaml.load(). 
diff --git a/alembic/versions/f4a5b6c7d8e9_add_validator_type_to_code_quality.py b/alembic/versions/f4a5b6c7d8e9_add_validator_type_to_code_quality.py new file mode 100644 index 00000000..35e48b91 --- /dev/null +++ b/alembic/versions/f4a5b6c7d8e9_add_validator_type_to_code_quality.py @@ -0,0 +1,95 @@ +"""add_validator_type_to_code_quality + +Revision ID: f4a5b6c7d8e9 +Revises: e3f4a5b6c7d8 +Create Date: 2025-12-21 + +This migration adds validator_type column to architecture scans and violations +to support multiple validator types (architecture, security, performance). +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = "f4a5b6c7d8e9" +down_revision: Union[str, None] = "e3f4a5b6c7d8" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add validator_type to architecture_scans table + op.add_column( + "architecture_scans", + sa.Column( + "validator_type", + sa.String(length=20), + nullable=False, + server_default="architecture", + ), + ) + op.create_index( + op.f("ix_architecture_scans_validator_type"), + "architecture_scans", + ["validator_type"], + unique=False, + ) + + # Add validator_type to architecture_violations table + op.add_column( + "architecture_violations", + sa.Column( + "validator_type", + sa.String(length=20), + nullable=False, + server_default="architecture", + ), + ) + op.create_index( + op.f("ix_architecture_violations_validator_type"), + "architecture_violations", + ["validator_type"], + unique=False, + ) + + # Add validator_type to architecture_rules table + op.add_column( + "architecture_rules", + sa.Column( + "validator_type", + sa.String(length=20), + nullable=False, + server_default="architecture", + ), + ) + op.create_index( + op.f("ix_architecture_rules_validator_type"), + "architecture_rules", + ["validator_type"], + unique=False, + ) + + +def 
downgrade() -> None: + # Drop indexes first + op.drop_index( + op.f("ix_architecture_rules_validator_type"), + table_name="architecture_rules", + ) + op.drop_index( + op.f("ix_architecture_violations_validator_type"), + table_name="architecture_violations", + ) + op.drop_index( + op.f("ix_architecture_scans_validator_type"), + table_name="architecture_scans", + ) + + # Drop columns + op.drop_column("architecture_rules", "validator_type") + op.drop_column("architecture_violations", "validator_type") + op.drop_column("architecture_scans", "validator_type") diff --git a/app/services/code_quality_service.py b/app/services/code_quality_service.py index d4b20460..2c8e9abc 100644 --- a/app/services/code_quality_service.py +++ b/app/services/code_quality_service.py @@ -1,6 +1,7 @@ """ Code Quality Service -Business logic for managing architecture scans and violations +Business logic for managing code quality scans and violations +Supports multiple validator types: architecture, security, performance """ import json @@ -25,25 +26,65 @@ from models.database.architecture_scan import ( logger = logging.getLogger(__name__) +# Validator type constants +VALIDATOR_ARCHITECTURE = "architecture" +VALIDATOR_SECURITY = "security" +VALIDATOR_PERFORMANCE = "performance" + +VALID_VALIDATOR_TYPES = [VALIDATOR_ARCHITECTURE, VALIDATOR_SECURITY, VALIDATOR_PERFORMANCE] + +# Map validator types to their scripts +VALIDATOR_SCRIPTS = { + VALIDATOR_ARCHITECTURE: "scripts/validate_architecture.py", + VALIDATOR_SECURITY: "scripts/validate_security.py", + VALIDATOR_PERFORMANCE: "scripts/validate_performance.py", +} + +# Human-readable names +VALIDATOR_NAMES = { + VALIDATOR_ARCHITECTURE: "Architecture", + VALIDATOR_SECURITY: "Security", + VALIDATOR_PERFORMANCE: "Performance", +} + class CodeQualityService: """Service for managing code quality scans and violations""" - def run_scan(self, db: Session, triggered_by: str = "manual") -> ArchitectureScan: + def run_scan( + self, + db: Session, + 
triggered_by: str = "manual", + validator_type: str = VALIDATOR_ARCHITECTURE, + ) -> ArchitectureScan: """ - Run architecture validator and store results in database + Run a code quality validator and store results in database Args: db: Database session triggered_by: Who/what triggered the scan ('manual', 'scheduled', 'ci/cd') + validator_type: Type of validator ('architecture', 'security', 'performance') Returns: ArchitectureScan object with results Raises: - Exception: If validator script fails + ValueError: If validator_type is invalid + ScanTimeoutException: If validator times out + ScanParseException: If validator output cannot be parsed """ - logger.info(f"Starting architecture scan (triggered by: {triggered_by})") + if validator_type not in VALID_VALIDATOR_TYPES: + raise ValueError( + f"Invalid validator type: {validator_type}. " + f"Must be one of: {VALID_VALIDATOR_TYPES}" + ) + + script_path = VALIDATOR_SCRIPTS[validator_type] + validator_name = VALIDATOR_NAMES[validator_type] + + logger.info( + f"Starting {validator_name} scan (triggered by: {triggered_by})" + ) # Get git commit hash git_commit = self._get_git_commit_hash() @@ -52,13 +93,13 @@ class CodeQualityService: start_time = datetime.now() try: result = subprocess.run( - ["python", "scripts/validate_architecture.py", "--json"], + ["python", script_path, "--json"], capture_output=True, text=True, timeout=300, # 5 minute timeout ) except subprocess.TimeoutExpired: - logger.error("Architecture scan timed out after 5 minutes") + logger.error(f"{validator_name} scan timed out after 5 minutes") raise ScanTimeoutException(timeout_seconds=300) duration = (datetime.now() - start_time).total_seconds() @@ -79,7 +120,7 @@ class CodeQualityService: json_output = "\n".join(lines[json_start:]) data = json.loads(json_output) except (json.JSONDecodeError, ValueError) as e: - logger.error(f"Failed to parse validator output: {e}") + logger.error(f"Failed to parse {validator_name} validator output: {e}") 
logger.error(f"Stdout: {result.stdout}") logger.error(f"Stderr: {result.stderr}") raise ScanParseException(reason=str(e)) @@ -87,6 +128,7 @@ class CodeQualityService: # Create scan record scan = ArchitectureScan( timestamp=datetime.now(), + validator_type=validator_type, total_files=data.get("files_checked", 0), total_violations=data.get("total_violations", 0), errors=data.get("errors", 0), @@ -100,11 +142,12 @@ class CodeQualityService: # Create violation records violations_data = data.get("violations", []) - logger.info(f"Creating {len(violations_data)} violation records") + logger.info(f"Creating {len(violations_data)} {validator_name} violation records") for v in violations_data: violation = ArchitectureViolation( scan_id=scan.id, + validator_type=validator_type, rule_id=v["rule_id"], rule_name=v["rule_name"], severity=v["severity"], @@ -120,43 +163,98 @@ class CodeQualityService: db.flush() db.refresh(scan) - logger.info(f"Scan completed: {scan.total_violations} violations found") + logger.info( + f"{validator_name} scan completed: {scan.total_violations} violations found" + ) return scan - def get_latest_scan(self, db: Session) -> ArchitectureScan | None: - """Get the most recent scan""" - return ( - db.query(ArchitectureScan) - .order_by(desc(ArchitectureScan.timestamp)) - .first() - ) + def run_all_scans( + self, db: Session, triggered_by: str = "manual" + ) -> list[ArchitectureScan]: + """ + Run all validators and return list of scans + + Args: + db: Database session + triggered_by: Who/what triggered the scan + + Returns: + List of ArchitectureScan objects (one per validator) + """ + results = [] + for validator_type in VALID_VALIDATOR_TYPES: + try: + scan = self.run_scan(db, triggered_by, validator_type) + results.append(scan) + except Exception as e: + logger.error(f"Failed to run {validator_type} scan: {e}") + # Continue with other validators even if one fails + return results + + def get_latest_scan( + self, db: Session, validator_type: str = None + ) 
-> ArchitectureScan | None: + """ + Get the most recent scan + + Args: + db: Database session + validator_type: Optional filter by validator type + + Returns: + Most recent ArchitectureScan or None + """ + query = db.query(ArchitectureScan).order_by(desc(ArchitectureScan.timestamp)) + + if validator_type: + query = query.filter(ArchitectureScan.validator_type == validator_type) + + return query.first() + + def get_latest_scans_by_type(self, db: Session) -> dict[str, ArchitectureScan]: + """ + Get the most recent scan for each validator type + + Returns: + Dictionary mapping validator_type to its latest scan + """ + result = {} + for vtype in VALID_VALIDATOR_TYPES: + scan = self.get_latest_scan(db, validator_type=vtype) + if scan: + result[vtype] = scan + return result def get_scan_by_id(self, db: Session, scan_id: int) -> ArchitectureScan | None: """Get scan by ID""" return db.query(ArchitectureScan).filter(ArchitectureScan.id == scan_id).first() - def get_scan_history(self, db: Session, limit: int = 30) -> list[ArchitectureScan]: + def get_scan_history( + self, db: Session, limit: int = 30, validator_type: str = None + ) -> list[ArchitectureScan]: """ Get scan history for trend graphs Args: db: Database session limit: Maximum number of scans to return + validator_type: Optional filter by validator type Returns: List of ArchitectureScan objects, newest first """ - return ( - db.query(ArchitectureScan) - .order_by(desc(ArchitectureScan.timestamp)) - .limit(limit) - .all() - ) + query = db.query(ArchitectureScan).order_by(desc(ArchitectureScan.timestamp)) + + if validator_type: + query = query.filter(ArchitectureScan.validator_type == validator_type) + + return query.limit(limit).all() def get_violations( self, db: Session, scan_id: int = None, + validator_type: str = None, severity: str = None, status: str = None, rule_id: str = None, @@ -169,7 +267,8 @@ class CodeQualityService: Args: db: Database session - scan_id: Filter by scan ID (if None, use latest scan) + 
scan_id: Filter by scan ID (if None, use latest scan(s)) + validator_type: Filter by validator type severity: Filter by severity ('error', 'warning') status: Filter by status ('open', 'assigned', 'resolved', etc.) rule_id: Filter by rule ID @@ -180,19 +279,33 @@ class CodeQualityService: Returns: Tuple of (violations list, total count) """ - # If no scan_id specified, use latest scan - if scan_id is None: - latest_scan = self.get_latest_scan(db) - if not latest_scan: - return [], 0 - scan_id = latest_scan.id - # Build query - query = db.query(ArchitectureViolation).filter( - ArchitectureViolation.scan_id == scan_id - ) + query = db.query(ArchitectureViolation) - # Apply filters + # If scan_id specified, filter by it + if scan_id is not None: + query = query.filter(ArchitectureViolation.scan_id == scan_id) + else: + # If no scan_id, get violations from latest scan(s) + if validator_type: + # Get latest scan for specific validator type + latest_scan = self.get_latest_scan(db, validator_type) + if not latest_scan: + return [], 0 + query = query.filter(ArchitectureViolation.scan_id == latest_scan.id) + else: + # Get violations from latest scans of all types + latest_scans = self.get_latest_scans_by_type(db) + if not latest_scans: + return [], 0 + scan_ids = [s.id for s in latest_scans.values()] + query = query.filter(ArchitectureViolation.scan_id.in_(scan_ids)) + + # Apply validator_type filter if specified (for scan_id queries) + if validator_type and scan_id is not None: + query = query.filter(ArchitectureViolation.validator_type == validator_type) + + # Apply other filters if severity: query = query.filter(ArchitectureViolation.severity == severity) @@ -211,7 +324,9 @@ class CodeQualityService: # Get page of results violations = ( query.order_by( - ArchitectureViolation.severity.desc(), ArchitectureViolation.file_path + ArchitectureViolation.severity.desc(), + ArchitectureViolation.validator_type, + ArchitectureViolation.file_path, ) .limit(limit) .offset(offset) @@ 
-353,40 +468,65 @@ class CodeQualityService: logger.info(f"Comment added to violation {violation_id} by user {user_id}") return comment_obj - def get_dashboard_stats(self, db: Session) -> dict: + def get_dashboard_stats( + self, db: Session, validator_type: str = None + ) -> dict: """ Get statistics for dashboard - Returns: - Dictionary with various statistics - """ - latest_scan = self.get_latest_scan(db) - if not latest_scan: - return { - "total_violations": 0, - "errors": 0, - "warnings": 0, - "open": 0, - "assigned": 0, - "resolved": 0, - "ignored": 0, - "technical_debt_score": 100, - "trend": [], - "by_severity": {}, - "by_rule": {}, - "by_module": {}, - "top_files": [], - "last_scan": None, - } + Args: + db: Database session + validator_type: Optional filter by validator type. If None, returns combined stats. + Returns: + Dictionary with various statistics including per-validator breakdown + """ + # Get latest scans by type + latest_scans = self.get_latest_scans_by_type(db) + + if not latest_scans: + return self._empty_dashboard_stats() + + # If specific validator type requested + if validator_type and validator_type in latest_scans: + scan = latest_scans[validator_type] + return self._get_stats_for_scan(db, scan, validator_type) + + # Combined stats across all validators + return self._get_combined_stats(db, latest_scans) + + def _empty_dashboard_stats(self) -> dict: + """Return empty dashboard stats structure""" + return { + "total_violations": 0, + "errors": 0, + "warnings": 0, + "info": 0, + "open": 0, + "assigned": 0, + "resolved": 0, + "ignored": 0, + "technical_debt_score": 100, + "trend": [], + "by_severity": {}, + "by_rule": {}, + "by_module": {}, + "top_files": [], + "last_scan": None, + "by_validator": {}, + } + + def _get_stats_for_scan( + self, db: Session, scan: ArchitectureScan, validator_type: str + ) -> dict: + """Get stats for a single scan/validator type""" # Get violation counts by status status_counts = ( 
db.query(ArchitectureViolation.status, func.count(ArchitectureViolation.id)) - .filter(ArchitectureViolation.scan_id == latest_scan.id) + .filter(ArchitectureViolation.scan_id == scan.id) .group_by(ArchitectureViolation.status) .all() ) - status_dict = {status: count for status, count in status_counts} # Get violations by severity @@ -394,11 +534,10 @@ class CodeQualityService: db.query( ArchitectureViolation.severity, func.count(ArchitectureViolation.id) ) - .filter(ArchitectureViolation.scan_id == latest_scan.id) + .filter(ArchitectureViolation.scan_id == scan.id) .group_by(ArchitectureViolation.severity) .all() ) - by_severity = {sev: count for sev, count in severity_counts} # Get violations by rule @@ -406,16 +545,13 @@ class CodeQualityService: db.query( ArchitectureViolation.rule_id, func.count(ArchitectureViolation.id) ) - .filter(ArchitectureViolation.scan_id == latest_scan.id) + .filter(ArchitectureViolation.scan_id == scan.id) .group_by(ArchitectureViolation.rule_id) .all() ) - by_rule = { rule: count - for rule, count in sorted(rule_counts, key=lambda x: x[1], reverse=True)[ - :10 - ] + for rule, count in sorted(rule_counts, key=lambda x: x[1], reverse=True)[:10] } # Get top violating files @@ -424,69 +560,185 @@ class CodeQualityService: ArchitectureViolation.file_path, func.count(ArchitectureViolation.id).label("count"), ) - .filter(ArchitectureViolation.scan_id == latest_scan.id) + .filter(ArchitectureViolation.scan_id == scan.id) .group_by(ArchitectureViolation.file_path) .order_by(desc("count")) .limit(10) .all() ) - top_files = [{"file": file, "count": count} for file, count in file_counts] - # Get violations by module (extract module from file path) + # Get violations by module + by_module = self._get_violations_by_module(db, scan.id) + + # Get trend for this validator type + trend_scans = self.get_scan_history(db, limit=7, validator_type=validator_type) + trend = [ + { + "timestamp": s.timestamp.isoformat(), + "violations": s.total_violations, + 
"errors": s.errors, + "warnings": s.warnings, + } + for s in reversed(trend_scans) + ] + + return { + "total_violations": scan.total_violations, + "errors": scan.errors, + "warnings": scan.warnings, + "info": by_severity.get("info", 0), + "open": status_dict.get("open", 0), + "assigned": status_dict.get("assigned", 0), + "resolved": status_dict.get("resolved", 0), + "ignored": status_dict.get("ignored", 0), + "technical_debt_score": self._calculate_score(scan.errors, scan.warnings), + "trend": trend, + "by_severity": by_severity, + "by_rule": by_rule, + "by_module": by_module, + "top_files": top_files, + "last_scan": scan.timestamp.isoformat(), + "validator_type": validator_type, + "by_validator": { + validator_type: { + "total_violations": scan.total_violations, + "errors": scan.errors, + "warnings": scan.warnings, + "last_scan": scan.timestamp.isoformat(), + } + }, + } + + def _get_combined_stats( + self, db: Session, latest_scans: dict[str, ArchitectureScan] + ) -> dict: + """Get combined stats across all validators""" + # Aggregate totals + total_violations = sum(s.total_violations for s in latest_scans.values()) + total_errors = sum(s.errors for s in latest_scans.values()) + total_warnings = sum(s.warnings for s in latest_scans.values()) + + # Get all scan IDs + scan_ids = [s.id for s in latest_scans.values()] + + # Get violation counts by status + status_counts = ( + db.query(ArchitectureViolation.status, func.count(ArchitectureViolation.id)) + .filter(ArchitectureViolation.scan_id.in_(scan_ids)) + .group_by(ArchitectureViolation.status) + .all() + ) + status_dict = {status: count for status, count in status_counts} + + # Get violations by severity + severity_counts = ( + db.query( + ArchitectureViolation.severity, func.count(ArchitectureViolation.id) + ) + .filter(ArchitectureViolation.scan_id.in_(scan_ids)) + .group_by(ArchitectureViolation.severity) + .all() + ) + by_severity = {sev: count for sev, count in severity_counts} + + # Get violations by rule 
(across all validators) + rule_counts = ( + db.query( + ArchitectureViolation.rule_id, func.count(ArchitectureViolation.id) + ) + .filter(ArchitectureViolation.scan_id.in_(scan_ids)) + .group_by(ArchitectureViolation.rule_id) + .all() + ) + by_rule = { + rule: count + for rule, count in sorted(rule_counts, key=lambda x: x[1], reverse=True)[:10] + } + + # Get top violating files + file_counts = ( + db.query( + ArchitectureViolation.file_path, + func.count(ArchitectureViolation.id).label("count"), + ) + .filter(ArchitectureViolation.scan_id.in_(scan_ids)) + .group_by(ArchitectureViolation.file_path) + .order_by(desc("count")) + .limit(10) + .all() + ) + top_files = [{"file": file, "count": count} for file, count in file_counts] + + # Get violations by module + by_module = {} + for scan_id in scan_ids: + module_counts = self._get_violations_by_module(db, scan_id) + for module, count in module_counts.items(): + by_module[module] = by_module.get(module, 0) + count + by_module = dict( + sorted(by_module.items(), key=lambda x: x[1], reverse=True)[:10] + ) + + # Per-validator breakdown + by_validator = {} + for vtype, scan in latest_scans.items(): + by_validator[vtype] = { + "total_violations": scan.total_violations, + "errors": scan.errors, + "warnings": scan.warnings, + "last_scan": scan.timestamp.isoformat(), + } + + # Get most recent scan timestamp + most_recent = max(latest_scans.values(), key=lambda s: s.timestamp) + + return { + "total_violations": total_violations, + "errors": total_errors, + "warnings": total_warnings, + "info": by_severity.get("info", 0), + "open": status_dict.get("open", 0), + "assigned": status_dict.get("assigned", 0), + "resolved": status_dict.get("resolved", 0), + "ignored": status_dict.get("ignored", 0), + "technical_debt_score": self._calculate_score(total_errors, total_warnings), + "trend": [], # Combined trend would need special handling + "by_severity": by_severity, + "by_rule": by_rule, + "by_module": by_module, + "top_files": 
top_files, + "last_scan": most_recent.timestamp.isoformat(), + "by_validator": by_validator, + } + + def _get_violations_by_module(self, db: Session, scan_id: int) -> dict[str, int]: + """Extract module from file paths and count violations""" by_module = {} violations = ( db.query(ArchitectureViolation.file_path) - .filter(ArchitectureViolation.scan_id == latest_scan.id) + .filter(ArchitectureViolation.scan_id == scan_id) .all() ) for v in violations: path_parts = v.file_path.split("/") if len(path_parts) >= 2: - module = "/".join(path_parts[:2]) # e.g., 'app/api' + module = "/".join(path_parts[:2]) else: module = path_parts[0] by_module[module] = by_module.get(module, 0) + 1 - # Sort by count and take top 10 - by_module = dict( - sorted(by_module.items(), key=lambda x: x[1], reverse=True)[:10] - ) + return dict(sorted(by_module.items(), key=lambda x: x[1], reverse=True)[:10]) - # Calculate technical debt score - tech_debt_score = self.calculate_technical_debt_score(db, latest_scan.id) + def _calculate_score(self, errors: int, warnings: int) -> int: + """Calculate technical debt score (0-100)""" + score = 100 - (errors * 0.5 + warnings * 0.05) + return max(0, min(100, int(score))) - # Get trend (last 7 scans) - trend_scans = self.get_scan_history(db, limit=7) - trend = [ - { - "timestamp": scan.timestamp.isoformat(), - "violations": scan.total_violations, - "errors": scan.errors, - "warnings": scan.warnings, - } - for scan in reversed(trend_scans) # Oldest first for chart - ] - - return { - "total_violations": latest_scan.total_violations, - "errors": latest_scan.errors, - "warnings": latest_scan.warnings, - "open": status_dict.get("open", 0), - "assigned": status_dict.get("assigned", 0), - "resolved": status_dict.get("resolved", 0), - "ignored": status_dict.get("ignored", 0), - "technical_debt_score": tech_debt_score, - "trend": trend, - "by_severity": by_severity, - "by_rule": by_rule, - "by_module": by_module, - "top_files": top_files, - "last_scan": 
latest_scan.timestamp.isoformat() if latest_scan else None, - } - - def calculate_technical_debt_score(self, db: Session, scan_id: int = None) -> int: + def calculate_technical_debt_score( + self, db: Session, scan_id: int = None, validator_type: str = None + ) -> int: """ Calculate technical debt score (0-100) @@ -496,12 +748,13 @@ class CodeQualityService: Args: db: Database session scan_id: Scan ID (if None, use latest) + validator_type: Filter by validator type Returns: Score from 0-100 """ if scan_id is None: - latest_scan = self.get_latest_scan(db) + latest_scan = self.get_latest_scan(db, validator_type) if not latest_scan: return 100 scan_id = latest_scan.id @@ -510,8 +763,7 @@ class CodeQualityService: if not scan: return 100 - score = 100 - (scan.errors * 0.5 + scan.warnings * 0.05) - return max(0, min(100, int(score))) # Clamp to 0-100 + return self._calculate_score(scan.errors, scan.warnings) def _get_git_commit_hash(self) -> str | None: """Get current git commit hash""" diff --git a/app/templates/admin/code-quality-violations.html b/app/templates/admin/code-quality-violations.html index fb33361e..a9038656 100644 --- a/app/templates/admin/code-quality-violations.html +++ b/app/templates/admin/code-quality-violations.html @@ -14,7 +14,7 @@ {% endblock %} {% block content %} -{{ page_header('Architecture Violations', subtitle='Review and manage code quality violations', back_url='/admin/code-quality', back_label='Back to Dashboard') }} +{{ page_header('Code Quality Violations', subtitle='Review and manage violations across all validators', back_url='/admin/code-quality', back_label='Back to Dashboard') }} {{ loading_state('Loading violations...') }} @@ -25,7 +25,22 @@

Filters

-
+
+ +
+ + +
+
@@ -64,7 +80,7 @@
@@ -84,17 +100,29 @@ {% call table_wrapper() %} - {{ table_header(['Rule', 'Severity', 'File', 'Line', 'Message', 'Status', 'Actions']) }} + {{ table_header(['Validator', 'Rule', 'Severity', 'File', 'Line', 'Message', 'Status', 'Actions']) }}