refactor(scripts): reorganize scripts/ into seed/ and validate/ subfolders

Move 9 init/seed scripts into scripts/seed/ and 7 validation scripts
(+ validators/ subfolder) into scripts/validate/ to reduce clutter in
the root scripts/ directory. Update all references across Makefile,
CI/CD configs, pre-commit hooks, docs (~40 files), and Python imports.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 21:35:53 +01:00
parent d201221fb1
commit 7a9dda282d
63 changed files with 173 additions and 174 deletions

View File

@@ -0,0 +1,290 @@
"""
Base Validator Class
Shared functionality for all validators.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any
import yaml
class Severity(str, Enum):
"""Severity levels for validation findings."""
ERROR = "error"
WARNING = "warning"
INFO = "info"
@dataclass
class Violation:
"""A single validation violation."""
rule_id: str
message: str
severity: Severity
file_path: str = ""
line: int = 0
suggestion: str = ""
@dataclass
class ValidationResult:
"""Result of a validation run."""
violations: list[Violation] = field(default_factory=list)
files_checked: int = 0
def has_errors(self) -> bool:
"""Check if there are any error-level violations."""
return any(v.severity == Severity.ERROR for v in self.violations)
def error_count(self) -> int:
"""Count error-level violations."""
return sum(1 for v in self.violations if v.severity == Severity.ERROR)
def warning_count(self) -> int:
"""Count warning-level violations."""
return sum(1 for v in self.violations if v.severity == Severity.WARNING)
def info_count(self) -> int:
"""Count info-level violations."""
return sum(1 for v in self.violations if v.severity == Severity.INFO)
class BaseValidator(ABC):
"""Base class for architecture, security, and performance validators."""
# Directories/patterns to ignore by default
IGNORE_PATTERNS = [
".venv", "venv", "node_modules", "__pycache__", ".git",
".pytest_cache", ".mypy_cache", "dist", "build", "*.egg-info",
"migrations", "alembic/versions", ".tox", "htmlcov",
]
def __init__(
self,
rules_dir: str = "",
project_root: Path | None = None,
verbose: bool = False,
):
self.rules_dir = rules_dir
self.project_root = project_root or Path.cwd()
self.verbose = verbose
self.rules: list[dict[str, Any]] = []
self.errors: list[dict[str, Any]] = []
self.warnings: list[dict[str, Any]] = []
self.result = ValidationResult()
def load_rules(self) -> None:
"""Load rules from YAML files."""
rules_path = self.project_root / self.rules_dir
if not rules_path.exists():
print(f"Rules directory not found: {rules_path}")
return
for rule_file in rules_path.glob("*.yaml"):
if rule_file.name.startswith("_"):
continue # Skip main config
with open(rule_file) as f:
data = yaml.safe_load(f)
if data and "rules" in data:
self.rules.extend(data["rules"])
def validate(self) -> bool:
"""Run validation. Returns True if passed.
Subclasses should implement validate_all() instead.
"""
result = self.validate_all()
return not result.has_errors() if hasattr(result, 'has_errors') else True
def validate_all(self, target_path: Path | None = None) -> ValidationResult:
"""Run all validations. Override in subclasses."""
return ValidationResult()
def add_error(
self, rule_id: str, message: str, file: str = "", line: int = 0
) -> None:
"""Add an error."""
self.errors.append(
{
"rule_id": rule_id,
"message": message,
"file": file,
"line": line,
"severity": "error",
}
)
def add_warning(
self, rule_id: str, message: str, file: str = "", line: int = 0
) -> None:
"""Add a warning."""
self.warnings.append(
{
"rule_id": rule_id,
"message": message,
"file": file,
"line": line,
"severity": "warning",
}
)
def add_info(
self, rule_id: str, message: str, file: str = "", line: int = 0
) -> None:
"""Add an informational note."""
self.warnings.append(
{
"rule_id": rule_id,
"message": message,
"file": file,
"line": line,
"severity": "info",
}
)
def print_results(self) -> None:
"""Print validation results."""
if not self.errors and not self.warnings:
print(f"✅ All {self.rules_dir} rules passed!")
return
if self.errors:
print(f"\n{len(self.errors)} errors found:")
for error in self.errors:
print(f" [{error['rule_id']}] {error['message']}")
if error["file"]:
print(f" File: {error['file']}:{error['line']}")
if self.warnings:
print(f"\n⚠️ {len(self.warnings)} warnings:")
for warning in self.warnings:
print(f" [{warning['rule_id']}] {warning['message']}")
if warning["file"]:
print(f" File: {warning['file']}:{warning['line']}")
def run(self) -> int:
"""Run validation and return exit code."""
self.load_rules()
passed = self.validate()
self.print_results()
return 0 if passed else 1
def _should_ignore_file(self, file_path: Path) -> bool:
"""Check if a file should be ignored based on patterns."""
path_str = str(file_path)
for pattern in self.IGNORE_PATTERNS:
if pattern in path_str:
return True
return False
def _add_violation(
self,
rule_id: str,
rule_name: str,
severity: Severity,
file_path: Path,
line_number: int,
message: str,
context: str = "",
suggestion: str = "",
) -> None:
"""Add a violation to the result."""
violation = Violation(
rule_id=rule_id,
message=f"{rule_name}: {message}",
severity=severity,
file_path=str(file_path),
line=line_number,
suggestion=suggestion,
)
self.result.violations.append(violation)
if self.verbose and context:
print(f" [{rule_id}] {file_path}:{line_number}")
print(f" {message}")
print(f" Context: {context}")
def validate_file(self, file_path: Path) -> ValidationResult:
"""Validate a single file."""
if not file_path.exists():
print(f"File not found: {file_path}")
return self.result
self.result.files_checked = 1
content = file_path.read_text()
lines = content.split("\n")
self._validate_file_content(file_path, content, lines)
return self.result
def _validate_file_content(self, file_path: Path, content: str, lines: list[str]):
"""Validate file content. Override in subclasses."""
pass
def output_results(self, json_output: bool = False, errors_only: bool = False) -> None:
"""Output validation results."""
if json_output:
import json
output = {
"files_checked": self.result.files_checked,
"violations": [
{
"rule_id": v.rule_id,
"message": v.message,
"severity": v.severity.value,
"file": v.file_path,
"line": v.line,
"suggestion": v.suggestion,
}
for v in self.result.violations
if not errors_only or v.severity == Severity.ERROR
],
}
print(json.dumps(output, indent=2))
else:
self._print_violations(errors_only)
def _print_violations(self, errors_only: bool = False) -> None:
"""Print violations in human-readable format."""
violations = self.result.violations
if errors_only:
violations = [v for v in violations if v.severity == Severity.ERROR]
if not violations:
print(f"\n✅ No issues found! ({self.result.files_checked} files checked)")
return
errors = [v for v in violations if v.severity == Severity.ERROR]
warnings = [v for v in violations if v.severity == Severity.WARNING]
info = [v for v in violations if v.severity == Severity.INFO]
if errors:
print(f"\n{len(errors)} errors:")
for v in errors:
print(f" [{v.rule_id}] {v.file_path}:{v.line}")
print(f" {v.message}")
if v.suggestion:
print(f" 💡 {v.suggestion}")
if warnings and not errors_only:
print(f"\n⚠️ {len(warnings)} warnings:")
for v in warnings:
print(f" [{v.rule_id}] {v.file_path}:{v.line}")
print(f" {v.message}")
if info and not errors_only:
print(f"\n {len(info)} info:")
for v in info:
print(f" [{v.rule_id}] {v.file_path}:{v.line}")
print(f" {v.message}")
print(f"\n📊 Summary: {len(errors)} errors, {len(warnings)} warnings, {len(info)} info")
def get_exit_code(self) -> int:
"""Get exit code based on validation results."""
return 1 if self.result.has_errors() else 0

246
scripts/validate/validate_all.py Executable file
View File

@@ -0,0 +1,246 @@
#!/usr/bin/env python3
"""
Unified Code Validator
======================
Runs all validation scripts (architecture, security, performance, audit) in sequence.
This provides a single entry point for comprehensive code validation,
useful for CI/CD pipelines and pre-commit hooks.
Usage:
python scripts/validate/validate_all.py # Run all validators
python scripts/validate/validate_all.py --security # Run only security validator
python scripts/validate/validate_all.py --performance # Run only performance validator
python scripts/validate/validate_all.py --architecture # Run only architecture validator
python scripts/validate/validate_all.py --audit # Run only audit validator
python scripts/validate/validate_all.py -v # Verbose output
python scripts/validate/validate_all.py --fail-fast # Stop on first failure
python scripts/validate/validate_all.py --json # JSON output
Options:
--architecture Run architecture validator
--security Run security validator
--performance Run performance validator
--audit Run audit validator
--fail-fast Stop on first validator failure
-v, --verbose Show detailed output
--errors-only Only show errors
--json Output results as JSON
"""
import argparse
import json
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
def run_architecture_validator(verbose: bool = False) -> tuple[int, dict]:
"""Run the architecture validator"""
try:
# Import dynamically to avoid circular imports
from validate_architecture import ArchitectureValidator
config_path = Path.cwd() / ".architecture-rules.yaml"
validator = ArchitectureValidator(config_path=config_path, verbose=verbose)
result = validator.validate_all()
return (
1 if result.has_errors() else 0,
{
"name": "Architecture",
"files_checked": result.files_checked,
"errors": sum(1 for v in result.violations if v.severity.value == "error"),
"warnings": sum(1 for v in result.violations if v.severity.value == "warning"),
"info": sum(1 for v in result.violations if v.severity.value == "info"),
}
)
except ImportError as e:
print(f"⚠️ Architecture validator not available: {e}")
return 0, {"name": "Architecture", "skipped": True}
except Exception as e:
print(f"❌ Architecture validator failed: {e}")
return 1, {"name": "Architecture", "error": str(e)}
def run_security_validator(verbose: bool = False) -> tuple[int, dict]:
"""Run the security validator"""
try:
from validate_security import SecurityValidator
validator = SecurityValidator(verbose=verbose)
result = validator.validate_all()
return (
1 if result.has_errors() else 0,
{
"name": "Security",
"files_checked": result.files_checked,
"errors": result.error_count(),
"warnings": result.warning_count(),
"info": result.info_count(),
}
)
except ImportError as e:
print(f"⚠️ Security validator not available: {e}")
return 0, {"name": "Security", "skipped": True}
except Exception as e:
print(f"❌ Security validator failed: {e}")
return 1, {"name": "Security", "error": str(e)}
def run_performance_validator(verbose: bool = False) -> tuple[int, dict]:
"""Run the performance validator"""
try:
from validate_performance import PerformanceValidator
validator = PerformanceValidator(verbose=verbose)
result = validator.validate_all()
return (
1 if result.has_errors() else 0,
{
"name": "Performance",
"files_checked": result.files_checked,
"errors": result.error_count(),
"warnings": result.warning_count(),
"info": result.info_count(),
}
)
except ImportError as e:
print(f"⚠️ Performance validator not available: {e}")
return 0, {"name": "Performance", "skipped": True}
except Exception as e:
print(f"❌ Performance validator failed: {e}")
return 1, {"name": "Performance", "error": str(e)}
def run_audit_validator(verbose: bool = False) -> tuple[int, dict]:
"""Run the audit validator"""
try:
from validate_audit import AuditValidator
validator = AuditValidator()
has_errors = not validator.validate()
return (
1 if has_errors else 0,
{
"name": "Audit",
"files_checked": len(validator.files_checked) if hasattr(validator, 'files_checked') else 0,
"errors": len(validator.errors),
"warnings": len(validator.warnings),
"info": len(validator.info) if hasattr(validator, 'info') else 0,
}
)
except ImportError as e:
print(f"⚠️ Audit validator not available: {e}")
return 0, {"name": "Audit", "skipped": True}
except Exception as e:
print(f"❌ Audit validator failed: {e}")
return 1, {"name": "Audit", "error": str(e)}
def print_summary(results: list[dict], json_output: bool = False):
"""Print validation summary"""
if json_output:
print(json.dumps({"validators": results}, indent=2))
return
print("\n" + "=" * 80)
print("📊 UNIFIED VALIDATION SUMMARY")
print("=" * 80)
total_errors = 0
total_warnings = 0
total_info = 0
for result in results:
if result.get("skipped"):
print(f"\n⏭️ {result['name']}: Skipped")
elif result.get("error"):
print(f"\n{result['name']}: Error - {result['error']}")
else:
errors = result.get("errors", 0)
warnings = result.get("warnings", 0)
info = result.get("info", 0)
total_errors += errors
total_warnings += warnings
total_info += info
status = "" if errors == 0 else ""
print(f"\n{status} {result['name']}:")
print(f" Files: {result.get('files_checked', 0)}")
print(f" Errors: {errors}, Warnings: {warnings}, Info: {info}")
print("\n" + "-" * 80)
print(f"TOTAL: {total_errors} errors, {total_warnings} warnings, {total_info} info")
print("=" * 80)
if total_errors > 0:
print("❌ VALIDATION FAILED")
elif total_warnings > 0:
print(f"⚠️ VALIDATION PASSED WITH {total_warnings} WARNING(S)")
else:
print("✅ VALIDATION PASSED")
print("=" * 80)
def main():
parser = argparse.ArgumentParser(
description="Unified code validator - runs architecture, security, performance, and audit checks",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("--architecture", action="store_true", help="Run architecture validator")
parser.add_argument("--security", action="store_true", help="Run security validator")
parser.add_argument("--performance", action="store_true", help="Run performance validator")
parser.add_argument("--audit", action="store_true", help="Run audit validator")
parser.add_argument("--fail-fast", action="store_true", help="Stop on first failure")
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
parser.add_argument("--errors-only", action="store_true", help="Only show errors")
parser.add_argument("--json", action="store_true", help="JSON output")
args = parser.parse_args()
# If no specific validators selected, run all
run_all = not (args.architecture or args.security or args.performance or args.audit)
print("\n🔍 UNIFIED CODE VALIDATION")
print("=" * 80)
validators = []
if run_all or args.architecture:
validators.append(("Architecture", run_architecture_validator))
if run_all or args.security:
validators.append(("Security", run_security_validator))
if run_all or args.performance:
validators.append(("Performance", run_performance_validator))
if run_all or args.audit:
validators.append(("Audit", run_audit_validator))
results = []
exit_code = 0
for name, validator_func in validators:
print(f"\n{'=' * 40}")
print(f"🔍 Running {name} Validator...")
print("=" * 40)
code, result = validator_func(verbose=args.verbose)
results.append(result)
if code != 0:
exit_code = 1
if args.fail_fast:
print(f"\n{name} validator failed. Stopping (--fail-fast)")
break
print_summary(results, json_output=args.json)
sys.exit(exit_code)
if __name__ == "__main__":
main()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,543 @@
#!/usr/bin/env python3
"""
IT Internal Audit Validator
Validates code against internal audit rules defined in .audit-rules/
Focuses on governance, compliance, and control requirements.
"""
import re
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from base_validator import BaseValidator
class AuditValidator(BaseValidator):
"""Validates IT internal audit rules."""
def __init__(self, project_root: Path | None = None):
super().__init__(".audit-rules", project_root)
def validate(self) -> bool:
"""Run all audit validations."""
self._validate_audit_trail()
self._validate_access_control()
self._validate_data_governance()
self._validate_compliance()
self._validate_change_management()
self._validate_third_party()
self._validate_documentation()
return len(self.errors) == 0
# ==================
# AUDIT TRAIL
# ==================
def _validate_audit_trail(self) -> None:
"""Validate audit trail requirements."""
# Check authentication logging
auth_files = [
self.project_root / "app" / "api" / "v1" / "auth.py",
self.project_root / "app" / "routes" / "admin.py",
]
for file in auth_files:
if file.exists():
content = file.read_text()
if "logger" not in content:
self.add_error(
"AUDIT-LOG-001",
"Authentication operations must include logging",
str(file),
)
# Check service layer logging
services_path = self.project_root / "app" / "services"
if services_path.exists():
for file in services_path.glob("*.py"):
if file.name == "__init__.py":
continue
content = file.read_text()
# Services that modify data should have logging
if re.search(r"def (create|update|delete)", content):
if "logger" not in content:
self.add_warning(
"AUDIT-LOG-002",
"Service with data modifications should include logging",
str(file),
)
# Check for audit timestamp fields in models
# Models can have timestamps directly or inherit from BaseModel/TimestampMixin
models_path = self.project_root / "models" / "database"
if models_path.exists():
for file in models_path.glob("*.py"):
# audit_log.py uses timestamp field instead of created_at/updated_at
if file.name in ("__init__.py", "base.py", "audit_log.py"):
continue
content = file.read_text()
if "class " in content: # Has model definition
# Check if timestamps are present directly or via inheritance
has_timestamps = (
"created_at" in content
or "updated_at" in content
or "BaseModel" in content # Inherits from BaseModel
or "TimestampMixin" in content # Uses TimestampMixin
)
if not has_timestamps:
self.add_warning(
"AUDIT-FIELD-001",
"Database model should include audit timestamp fields",
str(file),
)
# Check for forbidden log modification patterns
self._check_forbidden_patterns(
paths=["app/**/*.py"],
patterns=[
r"os\.remove.*\.log",
r"truncate.*log",
r"open.*\.log.*['\"]w['\"]",
],
rule_id="AUDIT-INT-001",
message="Application must not modify or delete log files",
)
# ==================
# ACCESS CONTROL
# ==================
def _validate_access_control(self) -> None:
"""Validate access control requirements."""
# Check API endpoints have authentication
api_path = self.project_root / "app" / "api" / "v1"
if api_path.exists():
for file in api_path.glob("*.py"):
# Skip endpoints that are intentionally unauthenticated
if file.name in ("__init__.py", "health.py", "metrics.py"):
continue
content = file.read_text()
# Check for authentication dependency
if "@router" in content:
if not re.search(
r"CurrentUser|Depends.*get_current_user|AdminUser", content
):
# auth.py handles its own auth
if file.name != "auth.py":
self.add_warning(
"ACCESS-AUTH-001",
"API endpoint should require authentication",
str(file),
)
# Check admin routes verify admin role
admin_route = self.project_root / "app" / "routes" / "admin.py"
if admin_route.exists():
content = admin_route.read_text()
if "is_admin" not in content and "admin_required" not in content:
self.add_warning(
"ACCESS-AUTH-002",
"Admin routes should verify admin privileges",
str(admin_route),
)
# Check password hashing
security_file = self.project_root / "app" / "core" / "security.py"
if security_file.exists():
content = security_file.read_text()
if not re.search(r"bcrypt|argon2|scrypt|pbkdf2", content, re.IGNORECASE):
self.add_error(
"ACCESS-ACCT-003",
"Passwords must use approved hashing algorithms",
str(security_file),
)
# Check password not in API responses
# Note: Only flag if a class with "Response" in name directly defines password_hash
# Internal schemas (like UserInDB) are not flagged as they're not API responses
schema_path = self.project_root / "models" / "schema"
if schema_path.exists():
for file in schema_path.glob("*.py"):
content = file.read_text()
# Check for Response classes that directly define password_hash
# Split by class definitions and check each
class_blocks = re.split(r"(?=^class\s)", content, flags=re.MULTILINE)
for block in class_blocks:
# Check if this class is a Response class
class_match = re.match(r"class\s+(\w*Response\w*)", block)
if class_match:
# Check if password_hash is defined in this class (not inherited)
if "password_hash:" in block or "password_hash =" in block:
if "exclude" not in block.lower():
self.add_error(
"ACCESS-PRIV-002",
f"Password hash must be excluded from {class_match.group(1)}",
str(file),
)
# ==================
# DATA GOVERNANCE
# ==================
def _validate_data_governance(self) -> None:
"""Validate data governance requirements."""
# Check PII not logged
# Note: Patterns detect actual password values, not descriptive usage like "Password reset"
# We look for patterns that suggest password values are being logged:
# - password= or password: followed by a variable
# - %s or {} after password indicating interpolation of password value
self._check_forbidden_patterns(
paths=["app/**/*.py", "middleware/**/*.py"],
patterns=[
r"logger\.\w+\(.*password\s*[=:]\s*['\"]?%", # password=%s
r"logger\.\w+\(.*password\s*[=:]\s*\{", # password={var}
r"logging\.\w+\(.*password\s*[=:]\s*['\"]?%", # password=%s
r"print\(.*password\s*=", # print(password=xxx)
r"logger.*credit.*card.*\d", # credit card with numbers
r"logger.*\bssn\b.*\d", # SSN with numbers
],
rule_id="DATA-PII-003",
message="PII/sensitive data must not be logged",
)
# Check input validation (Pydantic)
schema_path = self.project_root / "models" / "schema"
if schema_path.exists():
has_validation = False
for file in schema_path.glob("*.py"):
content = file.read_text()
if re.search(r"Field|validator|field_validator", content):
has_validation = True
break
if not has_validation:
self.add_error(
"DATA-INT-001",
"Pydantic validation required for data integrity",
str(schema_path),
)
# Check user data access endpoint exists (GDPR)
users_api = self.project_root / "app" / "api" / "v1" / "users.py"
if users_api.exists():
content = users_api.read_text()
if "/me" not in content and "current" not in content.lower():
self.add_warning(
"DATA-PRIV-001",
"Endpoint for users to access their own data required (GDPR Art. 15)",
str(users_api),
)
# ==================
# COMPLIANCE
# ==================
def _validate_compliance(self) -> None:
"""Validate compliance requirements."""
# Check HTTPS configuration
config_files = [
self.project_root / "app" / "core" / "config.py",
self.project_root / "main.py",
]
https_configured = False
for file in config_files:
if file.exists():
content = file.read_text()
if re.search(r"https|SSL|TLS|SECURE", content, re.IGNORECASE):
https_configured = True
break
if not https_configured:
self.add_warning(
"COMP-REG-002",
"HTTPS configuration should be documented",
"app/core/config.py",
)
# Check version control
if not (self.project_root / ".git").exists():
self.add_error(
"COMP-EVID-003",
"Version control (Git) is required",
str(self.project_root),
)
# Check CI/CD exists (GitHub or GitLab)
github_ci = self.project_root / ".github" / "workflows" / "ci.yml"
gitlab_ci = self.project_root / ".gitlab-ci.yml"
if not github_ci.exists() and not gitlab_ci.exists():
self.add_warning(
"COMP-EVID-001",
"CI workflow for automated testing recommended",
".gitlab-ci.yml or .github/workflows/ci.yml",
)
# Check code review process (GitHub or GitLab)
github_pr_template = self.project_root / ".github" / "PULL_REQUEST_TEMPLATE.md"
gitlab_mr_templates = self.project_root / ".gitlab" / "merge_request_templates"
has_mr_template = github_pr_template.exists() or (
gitlab_mr_templates.exists() and any(gitlab_mr_templates.iterdir())
)
if not has_mr_template:
self.add_warning(
"COMP-POL-001",
"Merge request template recommended for code review",
".gitlab/merge_request_templates/ or .github/PULL_REQUEST_TEMPLATE.md",
)
# ==================
# CHANGE MANAGEMENT
# ==================
def _validate_change_management(self) -> None:
"""Validate change management requirements."""
# Check .gitignore exists and excludes secrets
gitignore = self.project_root / ".gitignore"
if gitignore.exists():
content = gitignore.read_text()
required_exclusions = [".env", "*.pem", "*.key"]
for pattern in required_exclusions:
# Simplified check - just look for the pattern
if pattern.replace("*", "") not in content:
self.add_warning(
"CHANGE-VC-003",
f"Secret pattern '{pattern}' should be in .gitignore",
str(gitignore),
)
else:
self.add_error(
"CHANGE-VC-002",
".gitignore file required",
str(self.project_root),
)
# Check database migrations
alembic_dir = self.project_root / "alembic"
if not alembic_dir.exists():
self.add_warning(
"CHANGE-ROLL-001",
"Database migration tool (Alembic) recommended",
"alembic/",
)
else:
# Check for downgrade functions
versions_dir = alembic_dir / "versions"
if versions_dir.exists():
for file in versions_dir.glob("*.py"):
content = file.read_text()
if "def upgrade" in content and "def downgrade" not in content:
self.add_warning(
"CHANGE-ROLL-002",
"Migration should include downgrade function",
str(file),
)
# Check environment separation
config_file = self.project_root / "app" / "core" / "config.py"
if config_file.exists():
content = config_file.read_text()
if not re.search(r"ENVIRONMENT|development|staging|production", content):
self.add_warning(
"CHANGE-DEP-001",
"Environment separation configuration recommended",
str(config_file),
)
# ==================
# THIRD PARTY
# ==================
def _validate_third_party(self) -> None:
"""Validate third-party dependency management."""
# Check dependency lock file exists
lock_files = ["uv.lock", "poetry.lock", "Pipfile.lock", "requirements.lock"]
has_lock = any((self.project_root / f).exists() for f in lock_files)
if not has_lock:
self.add_warning(
"THIRD-DEP-001",
"Dependency lock file recommended for reproducible builds",
"uv.lock or similar",
)
# Check dependency manifest exists
manifest_files = ["pyproject.toml", "requirements.txt", "Pipfile"]
has_manifest = any((self.project_root / f).exists() for f in manifest_files)
if not has_manifest:
self.add_error(
"THIRD-DEP-002",
"Dependency manifest file required",
"pyproject.toml",
)
# Check for dependency scanning (GitHub Dependabot or GitLab)
dependabot = self.project_root / ".github" / "dependabot.yml"
gitlab_ci = self.project_root / ".gitlab-ci.yml"
has_dep_scanning = dependabot.exists()
if not has_dep_scanning and gitlab_ci.exists():
# Check if GitLab CI includes dependency scanning
ci_content = gitlab_ci.read_text()
has_dep_scanning = "dependency_scanning" in ci_content.lower()
if not has_dep_scanning:
self.add_info(
"THIRD-VULN-002",
"Consider enabling dependency scanning for security updates",
".gitlab-ci.yml (include dependency_scanning) or .github/dependabot.yml",
)
# Check for insecure package sources
pyproject = self.project_root / "pyproject.toml"
if pyproject.exists():
content = pyproject.read_text()
if "http://" in content and "https://" not in content:
self.add_error(
"THIRD-VEND-001",
"Only HTTPS sources allowed for packages",
str(pyproject),
)
# ==================
# DOCUMENTATION
# ==================
def _validate_documentation(self) -> None:
"""Validate documentation requirements."""
# Check README exists
readme_files = ["README.md", "README.rst", "README.txt"]
has_readme = any((self.project_root / f).exists() for f in readme_files)
if not has_readme:
self.add_error(
"DOC-PROJ-001",
"Project README required",
"README.md",
)
else:
# Check README has setup instructions
for readme in readme_files:
readme_path = self.project_root / readme
if readme_path.exists():
content = readme_path.read_text().lower()
has_setup = any(
term in content
for term in [
"install",
"setup",
"quick start",
"getting started",
]
)
if not has_setup:
self.add_warning(
"DOC-PROJ-002",
"README should include setup instructions",
str(readme_path),
)
break
# Check security policy exists
security_files = ["SECURITY.md", ".github/SECURITY.md"]
has_security = any((self.project_root / f).exists() for f in security_files)
if not has_security:
self.add_warning(
"DOC-SEC-001",
"Security policy (SECURITY.md) recommended",
"SECURITY.md",
)
# Check API documentation
docs_api = self.project_root / "docs" / "api"
if not docs_api.exists() or not list(docs_api.glob("*.md")):
self.add_warning(
"DOC-API-003",
"API documentation recommended",
"docs/api/",
)
# Check authentication documentation
auth_doc = self.project_root / "docs" / "api" / "authentication.md"
if not auth_doc.exists():
self.add_warning(
"DOC-SEC-002",
"Authentication documentation recommended",
"docs/api/authentication.md",
)
# Check architecture documentation
arch_docs = self.project_root / "docs" / "architecture"
if not arch_docs.exists() or not list(arch_docs.glob("*.md")):
self.add_warning(
"DOC-ARCH-001",
"Architecture documentation recommended",
"docs/architecture/",
)
# Check deployment documentation
deploy_doc = self.project_root / "docs" / "deployment" / "index.md"
if not deploy_doc.exists():
self.add_warning(
"DOC-OPS-001",
"Deployment documentation recommended",
"docs/deployment/index.md",
)
# ==================
# HELPERS
# ==================
def _check_forbidden_patterns(
self,
paths: list[str],
patterns: list[str],
rule_id: str,
message: str,
) -> None:
"""Check for forbidden patterns in files."""
for path_pattern in paths:
if "**" in path_pattern:
base, pattern = path_pattern.split("**", 1)
base_path = self.project_root / base.rstrip("/")
if base_path.exists():
files = base_path.rglob(pattern.lstrip("/"))
else:
continue
else:
files = [self.project_root / path_pattern]
for file in files:
if not file.exists() or not file.is_file():
continue
try:
content = file.read_text()
for pattern in patterns:
if re.search(pattern, content, re.IGNORECASE):
self.add_error(rule_id, message, str(file))
except Exception:
pass
def main() -> int:
"""Run audit validation."""
import argparse
parser = argparse.ArgumentParser(description="Validate IT internal audit rules")
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
parser.add_argument(
"--format",
choices=["text", "json"],
default="text",
help="Output format",
)
args = parser.parse_args()
validator = AuditValidator()
validator.load_rules()
success = validator.validate()
validator.print_results()
return 0 if success else 1
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,648 @@
#!/usr/bin/env python3
"""
Performance Validator
=====================
Validates code against performance rules defined in .performance-rules/
This script checks for common performance issues:
- N+1 query patterns
- Missing pagination
- Inefficient database operations
- Memory management issues
- Frontend performance anti-patterns
- Missing timeouts and connection pooling
Usage:
python scripts/validate/validate_performance.py # Check all files
python scripts/validate/validate_performance.py -d app/services/ # Check specific directory
python scripts/validate/validate_performance.py -f app/api/v1/products.py # Check single file
python scripts/validate/validate_performance.py -v # Verbose output
python scripts/validate/validate_performance.py --json # JSON output
python scripts/validate/validate_performance.py --errors-only # Only show errors
Options:
-f, --file PATH Validate a single file
-d, --folder PATH Validate all files in a directory (recursive)
-v, --verbose Show detailed output including context
--errors-only Only show errors, suppress warnings and info
--json Output results as JSON
"""
import argparse
import re
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from base_validator import BaseValidator, Severity, ValidationResult
class PerformanceValidator(BaseValidator):
"""Performance-focused code validator"""
VALIDATOR_NAME = "Performance Validator"
VALIDATOR_EMOJI = ""
RULES_DIR_NAME = ".performance-rules"
CONFIG_FILE_NAME = ".performance-rules.yaml"
def validate_all(self, target_path: Path = None) -> ValidationResult:
"""Validate all files for performance issues"""
print(f"\n{self.VALIDATOR_EMOJI} Starting performance validation...\n")
target = target_path or self.project_root
# Validate Python files
self._validate_python_files(target)
# Validate JavaScript files
self._validate_javascript_files(target)
# Validate HTML templates
self._validate_template_files(target)
return self.result
def _validate_python_files(self, target: Path):
"""Validate all Python files for performance issues"""
print("🐍 Validating Python files...")
for py_file in target.rglob("*.py"):
if self._should_ignore_file(py_file):
continue
self.result.files_checked += 1
content = py_file.read_text()
lines = content.split("\n")
self._validate_python_performance(py_file, content, lines)
def _validate_javascript_files(self, target: Path):
"""Validate all JavaScript files for performance issues"""
print("🟨 Validating JavaScript files...")
for js_file in target.rglob("*.js"):
if self._should_ignore_file(js_file):
continue
self.result.files_checked += 1
content = js_file.read_text()
lines = content.split("\n")
self._validate_javascript_performance(js_file, content, lines)
def _validate_template_files(self, target: Path):
"""Validate all HTML template files for performance issues"""
print("📄 Validating template files...")
for html_file in target.rglob("*.html"):
if self._should_ignore_file(html_file):
continue
self.result.files_checked += 1
content = html_file.read_text()
lines = content.split("\n")
self._validate_template_performance(html_file, content, lines)
def _validate_file_content(self, file_path: Path, content: str, lines: list[str]):
"""Validate file content based on file type"""
if file_path.suffix == ".py":
self._validate_python_performance(file_path, content, lines)
elif file_path.suffix == ".js":
self._validate_javascript_performance(file_path, content, lines)
elif file_path.suffix == ".html":
self._validate_template_performance(file_path, content, lines)
def _validate_python_performance(self, file_path: Path, content: str, lines: list[str]):
"""Validate Python file for performance issues"""
file_path_str = str(file_path)
# PERF-001: N+1 query detection
self._check_n_plus_1_queries(file_path, content, lines)
# PERF-003: Query result limiting
self._check_query_limiting(file_path, content, lines)
# PERF-006: Bulk operations
self._check_bulk_operations(file_path, content, lines)
# PERF-008: Use EXISTS for existence checks
self._check_existence_checks(file_path, content, lines)
# PERF-009: Batch updates
self._check_batch_updates(file_path, content, lines)
# PERF-026: Pagination for API endpoints
if "/api/" in file_path_str:
self._check_api_pagination(file_path, content, lines)
# PERF-037: Parallel async operations
self._check_parallel_async(file_path, content, lines)
# PERF-040: Timeout configuration
self._check_timeout_config(file_path, content, lines)
# PERF-046: Generators for large datasets
self._check_generators(file_path, content, lines)
# PERF-047: Stream file uploads
if "upload" in file_path_str.lower() or "file" in file_path_str.lower():
self._check_file_streaming(file_path, content, lines)
# PERF-048: Chunked processing
if "import" in file_path_str.lower() or "csv" in file_path_str.lower():
self._check_chunked_processing(file_path, content, lines)
# PERF-049: Context managers for files
self._check_context_managers(file_path, content, lines)
# PERF-051: String concatenation
self._check_string_concatenation(file_path, content, lines)
def _validate_javascript_performance(self, file_path: Path, content: str, lines: list[str]):
"""Validate JavaScript file for performance issues"""
# PERF-056: Debounce search inputs
self._check_debounce(file_path, content, lines)
# PERF-062: Polling intervals
self._check_polling_intervals(file_path, content, lines)
# PERF-064: Layout thrashing
self._check_layout_thrashing(file_path, content, lines)
def _validate_template_performance(self, file_path: Path, content: str, lines: list[str]):
"""Validate HTML template file for performance issues"""
# PERF-058: Image lazy loading
self._check_image_lazy_loading(file_path, content, lines)
# PERF-067: Script defer/async
self._check_script_loading(file_path, content, lines)
# =========================================================================
# Database Performance Checks
# =========================================================================
def _check_n_plus_1_queries(self, file_path: Path, content: str, lines: list[str]):
"""PERF-001: Check for N+1 query patterns"""
# Look for patterns like: for item in items: item.relationship.attribute
in_for_loop = False
for_line_num = 0
for i, line in enumerate(lines, 1):
stripped = line.strip()
# Track for loops over query results
if re.search(r'for\s+\w+\s+in\s+.*\.(all|query)', line):
in_for_loop = True
for_line_num = i
elif in_for_loop and stripped and not stripped.startswith("#"):
# Check for relationship access in loop
if re.search(r'\.\w+\.\w+', line) and "(" not in line:
# Could be accessing a relationship
if any(rel in line for rel in [".customer.", ".store.", ".order.", ".product.", ".user."]):
self._add_violation(
rule_id="PERF-001",
rule_name="N+1 query detection",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="Possible N+1 query - relationship accessed in loop",
context=line.strip()[:80],
suggestion="Use joinedload() or selectinload() for eager loading",
)
in_for_loop = False
# Reset on dedent
if in_for_loop and line and not line.startswith(" " * 4) and i > for_line_num + 1:
in_for_loop = False
def _check_query_limiting(self, file_path: Path, content: str, lines: list[str]):
"""PERF-003: Check for unbounded query results"""
for i, line in enumerate(lines, 1):
if re.search(r'\.all\(\)', line):
# Check if there's a limit or filter before
context_start = max(0, i - 5)
context_lines = lines[context_start:i]
context_text = "\n".join(context_lines)
if "limit" not in context_text.lower() and "filter" not in context_text.lower():
if "# noqa" in line or "# bounded" in line:
continue
self._add_violation(
rule_id="PERF-003",
rule_name="Query result limiting",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message="Query may return unbounded results",
context=line.strip()[:80],
suggestion="Add .limit() or pagination for large tables",
)
def _check_bulk_operations(self, file_path: Path, content: str, lines: list[str]):
"""PERF-006: Check for individual operations in loops"""
in_for_loop = False
for_indent = 0
for i, line in enumerate(lines, 1):
stripped = line.strip()
# Track for loops
if re.search(r'for\s+\w+\s+in\s+', line):
in_for_loop = True
for_indent = len(line) - len(line.lstrip())
elif in_for_loop:
current_indent = len(line) - len(line.lstrip()) if line.strip() else for_indent + 4
if current_indent <= for_indent and stripped:
in_for_loop = False
elif "db.add(" in line or ".save(" in line:
self._add_violation(
rule_id="PERF-006",
rule_name="Bulk operations for multiple records",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="Individual db.add() in loop - consider bulk operations",
context=line.strip()[:80],
suggestion="Use db.add_all() or bulk_insert_mappings()",
)
def _check_existence_checks(self, file_path: Path, content: str, lines: list[str]):
"""PERF-008: Check for inefficient existence checks"""
patterns = [
(r'\.count\(\)\s*>\s*0', "count() > 0"),
(r'\.count\(\)\s*>=\s*1', "count() >= 1"),
(r'\.count\(\)\s*!=\s*0', "count() != 0"),
]
for i, line in enumerate(lines, 1):
for pattern, issue in patterns:
if re.search(pattern, line):
self._add_violation(
rule_id="PERF-008",
rule_name="Use EXISTS for existence checks",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message=f"{issue} scans all rows - use EXISTS instead",
context=line.strip()[:80],
suggestion="Use db.scalar(exists().where(...)) or .first() is not None",
)
def _check_batch_updates(self, file_path: Path, content: str, lines: list[str]):
"""PERF-009: Check for updates in loops"""
in_for_loop = False
for_indent = 0
loop_var = ""
for i, line in enumerate(lines, 1):
stripped = line.strip()
# Track for loops
match = re.search(r'for\s+(\w+)\s+in\s+', line)
if match:
in_for_loop = True
for_indent = len(line) - len(line.lstrip())
loop_var = match.group(1)
elif in_for_loop:
current_indent = len(line) - len(line.lstrip()) if line.strip() else for_indent + 4
if current_indent <= for_indent and stripped:
in_for_loop = False
elif loop_var and f"{loop_var}." in line and "=" in line and "==" not in line:
# Attribute assignment in loop
if "# noqa" not in line:
self._add_violation(
rule_id="PERF-009",
rule_name="Batch updates instead of loops",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message="Individual updates in loop - consider batch update",
context=line.strip()[:80],
suggestion="Use .update({...}) with filters for batch updates",
)
# =========================================================================
# API Performance Checks
# =========================================================================
def _check_api_pagination(self, file_path: Path, content: str, lines: list[str]):
"""PERF-026: Check for missing pagination in list endpoints"""
# Look for GET endpoints that return lists
in_endpoint = False
endpoint_line = 0
has_pagination = False
for i, line in enumerate(lines, 1):
# Track router decorators
if re.search(r'@router\.(get|post)', line):
in_endpoint = True
endpoint_line = i
has_pagination = False
elif in_endpoint:
# Check for pagination parameters
if re.search(r'(skip|offset|page|limit)', line):
has_pagination = True
# Check for function end
if re.search(r'^def\s+\w+', line.lstrip()) and i > endpoint_line + 1:
in_endpoint = False
# Check for .all() without pagination
if ".all()" in line and not has_pagination:
if "# noqa" not in line:
self._add_violation(
rule_id="PERF-026",
rule_name="Pagination required for list endpoints",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="List endpoint may lack pagination",
context=line.strip()[:80],
suggestion="Add skip/limit parameters for pagination",
)
# =========================================================================
# Async Performance Checks
# =========================================================================
def _check_parallel_async(self, file_path: Path, content: str, lines: list[str]):
"""PERF-037: Check for sequential awaits that could be parallel"""
await_count = 0
await_lines = []
for i, line in enumerate(lines, 1):
stripped = line.strip()
if stripped.startswith("await "):
await_count += 1
await_lines.append(i)
# Check for 3+ sequential awaits
if await_count >= 3:
# Verify they're sequential (within 5 lines of each other)
if all(await_lines[j+1] - await_lines[j] <= 2 for j in range(len(await_lines)-1)):
self._add_violation(
rule_id="PERF-037",
rule_name="Parallel independent operations",
severity=Severity.INFO,
file_path=file_path,
line_number=await_lines[0],
message=f"{await_count} sequential awaits - consider asyncio.gather()",
context="Multiple await statements",
suggestion="Use asyncio.gather() for independent async operations",
)
await_count = 0
await_lines = []
elif stripped and not stripped.startswith("#"):
# Reset on non-await, non-empty line
if await_count > 0:
await_count = 0
await_lines = []
def _check_timeout_config(self, file_path: Path, content: str, lines: list[str]):
"""PERF-040: Check for missing timeouts on HTTP clients"""
if "requests" not in content and "httpx" not in content and "aiohttp" not in content:
return
patterns = [
r'requests\.(get|post|put|delete|patch)\s*\([^)]+\)',
r'httpx\.(get|post|put|delete|patch)\s*\([^)]+\)',
]
for i, line in enumerate(lines, 1):
for pattern in patterns:
if re.search(pattern, line) and "timeout" not in line:
self._add_violation(
rule_id="PERF-040",
rule_name="Timeout configuration",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="HTTP request without timeout",
context=line.strip()[:80],
suggestion="Add timeout parameter to prevent hanging requests",
)
# =========================================================================
# Memory Performance Checks
# =========================================================================
def _check_generators(self, file_path: Path, content: str, lines: list[str]):
"""PERF-046: Check for loading large datasets into memory"""
for i, line in enumerate(lines, 1):
# Check for .all() followed by iteration
if ".all()" in line:
# Look ahead for iteration
if i < len(lines):
next_lines = "\n".join(lines[i:min(i+3, len(lines))])
if "for " in next_lines and "in" in next_lines:
if "# noqa" not in line:
self._add_violation(
rule_id="PERF-046",
rule_name="Generators for large datasets",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message=".all() loads everything into memory before iteration",
context=line.strip()[:80],
suggestion="Use .yield_per(100) for large result sets",
)
def _check_file_streaming(self, file_path: Path, content: str, lines: list[str]):
"""PERF-047: Check for loading entire files into memory"""
for i, line in enumerate(lines, 1):
if re.search(r'await\s+\w+\.read\(\)', line) and "chunk" not in line:
self._add_violation(
rule_id="PERF-047",
rule_name="Stream large file uploads",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message="Full file read into memory",
context=line.strip()[:80],
suggestion="Stream large files: while chunk := await file.read(8192)",
)
def _check_chunked_processing(self, file_path: Path, content: str, lines: list[str]):
"""PERF-048: Check for chunked processing in imports"""
if "chunk" not in content.lower() and "batch" not in content.lower():
# Check if file processes multiple records
if "for " in content and ("csv" in content.lower() or "import" in content.lower()):
self._add_violation(
rule_id="PERF-048",
rule_name="Chunked processing for imports",
severity=Severity.INFO,
file_path=file_path,
line_number=1,
message="Import processing may benefit from chunking",
context="File processes multiple records",
suggestion="Process in chunks with periodic commits",
)
def _check_context_managers(self, file_path: Path, content: str, lines: list[str]):
"""PERF-049: Check for file handles without context managers"""
for i, line in enumerate(lines, 1):
# Check for file open without 'with'
if re.search(r'^\s*\w+\s*=\s*open\s*\(', line):
if "# noqa" not in line:
self._add_violation(
rule_id="PERF-049",
rule_name="Context managers for resources",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="File opened without context manager",
context=line.strip()[:80],
suggestion="Use 'with open(...) as f:' to ensure cleanup",
)
def _check_string_concatenation(self, file_path: Path, content: str, lines: list[str]):
"""PERF-051: Check for inefficient string concatenation in loops"""
in_for_loop = False
for_indent = 0
for i, line in enumerate(lines, 1):
stripped = line.strip()
if re.search(r'for\s+\w+\s+in\s+', line):
in_for_loop = True
for_indent = len(line) - len(line.lstrip())
elif in_for_loop:
current_indent = len(line) - len(line.lstrip()) if line.strip() else for_indent + 4
if current_indent <= for_indent and stripped:
in_for_loop = False
elif re.search(r'\w+\s*\+=\s*["\']|str\s*\(', line):
if "# noqa" not in line:
self._add_violation(
rule_id="PERF-051",
rule_name="String concatenation efficiency",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message="String concatenation in loop",
context=line.strip()[:80],
suggestion="Use ''.join() or StringIO for many concatenations",
)
# =========================================================================
# Frontend Performance Checks
# =========================================================================
def _check_debounce(self, file_path: Path, content: str, lines: list[str]):
"""PERF-056: Check for search inputs without debounce"""
for i, line in enumerate(lines, 1):
if re.search(r'@(input|keyup)=".*search.*fetch', line, re.IGNORECASE):
if "debounce" not in content.lower():
self._add_violation(
rule_id="PERF-056",
rule_name="Debounce search inputs",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="Search input triggers API call without debounce",
context=line.strip()[:80],
suggestion="Add 300-500ms debounce to prevent excessive API calls",
)
def _check_polling_intervals(self, file_path: Path, content: str, lines: list[str]):
"""PERF-062: Check for too-frequent polling"""
for i, line in enumerate(lines, 1):
match = re.search(r'setInterval\s*\([^,]+,\s*(\d+)\s*\)', line)
if match:
interval = int(match.group(1))
if interval < 10000: # Less than 10 seconds
if "# real-time" not in line and "# noqa" not in line:
self._add_violation(
rule_id="PERF-062",
rule_name="Reasonable polling intervals",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message=f"Polling interval {interval}ms is very frequent",
context=line.strip()[:80],
suggestion="Use >= 10 second intervals for non-critical updates",
)
def _check_layout_thrashing(self, file_path: Path, content: str, lines: list[str]):
"""PERF-064: Check for layout thrashing patterns"""
for i, line in enumerate(lines, 1):
# Check for read then write patterns
if re.search(r'(offsetHeight|offsetWidth|clientHeight|clientWidth)', line):
if i < len(lines):
next_line = lines[i] if i < len(lines) else ""
if "style" in next_line:
self._add_violation(
rule_id="PERF-064",
rule_name="Avoid layout thrashing",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message="DOM read followed by write can cause layout thrashing",
context=line.strip()[:80],
suggestion="Batch DOM reads, then batch DOM writes",
)
def _check_image_lazy_loading(self, file_path: Path, content: str, lines: list[str]):
"""PERF-058: Check for images without lazy loading"""
for i, line in enumerate(lines, 1):
if re.search(r'<img\s+[^>]*src=', line):
if 'loading="lazy"' not in line and "x-intersect" not in line:
if "logo" not in line.lower() and "icon" not in line.lower():
self._add_violation(
rule_id="PERF-058",
rule_name="Image optimization",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message="Image without lazy loading",
context=line.strip()[:80],
suggestion='Add loading="lazy" for off-screen images',
)
def _check_script_loading(self, file_path: Path, content: str, lines: list[str]):
"""PERF-067: Check for script tags without defer/async"""
for i, line in enumerate(lines, 1):
if re.search(r'<script\s+[^>]*src=', line):
if "defer" not in line and "async" not in line:
if "alpine" not in line.lower() and "htmx" not in line.lower():
self._add_violation(
rule_id="PERF-067",
rule_name="Defer non-critical JavaScript",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message="Script tag without defer/async",
context=line.strip()[:80],
suggestion="Add defer for non-critical scripts",
)
def main():
parser = argparse.ArgumentParser(
description="Performance code validator",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("-f", "--file", type=Path, help="Validate a single file")
parser.add_argument("-d", "--folder", type=Path, help="Validate a directory")
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
parser.add_argument("--errors-only", action="store_true", help="Only show errors")
parser.add_argument("--json", action="store_true", help="JSON output")
args = parser.parse_args()
validator = PerformanceValidator(verbose=args.verbose)
if args.file:
validator.validate_file(args.file)
elif args.folder:
validator.validate_all(args.folder)
else:
validator.validate_all()
validator.output_results(json_output=args.json, errors_only=args.errors_only)
sys.exit(validator.get_exit_code())
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,718 @@
#!/usr/bin/env python3
"""
Security Validator
==================
Validates code against security rules defined in .security-rules/
This script checks for common security vulnerabilities:
- Hardcoded credentials and secrets
- SQL injection patterns
- Command injection risks
- XSS vulnerabilities
- Insecure cryptography
- Authentication weaknesses
- Data exposure risks
Usage:
python scripts/validate/validate_security.py # Check all files
python scripts/validate/validate_security.py -d app/api/ # Check specific directory
python scripts/validate/validate_security.py -f app/api/v1/auth.py # Check single file
python scripts/validate/validate_security.py -v # Verbose output
python scripts/validate/validate_security.py --json # JSON output
python scripts/validate/validate_security.py --errors-only # Only show errors
Options:
-f, --file PATH Validate a single file
-d, --folder PATH Validate all files in a directory (recursive)
-v, --verbose Show detailed output including context
--errors-only Only show errors, suppress warnings and info
--json Output results as JSON
"""
import argparse
import re
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))
from base_validator import BaseValidator, Severity, ValidationResult
class SecurityValidator(BaseValidator):
"""Security-focused code validator"""
VALIDATOR_NAME = "Security Validator"
VALIDATOR_EMOJI = "🔒"
RULES_DIR_NAME = ".security-rules"
CONFIG_FILE_NAME = ".security-rules.yaml"
def validate_all(self, target_path: Path = None) -> ValidationResult:
"""Validate all files for security issues"""
print(f"\n{self.VALIDATOR_EMOJI} Starting security validation...\n")
target = target_path or self.project_root
# Validate Python files
self._validate_python_files(target)
# Validate JavaScript files
self._validate_javascript_files(target)
# Validate HTML templates
self._validate_template_files(target)
# Validate configuration files
self._validate_config_files(target)
return self.result
def _validate_python_files(self, target: Path):
"""Validate all Python files for security issues"""
print("🐍 Validating Python files...")
for py_file in target.rglob("*.py"):
if self._should_ignore_file(py_file):
continue
self.result.files_checked += 1
content = py_file.read_text()
lines = content.split("\n")
self._validate_python_security(py_file, content, lines)
def _validate_javascript_files(self, target: Path):
"""Validate all JavaScript files for security issues"""
print("🟨 Validating JavaScript files...")
for js_file in target.rglob("*.js"):
if self._should_ignore_file(js_file):
continue
self.result.files_checked += 1
content = js_file.read_text()
lines = content.split("\n")
self._validate_javascript_security(js_file, content, lines)
def _validate_template_files(self, target: Path):
"""Validate all HTML template files for security issues"""
print("📄 Validating template files...")
for html_file in target.rglob("*.html"):
if self._should_ignore_file(html_file):
continue
self.result.files_checked += 1
content = html_file.read_text()
lines = content.split("\n")
self._validate_template_security(html_file, content, lines)
def _validate_config_files(self, target: Path):
"""Validate configuration files for security issues"""
print("⚙️ Validating configuration files...")
config_patterns = ["*.yaml", "*.yml", "*.json", "*.toml", "*.ini", "*.env*"]
for pattern in config_patterns:
for config_file in target.rglob(pattern):
if self._should_ignore_file(config_file):
continue
if config_file.suffix in [".yaml", ".yml", ".json"]:
self.result.files_checked += 1
content = config_file.read_text()
lines = content.split("\n")
self._validate_config_security(config_file, content, lines)
def _validate_file_content(self, file_path: Path, content: str, lines: list[str]):
"""Validate file content based on file type"""
if file_path.suffix == ".py":
self._validate_python_security(file_path, content, lines)
elif file_path.suffix == ".js":
self._validate_javascript_security(file_path, content, lines)
elif file_path.suffix == ".html":
self._validate_template_security(file_path, content, lines)
elif file_path.suffix in [".yaml", ".yml", ".json"]:
self._validate_config_security(file_path, content, lines)
def _validate_python_security(self, file_path: Path, content: str, lines: list[str]):
"""Validate Python file for security issues"""
file_path_str = str(file_path)
# SEC-001: Hardcoded credentials
self._check_hardcoded_credentials(file_path, content, lines)
# SEC-011: SQL injection
self._check_sql_injection(file_path, content, lines)
# SEC-012: Command injection
self._check_command_injection(file_path, content, lines)
# SEC-013: Code execution
self._check_code_execution(file_path, content, lines)
# SEC-014: Path traversal
if "upload" in file_path_str.lower() or "file" in file_path_str.lower():
self._check_path_traversal(file_path, content, lines)
# SEC-020: Unsafe deserialization
self._check_unsafe_deserialization(file_path, content, lines)
# SEC-021: PII logging
self._check_pii_logging(file_path, content, lines)
# SEC-024: Error information leakage
self._check_error_leakage(file_path, content, lines)
# SEC-034: HTTPS enforcement
self._check_https_enforcement(file_path, content, lines)
# SEC-040: Timeout configuration
self._check_timeout_configuration(file_path, content, lines)
# SEC-041: Weak hashing
self._check_weak_hashing(file_path, content, lines)
# SEC-042: Insecure random
self._check_insecure_random(file_path, content, lines)
# SEC-043: Hardcoded encryption keys
self._check_hardcoded_keys(file_path, content, lines)
# SEC-047: Certificate verification
self._check_certificate_verification(file_path, content, lines)
# Auth file specific checks
if "auth" in file_path_str.lower():
self._check_jwt_expiry(file_path, content, lines)
def _validate_javascript_security(self, file_path: Path, content: str, lines: list[str]):
"""Validate JavaScript file for security issues"""
# SEC-022: Sensitive data in URLs
self._check_sensitive_url_params_js(file_path, content, lines)
# Check for eval usage
for i, line in enumerate(lines, 1):
if re.search(r'\beval\s*\(', line) and "//" not in line.split("eval")[0]:
self._add_violation(
rule_id="SEC-013",
rule_name="No code execution",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message="eval() allows arbitrary code execution",
context=line.strip()[:80],
suggestion="Use JSON.parse() for JSON or other safe alternatives",
)
# Check for innerHTML with user input
for i, line in enumerate(lines, 1):
if re.search(r'\.innerHTML\s*=', line) and "//" not in line.split("innerHTML")[0]:
self._add_violation(
rule_id="SEC-015",
rule_name="XSS prevention",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="innerHTML can lead to XSS if used with untrusted input",
context=line.strip()[:80],
suggestion="Use textContent for text or sanitize HTML input",
)
def _validate_template_security(self, file_path: Path, content: str, lines: list[str]):
"""Validate HTML template file for security issues"""
# SEC-015: XSS via |safe filter
for i, line in enumerate(lines, 1):
if re.search(r'\|\s*safe', line) and 'sanitized' not in line.lower():
self._add_violation(
rule_id="SEC-015",
rule_name="XSS prevention in templates",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="|safe filter disables auto-escaping - ensure content is sanitized",
context=line.strip()[:80],
suggestion="Mark with {# sanitized #} comment if content is sanitized",
)
# Check for x-html with dynamic content
for i, line in enumerate(lines, 1):
if re.search(r'x-html="[^"]*\w', line) and "sanitized" not in line.lower():
self._add_violation(
rule_id="SEC-015",
rule_name="XSS prevention in templates",
severity=Severity.INFO,
file_path=file_path,
line_number=i,
message="x-html renders raw HTML - ensure content is safe",
context=line.strip()[:80],
suggestion="Use x-text for text content or sanitize HTML",
)
def _validate_config_security(self, file_path: Path, content: str, lines: list[str]):
"""Validate configuration file for security issues"""
# Check for hardcoded secrets in config
secret_patterns = [
(r'password\s*[=:]\s*["\'][^"\']{4,}["\']', "password"),
(r'secret\s*[=:]\s*["\'][^"\']{8,}["\']', "secret"),
(r'api_key\s*[=:]\s*["\'][A-Za-z0-9_-]{16,}["\']', "API key"),
(r'token\s*[=:]\s*["\'][A-Za-z0-9._-]{20,}["\']', "token"),
]
for i, line in enumerate(lines, 1):
# Skip comments
stripped = line.strip()
if stripped.startswith("#") or stripped.startswith("//"):
continue
for pattern, secret_type in secret_patterns:
if re.search(pattern, line, re.IGNORECASE):
# Check for environment variable references
if "${" in line or "os.getenv" in line or "environ" in line:
continue
self._add_violation(
rule_id="SEC-001",
rule_name="No hardcoded credentials",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message=f"Possible hardcoded {secret_type} in configuration",
context=line.strip()[:60] + "...",
suggestion="Use environment variables for secrets",
)
# =========================================================================
# Specific Security Checks
# =========================================================================
def _check_hardcoded_credentials(self, file_path: Path, content: str, lines: list[str]):
"""SEC-001: Check for hardcoded credentials"""
patterns = [
(r'password\s*=\s*["\'][^"\']{4,}["\']', "password"),
(r'api_key\s*=\s*["\'][A-Za-z0-9_-]{16,}["\']', "API key"),
(r'secret_key\s*=\s*["\'][^"\']{8,}["\']', "secret key"),
(r'auth_token\s*=\s*["\'][A-Za-z0-9._-]{20,}["\']', "auth token"),
(r'AWS_SECRET.*=\s*["\'][^"\']+["\']', "AWS secret"),
(r'STRIPE_.*KEY.*=\s*["\'][^"\']+["\']', "Stripe key"),
]
exclude_patterns = [
"os.getenv", "os.environ", "settings.", '""', "''",
"# noqa", "# test", "password_hash", "example"
]
for i, line in enumerate(lines, 1):
for pattern, secret_type in patterns:
if re.search(pattern, line, re.IGNORECASE):
# Check exclusions
if any(exc in line for exc in exclude_patterns):
continue
self._add_violation(
rule_id="SEC-001",
rule_name="No hardcoded credentials",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message=f"Possible hardcoded {secret_type}",
context=line.strip()[:60] + "...",
suggestion="Use environment variables or secret management",
)
def _check_sql_injection(self, file_path: Path, content: str, lines: list[str]):
"""SEC-011: Check for SQL injection vulnerabilities"""
patterns = [
r'execute\s*\(\s*f["\']',
r'execute\s*\([^)]*\s*\+\s*',
r'execute\s*\([^)]*%[^)]*%',
r'text\s*\(\s*f["\']',
r'\.raw\s*\(\s*f["\']',
]
for i, line in enumerate(lines, 1):
for pattern in patterns:
if re.search(pattern, line):
if "# noqa" in line or "# safe" in line:
continue
self._add_violation(
rule_id="SEC-011",
rule_name="No raw SQL queries",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message="Possible SQL injection - use parameterized queries",
context=line.strip()[:80],
suggestion="Use SQLAlchemy ORM or parameterized queries with :param syntax",
)
def _check_command_injection(self, file_path: Path, content: str, lines: list[str]):
"""SEC-012: Check for command injection vulnerabilities"""
patterns = [
(r'subprocess.*shell\s*=\s*True', "shell=True in subprocess"),
(r'os\.system\s*\(', "os.system()"),
(r'os\.popen\s*\(', "os.popen()"),
]
for i, line in enumerate(lines, 1):
for pattern, issue in patterns:
if re.search(pattern, line):
if "# noqa" in line or "# safe" in line:
continue
self._add_violation(
rule_id="SEC-012",
rule_name="No shell command injection",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message=f"{issue} allows command injection",
context=line.strip()[:80],
suggestion="Use subprocess with list arguments, shell=False",
)
def _check_code_execution(self, file_path: Path, content: str, lines: list[str]):
"""SEC-013: Check for code execution vulnerabilities"""
patterns = [
(r'eval\s*\([^)]*request', "eval with request data"),
(r'eval\s*\([^)]*input', "eval with user input"),
(r'exec\s*\([^)]*request', "exec with request data"),
(r'__import__\s*\([^)]*request', "__import__ with request data"),
]
for i, line in enumerate(lines, 1):
for pattern, issue in patterns:
if re.search(pattern, line, re.IGNORECASE):
self._add_violation(
rule_id="SEC-013",
rule_name="No code execution",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message=f"{issue} allows arbitrary code execution",
context=line.strip()[:80],
suggestion="Never use eval/exec with user input",
)
def _check_path_traversal(self, file_path: Path, content: str, lines: list[str]):
"""SEC-014: Check for path traversal vulnerabilities"""
# Check if file has path operations with user input
has_secure_filename = "secure_filename" in content or "basename" in content
patterns = [
r'open\s*\([^)]*request',
r'open\s*\([^)]*\+',
r'Path\s*\([^)]*request',
]
for i, line in enumerate(lines, 1):
for pattern in patterns:
if re.search(pattern, line, re.IGNORECASE):
if has_secure_filename:
continue
self._add_violation(
rule_id="SEC-014",
rule_name="Path traversal prevention",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="Possible path traversal - validate file paths",
context=line.strip()[:80],
suggestion="Use secure_filename() and validate paths against allowed directories",
)
def _check_unsafe_deserialization(self, file_path: Path, content: str, lines: list[str]):
"""SEC-020: Check for unsafe deserialization"""
patterns = [
(r'pickle\.loads?\s*\(', "pickle deserialization"),
(r'yaml\.load\s*\([^,)]+\)(?!.*SafeLoader)', "yaml.load without SafeLoader"),
(r'marshal\.loads?\s*\(', "marshal deserialization"),
]
for i, line in enumerate(lines, 1):
for pattern, issue in patterns:
if re.search(pattern, line):
if "# noqa" in line:
continue
self._add_violation(
rule_id="SEC-020",
rule_name="Deserialization safety",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message=f"Unsafe {issue} can lead to code execution",
context=line.strip()[:80],
suggestion="Use json.loads() or yaml.safe_load() instead",
)
def _check_pii_logging(self, file_path: Path, content: str, lines: list[str]):
"""SEC-021: Check for PII in logs"""
patterns = [
(r'log\w*\.[a-z]+\([^)]*password', "password in log"),
(r'log\w*\.[a-z]+\([^)]*credit_card', "credit card in log"),
(r'log\w*\.[a-z]+\([^)]*ssn', "SSN in log"),
(r'print\s*\([^)]*password', "password in print"),
]
exclude = ["password_hash", "password_reset", "password_changed", "# noqa"]
for i, line in enumerate(lines, 1):
for pattern, issue in patterns:
if re.search(pattern, line, re.IGNORECASE):
if any(exc in line for exc in exclude):
continue
self._add_violation(
rule_id="SEC-021",
rule_name="PII logging prevention",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message=f"Possible {issue}",
context=line.strip()[:60] + "...",
suggestion="Never log sensitive data - redact or omit",
)
def _check_error_leakage(self, file_path: Path, content: str, lines: list[str]):
"""SEC-024: Check for error information leakage"""
patterns = [
r'traceback\.format_exc\(\).*detail',
r'traceback\.format_exc\(\).*response',
r'str\(e\).*HTTPException',
]
for i, line in enumerate(lines, 1):
for pattern in patterns:
if re.search(pattern, line):
if "logger" in line or "# noqa" in line:
continue
self._add_violation(
rule_id="SEC-024",
rule_name="Error message information leakage",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="Internal error details may be exposed to users",
context=line.strip()[:80],
suggestion="Log errors internally, return generic message to users",
)
def _check_https_enforcement(self, file_path: Path, content: str, lines: list[str]):
"""SEC-034: Check for HTTP instead of HTTPS"""
for i, line in enumerate(lines, 1):
if re.search(r'http://(?!localhost|127\.0\.0\.1|0\.0\.0\.0|\$)', line):
if "# noqa" in line or "example.com" in line or "schemas" in line:
continue
if "http://www.w3.org" in line:
continue
self._add_violation(
rule_id="SEC-034",
rule_name="HTTPS enforcement",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="HTTP URL found - use HTTPS for security",
context=line.strip()[:80],
suggestion="Replace http:// with https://",
)
def _check_timeout_configuration(self, file_path: Path, content: str, lines: list[str]):
"""SEC-040: Check for missing timeouts on external calls"""
# Check for requests/httpx calls without timeout
if "requests" in content or "httpx" in content or "aiohttp" in content:
has_timeout_import = "timeout" in content.lower()
patterns = [
r'requests\.(get|post|put|delete|patch)\s*\([^)]+\)(?!.*timeout)',
r'httpx\.(get|post|put|delete|patch)\s*\([^)]+\)(?!.*timeout)',
]
for i, line in enumerate(lines, 1):
for pattern in patterns:
if re.search(pattern, line) and "timeout" not in line:
self._add_violation(
rule_id="SEC-040",
rule_name="Timeout configuration",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="HTTP request without timeout - can hang indefinitely",
context=line.strip()[:80],
suggestion="Add timeout parameter: requests.get(url, timeout=30)",
)
def _check_weak_hashing(self, file_path: Path, content: str, lines: list[str]):
"""SEC-041: Check for weak hashing algorithms"""
patterns = [
(r'hashlib\.md5\s*\(', "MD5"),
(r'hashlib\.sha1\s*\(', "SHA1"),
(r'MD5\.new\s*\(', "MD5"),
(r'SHA\.new\s*\(', "SHA1"),
]
for i, line in enumerate(lines, 1):
for pattern, algo in patterns:
if re.search(pattern, line):
if "# noqa" in line or "# checksum" in line or "# file hash" in line:
continue
self._add_violation(
rule_id="SEC-041",
rule_name="Strong hashing algorithms",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message=f"{algo} is cryptographically weak",
context=line.strip()[:80],
suggestion="Use SHA-256 or stronger for security purposes",
)
def _check_insecure_random(self, file_path: Path, content: str, lines: list[str]):
"""SEC-042: Check for insecure random number generation"""
# Only check if file appears to deal with security
security_context = any(
word in content.lower()
for word in ["token", "secret", "key", "session", "csrf", "nonce", "salt"]
)
if not security_context:
return
patterns = [
r'random\.random\s*\(',
r'random\.randint\s*\(',
r'random\.choice\s*\(',
]
for i, line in enumerate(lines, 1):
for pattern in patterns:
if re.search(pattern, line):
if "# noqa" in line or "# not security" in line:
continue
self._add_violation(
rule_id="SEC-042",
rule_name="Secure random generation",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="random module is not cryptographically secure",
context=line.strip()[:80],
suggestion="Use secrets module for security-sensitive randomness",
)
def _check_hardcoded_keys(self, file_path: Path, content: str, lines: list[str]):
"""SEC-043: Check for hardcoded encryption keys"""
patterns = [
r'ENCRYPTION_KEY\s*=\s*["\'][^"\']+["\']',
r'SECRET_KEY\s*=\s*["\'][A-Za-z0-9+/=]{16,}["\']',
r'AES_KEY\s*=\s*["\']',
r'PRIVATE_KEY\s*=\s*["\']-----BEGIN',
]
exclude = ["os.getenv", "os.environ", "settings.", '""', "# test"]
for i, line in enumerate(lines, 1):
for pattern in patterns:
if re.search(pattern, line):
if any(exc in line for exc in exclude):
continue
self._add_violation(
rule_id="SEC-043",
rule_name="No hardcoded encryption keys",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message="Hardcoded encryption key found",
context=line.strip()[:50] + "...",
suggestion="Use environment variables for encryption keys",
)
def _check_certificate_verification(self, file_path: Path, content: str, lines: list[str]):
"""SEC-047: Check for disabled certificate verification"""
patterns = [
(r'verify\s*=\s*False', "SSL verification disabled"),
(r'CERT_NONE', "Certificate verification disabled"),
(r'check_hostname\s*=\s*False', "Hostname verification disabled"),
]
for i, line in enumerate(lines, 1):
for pattern, issue in patterns:
if re.search(pattern, line):
if "# noqa" in line or "# test" in line or "DEBUG" in line:
continue
self._add_violation(
rule_id="SEC-047",
rule_name="Certificate verification",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message=f"{issue} - vulnerable to MITM attacks",
context=line.strip()[:80],
suggestion="Always verify SSL certificates in production",
)
def _check_jwt_expiry(self, file_path: Path, content: str, lines: list[str]):
"""SEC-002: Check for JWT tokens without expiry"""
if "jwt.encode" in content and "exp" not in content:
# Find the jwt.encode line
for i, line in enumerate(lines, 1):
if "jwt.encode" in line:
self._add_violation(
rule_id="SEC-002",
rule_name="JWT expiry enforcement",
severity=Severity.WARNING,
file_path=file_path,
line_number=i,
message="JWT token may not have expiration claim",
context=line.strip()[:80],
suggestion="Include 'exp' claim with appropriate expiration",
)
break
def _check_sensitive_url_params_js(self, file_path: Path, content: str, lines: list[str]):
"""SEC-022: Check for sensitive data in URLs (JavaScript)"""
patterns = [
r'\?password=',
r'&password=',
r'\?token=(?!type)',
r'&token=(?!type)',
r'\?api_key=',
r'&api_key=',
]
for i, line in enumerate(lines, 1):
for pattern in patterns:
if re.search(pattern, line):
self._add_violation(
rule_id="SEC-022",
rule_name="Sensitive data in URLs",
severity=Severity.ERROR,
file_path=file_path,
line_number=i,
message="Sensitive data in URL query parameters",
context=line.strip()[:80],
suggestion="Send sensitive data in request body or headers",
)
def main():
parser = argparse.ArgumentParser(
description="Security code validator",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("-f", "--file", type=Path, help="Validate a single file")
parser.add_argument("-d", "--folder", type=Path, help="Validate a directory")
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
parser.add_argument("--errors-only", action="store_true", help="Only show errors")
parser.add_argument("--json", action="store_true", help="JSON output")
args = parser.parse_args()
validator = SecurityValidator(verbose=args.verbose)
if args.file:
validator.validate_file(args.file)
elif args.folder:
validator.validate_all(args.folder)
else:
validator.validate_all()
validator.output_results(json_output=args.json, errors_only=args.errors_only)
sys.exit(validator.get_exit_code())
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,36 @@
# scripts/validators/__init__.py
"""
Architecture Validators Package
===============================
This package contains domain-specific validators for the architecture validation system.
Each validator module handles a specific category of rules.
Modules:
- base: Base classes and helpers (Severity, Violation, ValidationResult)
- api_validator: API endpoint rules (API-*)
- service_validator: Service layer rules (SVC-*)
- model_validator: Model rules (MDL-*)
- exception_validator: Exception handling rules (EXC-*)
- naming_validator: Naming convention rules (NAM-*)
- auth_validator: Auth and multi-tenancy rules (AUTH-*, MT-*)
- middleware_validator: Middleware rules (MDW-*)
- frontend_validator: Frontend rules (JS-*, TPL-*, FE-*, CSS-*)
- language_validator: Language/i18n rules (LANG-*)
"""
from .base import (
BaseValidator,
FileResult,
Severity,
ValidationResult,
Violation,
)
__all__ = [
"Severity",
"Violation",
"FileResult",
"ValidationResult",
"BaseValidator",
]

View File

@@ -0,0 +1,314 @@
# scripts/validators/base.py
"""
Base classes and helpers for architecture validation.
This module contains:
- Severity: Enum for validation severity levels
- Violation: Dataclass for representing rule violations
- FileResult: Dataclass for single file validation results
- ValidationResult: Dataclass for overall validation results
- BaseValidator: Base class for domain-specific validators
"""
import json
import re
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any
class Severity(Enum):
"""Validation severity levels"""
ERROR = "error"
WARNING = "warning"
INFO = "info"
@dataclass
class Violation:
"""Represents an architectural rule violation"""
rule_id: str
rule_name: str
severity: Severity
file_path: Path
line_number: int
message: str
context: str = ""
suggestion: str = ""
@dataclass
class FileResult:
"""Results for a single file validation"""
file_path: Path
errors: int = 0
warnings: int = 0
@property
def passed(self) -> bool:
return self.errors == 0
@property
def status(self) -> str:
if self.errors > 0:
return "FAILED"
if self.warnings > 0:
return "PASSED*"
return "PASSED"
@property
def status_icon(self) -> str:
if self.errors > 0:
return ""
if self.warnings > 0:
return "⚠️"
return ""
@dataclass
class ValidationResult:
"""Results of architecture validation"""
violations: list[Violation] = field(default_factory=list)
files_checked: int = 0
rules_applied: int = 0
file_results: list[FileResult] = field(default_factory=list)
def has_errors(self) -> bool:
"""Check if there are any error-level violations"""
return any(v.severity == Severity.ERROR for v in self.violations)
def has_warnings(self) -> bool:
"""Check if there are any warning-level violations"""
return any(v.severity == Severity.WARNING for v in self.violations)
class BaseValidator:
"""
Base class for domain-specific validators.
Provides common functionality for all validators including:
- Violation tracking
- File filtering
- Rule lookup
- Common pattern matching utilities
"""
def __init__(
self,
config: dict[str, Any],
result: ValidationResult,
project_root: Path,
verbose: bool = False,
):
"""
Initialize validator with shared state.
Args:
config: Loaded architecture rules configuration
result: Shared ValidationResult for tracking violations
project_root: Root path of the project
verbose: Whether to show verbose output
"""
self.config = config
self.result = result
self.project_root = project_root
self.verbose = verbose
def validate(self, target_path: Path) -> None:
"""
Run validation on target path.
Must be implemented by subclasses.
Args:
target_path: Path to validate (file or directory)
"""
raise NotImplementedError("Subclasses must implement validate()")
def _add_violation(
self,
rule_id: str,
rule_name: str,
severity: Severity,
file_path: Path,
line_number: int,
message: str,
context: str = "",
suggestion: str = "",
) -> None:
"""Add a violation to results"""
violation = Violation(
rule_id=rule_id,
rule_name=rule_name,
severity=severity,
file_path=file_path,
line_number=line_number,
message=message,
context=context,
suggestion=suggestion,
)
self.result.violations.append(violation)
def _should_ignore_file(self, file_path: Path) -> bool:
"""Check if file should be ignored"""
ignore_patterns = self.config.get("ignore", {}).get("files", [])
# Convert to string for easier matching
file_path_str = str(file_path)
for pattern in ignore_patterns:
# Check if any part of the path matches the pattern
if file_path.match(pattern):
return True
# Also check if pattern appears in the path (for .venv, venv, etc.)
if "/.venv/" in file_path_str or file_path_str.startswith(".venv/"):
return True
if "/venv/" in file_path_str or file_path_str.startswith("venv/"):
return True
return False
def _get_rule(self, rule_id: str) -> dict[str, Any] | None:
"""Get rule configuration by ID"""
# Look in different rule categories
for category in [
"api_endpoint_rules",
"service_layer_rules",
"model_rules",
"exception_rules",
"naming_rules",
"auth_rules",
"middleware_rules",
"javascript_rules",
"template_rules",
"frontend_component_rules",
"styling_rules",
"language_rules",
"multi_tenancy_rules",
"code_quality_rules",
]:
rules = self.config.get(category, [])
for rule in rules:
if rule.get("id") == rule_id:
return rule
return None
def _get_files(self, target_path: Path, pattern: str) -> list[Path]:
"""Get files matching a glob pattern, excluding ignored files"""
files = list(target_path.glob(pattern))
return [f for f in files if not self._should_ignore_file(f)]
def _find_decorators(self, content: str) -> list[tuple[int, str, str]]:
"""
Find all function decorators and their associated functions.
Returns list of (line_number, decorator, function_name) tuples.
"""
results = []
lines = content.split("\n")
i = 0
while i < len(lines):
line = lines[i].strip()
if line.startswith("@"):
decorator = line
# Look for the function definition
for j in range(i + 1, min(i + 10, len(lines))):
next_line = lines[j].strip()
if next_line.startswith("def ") or next_line.startswith(
"async def "
):
# Extract function name
match = re.search(r"(?:async\s+)?def\s+(\w+)", next_line)
if match:
func_name = match.group(1)
results.append((i + 1, decorator, func_name))
break
if next_line.startswith("@"):
# Multiple decorators - continue to next
continue
if next_line and not next_line.startswith("#"):
# Non-decorator, non-comment line - stop looking
break
i += 1
return results
def _check_pattern_in_lines(
self,
file_path: Path,
lines: list[str],
pattern: str,
rule_id: str,
rule_name: str,
severity: Severity,
message: str,
suggestion: str = "",
exclude_comments: bool = True,
exclude_patterns: list[str] = None,
) -> None:
"""
Check for pattern violations in file lines.
Args:
file_path: Path to the file
lines: File content split by lines
pattern: Regex pattern to search for
rule_id: Rule identifier
rule_name: Human-readable rule name
severity: Violation severity
message: Violation message
suggestion: Suggested fix
exclude_comments: Skip lines that are comments
exclude_patterns: Additional patterns that mark lines to skip
"""
exclude_patterns = exclude_patterns or []
for i, line in enumerate(lines, 1):
stripped = line.strip()
# Skip comments if requested
if exclude_comments and stripped.startswith("#"):
continue
# Check exclusion patterns
skip = False
for exc in exclude_patterns:
if exc in line:
skip = True
break
if skip:
continue
# Check for pattern
if re.search(pattern, line):
self._add_violation(
rule_id=rule_id,
rule_name=rule_name,
severity=severity,
file_path=file_path,
line_number=i,
message=message,
context=stripped[:80],
suggestion=suggestion,
)
def _is_valid_json(self, file_path: Path) -> tuple[bool, str]:
"""
Check if a file contains valid JSON.
Returns (is_valid, error_message) tuple.
"""
try:
with open(file_path, encoding="utf-8") as f:
json.load(f)
return True, ""
except json.JSONDecodeError as e:
return False, f"Line {e.lineno}: {e.msg}"
except Exception as e:
return False, str(e)

View File

@@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""
Verify Critical Imports
========================
Checks that critical imports (re-exports) haven't been removed by linters.
This script verifies that essential import statements exist in key files,
preventing issues where tools like Ruff might remove imports that appear
unused but are actually critical for the application structure.
"""
import sys
from pathlib import Path
# Define critical imports that must exist
# Format: {file_path: [(import_line, description)]}
CRITICAL_IMPORTS: dict[str, list[tuple[str, str]]] = {
"models/database/base.py": [
("from app.core.database import Base", "Re-export Base for all models"),
],
"models/__init__.py": [
("from .database.base import Base", "Export Base for Alembic and models"),
],
"models/database/__init__.py": [
("from .base import Base", "Export Base from database package"),
],
"app/core/database.py": [
(
"from sqlalchemy.ext.declarative import declarative_base",
"SQLAlchemy Base declaration",
),
# Note: Might also use sqlalchemy.orm declarative_base in newer versions
],
}
class ImportVerifier:
"""Verifies critical imports exist in codebase"""
def __init__(self, project_root: Path):
self.project_root = project_root
self.issues: list[str] = []
def verify_all(self) -> bool:
"""Verify all critical imports"""
print("🔍 Verifying critical imports...\n")
all_good = True
for file_path, imports in CRITICAL_IMPORTS.items():
if not self.verify_file(file_path, imports):
all_good = False
return all_good
def verify_file(
self, file_path: str, required_imports: list[tuple[str, str]]
) -> bool:
"""Verify imports in a single file"""
full_path = self.project_root / file_path
if not full_path.exists():
self.issues.append(f"❌ File not found: {file_path}")
print(f"{file_path}: File not found")
return False
content = full_path.read_text()
file_ok = True
for import_line, description in required_imports:
# Check for exact import or variations
if import_line in content:
print(f"{file_path}: {import_line}")
else:
# Check for alternative import formats
alternatives = self._get_import_alternatives(import_line)
found = any(alt in content for alt in alternatives)
if found:
print(f"{file_path}: {import_line} (alternative format)")
else:
self.issues.append(
f"{file_path}: Missing critical import\n"
f" Expected: {import_line}\n"
f" Purpose: {description}"
)
print(f"{file_path}: Missing {import_line}")
file_ok = False
print()
return file_ok
def _get_import_alternatives(self, import_line: str) -> list[str]:
"""Get alternative formats for an import"""
alternatives = [import_line]
# Handle 'from x import y' vs 'from x import (y)'
if "from" in import_line and "import" in import_line:
parts = import_line.split("import")
if len(parts) == 2:
from_part = parts[0].strip()
import_part = parts[1].strip()
# Add parenthesized version
alternatives.append(f"{from_part} import ({import_part})")
# Add version with 'as' clause
alternatives.append(f"{import_line} as")
# Handle declarative_base alternatives (sqlalchemy changes)
if "declarative_base" in import_line:
# Old style
alternatives.append(
"from sqlalchemy.ext.declarative import declarative_base"
)
# New style (SQLAlchemy 1.4+)
alternatives.append("from sqlalchemy.orm import declarative_base")
return alternatives
def print_summary(self):
"""Print summary of verification"""
print("\n" + "=" * 80)
print("📊 CRITICAL IMPORTS VERIFICATION SUMMARY")
print("=" * 80)
if not self.issues:
print("\n✅ All critical imports verified successfully!")
print("\nAll re-export patterns are intact.")
else:
print(f"\n❌ Found {len(self.issues)} issue(s):\n")
for issue in self.issues:
print(issue)
print()
print("💡 RESOLUTION:")
print(" 1. Check if imports were removed by linter (Ruff)")
print(" 2. Add missing imports back to the files")
print(" 3. Update pyproject.toml to ignore F401 for these files")
print(" 4. Run this script again to verify")
print("=" * 80)
def main():
"""Main entry point"""
project_root = Path(__file__).parent.parent
verifier = ImportVerifier(project_root)
success = verifier.verify_all()
verifier.print_summary()
sys.exit(0 if success else 1)
if __name__ == "__main__":
main()