refactor(scripts): reorganize scripts/ into seed/ and validate/ subfolders
Move 9 init/seed scripts into scripts/seed/ and 7 validation scripts (+ validators/ subfolder) into scripts/validate/ to reduce clutter in the root scripts/ directory. Update all references across Makefile, CI/CD configs, pre-commit hooks, docs (~40 files), and Python imports. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
290
scripts/validate/base_validator.py
Executable file
290
scripts/validate/base_validator.py
Executable file
@@ -0,0 +1,290 @@
|
||||
"""
|
||||
Base Validator Class
|
||||
|
||||
Shared functionality for all validators.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
class Severity(str, Enum):
    """Severity levels for validation findings.

    Mixes in ``str`` so members compare equal to (and JSON-serialize as)
    their plain string values, e.g. ``Severity.ERROR == "error"``.
    """

    ERROR = "error"      # must be fixed; fails the validation run
    WARNING = "warning"  # should be fixed; does not fail the run
    INFO = "info"        # informational note only
|
||||
|
||||
|
||||
@dataclass
class Violation:
    """A single validation violation.

    One record per finding; collected into ``ValidationResult.violations``.
    """

    rule_id: str        # identifier of the rule that fired, e.g. "AUDIT-LOG-001"
    message: str        # human-readable description of the finding
    severity: Severity  # error / warning / info
    file_path: str = "" # offending file, empty when not file-specific
    line: int = 0       # 1-based line number; 0 when unknown / not line-specific
    suggestion: str = ""  # optional remediation hint shown in reports
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Aggregated outcome of one validation run."""

    # Every finding collected during the run, in discovery order.
    violations: list[Violation] = field(default_factory=list)
    # How many files the validator actually inspected.
    files_checked: int = 0

    def _count_by(self, severity: Severity) -> int:
        """Count violations carrying the given severity level."""
        return sum(v.severity == severity for v in self.violations)

    def has_errors(self) -> bool:
        """Check if there are any error-level violations."""
        return self._count_by(Severity.ERROR) > 0

    def error_count(self) -> int:
        """Count error-level violations."""
        return self._count_by(Severity.ERROR)

    def warning_count(self) -> int:
        """Count warning-level violations."""
        return self._count_by(Severity.WARNING)

    def info_count(self) -> int:
        """Count info-level violations."""
        return self._count_by(Severity.INFO)
|
||||
|
||||
|
||||
class BaseValidator(ABC):
    """Base class for architecture, security, and performance validators.

    Two reporting APIs coexist (both are used by subclasses in this repo):

    * the legacy dict-based one: ``add_error``/``add_warning``/``add_info``
      feeding ``self.errors``/``self.warnings``, printed by
      ``print_results``; and
    * the structured one: ``_add_violation`` feeding ``self.result``
      (a ``ValidationResult``), printed by ``output_results``.

    NOTE(review): the class inherits from ``ABC`` but declares no abstract
    methods, so it is technically instantiable; subclasses are expected to
    override ``validate_all`` (or ``validate``).
    """

    # Directories/patterns to ignore by default.  Entries containing glob
    # wildcards (e.g. "*.egg-info") are matched against each path component;
    # plain entries are matched by substring.
    IGNORE_PATTERNS = [
        ".venv", "venv", "node_modules", "__pycache__", ".git",
        ".pytest_cache", ".mypy_cache", "dist", "build", "*.egg-info",
        "migrations", "alembic/versions", ".tox", "htmlcov",
    ]

    def __init__(
        self,
        rules_dir: str = "",
        project_root: Path | None = None,
        verbose: bool = False,
    ):
        """Initialize validator state.

        Args:
            rules_dir: Directory (relative to the project root) holding the
                YAML rule files for this validator.
            project_root: Project root; defaults to the current working
                directory.
            verbose: When True, ``_add_violation`` echoes each finding (with
                context) to stdout as it is recorded.
        """
        self.rules_dir = rules_dir
        self.project_root = project_root or Path.cwd()
        self.verbose = verbose
        self.rules: list[dict[str, Any]] = []
        self.errors: list[dict[str, Any]] = []
        self.warnings: list[dict[str, Any]] = []
        self.result = ValidationResult()

    def load_rules(self) -> None:
        """Load rules from YAML files in ``rules_dir`` into ``self.rules``.

        Files whose name starts with "_" are treated as main config and
        skipped.  Missing rules directory is reported but not fatal.
        """
        rules_path = self.project_root / self.rules_dir
        if not rules_path.exists():
            print(f"Rules directory not found: {rules_path}")
            return

        for rule_file in rules_path.glob("*.yaml"):
            if rule_file.name.startswith("_"):
                continue  # Skip main config

            with open(rule_file) as f:
                data = yaml.safe_load(f)
                if data and "rules" in data:
                    self.rules.extend(data["rules"])

    def validate(self) -> bool:
        """Run validation. Returns True if passed.

        Subclasses should implement validate_all() instead.
        """
        # validate_all() is documented (and typed) to return a
        # ValidationResult, so the previous hasattr() guard was dead
        # defensiveness; call it directly.
        return not self.validate_all().has_errors()

    def validate_all(self, target_path: Path | None = None) -> ValidationResult:
        """Run all validations. Override in subclasses."""
        return ValidationResult()

    def _record_finding(
        self,
        bucket: list[dict[str, Any]],
        severity: str,
        rule_id: str,
        message: str,
        file: str,
        line: int,
    ) -> None:
        """Append one legacy-format finding dict to *bucket*.

        Shared by add_error/add_warning/add_info, which previously repeated
        this dict literal three times.
        """
        bucket.append(
            {
                "rule_id": rule_id,
                "message": message,
                "file": file,
                "line": line,
                "severity": severity,
            }
        )

    def add_error(
        self, rule_id: str, message: str, file: str = "", line: int = 0
    ) -> None:
        """Add an error."""
        self._record_finding(self.errors, "error", rule_id, message, file, line)

    def add_warning(
        self, rule_id: str, message: str, file: str = "", line: int = 0
    ) -> None:
        """Add a warning."""
        self._record_finding(self.warnings, "warning", rule_id, message, file, line)

    def add_info(
        self, rule_id: str, message: str, file: str = "", line: int = 0
    ) -> None:
        """Add an informational note.

        NOTE: preserves the original behavior of storing info findings in
        ``self.warnings`` (with severity "info"); ``print_results`` therefore
        counts them among the warnings.
        """
        self._record_finding(self.warnings, "info", rule_id, message, file, line)

    def print_results(self) -> None:
        """Print validation results from the legacy errors/warnings lists."""
        if not self.errors and not self.warnings:
            print(f"✅ All {self.rules_dir} rules passed!")
            return

        if self.errors:
            print(f"\n❌ {len(self.errors)} errors found:")
            for error in self.errors:
                print(f" [{error['rule_id']}] {error['message']}")
                if error["file"]:
                    print(f" File: {error['file']}:{error['line']}")

        if self.warnings:
            print(f"\n⚠️ {len(self.warnings)} warnings:")
            for warning in self.warnings:
                print(f" [{warning['rule_id']}] {warning['message']}")
                if warning["file"]:
                    print(f" File: {warning['file']}:{warning['line']}")

    def run(self) -> int:
        """Run validation and return exit code (0 = passed, 1 = failed)."""
        self.load_rules()
        passed = self.validate()
        self.print_results()
        return 0 if passed else 1

    def _should_ignore_file(self, file_path: Path) -> bool:
        """Check if a file should be ignored based on IGNORE_PATTERNS.

        BUG FIX: glob-style entries such as "*.egg-info" previously used the
        same substring test as plain entries, so they could never match
        (the literal "*" never occurs in real paths).  Wildcard entries are
        now matched against each path component with fnmatch.

        NOTE(review): "alembic/versions" is compared against str(file_path),
        which assumes "/" separators — verify on Windows.
        """
        from fnmatch import fnmatch  # stdlib; local to keep module header untouched

        path_str = str(file_path)
        for pattern in self.IGNORE_PATTERNS:
            if "*" in pattern or "?" in pattern:
                if any(fnmatch(part, pattern) for part in file_path.parts):
                    return True
            elif pattern in path_str:
                return True
        return False

    def _add_violation(
        self,
        rule_id: str,
        rule_name: str,
        severity: Severity,
        file_path: Path,
        line_number: int,
        message: str,
        context: str = "",
        suggestion: str = "",
    ) -> None:
        """Add a structured violation to ``self.result``.

        Args:
            rule_id: Rule identifier, e.g. "SEC-001".
            rule_name: Human-readable rule name, prefixed onto the message.
            severity: Severity of the finding.
            file_path: File the finding refers to.
            line_number: 1-based line number of the finding.
            message: Description of what was found.
            context: Optional source excerpt, echoed only in verbose mode.
            suggestion: Optional remediation hint.
        """
        violation = Violation(
            rule_id=rule_id,
            message=f"{rule_name}: {message}",
            severity=severity,
            file_path=str(file_path),
            line=line_number,
            suggestion=suggestion,
        )
        self.result.violations.append(violation)

        if self.verbose and context:
            print(f" [{rule_id}] {file_path}:{line_number}")
            print(f" {message}")
            print(f" Context: {context}")

    def validate_file(self, file_path: Path) -> ValidationResult:
        """Validate a single file and return the (shared) result object.

        Missing files are reported to stdout and skipped.
        """
        if not file_path.exists():
            print(f"File not found: {file_path}")
            return self.result

        # This entry point checks exactly one file, so the counter is set
        # (not incremented) — repeated calls do not accumulate.
        self.result.files_checked = 1
        content = file_path.read_text()
        lines = content.split("\n")
        self._validate_file_content(file_path, content, lines)
        return self.result

    def _validate_file_content(self, file_path: Path, content: str, lines: list[str]):
        """Validate file content. Override in subclasses."""
        pass

    def output_results(self, json_output: bool = False, errors_only: bool = False) -> None:
        """Output structured validation results.

        Args:
            json_output: Emit machine-readable JSON instead of text.
            errors_only: Restrict output to error-level findings.
        """
        if json_output:
            import json  # local import, mirroring the original module layout
            output = {
                "files_checked": self.result.files_checked,
                "violations": [
                    {
                        "rule_id": v.rule_id,
                        "message": v.message,
                        "severity": v.severity.value,
                        "file": v.file_path,
                        "line": v.line,
                        "suggestion": v.suggestion,
                    }
                    for v in self.result.violations
                    if not errors_only or v.severity == Severity.ERROR
                ],
            }
            print(json.dumps(output, indent=2))
        else:
            self._print_violations(errors_only)

    def _print_violations(self, errors_only: bool = False) -> None:
        """Print structured violations in human-readable format."""
        violations = self.result.violations
        if errors_only:
            violations = [v for v in violations if v.severity == Severity.ERROR]

        if not violations:
            print(f"\n✅ No issues found! ({self.result.files_checked} files checked)")
            return

        errors = [v for v in violations if v.severity == Severity.ERROR]
        warnings = [v for v in violations if v.severity == Severity.WARNING]
        info = [v for v in violations if v.severity == Severity.INFO]

        if errors:
            print(f"\n❌ {len(errors)} errors:")
            for v in errors:
                print(f" [{v.rule_id}] {v.file_path}:{v.line}")
                print(f" {v.message}")
                if v.suggestion:
                    print(f" 💡 {v.suggestion}")

        if warnings and not errors_only:
            print(f"\n⚠️ {len(warnings)} warnings:")
            for v in warnings:
                print(f" [{v.rule_id}] {v.file_path}:{v.line}")
                print(f" {v.message}")

        if info and not errors_only:
            print(f"\nℹ️ {len(info)} info:")
            for v in info:
                print(f" [{v.rule_id}] {v.file_path}:{v.line}")
                print(f" {v.message}")

        print(f"\n📊 Summary: {len(errors)} errors, {len(warnings)} warnings, {len(info)} info")

    def get_exit_code(self) -> int:
        """Get exit code based on structured results (1 on any error)."""
        return 1 if self.result.has_errors() else 0
|
||||
246
scripts/validate/validate_all.py
Executable file
246
scripts/validate/validate_all.py
Executable file
@@ -0,0 +1,246 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unified Code Validator
|
||||
======================
|
||||
Runs all validation scripts (architecture, security, performance, audit) in sequence.
|
||||
|
||||
This provides a single entry point for comprehensive code validation,
|
||||
useful for CI/CD pipelines and pre-commit hooks.
|
||||
|
||||
Usage:
|
||||
python scripts/validate/validate_all.py # Run all validators
|
||||
python scripts/validate/validate_all.py --security # Run only security validator
|
||||
python scripts/validate/validate_all.py --performance # Run only performance validator
|
||||
python scripts/validate/validate_all.py --architecture # Run only architecture validator
|
||||
python scripts/validate/validate_all.py --audit # Run only audit validator
|
||||
python scripts/validate/validate_all.py -v # Verbose output
|
||||
python scripts/validate/validate_all.py --fail-fast # Stop on first failure
|
||||
python scripts/validate/validate_all.py --json # JSON output
|
||||
|
||||
Options:
|
||||
--architecture Run architecture validator
|
||||
--security Run security validator
|
||||
--performance Run performance validator
|
||||
--audit Run audit validator
|
||||
--fail-fast Stop on first validator failure
|
||||
-v, --verbose Show detailed output
|
||||
--errors-only Only show errors
|
||||
--json Output results as JSON
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
|
||||
def run_architecture_validator(verbose: bool = False) -> tuple[int, dict]:
    """Run the architecture validator.

    Returns:
        ``(exit_code, summary)`` — exit_code is 1 when error-level
        violations exist; summary is a JSON-serializable dict.  When the
        validator module cannot be imported the run is reported as skipped
        with exit code 0.
    """
    try:
        # Import dynamically to avoid circular imports
        from validate_architecture import ArchitectureValidator

        config_path = Path.cwd() / ".architecture-rules.yaml"
        validator = ArchitectureValidator(config_path=config_path, verbose=verbose)
        result = validator.validate_all()

        return (
            1 if result.has_errors() else 0,
            {
                "name": "Architecture",
                "files_checked": result.files_checked,
                # Consistency: use the ValidationResult count helpers like
                # the other runners, instead of re-counting severities here.
                "errors": result.error_count(),
                "warnings": result.warning_count(),
                "info": result.info_count(),
            }
        )
    except ImportError as e:
        print(f"⚠️ Architecture validator not available: {e}")
        return 0, {"name": "Architecture", "skipped": True}
    except Exception as e:
        print(f"❌ Architecture validator failed: {e}")
        return 1, {"name": "Architecture", "error": str(e)}
|
||||
|
||||
|
||||
def run_security_validator(verbose: bool = False) -> tuple[int, dict]:
    """Execute the security validator and summarize its findings.

    Returns ``(exit_code, summary)``; a missing validator module is
    reported as skipped with exit code 0.
    """
    try:
        # Deferred import keeps this script usable when the validator is absent.
        from validate_security import SecurityValidator

        outcome = SecurityValidator(verbose=verbose).validate_all()
        summary = {
            "name": "Security",
            "files_checked": outcome.files_checked,
            "errors": outcome.error_count(),
            "warnings": outcome.warning_count(),
            "info": outcome.info_count(),
        }
        return (1 if outcome.has_errors() else 0), summary
    except ImportError as e:
        print(f"⚠️ Security validator not available: {e}")
        return 0, {"name": "Security", "skipped": True}
    except Exception as e:
        print(f"❌ Security validator failed: {e}")
        return 1, {"name": "Security", "error": str(e)}
|
||||
|
||||
|
||||
def run_performance_validator(verbose: bool = False) -> tuple[int, dict]:
    """Execute the performance validator and summarize its findings.

    Returns ``(exit_code, summary)``; a missing validator module is
    reported as skipped with exit code 0.
    """
    try:
        # Deferred import keeps this script usable when the validator is absent.
        from validate_performance import PerformanceValidator

        outcome = PerformanceValidator(verbose=verbose).validate_all()
        summary = {
            "name": "Performance",
            "files_checked": outcome.files_checked,
            "errors": outcome.error_count(),
            "warnings": outcome.warning_count(),
            "info": outcome.info_count(),
        }
        return (1 if outcome.has_errors() else 0), summary
    except ImportError as e:
        print(f"⚠️ Performance validator not available: {e}")
        return 0, {"name": "Performance", "skipped": True}
    except Exception as e:
        print(f"❌ Performance validator failed: {e}")
        return 1, {"name": "Performance", "error": str(e)}
|
||||
|
||||
|
||||
def run_audit_validator(verbose: bool = False) -> tuple[int, dict]:
    """Execute the audit validator and summarize its findings.

    Unlike the other runners, AuditValidator reports through its legacy
    errors/warnings lists and a boolean validate(), so the summary is
    assembled from those attributes directly.
    """
    try:
        from validate_audit import AuditValidator

        validator = AuditValidator()
        passed = validator.validate()
        summary = {
            "name": "Audit",
            # AuditValidator may not define these attributes; default to empty.
            "files_checked": len(getattr(validator, "files_checked", ())),
            "errors": len(validator.errors),
            "warnings": len(validator.warnings),
            "info": len(getattr(validator, "info", ())),
        }
        return (0 if passed else 1), summary
    except ImportError as e:
        print(f"⚠️ Audit validator not available: {e}")
        return 0, {"name": "Audit", "skipped": True}
    except Exception as e:
        print(f"❌ Audit validator failed: {e}")
        return 1, {"name": "Audit", "error": str(e)}
|
||||
|
||||
|
||||
def print_summary(results: list[dict], json_output: bool = False):
    """Render the per-validator summary, as JSON or as a text banner.

    Args:
        results: One summary dict per validator run, as produced by the
            run_*_validator helpers (may carry "skipped" or "error" keys).
        json_output: When True, dump the raw results as JSON and return.
    """
    if json_output:
        print(json.dumps({"validators": results}, indent=2))
        return

    print("\n" + "=" * 80)
    print("📊 UNIFIED VALIDATION SUMMARY")
    print("=" * 80)

    totals = {"errors": 0, "warnings": 0, "info": 0}

    for entry in results:
        # Skipped / crashed validators contribute nothing to the totals.
        if entry.get("skipped"):
            print(f"\n⏭️ {entry['name']}: Skipped")
            continue
        if entry.get("error"):
            print(f"\n❌ {entry['name']}: Error - {entry['error']}")
            continue

        counts = {key: entry.get(key, 0) for key in ("errors", "warnings", "info")}
        for key, value in counts.items():
            totals[key] += value

        status = "✅" if counts["errors"] == 0 else "❌"
        print(f"\n{status} {entry['name']}:")
        print(f" Files: {entry.get('files_checked', 0)}")
        print(f" Errors: {counts['errors']}, Warnings: {counts['warnings']}, Info: {counts['info']}")

    print("\n" + "-" * 80)
    print(f"TOTAL: {totals['errors']} errors, {totals['warnings']} warnings, {totals['info']} info")
    print("=" * 80)

    if totals["errors"] > 0:
        print("❌ VALIDATION FAILED")
    elif totals["warnings"] > 0:
        print(f"⚠️ VALIDATION PASSED WITH {totals['warnings']} WARNING(S)")
    else:
        print("✅ VALIDATION PASSED")
    print("=" * 80)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse flags, run the selected validators, exit 0/1."""
    parser = argparse.ArgumentParser(
        description="Unified code validator - runs architecture, security, performance, and audit checks",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--architecture", action="store_true", help="Run architecture validator")
    parser.add_argument("--security", action="store_true", help="Run security validator")
    parser.add_argument("--performance", action="store_true", help="Run performance validator")
    parser.add_argument("--audit", action="store_true", help="Run audit validator")
    parser.add_argument("--fail-fast", action="store_true", help="Stop on first failure")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("--errors-only", action="store_true", help="Only show errors")
    parser.add_argument("--json", action="store_true", help="JSON output")

    args = parser.parse_args()

    # Map each validator name to (selected-flag, runner); dict order fixes
    # the execution order.  With no explicit selection, everything runs.
    plan = {
        "Architecture": (args.architecture, run_architecture_validator),
        "Security": (args.security, run_security_validator),
        "Performance": (args.performance, run_performance_validator),
        "Audit": (args.audit, run_audit_validator),
    }
    run_all = not any(flag for flag, _ in plan.values())

    print("\n🔍 UNIFIED CODE VALIDATION")
    print("=" * 80)

    summaries = []
    exit_code = 0

    for name, (selected, runner) in plan.items():
        if not (run_all or selected):
            continue

        print(f"\n{'=' * 40}")
        print(f"🔍 Running {name} Validator...")
        print("=" * 40)

        code, summary = runner(verbose=args.verbose)
        summaries.append(summary)

        if code != 0:
            exit_code = 1
            if args.fail_fast:
                print(f"\n❌ {name} validator failed. Stopping (--fail-fast)")
                break

    # NOTE(review): --errors-only is accepted but not consumed here —
    # presumably reserved for the individual validators; verify.
    print_summary(summaries, json_output=args.json)
    sys.exit(exit_code)
|
||||
5203
scripts/validate/validate_architecture.py
Executable file
5203
scripts/validate/validate_architecture.py
Executable file
File diff suppressed because it is too large
Load Diff
543
scripts/validate/validate_audit.py
Normal file
543
scripts/validate/validate_audit.py
Normal file
@@ -0,0 +1,543 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
IT Internal Audit Validator
|
||||
|
||||
Validates code against internal audit rules defined in .audit-rules/
|
||||
Focuses on governance, compliance, and control requirements.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from base_validator import BaseValidator
|
||||
|
||||
|
||||
class AuditValidator(BaseValidator):
    """Validates IT internal audit rules.

    Covers audit trail, access control, data governance, compliance,
    change management, third-party, and documentation checks, reporting
    through the legacy BaseValidator errors/warnings lists.
    """

    def __init__(self, project_root: Path | None = None):
        # Rules live in the project's .audit-rules/ directory.
        super().__init__(".audit-rules", project_root)
|
||||
|
||||
    def validate(self) -> bool:
        """Run all audit validations.

        Overrides ``BaseValidator.validate`` directly (rather than
        ``validate_all``) and accumulates findings in ``self.errors`` /
        ``self.warnings``.

        Returns:
            True when no error-level findings were recorded (warnings and
            info notes do not fail the run).
        """
        self._validate_audit_trail()
        self._validate_access_control()
        self._validate_data_governance()
        self._validate_compliance()
        self._validate_change_management()
        self._validate_third_party()
        # _validate_documentation is defined later in this class (not shown here).
        self._validate_documentation()
        return len(self.errors) == 0
|
||||
|
||||
# ==================
|
||||
# AUDIT TRAIL
|
||||
# ==================
|
||||
|
||||
    def _validate_audit_trail(self) -> None:
        """Validate audit trail requirements.

        Checks (all heuristic, substring/regex based):
        * auth-related modules log their operations (AUDIT-LOG-001),
        * data-modifying services log (AUDIT-LOG-002),
        * DB models carry audit timestamps (AUDIT-FIELD-001),
        * the app never tampers with its own log files (AUDIT-INT-001).
        """
        # Check authentication logging
        auth_files = [
            self.project_root / "app" / "api" / "v1" / "auth.py",
            self.project_root / "app" / "routes" / "admin.py",
        ]

        for file in auth_files:
            if file.exists():
                content = file.read_text()
                # Heuristic: any occurrence of "logger" counts as logging.
                if "logger" not in content:
                    self.add_error(
                        "AUDIT-LOG-001",
                        "Authentication operations must include logging",
                        str(file),
                    )

        # Check service layer logging
        services_path = self.project_root / "app" / "services"
        if services_path.exists():
            for file in services_path.glob("*.py"):
                if file.name == "__init__.py":
                    continue
                content = file.read_text()
                # Services that modify data should have logging
                if re.search(r"def (create|update|delete)", content):
                    if "logger" not in content:
                        self.add_warning(
                            "AUDIT-LOG-002",
                            "Service with data modifications should include logging",
                            str(file),
                        )

        # Check for audit timestamp fields in models
        # Models can have timestamps directly or inherit from BaseModel/TimestampMixin
        models_path = self.project_root / "models" / "database"
        if models_path.exists():
            for file in models_path.glob("*.py"):
                # audit_log.py uses timestamp field instead of created_at/updated_at
                if file.name in ("__init__.py", "base.py", "audit_log.py"):
                    continue
                content = file.read_text()
                if "class " in content:  # Has model definition
                    # Check if timestamps are present directly or via inheritance
                    has_timestamps = (
                        "created_at" in content
                        or "updated_at" in content
                        or "BaseModel" in content  # Inherits from BaseModel
                        or "TimestampMixin" in content  # Uses TimestampMixin
                    )
                    if not has_timestamps:
                        self.add_warning(
                            "AUDIT-FIELD-001",
                            "Database model should include audit timestamp fields",
                            str(file),
                        )

        # Check for forbidden log modification patterns
        # NOTE(review): _check_forbidden_patterns is not defined in the
        # BaseValidator shown alongside this file — presumably defined
        # further down in this class; verify it exists.
        self._check_forbidden_patterns(
            paths=["app/**/*.py"],
            patterns=[
                r"os\.remove.*\.log",
                r"truncate.*log",
                r"open.*\.log.*['\"]w['\"]",
            ],
            rule_id="AUDIT-INT-001",
            message="Application must not modify or delete log files",
        )
|
||||
|
||||
# ==================
|
||||
# ACCESS CONTROL
|
||||
# ==================
|
||||
|
||||
    def _validate_access_control(self) -> None:
        """Validate access control requirements.

        Checks that API endpoints require authentication (ACCESS-AUTH-001),
        admin routes verify privileges (ACCESS-AUTH-002), passwords use an
        approved hash (ACCESS-ACCT-003), and password hashes never appear in
        API response schemas (ACCESS-PRIV-002).
        """
        # Check API endpoints have authentication
        api_path = self.project_root / "app" / "api" / "v1"
        if api_path.exists():
            for file in api_path.glob("*.py"):
                # Skip endpoints that are intentionally unauthenticated
                if file.name in ("__init__.py", "health.py", "metrics.py"):
                    continue
                content = file.read_text()
                # Check for authentication dependency
                if "@router" in content:
                    if not re.search(
                        r"CurrentUser|Depends.*get_current_user|AdminUser", content
                    ):
                        # auth.py handles its own auth
                        if file.name != "auth.py":
                            self.add_warning(
                                "ACCESS-AUTH-001",
                                "API endpoint should require authentication",
                                str(file),
                            )

        # Check admin routes verify admin role
        admin_route = self.project_root / "app" / "routes" / "admin.py"
        if admin_route.exists():
            content = admin_route.read_text()
            if "is_admin" not in content and "admin_required" not in content:
                self.add_warning(
                    "ACCESS-AUTH-002",
                    "Admin routes should verify admin privileges",
                    str(admin_route),
                )

        # Check password hashing
        security_file = self.project_root / "app" / "core" / "security.py"
        if security_file.exists():
            content = security_file.read_text()
            if not re.search(r"bcrypt|argon2|scrypt|pbkdf2", content, re.IGNORECASE):
                self.add_error(
                    "ACCESS-ACCT-003",
                    "Passwords must use approved hashing algorithms",
                    str(security_file),
                )

        # Check password not in API responses
        # Note: Only flag if a class with "Response" in name directly defines password_hash
        # Internal schemas (like UserInDB) are not flagged as they're not API responses
        schema_path = self.project_root / "models" / "schema"
        if schema_path.exists():
            for file in schema_path.glob("*.py"):
                content = file.read_text()
                # Check for Response classes that directly define password_hash
                # Split by class definitions and check each
                class_blocks = re.split(r"(?=^class\s)", content, flags=re.MULTILINE)
                for block in class_blocks:
                    # Check if this class is a Response class
                    class_match = re.match(r"class\s+(\w*Response\w*)", block)
                    if class_match:
                        # Check if password_hash is defined in this class (not inherited)
                        if "password_hash:" in block or "password_hash =" in block:
                            if "exclude" not in block.lower():
                                self.add_error(
                                    "ACCESS-PRIV-002",
                                    f"Password hash must be excluded from {class_match.group(1)}",
                                    str(file),
                                )
|
||||
|
||||
# ==================
|
||||
# DATA GOVERNANCE
|
||||
# ==================
|
||||
|
||||
    def _validate_data_governance(self) -> None:
        """Validate data governance requirements.

        Checks that PII is never logged (DATA-PII-003), Pydantic validation
        exists in the schema layer (DATA-INT-001), and users can access
        their own data per GDPR Art. 15 (DATA-PRIV-001).
        """
        # Check PII not logged
        # Note: Patterns detect actual password values, not descriptive usage like "Password reset"
        # We look for patterns that suggest password values are being logged:
        # - password= or password: followed by a variable
        # - %s or {} after password indicating interpolation of password value
        # NOTE(review): relies on _check_forbidden_patterns, which is not
        # visible in the BaseValidator shown with this file — verify.
        self._check_forbidden_patterns(
            paths=["app/**/*.py", "middleware/**/*.py"],
            patterns=[
                r"logger\.\w+\(.*password\s*[=:]\s*['\"]?%",  # password=%s
                r"logger\.\w+\(.*password\s*[=:]\s*\{",  # password={var}
                r"logging\.\w+\(.*password\s*[=:]\s*['\"]?%",  # password=%s
                r"print\(.*password\s*=",  # print(password=xxx)
                r"logger.*credit.*card.*\d",  # credit card with numbers
                r"logger.*\bssn\b.*\d",  # SSN with numbers
            ],
            rule_id="DATA-PII-003",
            message="PII/sensitive data must not be logged",
        )

        # Check input validation (Pydantic)
        schema_path = self.project_root / "models" / "schema"
        if schema_path.exists():
            has_validation = False
            for file in schema_path.glob("*.py"):
                content = file.read_text()
                # One schema module using Field/validator satisfies the check.
                if re.search(r"Field|validator|field_validator", content):
                    has_validation = True
                    break
            if not has_validation:
                self.add_error(
                    "DATA-INT-001",
                    "Pydantic validation required for data integrity",
                    str(schema_path),
                )

        # Check user data access endpoint exists (GDPR)
        users_api = self.project_root / "app" / "api" / "v1" / "users.py"
        if users_api.exists():
            content = users_api.read_text()
            if "/me" not in content and "current" not in content.lower():
                self.add_warning(
                    "DATA-PRIV-001",
                    "Endpoint for users to access their own data required (GDPR Art. 15)",
                    str(users_api),
                )
|
||||
|
||||
# ==================
|
||||
# COMPLIANCE
|
||||
# ==================
|
||||
|
||||
    def _validate_compliance(self) -> None:
        """Validate compliance requirements.

        Checks HTTPS/TLS configuration (COMP-REG-002), version control
        (COMP-EVID-003), CI pipelines (COMP-EVID-001), and a code-review
        template (COMP-POL-001).  Supports both GitHub and GitLab layouts.
        """
        # Check HTTPS configuration
        config_files = [
            self.project_root / "app" / "core" / "config.py",
            self.project_root / "main.py",
        ]
        https_configured = False
        for file in config_files:
            if file.exists():
                content = file.read_text()
                # Heuristic: any https/SSL/TLS/SECURE mention counts.
                if re.search(r"https|SSL|TLS|SECURE", content, re.IGNORECASE):
                    https_configured = True
                    break
        if not https_configured:
            self.add_warning(
                "COMP-REG-002",
                "HTTPS configuration should be documented",
                "app/core/config.py",
            )

        # Check version control
        if not (self.project_root / ".git").exists():
            self.add_error(
                "COMP-EVID-003",
                "Version control (Git) is required",
                str(self.project_root),
            )

        # Check CI/CD exists (GitHub or GitLab)
        github_ci = self.project_root / ".github" / "workflows" / "ci.yml"
        gitlab_ci = self.project_root / ".gitlab-ci.yml"
        if not github_ci.exists() and not gitlab_ci.exists():
            self.add_warning(
                "COMP-EVID-001",
                "CI workflow for automated testing recommended",
                ".gitlab-ci.yml or .github/workflows/ci.yml",
            )

        # Check code review process (GitHub or GitLab)
        github_pr_template = self.project_root / ".github" / "PULL_REQUEST_TEMPLATE.md"
        gitlab_mr_templates = self.project_root / ".gitlab" / "merge_request_templates"
        # GitLab counts only when the templates directory is non-empty.
        has_mr_template = github_pr_template.exists() or (
            gitlab_mr_templates.exists() and any(gitlab_mr_templates.iterdir())
        )
        if not has_mr_template:
            self.add_warning(
                "COMP-POL-001",
                "Merge request template recommended for code review",
                ".gitlab/merge_request_templates/ or .github/PULL_REQUEST_TEMPLATE.md",
            )
|
||||
|
||||
# ==================
|
||||
# CHANGE MANAGEMENT
|
||||
# ==================
|
||||
|
||||
    def _validate_change_management(self) -> None:
        """Validate change management requirements.

        Checks .gitignore secret exclusions (CHANGE-VC-002/003), Alembic
        migrations with downgrade paths (CHANGE-ROLL-001/002), and
        environment separation in config (CHANGE-DEP-001).
        """
        # Check .gitignore exists and excludes secrets
        gitignore = self.project_root / ".gitignore"
        if gitignore.exists():
            content = gitignore.read_text()
            required_exclusions = [".env", "*.pem", "*.key"]
            for pattern in required_exclusions:
                # Simplified check - just look for the pattern
                # (the "*" is stripped, so e.g. ".pem" anywhere satisfies "*.pem").
                if pattern.replace("*", "") not in content:
                    self.add_warning(
                        "CHANGE-VC-003",
                        f"Secret pattern '{pattern}' should be in .gitignore",
                        str(gitignore),
                    )
        else:
            self.add_error(
                "CHANGE-VC-002",
                ".gitignore file required",
                str(self.project_root),
            )

        # Check database migrations
        alembic_dir = self.project_root / "alembic"
        if not alembic_dir.exists():
            self.add_warning(
                "CHANGE-ROLL-001",
                "Database migration tool (Alembic) recommended",
                "alembic/",
            )
        else:
            # Check for downgrade functions
            versions_dir = alembic_dir / "versions"
            if versions_dir.exists():
                for file in versions_dir.glob("*.py"):
                    content = file.read_text()
                    # A migration that can upgrade must also be able to roll back.
                    if "def upgrade" in content and "def downgrade" not in content:
                        self.add_warning(
                            "CHANGE-ROLL-002",
                            "Migration should include downgrade function",
                            str(file),
                        )

        # Check environment separation
        config_file = self.project_root / "app" / "core" / "config.py"
        if config_file.exists():
            content = config_file.read_text()
            if not re.search(r"ENVIRONMENT|development|staging|production", content):
                self.add_warning(
                    "CHANGE-DEP-001",
                    "Environment separation configuration recommended",
                    str(config_file),
                )
|
||||
|
||||
# ==================
|
||||
# THIRD PARTY
|
||||
# ==================
|
||||
|
||||
def _validate_third_party(self) -> None:
|
||||
"""Validate third-party dependency management."""
|
||||
# Check dependency lock file exists
|
||||
lock_files = ["uv.lock", "poetry.lock", "Pipfile.lock", "requirements.lock"]
|
||||
has_lock = any((self.project_root / f).exists() for f in lock_files)
|
||||
if not has_lock:
|
||||
self.add_warning(
|
||||
"THIRD-DEP-001",
|
||||
"Dependency lock file recommended for reproducible builds",
|
||||
"uv.lock or similar",
|
||||
)
|
||||
|
||||
# Check dependency manifest exists
|
||||
manifest_files = ["pyproject.toml", "requirements.txt", "Pipfile"]
|
||||
has_manifest = any((self.project_root / f).exists() for f in manifest_files)
|
||||
if not has_manifest:
|
||||
self.add_error(
|
||||
"THIRD-DEP-002",
|
||||
"Dependency manifest file required",
|
||||
"pyproject.toml",
|
||||
)
|
||||
|
||||
# Check for dependency scanning (GitHub Dependabot or GitLab)
|
||||
dependabot = self.project_root / ".github" / "dependabot.yml"
|
||||
gitlab_ci = self.project_root / ".gitlab-ci.yml"
|
||||
has_dep_scanning = dependabot.exists()
|
||||
if not has_dep_scanning and gitlab_ci.exists():
|
||||
# Check if GitLab CI includes dependency scanning
|
||||
ci_content = gitlab_ci.read_text()
|
||||
has_dep_scanning = "dependency_scanning" in ci_content.lower()
|
||||
if not has_dep_scanning:
|
||||
self.add_info(
|
||||
"THIRD-VULN-002",
|
||||
"Consider enabling dependency scanning for security updates",
|
||||
".gitlab-ci.yml (include dependency_scanning) or .github/dependabot.yml",
|
||||
)
|
||||
|
||||
# Check for insecure package sources
|
||||
pyproject = self.project_root / "pyproject.toml"
|
||||
if pyproject.exists():
|
||||
content = pyproject.read_text()
|
||||
if "http://" in content and "https://" not in content:
|
||||
self.add_error(
|
||||
"THIRD-VEND-001",
|
||||
"Only HTTPS sources allowed for packages",
|
||||
str(pyproject),
|
||||
)
|
||||
|
||||
# ==================
|
||||
# DOCUMENTATION
|
||||
# ==================
|
||||
|
||||
def _validate_documentation(self) -> None:
|
||||
"""Validate documentation requirements."""
|
||||
# Check README exists
|
||||
readme_files = ["README.md", "README.rst", "README.txt"]
|
||||
has_readme = any((self.project_root / f).exists() for f in readme_files)
|
||||
if not has_readme:
|
||||
self.add_error(
|
||||
"DOC-PROJ-001",
|
||||
"Project README required",
|
||||
"README.md",
|
||||
)
|
||||
else:
|
||||
# Check README has setup instructions
|
||||
for readme in readme_files:
|
||||
readme_path = self.project_root / readme
|
||||
if readme_path.exists():
|
||||
content = readme_path.read_text().lower()
|
||||
has_setup = any(
|
||||
term in content
|
||||
for term in [
|
||||
"install",
|
||||
"setup",
|
||||
"quick start",
|
||||
"getting started",
|
||||
]
|
||||
)
|
||||
if not has_setup:
|
||||
self.add_warning(
|
||||
"DOC-PROJ-002",
|
||||
"README should include setup instructions",
|
||||
str(readme_path),
|
||||
)
|
||||
break
|
||||
|
||||
# Check security policy exists
|
||||
security_files = ["SECURITY.md", ".github/SECURITY.md"]
|
||||
has_security = any((self.project_root / f).exists() for f in security_files)
|
||||
if not has_security:
|
||||
self.add_warning(
|
||||
"DOC-SEC-001",
|
||||
"Security policy (SECURITY.md) recommended",
|
||||
"SECURITY.md",
|
||||
)
|
||||
|
||||
# Check API documentation
|
||||
docs_api = self.project_root / "docs" / "api"
|
||||
if not docs_api.exists() or not list(docs_api.glob("*.md")):
|
||||
self.add_warning(
|
||||
"DOC-API-003",
|
||||
"API documentation recommended",
|
||||
"docs/api/",
|
||||
)
|
||||
|
||||
# Check authentication documentation
|
||||
auth_doc = self.project_root / "docs" / "api" / "authentication.md"
|
||||
if not auth_doc.exists():
|
||||
self.add_warning(
|
||||
"DOC-SEC-002",
|
||||
"Authentication documentation recommended",
|
||||
"docs/api/authentication.md",
|
||||
)
|
||||
|
||||
# Check architecture documentation
|
||||
arch_docs = self.project_root / "docs" / "architecture"
|
||||
if not arch_docs.exists() or not list(arch_docs.glob("*.md")):
|
||||
self.add_warning(
|
||||
"DOC-ARCH-001",
|
||||
"Architecture documentation recommended",
|
||||
"docs/architecture/",
|
||||
)
|
||||
|
||||
# Check deployment documentation
|
||||
deploy_doc = self.project_root / "docs" / "deployment" / "index.md"
|
||||
if not deploy_doc.exists():
|
||||
self.add_warning(
|
||||
"DOC-OPS-001",
|
||||
"Deployment documentation recommended",
|
||||
"docs/deployment/index.md",
|
||||
)
|
||||
|
||||
# ==================
|
||||
# HELPERS
|
||||
# ==================
|
||||
|
||||
def _check_forbidden_patterns(
|
||||
self,
|
||||
paths: list[str],
|
||||
patterns: list[str],
|
||||
rule_id: str,
|
||||
message: str,
|
||||
) -> None:
|
||||
"""Check for forbidden patterns in files."""
|
||||
for path_pattern in paths:
|
||||
if "**" in path_pattern:
|
||||
base, pattern = path_pattern.split("**", 1)
|
||||
base_path = self.project_root / base.rstrip("/")
|
||||
if base_path.exists():
|
||||
files = base_path.rglob(pattern.lstrip("/"))
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
files = [self.project_root / path_pattern]
|
||||
|
||||
for file in files:
|
||||
if not file.exists() or not file.is_file():
|
||||
continue
|
||||
try:
|
||||
content = file.read_text()
|
||||
for pattern in patterns:
|
||||
if re.search(pattern, content, re.IGNORECASE):
|
||||
self.add_error(rule_id, message, str(file))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: run the audit validator and return a process exit code."""
    import argparse

    parser = argparse.ArgumentParser(description="Validate IT internal audit rules")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument(
        "--format",
        choices=["text", "json"],
        default="text",
        help="Output format",
    )
    # NOTE(review): the parsed options are not consumed anywhere below —
    # confirm whether --verbose/--format should be forwarded to the validator.
    parser.parse_args()

    validator = AuditValidator()
    validator.load_rules()
    ok = validator.validate()
    validator.print_results()

    return 0 if ok else 1


if __name__ == "__main__":
    sys.exit(main())
|
||||
648
scripts/validate/validate_performance.py
Executable file
648
scripts/validate/validate_performance.py
Executable file
@@ -0,0 +1,648 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Performance Validator
|
||||
=====================
|
||||
Validates code against performance rules defined in .performance-rules/
|
||||
|
||||
This script checks for common performance issues:
|
||||
- N+1 query patterns
|
||||
- Missing pagination
|
||||
- Inefficient database operations
|
||||
- Memory management issues
|
||||
- Frontend performance anti-patterns
|
||||
- Missing timeouts and connection pooling
|
||||
|
||||
Usage:
|
||||
python scripts/validate/validate_performance.py # Check all files
|
||||
python scripts/validate/validate_performance.py -d app/services/ # Check specific directory
|
||||
python scripts/validate/validate_performance.py -f app/api/v1/products.py # Check single file
|
||||
python scripts/validate/validate_performance.py -v # Verbose output
|
||||
python scripts/validate/validate_performance.py --json # JSON output
|
||||
python scripts/validate/validate_performance.py --errors-only # Only show errors
|
||||
|
||||
Options:
|
||||
-f, --file PATH Validate a single file
|
||||
-d, --folder PATH Validate all files in a directory (recursive)
|
||||
-v, --verbose Show detailed output including context
|
||||
--errors-only Only show errors, suppress warnings and info
|
||||
--json Output results as JSON
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from base_validator import BaseValidator, Severity, ValidationResult
|
||||
|
||||
|
||||
class PerformanceValidator(BaseValidator):
|
||||
"""Performance-focused code validator"""
|
||||
|
||||
VALIDATOR_NAME = "Performance Validator"
|
||||
VALIDATOR_EMOJI = "⚡"
|
||||
RULES_DIR_NAME = ".performance-rules"
|
||||
CONFIG_FILE_NAME = ".performance-rules.yaml"
|
||||
|
||||
def validate_all(self, target_path: Path = None) -> ValidationResult:
|
||||
"""Validate all files for performance issues"""
|
||||
print(f"\n{self.VALIDATOR_EMOJI} Starting performance validation...\n")
|
||||
|
||||
target = target_path or self.project_root
|
||||
|
||||
# Validate Python files
|
||||
self._validate_python_files(target)
|
||||
|
||||
# Validate JavaScript files
|
||||
self._validate_javascript_files(target)
|
||||
|
||||
# Validate HTML templates
|
||||
self._validate_template_files(target)
|
||||
|
||||
return self.result
|
||||
|
||||
def _validate_python_files(self, target: Path):
|
||||
"""Validate all Python files for performance issues"""
|
||||
print("🐍 Validating Python files...")
|
||||
|
||||
for py_file in target.rglob("*.py"):
|
||||
if self._should_ignore_file(py_file):
|
||||
continue
|
||||
|
||||
self.result.files_checked += 1
|
||||
content = py_file.read_text()
|
||||
lines = content.split("\n")
|
||||
self._validate_python_performance(py_file, content, lines)
|
||||
|
||||
def _validate_javascript_files(self, target: Path):
|
||||
"""Validate all JavaScript files for performance issues"""
|
||||
print("🟨 Validating JavaScript files...")
|
||||
|
||||
for js_file in target.rglob("*.js"):
|
||||
if self._should_ignore_file(js_file):
|
||||
continue
|
||||
|
||||
self.result.files_checked += 1
|
||||
content = js_file.read_text()
|
||||
lines = content.split("\n")
|
||||
self._validate_javascript_performance(js_file, content, lines)
|
||||
|
||||
def _validate_template_files(self, target: Path):
|
||||
"""Validate all HTML template files for performance issues"""
|
||||
print("📄 Validating template files...")
|
||||
|
||||
for html_file in target.rglob("*.html"):
|
||||
if self._should_ignore_file(html_file):
|
||||
continue
|
||||
|
||||
self.result.files_checked += 1
|
||||
content = html_file.read_text()
|
||||
lines = content.split("\n")
|
||||
self._validate_template_performance(html_file, content, lines)
|
||||
|
||||
def _validate_file_content(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""Validate file content based on file type"""
|
||||
if file_path.suffix == ".py":
|
||||
self._validate_python_performance(file_path, content, lines)
|
||||
elif file_path.suffix == ".js":
|
||||
self._validate_javascript_performance(file_path, content, lines)
|
||||
elif file_path.suffix == ".html":
|
||||
self._validate_template_performance(file_path, content, lines)
|
||||
|
||||
def _validate_python_performance(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""Validate Python file for performance issues"""
|
||||
file_path_str = str(file_path)
|
||||
|
||||
# PERF-001: N+1 query detection
|
||||
self._check_n_plus_1_queries(file_path, content, lines)
|
||||
|
||||
# PERF-003: Query result limiting
|
||||
self._check_query_limiting(file_path, content, lines)
|
||||
|
||||
# PERF-006: Bulk operations
|
||||
self._check_bulk_operations(file_path, content, lines)
|
||||
|
||||
# PERF-008: Use EXISTS for existence checks
|
||||
self._check_existence_checks(file_path, content, lines)
|
||||
|
||||
# PERF-009: Batch updates
|
||||
self._check_batch_updates(file_path, content, lines)
|
||||
|
||||
# PERF-026: Pagination for API endpoints
|
||||
if "/api/" in file_path_str:
|
||||
self._check_api_pagination(file_path, content, lines)
|
||||
|
||||
# PERF-037: Parallel async operations
|
||||
self._check_parallel_async(file_path, content, lines)
|
||||
|
||||
# PERF-040: Timeout configuration
|
||||
self._check_timeout_config(file_path, content, lines)
|
||||
|
||||
# PERF-046: Generators for large datasets
|
||||
self._check_generators(file_path, content, lines)
|
||||
|
||||
# PERF-047: Stream file uploads
|
||||
if "upload" in file_path_str.lower() or "file" in file_path_str.lower():
|
||||
self._check_file_streaming(file_path, content, lines)
|
||||
|
||||
# PERF-048: Chunked processing
|
||||
if "import" in file_path_str.lower() or "csv" in file_path_str.lower():
|
||||
self._check_chunked_processing(file_path, content, lines)
|
||||
|
||||
# PERF-049: Context managers for files
|
||||
self._check_context_managers(file_path, content, lines)
|
||||
|
||||
# PERF-051: String concatenation
|
||||
self._check_string_concatenation(file_path, content, lines)
|
||||
|
||||
def _validate_javascript_performance(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""Validate JavaScript file for performance issues"""
|
||||
# PERF-056: Debounce search inputs
|
||||
self._check_debounce(file_path, content, lines)
|
||||
|
||||
# PERF-062: Polling intervals
|
||||
self._check_polling_intervals(file_path, content, lines)
|
||||
|
||||
# PERF-064: Layout thrashing
|
||||
self._check_layout_thrashing(file_path, content, lines)
|
||||
|
||||
def _validate_template_performance(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""Validate HTML template file for performance issues"""
|
||||
# PERF-058: Image lazy loading
|
||||
self._check_image_lazy_loading(file_path, content, lines)
|
||||
|
||||
# PERF-067: Script defer/async
|
||||
self._check_script_loading(file_path, content, lines)
|
||||
|
||||
# =========================================================================
|
||||
# Database Performance Checks
|
||||
# =========================================================================
|
||||
|
||||
def _check_n_plus_1_queries(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-001: Check for N+1 query patterns"""
|
||||
# Look for patterns like: for item in items: item.relationship.attribute
|
||||
in_for_loop = False
|
||||
for_line_num = 0
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
stripped = line.strip()
|
||||
|
||||
# Track for loops over query results
|
||||
if re.search(r'for\s+\w+\s+in\s+.*\.(all|query)', line):
|
||||
in_for_loop = True
|
||||
for_line_num = i
|
||||
elif in_for_loop and stripped and not stripped.startswith("#"):
|
||||
# Check for relationship access in loop
|
||||
if re.search(r'\.\w+\.\w+', line) and "(" not in line:
|
||||
# Could be accessing a relationship
|
||||
if any(rel in line for rel in [".customer.", ".store.", ".order.", ".product.", ".user."]):
|
||||
self._add_violation(
|
||||
rule_id="PERF-001",
|
||||
rule_name="N+1 query detection",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Possible N+1 query - relationship accessed in loop",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use joinedload() or selectinload() for eager loading",
|
||||
)
|
||||
in_for_loop = False
|
||||
|
||||
# Reset on dedent
|
||||
if in_for_loop and line and not line.startswith(" " * 4) and i > for_line_num + 1:
|
||||
in_for_loop = False
|
||||
|
||||
def _check_query_limiting(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-003: Check for unbounded query results"""
|
||||
for i, line in enumerate(lines, 1):
|
||||
if re.search(r'\.all\(\)', line):
|
||||
# Check if there's a limit or filter before
|
||||
context_start = max(0, i - 5)
|
||||
context_lines = lines[context_start:i]
|
||||
context_text = "\n".join(context_lines)
|
||||
|
||||
if "limit" not in context_text.lower() and "filter" not in context_text.lower():
|
||||
if "# noqa" in line or "# bounded" in line:
|
||||
continue
|
||||
self._add_violation(
|
||||
rule_id="PERF-003",
|
||||
rule_name="Query result limiting",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Query may return unbounded results",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Add .limit() or pagination for large tables",
|
||||
)
|
||||
|
||||
def _check_bulk_operations(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-006: Check for individual operations in loops"""
|
||||
in_for_loop = False
|
||||
for_indent = 0
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
stripped = line.strip()
|
||||
|
||||
# Track for loops
|
||||
if re.search(r'for\s+\w+\s+in\s+', line):
|
||||
in_for_loop = True
|
||||
for_indent = len(line) - len(line.lstrip())
|
||||
elif in_for_loop:
|
||||
current_indent = len(line) - len(line.lstrip()) if line.strip() else for_indent + 4
|
||||
|
||||
if current_indent <= for_indent and stripped:
|
||||
in_for_loop = False
|
||||
elif "db.add(" in line or ".save(" in line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-006",
|
||||
rule_name="Bulk operations for multiple records",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Individual db.add() in loop - consider bulk operations",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use db.add_all() or bulk_insert_mappings()",
|
||||
)
|
||||
|
||||
def _check_existence_checks(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-008: Check for inefficient existence checks"""
|
||||
patterns = [
|
||||
(r'\.count\(\)\s*>\s*0', "count() > 0"),
|
||||
(r'\.count\(\)\s*>=\s*1', "count() >= 1"),
|
||||
(r'\.count\(\)\s*!=\s*0', "count() != 0"),
|
||||
]
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
for pattern, issue in patterns:
|
||||
if re.search(pattern, line):
|
||||
self._add_violation(
|
||||
rule_id="PERF-008",
|
||||
rule_name="Use EXISTS for existence checks",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message=f"{issue} scans all rows - use EXISTS instead",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use db.scalar(exists().where(...)) or .first() is not None",
|
||||
)
|
||||
|
||||
def _check_batch_updates(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-009: Check for updates in loops"""
|
||||
in_for_loop = False
|
||||
for_indent = 0
|
||||
loop_var = ""
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
stripped = line.strip()
|
||||
|
||||
# Track for loops
|
||||
match = re.search(r'for\s+(\w+)\s+in\s+', line)
|
||||
if match:
|
||||
in_for_loop = True
|
||||
for_indent = len(line) - len(line.lstrip())
|
||||
loop_var = match.group(1)
|
||||
elif in_for_loop:
|
||||
current_indent = len(line) - len(line.lstrip()) if line.strip() else for_indent + 4
|
||||
|
||||
if current_indent <= for_indent and stripped:
|
||||
in_for_loop = False
|
||||
elif loop_var and f"{loop_var}." in line and "=" in line and "==" not in line:
|
||||
# Attribute assignment in loop
|
||||
if "# noqa" not in line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-009",
|
||||
rule_name="Batch updates instead of loops",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Individual updates in loop - consider batch update",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use .update({...}) with filters for batch updates",
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# API Performance Checks
|
||||
# =========================================================================
|
||||
|
||||
def _check_api_pagination(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-026: Check for missing pagination in list endpoints"""
|
||||
# Look for GET endpoints that return lists
|
||||
in_endpoint = False
|
||||
endpoint_line = 0
|
||||
has_pagination = False
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
# Track router decorators
|
||||
if re.search(r'@router\.(get|post)', line):
|
||||
in_endpoint = True
|
||||
endpoint_line = i
|
||||
has_pagination = False
|
||||
elif in_endpoint:
|
||||
# Check for pagination parameters
|
||||
if re.search(r'(skip|offset|page|limit)', line):
|
||||
has_pagination = True
|
||||
# Check for function end
|
||||
if re.search(r'^def\s+\w+', line.lstrip()) and i > endpoint_line + 1:
|
||||
in_endpoint = False
|
||||
# Check for .all() without pagination
|
||||
if ".all()" in line and not has_pagination:
|
||||
if "# noqa" not in line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-026",
|
||||
rule_name="Pagination required for list endpoints",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="List endpoint may lack pagination",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Add skip/limit parameters for pagination",
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# Async Performance Checks
|
||||
# =========================================================================
|
||||
|
||||
def _check_parallel_async(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-037: Check for sequential awaits that could be parallel"""
|
||||
await_count = 0
|
||||
await_lines = []
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
stripped = line.strip()
|
||||
|
||||
if stripped.startswith("await "):
|
||||
await_count += 1
|
||||
await_lines.append(i)
|
||||
|
||||
# Check for 3+ sequential awaits
|
||||
if await_count >= 3:
|
||||
# Verify they're sequential (within 5 lines of each other)
|
||||
if all(await_lines[j+1] - await_lines[j] <= 2 for j in range(len(await_lines)-1)):
|
||||
self._add_violation(
|
||||
rule_id="PERF-037",
|
||||
rule_name="Parallel independent operations",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=await_lines[0],
|
||||
message=f"{await_count} sequential awaits - consider asyncio.gather()",
|
||||
context="Multiple await statements",
|
||||
suggestion="Use asyncio.gather() for independent async operations",
|
||||
)
|
||||
await_count = 0
|
||||
await_lines = []
|
||||
elif stripped and not stripped.startswith("#"):
|
||||
# Reset on non-await, non-empty line
|
||||
if await_count > 0:
|
||||
await_count = 0
|
||||
await_lines = []
|
||||
|
||||
def _check_timeout_config(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-040: Check for missing timeouts on HTTP clients"""
|
||||
if "requests" not in content and "httpx" not in content and "aiohttp" not in content:
|
||||
return
|
||||
|
||||
patterns = [
|
||||
r'requests\.(get|post|put|delete|patch)\s*\([^)]+\)',
|
||||
r'httpx\.(get|post|put|delete|patch)\s*\([^)]+\)',
|
||||
]
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
for pattern in patterns:
|
||||
if re.search(pattern, line) and "timeout" not in line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-040",
|
||||
rule_name="Timeout configuration",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="HTTP request without timeout",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Add timeout parameter to prevent hanging requests",
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# Memory Performance Checks
|
||||
# =========================================================================
|
||||
|
||||
def _check_generators(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-046: Check for loading large datasets into memory"""
|
||||
for i, line in enumerate(lines, 1):
|
||||
# Check for .all() followed by iteration
|
||||
if ".all()" in line:
|
||||
# Look ahead for iteration
|
||||
if i < len(lines):
|
||||
next_lines = "\n".join(lines[i:min(i+3, len(lines))])
|
||||
if "for " in next_lines and "in" in next_lines:
|
||||
if "# noqa" not in line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-046",
|
||||
rule_name="Generators for large datasets",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message=".all() loads everything into memory before iteration",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use .yield_per(100) for large result sets",
|
||||
)
|
||||
|
||||
def _check_file_streaming(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-047: Check for loading entire files into memory"""
|
||||
for i, line in enumerate(lines, 1):
|
||||
if re.search(r'await\s+\w+\.read\(\)', line) and "chunk" not in line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-047",
|
||||
rule_name="Stream large file uploads",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Full file read into memory",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Stream large files: while chunk := await file.read(8192)",
|
||||
)
|
||||
|
||||
def _check_chunked_processing(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-048: Check for chunked processing in imports"""
|
||||
if "chunk" not in content.lower() and "batch" not in content.lower():
|
||||
# Check if file processes multiple records
|
||||
if "for " in content and ("csv" in content.lower() or "import" in content.lower()):
|
||||
self._add_violation(
|
||||
rule_id="PERF-048",
|
||||
rule_name="Chunked processing for imports",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=1,
|
||||
message="Import processing may benefit from chunking",
|
||||
context="File processes multiple records",
|
||||
suggestion="Process in chunks with periodic commits",
|
||||
)
|
||||
|
||||
def _check_context_managers(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-049: Check for file handles without context managers"""
|
||||
for i, line in enumerate(lines, 1):
|
||||
# Check for file open without 'with'
|
||||
if re.search(r'^\s*\w+\s*=\s*open\s*\(', line):
|
||||
if "# noqa" not in line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-049",
|
||||
rule_name="Context managers for resources",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="File opened without context manager",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use 'with open(...) as f:' to ensure cleanup",
|
||||
)
|
||||
|
||||
def _check_string_concatenation(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-051: Check for inefficient string concatenation in loops"""
|
||||
in_for_loop = False
|
||||
for_indent = 0
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
stripped = line.strip()
|
||||
|
||||
if re.search(r'for\s+\w+\s+in\s+', line):
|
||||
in_for_loop = True
|
||||
for_indent = len(line) - len(line.lstrip())
|
||||
elif in_for_loop:
|
||||
current_indent = len(line) - len(line.lstrip()) if line.strip() else for_indent + 4
|
||||
|
||||
if current_indent <= for_indent and stripped:
|
||||
in_for_loop = False
|
||||
elif re.search(r'\w+\s*\+=\s*["\']|str\s*\(', line):
|
||||
if "# noqa" not in line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-051",
|
||||
rule_name="String concatenation efficiency",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="String concatenation in loop",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use ''.join() or StringIO for many concatenations",
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# Frontend Performance Checks
|
||||
# =========================================================================
|
||||
|
||||
def _check_debounce(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-056: Check for search inputs without debounce"""
|
||||
for i, line in enumerate(lines, 1):
|
||||
if re.search(r'@(input|keyup)=".*search.*fetch', line, re.IGNORECASE):
|
||||
if "debounce" not in content.lower():
|
||||
self._add_violation(
|
||||
rule_id="PERF-056",
|
||||
rule_name="Debounce search inputs",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Search input triggers API call without debounce",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Add 300-500ms debounce to prevent excessive API calls",
|
||||
)
|
||||
|
||||
def _check_polling_intervals(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-062: Check for too-frequent polling"""
|
||||
for i, line in enumerate(lines, 1):
|
||||
match = re.search(r'setInterval\s*\([^,]+,\s*(\d+)\s*\)', line)
|
||||
if match:
|
||||
interval = int(match.group(1))
|
||||
if interval < 10000: # Less than 10 seconds
|
||||
if "# real-time" not in line and "# noqa" not in line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-062",
|
||||
rule_name="Reasonable polling intervals",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message=f"Polling interval {interval}ms is very frequent",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use >= 10 second intervals for non-critical updates",
|
||||
)
|
||||
|
||||
def _check_layout_thrashing(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-064: Check for layout thrashing patterns"""
|
||||
for i, line in enumerate(lines, 1):
|
||||
# Check for read then write patterns
|
||||
if re.search(r'(offsetHeight|offsetWidth|clientHeight|clientWidth)', line):
|
||||
if i < len(lines):
|
||||
next_line = lines[i] if i < len(lines) else ""
|
||||
if "style" in next_line:
|
||||
self._add_violation(
|
||||
rule_id="PERF-064",
|
||||
rule_name="Avoid layout thrashing",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="DOM read followed by write can cause layout thrashing",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Batch DOM reads, then batch DOM writes",
|
||||
)
|
||||
|
||||
def _check_image_lazy_loading(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-058: Check for images without lazy loading"""
|
||||
for i, line in enumerate(lines, 1):
|
||||
if re.search(r'<img\s+[^>]*src=', line):
|
||||
if 'loading="lazy"' not in line and "x-intersect" not in line:
|
||||
if "logo" not in line.lower() and "icon" not in line.lower():
|
||||
self._add_violation(
|
||||
rule_id="PERF-058",
|
||||
rule_name="Image optimization",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Image without lazy loading",
|
||||
context=line.strip()[:80],
|
||||
suggestion='Add loading="lazy" for off-screen images',
|
||||
)
|
||||
|
||||
def _check_script_loading(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""PERF-067: Check for script tags without defer/async"""
|
||||
for i, line in enumerate(lines, 1):
|
||||
if re.search(r'<script\s+[^>]*src=', line):
|
||||
if "defer" not in line and "async" not in line:
|
||||
if "alpine" not in line.lower() and "htmx" not in line.lower():
|
||||
self._add_violation(
|
||||
rule_id="PERF-067",
|
||||
rule_name="Defer non-critical JavaScript",
|
||||
severity=Severity.INFO,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Script tag without defer/async",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Add defer for non-critical scripts",
|
||||
)
|
||||
|
||||
|
||||
def main():
    """CLI entry point for the performance validator."""
    parser = argparse.ArgumentParser(
        description="Performance code validator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("-f", "--file", type=Path, help="Validate a single file")
    parser.add_argument("-d", "--folder", type=Path, help="Validate a directory")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("--errors-only", action="store_true", help="Only show errors")
    parser.add_argument("--json", action="store_true", help="JSON output")
    args = parser.parse_args()

    validator = PerformanceValidator(verbose=args.verbose)

    # Narrowest target wins: single file, then folder, then whole project.
    if args.file:
        validator.validate_file(args.file)
    elif args.folder:
        validator.validate_all(args.folder)
    else:
        validator.validate_all()

    validator.output_results(json_output=args.json, errors_only=args.errors_only)
    sys.exit(validator.get_exit_code())


if __name__ == "__main__":
    main()
|
||||
718
scripts/validate/validate_security.py
Executable file
718
scripts/validate/validate_security.py
Executable file
@@ -0,0 +1,718 @@
|
||||
#!/usr/bin/env python3
"""
Security Validator
==================
Validates code against security rules defined in .security-rules/

This script checks for common security vulnerabilities:
- Hardcoded credentials and secrets
- SQL injection patterns
- Command injection risks
- XSS vulnerabilities
- Insecure cryptography
- Authentication weaknesses
- Data exposure risks

Usage:
    python scripts/validate/validate_security.py                          # Check all files
    python scripts/validate/validate_security.py -d app/api/              # Check specific directory
    python scripts/validate/validate_security.py -f app/api/v1/auth.py    # Check single file
    python scripts/validate/validate_security.py -v                       # Verbose output
    python scripts/validate/validate_security.py --json                   # JSON output
    python scripts/validate/validate_security.py --errors-only            # Only show errors

Options:
    -f, --file PATH      Validate a single file
    -d, --folder PATH    Validate all files in a directory (recursive)
    -v, --verbose        Show detailed output including context
    --errors-only        Only show errors, suppress warnings and info
    --json               Output results as JSON
"""

import argparse
import re
import sys
from pathlib import Path

# Add parent directory to path for imports so base_validator resolves when
# this script is executed directly (not as part of a package).
sys.path.insert(0, str(Path(__file__).parent))

from base_validator import BaseValidator, Severity, ValidationResult
|
||||
|
||||
|
||||
class SecurityValidator(BaseValidator):
    """Security-focused code validator"""

    # Identity/configuration constants — presumably consumed by
    # BaseValidator's reporting and rule-loading machinery (confirm there).
    VALIDATOR_NAME = "Security Validator"
    VALIDATOR_EMOJI = "🔒"
    RULES_DIR_NAME = ".security-rules"
    CONFIG_FILE_NAME = ".security-rules.yaml"

    def validate_all(self, target_path: Path | None = None) -> ValidationResult:
        """Validate all files for security issues.

        Scans Python, JavaScript, HTML template, and configuration files
        under ``target_path`` (the project root when omitted); every helper
        appends its findings to the shared ``self.result``.

        Args:
            target_path: Directory to scan; defaults to the project root.

        Returns:
            The accumulated ValidationResult.
        """
        print(f"\n{self.VALIDATOR_EMOJI} Starting security validation...\n")

        target = target_path or self.project_root

        # Validate Python files
        self._validate_python_files(target)

        # Validate JavaScript files
        self._validate_javascript_files(target)

        # Validate HTML templates
        self._validate_template_files(target)

        # Validate configuration files
        self._validate_config_files(target)

        return self.result
|
||||
|
||||
def _validate_python_files(self, target: Path):
|
||||
"""Validate all Python files for security issues"""
|
||||
print("🐍 Validating Python files...")
|
||||
|
||||
for py_file in target.rglob("*.py"):
|
||||
if self._should_ignore_file(py_file):
|
||||
continue
|
||||
|
||||
self.result.files_checked += 1
|
||||
content = py_file.read_text()
|
||||
lines = content.split("\n")
|
||||
self._validate_python_security(py_file, content, lines)
|
||||
|
||||
def _validate_javascript_files(self, target: Path):
|
||||
"""Validate all JavaScript files for security issues"""
|
||||
print("🟨 Validating JavaScript files...")
|
||||
|
||||
for js_file in target.rglob("*.js"):
|
||||
if self._should_ignore_file(js_file):
|
||||
continue
|
||||
|
||||
self.result.files_checked += 1
|
||||
content = js_file.read_text()
|
||||
lines = content.split("\n")
|
||||
self._validate_javascript_security(js_file, content, lines)
|
||||
|
||||
def _validate_template_files(self, target: Path):
|
||||
"""Validate all HTML template files for security issues"""
|
||||
print("📄 Validating template files...")
|
||||
|
||||
for html_file in target.rglob("*.html"):
|
||||
if self._should_ignore_file(html_file):
|
||||
continue
|
||||
|
||||
self.result.files_checked += 1
|
||||
content = html_file.read_text()
|
||||
lines = content.split("\n")
|
||||
self._validate_template_security(html_file, content, lines)
|
||||
|
||||
def _validate_config_files(self, target: Path):
|
||||
"""Validate configuration files for security issues"""
|
||||
print("⚙️ Validating configuration files...")
|
||||
|
||||
config_patterns = ["*.yaml", "*.yml", "*.json", "*.toml", "*.ini", "*.env*"]
|
||||
for pattern in config_patterns:
|
||||
for config_file in target.rglob(pattern):
|
||||
if self._should_ignore_file(config_file):
|
||||
continue
|
||||
if config_file.suffix in [".yaml", ".yml", ".json"]:
|
||||
self.result.files_checked += 1
|
||||
content = config_file.read_text()
|
||||
lines = content.split("\n")
|
||||
self._validate_config_security(config_file, content, lines)
|
||||
|
||||
def _validate_file_content(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""Validate file content based on file type"""
|
||||
if file_path.suffix == ".py":
|
||||
self._validate_python_security(file_path, content, lines)
|
||||
elif file_path.suffix == ".js":
|
||||
self._validate_javascript_security(file_path, content, lines)
|
||||
elif file_path.suffix == ".html":
|
||||
self._validate_template_security(file_path, content, lines)
|
||||
elif file_path.suffix in [".yaml", ".yml", ".json"]:
|
||||
self._validate_config_security(file_path, content, lines)
|
||||
|
||||
    def _validate_python_security(self, file_path: Path, content: str, lines: list[str]):
        """Validate Python file for security issues.

        Runs every Python-specific SEC-* check over the file; each check
        appends its own violations to the shared result. Path-traversal and
        JWT checks only run for files whose path looks relevant.
        """
        file_path_str = str(file_path)

        # SEC-001: Hardcoded credentials
        self._check_hardcoded_credentials(file_path, content, lines)

        # SEC-011: SQL injection
        self._check_sql_injection(file_path, content, lines)

        # SEC-012: Command injection
        self._check_command_injection(file_path, content, lines)

        # SEC-013: Code execution
        self._check_code_execution(file_path, content, lines)

        # SEC-014: Path traversal — only for upload/file-handling modules
        if "upload" in file_path_str.lower() or "file" in file_path_str.lower():
            self._check_path_traversal(file_path, content, lines)

        # SEC-020: Unsafe deserialization
        self._check_unsafe_deserialization(file_path, content, lines)

        # SEC-021: PII logging
        self._check_pii_logging(file_path, content, lines)

        # SEC-024: Error information leakage
        self._check_error_leakage(file_path, content, lines)

        # SEC-034: HTTPS enforcement
        self._check_https_enforcement(file_path, content, lines)

        # SEC-040: Timeout configuration
        self._check_timeout_configuration(file_path, content, lines)

        # SEC-041: Weak hashing
        self._check_weak_hashing(file_path, content, lines)

        # SEC-042: Insecure random
        self._check_insecure_random(file_path, content, lines)

        # SEC-043: Hardcoded encryption keys
        self._check_hardcoded_keys(file_path, content, lines)

        # SEC-047: Certificate verification
        self._check_certificate_verification(file_path, content, lines)

        # Auth file specific checks
        if "auth" in file_path_str.lower():
            self._check_jwt_expiry(file_path, content, lines)
|
||||
|
||||
    def _validate_javascript_security(self, file_path: Path, content: str, lines: list[str]):
        """Validate JavaScript file for security issues.

        Covers sensitive URL parameters (SEC-022), eval() usage (SEC-013)
        and innerHTML assignment (SEC-015).
        """
        # SEC-022: Sensitive data in URLs
        self._check_sensitive_url_params_js(file_path, content, lines)

        # SEC-013: eval() usage. The split() guard skips lines where "eval"
        # only appears after a // comment marker (commented-out code).
        for i, line in enumerate(lines, 1):
            if re.search(r'\beval\s*\(', line) and "//" not in line.split("eval")[0]:
                self._add_violation(
                    rule_id="SEC-013",
                    rule_name="No code execution",
                    severity=Severity.ERROR,
                    file_path=file_path,
                    line_number=i,
                    message="eval() allows arbitrary code execution",
                    context=line.strip()[:80],
                    suggestion="Use JSON.parse() for JSON or other safe alternatives",
                )

        # SEC-015: innerHTML assignment — same comment-marker guard as above.
        for i, line in enumerate(lines, 1):
            if re.search(r'\.innerHTML\s*=', line) and "//" not in line.split("innerHTML")[0]:
                self._add_violation(
                    rule_id="SEC-015",
                    rule_name="XSS prevention",
                    severity=Severity.WARNING,
                    file_path=file_path,
                    line_number=i,
                    message="innerHTML can lead to XSS if used with untrusted input",
                    context=line.strip()[:80],
                    suggestion="Use textContent for text or sanitize HTML input",
                )
|
||||
|
||||
    def _validate_template_security(self, file_path: Path, content: str, lines: list[str]):
        """Validate HTML template file for security issues.

        Flags the Jinja ``|safe`` filter and Alpine ``x-html`` bindings,
        both of which bypass auto-escaping; a "sanitized" marker anywhere
        on the line suppresses the finding.
        """
        # SEC-015: XSS via |safe filter
        for i, line in enumerate(lines, 1):
            if re.search(r'\|\s*safe', line) and 'sanitized' not in line.lower():
                self._add_violation(
                    rule_id="SEC-015",
                    rule_name="XSS prevention in templates",
                    severity=Severity.WARNING,
                    file_path=file_path,
                    line_number=i,
                    message="|safe filter disables auto-escaping - ensure content is sanitized",
                    context=line.strip()[:80],
                    suggestion="Mark with {# sanitized #} comment if content is sanitized",
                )

        # SEC-015 (info level): x-html with dynamic content
        for i, line in enumerate(lines, 1):
            if re.search(r'x-html="[^"]*\w', line) and "sanitized" not in line.lower():
                self._add_violation(
                    rule_id="SEC-015",
                    rule_name="XSS prevention in templates",
                    severity=Severity.INFO,
                    file_path=file_path,
                    line_number=i,
                    message="x-html renders raw HTML - ensure content is safe",
                    context=line.strip()[:80],
                    suggestion="Use x-text for text content or sanitize HTML",
                )
|
||||
|
||||
    def _validate_config_security(self, file_path: Path, content: str, lines: list[str]):
        """Validate configuration file for security issues.

        SEC-001 for config files: looks for literal password/secret/API-key/
        token values, skipping comments and environment-variable references.
        """
        # (pattern, human-readable label used in the violation message)
        secret_patterns = [
            (r'password\s*[=:]\s*["\'][^"\']{4,}["\']', "password"),
            (r'secret\s*[=:]\s*["\'][^"\']{8,}["\']', "secret"),
            (r'api_key\s*[=:]\s*["\'][A-Za-z0-9_-]{16,}["\']', "API key"),
            (r'token\s*[=:]\s*["\'][A-Za-z0-9._-]{20,}["\']', "token"),
        ]

        for i, line in enumerate(lines, 1):
            # Skip comments (both YAML/TOML '#' and JSON-with-comments '//')
            stripped = line.strip()
            if stripped.startswith("#") or stripped.startswith("//"):
                continue

            for pattern, secret_type in secret_patterns:
                if re.search(pattern, line, re.IGNORECASE):
                    # Values sourced from the environment are fine.
                    if "${" in line or "os.getenv" in line or "environ" in line:
                        continue
                    self._add_violation(
                        rule_id="SEC-001",
                        rule_name="No hardcoded credentials",
                        severity=Severity.ERROR,
                        file_path=file_path,
                        line_number=i,
                        message=f"Possible hardcoded {secret_type} in configuration",
                        context=line.strip()[:60] + "...",
                        suggestion="Use environment variables for secrets",
                    )
|
||||
|
||||
# =========================================================================
|
||||
# Specific Security Checks
|
||||
# =========================================================================
|
||||
|
||||
    def _check_hardcoded_credentials(self, file_path: Path, content: str, lines: list[str]):
        """SEC-001: Check for hardcoded credentials.

        Matches assignments of literal password/key/token values; a line is
        exempt if it contains any of the exclusion markers (env lookups,
        settings references, empty strings, test markers, hash fields).
        """
        patterns = [
            (r'password\s*=\s*["\'][^"\']{4,}["\']', "password"),
            (r'api_key\s*=\s*["\'][A-Za-z0-9_-]{16,}["\']', "API key"),
            (r'secret_key\s*=\s*["\'][^"\']{8,}["\']', "secret key"),
            (r'auth_token\s*=\s*["\'][A-Za-z0-9._-]{20,}["\']', "auth token"),
            (r'AWS_SECRET.*=\s*["\'][^"\']+["\']', "AWS secret"),
            (r'STRIPE_.*KEY.*=\s*["\'][^"\']+["\']', "Stripe key"),
        ]

        exclude_patterns = [
            "os.getenv", "os.environ", "settings.", '""', "''",
            "# noqa", "# test", "password_hash", "example"
        ]

        for i, line in enumerate(lines, 1):
            for pattern, secret_type in patterns:
                if re.search(pattern, line, re.IGNORECASE):
                    # Check exclusions
                    if any(exc in line for exc in exclude_patterns):
                        continue
                    self._add_violation(
                        rule_id="SEC-001",
                        rule_name="No hardcoded credentials",
                        severity=Severity.ERROR,
                        file_path=file_path,
                        line_number=i,
                        message=f"Possible hardcoded {secret_type}",
                        context=line.strip()[:60] + "...",
                        suggestion="Use environment variables or secret management",
                    )
|
||||
|
||||
def _check_sql_injection(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""SEC-011: Check for SQL injection vulnerabilities"""
|
||||
patterns = [
|
||||
r'execute\s*\(\s*f["\']',
|
||||
r'execute\s*\([^)]*\s*\+\s*',
|
||||
r'execute\s*\([^)]*%[^)]*%',
|
||||
r'text\s*\(\s*f["\']',
|
||||
r'\.raw\s*\(\s*f["\']',
|
||||
]
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
for pattern in patterns:
|
||||
if re.search(pattern, line):
|
||||
if "# noqa" in line or "# safe" in line:
|
||||
continue
|
||||
self._add_violation(
|
||||
rule_id="SEC-011",
|
||||
rule_name="No raw SQL queries",
|
||||
severity=Severity.ERROR,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Possible SQL injection - use parameterized queries",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use SQLAlchemy ORM or parameterized queries with :param syntax",
|
||||
)
|
||||
|
||||
def _check_command_injection(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""SEC-012: Check for command injection vulnerabilities"""
|
||||
patterns = [
|
||||
(r'subprocess.*shell\s*=\s*True', "shell=True in subprocess"),
|
||||
(r'os\.system\s*\(', "os.system()"),
|
||||
(r'os\.popen\s*\(', "os.popen()"),
|
||||
]
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
for pattern, issue in patterns:
|
||||
if re.search(pattern, line):
|
||||
if "# noqa" in line or "# safe" in line:
|
||||
continue
|
||||
self._add_violation(
|
||||
rule_id="SEC-012",
|
||||
rule_name="No shell command injection",
|
||||
severity=Severity.ERROR,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message=f"{issue} allows command injection",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use subprocess with list arguments, shell=False",
|
||||
)
|
||||
|
||||
def _check_code_execution(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""SEC-013: Check for code execution vulnerabilities"""
|
||||
patterns = [
|
||||
(r'eval\s*\([^)]*request', "eval with request data"),
|
||||
(r'eval\s*\([^)]*input', "eval with user input"),
|
||||
(r'exec\s*\([^)]*request', "exec with request data"),
|
||||
(r'__import__\s*\([^)]*request', "__import__ with request data"),
|
||||
]
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
for pattern, issue in patterns:
|
||||
if re.search(pattern, line, re.IGNORECASE):
|
||||
self._add_violation(
|
||||
rule_id="SEC-013",
|
||||
rule_name="No code execution",
|
||||
severity=Severity.ERROR,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message=f"{issue} allows arbitrary code execution",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Never use eval/exec with user input",
|
||||
)
|
||||
|
||||
def _check_path_traversal(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""SEC-014: Check for path traversal vulnerabilities"""
|
||||
# Check if file has path operations with user input
|
||||
has_secure_filename = "secure_filename" in content or "basename" in content
|
||||
|
||||
patterns = [
|
||||
r'open\s*\([^)]*request',
|
||||
r'open\s*\([^)]*\+',
|
||||
r'Path\s*\([^)]*request',
|
||||
]
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
for pattern in patterns:
|
||||
if re.search(pattern, line, re.IGNORECASE):
|
||||
if has_secure_filename:
|
||||
continue
|
||||
self._add_violation(
|
||||
rule_id="SEC-014",
|
||||
rule_name="Path traversal prevention",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="Possible path traversal - validate file paths",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Use secure_filename() and validate paths against allowed directories",
|
||||
)
|
||||
|
||||
    def _check_unsafe_deserialization(self, file_path: Path, content: str, lines: list[str]):
        """SEC-020: Check for unsafe deserialization.

        pickle/marshal and yaml.load without SafeLoader can execute
        arbitrary code when fed untrusted data; ``# noqa`` opts a line out.
        """
        patterns = [
            (r'pickle\.loads?\s*\(', "pickle deserialization"),
            # Negative lookahead: yaml.load(..., Loader=SafeLoader) is fine.
            (r'yaml\.load\s*\([^,)]+\)(?!.*SafeLoader)', "yaml.load without SafeLoader"),
            (r'marshal\.loads?\s*\(', "marshal deserialization"),
        ]

        for i, line in enumerate(lines, 1):
            for pattern, issue in patterns:
                if re.search(pattern, line):
                    if "# noqa" in line:
                        continue
                    self._add_violation(
                        rule_id="SEC-020",
                        rule_name="Deserialization safety",
                        severity=Severity.ERROR,
                        file_path=file_path,
                        line_number=i,
                        message=f"Unsafe {issue} can lead to code execution",
                        context=line.strip()[:80],
                        suggestion="Use json.loads() or yaml.safe_load() instead",
                    )
|
||||
|
||||
    def _check_pii_logging(self, file_path: Path, content: str, lines: list[str]):
        """SEC-021: Check for PII in logs.

        Matches logger/print calls whose arguments mention passwords, card
        numbers or SSNs; hash/reset/changed variants of "password" and
        ``# noqa`` lines are exempt.
        """
        patterns = [
            (r'log\w*\.[a-z]+\([^)]*password', "password in log"),
            (r'log\w*\.[a-z]+\([^)]*credit_card', "credit card in log"),
            (r'log\w*\.[a-z]+\([^)]*ssn', "SSN in log"),
            (r'print\s*\([^)]*password', "password in print"),
        ]

        exclude = ["password_hash", "password_reset", "password_changed", "# noqa"]

        for i, line in enumerate(lines, 1):
            for pattern, issue in patterns:
                if re.search(pattern, line, re.IGNORECASE):
                    if any(exc in line for exc in exclude):
                        continue
                    self._add_violation(
                        rule_id="SEC-021",
                        rule_name="PII logging prevention",
                        severity=Severity.ERROR,
                        file_path=file_path,
                        line_number=i,
                        message=f"Possible {issue}",
                        context=line.strip()[:60] + "...",
                        suggestion="Never log sensitive data - redact or omit",
                    )
|
||||
|
||||
    def _check_error_leakage(self, file_path: Path, content: str, lines: list[str]):
        """SEC-024: Check for error information leakage.

        Flags tracebacks or raw exception strings that appear to flow into
        responses; lines that only log (``logger``) or carry ``# noqa``
        are exempt.
        """
        patterns = [
            r'traceback\.format_exc\(\).*detail',
            r'traceback\.format_exc\(\).*response',
            r'str\(e\).*HTTPException',
        ]

        for i, line in enumerate(lines, 1):
            for pattern in patterns:
                if re.search(pattern, line):
                    if "logger" in line or "# noqa" in line:
                        continue
                    self._add_violation(
                        rule_id="SEC-024",
                        rule_name="Error message information leakage",
                        severity=Severity.WARNING,
                        file_path=file_path,
                        line_number=i,
                        message="Internal error details may be exposed to users",
                        context=line.strip()[:80],
                        suggestion="Log errors internally, return generic message to users",
                    )
|
||||
|
||||
    def _check_https_enforcement(self, file_path: Path, content: str, lines: list[str]):
        """SEC-034: Check for HTTP instead of HTTPS.

        Localhost/loopback addresses, variable-interpolated hosts (``$``),
        example.com, schema references and the w3.org namespace URL are
        allowed; everything else over plain http:// is flagged.
        """
        for i, line in enumerate(lines, 1):
            if re.search(r'http://(?!localhost|127\.0\.0\.1|0\.0\.0\.0|\$)', line):
                if "# noqa" in line or "example.com" in line or "schemas" in line:
                    continue
                # XML namespace URLs are identifiers, not network endpoints.
                if "http://www.w3.org" in line:
                    continue
                self._add_violation(
                    rule_id="SEC-034",
                    rule_name="HTTPS enforcement",
                    severity=Severity.WARNING,
                    file_path=file_path,
                    line_number=i,
                    message="HTTP URL found - use HTTPS for security",
                    context=line.strip()[:80],
                    suggestion="Replace http:// with https://",
                )
|
||||
|
||||
def _check_timeout_configuration(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""SEC-040: Check for missing timeouts on external calls"""
|
||||
# Check for requests/httpx calls without timeout
|
||||
if "requests" in content or "httpx" in content or "aiohttp" in content:
|
||||
has_timeout_import = "timeout" in content.lower()
|
||||
|
||||
patterns = [
|
||||
r'requests\.(get|post|put|delete|patch)\s*\([^)]+\)(?!.*timeout)',
|
||||
r'httpx\.(get|post|put|delete|patch)\s*\([^)]+\)(?!.*timeout)',
|
||||
]
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
for pattern in patterns:
|
||||
if re.search(pattern, line) and "timeout" not in line:
|
||||
self._add_violation(
|
||||
rule_id="SEC-040",
|
||||
rule_name="Timeout configuration",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="HTTP request without timeout - can hang indefinitely",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Add timeout parameter: requests.get(url, timeout=30)",
|
||||
)
|
||||
|
||||
    def _check_weak_hashing(self, file_path: Path, content: str, lines: list[str]):
        """SEC-041: Check for weak hashing algorithms.

        MD5/SHA1 are flagged unless the line is explicitly marked as a
        non-security use (``# checksum`` / ``# file hash``) or ``# noqa``.
        """
        patterns = [
            (r'hashlib\.md5\s*\(', "MD5"),
            (r'hashlib\.sha1\s*\(', "SHA1"),
            # PyCrypto-style constructors.
            (r'MD5\.new\s*\(', "MD5"),
            (r'SHA\.new\s*\(', "SHA1"),
        ]

        for i, line in enumerate(lines, 1):
            for pattern, algo in patterns:
                if re.search(pattern, line):
                    if "# noqa" in line or "# checksum" in line or "# file hash" in line:
                        continue
                    self._add_violation(
                        rule_id="SEC-041",
                        rule_name="Strong hashing algorithms",
                        severity=Severity.WARNING,
                        file_path=file_path,
                        line_number=i,
                        message=f"{algo} is cryptographically weak",
                        context=line.strip()[:80],
                        suggestion="Use SHA-256 or stronger for security purposes",
                    )
|
||||
|
||||
    def _check_insecure_random(self, file_path: Path, content: str, lines: list[str]):
        """SEC-042: Check for insecure random number generation.

        The ``random`` module is only a problem when used for security
        material, so the check runs only in files that mention
        security-related vocabulary at all.
        """
        # Only check if file appears to deal with security
        security_context = any(
            word in content.lower()
            for word in ["token", "secret", "key", "session", "csrf", "nonce", "salt"]
        )

        if not security_context:
            return

        patterns = [
            r'random\.random\s*\(',
            r'random\.randint\s*\(',
            r'random\.choice\s*\(',
        ]

        for i, line in enumerate(lines, 1):
            for pattern in patterns:
                if re.search(pattern, line):
                    if "# noqa" in line or "# not security" in line:
                        continue
                    self._add_violation(
                        rule_id="SEC-042",
                        rule_name="Secure random generation",
                        severity=Severity.WARNING,
                        file_path=file_path,
                        line_number=i,
                        message="random module is not cryptographically secure",
                        context=line.strip()[:80],
                        suggestion="Use secrets module for security-sensitive randomness",
                    )
|
||||
|
||||
    def _check_hardcoded_keys(self, file_path: Path, content: str, lines: list[str]):
        """SEC-043: Check for hardcoded encryption keys.

        Matches literal assignments to well-known key constants; lines that
        resolve the value from the environment or settings are exempt.
        """
        patterns = [
            r'ENCRYPTION_KEY\s*=\s*["\'][^"\']+["\']',
            r'SECRET_KEY\s*=\s*["\'][A-Za-z0-9+/=]{16,}["\']',
            r'AES_KEY\s*=\s*["\']',
            r'PRIVATE_KEY\s*=\s*["\']-----BEGIN',
        ]

        exclude = ["os.getenv", "os.environ", "settings.", '""', "# test"]

        for i, line in enumerate(lines, 1):
            for pattern in patterns:
                if re.search(pattern, line):
                    if any(exc in line for exc in exclude):
                        continue
                    self._add_violation(
                        rule_id="SEC-043",
                        rule_name="No hardcoded encryption keys",
                        severity=Severity.ERROR,
                        file_path=file_path,
                        line_number=i,
                        message="Hardcoded encryption key found",
                        context=line.strip()[:50] + "...",
                        suggestion="Use environment variables for encryption keys",
                    )
|
||||
|
||||
    def _check_certificate_verification(self, file_path: Path, content: str, lines: list[str]):
        """SEC-047: Check for disabled certificate verification.

        ``verify=False``, ``CERT_NONE`` and ``check_hostname=False`` all
        disable TLS validation; test/debug-marked lines are exempt.
        """
        patterns = [
            (r'verify\s*=\s*False', "SSL verification disabled"),
            (r'CERT_NONE', "Certificate verification disabled"),
            (r'check_hostname\s*=\s*False', "Hostname verification disabled"),
        ]

        for i, line in enumerate(lines, 1):
            for pattern, issue in patterns:
                if re.search(pattern, line):
                    if "# noqa" in line or "# test" in line or "DEBUG" in line:
                        continue
                    self._add_violation(
                        rule_id="SEC-047",
                        rule_name="Certificate verification",
                        severity=Severity.ERROR,
                        file_path=file_path,
                        line_number=i,
                        message=f"{issue} - vulnerable to MITM attacks",
                        context=line.strip()[:80],
                        suggestion="Always verify SSL certificates in production",
                    )
|
||||
|
||||
def _check_jwt_expiry(self, file_path: Path, content: str, lines: list[str]):
|
||||
"""SEC-002: Check for JWT tokens without expiry"""
|
||||
if "jwt.encode" in content and "exp" not in content:
|
||||
# Find the jwt.encode line
|
||||
for i, line in enumerate(lines, 1):
|
||||
if "jwt.encode" in line:
|
||||
self._add_violation(
|
||||
rule_id="SEC-002",
|
||||
rule_name="JWT expiry enforcement",
|
||||
severity=Severity.WARNING,
|
||||
file_path=file_path,
|
||||
line_number=i,
|
||||
message="JWT token may not have expiration claim",
|
||||
context=line.strip()[:80],
|
||||
suggestion="Include 'exp' claim with appropriate expiration",
|
||||
)
|
||||
break
|
||||
|
||||
    def _check_sensitive_url_params_js(self, file_path: Path, content: str, lines: list[str]):
        """SEC-022: Check for sensitive data in URLs (JavaScript).

        Flags password/token/api_key query parameters; the lookaheads let
        ``token_type``-style parameter names through.
        """
        patterns = [
            r'\?password=',
            r'&password=',
            r'\?token=(?!type)',
            r'&token=(?!type)',
            r'\?api_key=',
            r'&api_key=',
        ]

        for i, line in enumerate(lines, 1):
            for pattern in patterns:
                if re.search(pattern, line):
                    self._add_violation(
                        rule_id="SEC-022",
                        rule_name="Sensitive data in URLs",
                        severity=Severity.ERROR,
                        file_path=file_path,
                        line_number=i,
                        message="Sensitive data in URL query parameters",
                        context=line.strip()[:80],
                        suggestion="Send sensitive data in request body or headers",
                    )
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the security validator.

    Parses CLI flags, validates the requested scope (file, folder, or the
    whole project), emits the report, and exits with the validator's code.
    """
    parser = argparse.ArgumentParser(
        description="Security code validator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("-f", "--file", type=Path, help="Validate a single file")
    parser.add_argument("-d", "--folder", type=Path, help="Validate a directory")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("--errors-only", action="store_true", help="Only show errors")
    parser.add_argument("--json", action="store_true", help="JSON output")
    args = parser.parse_args()

    validator = SecurityValidator(verbose=args.verbose)

    # Most specific target wins; default is the whole project.
    if args.file is not None:
        validator.validate_file(args.file)
    elif args.folder is not None:
        validator.validate_all(args.folder)
    else:
        validator.validate_all()

    validator.output_results(json_output=args.json, errors_only=args.errors_only)
    sys.exit(validator.get_exit_code())


if __name__ == "__main__":
    main()
|
||||
36
scripts/validate/validators/__init__.py
Normal file
36
scripts/validate/validators/__init__.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# scripts/validators/__init__.py
"""
Architecture Validators Package
===============================

This package contains domain-specific validators for the architecture validation system.
Each validator module handles a specific category of rules.

Modules:
- base: Base classes and helpers (Severity, Violation, ValidationResult)
- api_validator: API endpoint rules (API-*)
- service_validator: Service layer rules (SVC-*)
- model_validator: Model rules (MDL-*)
- exception_validator: Exception handling rules (EXC-*)
- naming_validator: Naming convention rules (NAM-*)
- auth_validator: Auth and multi-tenancy rules (AUTH-*, MT-*)
- middleware_validator: Middleware rules (MDW-*)
- frontend_validator: Frontend rules (JS-*, TPL-*, FE-*, CSS-*)
- language_validator: Language/i18n rules (LANG-*)
"""

from .base import (
    BaseValidator,
    FileResult,
    Severity,
    ValidationResult,
    Violation,
)

# Public API re-exported at package level.
__all__ = [
    "Severity",
    "Violation",
    "FileResult",
    "ValidationResult",
    "BaseValidator",
]
|
||||
314
scripts/validate/validators/base.py
Normal file
314
scripts/validate/validators/base.py
Normal file
@@ -0,0 +1,314 @@
|
||||
# scripts/validators/base.py
|
||||
"""
|
||||
Base classes and helpers for architecture validation.
|
||||
|
||||
This module contains:
|
||||
- Severity: Enum for validation severity levels
|
||||
- Violation: Dataclass for representing rule violations
|
||||
- FileResult: Dataclass for single file validation results
|
||||
- ValidationResult: Dataclass for overall validation results
|
||||
- BaseValidator: Base class for domain-specific validators
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
class Severity(Enum):
    """Validation severity levels, in decreasing order of severity."""

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
|
||||
|
||||
|
||||
@dataclass
class Violation:
    """Represents an architectural rule violation."""

    rule_id: str          # rule identifier, e.g. "SEC-001"
    rule_name: str        # human-readable rule title
    severity: Severity
    file_path: Path       # file where the violation was found
    line_number: int      # 1-based line number of the finding
    message: str          # what is wrong
    context: str = ""     # offending source snippet (usually truncated)
    suggestion: str = ""  # optional remediation hint
|
||||
|
||||
|
||||
@dataclass
class FileResult:
    """Per-file validation outcome: error/warning tallies plus display helpers."""

    file_path: Path
    errors: int = 0
    warnings: int = 0

    @property
    def passed(self) -> bool:
        """A file passes when it produced no error-level violations."""
        return self.errors == 0

    @property
    def status(self) -> str:
        """Human-readable status; the '*' marks a pass that has warnings."""
        if self.errors > 0:
            return "FAILED"
        return "PASSED*" if self.warnings > 0 else "PASSED"

    @property
    def status_icon(self) -> str:
        """Emoji counterpart of ``status`` for console output."""
        if self.errors > 0:
            return "❌"
        return "⚠️" if self.warnings > 0 else "✅"
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Aggregate outcome of an architecture validation run."""

    violations: list[Violation] = field(default_factory=list)
    files_checked: int = 0
    rules_applied: int = 0
    file_results: list[FileResult] = field(default_factory=list)

    def has_errors(self) -> bool:
        """True when at least one ERROR-level violation was recorded."""
        for violation in self.violations:
            if violation.severity == Severity.ERROR:
                return True
        return False

    def has_warnings(self) -> bool:
        """True when at least one WARNING-level violation was recorded."""
        for violation in self.violations:
            if violation.severity == Severity.WARNING:
                return True
        return False
|
||||
|
||||
|
||||
class BaseValidator:
    """
    Base class for domain-specific validators.

    Provides common functionality for all validators including:
    - Violation tracking
    - File filtering
    - Rule lookup
    - Common pattern matching utilities

    NOTE(review): methods below use ``re`` and ``json`` — confirm both are
    imported at the top of this module.
    """

    def __init__(
        self,
        config: dict[str, Any],
        result: ValidationResult,
        project_root: Path,
        verbose: bool = False,
    ):
        """
        Initialize validator with shared state.

        Args:
            config: Loaded architecture rules configuration
            result: Shared ValidationResult for tracking violations
            project_root: Root path of the project
            verbose: Whether to show verbose output
        """
        self.config = config
        self.result = result
        self.project_root = project_root
        self.verbose = verbose

    def validate(self, target_path: Path) -> None:
        """
        Run validation on target path.

        Must be implemented by subclasses.

        Args:
            target_path: Path to validate (file or directory)
        """
        raise NotImplementedError("Subclasses must implement validate()")

    def _add_violation(
        self,
        rule_id: str,
        rule_name: str,
        severity: Severity,
        file_path: Path,
        line_number: int,
        message: str,
        context: str = "",
        suggestion: str = "",
    ) -> None:
        """Add a violation to the shared result.

        Args:
            rule_id: Rule identifier (e.g. config rule ``id``)
            rule_name: Human-readable rule name
            severity: Violation severity level
            file_path: File where the violation was found
            line_number: 1-based line number of the violation
            message: Explanation of the violation
            context: Source snippet surrounding the violation
            suggestion: Suggested fix, if any
        """
        violation = Violation(
            rule_id=rule_id,
            rule_name=rule_name,
            severity=severity,
            file_path=file_path,
            line_number=line_number,
            message=message,
            context=context,
            suggestion=suggestion,
        )
        self.result.violations.append(violation)

    def _should_ignore_file(self, file_path: Path) -> bool:
        """Check if file should be ignored.

        Files inside virtual environments are always skipped; everything
        else is matched against the ``ignore.files`` patterns from config.
        """
        file_path_str = str(file_path)

        # Virtual-env directories are always ignored. FIX: these checks used
        # to live inside the pattern loop below, so they ran redundantly once
        # per pattern and never ran at all when the config supplied no ignore
        # patterns.
        if "/.venv/" in file_path_str or file_path_str.startswith(".venv/"):
            return True
        if "/venv/" in file_path_str or file_path_str.startswith("venv/"):
            return True

        ignore_patterns = self.config.get("ignore", {}).get("files", [])
        for pattern in ignore_patterns:
            # Path.match checks the pattern against the tail of the path.
            if file_path.match(pattern):
                return True

        return False

    def _get_rule(self, rule_id: str) -> dict[str, Any] | None:
        """Get rule configuration by ID.

        Scans every known rule category in the config; returns the first
        rule dict whose ``id`` matches, or None if no category contains it.
        """
        for category in [
            "api_endpoint_rules",
            "service_layer_rules",
            "model_rules",
            "exception_rules",
            "naming_rules",
            "auth_rules",
            "middleware_rules",
            "javascript_rules",
            "template_rules",
            "frontend_component_rules",
            "styling_rules",
            "language_rules",
            "multi_tenancy_rules",
            "code_quality_rules",
        ]:
            rules = self.config.get(category, [])
            for rule in rules:
                if rule.get("id") == rule_id:
                    return rule
        return None

    def _get_files(self, target_path: Path, pattern: str) -> list[Path]:
        """Get files matching a glob pattern, excluding ignored files"""
        files = list(target_path.glob(pattern))
        return [f for f in files if not self._should_ignore_file(f)]

    def _find_decorators(self, content: str) -> list[tuple[int, str, str]]:
        """
        Find all function decorators and their associated functions.

        A decorator line is paired with the first ``def``/``async def``
        within the next 9 lines; stacked decorators are skipped over, and
        the search stops at the first non-decorator, non-comment line.

        Returns list of (line_number, decorator, function_name) tuples,
        where line_number is the 1-based line of the decorator itself.
        """
        results = []
        lines = content.split("\n")

        i = 0
        while i < len(lines):
            line = lines[i].strip()
            if line.startswith("@"):
                decorator = line
                # Look for the function definition in the following lines.
                for j in range(i + 1, min(i + 10, len(lines))):
                    next_line = lines[j].strip()
                    if next_line.startswith("def ") or next_line.startswith(
                        "async def "
                    ):
                        # Extract function name
                        match = re.search(r"(?:async\s+)?def\s+(\w+)", next_line)
                        if match:
                            func_name = match.group(1)
                            results.append((i + 1, decorator, func_name))
                        break
                    if next_line.startswith("@"):
                        # Multiple decorators - continue to next
                        continue
                    if next_line and not next_line.startswith("#"):
                        # Non-decorator, non-comment line - stop looking
                        break
            i += 1

        return results

    def _check_pattern_in_lines(
        self,
        file_path: Path,
        lines: list[str],
        pattern: str,
        rule_id: str,
        rule_name: str,
        severity: Severity,
        message: str,
        suggestion: str = "",
        exclude_comments: bool = True,
        exclude_patterns: list[str] | None = None,
    ) -> None:
        """
        Check for pattern violations in file lines.

        Args:
            file_path: Path to the file
            lines: File content split by lines
            pattern: Regex pattern to search for
            rule_id: Rule identifier
            rule_name: Human-readable rule name
            severity: Violation severity
            message: Violation message
            suggestion: Suggested fix
            exclude_comments: Skip lines that are comments
            exclude_patterns: Additional patterns that mark lines to skip
        """
        exclude_patterns = exclude_patterns or []

        for i, line in enumerate(lines, 1):
            stripped = line.strip()

            # Skip comments if requested
            if exclude_comments and stripped.startswith("#"):
                continue

            # Skip lines containing any exclusion substring.
            if any(exc in line for exc in exclude_patterns):
                continue

            if re.search(pattern, line):
                self._add_violation(
                    rule_id=rule_id,
                    rule_name=rule_name,
                    severity=severity,
                    file_path=file_path,
                    line_number=i,
                    message=message,
                    # Cap the snippet at 80 chars to keep reports readable.
                    context=stripped[:80],
                    suggestion=suggestion,
                )

    def _is_valid_json(self, file_path: Path) -> tuple[bool, str]:
        """
        Check if a file contains valid JSON.

        Returns (is_valid, error_message) tuple; error_message is empty on
        success and includes the offending line for JSON syntax errors.
        """
        try:
            with open(file_path, encoding="utf-8") as f:
                json.load(f)
            return True, ""
        except json.JSONDecodeError as e:
            return False, f"Line {e.lineno}: {e.msg}"
        except Exception as e:
            # Non-JSON failures (missing file, permission, decode errors).
            return False, str(e)
|
||||
156
scripts/validate/verify_critical_imports.py
Executable file
156
scripts/validate/verify_critical_imports.py
Executable file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Verify Critical Imports
|
||||
========================
|
||||
Checks that critical imports (re-exports) haven't been removed by linters.
|
||||
|
||||
This script verifies that essential import statements exist in key files,
|
||||
preventing issues where tools like Ruff might remove imports that appear
|
||||
unused but are actually critical for the application structure.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Define critical imports that must exist
# Format: {file_path: [(import_line, description)]}
# These re-exports look unused to linters (Ruff F401) but are load-bearing
# for models/Alembic; file paths are relative to the project root.
CRITICAL_IMPORTS: dict[str, list[tuple[str, str]]] = {
    "models/database/base.py": [
        ("from app.core.database import Base", "Re-export Base for all models"),
    ],
    "models/__init__.py": [
        ("from .database.base import Base", "Export Base for Alembic and models"),
    ],
    "models/database/__init__.py": [
        ("from .base import Base", "Export Base from database package"),
    ],
    "app/core/database.py": [
        (
            "from sqlalchemy.ext.declarative import declarative_base",
            "SQLAlchemy Base declaration",
        ),
        # Note: Might also use sqlalchemy.orm declarative_base in newer versions
        # (the verifier accepts that alternative form as well).
    ],
}
|
||||
|
||||
|
||||
class ImportVerifier:
    """Verifies critical imports exist in codebase"""

    def __init__(self, project_root: Path):
        """
        Args:
            project_root: Repository root; CRITICAL_IMPORTS paths are
                resolved relative to this directory.
        """
        self.project_root = project_root
        # Human-readable descriptions of every problem found, for the summary.
        self.issues: list[str] = []

    def verify_all(self) -> bool:
        """Verify all critical imports.

        Returns:
            True when every file in CRITICAL_IMPORTS passes verification.
        """
        print("🔍 Verifying critical imports...\n")

        all_good = True
        for file_path, imports in CRITICAL_IMPORTS.items():
            if not self.verify_file(file_path, imports):
                all_good = False

        return all_good

    def verify_file(
        self, file_path: str, required_imports: list[tuple[str, str]]
    ) -> bool:
        """Verify imports in a single file.

        Args:
            file_path: Path relative to the project root.
            required_imports: (import_line, description) pairs that must
                appear in the file, either verbatim or in a known
                alternative form (see _get_import_alternatives).

        Returns:
            True when the file exists and contains every required import.
        """
        full_path = self.project_root / file_path

        if not full_path.exists():
            self.issues.append(f"❌ File not found: {file_path}")
            print(f"❌ {file_path}: File not found")
            return False

        # FIX: read as UTF-8 explicitly. read_text() without an encoding uses
        # the platform default (e.g. cp1252 on Windows) and can raise
        # UnicodeDecodeError on this repository's UTF-8 sources.
        content = full_path.read_text(encoding="utf-8")
        file_ok = True

        for import_line, description in required_imports:
            # Check for exact import or variations
            if import_line in content:
                print(f"✅ {file_path}: {import_line}")
            else:
                # Check for alternative import formats
                alternatives = self._get_import_alternatives(import_line)
                found = any(alt in content for alt in alternatives)

                if found:
                    print(f"✅ {file_path}: {import_line} (alternative format)")
                else:
                    self.issues.append(
                        f"❌ {file_path}: Missing critical import\n"
                        f" Expected: {import_line}\n"
                        f" Purpose: {description}"
                    )
                    print(f"❌ {file_path}: Missing {import_line}")
                    file_ok = False

        print()
        return file_ok

    def _get_import_alternatives(self, import_line: str) -> list[str]:
        """Get alternative formats for an import.

        Always includes the original line; adds a parenthesized variant for
        ``from x import y`` forms, an ``... as`` prefix variant, and both
        SQLAlchemy spellings of ``declarative_base``.
        """
        alternatives = [import_line]

        # Handle 'from x import y' vs 'from x import (y)'
        if "from" in import_line and "import" in import_line:
            parts = import_line.split("import")
            if len(parts) == 2:
                from_part = parts[0].strip()
                import_part = parts[1].strip()

                # Add parenthesized version
                alternatives.append(f"{from_part} import ({import_part})")

        # Add version with 'as' clause (substring match: "import X as ...").
        alternatives.append(f"{import_line} as")

        # Handle declarative_base alternatives (sqlalchemy changes)
        if "declarative_base" in import_line:
            # Old style
            alternatives.append(
                "from sqlalchemy.ext.declarative import declarative_base"
            )
            # New style (SQLAlchemy 1.4+)
            alternatives.append("from sqlalchemy.orm import declarative_base")

        return alternatives

    def print_summary(self):
        """Print summary of verification"""
        print("\n" + "=" * 80)
        print("📊 CRITICAL IMPORTS VERIFICATION SUMMARY")
        print("=" * 80)

        if not self.issues:
            print("\n✅ All critical imports verified successfully!")
            print("\nAll re-export patterns are intact.")
        else:
            print(f"\n❌ Found {len(self.issues)} issue(s):\n")
            for issue in self.issues:
                print(issue)
            print()

            print("💡 RESOLUTION:")
            print(" 1. Check if imports were removed by linter (Ruff)")
            print(" 2. Add missing imports back to the files")
            print(" 3. Update pyproject.toml to ignore F401 for these files")
            print(" 4. Run this script again to verify")

        print("=" * 80)
|
||||
|
||||
|
||||
def main():
    """Entry point: verify all critical imports and exit 0 (ok) / 1 (issues)."""
    # FIX: this script now lives in scripts/validate/, so the project root is
    # three levels up (validate/ -> scripts/ -> repo root). The previous
    # Path(__file__).parent.parent resolved to scripts/ after the
    # reorganization, making every CRITICAL_IMPORTS path report "not found".
    project_root = Path(__file__).resolve().parent.parent.parent

    verifier = ImportVerifier(project_root)
    success = verifier.verify_all()
    verifier.print_summary()

    sys.exit(0 if success else 1)
|
||||
|
||||
|
||||
# Standard script entry guard: run verification only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user