feat: add audit validation rules and script

Import audit rules from scaffold project covering:
- Access control validation
- Audit trail requirements
- Change management policies
- Compliance checks
- Data governance rules
- Documentation requirements
- Third-party dependency checks

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-28 09:21:03 +01:00
parent ff2f475ae4
commit 92434c8971
10 changed files with 2055 additions and 437 deletions

View File

@@ -1,465 +1,111 @@
#!/usr/bin/env python3
"""
Base Validator
==============
Shared base class for all validation scripts (architecture, security, performance).
Base Validator Class
Provides common functionality for:
- Loading YAML configuration
- File pattern matching
- Violation tracking
- Output formatting (human-readable and JSON)
Shared functionality for all validators.
"""
import json
import re
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any
import yaml
class Severity(Enum):
    """Severity level attached to a reported violation."""

    # Member values are the lowercase strings used when a severity is
    # serialized via ``.value``.
    ERROR = "error"
    WARNING = "warning"
    INFO = "info"
@dataclass
class Violation:
    """One finding reported against a file by a validation rule."""

    rule_id: str  # identifier of the rule that produced the finding
    rule_name: str  # display name of that rule
    severity: Severity
    file_path: Path
    line_number: int
    message: str
    context: str = ""  # optional excerpt associated with the finding
    suggestion: str = ""  # optional remediation hint
@dataclass
class FileResult:
    """Per-file tally of findings, with derived pass/fail presentation."""

    file_path: Path
    errors: int = 0
    warnings: int = 0
    info: int = 0

    @property
    def passed(self) -> bool:
        """Return True when the file has no error-level findings."""
        return self.errors == 0

    @property
    def status(self) -> str:
        """Return "FAILED", "PASSED*" (warnings only) or "PASSED"."""
        if self.errors > 0:
            return "FAILED"
        if self.warnings > 0:
            return "PASSED*"
        return "PASSED"

    @property
    def status_icon(self) -> str:
        """Return an icon for the file's status.

        The failed and clean states previously both produced an empty
        string, which made them indistinguishable; they now use the same
        cross/check marks the validator prints elsewhere.
        """
        if self.errors > 0:
            return "❌"
        if self.warnings > 0:
            return "⚠️"
        return "✅"
@dataclass
class ValidationResult:
    """Accumulated outcome of a validation run."""

    violations: list[Violation] = field(default_factory=list)
    files_checked: int = 0
    rules_applied: int = 0
    file_results: list[FileResult] = field(default_factory=list)

    def _count(self, level: Severity) -> int:
        """Count recorded violations whose severity equals *level*."""
        return sum(1 for v in self.violations if v.severity == level)

    def has_errors(self) -> bool:
        """Return True if at least one violation is error-level."""
        for v in self.violations:
            if v.severity == Severity.ERROR:
                return True
        return False

    def has_warnings(self) -> bool:
        """Return True if at least one violation is warning-level."""
        for v in self.violations:
            if v.severity == Severity.WARNING:
                return True
        return False

    def error_count(self) -> int:
        """Return the number of error-level violations."""
        return self._count(Severity.ERROR)

    def warning_count(self) -> int:
        """Return the number of warning-level violations."""
        return self._count(Severity.WARNING)

    def info_count(self) -> int:
        """Return the number of info-level violations."""
        return self._count(Severity.INFO)
class BaseValidator(ABC):
"""Abstract base validator class"""
"""Base class for architecture, security, and performance validators."""
# Subclasses should override these
VALIDATOR_NAME = "Base Validator"
VALIDATOR_EMOJI = "🔍"
RULES_DIR_NAME = ".rules"
CONFIG_FILE_NAME = ".rules.yaml"
def __init__(self, rules_dir: str, project_root: Path | None = None):
self.rules_dir = rules_dir
self.project_root = project_root or Path.cwd()
self.rules: list[dict[str, Any]] = []
self.errors: list[dict[str, Any]] = []
self.warnings: list[dict[str, Any]] = []
def __init__(self, config_path: Path | None = None, verbose: bool = False):
    """Initialise the validator and load its configuration.

    Args:
        config_path: Single-file configuration to use. When omitted,
            ``CONFIG_FILE_NAME`` under the current working directory is
            used.
        verbose: Stored on the instance as ``self.verbose``.
    """
    self.project_root = Path.cwd()
    self.config_path = config_path or self.project_root / self.CONFIG_FILE_NAME
    self.verbose = verbose
    # _load_config reads project_root and config_path, so it must run
    # after both are assigned.
    self.config = self._load_config()
    self.result = ValidationResult()
def _load_config(self) -> dict[str, Any]:
    """Load validation rules from YAML configuration.

    Two layouts are supported:

    1. Split directory mode: ``RULES_DIR_NAME`` under the project root
       exists as a directory and is loaded via
       ``_load_config_from_directory``. This takes precedence.
    2. Single file mode (legacy): ``self.config_path`` is parsed.

    Returns:
        The parsed configuration. An empty single-file configuration
        yields an empty dict.

    Raises:
        SystemExit: With code 1 when neither the rules directory nor the
            configuration file exists.
    """
    rules_dir = self.project_root / self.RULES_DIR_NAME
    if rules_dir.is_dir():
        return self._load_config_from_directory(rules_dir)

    if not self.config_path.exists():
        print(f"❌ Configuration file not found: {self.config_path}")
        print(f" (Also checked for directory: {rules_dir})")
        sys.exit(1)

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(self.config_path, encoding="utf-8") as f:
        # An empty YAML document parses to None; normalise it so the
        # lookup below cannot fail (the directory loader does the same).
        config = yaml.safe_load(f) or {}

    print(f"📋 Loaded {self.VALIDATOR_NAME}: {config.get('project', 'unknown')}")
    return config
def _load_config_from_directory(self, rules_dir: Path) -> dict[str, Any]:
    """Load and merge configuration from the YAML files in *rules_dir*.

    ``_main.yaml`` (if present) supplies the base configuration. Every
    other ``*.yaml`` file is then merged in sorted order:

    - keys ending in ``_rules`` whose value is a list are appended to the
      list already held under that key;
    - any other key is added only if the base configuration lacks it.

    Args:
        rules_dir: Directory containing the split YAML files.

    Returns:
        The merged configuration dict.
    """
    config: dict[str, Any] = {}

    main_file = rules_dir / "_main.yaml"
    if main_file.exists():
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(main_file, encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}

    yaml_files = sorted(rules_dir.glob("*.yaml"))
    for yaml_file in yaml_files:
        if yaml_file.name == "_main.yaml":
            continue  # already loaded as the base

        with open(yaml_file, encoding="utf-8") as f:
            file_config = yaml.safe_load(f) or {}

        for key, value in file_config.items():
            if key.endswith("_rules") and isinstance(value, list):
                config.setdefault(key, []).extend(value)
            elif key not in config:
                config[key] = value

    print(f"📋 Loaded {self.VALIDATOR_NAME}: {config.get('project', 'unknown')}")
    # The reported count includes _main.yaml when it is present.
    print(f" (from {len(yaml_files)} files in {rules_dir.name}/)")
    return config
def _should_ignore_file(self, file_path: Path) -> bool:
    """Return True if *file_path* matches an ignore pattern from the config.

    Patterns come from ``config["ignore"]["files"]``. The path is made
    relative to the project root when possible and matched as follows:

    - a pattern containing ``*`` is matched with ``fnmatch``; if it starts
      with ``**/`` it is also tried without that prefix, so
      ``**/.venv/**`` matches ``.venv/foo`` at the project root;
    - any other pattern is treated as a plain substring of the path.

    A missing or null ``ignore`` / ``files`` entry means nothing is
    ignored.
    """
    import fnmatch

    # ``or`` also covers keys that are present but null in the YAML.
    ignore_config = self.config.get("ignore") or {}
    ignore_files = ignore_config.get("files") or []

    try:
        rel_path = file_path.relative_to(self.project_root)
    except ValueError:
        # Not under the project root: match against the path as given.
        rel_path = file_path

    # Forward slashes on every platform, so substring patterns written
    # with "/" are not defeated by Windows path separators.
    rel_path_str = rel_path.as_posix()

    for pattern in ignore_files:
        if "*" not in pattern:
            if pattern in rel_path_str:
                return True
            continue

        if fnmatch.fnmatch(rel_path_str, pattern):
            return True

        # "**/x" requires a "/" before x under fnmatch, so retry without
        # the prefix to cover paths that start with x.
        if pattern.startswith("**/") and fnmatch.fnmatch(rel_path_str, pattern[3:]):
            return True

    return False
def _add_violation(
self,
rule_id: str,
rule_name: str,
severity: Severity,
file_path: Path,
line_number: int,
message: str,
context: str = "",
suggestion: str = "",
):
"""Add a violation to the results"""
# Check for inline noqa comment
if f"noqa: {rule_id.lower()}" in context.lower():
def load_rules(self) -> None:
"""Load rules from YAML files."""
rules_path = self.project_root / self.rules_dir
if not rules_path.exists():
print(f"Rules directory not found: {rules_path}")
return
self.result.violations.append(
Violation(
rule_id=rule_id,
rule_name=rule_name,
severity=severity,
file_path=file_path,
line_number=line_number,
message=message,
context=context,
suggestion=suggestion,
)
)
for rule_file in rules_path.glob("*.yaml"):
if rule_file.name.startswith("_"):
continue # Skip main config
def _get_rule(self, rule_id: str) -> dict | None:
"""Look up a rule by ID across all rule categories"""
for key, value in self.config.items():
if key.endswith("_rules") and isinstance(value, list):
for rule in value:
if rule.get("id") == rule_id:
return rule
return None
def _check_pattern_in_file(
    self,
    file_path: Path,
    content: str,
    lines: list[str],
    pattern: str,
    rule_id: str,
    rule_name: str,
    severity: Severity,
    message: str,
    suggestion: str = "",
    exclude_patterns: list[str] | None = None,
) -> None:
    """Report a violation for every line that matches a regex.

    Args:
        file_path: File the lines came from; passed through to the
            violation.
        content: Full file text. Not used by this check.
        lines: The file split into lines; numbering starts at 1.
        pattern: Regular expression, searched case-insensitively.
        rule_id: Identifier recorded on each violation.
        rule_name: Display name recorded on each violation.
        severity: Severity recorded on each violation.
        message: Message recorded on each violation.
        suggestion: Optional remediation hint.
        exclude_patterns: Plain substrings; a matching line containing
            any of them is skipped.
    """
    exclude_patterns = exclude_patterns or []
    # Compile once instead of handing the pattern string to re per line.
    matcher = re.compile(pattern, re.IGNORECASE)

    for i, line in enumerate(lines, 1):
        if not matcher.search(line):
            continue
        if any(exclude in line for exclude in exclude_patterns):
            continue
        self._add_violation(
            rule_id=rule_id,
            rule_name=rule_name,
            severity=severity,
            file_path=file_path,
            line_number=i,
            message=message,
            # Keep the reported excerpt short.
            context=line.strip()[:100],
            suggestion=suggestion,
        )
with open(rule_file) as f:
data = yaml.safe_load(f)
if data and "rules" in data:
self.rules.extend(data["rules"])
@abstractmethod
def validate_all(self, target_path: Path | None = None) -> ValidationResult:
    """Validate all files in a directory; subclasses must implement this.

    Args:
        target_path: Directory to validate. How ``None`` is interpreted
            is left to the implementing subclass.
    """
def validate(self) -> bool:
"""Run validation. Returns True if passed."""
def validate_file(self, file_path: Path, quiet: bool = False) -> ValidationResult:
"""Validate a single file"""
if not file_path.exists():
if not quiet:
print(f"❌ File not found: {file_path}")
return self.result
if not file_path.is_file():
if not quiet:
print(f"❌ Not a file: {file_path}")
return self.result
if not quiet:
print(f"\n{self.VALIDATOR_EMOJI} Validating single file: {file_path}\n")
# Resolve file path to absolute
file_path = file_path.resolve()
if self._should_ignore_file(file_path):
if not quiet:
print("⏭️ File is in ignore list, skipping")
return self.result
self.result.files_checked += 1
# Track violations before this file
violations_before = len(self.result.violations)
content = file_path.read_text()
lines = content.split("\n")
# Call subclass-specific validation
self._validate_file_content(file_path, content, lines)
# Calculate violations for this file
file_violations = self.result.violations[violations_before:]
errors = sum(1 for v in file_violations if v.severity == Severity.ERROR)
warnings = sum(1 for v in file_violations if v.severity == Severity.WARNING)
info = sum(1 for v in file_violations if v.severity == Severity.INFO)
# Track file result
self.result.file_results.append(
FileResult(file_path=file_path, errors=errors, warnings=warnings, info=info)
def add_error(
self, rule_id: str, message: str, file: str = "", line: int = 0
) -> None:
"""Add an error."""
self.errors.append(
{
"rule_id": rule_id,
"message": message,
"file": file,
"line": line,
"severity": "error",
}
)
return self.result
@abstractmethod
def _validate_file_content(self, file_path: Path, content: str, lines: list[str]) -> None:
    """Apply validator-specific checks to one file; subclasses must implement.

    Args:
        file_path: The file being validated.
        content: The file's full text.
        lines: The file's text split into lines.
    """
    return None
def output_results(self, json_output: bool = False, errors_only: bool = False):
    """Emit the validation results as JSON or as a human-readable report.

    Args:
        json_output: When true, print the JSON form.
        errors_only: Passed to the human-readable report; has no effect
            on JSON output.
    """
    if not json_output:
        self._output_human(errors_only)
        return
    self._output_json()
def _output_json(self):
    """Print the results as an indented JSON document.

    Field names follow what the code quality service expects:
    ``file_path`` (not ``file``), ``line_number`` (not ``line``) and a
    ``total_violations`` count. File paths are relative to the project
    root where possible.
    """
    try:
        rel_base = self.project_root
    except Exception:
        rel_base = Path.cwd()

    def get_relative_path(file_path: Path) -> str:
        """Return *file_path* relative to the base, or unchanged if outside it."""
        try:
            relative = file_path.relative_to(rel_base)
        except ValueError:
            return str(file_path)
        return str(relative)

    result = self.result

    serialized = []
    for v in result.violations:
        serialized.append(
            {
                "rule_id": v.rule_id,
                "rule_name": v.rule_name,
                "severity": v.severity.value,
                "file_path": get_relative_path(v.file_path),
                "line_number": v.line_number,
                "message": v.message,
                "context": v.context,
                "suggestion": v.suggestion,
            }
        )

    output = {
        "validator": self.VALIDATOR_NAME,
        "files_checked": result.files_checked,
        "total_violations": len(result.violations),
        "errors": result.error_count(),
        "warnings": result.warning_count(),
        "info": result.info_count(),
        "violations": serialized,
    }
    print(json.dumps(output, indent=2))
def _output_human(self, errors_only: bool = False):
"""Output results in human-readable format"""
print("\n" + "=" * 80)
print(f"📊 {self.VALIDATOR_NAME.upper()} REPORT")
print("=" * 80)
errors = [v for v in self.result.violations if v.severity == Severity.ERROR]
warnings = [v for v in self.result.violations if v.severity == Severity.WARNING]
info = [v for v in self.result.violations if v.severity == Severity.INFO]
print(
f"\nFiles checked: {self.result.files_checked}"
)
print(
f"Findings: {len(errors)} errors, {len(warnings)} warnings, {len(info)} info"
def add_warning(
self, rule_id: str, message: str, file: str = "", line: int = 0
) -> None:
"""Add a warning."""
self.warnings.append(
{
"rule_id": rule_id,
"message": message,
"file": file,
"line": line,
"severity": "warning",
}
)
if errors:
print(f"\n\n❌ ERRORS ({len(errors)}):")
print("-" * 80)
for v in errors:
self._print_violation(v)
def add_info(
    self, rule_id: str, message: str, file: str = "", line: int = 0
) -> None:
    """Record an informational note.

    Args:
        rule_id: Identifier of the rule the note relates to.
        message: Text of the note.
        file: Optional file the note refers to.
        line: Optional line number within that file.
    """
    # NOTE(review): the entry goes into ``self.warnings`` tagged
    # severity "info", so it is reported together with warnings --
    # confirm this is intended rather than a separate info list.
    note = dict(
        rule_id=rule_id,
        message=message,
        file=file,
        line=line,
        severity="info",
    )
    self.warnings.append(note)
if warnings and not errors_only:
print(f"\n\n⚠️ WARNINGS ({len(warnings)}):")
print("-" * 80)
for v in warnings:
self._print_violation(v)
def print_results(self) -> None:
"""Print validation results."""
if not self.errors and not self.warnings:
print(f"✅ All {self.rules_dir} rules passed!")
return
if info and not errors_only:
print(f"\n INFO ({len(info)}):")
print("-" * 80)
for v in info:
self._print_violation(v)
if self.errors:
print(f"\n{len(self.errors)} errors found:")
for error in self.errors:
print(f" [{error['rule_id']}] {error['message']}")
if error["file"]:
print(f" File: {error['file']}:{error['line']}")
print("\n" + "=" * 80)
if errors:
print("❌ VALIDATION FAILED")
elif warnings:
print(f"⚠️ VALIDATION PASSED WITH {len(warnings)} WARNING(S)")
else:
print("✅ VALIDATION PASSED")
print("=" * 80)
if self.warnings:
print(f"\n⚠️ {len(self.warnings)} warnings:")
for warning in self.warnings:
print(f" [{warning['rule_id']}] {warning['message']}")
if warning["file"]:
print(f" File: {warning['file']}:{warning['line']}")
def _print_violation(self, v: Violation):
    """Print one violation: rule, location, issue and optional extras.

    The context excerpt is shown only when the validator is verbose; the
    suggestion is shown whenever one is present.
    """
    shown_path = v.file_path
    try:
        shown_path = v.file_path.relative_to(self.project_root)
    except ValueError:
        # Outside the project root: keep the path as recorded.
        pass

    print(f"\n [{v.rule_id}] {v.rule_name}")
    print(f" File: {shown_path}:{v.line_number}")
    print(f" Issue: {v.message}")

    show_context = v.context and self.verbose
    if show_context:
        print(f" Context: {v.context}")
    if v.suggestion:
        print(f" 💡 Suggestion: {v.suggestion}")
def get_exit_code(self) -> int:
    """Return the process exit code: 1 if any errors were recorded, else 0."""
    return 1 if self.result.has_errors() else 0
def run(self) -> int:
    """Load rules, validate, print the results and return the exit code.

    Returns:
        0 when ``validate()`` reports success, otherwise 1.
    """
    self.load_rules()
    outcome = self.validate()
    # Results are printed regardless of the outcome.
    self.print_results()
    if outcome:
        return 0
    return 1