#!/usr/bin/env python3
"""
Base Validator
==============

Shared base class for all validation scripts (architecture, security, performance).

Provides common functionality for:
- Loading YAML configuration
- File pattern matching
- Violation tracking
- Output formatting (human-readable and JSON)
"""

import fnmatch
import json
import re
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any

import yaml


class Severity(Enum):
    """Validation severity levels"""

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"


@dataclass
class Violation:
    """Represents a rule violation"""

    rule_id: str
    rule_name: str
    severity: Severity
    file_path: Path
    line_number: int
    message: str
    context: str = ""     # offending source line (truncated by callers), optional
    suggestion: str = ""  # optional remediation hint


@dataclass
class FileResult:
    """Results for a single file validation"""

    file_path: Path
    errors: int = 0
    warnings: int = 0
    info: int = 0

    @property
    def passed(self) -> bool:
        # A file "passes" as long as it has no error-level findings;
        # warnings and info do not fail a file.
        return self.errors == 0

    @property
    def status(self) -> str:
        # "PASSED*" signals passed-with-warnings.
        if self.errors > 0:
            return "FAILED"
        if self.warnings > 0:
            return "PASSED*"
        return "PASSED"

    @property
    def status_icon(self) -> str:
        # Emoji mirror of `status` for terminal output.
        if self.errors > 0:
            return "āŒ"
        if self.warnings > 0:
            return "āš ļø"
        return "āœ…"


@dataclass
class ValidationResult:
    """Results of validation"""

    violations: list[Violation] = field(default_factory=list)
    files_checked: int = 0
    rules_applied: int = 0
    file_results: list[FileResult] = field(default_factory=list)

    def has_errors(self) -> bool:
        """Check if there are any error-level violations"""
        return any(v.severity == Severity.ERROR for v in self.violations)

    def has_warnings(self) -> bool:
        """Check if there are any warning-level violations"""
        return any(v.severity == Severity.WARNING for v in self.violations)

    def error_count(self) -> int:
        """Number of error-level violations."""
        return sum(1 for v in self.violations if v.severity == Severity.ERROR)

    def warning_count(self) -> int:
        """Number of warning-level violations."""
        return sum(1 for v in self.violations if v.severity == Severity.WARNING)

    def info_count(self) -> int:
        """Number of info-level violations."""
        return sum(1 for v in self.violations if v.severity == Severity.INFO)


class BaseValidator(ABC):
    """Abstract base validator class"""

    # Subclasses should override these
    VALIDATOR_NAME = "Base Validator"
    VALIDATOR_EMOJI = "šŸ”"
    RULES_DIR_NAME = ".rules"
    CONFIG_FILE_NAME = ".rules.yaml"

    def __init__(self, config_path: Path | None = None, verbose: bool = False):
        """Initialize validator with configuration.

        Args:
            config_path: Explicit path to a single-file YAML config; defaults
                to ``CONFIG_FILE_NAME`` in the current working directory.
            verbose: When True, violation context lines are printed in
                human-readable output.
        """
        self.project_root = Path.cwd()
        self.config_path = config_path or self.project_root / self.CONFIG_FILE_NAME
        self.verbose = verbose
        self.config = self._load_config()
        self.result = ValidationResult()

    def _load_config(self) -> dict[str, Any]:
        """
        Load validation rules from YAML config.

        Supports two modes:
        1. Split directory mode: rules directory with multiple YAML files
        2. Single file mode: single YAML file (legacy)

        The split directory mode takes precedence if it exists.

        Exits the process (code 1) when neither source is found.
        """
        # Check for split directory mode first
        rules_dir = self.project_root / self.RULES_DIR_NAME
        if rules_dir.is_dir():
            return self._load_config_from_directory(rules_dir)

        # Fall back to single file mode
        if not self.config_path.exists():
            print(f"āŒ Configuration file not found: {self.config_path}")
            print(f" (Also checked for directory: {rules_dir})")
            sys.exit(1)

        # Explicit UTF-8 so config parsing does not depend on the platform
        # default encoding (e.g. cp1252 on Windows).
        with open(self.config_path, encoding="utf-8") as f:
            config = yaml.safe_load(f)

        print(f"šŸ“‹ Loaded {self.VALIDATOR_NAME}: {config.get('project', 'unknown')}")
        return config

    def _load_config_from_directory(self, rules_dir: Path) -> dict[str, Any]:
        """
        Load and merge configuration from split YAML files in a directory.

        Reads _main.yaml first for base config, then merges all other YAML files.
        Keys ending in ``_rules`` whose values are lists are concatenated;
        any other key is added only if not already present (first file wins).
        """
        config: dict[str, Any] = {}

        # Load _main.yaml first (contains project info, principles, ignore patterns)
        main_file = rules_dir / "_main.yaml"
        if main_file.exists():
            with open(main_file, encoding="utf-8") as f:
                config = yaml.safe_load(f) or {}

        # Load all other YAML files and merge their contents
        yaml_files = sorted(rules_dir.glob("*.yaml"))
        for yaml_file in yaml_files:
            if yaml_file.name == "_main.yaml":
                continue  # Already loaded
            with open(yaml_file, encoding="utf-8") as f:
                file_config = yaml.safe_load(f) or {}

            # Merge rule sections from this file into main config
            for key, value in file_config.items():
                if key.endswith("_rules") and isinstance(value, list):
                    # Merge rule lists
                    config.setdefault(key, []).extend(value)
                elif key not in config:
                    # Add new top-level keys
                    config[key] = value

        print(f"šŸ“‹ Loaded {self.VALIDATOR_NAME}: {config.get('project', 'unknown')}")
        print(f" (from {len(yaml_files)} files in {rules_dir.name}/)")
        return config

    def _should_ignore_file(self, file_path: Path) -> bool:
        """Check if a file should be ignored based on config patterns.

        Patterns come from ``config["ignore"]["files"]``. Patterns containing
        ``*`` are treated as globs via fnmatch; all others match as plain
        substrings of the project-relative path.
        """
        ignore_config = self.config.get("ignore", {})
        ignore_files = ignore_config.get("files", [])

        # Get relative path for matching
        try:
            rel_path = file_path.relative_to(self.project_root)
        except ValueError:
            # File lives outside the project root; match against the raw path.
            rel_path = file_path
        rel_path_str = str(rel_path)

        for pattern in ignore_files:
            # Handle glob patterns using fnmatch
            if "*" in pattern:
                # fnmatch's "*" matches across path separators, so patterns
                # with "**" and "?" behave as intended here.
                if fnmatch.fnmatch(rel_path_str, pattern):
                    return True
                # Also check each path component for patterns like **/.venv/**
                # This handles cases where the pattern expects any prefix
                if pattern.startswith("**/"):
                    # Try matching without the **/ prefix (e.g. .venv/** matches .venv/foo)
                    suffix_pattern = pattern[3:]  # Remove "**/"
                    if fnmatch.fnmatch(rel_path_str, suffix_pattern):
                        return True
            elif pattern in rel_path_str:
                return True

        return False

    def _add_violation(
        self,
        rule_id: str,
        rule_name: str,
        severity: Severity,
        file_path: Path,
        line_number: int,
        message: str,
        context: str = "",
        suggestion: str = "",
    ):
        """Add a violation to the results.

        Respects inline suppression: if the context line carries a
        ``noqa: <rule_id>`` comment (case-insensitive), the violation
        is silently dropped.
        """
        # Check for inline noqa comment
        if f"noqa: {rule_id.lower()}" in context.lower():
            return

        self.result.violations.append(
            Violation(
                rule_id=rule_id,
                rule_name=rule_name,
                severity=severity,
                file_path=file_path,
                line_number=line_number,
                message=message,
                context=context,
                suggestion=suggestion,
            )
        )

    def _get_rule(self, rule_id: str) -> dict | None:
        """Look up a rule by ID across all rule categories.

        Scans every top-level config key ending in ``_rules`` and returns
        the first rule dict whose ``id`` matches, or None.
        """
        for key, value in self.config.items():
            if key.endswith("_rules") and isinstance(value, list):
                for rule in value:
                    if rule.get("id") == rule_id:
                        return rule
        return None

    def _check_pattern_in_file(
        self,
        file_path: Path,
        content: str,
        lines: list[str],
        pattern: str,
        rule_id: str,
        rule_name: str,
        severity: Severity,
        message: str,
        suggestion: str = "",
        exclude_patterns: list[str] | None = None,
    ):
        """Check for a regex pattern in a file and report violations.

        Matching is case-insensitive and line-by-line. A matching line is
        skipped when it contains any of the plain-substring
        ``exclude_patterns``. Context is truncated to 100 characters.
        """
        exclude_patterns = exclude_patterns or []
        # Compile once instead of re-matching the raw pattern per line.
        regex = re.compile(pattern, re.IGNORECASE)

        for i, line in enumerate(lines, 1):
            if regex.search(line):
                # Check exclusions
                if any(exclude in line for exclude in exclude_patterns):
                    continue
                self._add_violation(
                    rule_id=rule_id,
                    rule_name=rule_name,
                    severity=severity,
                    file_path=file_path,
                    line_number=i,
                    message=message,
                    context=line.strip()[:100],
                    suggestion=suggestion,
                )

    @abstractmethod
    def validate_all(self, target_path: Path | None = None) -> ValidationResult:
        """Validate all files in a directory - must be implemented by subclasses"""
        pass

    def validate_file(self, file_path: Path, quiet: bool = False) -> ValidationResult:
        """Validate a single file.

        Skips missing paths, non-files, and ignored files (returning the
        current result unchanged); otherwise delegates line-level checks to
        the subclass hook ``_validate_file_content`` and records a
        per-file ``FileResult``.
        """
        if not file_path.exists():
            if not quiet:
                print(f"āŒ File not found: {file_path}")
            return self.result
        if not file_path.is_file():
            if not quiet:
                print(f"āŒ Not a file: {file_path}")
            return self.result

        if not quiet:
            print(f"\n{self.VALIDATOR_EMOJI} Validating single file: {file_path}\n")

        # Resolve file path to absolute
        file_path = file_path.resolve()

        if self._should_ignore_file(file_path):
            if not quiet:
                print("ā­ļø File is in ignore list, skipping")
            return self.result

        self.result.files_checked += 1

        # Track violations before this file
        violations_before = len(self.result.violations)

        # Explicit UTF-8 so validation is independent of the platform's
        # default locale encoding.
        content = file_path.read_text(encoding="utf-8")
        lines = content.split("\n")

        # Call subclass-specific validation
        self._validate_file_content(file_path, content, lines)

        # Calculate violations for this file
        file_violations = self.result.violations[violations_before:]
        errors = sum(1 for v in file_violations if v.severity == Severity.ERROR)
        warnings = sum(1 for v in file_violations if v.severity == Severity.WARNING)
        info = sum(1 for v in file_violations if v.severity == Severity.INFO)

        # Track file result
        self.result.file_results.append(
            FileResult(file_path=file_path, errors=errors, warnings=warnings, info=info)
        )

        return self.result

    @abstractmethod
    def _validate_file_content(self, file_path: Path, content: str, lines: list[str]):
        """Validate file content - must be implemented by subclasses"""
        pass

    def output_results(self, json_output: bool = False, errors_only: bool = False):
        """Output validation results.

        Args:
            json_output: Emit machine-readable JSON instead of the report.
            errors_only: In human output, suppress warning/info sections
                (errors and the final verdict are always shown).
        """
        if json_output:
            self._output_json()
        else:
            self._output_human(errors_only)

    def _output_json(self):
        """Output results as JSON

        Format matches code quality service expectations:
        - file_path (not file)
        - line_number (not line)
        - total_violations count
        """
        rel_base = self.project_root

        def get_relative_path(file_path: Path) -> str:
            """Get relative path from project root"""
            try:
                return str(file_path.relative_to(rel_base))
            except ValueError:
                return str(file_path)

        output = {
            "validator": self.VALIDATOR_NAME,
            "files_checked": self.result.files_checked,
            "total_violations": len(self.result.violations),
            "errors": self.result.error_count(),
            "warnings": self.result.warning_count(),
            "info": self.result.info_count(),
            "violations": [
                {
                    "rule_id": v.rule_id,
                    "rule_name": v.rule_name,
                    "severity": v.severity.value,
                    "file_path": get_relative_path(v.file_path),
                    "line_number": v.line_number,
                    "message": v.message,
                    "context": v.context,
                    "suggestion": v.suggestion,
                }
                for v in self.result.violations
            ],
        }
        print(json.dumps(output, indent=2))

    def _output_human(self, errors_only: bool = False):
        """Output results in human-readable format"""
        print("\n" + "=" * 80)
        print(f"šŸ“Š {self.VALIDATOR_NAME.upper()} REPORT")
        print("=" * 80)

        errors = [v for v in self.result.violations if v.severity == Severity.ERROR]
        warnings = [v for v in self.result.violations if v.severity == Severity.WARNING]
        info = [v for v in self.result.violations if v.severity == Severity.INFO]

        print(f"\nFiles checked: {self.result.files_checked}")
        print(f"Findings: {len(errors)} errors, {len(warnings)} warnings, {len(info)} info")

        if errors:
            print(f"\n\nāŒ ERRORS ({len(errors)}):")
            print("-" * 80)
            for v in errors:
                self._print_violation(v)

        if warnings and not errors_only:
            print(f"\n\nāš ļø WARNINGS ({len(warnings)}):")
            print("-" * 80)
            for v in warnings:
                self._print_violation(v)

        if info and not errors_only:
            print(f"\nā„¹ļø INFO ({len(info)}):")
            print("-" * 80)
            for v in info:
                self._print_violation(v)

        print("\n" + "=" * 80)
        if errors:
            print("āŒ VALIDATION FAILED")
        elif warnings:
            print(f"āš ļø VALIDATION PASSED WITH {len(warnings)} WARNING(S)")
        else:
            print("āœ… VALIDATION PASSED")
        print("=" * 80)

    def _print_violation(self, v: Violation):
        """Print a single violation"""
        try:
            rel_path = v.file_path.relative_to(self.project_root)
        except ValueError:
            rel_path = v.file_path

        print(f"\n [{v.rule_id}] {v.rule_name}")
        print(f" File: {rel_path}:{v.line_number}")
        print(f" Issue: {v.message}")
        if v.context and self.verbose:
            print(f" Context: {v.context}")
        if v.suggestion:
            print(f" šŸ’” Suggestion: {v.suggestion}")

    def get_exit_code(self) -> int:
        """Get appropriate exit code based on results.

        Returns 1 when any error-level violation exists, else 0 —
        warnings and info do not affect the exit code.
        """
        if self.result.has_errors():
            return 1
        return 0