# orion/scripts/validate/validators/base.py

# scripts/validators/base.py
"""
Base classes and helpers for architecture validation.

This module contains:
- Severity: Enum for validation severity levels
- Violation: Dataclass for representing rule violations
- FileResult: Dataclass for single file validation results
- ValidationResult: Dataclass for overall validation results
- BaseValidator: Base class for domain-specific validators
"""

import json
import re
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any


class Severity(Enum):
    """
    Severity level attached to a validation finding.

    Each member's value is its lowercase name ("error", "warning", "info"),
    so a member can be looked up from that string, e.g. ``Severity("error")``.
    """

    ERROR = "error"
    WARNING = "warning"
    INFO = "info"


@dataclass
class Violation:
    """
    A single finding produced when a file breaks an architectural rule.

    Attributes:
        rule_id: Identifier of the rule that was broken
        rule_name: Human-readable name of that rule
        severity: Severity level of the finding
        file_path: File in which the finding was detected
        line_number: Line the finding refers to
        message: Description of what is wrong
        context: Optional snippet of the offending text (empty by default)
        suggestion: Optional hint on how to fix it (empty by default)
    """

    # Required fields: every violation must identify its rule and location.
    rule_id: str
    rule_name: str
    severity: Severity
    file_path: Path
    line_number: int
    message: str
    # Optional fields: default to an empty string when not supplied.
    context: str = ""
    suggestion: str = ""


@dataclass
class FileResult:
    """
    Validation outcome for one file: its path plus error and warning counts.

    The derived properties turn the two counters into a pass/fail flag, a
    textual status and a status icon.
    """

    file_path: Path
    errors: int = 0
    warnings: int = 0

    @property
    def passed(self) -> bool:
        """True when the error count is exactly zero; warnings are not considered."""
        error_free = self.errors == 0
        return error_free

    @property
    def status(self) -> str:
        """Return "FAILED" on errors, "PASSED*" on warnings only, else "PASSED"."""
        if self.errors > 0:
            return "FAILED"
        return "PASSED*" if self.warnings > 0 else "PASSED"

    @property
    def status_icon(self) -> str:
        """Return the icon for the same three states that ``status`` reports."""
        if self.errors > 0:
            return "❌"
        return "⚠️" if self.warnings > 0 else "✅"


@dataclass
class ValidationResult:
    """
    Aggregate outcome of a validation run.

    Collects the violations and per-file results, together with two counters
    (files checked, rules applied) that start at zero.
    """

    # Each instance gets its own fresh list via default_factory.
    violations: list[Violation] = field(default_factory=list)
    files_checked: int = 0
    rules_applied: int = 0
    file_results: list[FileResult] = field(default_factory=list)

    def has_errors(self) -> bool:
        """Return True if at least one recorded violation has ERROR severity"""
        for violation in self.violations:
            if violation.severity == Severity.ERROR:
                return True
        return False

    def has_warnings(self) -> bool:
        """Return True if at least one recorded violation has WARNING severity"""
        for violation in self.violations:
            if violation.severity == Severity.WARNING:
                return True
        return False


class BaseValidator:
    """
    Base class for domain-specific validators.

    Provides common functionality for all validators including:
    - Violation tracking
    - File filtering
    - Rule lookup
    - Common pattern matching utilities
    """

    def __init__(
        self,
        config: dict[str, Any],
        result: ValidationResult,
        project_root: Path,
        verbose: bool = False,
    ):
        """
        Initialize validator with shared state.

        Args:
            config: Loaded architecture rules configuration
            result: Shared ValidationResult for tracking violations
            project_root: Root path of the project
            verbose: Whether to show verbose output
        """
        self.config = config
        self.result = result
        self.project_root = project_root
        self.verbose = verbose

    def validate(self, target_path: Path) -> None:
        """
        Run validation on target path.

        Must be implemented by subclasses.

        Args:
            target_path: Path to validate (file or directory)
        """
        raise NotImplementedError("Subclasses must implement validate()")

    def _add_violation(
        self,
        rule_id: str,
        rule_name: str,
        severity: Severity,
        file_path: Path,
        line_number: int,
        message: str,
        context: str = "",
        suggestion: str = "",
    ) -> None:
        """Add a violation to results"""
        violation = Violation(
            rule_id=rule_id,
            rule_name=rule_name,
            severity=severity,
            file_path=file_path,
            line_number=line_number,
            message=message,
            context=context,
            suggestion=suggestion,
        )
        self.result.violations.append(violation)

    def _should_ignore_file(self, file_path: Path) -> bool:
        """Check if file should be ignored"""
        ignore_patterns = self.config.get("ignore", {}).get("files", [])

        # Convert to string for easier matching
        file_path_str = str(file_path)

        for pattern in ignore_patterns:
            # Check if any part of the path matches the pattern
            if file_path.match(pattern):
                return True
            # Also check if pattern appears in the path (for .venv, venv, etc.)
            if "/.venv/" in file_path_str or file_path_str.startswith(".venv/"):
                return True
            if "/venv/" in file_path_str or file_path_str.startswith("venv/"):
                return True

        return False

    def _get_rule(self, rule_id: str) -> dict[str, Any] | None:
        """Get rule configuration by ID"""
        # Look in different rule categories
        for category in [
            "api_endpoint_rules",
            "service_layer_rules",
            "model_rules",
            "exception_rules",
            "naming_rules",
            "auth_rules",
            "middleware_rules",
            "javascript_rules",
            "template_rules",
            "frontend_component_rules",
            "styling_rules",
            "language_rules",
            "multi_tenancy_rules",
            "code_quality_rules",
        ]:
            rules = self.config.get(category, [])
            for rule in rules:
                if rule.get("id") == rule_id:
                    return rule
        return None

    def _get_files(self, target_path: Path, pattern: str) -> list[Path]:
        """Get files matching a glob pattern, excluding ignored files"""
        files = list(target_path.glob(pattern))
        return [f for f in files if not self._should_ignore_file(f)]

    def _find_decorators(self, content: str) -> list[tuple[int, str, str]]:
        """
        Find all function decorators and their associated functions.

        Returns list of (line_number, decorator, function_name) tuples.
        """
        results = []
        lines = content.split("\n")

        i = 0
        while i < len(lines):
            line = lines[i].strip()
            if line.startswith("@"):
                decorator = line
                # Look for the function definition
                for j in range(i + 1, min(i + 10, len(lines))):
                    next_line = lines[j].strip()
                    if next_line.startswith("def ") or next_line.startswith(
                        "async def "
                    ):
                        # Extract function name
                        match = re.search(r"(?:async\s+)?def\s+(\w+)", next_line)
                        if match:
                            func_name = match.group(1)
                            results.append((i + 1, decorator, func_name))
                        break
                    if next_line.startswith("@"):
                        # Multiple decorators - continue to next
                        continue
                    if next_line and not next_line.startswith("#"):
                        # Non-decorator, non-comment line - stop looking
                        break
            i += 1

        return results

    def _check_pattern_in_lines(
        self,
        file_path: Path,
        lines: list[str],
        pattern: str,
        rule_id: str,
        rule_name: str,
        severity: Severity,
        message: str,
        suggestion: str = "",
        exclude_comments: bool = True,
        exclude_patterns: list[str] = None,
    ) -> None:
        """
        Check for pattern violations in file lines.

        Args:
            file_path: Path to the file
            lines: File content split by lines
            pattern: Regex pattern to search for
            rule_id: Rule identifier
            rule_name: Human-readable rule name
            severity: Violation severity
            message: Violation message
            suggestion: Suggested fix
            exclude_comments: Skip lines that are comments
            exclude_patterns: Additional patterns that mark lines to skip
        """
        exclude_patterns = exclude_patterns or []

        for i, line in enumerate(lines, 1):
            stripped = line.strip()

            # Skip comments if requested
            if exclude_comments and stripped.startswith("#"):
                continue

            # Check exclusion patterns
            skip = False
            for exc in exclude_patterns:
                if exc in line:
                    skip = True
                    break
            if skip:
                continue

            # Check for pattern
            if re.search(pattern, line):
                self._add_violation(
                    rule_id=rule_id,
                    rule_name=rule_name,
                    severity=severity,
                    file_path=file_path,
                    line_number=i,
                    message=message,
                    context=stripped[:80],
                    suggestion=suggestion,
                )

    def _is_valid_json(self, file_path: Path) -> tuple[bool, str]:
        """
        Check if a file contains valid JSON.

        Returns (is_valid, error_message) tuple.
        """
        try:
            with open(file_path, encoding="utf-8") as f:
                json.load(f)
            return True, ""
        except json.JSONDecodeError as e:
            return False, f"Line {e.lineno}: {e.msg}"
        except Exception as e:
            return False, str(e)