orion/scripts/tests/run_affected_tests.py

#!/usr/bin/env python3
"""
Module-Aware Test Impact Analysis

Determines which modules are affected by recent changes and runs only their
tests. Uses the explicit `requires=[]` dependency declarations in each
module's definition.py to build a reverse dependency graph.

Usage:
    python scripts/tests/run_affected_tests.py                        # diff against gitea/master
    python scripts/tests/run_affected_tests.py --base origin/master   # diff against specific ref
    python scripts/tests/run_affected_tests.py --dry-run              # show what would run
    python scripts/tests/run_affected_tests.py --all                  # run all tests
"""

import argparse
import re
import subprocess
import sys
from collections import deque
from pathlib import Path

PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
MODULES_PATH = PROJECT_ROOT / "app" / "modules"

# Changes to these paths trigger a full test suite run
SHARED_PATHS = (
    "app/core/",
    "app/api/",
    "app/utils/",
    "app/exceptions/",
    "app/handlers/",
    "app/tasks/",
    "tests/conftest.py",
    "tests/fixtures/",
    "conftest.py",
    "static/shared/",
    "pyproject.toml",
    "requirements",  # matches requirements*.txt
    "middleware/",
    "models/",
)

MODULE_PATH_RE = re.compile(r"^app/modules/([^/]+)/")
REQUIRES_RE = re.compile(r"requires\s*=\s*\[([^\]]*)\]")


def get_changed_files(base_ref: str) -> list[str]:
    """Get list of files changed between base_ref and HEAD."""
    try:
        result = subprocess.run(
            ["git", "diff", "--name-only", f"{base_ref}..HEAD"],
            capture_output=True,
            text=True,
            cwd=PROJECT_ROOT,
            check=True,
        )
        return [f for f in result.stdout.strip().splitlines() if f]
    except subprocess.CalledProcessError:
        # If the ref doesn't exist, try without ..HEAD (uncommitted changes)
        try:
            result = subprocess.run(
                ["git", "diff", "--name-only", base_ref],
                capture_output=True,
                text=True,
                cwd=PROJECT_ROOT,
                check=True,
            )
            return [f for f in result.stdout.strip().splitlines() if f]
        except subprocess.CalledProcessError as e:
            print(f"Error: Could not get diff against '{base_ref}': {e.stderr.strip()}", file=sys.stderr)
            sys.exit(1)


def map_files_to_modules(changed_files: list[str]) -> tuple[set[str], bool]:
    """Map changed files to module names. Returns (modules, shared_changed)."""
    modules = set()
    shared_changed = False

    for filepath in changed_files:
        # Check if it's a shared path
        if any(filepath.startswith(sp) for sp in SHARED_PATHS):
            shared_changed = True
            continue

        # Check if it's a module path
        match = MODULE_PATH_RE.match(filepath)
        if match:
            modules.add(match.group(1))

    return modules, shared_changed


def parse_module_dependencies() -> dict[str, list[str]]:
    """Parse requires=[] from each module's definition.py."""
    deps: dict[str, list[str]] = {}

    for module_dir in MODULES_PATH.iterdir():
        if not module_dir.is_dir() or module_dir.name.startswith((".", "__")):
            continue

        module_name = module_dir.name
        definition = module_dir / "definition.py"

        if not definition.exists():
            deps[module_name] = []
            continue

        content = definition.read_text()
        match = REQUIRES_RE.search(content)
        if match:
            raw = match.group(1)
            requires = [s.strip().strip("\"'") for s in raw.split(",") if s.strip().strip("\"'")]
            deps[module_name] = requires
        else:
            deps[module_name] = []

    return deps


def build_reverse_deps(deps: dict[str, list[str]]) -> dict[str, set[str]]:
    """Build reverse dependency graph: if A requires B, then B -> A."""
    reverse: dict[str, set[str]] = {mod: set() for mod in deps}
    for module, requires in deps.items():
        for req in requires:
            if req in reverse:
                reverse[req].add(module)
    return reverse


def expand_affected(changed_modules: set[str], reverse_deps: dict[str, set[str]]) -> set[str]:
    """BFS to find all transitively affected modules."""
    affected = set(changed_modules)
    queue = deque(changed_modules)

    while queue:
        module = queue.popleft()
        for dependent in reverse_deps.get(module, set()):
            if dependent not in affected:
                affected.add(dependent)
                queue.append(dependent)

    return affected


def get_all_modules() -> set[str]:
    """Get all module names from the filesystem."""
    return {
        d.name
        for d in MODULES_PATH.iterdir()
        if d.is_dir() and not d.name.startswith((".", "__"))
    }


def get_test_dirs(modules: set[str]) -> list[Path]:
    """Get test directories for the given modules."""
    test_dirs = []
    for mod in sorted(modules):
        test_dir = MODULES_PATH / mod / "tests"
        if test_dir.exists():
            test_dirs.append(test_dir)
    return test_dirs


def run_pytest(test_dirs: list[Path], extra_args: list[str] | None = None) -> int:
    """Run pytest on the given test directories."""
    cmd = [
        sys.executable, "-m", "pytest",
        "--override-ini=addopts=",
        "-v", "--tb=short", "--strict-markers", "-q",
    ]

    if extra_args:
        cmd.extend(extra_args)

    cmd.extend(str(d) for d in test_dirs)

    result = subprocess.run(cmd, cwd=PROJECT_ROOT)
    return result.returncode


def main():
    parser = argparse.ArgumentParser(
        description="Run tests only for modules affected by recent changes"
    )
    parser.add_argument(
        "--base",
        default="gitea/master",
        help="Base ref to diff against (default: gitea/master)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would run without executing tests",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Run all tests regardless of changes",
    )
    parser.add_argument(
        "pytest_args",
        nargs="*",
        help="Extra arguments to pass to pytest",
    )

    args = parser.parse_args()

    all_modules = get_all_modules()
    deps = parse_module_dependencies()
    reverse_deps = build_reverse_deps(deps)

    # --all: run everything
    if args.all:
        test_dirs = get_test_dirs(all_modules)
        print(f"Running ALL tests ({len(all_modules)} modules, {len(test_dirs)} test dirs)")
        if args.dry_run:
            for d in test_dirs:
                print(f"  {d.relative_to(PROJECT_ROOT)}")
            return 0
        return run_pytest(test_dirs, args.pytest_args or None)

    # Analyze changes
    print(f"Analyzing changes ({args.base}..HEAD)...")
    changed_files = get_changed_files(args.base)

    if not changed_files:
        print("  No changes detected. Nothing to test.")
        return 0

    print(f"  Changed files: {len(changed_files)}")

    changed_modules, shared_changed = map_files_to_modules(changed_files)

    # Shared code changed → run all tests
    if shared_changed:
        print("  Shared code changed → running ALL tests")
        test_dirs = get_test_dirs(all_modules)
        if args.dry_run:
            for d in test_dirs:
                print(f"  {d.relative_to(PROJECT_ROOT)}")
            return 0
        return run_pytest(test_dirs, args.pytest_args or None)

    if not changed_modules:
        print("  No module changes detected (only non-module files changed).")
        print("  Consider running full suite if changes affect test infrastructure.")
        return 0

    print(f"  Modules changed: {', '.join(sorted(changed_modules))}")

    affected = expand_affected(changed_modules, reverse_deps)
    skipped = all_modules - affected

    print(f"  Affected (with deps): {', '.join(sorted(affected))}")
    if skipped:
        print(f"  Skipped: {len(skipped)} modules")

    test_dirs = get_test_dirs(affected)

    if not test_dirs:
        print("  No test directories found for affected modules.")
        return 0

    if args.dry_run:
        print("\nWould run tests for:")
        for d in test_dirs:
            print(f"  {d.relative_to(PROJECT_ROOT)}")
        return 0

    print(f"\nRunning tests for: {', '.join(sorted(affected))}")
    return run_pytest(test_dirs, args.pytest_args or None)


if __name__ == "__main__":
    sys.exit(main())