diff --git a/.audit-rules/_main.yaml b/.audit-rules/_main.yaml new file mode 100644 index 00000000..eda3e482 --- /dev/null +++ b/.audit-rules/_main.yaml @@ -0,0 +1,59 @@ +# IT Internal Audit Rules +# ======================== +# These rules ensure compliance with internal governance policies, +# regulatory requirements, and audit best practices. +# +# Purpose: Independent verification of controls for: +# - SOX compliance (financial systems) +# - GDPR/CCPA compliance (data privacy) +# - Internal governance policies +# - Risk management frameworks + +version: "1.0" +name: "IT Internal Audit Rules" + +# Rule categories and their weights for scoring +categories: + audit_trail: + weight: 25 + description: "Logging, traceability, and audit log integrity" + access_control: + weight: 20 + description: "Authentication, authorization, and least privilege" + data_governance: + weight: 20 + description: "PII handling, data classification, and retention" + compliance: + weight: 15 + description: "Regulatory and policy compliance" + change_management: + weight: 10 + description: "Version control, approvals, and rollback" + third_party: + weight: 5 + description: "Dependency management and license compliance" + documentation: + weight: 5 + description: "Required documentation and traceability" + +# Minimum passing score (percentage) +minimum_score: 80 + +# Files/directories to exclude from audit +excluded_paths: + - ".venv/" + - "__pycache__/" + - ".git/" + - "node_modules/" + - ".pytest_cache/" + - "htmlcov/" + - "site/" + - ".mypy_cache/" + +# Severity levels +severity_levels: + critical: "Must be fixed immediately - audit finding" + high: "Must be fixed before next audit cycle" + medium: "Should be addressed in remediation plan" + low: "Recommendation for improvement" + info: "Informational observation" diff --git a/.audit-rules/access_control.yaml b/.audit-rules/access_control.yaml new file mode 100644 index 00000000..73193189 --- /dev/null +++ 
b/.audit-rules/access_control.yaml @@ -0,0 +1,171 @@ +# Access Control Rules +# ==================== +# Ensures proper authentication, authorization, and least privilege. +# Critical for preventing unauthorized access. + +rules: + # =================== + # AUTHENTICATION + # =================== + + - id: ACCESS-AUTH-001 + name: "Protected endpoints must require authentication" + description: "API endpoints handling sensitive data must enforce authentication" + severity: critical + check: + type: pattern_required + paths: + - "app/api/v1/*.py" + patterns: + - "CurrentUser|Depends.*get_current_user|AdminUser" + exclude_patterns: + - "health.py" + - "auth.py" # Auth endpoints handle their own logic + message: "API endpoints must require authentication" + + - id: ACCESS-AUTH-002 + name: "Admin endpoints must verify admin role" + description: "Administrative functions must check for admin privileges" + severity: critical + check: + type: pattern_required + paths: + - "app/routes/admin.py" + patterns: + - "is_admin|AdminUser|require_admin|admin_required" + message: "Admin routes must verify admin privileges" + + - id: ACCESS-AUTH-003 + name: "Session management must be secure" + description: "Sessions must have proper timeout and security settings" + severity: high + check: + type: pattern_required + paths: + - "app/core/config.py" + - "app/core/security.py" + patterns: + - "ACCESS_TOKEN_EXPIRE|SESSION_TIMEOUT|token.*expire" + message: "Session/token expiration must be configured" + + # =================== + # AUTHORIZATION (RBAC) + # =================== + + - id: ACCESS-RBAC-001 + name: "Role-based access control implementation" + description: "System must implement role-based access control" + severity: high + check: + type: pattern_required + paths: + - "models/database/user.py" + patterns: + - "is_admin|role|permission" + message: "User model must support role-based access" + + - id: ACCESS-RBAC-002 + name: "Authorization checks before data access" + description: 
"Data access must verify user permissions" + severity: high + check: + type: pattern_recommended + paths: + - "app/api/v1/*.py" + patterns: + - "user\\.id|current_user|owner|created_by" + message: "Consider ownership checks for data access" + + - id: ACCESS-RBAC-003 + name: "Separation of duties" + description: "Critical operations should require different roles" + severity: medium + check: + type: documentation + message: "Document separation of duties in critical workflows" + + # =================== + # LEAST PRIVILEGE + # =================== + + - id: ACCESS-PRIV-001 + name: "Database connections use least privilege" + description: "Database user should have minimal required permissions" + severity: high + check: + type: documentation + paths: + - "docs/**/*.md" + - "README.md" + patterns: + - "database.*permission|db.*role|least.*privilege" + message: "Document database user permissions" + + - id: ACCESS-PRIV-002 + name: "API endpoints return minimal data" + description: "Responses should not include unnecessary sensitive fields" + severity: medium + check: + type: pattern_forbidden + paths: + - "models/schema/*.py" + patterns: + - "password_hash|password.*Field" + exclude_patterns: + - "password.*exclude.*True" + message: "Password hashes must not be included in API responses" + + - id: ACCESS-PRIV-003 + name: "Environment-specific access" + description: "Debug/admin features disabled in production" + severity: high + check: + type: pattern_required + paths: + - "main.py" + - "app/core/environment.py" + patterns: + - "is_production|ENVIRONMENT|DEBUG" + message: "Environment-based feature flags required" + + # =================== + # ACCOUNT SECURITY + # =================== + + - id: ACCESS-ACCT-001 + name: "Account lockout mechanism" + description: "Failed login attempts should trigger account lockout" + severity: high + check: + type: pattern_recommended + paths: + - "app/api/v1/auth.py" + - "app/services/user_service.py" + patterns: + - 
"failed.*attempt|lockout|rate.*limit|throttle" + message: "Consider implementing account lockout after failed attempts" + + - id: ACCESS-ACCT-002 + name: "Password complexity requirements" + description: "Passwords must meet minimum complexity requirements" + severity: high + check: + type: pattern_required + paths: + - "models/schema/auth.py" + - "models/schema/user.py" + patterns: + - "min_length|MinLen|Field.*ge.*8" + message: "Password minimum length must be enforced" + + - id: ACCESS-ACCT-003 + name: "Secure password storage" + description: "Passwords must be hashed with strong algorithm" + severity: critical + check: + type: pattern_required + paths: + - "app/core/security.py" + patterns: + - "bcrypt|argon2|scrypt|pbkdf2" + message: "Passwords must use approved hashing algorithms" diff --git a/.audit-rules/audit_trail.yaml b/.audit-rules/audit_trail.yaml new file mode 100644 index 00000000..18797439 --- /dev/null +++ b/.audit-rules/audit_trail.yaml @@ -0,0 +1,170 @@ +# Audit Trail Rules +# ================== +# Ensures all sensitive operations are logged and traceable. +# Critical for SOX compliance and incident investigation. + +rules: + # =================== + # LOGGING REQUIREMENTS + # =================== + + - id: AUDIT-LOG-001 + name: "Authentication events must be logged" + description: "All login, logout, and failed authentication attempts must be logged" + severity: critical + check: + type: pattern_required + paths: + - "app/api/v1/auth.py" + - "app/routes/admin.py" + patterns: + - "logger\\.(info|warning|error).*login" + - "logger\\.(info|warning|error).*auth" + message: "Authentication endpoints must log all attempts" + + - id: AUDIT-LOG-002 + name: "Data modification must be logged" + description: "Create, update, delete operations must include audit logging" + severity: critical + check: + type: pattern_required + paths: + - "app/services/*.py" + patterns: + - "logger\\." 
+ exclude_patterns: + - "__init__.py" + message: "Service layer must include logging for data modifications" + + - id: AUDIT-LOG-003 + name: "Admin actions must be logged" + description: "All administrative actions require audit logging" + severity: critical + check: + type: pattern_required + paths: + - "app/routes/admin.py" + patterns: + - "logger\\." + message: "Admin routes must log all actions" + + - id: AUDIT-LOG-004 + name: "Log must include user identity" + description: "Audit logs must include the user ID performing the action" + severity: high + check: + type: pattern_recommended + paths: + - "app/**/*.py" + patterns: + - "user_id|current_user|admin_user" + context: "logging" + message: "Logs should include user identity for traceability" + + # =================== + # AUDIT FIELDS + # =================== + + - id: AUDIT-FIELD-001 + name: "Models must have audit timestamps" + description: "Database models must include created_at and updated_at fields" + severity: high + check: + type: pattern_required + paths: + - "models/database/*.py" + patterns: + - "created_at" + - "updated_at" + exclude_patterns: + - "__init__.py" + - "base.py" + - "audit_log.py" # Uses timestamp field instead + message: "Database models must include audit timestamp fields" + + - id: AUDIT-FIELD-002 + name: "Models should track who made changes" + description: "Models should include created_by and updated_by fields" + severity: medium + check: + type: pattern_recommended + paths: + - "models/database/*.py" + patterns: + - "created_by|updated_by|modified_by" + message: "Consider adding created_by/updated_by fields for accountability" + + # =================== + # LOG INTEGRITY + # =================== + + - id: AUDIT-INT-001 + name: "Logs must not be modifiable by application" + description: "Application should not have delete/modify access to audit logs" + severity: high + check: + type: pattern_forbidden + paths: + - "app/**/*.py" + patterns: + - "os\\.remove.*\\.log" + - 
"truncate.*log" + - "open.*\\.log.*[\"']w[\"']" + message: "Application must not modify or delete log files" + + - id: AUDIT-INT-002 + name: "Structured logging required" + description: "Use structured logging for machine-parseable audit trails" + severity: medium + check: + type: pattern_recommended + paths: + - "app/core/logging.py" + patterns: + - "structlog|json|JSONFormatter" + message: "Consider structured logging for better audit trail analysis" + + # =================== + # SENSITIVE OPERATIONS + # =================== + + - id: AUDIT-SENS-001 + name: "Password changes must be logged" + description: "All password change operations require audit logging" + severity: critical + check: + type: pattern_required + paths: + - "app/services/user_service.py" + - "app/api/v1/users.py" + patterns: + - "password" + context: "must have corresponding logger call" + message: "Password operations must be logged" + + - id: AUDIT-SENS-002 + name: "Permission changes must be logged" + description: "Role and permission changes require audit logging" + severity: critical + check: + type: pattern_required + paths: + - "app/services/*.py" + patterns: + - "is_admin|role|permission" + context: "must have corresponding logger call" + message: "Permission changes must be logged" + + - id: AUDIT-SENS-003 + name: "Data exports must be logged" + description: "Any data export functionality must be logged" + severity: high + check: + type: pattern_check + paths: + - "app/**/*.py" + patterns: + - "export|download|csv|xlsx" + requires: + - "logger\\." + message: "Data export operations must be logged" diff --git a/.audit-rules/change_management.yaml b/.audit-rules/change_management.yaml new file mode 100644 index 00000000..32723a21 --- /dev/null +++ b/.audit-rules/change_management.yaml @@ -0,0 +1,226 @@ +# Change Management Rules +# ======================= +# Ensures proper version control, approval workflows, and rollback capability. 
+# Critical for maintaining system stability and audit trail. + +rules: + # =================== + # VERSION CONTROL + # =================== + + - id: CHANGE-VC-001 + name: "Git repository required" + description: "All code must be in version control" + severity: critical + check: + type: file_exists + paths: + - ".git" + message: "Git repository required" + + - id: CHANGE-VC-002 + name: "Gitignore configured" + description: "Sensitive files must be excluded from version control" + severity: high + check: + type: file_exists + paths: + - ".gitignore" + message: ".gitignore file required" + + - id: CHANGE-VC-003 + name: "Secrets excluded from VCS" + description: "Secret files must be in .gitignore" + severity: critical + check: + type: pattern_required + paths: + - ".gitignore" + patterns: + - "\\.env" + - "\\*\\.pem" + - "\\*\\.key" + - "secrets" + message: "Secret files must be excluded from version control" + + # =================== + # CODE REVIEW + # =================== + + - id: CHANGE-REV-001 + name: "Pull request template" + description: "PR template ensures consistent review information" + severity: medium + check: + type: file_exists + paths: + - ".github/PULL_REQUEST_TEMPLATE.md" + message: "Pull request template recommended" + + - id: CHANGE-REV-002 + name: "Branch protection documentation" + description: "Main branch should require reviews" + severity: high + check: + type: documentation + message: "Document branch protection rules" + + - id: CHANGE-REV-003 + name: "Code owners defined" + description: "Critical paths should have designated owners" + severity: medium + check: + type: file_exists + paths: + - ".github/CODEOWNERS" + - "CODEOWNERS" + message: "Consider defining code owners for critical paths" + + # =================== + # CI/CD PIPELINE + # =================== + + - id: CHANGE-CI-001 + name: "Automated testing in CI" + description: "Tests must run automatically on changes" + severity: high + check: + type: file_exists + paths: + - 
".github/workflows/ci.yml" + - ".github/workflows/test.yml" + message: "CI workflow for automated testing required" + + - id: CHANGE-CI-002 + name: "Security scanning in CI" + description: "Security scans should run in CI pipeline" + severity: high + check: + type: pattern_recommended + paths: + - ".github/workflows/*.yml" + patterns: + - "security|bandit|safety|snyk|trivy" + message: "Consider security scanning in CI pipeline" + + - id: CHANGE-CI-003 + name: "Linting and code quality" + description: "Code quality checks should run in CI" + severity: medium + check: + type: pattern_required + paths: + - ".github/workflows/*.yml" + patterns: + - "ruff|flake8|pylint|mypy|lint" + message: "Code quality checks required in CI" + + # =================== + # DEPLOYMENT + # =================== + + - id: CHANGE-DEP-001 + name: "Environment separation" + description: "Development, staging, and production must be separate" + severity: high + check: + type: pattern_required + paths: + - "app/core/config.py" + - "app/core/environment.py" + patterns: + - "ENVIRONMENT|development|staging|production" + message: "Environment separation required" + + - id: CHANGE-DEP-002 + name: "Deployment automation" + description: "Deployments should be automated and repeatable" + severity: medium + check: + type: file_exists + paths: + - ".github/workflows/release.yml" + - ".github/workflows/deploy.yml" + - "Dockerfile" + message: "Automated deployment process recommended" + + - id: CHANGE-DEP-003 + name: "Infrastructure as code" + description: "Infrastructure should be version controlled" + severity: medium + check: + type: file_exists + paths: + - "docker-compose.yml" + - "Dockerfile" + - "terraform/" + - "kubernetes/" + message: "Infrastructure as code recommended" + + # =================== + # ROLLBACK CAPABILITY + # =================== + + - id: CHANGE-ROLL-001 + name: "Database migration versioning" + description: "Database changes must be versioned and reversible" + severity: high + 
check: + type: file_exists + paths: + - "alembic/" + - "alembic.ini" + message: "Database migration tool required" + + - id: CHANGE-ROLL-002 + name: "Migration downgrade support" + description: "Database migrations should support rollback" + severity: medium + check: + type: pattern_required + paths: + - "alembic/versions/*.py" + patterns: + - "def downgrade" + message: "Migration downgrade functions required" + + - id: CHANGE-ROLL-003 + name: "Container versioning" + description: "Container images should be versioned" + severity: medium + check: + type: pattern_recommended + paths: + - "Dockerfile" + - ".github/workflows/*.yml" + patterns: + - "tag|version|:v" + message: "Container image versioning recommended" + + # =================== + # CHANGE DOCUMENTATION + # =================== + + - id: CHANGE-DOC-001 + name: "Changelog maintained" + description: "Changes should be documented in changelog" + severity: medium + check: + type: file_exists + paths: + - "CHANGELOG.md" + - "CHANGES.md" + - "HISTORY.md" + message: "Consider maintaining a changelog" + + - id: CHANGE-DOC-002 + name: "Release documentation" + description: "Releases should be documented" + severity: low + check: + type: pattern_recommended + paths: + - ".github/workflows/release.yml" + patterns: + - "release|changelog|notes" + message: "Consider automated release notes" diff --git a/.audit-rules/compliance.yaml b/.audit-rules/compliance.yaml new file mode 100644 index 00000000..c305d26c --- /dev/null +++ b/.audit-rules/compliance.yaml @@ -0,0 +1,191 @@ +# Compliance Rules +# ================= +# Ensures adherence to regulatory and internal policy requirements. +# Covers SOX, GDPR, CCPA, and internal governance policies. 
+ +rules: + # =================== + # REGULATORY COMPLIANCE + # =================== + + - id: COMP-REG-001 + name: "Error messages must not expose internals" + description: "Error responses must not reveal system internals (SOX, security)" + severity: high + check: + type: pattern_forbidden + paths: + - "app/exceptions/*.py" + patterns: + - "traceback|stack.*trace|sys\\.exc_info" + exclude_patterns: + - "if.*debug|if.*development" + message: "Production errors must not expose stack traces" + + - id: COMP-REG-002 + name: "HTTPS enforcement" + description: "All communications must use HTTPS in production" + severity: critical + check: + type: pattern_required + paths: + - "app/core/config.py" + - "main.py" + patterns: + - "https|SSL|TLS|SECURE" + message: "HTTPS configuration required" + + - id: COMP-REG-003 + name: "Security headers" + description: "Security headers must be configured" + severity: high + check: + type: pattern_recommended + paths: + - "main.py" + - "middleware/*.py" + patterns: + - "X-Frame-Options|X-Content-Type|Strict-Transport|CSP|Content-Security-Policy" + message: "Consider security headers middleware" + + # =================== + # CONSENT MANAGEMENT + # =================== + + - id: COMP-CONS-001 + name: "Terms acceptance tracking" + description: "User acceptance of terms must be recorded" + severity: medium + check: + type: pattern_recommended + paths: + - "models/database/user.py" + patterns: + - "terms_accepted|consent|accepted_at" + message: "Consider tracking terms/consent acceptance" + + - id: COMP-CONS-002 + name: "Cookie consent" + description: "Cookie usage must comply with consent requirements" + severity: medium + check: + type: documentation + message: "Document cookie consent mechanism" + + # =================== + # FINANCIAL CONTROLS (SOX) + # =================== + + - id: COMP-SOX-001 + name: "Financial transaction logging" + description: "Financial transactions must have complete audit trail" + severity: critical + check: + 
type: pattern_check + paths: + - "app/**/*.py" + patterns: + - "payment|transaction|invoice|billing" + requires: + - "logger\\." + message: "Financial operations require audit logging" + + - id: COMP-SOX-002 + name: "Dual approval for critical operations" + description: "Critical financial operations should require dual approval" + severity: medium + check: + type: documentation + message: "Document approval workflow for critical operations" + + - id: COMP-SOX-003 + name: "Immutable transaction records" + description: "Financial records must not be modifiable" + severity: high + check: + type: pattern_forbidden + paths: + - "app/**/*.py" + patterns: + - "update.*transaction|delete.*payment|modify.*invoice" + message: "Financial records should be immutable" + + # =================== + # INTERNAL POLICIES + # =================== + + - id: COMP-POL-001 + name: "Code review requirement" + description: "Code changes must go through review process" + severity: high + check: + type: file_exists + paths: + - ".github/PULL_REQUEST_TEMPLATE.md" + - "CONTRIBUTING.md" + - ".github/workflows/*.yml" + message: "Code review process must be documented/enforced" + + - id: COMP-POL-002 + name: "Change approval documentation" + description: "Changes must have documented approval" + severity: medium + check: + type: file_exists + paths: + - ".github/CODEOWNERS" + - ".github/workflows/*.yml" + message: "Document change approval requirements" + + - id: COMP-POL-003 + name: "Incident response documentation" + description: "Incident response procedures must be documented" + severity: medium + check: + type: file_exists + paths: + - "docs/**/incident*.md" + - "docs/**/security*.md" + - "SECURITY.md" + message: "Document incident response procedures" + + # =================== + # EVIDENCE COLLECTION + # =================== + + - id: COMP-EVID-001 + name: "Automated testing evidence" + description: "Test results must be captured for audit evidence" + severity: medium + check: + type: 
file_exists + paths: + - ".github/workflows/ci.yml" + - "pytest.ini" + - "pyproject.toml" + patterns: + - "pytest|test|coverage" + message: "Automated testing must be configured" + + - id: COMP-EVID-002 + name: "Deployment audit trail" + description: "Deployments must be logged and traceable" + severity: high + check: + type: file_exists + paths: + - ".github/workflows/*.yml" + patterns: + - "deploy|release" + message: "Deployment process must be automated and logged" + + - id: COMP-EVID-003 + name: "Version control usage" + description: "All code must be version controlled" + severity: critical + check: + type: file_exists + paths: + - ".git" + - ".gitignore" + message: "Version control is required" diff --git a/.audit-rules/data_governance.yaml b/.audit-rules/data_governance.yaml new file mode 100644 index 00000000..a47aed9c --- /dev/null +++ b/.audit-rules/data_governance.yaml @@ -0,0 +1,201 @@ +# Data Governance Rules +# ===================== +# Ensures proper handling of PII, data classification, and retention. +# Critical for GDPR, CCPA, and data privacy compliance. 
+ +rules: + # =================== + # PII IDENTIFICATION + # =================== + + - id: DATA-PII-001 + name: "PII fields must be identified" + description: "Personally Identifiable Information fields must be marked/documented" + severity: high + check: + type: pattern_recommended + paths: + - "models/database/*.py" + patterns: + - "# PII|pii.*=.*True|sensitive.*=.*True" + message: "Consider marking PII fields with comments or metadata" + + - id: DATA-PII-002 + name: "Email addresses are PII" + description: "Email fields must be treated as PII" + severity: medium + check: + type: documentation + message: "Document email as PII in data classification" + + - id: DATA-PII-003 + name: "PII must not be logged" + description: "Sensitive data must not appear in logs" + severity: critical + check: + type: pattern_forbidden + paths: + - "app/**/*.py" + - "middleware/**/*.py" + patterns: + - "logger.*password|log.*password" + - "logger.*credit.*card|log.*ssn" + - "print\\(.*password" + message: "PII/sensitive data must not be logged" + + # =================== + # DATA CLASSIFICATION + # =================== + + - id: DATA-CLASS-001 + name: "Data classification scheme" + description: "System must have documented data classification" + severity: medium + check: + type: file_exists + paths: + - "docs/**/data-classification.md" + - "docs/**/data-governance.md" + - "docs/**/security*.md" + message: "Document data classification scheme" + + - id: DATA-CLASS-002 + name: "Sensitive data encryption at rest" + description: "Highly sensitive data should be encrypted in database" + severity: medium + check: + type: pattern_recommended + paths: + - "models/database/*.py" + patterns: + - "encrypt|EncryptedType|Fernet" + message: "Consider encryption for highly sensitive fields" + + - id: DATA-CLASS-003 + name: "Data masking in non-production" + description: "PII should be masked in development/test environments" + severity: medium + check: + type: documentation + message: "Document data
masking procedures for non-production" + + # =================== + # DATA RETENTION + # =================== + + - id: DATA-RET-001 + name: "Soft delete for audit trail" + description: "Records should use soft delete to maintain audit trail" + severity: medium + check: + type: pattern_recommended + paths: + - "models/database/*.py" + patterns: + - "deleted_at|is_deleted|soft_delete" + message: "Consider soft delete for audit trail preservation" + + - id: DATA-RET-002 + name: "Data retention policy" + description: "System must have documented data retention policy" + severity: high + check: + type: file_exists + paths: + - "docs/**/retention*.md" + - "docs/**/data*.md" + message: "Document data retention policy" + + - id: DATA-RET-003 + name: "Automated data cleanup" + description: "Old data should be automatically purged per retention policy" + severity: low + check: + type: pattern_recommended + paths: + - "app/tasks/*.py" + - "scripts/*.py" + patterns: + - "cleanup|purge|archive|retention" + message: "Consider automated data retention enforcement" + + # =================== + # DATA PRIVACY RIGHTS + # =================== + + - id: DATA-PRIV-001 + name: "Right to access (GDPR Art. 15)" + description: "Users must be able to access their personal data" + severity: high + check: + type: pattern_required + paths: + - "app/api/v1/users.py" + patterns: + - "/me|/current|get_current_user" + message: "Endpoint for users to access their data required" + + - id: DATA-PRIV-002 + name: "Right to erasure (GDPR Art. 17)" + description: "System must support user data deletion requests" + severity: high + check: + type: pattern_recommended + paths: + - "app/api/v1/users.py" + - "app/services/user_service.py" + patterns: + - "delete|remove|erase|anonymize" + message: "Support for user data deletion required" + + - id: DATA-PRIV-003 + name: "Right to portability (GDPR Art. 
20)" + description: "Users should be able to export their data" + severity: medium + check: + type: pattern_recommended + paths: + - "app/api/v1/*.py" + patterns: + - "export|download|portable" + message: "Consider data export functionality for portability" + + # =================== + # DATA INTEGRITY + # =================== + + - id: DATA-INT-001 + name: "Input validation" + description: "All input data must be validated" + severity: high + check: + type: pattern_required + paths: + - "models/schema/*.py" + patterns: + - "Field|validator|field_validator" + message: "Pydantic validation required for data integrity" + + - id: DATA-INT-002 + name: "Database constraints" + description: "Database should enforce data integrity constraints" + severity: medium + check: + type: pattern_required + paths: + - "models/database/*.py" + patterns: + - "nullable|unique|ForeignKey|CheckConstraint" + message: "Database constraints should enforce data integrity" + + - id: DATA-INT-003 + name: "Referential integrity" + description: "Foreign key relationships must be properly defined" + severity: medium + check: + type: pattern_required + paths: + - "models/database/*.py" + patterns: + - "ForeignKey|relationship" + message: "Define foreign key relationships for referential integrity" diff --git a/.audit-rules/documentation.yaml b/.audit-rules/documentation.yaml new file mode 100644 index 00000000..0e982b8b --- /dev/null +++ b/.audit-rules/documentation.yaml @@ -0,0 +1,230 @@ +# Documentation Rules +# ==================== +# Ensures required documentation for audit trail and compliance. +# Critical for demonstrating controls and due diligence. 
+ +rules: + # =================== + # PROJECT DOCUMENTATION + # =================== + + - id: DOC-PROJ-001 + name: "README file required" + description: "Project must have a README with basic information" + severity: high + check: + type: file_exists + paths: + - "README.md" + - "README.rst" + - "README.txt" + message: "Project README required" + + - id: DOC-PROJ-002 + name: "Setup instructions" + description: "README must include setup/installation instructions" + severity: medium + check: + type: pattern_required + paths: + - "README.md" + patterns: + - "[Ii]nstall|[Ss]etup|[Gg]etting [Ss]tarted" + message: "Setup instructions required in README" + + - id: DOC-PROJ-003 + name: "Contributing guidelines" + description: "Project should have contribution guidelines" + severity: low + check: + type: file_exists + paths: + - "CONTRIBUTING.md" + - "docs/guides/contributing.md" + message: "Consider adding contribution guidelines" + + # =================== + # API DOCUMENTATION + # =================== + + - id: DOC-API-001 + name: "OpenAPI documentation" + description: "API must have OpenAPI/Swagger documentation" + severity: high + check: + type: pattern_required + paths: + - "main.py" + patterns: + - "openapi|docs_url|redoc" + message: "OpenAPI documentation required for APIs" + + - id: DOC-API-002 + name: "Endpoint documentation" + description: "API endpoints must have docstrings" + severity: medium + check: + type: pattern_required + paths: + - "app/api/v1/*.py" + patterns: + - '""".*"""' + message: "API endpoints should have docstrings" + + - id: DOC-API-003 + name: "API versioning documented" + description: "API versioning strategy must be documented" + severity: medium + check: + type: file_exists + paths: + - "docs/api/*.md" + message: "API documentation required" + + # =================== + # SECURITY DOCUMENTATION + # =================== + + - id: DOC-SEC-001 + name: "Security policy" + description: "Project must have security policy" + severity: high + check: + 
type: file_exists + paths: + - "SECURITY.md" + - ".github/SECURITY.md" + message: "Security policy (SECURITY.md) required" + + - id: DOC-SEC-002 + name: "Authentication documentation" + description: "Authentication mechanism must be documented" + severity: high + check: + type: file_exists + paths: + - "docs/api/authentication.md" + - "docs/**/auth*.md" + message: "Authentication documentation required" + + - id: DOC-SEC-003 + name: "Security controls documentation" + description: "Security controls must be documented" + severity: medium + check: + type: pattern_required + paths: + - "docs/**/*.md" + patterns: + - "[Ss]ecurity|[Aa]uthentication|[Aa]uthorization" + message: "Security controls should be documented" + + # =================== + # OPERATIONAL DOCUMENTATION + # =================== + + - id: DOC-OPS-001 + name: "Deployment documentation" + description: "Deployment process must be documented" + severity: high + check: + type: file_exists + paths: + - "docs/guides/deployment.md" + - "docs/**/deploy*.md" + message: "Deployment documentation required" + + - id: DOC-OPS-002 + name: "Configuration documentation" + description: "Configuration options must be documented" + severity: medium + check: + type: file_exists + paths: + - "docs/getting-started/configuration.md" + - ".env.example" + message: "Configuration documentation required" + + - id: DOC-OPS-003 + name: "Monitoring documentation" + description: "Monitoring and alerting should be documented" + severity: low + check: + type: file_exists + paths: + - "docs/**/monitoring*.md" + - "docs/**/observability*.md" + message: "Consider documenting monitoring setup" + + # =================== + # COMPLIANCE DOCUMENTATION + # =================== + + - id: DOC-COMP-001 + name: "Data handling documentation" + description: "Data handling practices must be documented" + severity: high + check: + type: file_exists + paths: + - "docs/**/data*.md" + - "docs/**/privacy*.md" + message: "Data handling documentation 
recommended" + + - id: DOC-COMP-002 + name: "Audit controls documentation" + description: "Audit controls must be documented" + severity: medium + check: + type: pattern_required + paths: + - "docs/**/*.md" + patterns: + - "[Aa]udit|[Ll]ogging|[Tt]raceability" + message: "Audit controls should be documented" + + - id: DOC-COMP-003 + name: "Compliance requirements documented" + description: "Applicable compliance requirements must be listed" + severity: medium + check: + type: documentation + message: "Document applicable compliance requirements (GDPR, SOX, etc.)" + + # =================== + # ARCHITECTURE DOCUMENTATION + # =================== + + - id: DOC-ARCH-001 + name: "Architecture overview" + description: "System architecture must be documented" + severity: medium + check: + type: file_exists + paths: + - "docs/architecture/*.md" + message: "Architecture documentation required" + + - id: DOC-ARCH-002 + name: "Component diagram" + description: "System should have component/architecture diagram" + severity: low + check: + type: pattern_recommended + paths: + - "docs/**/*.md" + - "README.md" + patterns: + - "mermaid|diagram|architecture.*png|architecture.*svg" + message: "Consider adding architecture diagrams" + + - id: DOC-ARCH-003 + name: "Decision records" + description: "Major decisions should be documented (ADRs)" + severity: low + check: + type: file_exists + paths: + - "docs/adr/*.md" + - "docs/decisions/*.md" + message: "Consider documenting architecture decisions (ADRs)" diff --git a/.audit-rules/third_party.yaml b/.audit-rules/third_party.yaml new file mode 100644 index 00000000..c6310d12 --- /dev/null +++ b/.audit-rules/third_party.yaml @@ -0,0 +1,192 @@ +# Third-Party Risk Rules +# ====================== +# Ensures proper management of external dependencies. +# Critical for supply chain security and license compliance. 
+ +rules: + # =================== + # DEPENDENCY MANAGEMENT + # =================== + + - id: THIRD-DEP-001 + name: "Dependency lock file required" + description: "Dependencies must be locked to specific versions" + severity: high + check: + type: file_exists + paths: + - "uv.lock" + - "poetry.lock" + - "requirements.lock" + - "Pipfile.lock" + message: "Dependency lock file required for reproducible builds" + + - id: THIRD-DEP-002 + name: "Dependencies defined in manifest" + description: "All dependencies must be declared" + severity: critical + check: + type: file_exists + paths: + - "pyproject.toml" + - "requirements.txt" + - "Pipfile" + message: "Dependency manifest file required" + + - id: THIRD-DEP-003 + name: "Pinned dependency versions" + description: "Production dependencies should have pinned versions" + severity: high + check: + type: pattern_recommended + paths: + - "pyproject.toml" + patterns: + - '>=.*,<|==|~=' + message: "Consider pinning dependency version ranges" + + # =================== + # VULNERABILITY MANAGEMENT + # =================== + + - id: THIRD-VULN-001 + name: "Dependency vulnerability scanning" + description: "Dependencies must be scanned for vulnerabilities" + severity: high + check: + type: file_exists + paths: + - ".github/workflows/*.yml" + patterns: + - "safety|pip-audit|snyk|dependabot" + message: "Dependency vulnerability scanning required" + + - id: THIRD-VULN-002 + name: "Dependabot enabled" + description: "Automated dependency updates should be configured" + severity: medium + check: + type: file_exists + paths: + - ".github/dependabot.yml" + message: "Consider enabling Dependabot for security updates" + + - id: THIRD-VULN-003 + name: "Container base image scanning" + description: "Container base images should be scanned" + severity: medium + check: + type: pattern_recommended + paths: + - ".github/workflows/*.yml" + patterns: + - "trivy|grype|snyk.*container" + message: "Consider container image vulnerability scanning" + + # 
=================== + # LICENSE COMPLIANCE + # =================== + + - id: THIRD-LIC-001 + name: "License compatibility check" + description: "Dependency licenses must be compatible" + severity: high + check: + type: documentation + message: "Document license compliance verification process" + + - id: THIRD-LIC-002 + name: "No copyleft in proprietary code" + description: "GPL/AGPL dependencies require careful handling" + severity: high + check: + type: pattern_forbidden + paths: + - "pyproject.toml" + patterns: + - "gpl|agpl" + case_insensitive: true + message: "Copyleft licenses require legal review" + + - id: THIRD-LIC-003 + name: "Project license declared" + description: "Project must have explicit license" + severity: medium + check: + type: file_exists + paths: + - "LICENSE" + - "LICENSE.md" + - "LICENSE.txt" + message: "Project license file recommended" + + # =================== + # VENDOR ASSESSMENT + # =================== + + - id: THIRD-VEND-001 + name: "Trusted package sources" + description: "Packages should come from trusted sources" + severity: high + check: + type: pattern_forbidden + paths: + - "pyproject.toml" + - "requirements.txt" + patterns: + - "git\\+http://|--index-url.*http://" + message: "Only HTTPS sources allowed for packages" + + - id: THIRD-VEND-002 + name: "No direct Git dependencies in production" + description: "Production should use released packages" + severity: medium + check: + type: pattern_recommended + paths: + - "pyproject.toml" + patterns: + - "git\\+" + invert: true + message: "Prefer released packages over Git dependencies" + + - id: THIRD-VEND-003 + name: "Minimal dependencies" + description: "Only necessary dependencies should be included" + severity: low + check: + type: documentation + message: "Document justification for external dependencies" + + # =================== + # SUPPLY CHAIN SECURITY + # =================== + + - id: THIRD-CHAIN-001 + name: "Package integrity verification" + description: "Package hashes 
should be verified" + severity: medium + check: + type: pattern_recommended + paths: + - "uv.lock" + - "requirements.txt" + patterns: + - "sha256|hash" + message: "Consider hash verification for packages" + + - id: THIRD-CHAIN-002 + name: "Signed commits for dependencies" + description: "Critical dependencies should use signed releases" + severity: low + check: + type: documentation + message: "Consider verifying signatures for critical dependencies" + + - id: THIRD-CHAIN-003 + name: "Private package registry" + description: "Internal packages should use private registry" + severity: low + check: + type: documentation + message: "Document private package registry usage if applicable" diff --git a/scripts/base_validator.py b/scripts/base_validator.py index ce2d4224..8b012c19 100755 --- a/scripts/base_validator.py +++ b/scripts/base_validator.py @@ -1,465 +1,111 @@ -#!/usr/bin/env python3 """ -Base Validator -============== -Shared base class for all validation scripts (architecture, security, performance). +Base Validator Class -Provides common functionality for: -- Loading YAML configuration -- File pattern matching -- Violation tracking -- Output formatting (human-readable and JSON) +Shared functionality for all validators. 
""" -import json -import re -import sys from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from enum import Enum from pathlib import Path from typing import Any import yaml -class Severity(Enum): - """Validation severity levels""" - - ERROR = "error" - WARNING = "warning" - INFO = "info" - - -@dataclass -class Violation: - """Represents a rule violation""" - - rule_id: str - rule_name: str - severity: Severity - file_path: Path - line_number: int - message: str - context: str = "" - suggestion: str = "" - - -@dataclass -class FileResult: - """Results for a single file validation""" - - file_path: Path - errors: int = 0 - warnings: int = 0 - info: int = 0 - - @property - def passed(self) -> bool: - return self.errors == 0 - - @property - def status(self) -> str: - if self.errors > 0: - return "FAILED" - if self.warnings > 0: - return "PASSED*" - return "PASSED" - - @property - def status_icon(self) -> str: - if self.errors > 0: - return "āŒ" - if self.warnings > 0: - return "āš ļø" - return "āœ…" - - -@dataclass -class ValidationResult: - """Results of validation""" - - violations: list[Violation] = field(default_factory=list) - files_checked: int = 0 - rules_applied: int = 0 - file_results: list[FileResult] = field(default_factory=list) - - def has_errors(self) -> bool: - """Check if there are any error-level violations""" - return any(v.severity == Severity.ERROR for v in self.violations) - - def has_warnings(self) -> bool: - """Check if there are any warning-level violations""" - return any(v.severity == Severity.WARNING for v in self.violations) - - def error_count(self) -> int: - return sum(1 for v in self.violations if v.severity == Severity.ERROR) - - def warning_count(self) -> int: - return sum(1 for v in self.violations if v.severity == Severity.WARNING) - - def info_count(self) -> int: - return sum(1 for v in self.violations if v.severity == Severity.INFO) - - class BaseValidator(ABC): - """Abstract base validator class""" + 
"""Base class for architecture, security, and performance validators.""" - # Subclasses should override these - VALIDATOR_NAME = "Base Validator" - VALIDATOR_EMOJI = "šŸ”" - RULES_DIR_NAME = ".rules" - CONFIG_FILE_NAME = ".rules.yaml" + def __init__(self, rules_dir: str, project_root: Path | None = None): + self.rules_dir = rules_dir + self.project_root = project_root or Path.cwd() + self.rules: list[dict[str, Any]] = [] + self.errors: list[dict[str, Any]] = [] + self.warnings: list[dict[str, Any]] = [] - def __init__(self, config_path: Path = None, verbose: bool = False): - """Initialize validator with configuration""" - self.project_root = Path.cwd() - self.config_path = config_path or self.project_root / self.CONFIG_FILE_NAME - self.verbose = verbose - self.config = self._load_config() - self.result = ValidationResult() - - def _load_config(self) -> dict[str, Any]: - """ - Load validation rules from YAML config. - - Supports two modes: - 1. Split directory mode: rules directory with multiple YAML files - 2. Single file mode: single YAML file (legacy) - - The split directory mode takes precedence if it exists. - """ - # Check for split directory mode first - rules_dir = self.project_root / self.RULES_DIR_NAME - if rules_dir.is_dir(): - return self._load_config_from_directory(rules_dir) - - # Fall back to single file mode - if not self.config_path.exists(): - print(f"āŒ Configuration file not found: {self.config_path}") - print(f" (Also checked for directory: {rules_dir})") - sys.exit(1) - - with open(self.config_path) as f: - config = yaml.safe_load(f) - - print(f"šŸ“‹ Loaded {self.VALIDATOR_NAME}: {config.get('project', 'unknown')}") - return config - - def _load_config_from_directory(self, rules_dir: Path) -> dict[str, Any]: - """ - Load and merge configuration from split YAML files in a directory. - - Reads _main.yaml first for base config, then merges all other YAML files. 
- """ - config: dict[str, Any] = {} - - # Load _main.yaml first (contains project info, principles, ignore patterns) - main_file = rules_dir / "_main.yaml" - if main_file.exists(): - with open(main_file) as f: - config = yaml.safe_load(f) or {} - - # Load all other YAML files and merge their contents - yaml_files = sorted(rules_dir.glob("*.yaml")) - for yaml_file in yaml_files: - if yaml_file.name == "_main.yaml": - continue # Already loaded - - with open(yaml_file) as f: - file_config = yaml.safe_load(f) or {} - - # Merge rule sections from this file into main config - for key, value in file_config.items(): - if key.endswith("_rules") and isinstance(value, list): - # Merge rule lists - if key not in config: - config[key] = [] - config[key].extend(value) - elif key not in config: - # Add new top-level keys - config[key] = value - - print(f"šŸ“‹ Loaded {self.VALIDATOR_NAME}: {config.get('project', 'unknown')}") - print(f" (from {len(yaml_files)} files in {rules_dir.name}/)") - return config - - def _should_ignore_file(self, file_path: Path) -> bool: - """Check if a file should be ignored based on config patterns""" - import fnmatch - - ignore_config = self.config.get("ignore", {}) - ignore_files = ignore_config.get("files", []) - - # Get relative path for matching - try: - rel_path = file_path.relative_to(self.project_root) - except ValueError: - rel_path = file_path - - rel_path_str = str(rel_path) - - for pattern in ignore_files: - # Handle glob patterns using fnmatch - if "*" in pattern: - # fnmatch handles *, **, and ? 
patterns correctly - if fnmatch.fnmatch(rel_path_str, pattern): - return True - # Also check each path component for patterns like **/.venv/** - # This handles cases where the pattern expects any prefix - if pattern.startswith("**/"): - # Try matching without the **/ prefix (e.g., .venv/** matches .venv/foo) - suffix_pattern = pattern[3:] # Remove "**/"" - if fnmatch.fnmatch(rel_path_str, suffix_pattern): - return True - elif pattern in rel_path_str: - return True - - return False - - def _add_violation( - self, - rule_id: str, - rule_name: str, - severity: Severity, - file_path: Path, - line_number: int, - message: str, - context: str = "", - suggestion: str = "", - ): - """Add a violation to the results""" - # Check for inline noqa comment - if f"noqa: {rule_id.lower()}" in context.lower(): + def load_rules(self) -> None: + """Load rules from YAML files.""" + rules_path = self.project_root / self.rules_dir + if not rules_path.exists(): + print(f"Rules directory not found: {rules_path}") return - self.result.violations.append( - Violation( - rule_id=rule_id, - rule_name=rule_name, - severity=severity, - file_path=file_path, - line_number=line_number, - message=message, - context=context, - suggestion=suggestion, - ) - ) + for rule_file in rules_path.glob("*.yaml"): + if rule_file.name.startswith("_"): + continue # Skip main config - def _get_rule(self, rule_id: str) -> dict | None: - """Look up a rule by ID across all rule categories""" - for key, value in self.config.items(): - if key.endswith("_rules") and isinstance(value, list): - for rule in value: - if rule.get("id") == rule_id: - return rule - return None - - def _check_pattern_in_file( - self, - file_path: Path, - content: str, - lines: list[str], - pattern: str, - rule_id: str, - rule_name: str, - severity: Severity, - message: str, - suggestion: str = "", - exclude_patterns: list[str] = None, - ): - """Check for a regex pattern in a file and report violations""" - exclude_patterns = exclude_patterns or 
[] - - for i, line in enumerate(lines, 1): - if re.search(pattern, line, re.IGNORECASE): - # Check exclusions - should_exclude = False - for exclude in exclude_patterns: - if exclude in line: - should_exclude = True - break - - if not should_exclude: - self._add_violation( - rule_id=rule_id, - rule_name=rule_name, - severity=severity, - file_path=file_path, - line_number=i, - message=message, - context=line.strip()[:100], - suggestion=suggestion, - ) + with open(rule_file) as f: + data = yaml.safe_load(f) + if data and "rules" in data: + self.rules.extend(data["rules"]) @abstractmethod - def validate_all(self, target_path: Path = None) -> ValidationResult: - """Validate all files in a directory - must be implemented by subclasses""" - pass + def validate(self) -> bool: + """Run validation. Returns True if passed.""" - def validate_file(self, file_path: Path, quiet: bool = False) -> ValidationResult: - """Validate a single file""" - if not file_path.exists(): - if not quiet: - print(f"āŒ File not found: {file_path}") - return self.result - - if not file_path.is_file(): - if not quiet: - print(f"āŒ Not a file: {file_path}") - return self.result - - if not quiet: - print(f"\n{self.VALIDATOR_EMOJI} Validating single file: {file_path}\n") - - # Resolve file path to absolute - file_path = file_path.resolve() - - if self._should_ignore_file(file_path): - if not quiet: - print("ā­ļø File is in ignore list, skipping") - return self.result - - self.result.files_checked += 1 - - # Track violations before this file - violations_before = len(self.result.violations) - - content = file_path.read_text() - lines = content.split("\n") - - # Call subclass-specific validation - self._validate_file_content(file_path, content, lines) - - # Calculate violations for this file - file_violations = self.result.violations[violations_before:] - errors = sum(1 for v in file_violations if v.severity == Severity.ERROR) - warnings = sum(1 for v in file_violations if v.severity == 
Severity.WARNING) - info = sum(1 for v in file_violations if v.severity == Severity.INFO) - - # Track file result - self.result.file_results.append( - FileResult(file_path=file_path, errors=errors, warnings=warnings, info=info) + def add_error( + self, rule_id: str, message: str, file: str = "", line: int = 0 + ) -> None: + """Add an error.""" + self.errors.append( + { + "rule_id": rule_id, + "message": message, + "file": file, + "line": line, + "severity": "error", + } ) - return self.result - - @abstractmethod - def _validate_file_content(self, file_path: Path, content: str, lines: list[str]): - """Validate file content - must be implemented by subclasses""" - pass - - def output_results(self, json_output: bool = False, errors_only: bool = False): - """Output validation results""" - if json_output: - self._output_json() - else: - self._output_human(errors_only) - - def _output_json(self): - """Output results as JSON - - Format matches code quality service expectations: - - file_path (not file) - - line_number (not line) - - total_violations count - """ - try: - rel_base = self.project_root - except Exception: - rel_base = Path.cwd() - - def get_relative_path(file_path: Path) -> str: - """Get relative path from project root""" - try: - return str(file_path.relative_to(rel_base)) - except ValueError: - return str(file_path) - - output = { - "validator": self.VALIDATOR_NAME, - "files_checked": self.result.files_checked, - "total_violations": len(self.result.violations), - "errors": self.result.error_count(), - "warnings": self.result.warning_count(), - "info": self.result.info_count(), - "violations": [ - { - "rule_id": v.rule_id, - "rule_name": v.rule_name, - "severity": v.severity.value, - "file_path": get_relative_path(v.file_path), - "line_number": v.line_number, - "message": v.message, - "context": v.context, - "suggestion": v.suggestion, - } - for v in self.result.violations - ], - } - print(json.dumps(output, indent=2)) - - def _output_human(self, 
errors_only: bool = False): - """Output results in human-readable format""" - print("\n" + "=" * 80) - print(f"šŸ“Š {self.VALIDATOR_NAME.upper()} REPORT") - print("=" * 80) - - errors = [v for v in self.result.violations if v.severity == Severity.ERROR] - warnings = [v for v in self.result.violations if v.severity == Severity.WARNING] - info = [v for v in self.result.violations if v.severity == Severity.INFO] - - print( - f"\nFiles checked: {self.result.files_checked}" - ) - print( - f"Findings: {len(errors)} errors, {len(warnings)} warnings, {len(info)} info" + def add_warning( + self, rule_id: str, message: str, file: str = "", line: int = 0 + ) -> None: + """Add a warning.""" + self.warnings.append( + { + "rule_id": rule_id, + "message": message, + "file": file, + "line": line, + "severity": "warning", + } ) - if errors: - print(f"\n\nāŒ ERRORS ({len(errors)}):") - print("-" * 80) - for v in errors: - self._print_violation(v) + def add_info( + self, rule_id: str, message: str, file: str = "", line: int = 0 + ) -> None: + """Add an informational note.""" + self.warnings.append( + { + "rule_id": rule_id, + "message": message, + "file": file, + "line": line, + "severity": "info", + } + ) - if warnings and not errors_only: - print(f"\n\nāš ļø WARNINGS ({len(warnings)}):") - print("-" * 80) - for v in warnings: - self._print_violation(v) + def print_results(self) -> None: + """Print validation results.""" + if not self.errors and not self.warnings: + print(f"āœ… All {self.rules_dir} rules passed!") + return - if info and not errors_only: - print(f"\nā„¹ļø INFO ({len(info)}):") - print("-" * 80) - for v in info: - self._print_violation(v) + if self.errors: + print(f"\nāŒ {len(self.errors)} errors found:") + for error in self.errors: + print(f" [{error['rule_id']}] {error['message']}") + if error["file"]: + print(f" File: {error['file']}:{error['line']}") - print("\n" + "=" * 80) - if errors: - print("āŒ VALIDATION FAILED") - elif warnings: - print(f"āš ļø 
VALIDATION PASSED WITH {len(warnings)} WARNING(S)") - else: - print("āœ… VALIDATION PASSED") - print("=" * 80) + if self.warnings: + print(f"\nāš ļø {len(self.warnings)} warnings:") + for warning in self.warnings: + print(f" [{warning['rule_id']}] {warning['message']}") + if warning["file"]: + print(f" File: {warning['file']}:{warning['line']}") - def _print_violation(self, v: Violation): - """Print a single violation""" - try: - rel_path = v.file_path.relative_to(self.project_root) - except ValueError: - rel_path = v.file_path - - print(f"\n [{v.rule_id}] {v.rule_name}") - print(f" File: {rel_path}:{v.line_number}") - print(f" Issue: {v.message}") - if v.context and self.verbose: - print(f" Context: {v.context}") - if v.suggestion: - print(f" šŸ’” Suggestion: {v.suggestion}") - - def get_exit_code(self) -> int: - """Get appropriate exit code based on results""" - if self.result.has_errors(): - return 1 - return 0 + def run(self) -> int: + """Run validation and return exit code.""" + self.load_rules() + passed = self.validate() + self.print_results() + return 0 if passed else 1 diff --git a/scripts/validate_audit.py b/scripts/validate_audit.py new file mode 100644 index 00000000..e7800120 --- /dev/null +++ b/scripts/validate_audit.py @@ -0,0 +1,532 @@ +#!/usr/bin/env python3 +""" +IT Internal Audit Validator + +Validates code against internal audit rules defined in .audit-rules/ +Focuses on governance, compliance, and control requirements. 
+""" + +import re +import sys +from pathlib import Path + + +# Add project root to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from scripts.base_validator import BaseValidator + + +class AuditValidator(BaseValidator): + """Validates IT internal audit rules.""" + + def __init__(self, project_root: Path | None = None): + super().__init__(".audit-rules", project_root) + + def validate(self) -> bool: + """Run all audit validations.""" + self._validate_audit_trail() + self._validate_access_control() + self._validate_data_governance() + self._validate_compliance() + self._validate_change_management() + self._validate_third_party() + self._validate_documentation() + return len(self.errors) == 0 + + # ================== + # AUDIT TRAIL + # ================== + + def _validate_audit_trail(self) -> None: + """Validate audit trail requirements.""" + # Check authentication logging + auth_files = [ + self.project_root / "app" / "api" / "v1" / "auth.py", + self.project_root / "app" / "routes" / "admin.py", + ] + + for file in auth_files: + if file.exists(): + content = file.read_text() + if "logger" not in content: + self.add_error( + "AUDIT-LOG-001", + "Authentication operations must include logging", + str(file), + ) + + # Check service layer logging + services_path = self.project_root / "app" / "services" + if services_path.exists(): + for file in services_path.glob("*.py"): + if file.name == "__init__.py": + continue + content = file.read_text() + # Services that modify data should have logging + if re.search(r"def (create|update|delete)", content): + if "logger" not in content: + self.add_warning( + "AUDIT-LOG-002", + "Service with data modifications should include logging", + str(file), + ) + + # Check for audit timestamp fields in models + # Models can have timestamps directly or inherit from BaseModel/TimestampMixin + models_path = self.project_root / "models" / "database" + if models_path.exists(): + for file in models_path.glob("*.py"): + # 
audit_log.py uses timestamp field instead of created_at/updated_at + if file.name in ("__init__.py", "base.py", "audit_log.py"): + continue + content = file.read_text() + if "class " in content: # Has model definition + # Check if timestamps are present directly or via inheritance + has_timestamps = ( + "created_at" in content + or "updated_at" in content + or "BaseModel" in content # Inherits from BaseModel + or "TimestampMixin" in content # Uses TimestampMixin + ) + if not has_timestamps: + self.add_warning( + "AUDIT-FIELD-001", + "Database model should include audit timestamp fields", + str(file), + ) + + # Check for forbidden log modification patterns + self._check_forbidden_patterns( + paths=["app/**/*.py"], + patterns=[ + r"os\.remove.*\.log", + r"truncate.*log", + r"open.*\.log.*['\"]w['\"]", + ], + rule_id="AUDIT-INT-001", + message="Application must not modify or delete log files", + ) + + # ================== + # ACCESS CONTROL + # ================== + + def _validate_access_control(self) -> None: + """Validate access control requirements.""" + # Check API endpoints have authentication + api_path = self.project_root / "app" / "api" / "v1" + if api_path.exists(): + for file in api_path.glob("*.py"): + # Skip endpoints that are intentionally unauthenticated + if file.name in ("__init__.py", "health.py", "metrics.py"): + continue + content = file.read_text() + # Check for authentication dependency + if "@router" in content: + if not re.search( + r"CurrentUser|Depends.*get_current_user|AdminUser", content + ): + # auth.py handles its own auth + if file.name != "auth.py": + self.add_warning( + "ACCESS-AUTH-001", + "API endpoint should require authentication", + str(file), + ) + + # Check admin routes verify admin role + admin_route = self.project_root / "app" / "routes" / "admin.py" + if admin_route.exists(): + content = admin_route.read_text() + if "is_admin" not in content and "admin_required" not in content: + self.add_warning( + "ACCESS-AUTH-002", + "Admin 
routes should verify admin privileges", + str(admin_route), + ) + + # Check password hashing + security_file = self.project_root / "app" / "core" / "security.py" + if security_file.exists(): + content = security_file.read_text() + if not re.search(r"bcrypt|argon2|scrypt|pbkdf2", content, re.IGNORECASE): + self.add_error( + "ACCESS-ACCT-003", + "Passwords must use approved hashing algorithms", + str(security_file), + ) + + # Check password not in API responses + # Note: Only flag if a class with "Response" in name directly defines password_hash + # Internal schemas (like UserInDB) are not flagged as they're not API responses + schema_path = self.project_root / "models" / "schema" + if schema_path.exists(): + for file in schema_path.glob("*.py"): + content = file.read_text() + # Check for Response classes that directly define password_hash + # Split by class definitions and check each + class_blocks = re.split(r"(?=^class\s)", content, flags=re.MULTILINE) + for block in class_blocks: + # Check if this class is a Response class + class_match = re.match(r"class\s+(\w*Response\w*)", block) + if class_match: + # Check if password_hash is defined in this class (not inherited) + if "password_hash:" in block or "password_hash =" in block: + if "exclude" not in block.lower(): + self.add_error( + "ACCESS-PRIV-002", + f"Password hash must be excluded from {class_match.group(1)}", + str(file), + ) + + # ================== + # DATA GOVERNANCE + # ================== + + def _validate_data_governance(self) -> None: + """Validate data governance requirements.""" + # Check PII not logged + # Note: Patterns detect actual password values, not descriptive usage like "Password reset" + # We look for patterns that suggest password values are being logged: + # - password= or password: followed by a variable + # - %s or {} after password indicating interpolation of password value + self._check_forbidden_patterns( + paths=["app/**/*.py", "middleware/**/*.py"], + patterns=[ + 
r"logger\.\w+\(.*password\s*[=:]\s*['\"]?%", # password=%s + r"logger\.\w+\(.*password\s*[=:]\s*\{", # password={var} + r"logging\.\w+\(.*password\s*[=:]\s*['\"]?%", # password=%s + r"print\(.*password\s*=", # print(password=xxx) + r"logger.*credit.*card.*\d", # credit card with numbers + r"logger.*\bssn\b.*\d", # SSN with numbers + ], + rule_id="DATA-PII-003", + message="PII/sensitive data must not be logged", + ) + + # Check input validation (Pydantic) + schema_path = self.project_root / "models" / "schema" + if schema_path.exists(): + has_validation = False + for file in schema_path.glob("*.py"): + content = file.read_text() + if re.search(r"Field|validator|field_validator", content): + has_validation = True + break + if not has_validation: + self.add_error( + "DATA-INT-001", + "Pydantic validation required for data integrity", + str(schema_path), + ) + + # Check user data access endpoint exists (GDPR) + users_api = self.project_root / "app" / "api" / "v1" / "users.py" + if users_api.exists(): + content = users_api.read_text() + if "/me" not in content and "current" not in content.lower(): + self.add_warning( + "DATA-PRIV-001", + "Endpoint for users to access their own data required (GDPR Art. 
15)", + str(users_api), + ) + + # ================== + # COMPLIANCE + # ================== + + def _validate_compliance(self) -> None: + """Validate compliance requirements.""" + # Check HTTPS configuration + config_files = [ + self.project_root / "app" / "core" / "config.py", + self.project_root / "main.py", + ] + https_configured = False + for file in config_files: + if file.exists(): + content = file.read_text() + if re.search(r"https|SSL|TLS|SECURE", content, re.IGNORECASE): + https_configured = True + break + if not https_configured: + self.add_warning( + "COMP-REG-002", + "HTTPS configuration should be documented", + "app/core/config.py", + ) + + # Check version control + if not (self.project_root / ".git").exists(): + self.add_error( + "COMP-EVID-003", + "Version control (Git) is required", + str(self.project_root), + ) + + # Check CI/CD exists + ci_workflow = self.project_root / ".github" / "workflows" / "ci.yml" + if not ci_workflow.exists(): + self.add_warning( + "COMP-EVID-001", + "CI workflow for automated testing recommended", + ".github/workflows/ci.yml", + ) + + # Check code review process + pr_template = self.project_root / ".github" / "PULL_REQUEST_TEMPLATE.md" + if not pr_template.exists(): + self.add_warning( + "COMP-POL-001", + "Pull request template recommended for code review", + ".github/PULL_REQUEST_TEMPLATE.md", + ) + + # ================== + # CHANGE MANAGEMENT + # ================== + + def _validate_change_management(self) -> None: + """Validate change management requirements.""" + # Check .gitignore exists and excludes secrets + gitignore = self.project_root / ".gitignore" + if gitignore.exists(): + content = gitignore.read_text() + required_exclusions = [".env", "*.pem", "*.key"] + for pattern in required_exclusions: + # Simplified check - just look for the pattern + if pattern.replace("*", "") not in content: + self.add_warning( + "CHANGE-VC-003", + f"Secret pattern '{pattern}' should be in .gitignore", + str(gitignore), + ) + else: + 
self.add_error( + "CHANGE-VC-002", + ".gitignore file required", + str(self.project_root), + ) + + # Check database migrations + alembic_dir = self.project_root / "alembic" + if not alembic_dir.exists(): + self.add_warning( + "CHANGE-ROLL-001", + "Database migration tool (Alembic) recommended", + "alembic/", + ) + else: + # Check for downgrade functions + versions_dir = alembic_dir / "versions" + if versions_dir.exists(): + for file in versions_dir.glob("*.py"): + content = file.read_text() + if "def upgrade" in content and "def downgrade" not in content: + self.add_warning( + "CHANGE-ROLL-002", + "Migration should include downgrade function", + str(file), + ) + + # Check environment separation + config_file = self.project_root / "app" / "core" / "config.py" + if config_file.exists(): + content = config_file.read_text() + if not re.search(r"ENVIRONMENT|development|staging|production", content): + self.add_warning( + "CHANGE-DEP-001", + "Environment separation configuration recommended", + str(config_file), + ) + + # ================== + # THIRD PARTY + # ================== + + def _validate_third_party(self) -> None: + """Validate third-party dependency management.""" + # Check dependency lock file exists + lock_files = ["uv.lock", "poetry.lock", "Pipfile.lock", "requirements.lock"] + has_lock = any((self.project_root / f).exists() for f in lock_files) + if not has_lock: + self.add_warning( + "THIRD-DEP-001", + "Dependency lock file recommended for reproducible builds", + "uv.lock or similar", + ) + + # Check dependency manifest exists + manifest_files = ["pyproject.toml", "requirements.txt", "Pipfile"] + has_manifest = any((self.project_root / f).exists() for f in manifest_files) + if not has_manifest: + self.add_error( + "THIRD-DEP-002", + "Dependency manifest file required", + "pyproject.toml", + ) + + # Check for Dependabot + dependabot = self.project_root / ".github" / "dependabot.yml" + if not dependabot.exists(): + self.add_info( + "THIRD-VULN-002", + 
"Consider enabling Dependabot for security updates", + ".github/dependabot.yml", + ) + + # Check for insecure package sources + pyproject = self.project_root / "pyproject.toml" + if pyproject.exists(): + content = pyproject.read_text() + if "http://" in content and "https://" not in content: + self.add_error( + "THIRD-VEND-001", + "Only HTTPS sources allowed for packages", + str(pyproject), + ) + + # ================== + # DOCUMENTATION + # ================== + + def _validate_documentation(self) -> None: + """Validate documentation requirements.""" + # Check README exists + readme_files = ["README.md", "README.rst", "README.txt"] + has_readme = any((self.project_root / f).exists() for f in readme_files) + if not has_readme: + self.add_error( + "DOC-PROJ-001", + "Project README required", + "README.md", + ) + else: + # Check README has setup instructions + for readme in readme_files: + readme_path = self.project_root / readme + if readme_path.exists(): + content = readme_path.read_text().lower() + has_setup = any( + term in content + for term in [ + "install", + "setup", + "quick start", + "getting started", + ] + ) + if not has_setup: + self.add_warning( + "DOC-PROJ-002", + "README should include setup instructions", + str(readme_path), + ) + break + + # Check security policy exists + security_files = ["SECURITY.md", ".github/SECURITY.md"] + has_security = any((self.project_root / f).exists() for f in security_files) + if not has_security: + self.add_warning( + "DOC-SEC-001", + "Security policy (SECURITY.md) recommended", + "SECURITY.md", + ) + + # Check API documentation + docs_api = self.project_root / "docs" / "api" + if not docs_api.exists() or not list(docs_api.glob("*.md")): + self.add_warning( + "DOC-API-003", + "API documentation recommended", + "docs/api/", + ) + + # Check authentication documentation + auth_doc = self.project_root / "docs" / "api" / "authentication.md" + if not auth_doc.exists(): + self.add_warning( + "DOC-SEC-002", + "Authentication 
documentation recommended", + "docs/api/authentication.md", + ) + + # Check architecture documentation + arch_docs = self.project_root / "docs" / "architecture" + if not arch_docs.exists() or not list(arch_docs.glob("*.md")): + self.add_warning( + "DOC-ARCH-001", + "Architecture documentation recommended", + "docs/architecture/", + ) + + # Check deployment documentation + deploy_doc = self.project_root / "docs" / "guides" / "deployment.md" + if not deploy_doc.exists(): + self.add_warning( + "DOC-OPS-001", + "Deployment documentation recommended", + "docs/guides/deployment.md", + ) + + # ================== + # HELPERS + # ================== + + def _check_forbidden_patterns( + self, + paths: list[str], + patterns: list[str], + rule_id: str, + message: str, + ) -> None: + """Check for forbidden patterns in files.""" + for path_pattern in paths: + if "**" in path_pattern: + base, pattern = path_pattern.split("**", 1) + base_path = self.project_root / base.rstrip("/") + if base_path.exists(): + files = base_path.rglob(pattern.lstrip("/")) + else: + continue + else: + files = [self.project_root / path_pattern] + + for file in files: + if not file.exists() or not file.is_file(): + continue + try: + content = file.read_text() + for pattern in patterns: + if re.search(pattern, content, re.IGNORECASE): + self.add_error(rule_id, message, str(file)) + except Exception: + pass + + +def main() -> int: + """Run audit validation.""" + import argparse + + parser = argparse.ArgumentParser(description="Validate IT internal audit rules") + parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output") + parser.add_argument( + "--format", + choices=["text", "json"], + default="text", + help="Output format", + ) + args = parser.parse_args() + + validator = AuditValidator() + validator.load_rules() + success = validator.validate() + validator.print_results() + + return 0 if success else 1 + + +if __name__ == "__main__": + sys.exit(main())