# orion/.performance-rules/database.yaml

# Database Performance Rules
# ==========================

database_rules:
  # PERF-001: warns when a relationship is accessed inside a loop over query
  # results (the N+1 query problem described below).
  - id: 'PERF-001'
    name: 'N+1 query detection'
    severity: warning
    description: |
      Accessing relationships in loops causes N+1 queries.
      For each item in a list, a separate query is executed.

      Solutions:
      - joinedload(): Eager load with JOIN
      - selectinload(): Eager load with IN clause
      - subqueryload(): Eager load with subquery
    file_pattern: '**/service*.py|**/api/**/*.py'
    anti_patterns:
      # Single-quoted YAML scalars keep backslashes verbatim, so every regex
      # escape below is written with exactly one backslash.
      - 'for\s+\w+\s+in\s+\w+\.all\(\):\s*\n[^}]*\.\w+\.\w+'
    suggested_patterns:
      - 'joinedload|selectinload|subqueryload'
    example_bad: |
      orders = db.query(Order).all()
      for order in orders:
          customer_name = order.customer.name  # N+1 query!
    example_good: |
      orders = db.query(Order).options(
          joinedload(Order.customer)
      ).all()
      for order in orders:
          customer_name = order.customer.name  # Already loaded

  # PERF-002: informational rule recommending eager loading when related
  # data is always needed. It defines no anti_patterns, only suggestions.
  - id: 'PERF-002'
    name: 'Eager loading for known relationships'
    severity: info
    description: |
      When you always need related data, use eager loading
      to reduce the number of database round trips.
    file_pattern: '**/service*.py'
    suggested_patterns:
      - 'joinedload|selectinload|subqueryload'

  # PERF-003: warns about `.all()` calls that carry no limit or slice.
  - id: "PERF-003"
    name: "Query result limiting"
    severity: warning
    description: |
      All list queries should have pagination or limits.
      Unbounded queries can cause memory issues and slow responses.
    file_pattern: "**/service*.py|**/api/**/*.py"
    anti_patterns:
      # Single-quoted YAML scalars do NOT process backslash escapes, so each
      # regex escape must be written with ONE backslash (as PERF-001 does).
      # With doubled backslashes the regex would look for a literal backslash
      # and could never match `.all()`.
      # NOTE(review): the negative lookahead only inspects text AFTER
      # `.all()` on the same line, so the `.limit(100).all()` form shown in
      # example_good is not exempted by this regex alone - confirm how the
      # checker combines suggested_patterns / exclude_patterns with it.
      - '\.all\(\)(?![^\n]*limit|[^\n]*\[:)'
    exclude_patterns:
      # NOTE(review): these look like literal substrings rather than regexes
      # (".filter(" would be an unbalanced regex) - confirm with the checker.
      - "# noqa: PERF-003"
      - "# bounded query"
      - ".filter("
    suggested_patterns:
      - "limit|offset|skip|paginate"
    example_bad: |
      all_products = db.query(Product).all()
    example_good: |
      products = db.query(Product).limit(100).all()
      # Or with pagination
      products = db.query(Product).offset(skip).limit(limit).all()

  # PERF-004: informational rule about indexing columns used for filtering.
  # Applies to model files; it defines suggestions only.
  - id: 'PERF-004'
    name: 'Index usage for filtered columns'
    severity: info
    description: |
      Columns frequently used in WHERE clauses should have indexes:
      - Foreign keys (vendor_id, customer_id)
      - Status fields
      - Date fields used for filtering
      - Boolean flags used for filtering
    file_pattern: '**/models/database/*.py'
    suggested_patterns:
      # Single quotes keep the backslash verbatim: the value is `Index\(`.
      - 'index=True|Index\('

  # PERF-005: informational rule recommending column-restricted loads
  # (with_entities / load_only / defer) for large tables.
  - id: 'PERF-005'
    name: 'Select only needed columns'
    severity: info
    description: |
      For large tables, select only the columns you need.
      Use .with_entities() or load_only() to reduce data transfer.
    file_pattern: '**/service*.py'
    suggested_patterns:
      - 'with_entities|load_only|defer'
    example_good: |
      # Only load id and name columns
      products = db.query(Product).options(
          load_only(Product.id, Product.name)
      ).all()

  # PERF-006: warns about per-record db.add()/.save() calls inside a loop,
  # where a bulk operation could be used instead.
  - id: "PERF-006"
    name: "Bulk operations for multiple records"
    severity: warning
    description: |
      Use bulk operations instead of individual operations in loops:
      - bulk_insert_mappings() for inserts
      - bulk_update_mappings() for updates
      - add_all() for ORM inserts
    file_pattern: "**/service*.py"
    anti_patterns:
      # Single-quoted YAML scalars do NOT process backslash escapes, so each
      # regex escape is written with ONE backslash (matching PERF-001).
      # Doubled backslashes here would make the regex search for literal
      # backslash characters and never match real code.
      - 'for\s+\w+\s+in\s+\w+:\s*\n[^}]*db\.add\s*\('
      - 'for\s+\w+\s+in\s+\w+:\s*\n[^}]*\.save\s*\('
    suggested_patterns:
      - "bulk_insert_mappings|bulk_update_mappings|add_all"
    example_bad: |
      for item in items:
          product = Product(**item)
          db.add(product)
    example_good: |
      products = [Product(**item) for item in items]
      db.add_all(products)

  # PERF-007: informational rule listing the connection-pool settings that
  # should appear in database/config modules.
  - id: 'PERF-007'
    name: 'Connection pool configuration'
    severity: info
    description: |
      Configure database connection pool for optimal performance:
      - pool_size: Number of persistent connections
      - max_overflow: Additional connections allowed
      - pool_pre_ping: Check connection health
      - pool_recycle: Recycle connections periodically
    file_pattern: '**/database.py|**/config*.py'
    suggested_patterns:
      - 'pool_size|pool_pre_ping|pool_recycle|max_overflow'

  # PERF-008: flags count()-based existence checks and points to EXISTS.
  - id: "PERF-008"
    name: "Use EXISTS for existence checks"
    severity: info
    description: |
      Use EXISTS or .first() is not None instead of count() > 0.
      EXISTS stops at first match, count() scans all matches.
    file_pattern: "**/service*.py"
    anti_patterns:
      # Single-quoted YAML scalars do NOT process backslash escapes, so each
      # regex escape is written with ONE backslash (matching PERF-001).
      # Doubled backslashes would never match `.count() > 0` in real code.
      - '\.count\(\)\s*>\s*0'
      - '\.count\(\)\s*>=\s*1'
      - '\.count\(\)\s*!=\s*0'
    suggested_patterns:
      # Double-quoted scalar: `\\` is a YAML escape that yields one backslash.
      - "exists\\(\\)|scalar\\(exists"
    example_bad: |
      if db.query(Order).filter_by(customer_id=id).count() > 0:
    example_good: |
      exists_query = db.query(exists().where(Order.customer_id == id))
      if db.scalar(exists_query):

  # PERF-009: warns about attribute assignments performed one row at a time
  # inside a loop, where a single filtered .update() would do.
  - id: "PERF-009"
    name: "Batch updates instead of loops"
    severity: warning
    description: |
      Use .update() with filters instead of updating in a loop.
      One UPDATE statement is faster than N individual updates.
    file_pattern: "**/service*.py"
    anti_patterns:
      # Single-quoted YAML scalars do NOT process backslash escapes, so each
      # regex escape is written with ONE backslash (matching PERF-001).
      # The trailing `(?!=)` keeps `obj.attr == value` comparisons from being
      # reported as assignments.
      - 'for\s+\w+\s+in\s+\w+:\s*\n[^}]*\w+\.\w+\s*=(?!=)'
    suggested_patterns:
      # Double-quoted scalar: `\\` is a YAML escape that yields one backslash.
      - "\\.update\\(\\{"
    example_bad: |
      for product in products:
          product.is_active = False
          db.add(product)
    example_good: |
      db.query(Product).filter(
          Product.id.in_(product_ids)
      ).update({"is_active": False}, synchronize_session=False)

  # PERF-010: guidance-only rule (no patterns defined) about not loading
  # whole rows when only some columns are needed.
  - id: 'PERF-010'
    name: 'Avoid SELECT * patterns'
    severity: info
    description: |
      When you only need specific columns, don't load entire rows.
      This reduces memory usage and network transfer.
    file_pattern: '**/service*.py'

  # PERF-011: guidance-only rule (no patterns defined) summarising when to
  # use each relationship loading strategy.
  - id: 'PERF-011'
    name: 'Use appropriate join strategies'
    severity: info
    description: |
      Choose the right join strategy:
      - joinedload: Few related items, always needed
      - selectinload: Many related items, always needed
      - subqueryload: Complex queries, many related items
      - lazyload: Rarely accessed relationships
    file_pattern: '**/service*.py'

  # PERF-012: guidance-only rule (no patterns defined) about keeping
  # transactions short.
  - id: 'PERF-012'
    name: 'Transaction scope optimization'
    severity: warning
    description: |
      Keep transactions short and focused:
      - Don't hold transactions during I/O
      - Commit after bulk operations
      - Use read-only transactions when possible
    file_pattern: '**/service*.py'

  # PERF-013: informational rule suggesting caching for rarely changing data.
  - id: 'PERF-013'
    name: 'Query result caching'
    severity: info
    description: |
      Consider caching for:
      - Frequently accessed, rarely changed data
      - Configuration tables
      - Reference data (categories, statuses)
    file_pattern: '**/service*.py'
    suggested_patterns:
      # Quoted because a plain scalar may not begin with `@`.
      - '@cache|@lru_cache|redis|memcache'

  # PERF-014: informational rule about composite indexes for queries that
  # filter on several columns.
  - id: 'PERF-014'
    name: 'Composite indexes for multi-column filters'
    severity: info
    description: |
      Queries filtering on multiple columns benefit from composite indexes.
      Order columns by selectivity (most selective first).
    file_pattern: '**/models/database/*.py'
    suggested_patterns:
      # Single quotes keep backslashes verbatim; the pattern looks for an
      # Index(...) call containing at least one comma.
      - 'Index\([^)]*,[^)]*\)'

  # PERF-015: guidance-only rule (no patterns defined) about replacing
  # correlated subqueries with JOINs or window functions.
  - id: 'PERF-015'
    name: 'Avoid correlated subqueries'
    severity: info
    description: |
      Correlated subqueries execute once per row.
      Use JOINs or window functions instead when possible.
    file_pattern: '**/service*.py'