- Add validator_type field to scans and violations (architecture, security, performance) - Create security validator with SEC-xxx rules - Create performance validator with PERF-xxx rules - Add base validator class for shared functionality - Add validate_all.py script to run all validators - Update code quality service with validator type filtering - Add validator type tabs to dashboard UI - Add validator type filter to violations list - Update stats response with per-validator breakdown - Add security and performance rules documentation - Add chat-bubble icons to icon library 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
# Memory Management Performance Rules
# ====================================

memory_rules:
  - id: "PERF-046"
    name: "Generators for large datasets"
    severity: warning
    description: |
      Use generators/iterators for processing large datasets.
      Avoids loading everything into memory at once.
    file_pattern: "**/service*.py"
    anti_patterns:
      - '\\.all\\(\\).*for\\s+\\w+\\s+in'
    suggested_patterns:
      - "yield|yield_per|iter"
    example_bad: |
      products = db.query(Product).all()  # Loads all into memory
      for product in products:
          process(product)
    example_good: |
      for product in db.query(Product).yield_per(100):
          process(product)

  - id: "PERF-047"
    name: "Stream large file uploads"
    severity: warning
    description: |
      Large files should be streamed to disk, not held in memory.
      Use SpooledTemporaryFile or direct disk writing.
    file_pattern: "**/upload*.py|**/attachment*.py"
    suggested_patterns:
      - "SpooledTemporaryFile|chunk|stream"
    example_bad: |
      content = await file.read()  # Entire file in memory
      with open(path, 'wb') as f:
          f.write(content)
    example_good: |
      with open(path, 'wb') as f:
          while chunk := await file.read(8192):
              f.write(chunk)

  - id: "PERF-048"
    name: "Chunked processing for imports"
    severity: warning
    description: |
      Bulk imports should process in chunks:
      - Read in batches
      - Commit in batches
      - Report progress periodically
    file_pattern: "**/import*.py|**/csv*.py"
    required_patterns:
      - "chunk|batch|yield"
    example_bad: |
      rows = list(csv_reader)  # All rows in memory
      for row in rows:
          process(row)
    example_good: |
      def process_in_chunks(reader, chunk_size=1000):
          chunk = []
          for row in reader:
              chunk.append(row)
              if len(chunk) >= chunk_size:
                  yield chunk
                  chunk = []
          if chunk:
              yield chunk

  - id: "PERF-049"
    name: "Context managers for resources"
    severity: error
    description: |
      Use context managers for file operations.
      Ensures resources are properly released.
    file_pattern: "**/*.py"
    anti_patterns:
      - 'f\\s*=\\s*open\\s*\\([^)]+\\)(?!\\s*#.*context)'
      - '^(?!.*with).*open\\s*\\([^)]+\\)\\s*$'
    exclude_patterns:
      - "# noqa: PERF-049"
      - "with open"
    example_bad: |
      f = open('file.txt')
      content = f.read()
      f.close()  # May not run if exception
    example_good: |
      with open('file.txt') as f:
          content = f.read()

  - id: "PERF-050"
    name: "Limit in-memory collections"
    severity: info
    description: |
      Avoid building large lists in memory.
      Use generators, itertools, or database pagination.
    file_pattern: "**/service*.py"
    anti_patterns:
      - '\\[.*for.*in.*\\](?!.*[:10])'

  - id: "PERF-051"
    name: "String concatenation efficiency"
    severity: info
    description: |
      For many string concatenations, use join() or StringIO.
      Repeated += creates many intermediate strings.
    file_pattern: "**/*.py"
    anti_patterns:
      - 'for.*:\\s*\\n[^}]*\\+='
    suggested_patterns:
      - "\\.join\\(|StringIO"
    example_bad: |
      result = ""
      for item in items:
          result += str(item)
    example_good: |
      result = "".join(str(item) for item in items)

  - id: "PERF-052"
    name: "Efficient data structures"
    severity: info
    description: |
      Choose appropriate data structures:
      - set for membership testing
      - dict for key-value lookup
      - deque for queue operations
      - defaultdict for grouping
    file_pattern: "**/*.py"

  - id: "PERF-053"
    name: "Object pooling for expensive objects"
    severity: info
    description: |
      Reuse expensive-to-create objects:
      - Database connections
      - HTTP clients
      - Template engines
    file_pattern: "**/*.py"

  - id: "PERF-054"
    name: "Weak references for caches"
    severity: info
    description: |
      Use weak references for large object caches.
      Allows garbage collection when memory is needed.
    file_pattern: "**/*cache*.py"
    suggested_patterns:
      - "WeakValueDictionary|WeakKeyDictionary|weakref"

  - id: "PERF-055"
    name: "Slots for frequently instantiated classes"
    severity: info
    description: |
      Use __slots__ for classes with many instances.
      Reduces memory footprint per instance.
    file_pattern: "**/models/**/*.py"
    suggested_patterns:
      - "__slots__"