fixing DQ issues
This commit is contained in:
@@ -1,4 +1,12 @@
|
||||
# utils/data_processing.py
|
||||
"""Data processing utilities for GTIN validation and price parsing.
|
||||
|
||||
This module provides classes and functions for:
|
||||
- GTIN (Global Trade Item Number) validation and normalization
|
||||
- Price parsing with currency detection
|
||||
- Data cleaning and validation utilities
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional, Tuple
|
||||
@@ -9,14 +17,15 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GTINProcessor:
|
||||
"""Handles GTIN normalization and validation"""
|
||||
"""Handles GTIN normalization and validation."""
|
||||
|
||||
VALID_LENGTHS = [8, 12, 13, 14]
|
||||
|
||||
def normalize(self, gtin_value: any) -> Optional[str]:
|
||||
"""
|
||||
Normalize GTIN to proper format
|
||||
Returns None for invalid GTINs
|
||||
Normalize GTIN to proper format.
|
||||
|
||||
Returns None for invalid GTINs.
|
||||
"""
|
||||
if not gtin_value or pd.isna(gtin_value):
|
||||
return None
|
||||
@@ -63,14 +72,14 @@ class GTINProcessor:
|
||||
return None
|
||||
|
||||
def validate(self, gtin: str) -> bool:
|
||||
"""Validate GTIN format"""
|
||||
"""Validate GTIN format."""
|
||||
if not gtin:
|
||||
return False
|
||||
return len(gtin) in self.VALID_LENGTHS and gtin.isdigit()
|
||||
|
||||
|
||||
class PriceProcessor:
|
||||
"""Handles price parsing and currency extraction"""
|
||||
"""Handles price parsing and currency extraction."""
|
||||
|
||||
CURRENCY_PATTERNS = {
|
||||
# Amount followed by currency
|
||||
@@ -92,7 +101,8 @@ class PriceProcessor:
|
||||
self, price_str: any
|
||||
) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""
|
||||
Parse price string into (price, currency) tuple
|
||||
Parse price string into (price, currency) tuple.
|
||||
|
||||
Returns (None, None) if parsing fails.
|
||||
"""
|
||||
if not price_str or pd.isna(price_str):
|
||||
|
||||
Reference in New Issue
Block a user