code quality run
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
# utils/data_processing.py
|
||||
import re
|
||||
import pandas as pd
|
||||
from typing import Tuple, Optional
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -25,11 +26,11 @@ class GTINProcessor:
|
||||
return None
|
||||
|
||||
# Drop the decimal point and everything after it (e.g., "889698116923.0" -> "889698116923")
|
||||
if '.' in gtin_str:
|
||||
gtin_str = gtin_str.split('.')[0]
|
||||
if "." in gtin_str:
|
||||
gtin_str = gtin_str.split(".")[0]
|
||||
|
||||
# Keep only digits
|
||||
gtin_clean = ''.join(filter(str.isdigit, gtin_str))
|
||||
gtin_clean = "".join(filter(str.isdigit, gtin_str))
|
||||
|
||||
if not gtin_clean:
|
||||
return None
|
||||
@@ -73,23 +74,23 @@ class PriceProcessor:
|
||||
|
||||
CURRENCY_PATTERNS = {
|
||||
# Amount followed by currency
|
||||
r'([0-9.,]+)\s*(EUR|€)': lambda m: (m.group(1), 'EUR'),
|
||||
r'([0-9.,]+)\s*(USD|\$)': lambda m: (m.group(1), 'USD'),
|
||||
r'([0-9.,]+)\s*(GBP|£)': lambda m: (m.group(1), 'GBP'),
|
||||
r'([0-9.,]+)\s*(CHF)': lambda m: (m.group(1), 'CHF'),
|
||||
r'([0-9.,]+)\s*(CAD|AUD|JPY|¥)': lambda m: (m.group(1), m.group(2).upper()),
|
||||
|
||||
r"([0-9.,]+)\s*(EUR|€)": lambda m: (m.group(1), "EUR"),
|
||||
r"([0-9.,]+)\s*(USD|\$)": lambda m: (m.group(1), "USD"),
|
||||
r"([0-9.,]+)\s*(GBP|£)": lambda m: (m.group(1), "GBP"),
|
||||
r"([0-9.,]+)\s*(CHF)": lambda m: (m.group(1), "CHF"),
|
||||
r"([0-9.,]+)\s*(CAD|AUD|JPY|¥)": lambda m: (m.group(1), m.group(2).upper()),
|
||||
# Currency followed by amount
|
||||
r'(EUR|€)\s*([0-9.,]+)': lambda m: (m.group(2), 'EUR'),
|
||||
r'(USD|\$)\s*([0-9.,]+)': lambda m: (m.group(2), 'USD'),
|
||||
r'(GBP|£)\s*([0-9.,]+)': lambda m: (m.group(2), 'GBP'),
|
||||
|
||||
r"(EUR|€)\s*([0-9.,]+)": lambda m: (m.group(2), "EUR"),
|
||||
r"(USD|\$)\s*([0-9.,]+)": lambda m: (m.group(2), "USD"),
|
||||
r"(GBP|£)\s*([0-9.,]+)": lambda m: (m.group(2), "GBP"),
|
||||
# Generic 3-letter currency codes
|
||||
r'([0-9.,]+)\s*([A-Z]{3})': lambda m: (m.group(1), m.group(2)),
|
||||
r'([A-Z]{3})\s*([0-9.,]+)': lambda m: (m.group(2), m.group(1)),
|
||||
r"([0-9.,]+)\s*([A-Z]{3})": lambda m: (m.group(1), m.group(2)),
|
||||
r"([A-Z]{3})\s*([0-9.,]+)": lambda m: (m.group(2), m.group(1)),
|
||||
}
|
||||
|
||||
def parse_price_currency(self, price_str: any) -> Tuple[Optional[str], Optional[str]]:
|
||||
def parse_price_currency(
|
||||
self, price_str: any
|
||||
) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""
|
||||
Parse price string into (price, currency) tuple
|
||||
Returns (None, None) if parsing fails
|
||||
@@ -108,7 +109,7 @@ class PriceProcessor:
|
||||
try:
|
||||
price_val, currency_val = extract_func(match)
|
||||
# Normalize price: strip spaces and turn every comma into a dot (commas are treated as decimal separators, not thousands separators)
|
||||
price_val = price_val.replace(' ', '').replace(',', '.')
|
||||
price_val = price_val.replace(" ", "").replace(",", ".")
|
||||
# Validate numeric
|
||||
float(price_val)
|
||||
return price_val, currency_val.upper()
|
||||
@@ -116,10 +117,10 @@ class PriceProcessor:
|
||||
continue
|
||||
|
||||
# Fallback: take the first run of digits/separators and return it with no currency
|
||||
number_match = re.search(r'([0-9.,]+)', price_str)
|
||||
number_match = re.search(r"([0-9.,]+)", price_str)
|
||||
if number_match:
|
||||
try:
|
||||
price_val = number_match.group(1).replace(',', '.')
|
||||
price_val = number_match.group(1).replace(",", ".")
|
||||
float(price_val) # Validate
|
||||
return price_val, None
|
||||
except ValueError:
|
||||
|
||||
Reference in New Issue
Block a user