code quality run

This commit is contained in:
2025-09-13 21:58:54 +02:00
parent 0dfd885847
commit 3eb18ef91e
63 changed files with 1802 additions and 1289 deletions

View File

@@ -1,8 +1,9 @@
# utils/data_processing.py
import re
import pandas as pd
from typing import Tuple, Optional
import logging
import re
from typing import Optional, Tuple
import pandas as pd
logger = logging.getLogger(__name__)
@@ -25,11 +26,11 @@ class GTINProcessor:
return None
# Remove decimal point (e.g., "889698116923.0" -> "889698116923")
if '.' in gtin_str:
gtin_str = gtin_str.split('.')[0]
if "." in gtin_str:
gtin_str = gtin_str.split(".")[0]
# Keep only digits
gtin_clean = ''.join(filter(str.isdigit, gtin_str))
gtin_clean = "".join(filter(str.isdigit, gtin_str))
if not gtin_clean:
return None
@@ -73,23 +74,23 @@ class PriceProcessor:
# Maps a regex pattern (as a raw string) to an extractor callable.
# Each extractor receives the match object for its pattern and returns an
# ``(amount_text, currency_code)`` tuple; the amount is returned exactly as
# matched (separators are not normalized here).
# NOTE(review): the dict keeps insertion order, so the specific patterns are
# listed before the generic 3-letter fallbacks -- presumably the consumer
# tries them in this order; confirm against the lookup loop.
CURRENCY_PATTERNS = {
    # Amount followed by currency
    r"([0-9.,]+)\s*(EUR|€)": lambda m: (m.group(1), "EUR"),
    r"([0-9.,]+)\s*(USD|\$)": lambda m: (m.group(1), "USD"),
    r"([0-9.,]+)\s*(GBP|£)": lambda m: (m.group(1), "GBP"),
    r"([0-9.,]+)\s*(CHF)": lambda m: (m.group(1), "CHF"),
    r"([0-9.,]+)\s*(CAD|AUD|JPY|¥)": lambda m: (
        m.group(1),
        # "¥".upper() is still "¥", so the symbol must be mapped to its code
        # explicitly, the same way €, $ and £ are mapped above.
        "JPY" if m.group(2) == "¥" else m.group(2).upper(),
    ),
    # Currency followed by amount
    r"(EUR|€)\s*([0-9.,]+)": lambda m: (m.group(2), "EUR"),
    r"(USD|\$)\s*([0-9.,]+)": lambda m: (m.group(2), "USD"),
    r"(GBP|£)\s*([0-9.,]+)": lambda m: (m.group(2), "GBP"),
    # Generic 3-letter currency codes
    r"([0-9.,]+)\s*([A-Z]{3})": lambda m: (m.group(1), m.group(2)),
    r"([A-Z]{3})\s*([0-9.,]+)": lambda m: (m.group(2), m.group(1)),
}
def parse_price_currency(self, price_str: any) -> Tuple[Optional[str], Optional[str]]:
def parse_price_currency(
self, price_str: any
) -> Tuple[Optional[str], Optional[str]]:
"""
Parse price string into (price, currency) tuple
Returns (None, None) if parsing fails
@@ -108,7 +109,7 @@ class PriceProcessor:
try:
price_val, currency_val = extract_func(match)
# Normalize price (remove spaces, handle comma as decimal)
price_val = price_val.replace(' ', '').replace(',', '.')
price_val = price_val.replace(" ", "").replace(",", ".")
# Validate numeric
float(price_val)
return price_val, currency_val.upper()
@@ -116,10 +117,10 @@ class PriceProcessor:
continue
# Fallback: extract just numbers
number_match = re.search(r'([0-9.,]+)', price_str)
number_match = re.search(r"([0-9.,]+)", price_str)
if number_match:
try:
price_val = number_match.group(1).replace(',', '.')
price_val = number_match.group(1).replace(",", ".")
float(price_val) # Validate
return price_val, None
except ValueError: