fixing DQ issues
This commit is contained in:
@@ -1,4 +1,12 @@
|
||||
# utils/csv_processor.py
|
||||
"""CSV processor utilities for downloading, parsing and importing CSV data.
|
||||
|
||||
This module provides classes and functions for:
|
||||
- Downloading CSV content from a URL and decoding it with multiple encoding attempts
|
||||
- Parsing CSV content with multiple separator attempts and normalizing column names
|
||||
- Cleaning row data and processing rows in batches with marketplace and shop information
|
||||
"""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from io import StringIO
|
||||
@@ -15,7 +23,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CSVProcessor:
|
||||
"""Handles CSV import with robust parsing and batching"""
|
||||
"""Handles CSV import with robust parsing and batching."""
|
||||
|
||||
ENCODINGS = ["utf-8", "latin-1", "iso-8859-1", "cp1252", "utf-8-sig"]
|
||||
|
||||
@@ -75,13 +83,14 @@ class CSVProcessor:
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
"""Class constructor."""
|
||||
from utils.data_processing import GTINProcessor, PriceProcessor
|
||||
|
||||
self.gtin_processor = GTINProcessor()
|
||||
self.price_processor = PriceProcessor()
|
||||
|
||||
def download_csv(self, url: str) -> str:
|
||||
"""Download and decode CSV with multiple encoding attempts"""
|
||||
"""Download and decode CSV with multiple encoding attempts."""
|
||||
try:
|
||||
response = requests.get(url, timeout=30)
|
||||
response.raise_for_status()
|
||||
@@ -107,8 +116,7 @@ class CSVProcessor:
|
||||
raise
|
||||
|
||||
def parse_csv(self, csv_content: str) -> pd.DataFrame:
|
||||
"""Parse CSV with multiple separator attempts"""
|
||||
|
||||
"""Parse CSV with multiple separator attempts."""
|
||||
for config in self.PARSING_CONFIGS:
|
||||
try:
|
||||
df = pd.read_csv(
|
||||
@@ -127,7 +135,7 @@ class CSVProcessor:
|
||||
raise pd.errors.ParserError("Could not parse CSV with any configuration")
|
||||
|
||||
def normalize_columns(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Normalize column names using mapping"""
|
||||
"""Normalize column names using mapping."""
|
||||
# Clean column names
|
||||
df.columns = df.columns.str.strip()
|
||||
|
||||
@@ -138,7 +146,7 @@ class CSVProcessor:
|
||||
return df
|
||||
|
||||
def _clean_row_data(self, row_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Process a single row with data normalization"""
|
||||
"""Process a single row with data normalization."""
|
||||
# Handle NaN values
|
||||
processed_data = {k: (v if pd.notna(v) else None) for k, v in row_data.items()}
|
||||
|
||||
@@ -182,7 +190,7 @@ class CSVProcessor:
|
||||
self, url: str, marketplace: str, shop_name: str, batch_size: int, db: Session
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Process CSV from URL with marketplace and shop information
|
||||
Process CSV from URL with marketplace and shop information.
|
||||
|
||||
Args:
|
||||
url: URL to the CSV file
|
||||
@@ -194,7 +202,6 @@ class CSVProcessor:
|
||||
Returns:
|
||||
Dictionary with processing results
|
||||
"""
|
||||
|
||||
logger.info(
|
||||
f"Starting marketplace CSV import from {url} for {marketplace} -> {shop_name}"
|
||||
)
|
||||
@@ -239,13 +246,14 @@ class CSVProcessor:
|
||||
db: Session,
|
||||
batch_num: int,
|
||||
) -> Dict[str, int]:
|
||||
"""Process a batch of CSV rows with marketplace information"""
|
||||
"""Process a batch of CSV rows with marketplace information."""
|
||||
imported = 0
|
||||
updated = 0
|
||||
errors = 0
|
||||
|
||||
logger.info(
|
||||
f"Processing batch {batch_num} with {len(batch_df)} rows for {marketplace} -> {shop_name}"
|
||||
f"Processing batch {batch_num} with {len(batch_df)} rows for "
|
||||
f"{marketplace} -> {shop_name}"
|
||||
)
|
||||
|
||||
for index, row in batch_df.iterrows():
|
||||
@@ -285,7 +293,8 @@ class CSVProcessor:
|
||||
existing_product.updated_at = datetime.utcnow()
|
||||
updated += 1
|
||||
logger.debug(
|
||||
f"Updated product {product_data['product_id']} for {marketplace} and shop {shop_name}"
|
||||
f"Updated product {product_data['product_id']} for "
|
||||
f"{marketplace} and shop {shop_name}"
|
||||
)
|
||||
else:
|
||||
# Create new product
|
||||
@@ -299,8 +308,8 @@ class CSVProcessor:
|
||||
db.add(new_product)
|
||||
imported += 1
|
||||
logger.debug(
|
||||
f"Imported new product {product_data['product_id']} for {marketplace} and shop "
|
||||
f"{shop_name}"
|
||||
f"Imported new product {product_data['product_id']} "
|
||||
f"for {marketplace} and shop {shop_name}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user