Added marketplace support

This commit is contained in:
2025-09-05 22:14:52 +02:00
parent 9dd177bddc
commit 4fb67e594d
6 changed files with 1307 additions and 117 deletions

View File

@@ -134,7 +134,7 @@ class CSVProcessor:
logger.info(f"Normalized columns: {list(df.columns)}")
return df
def process_row(self, row_data: Dict[str, Any]) -> Dict[str, Any]:
def _clean_row_data(self, row_data: Dict[str, Any]) -> Dict[str, Any]:
"""Process a single row with data normalization"""
# Handle NaN values
processed_data = {k: (v if pd.notna(v) else None) for k, v in row_data.items()}
@@ -169,14 +169,35 @@ class CSVProcessor:
return processed_data
async def process_csv_from_url(self, url: str, batch_size: int, db: Session) -> Dict[str, int]:
"""Process CSV import with batching"""
async def process_marketplace_csv_from_url(
self,
url: str,
marketplace: str,
shop_name: str,
batch_size: int,
db: Session
) -> Dict[str, Any]:
"""
Process CSV from URL with marketplace and shop information
Args:
url: URL to the CSV file
marketplace: Name of the marketplace (e.g., 'Letzshop', 'Amazon')
shop_name: Name of the shop
batch_size: Number of rows to process in each batch
db: Database session
Returns:
Dictionary with processing results
"""
logger.info(f"Starting marketplace CSV import from {url} for {marketplace} -> {shop_name}")
# Download and parse CSV
csv_content = self.download_csv(url)
df = self.parse_csv(csv_content)
df = self.normalize_columns(df)
logger.info(f"Processing CSV with {len(df)} rows")
logger.info(f"Processing CSV with {len(df)} rows and {len(df.columns)} columns")
imported = 0
updated = 0
@@ -185,69 +206,102 @@ class CSVProcessor:
# Process in batches
for i in range(0, len(df), batch_size):
batch_df = df.iloc[i:i + batch_size]
batch_imported, batch_updated, batch_errors = self._process_batch(batch_df, db)
batch_result = await self._process_marketplace_batch(
batch_df, marketplace, shop_name, db, i // batch_size + 1
)
imported += batch_imported
updated += batch_updated
errors += batch_errors
imported += batch_result['imported']
updated += batch_result['updated']
errors += batch_result['errors']
# Commit batch
try:
db.commit()
logger.info(
f"Processed batch {i // batch_size + 1}: +{batch_imported} imported, +{batch_updated} updated, +{batch_errors} errors")
except Exception as e:
db.rollback()
logger.error(f"Batch commit failed: {e}")
errors += len(batch_df)
logger.info(f"Processed batch {i // batch_size + 1}: {batch_result}")
return {
"imported": imported,
"updated": updated,
"errors": errors,
"total_processed": imported + updated + errors
'total_processed': imported + updated + errors,
'imported': imported,
'updated': updated,
'errors': errors,
'marketplace': marketplace,
'shop_name': shop_name
}
def _process_batch(self, df_batch: pd.DataFrame, db: Session) -> tuple:
"""Process a single batch of rows"""
async def _process_marketplace_batch(
self,
batch_df: pd.DataFrame,
marketplace: str,
shop_name: str,
db: Session,
batch_num: int
) -> Dict[str, int]:
"""Process a batch of CSV rows with marketplace information"""
imported = 0
updated = 0
errors = 0
for _, row in df_batch.iterrows():
logger.info(f"Processing batch {batch_num} with {len(batch_df)} rows for {marketplace} -> {shop_name}")
for index, row in batch_df.iterrows():
try:
product_data = self.process_row(row.to_dict())
# Convert row to dictionary and clean up
product_data = self._clean_row_data(row.to_dict())
# Add marketplace and shop information
product_data['marketplace'] = marketplace
product_data['shop_name'] = shop_name
# Validate required fields
product_id = product_data.get('product_id')
title = product_data.get('title')
if not product_id or not title:
if not product_data.get('product_id'):
logger.warning(f"Row {index}: Missing product_id, skipping")
errors += 1
continue
# Check for existing product
if not product_data.get('title'):
logger.warning(f"Row {index}: Missing title, skipping")
errors += 1
continue
# Check if product exists
existing_product = db.query(Product).filter(
Product.product_id == product_id
Product.product_id == product_data['product_id']
).first()
if existing_product:
# Update existing
# Update existing product
for key, value in product_data.items():
if key not in ['id', 'created_at'] and hasattr(existing_product, key):
setattr(existing_product, key, value)
existing_product.updated_at = datetime.utcnow()
updated += 1
logger.debug(f"Updated product {product_data['product_id']} for {marketplace} and shop {shop_name}")
else:
# Create new
# Create new product
filtered_data = {k: v for k, v in product_data.items()
if k not in ['id', 'created_at', 'updated_at'] and hasattr(Product, k)}
new_product = Product(**filtered_data)
db.add(new_product)
imported += 1
logger.debug(f"Imported new product {product_data['product_id']} for {marketplace} and shop "
f"{shop_name}")
except Exception as e:
logger.error(f"Error processing row: {e}")
errors += 1
continue
return imported, updated, errors
# Commit the batch
try:
db.commit()
logger.info(f"Batch {batch_num} committed successfully")
except Exception as e:
logger.error(f"Failed to commit batch {batch_num}: {e}")
db.rollback()
# Count all rows in this batch as errors
errors = len(batch_df)
imported = 0
updated = 0
return {
'imported': imported,
'updated': updated,
'errors': errors
}