Added marketplace support
This commit is contained in:
@@ -134,7 +134,7 @@ class CSVProcessor:
|
||||
logger.info(f"Normalized columns: {list(df.columns)}")
|
||||
return df
|
||||
|
||||
def process_row(self, row_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def _clean_row_data(self, row_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Process a single row with data normalization"""
|
||||
# Handle NaN values
|
||||
processed_data = {k: (v if pd.notna(v) else None) for k, v in row_data.items()}
|
||||
@@ -169,14 +169,35 @@ class CSVProcessor:
|
||||
|
||||
return processed_data
|
||||
|
||||
async def process_marketplace_csv_from_url(
    self,
    url: str,
    marketplace: str,
    shop_name: str,
    batch_size: int,
    db: Session
) -> Dict[str, Any]:
    """
    Process CSV from URL with marketplace and shop information.

    Downloads the CSV, parses and normalizes its columns, then processes the
    rows in batches via ``_process_marketplace_batch`` (which commits each
    batch itself) and aggregates the per-batch counters.

    Args:
        url: URL to the CSV file
        marketplace: Name of the marketplace (e.g., 'Letzshop', 'Amazon')
        shop_name: Name of the shop
        batch_size: Number of rows to process in each batch
        db: Database session

    Returns:
        Dictionary with processing results: 'total_processed', 'imported',
        'updated', 'errors', plus the echoed 'marketplace' and 'shop_name'.
    """
    logger.info(f"Starting marketplace CSV import from {url} for {marketplace} -> {shop_name}")

    # Download and parse CSV
    csv_content = self.download_csv(url)
    df = self.parse_csv(csv_content)
    df = self.normalize_columns(df)

    logger.info(f"Processing CSV with {len(df)} rows and {len(df.columns)} columns")

    imported = 0
    updated = 0
    # assumes an `errors = 0` initializer sat in the diff-elided context
    # (hunk header @@ -185,69 +206,102 @@) — TODO confirm against full file
    errors = 0

    # Process in batches; each batch handles its own commit/rollback.
    for i in range(0, len(df), batch_size):
        batch_df = df.iloc[i:i + batch_size]
        batch_result = await self._process_marketplace_batch(
            batch_df, marketplace, shop_name, db, i // batch_size + 1
        )

        imported += batch_result['imported']
        updated += batch_result['updated']
        errors += batch_result['errors']

        logger.info(f"Processed batch {i // batch_size + 1}: {batch_result}")

    return {
        'total_processed': imported + updated + errors,
        'imported': imported,
        'updated': updated,
        'errors': errors,
        'marketplace': marketplace,
        'shop_name': shop_name
    }
|
||||
|
||||
async def _process_marketplace_batch(
    self,
    batch_df: pd.DataFrame,
    marketplace: str,
    shop_name: str,
    db: Session,
    batch_num: int
) -> Dict[str, int]:
    """Process a batch of CSV rows with marketplace information.

    Each row is cleaned via ``_clean_row_data``, tagged with the marketplace
    and shop, validated (``product_id`` and ``title`` required), then either
    updates an existing ``Product`` (matched on ``product_id``) or creates a
    new one. The whole batch is committed at the end; on commit failure the
    session is rolled back and every row in the batch is counted as an error.

    Args:
        batch_df: Slice of the full DataFrame for this batch.
        marketplace: Marketplace name stamped onto each row.
        shop_name: Shop name stamped onto each row.
        db: Database session (committed/rolled back here).
        batch_num: 1-based batch index, used only for logging.

    Returns:
        Dict with 'imported', 'updated' and 'errors' counts for this batch.
    """
    imported = 0
    updated = 0
    errors = 0

    logger.info(f"Processing batch {batch_num} with {len(batch_df)} rows for {marketplace} -> {shop_name}")

    for index, row in batch_df.iterrows():
        try:
            # Convert row to dictionary and clean up
            product_data = self._clean_row_data(row.to_dict())

            # Add marketplace and shop information
            product_data['marketplace'] = marketplace
            product_data['shop_name'] = shop_name

            # Validate required fields — skip (and count) rows missing either.
            if not product_data.get('product_id'):
                logger.warning(f"Row {index}: Missing product_id, skipping")
                errors += 1
                continue

            if not product_data.get('title'):
                logger.warning(f"Row {index}: Missing title, skipping")
                errors += 1
                continue

            # Check if product exists
            existing_product = db.query(Product).filter(
                Product.product_id == product_data['product_id']
            ).first()

            if existing_product:
                # Update existing product; never overwrite id/created_at.
                for key, value in product_data.items():
                    if key not in ['id', 'created_at'] and hasattr(existing_product, key):
                        setattr(existing_product, key, value)
                existing_product.updated_at = datetime.utcnow()
                updated += 1
                logger.debug(f"Updated product {product_data['product_id']} for {marketplace} and shop {shop_name}")
            else:
                # Create new product from columns the model actually has.
                filtered_data = {k: v for k, v in product_data.items()
                                 if k not in ['id', 'created_at', 'updated_at'] and hasattr(Product, k)}
                new_product = Product(**filtered_data)
                db.add(new_product)
                imported += 1
                logger.debug(f"Imported new product {product_data['product_id']} for {marketplace} and shop "
                             f"{shop_name}")

        except Exception as e:
            # Best-effort per row: log and keep going with the rest of the batch.
            logger.error(f"Error processing row: {e}")
            errors += 1
            continue

    # Commit the batch
    try:
        db.commit()
        logger.info(f"Batch {batch_num} committed successfully")
    except Exception as e:
        logger.error(f"Failed to commit batch {batch_num}: {e}")
        db.rollback()
        # Count all rows in this batch as errors; nothing was persisted.
        errors = len(batch_df)
        imported = 0
        updated = 0

    return {
        'imported': imported,
        'updated': updated,
        'errors': errors
    }
|
||||
|
||||
Reference in New Issue
Block a user