Added marketplace support
This commit is contained in:
@@ -134,7 +134,7 @@ class CSVProcessor:
|
||||
logger.info(f"Normalized columns: {list(df.columns)}")
|
||||
return df
|
||||
|
||||
def process_row(self, row_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def _clean_row_data(self, row_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Process a single row with data normalization"""
|
||||
# Handle NaN values
|
||||
processed_data = {k: (v if pd.notna(v) else None) for k, v in row_data.items()}
|
||||
@@ -169,14 +169,35 @@ class CSVProcessor:
|
||||
|
||||
return processed_data
|
||||
|
||||
async def process_marketplace_csv_from_url(
    self,
    url: str,
    marketplace: str,
    shop_name: str,
    batch_size: int,
    db: Session
) -> Dict[str, Any]:
    """
    Process CSV from URL with marketplace and shop information.

    Downloads the CSV, parses and normalizes its columns, then processes the
    rows in batches via ``_process_marketplace_batch`` (which commits each
    batch itself) and aggregates the per-batch counters.

    Args:
        url: URL to the CSV file
        marketplace: Name of the marketplace (e.g., 'Letzshop', 'Amazon')
        shop_name: Name of the shop
        batch_size: Number of rows to process in each batch
        db: Database session

    Returns:
        Dictionary with processing results: 'total_processed', 'imported',
        'updated', 'errors', plus the echoed 'marketplace' and 'shop_name'.
    """
    logger.info(f"Starting marketplace CSV import from {url} for {marketplace} -> {shop_name}")

    # Download and parse CSV
    csv_content = self.download_csv(url)
    df = self.parse_csv(csv_content)
    df = self.normalize_columns(df)

    logger.info(f"Processing CSV with {len(df)} rows and {len(df.columns)} columns")

    imported = 0
    updated = 0
    # assumes an `errors = 0` initializer sat in the diff-elided context
    # (hunk header @@ -185,69 +206,102 @@) — TODO confirm against full file
    errors = 0

    # Process in batches; each batch handles its own commit/rollback.
    for i in range(0, len(df), batch_size):
        batch_df = df.iloc[i:i + batch_size]
        batch_result = await self._process_marketplace_batch(
            batch_df, marketplace, shop_name, db, i // batch_size + 1
        )

        imported += batch_result['imported']
        updated += batch_result['updated']
        errors += batch_result['errors']

        logger.info(f"Processed batch {i // batch_size + 1}: {batch_result}")

    return {
        'total_processed': imported + updated + errors,
        'imported': imported,
        'updated': updated,
        'errors': errors,
        'marketplace': marketplace,
        'shop_name': shop_name
    }
|
||||
|
||||
async def _process_marketplace_batch(
    self,
    batch_df: pd.DataFrame,
    marketplace: str,
    shop_name: str,
    db: Session,
    batch_num: int
) -> Dict[str, int]:
    """Process a batch of CSV rows with marketplace information.

    Each row is cleaned via ``_clean_row_data``, tagged with the marketplace
    and shop, validated (``product_id`` and ``title`` required), then either
    updates an existing ``Product`` (matched on ``product_id``) or creates a
    new one. The whole batch is committed at the end; on commit failure the
    session is rolled back and every row in the batch is counted as an error.

    Args:
        batch_df: Slice of the full DataFrame for this batch.
        marketplace: Marketplace name stamped onto each row.
        shop_name: Shop name stamped onto each row.
        db: Database session (committed/rolled back here).
        batch_num: 1-based batch index, used only for logging.

    Returns:
        Dict with 'imported', 'updated' and 'errors' counts for this batch.
    """
    imported = 0
    updated = 0
    errors = 0

    logger.info(f"Processing batch {batch_num} with {len(batch_df)} rows for {marketplace} -> {shop_name}")

    for index, row in batch_df.iterrows():
        try:
            # Convert row to dictionary and clean up
            product_data = self._clean_row_data(row.to_dict())

            # Add marketplace and shop information
            product_data['marketplace'] = marketplace
            product_data['shop_name'] = shop_name

            # Validate required fields — skip (and count) rows missing either.
            if not product_data.get('product_id'):
                logger.warning(f"Row {index}: Missing product_id, skipping")
                errors += 1
                continue

            if not product_data.get('title'):
                logger.warning(f"Row {index}: Missing title, skipping")
                errors += 1
                continue

            # Check if product exists
            existing_product = db.query(Product).filter(
                Product.product_id == product_data['product_id']
            ).first()

            if existing_product:
                # Update existing product; never overwrite id/created_at.
                for key, value in product_data.items():
                    if key not in ['id', 'created_at'] and hasattr(existing_product, key):
                        setattr(existing_product, key, value)
                existing_product.updated_at = datetime.utcnow()
                updated += 1
                logger.debug(f"Updated product {product_data['product_id']} for {marketplace} and shop {shop_name}")
            else:
                # Create new product from columns the model actually has.
                filtered_data = {k: v for k, v in product_data.items()
                                 if k not in ['id', 'created_at', 'updated_at'] and hasattr(Product, k)}
                new_product = Product(**filtered_data)
                db.add(new_product)
                imported += 1
                logger.debug(f"Imported new product {product_data['product_id']} for {marketplace} and shop "
                             f"{shop_name}")

        except Exception as e:
            # Best-effort per row: log and keep going with the rest of the batch.
            logger.error(f"Error processing row: {e}")
            errors += 1
            continue

    # Commit the batch
    try:
        db.commit()
        logger.info(f"Batch {batch_num} committed successfully")
    except Exception as e:
        logger.error(f"Failed to commit batch {batch_num}: {e}")
        db.rollback()
        # Count all rows in this batch as errors; nothing was persisted.
        errors = len(batch_df)
        imported = 0
        updated = 0

    return {
        'imported': imported,
        'updated': updated,
        'errors': errors
    }
|
||||
|
||||
Reference in New Issue
Block a user