feat: update CSV import to support multi-language translations

- Add language parameter to import endpoints and background tasks
- Extract translation fields (title, description, short_description)
- Create/update MarketplaceProductTranslation records during import
- Add MarketplaceProductTranslationSchema for API responses
- Map product_type column to product_type_raw to avoid enum conflict
- Parse prices to numeric format (price_numeric, sale_price_numeric)
- Update marketplace product service for translation-based lookups
- Update CSV export to retrieve titles from translations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-11 17:29:13 +01:00
parent 92a1c0249f
commit f2af3aae29
7 changed files with 535 additions and 103 deletions

View File

@@ -1,13 +1,14 @@
# app/utils/csv_processor.py
"""CSV processor utilities ....
"""CSV processor utilities for marketplace product imports.
This module provides classes and functions for:
- ....
- ....
- ....
- Downloading and parsing CSV files with multiple encoding support
- Normalizing column names to match database schema
- Creating/updating MarketplaceProduct records with translations
"""
import logging
import re
from datetime import UTC, datetime
from io import StringIO
from typing import Any
@@ -18,6 +19,7 @@ from sqlalchemy import literal
from sqlalchemy.orm import Session
from models.database.marketplace_product import MarketplaceProduct
from models.database.marketplace_product_translation import MarketplaceProductTranslation
logger = logging.getLogger(__name__)
@@ -38,6 +40,9 @@ class CSVProcessor:
{"sep": "\t", "engine": "python"},
]
# Fields that belong to the translation table, not MarketplaceProduct
TRANSLATION_FIELDS = {"title", "description", "short_description"}
COLUMN_MAPPING = {
# Standard variations
"id": "marketplace_product_id",
@@ -72,7 +77,8 @@ class CSVProcessor:
"g:size_system": "size_system",
"g:item_group_id": "item_group_id",
"g:google_product_category": "google_product_category",
"g:product_type": "product_type",
"g:product_type": "product_type_raw", # Maps to product_type_raw (renamed)
"product_type": "product_type_raw", # Also map plain product_type
"g:custom_label_0": "custom_label_0",
"g:custom_label_1": "custom_label_1",
"g:custom_label_2": "custom_label_2",
@@ -145,6 +151,21 @@ class CSVProcessor:
logger.info(f"Normalized columns: {list(df.columns)}")
return df
def _parse_price_to_numeric(self, price_str: str | None) -> float | None:
"""Parse price string like '19.99 EUR' to float."""
if not price_str:
return None
# Extract numeric value
numbers = re.findall(r"[\d.,]+", str(price_str))
if numbers:
num_str = numbers[0].replace(",", ".")
try:
return float(num_str)
except ValueError:
pass
return None
def _clean_row_data(self, row_data: dict[str, Any]) -> dict[str, Any]:
"""Process a single row with data normalization."""
# Handle NaN values
@@ -161,15 +182,22 @@ class CSVProcessor:
parsed_price, currency = self.price_processor.parse_price_currency(
processed_data["price"]
)
# Store both raw price string and numeric value
raw_price = processed_data["price"]
processed_data["price"] = parsed_price
processed_data["price_numeric"] = self._parse_price_to_numeric(raw_price)
processed_data["currency"] = currency
# Process sale_price
if processed_data.get("sale_price"):
raw_sale_price = processed_data["sale_price"]
parsed_sale_price, _ = self.price_processor.parse_price_currency(
processed_data["sale_price"]
)
processed_data["sale_price"] = parsed_sale_price
processed_data["sale_price_numeric"] = self._parse_price_to_numeric(
raw_sale_price
)
# Clean MPN (remove .0 endings)
if processed_data.get("mpn"):
@@ -186,8 +214,72 @@ class CSVProcessor:
return processed_data
def _extract_translation_data(
self, product_data: dict[str, Any]
) -> dict[str, Any]:
"""Extract translation fields from product data.
Returns a dict with title, description, etc. that belong
in the translation table. Removes these fields from product_data in place.
"""
translation_data = {}
for field in self.TRANSLATION_FIELDS:
if field in product_data:
translation_data[field] = product_data.pop(field)
return translation_data
def _create_or_update_translation(
    self,
    db: Session,
    marketplace_product: MarketplaceProduct,
    translation_data: dict[str, Any],
    language: str = "en",
    source_file: str | None = None,
) -> None:
    """Upsert the translation row for *marketplace_product* in *language*.

    Does nothing when *translation_data* lacks a title, because a title
    is mandatory for a translation record.
    """
    if not translation_data.get("title"):
        # Title is required for translations.
        return

    # Look up an existing row keyed by (product id, language).
    row = (
        db.query(MarketplaceProductTranslation)
        .filter(
            MarketplaceProductTranslation.marketplace_product_id
            == marketplace_product.id,
            MarketplaceProductTranslation.language == language,
        )
        .first()
    )

    if row is None:
        # No translation yet: insert a fresh record.
        db.add(
            MarketplaceProductTranslation(
                marketplace_product_id=marketplace_product.id,
                language=language,
                title=translation_data.get("title"),
                description=translation_data.get("description"),
                short_description=translation_data.get("short_description"),
                source_file=source_file,
            )
        )
        return

    # Existing translation: copy matching attributes and bump the timestamp.
    for attr, value in translation_data.items():
        if hasattr(row, attr):
            setattr(row, attr, value)
    row.updated_at = datetime.now(UTC)
    if source_file:
        row.source_file = source_file
async def process_marketplace_csv_from_url(
self, url: str, marketplace: str, vendor_name: str, batch_size: int, db: Session
self,
url: str,
marketplace: str,
vendor_name: str,
batch_size: int,
db: Session,
language: str = "en",
) -> dict[str, Any]:
"""
Process CSV from URL with marketplace and vendor information.
@@ -198,12 +290,13 @@ class CSVProcessor:
vendor_name: Name of the vendor
batch_size: Number of rows to process in each batch
db: Database session
language: Language code for translations (default: 'en')
Returns:
Dictionary with processing results
"""
logger.info(
f"Starting marketplace CSV import from {url} for {marketplace} -> {vendor_name}"
f"Starting marketplace CSV import from {url} for {marketplace} -> {vendor_name} (lang={language})"
)
# Download and parse CSV
csv_content = self.download_csv(url)
@@ -216,11 +309,20 @@ class CSVProcessor:
updated = 0
errors = 0
# Extract source file name from URL
source_file = url.split("/")[-1] if "/" in url else url
# Process in batches
for i in range(0, len(df), batch_size):
batch_df = df.iloc[i : i + batch_size]
batch_result = await self._process_marketplace_batch(
batch_df, marketplace, vendor_name, db, i // batch_size + 1
batch_df,
marketplace,
vendor_name,
db,
i // batch_size + 1,
language=language,
source_file=source_file,
)
imported += batch_result["imported"]
@@ -235,7 +337,8 @@ class CSVProcessor:
"updated": updated,
"errors": errors,
"marketplace": marketplace,
"name": vendor_name,
"vendor_name": vendor_name,
"language": language,
}
async def _process_marketplace_batch(
@@ -245,6 +348,8 @@ class CSVProcessor:
vendor_name: str,
db: Session,
batch_num: int,
language: str = "en",
source_file: str | None = None,
) -> dict[str, int]:
"""Process a batch of CSV rows with marketplace information."""
imported = 0
@@ -261,9 +366,12 @@ class CSVProcessor:
# Convert row to dictionary and clean up
product_data = self._clean_row_data(row.to_dict())
# Extract translation fields BEFORE processing product
translation_data = self._extract_translation_data(product_data)
# Add marketplace and vendor information
product_data["marketplace"] = marketplace
product_data["name"] = vendor_name
product_data["vendor_name"] = vendor_name
# Validate required fields
if not product_data.get("marketplace_product_id"):
@@ -273,7 +381,8 @@ class CSVProcessor:
errors += 1
continue
if not product_data.get("title"):
# Title is now required in translation_data
if not translation_data.get("title"):
logger.warning(f"Row {index}: Missing title, skipping")
errors += 1
continue
@@ -289,20 +398,30 @@ class CSVProcessor:
)
if existing_product:
# Update existing product
# Update existing product (only non-translation fields)
for key, value in product_data.items():
if key not in ["id", "created_at"] and hasattr(
existing_product, key
):
setattr(existing_product, key, value)
existing_product.updated_at = datetime.now(UTC)
# Update or create translation
self._create_or_update_translation(
db,
existing_product,
translation_data,
language=language,
source_file=source_file,
)
updated += 1
logger.debug(
f"Updated product {product_data['marketplace_product_id']} for "
f"{marketplace} and vendor {vendor_name}"
)
else:
# Create new product
# Create new product (filter to valid model fields)
filtered_data = {
k: v
for k, v in product_data.items()
@@ -311,6 +430,17 @@ class CSVProcessor:
}
new_product = MarketplaceProduct(**filtered_data)
db.add(new_product)
db.flush() # Get the ID for the translation
# Create translation for new product
self._create_or_update_translation(
db,
new_product,
translation_data,
language=language,
source_file=source_file,
)
imported += 1
logger.debug(
f"Imported new product {product_data['marketplace_product_id']} "