feat: update CSV import to support multi-language translations

- Add language parameter to import endpoints and background tasks
- Extract translation fields (title, description, short_description)
- Create/update MarketplaceProductTranslation records during import
- Add MarketplaceProductTranslationSchema for API responses
- Map product_type column to product_type_raw to avoid enum conflict
- Parse prices to numeric format (price_numeric, sale_price_numeric)
- Update marketplace product service for translation-based lookups
- Update CSV export to retrieve titles from translations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-11 17:29:13 +01:00
parent 92a1c0249f
commit f2af3aae29
7 changed files with 535 additions and 103 deletions

View File

@@ -1,13 +1,14 @@
# app/utils/csv_processor.py
"""CSV processor utilities ....
"""CSV processor utilities for marketplace product imports.
This module provides classes and functions for:
- ....
- ....
- ....
- Downloading and parsing CSV files with multiple encoding support
- Normalizing column names to match database schema
- Creating/updating MarketplaceProduct records with translations
"""
import logging
import re
from datetime import UTC, datetime
from io import StringIO
from typing import Any
@@ -18,6 +19,7 @@ from sqlalchemy import literal
from sqlalchemy.orm import Session
from models.database.marketplace_product import MarketplaceProduct
from models.database.marketplace_product_translation import MarketplaceProductTranslation
logger = logging.getLogger(__name__)
@@ -38,6 +40,9 @@ class CSVProcessor:
{"sep": "\t", "engine": "python"},
]
# Fields that belong to the translation table, not MarketplaceProduct
TRANSLATION_FIELDS = {"title", "description", "short_description"}
COLUMN_MAPPING = {
# Standard variations
"id": "marketplace_product_id",
@@ -72,7 +77,8 @@ class CSVProcessor:
"g:size_system": "size_system",
"g:item_group_id": "item_group_id",
"g:google_product_category": "google_product_category",
"g:product_type": "product_type",
"g:product_type": "product_type_raw", # Maps to product_type_raw (renamed)
"product_type": "product_type_raw", # Also map plain product_type
"g:custom_label_0": "custom_label_0",
"g:custom_label_1": "custom_label_1",
"g:custom_label_2": "custom_label_2",
@@ -145,6 +151,21 @@ class CSVProcessor:
logger.info(f"Normalized columns: {list(df.columns)}")
return df
def _parse_price_to_numeric(self, price_str: str | None) -> float | None:
"""Parse price string like '19.99 EUR' to float."""
if not price_str:
return None
# Extract numeric value
numbers = re.findall(r"[\d.,]+", str(price_str))
if numbers:
num_str = numbers[0].replace(",", ".")
try:
return float(num_str)
except ValueError:
pass
return None
def _clean_row_data(self, row_data: dict[str, Any]) -> dict[str, Any]:
"""Process a single row with data normalization."""
# Handle NaN values
@@ -161,15 +182,22 @@ class CSVProcessor:
parsed_price, currency = self.price_processor.parse_price_currency(
processed_data["price"]
)
# Store both raw price string and numeric value
raw_price = processed_data["price"]
processed_data["price"] = parsed_price
processed_data["price_numeric"] = self._parse_price_to_numeric(raw_price)
processed_data["currency"] = currency
# Process sale_price
if processed_data.get("sale_price"):
raw_sale_price = processed_data["sale_price"]
parsed_sale_price, _ = self.price_processor.parse_price_currency(
processed_data["sale_price"]
)
processed_data["sale_price"] = parsed_sale_price
processed_data["sale_price_numeric"] = self._parse_price_to_numeric(
raw_sale_price
)
# Clean MPN (remove .0 endings)
if processed_data.get("mpn"):
@@ -186,8 +214,72 @@ class CSVProcessor:
return processed_data
def _extract_translation_data(
self, product_data: dict[str, Any]
) -> dict[str, Any]:
"""Extract translation fields from product data.
Returns a dict with title, description, etc. that belong
in the translation table. Removes these fields from product_data in place.
"""
translation_data = {}
for field in self.TRANSLATION_FIELDS:
if field in product_data:
translation_data[field] = product_data.pop(field)
return translation_data
def _create_or_update_translation(
    self,
    db: Session,
    marketplace_product: MarketplaceProduct,
    translation_data: dict[str, Any],
    language: str = "en",
    source_file: str | None = None,
) -> None:
    """Upsert the translation row for *marketplace_product* in *language*.

    Does nothing when *translation_data* lacks a title, because a title
    is mandatory for a translation record.
    """
    if not translation_data.get("title"):
        # Title is required for translations.
        return

    # Look up an existing row keyed by (product id, language).
    row = (
        db.query(MarketplaceProductTranslation)
        .filter(
            MarketplaceProductTranslation.marketplace_product_id
            == marketplace_product.id,
            MarketplaceProductTranslation.language == language,
        )
        .first()
    )

    if row is None:
        # No translation yet: insert a fresh record.
        db.add(
            MarketplaceProductTranslation(
                marketplace_product_id=marketplace_product.id,
                language=language,
                title=translation_data.get("title"),
                description=translation_data.get("description"),
                short_description=translation_data.get("short_description"),
                source_file=source_file,
            )
        )
        return

    # Existing translation: copy matching attributes and bump the timestamp.
    for attr, value in translation_data.items():
        if hasattr(row, attr):
            setattr(row, attr, value)
    row.updated_at = datetime.now(UTC)
    if source_file:
        row.source_file = source_file
async def process_marketplace_csv_from_url(
self, url: str, marketplace: str, vendor_name: str, batch_size: int, db: Session
self,
url: str,
marketplace: str,
vendor_name: str,
batch_size: int,
db: Session,
language: str = "en",
) -> dict[str, Any]:
"""
Process CSV from URL with marketplace and vendor information.
@@ -198,12 +290,13 @@ class CSVProcessor:
vendor_name: Name of the vendor
batch_size: Number of rows to process in each batch
db: Database session
language: Language code for translations (default: 'en')
Returns:
Dictionary with processing results
"""
logger.info(
f"Starting marketplace CSV import from {url} for {marketplace} -> {vendor_name}"
f"Starting marketplace CSV import from {url} for {marketplace} -> {vendor_name} (lang={language})"
)
# Download and parse CSV
csv_content = self.download_csv(url)
@@ -216,11 +309,20 @@ class CSVProcessor:
updated = 0
errors = 0
# Extract source file name from URL
source_file = url.split("/")[-1] if "/" in url else url
# Process in batches
for i in range(0, len(df), batch_size):
batch_df = df.iloc[i : i + batch_size]
batch_result = await self._process_marketplace_batch(
batch_df, marketplace, vendor_name, db, i // batch_size + 1
batch_df,
marketplace,
vendor_name,
db,
i // batch_size + 1,
language=language,
source_file=source_file,
)
imported += batch_result["imported"]
@@ -235,7 +337,8 @@ class CSVProcessor:
"updated": updated,
"errors": errors,
"marketplace": marketplace,
"name": vendor_name,
"vendor_name": vendor_name,
"language": language,
}
async def _process_marketplace_batch(
@@ -245,6 +348,8 @@ class CSVProcessor:
vendor_name: str,
db: Session,
batch_num: int,
language: str = "en",
source_file: str | None = None,
) -> dict[str, int]:
"""Process a batch of CSV rows with marketplace information."""
imported = 0
@@ -261,9 +366,12 @@ class CSVProcessor:
# Convert row to dictionary and clean up
product_data = self._clean_row_data(row.to_dict())
# Extract translation fields BEFORE processing product
translation_data = self._extract_translation_data(product_data)
# Add marketplace and vendor information
product_data["marketplace"] = marketplace
product_data["name"] = vendor_name
product_data["vendor_name"] = vendor_name
# Validate required fields
if not product_data.get("marketplace_product_id"):
@@ -273,7 +381,8 @@ class CSVProcessor:
errors += 1
continue
if not product_data.get("title"):
# Title is now required in translation_data
if not translation_data.get("title"):
logger.warning(f"Row {index}: Missing title, skipping")
errors += 1
continue
@@ -289,20 +398,30 @@ class CSVProcessor:
)
if existing_product:
# Update existing product
# Update existing product (only non-translation fields)
for key, value in product_data.items():
if key not in ["id", "created_at"] and hasattr(
existing_product, key
):
setattr(existing_product, key, value)
existing_product.updated_at = datetime.now(UTC)
# Update or create translation
self._create_or_update_translation(
db,
existing_product,
translation_data,
language=language,
source_file=source_file,
)
updated += 1
logger.debug(
f"Updated product {product_data['marketplace_product_id']} for "
f"{marketplace} and vendor {vendor_name}"
)
else:
# Create new product
# Create new product (filter to valid model fields)
filtered_data = {
k: v
for k, v in product_data.items()
@@ -311,6 +430,17 @@ class CSVProcessor:
}
new_product = MarketplaceProduct(**filtered_data)
db.add(new_product)
db.flush() # Get the ID for the translation
# Create translation for new product
self._create_or_update_translation(
db,
new_product,
translation_data,
language=language,
source_file=source_file,
)
imported += 1
logger.debug(
f"Imported new product {product_data['marketplace_product_id']} "