feat: update CSV import to support multi-language translations
- Add language parameter to import endpoints and background tasks
- Extract translation fields (title, description, short_description)
- Create/update MarketplaceProductTranslation records during import
- Add MarketplaceProductTranslationSchema for API responses
- Map product_type column to product_type_raw to avoid enum conflict
- Parse prices to numeric format (price_numeric, sale_price_numeric)
- Update marketplace product service for translation-based lookups
- Update CSV export to retrieve titles from translations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,13 +1,14 @@
|
||||
# app/utils/csv_processor.py
|
||||
"""CSV processor utilities ....
|
||||
"""CSV processor utilities for marketplace product imports.
|
||||
|
||||
This module provides classes and functions for:
|
||||
- ....
|
||||
- ....
|
||||
- ....
|
||||
- Downloading and parsing CSV files with multiple encoding support
|
||||
- Normalizing column names to match database schema
|
||||
- Creating/updating MarketplaceProduct records with translations
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from datetime import UTC, datetime
|
||||
from io import StringIO
|
||||
from typing import Any
|
||||
@@ -18,6 +19,7 @@ from sqlalchemy import literal
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.database.marketplace_product import MarketplaceProduct
|
||||
from models.database.marketplace_product_translation import MarketplaceProductTranslation
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -38,6 +40,9 @@ class CSVProcessor:
|
||||
{"sep": "\t", "engine": "python"},
|
||||
]
|
||||
|
||||
# Fields that belong to the translation table, not MarketplaceProduct
|
||||
TRANSLATION_FIELDS = {"title", "description", "short_description"}
|
||||
|
||||
COLUMN_MAPPING = {
|
||||
# Standard variations
|
||||
"id": "marketplace_product_id",
|
||||
@@ -72,7 +77,8 @@ class CSVProcessor:
|
||||
"g:size_system": "size_system",
|
||||
"g:item_group_id": "item_group_id",
|
||||
"g:google_product_category": "google_product_category",
|
||||
"g:product_type": "product_type",
|
||||
"g:product_type": "product_type_raw", # Maps to product_type_raw (renamed)
|
||||
"product_type": "product_type_raw", # Also map plain product_type
|
||||
"g:custom_label_0": "custom_label_0",
|
||||
"g:custom_label_1": "custom_label_1",
|
||||
"g:custom_label_2": "custom_label_2",
|
||||
@@ -145,6 +151,21 @@ class CSVProcessor:
|
||||
logger.info(f"Normalized columns: {list(df.columns)}")
|
||||
return df
|
||||
|
||||
def _parse_price_to_numeric(self, price_str: str | None) -> float | None:
|
||||
"""Parse price string like '19.99 EUR' to float."""
|
||||
if not price_str:
|
||||
return None
|
||||
|
||||
# Extract numeric value
|
||||
numbers = re.findall(r"[\d.,]+", str(price_str))
|
||||
if numbers:
|
||||
num_str = numbers[0].replace(",", ".")
|
||||
try:
|
||||
return float(num_str)
|
||||
except ValueError:
|
||||
pass
|
||||
return None
|
||||
|
||||
def _clean_row_data(self, row_data: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Process a single row with data normalization."""
|
||||
# Handle NaN values
|
||||
@@ -161,15 +182,22 @@ class CSVProcessor:
|
||||
parsed_price, currency = self.price_processor.parse_price_currency(
|
||||
processed_data["price"]
|
||||
)
|
||||
# Store both raw price string and numeric value
|
||||
raw_price = processed_data["price"]
|
||||
processed_data["price"] = parsed_price
|
||||
processed_data["price_numeric"] = self._parse_price_to_numeric(raw_price)
|
||||
processed_data["currency"] = currency
|
||||
|
||||
# Process sale_price
|
||||
if processed_data.get("sale_price"):
|
||||
raw_sale_price = processed_data["sale_price"]
|
||||
parsed_sale_price, _ = self.price_processor.parse_price_currency(
|
||||
processed_data["sale_price"]
|
||||
)
|
||||
processed_data["sale_price"] = parsed_sale_price
|
||||
processed_data["sale_price_numeric"] = self._parse_price_to_numeric(
|
||||
raw_sale_price
|
||||
)
|
||||
|
||||
# Clean MPN (remove .0 endings)
|
||||
if processed_data.get("mpn"):
|
||||
@@ -186,8 +214,72 @@ class CSVProcessor:
|
||||
|
||||
return processed_data
|
||||
|
||||
def _extract_translation_data(
    self, product_data: dict[str, Any]
) -> dict[str, Any]:
    """Split the translation fields out of a product row.

    Every key listed in ``self.TRANSLATION_FIELDS`` that is present in
    ``product_data`` is moved into the returned dict.

    Note: ``product_data`` is mutated; the moved keys are removed from it.

    Args:
        product_data: Cleaned row data for one product.

    Returns:
        A dict holding only the translation fields found in the row.
    """
    return {
        name: product_data.pop(name)
        for name in self.TRANSLATION_FIELDS
        if name in product_data
    }
|
||||
|
||||
def _create_or_update_translation(
    self,
    db: Session,
    marketplace_product: MarketplaceProduct,
    translation_data: dict[str, Any],
    language: str = "en",
    source_file: str | None = None,
) -> None:
    """Insert or refresh the translation row of a product for one language.

    Nothing is written when ``translation_data`` has no (truthy) title.
    Otherwise the translation matching the product id and ``language`` is
    looked up: if it exists its fields are overwritten, if not a new
    record is added to the session. The session is not committed here.

    Args:
        db: Database session used for the lookup and the insert.
        marketplace_product: Product the translation belongs to; its ``id``
            is read, so it must already be set.
        translation_data: Translation field values (title, description,
            short_description).
        language: Language code of the translation.
        source_file: Name of the import file; on update it only replaces
            the stored value when truthy.
    """
    # A translation without a title is never stored.
    if not translation_data.get("title"):
        return

    translation = (
        db.query(MarketplaceProductTranslation)
        .filter(
            MarketplaceProductTranslation.marketplace_product_id
            == marketplace_product.id,
            MarketplaceProductTranslation.language == language,
        )
        .first()
    )

    if not translation:
        # No row for this product/language yet: create one.
        db.add(
            MarketplaceProductTranslation(
                marketplace_product_id=marketplace_product.id,
                language=language,
                title=translation_data.get("title"),
                description=translation_data.get("description"),
                short_description=translation_data.get("short_description"),
                source_file=source_file,
            )
        )
        return

    # Existing row: copy over every supplied field the model knows about.
    for field_name, field_value in translation_data.items():
        if hasattr(translation, field_name):
            setattr(translation, field_name, field_value)
    translation.updated_at = datetime.now(UTC)
    if source_file:
        translation.source_file = source_file
|
||||
|
||||
async def process_marketplace_csv_from_url(
|
||||
self, url: str, marketplace: str, vendor_name: str, batch_size: int, db: Session
|
||||
self,
|
||||
url: str,
|
||||
marketplace: str,
|
||||
vendor_name: str,
|
||||
batch_size: int,
|
||||
db: Session,
|
||||
language: str = "en",
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Process CSV from URL with marketplace and vendor information.
|
||||
@@ -198,12 +290,13 @@ class CSVProcessor:
|
||||
vendor_name: Name of the vendor
|
||||
batch_size: Number of rows to process in each batch
|
||||
db: Database session
|
||||
language: Language code for translations (default: 'en')
|
||||
|
||||
Returns:
|
||||
Dictionary with processing results
|
||||
"""
|
||||
logger.info(
|
||||
f"Starting marketplace CSV import from {url} for {marketplace} -> {vendor_name}"
|
||||
f"Starting marketplace CSV import from {url} for {marketplace} -> {vendor_name} (lang={language})"
|
||||
)
|
||||
# Download and parse CSV
|
||||
csv_content = self.download_csv(url)
|
||||
@@ -216,11 +309,20 @@ class CSVProcessor:
|
||||
updated = 0
|
||||
errors = 0
|
||||
|
||||
# Extract source file name from URL
|
||||
source_file = url.split("/")[-1] if "/" in url else url
|
||||
|
||||
# Process in batches
|
||||
for i in range(0, len(df), batch_size):
|
||||
batch_df = df.iloc[i : i + batch_size]
|
||||
batch_result = await self._process_marketplace_batch(
|
||||
batch_df, marketplace, vendor_name, db, i // batch_size + 1
|
||||
batch_df,
|
||||
marketplace,
|
||||
vendor_name,
|
||||
db,
|
||||
i // batch_size + 1,
|
||||
language=language,
|
||||
source_file=source_file,
|
||||
)
|
||||
|
||||
imported += batch_result["imported"]
|
||||
@@ -235,7 +337,8 @@ class CSVProcessor:
|
||||
"updated": updated,
|
||||
"errors": errors,
|
||||
"marketplace": marketplace,
|
||||
"name": vendor_name,
|
||||
"vendor_name": vendor_name,
|
||||
"language": language,
|
||||
}
|
||||
|
||||
async def _process_marketplace_batch(
|
||||
@@ -245,6 +348,8 @@ class CSVProcessor:
|
||||
vendor_name: str,
|
||||
db: Session,
|
||||
batch_num: int,
|
||||
language: str = "en",
|
||||
source_file: str | None = None,
|
||||
) -> dict[str, int]:
|
||||
"""Process a batch of CSV rows with marketplace information."""
|
||||
imported = 0
|
||||
@@ -261,9 +366,12 @@ class CSVProcessor:
|
||||
# Convert row to dictionary and clean up
|
||||
product_data = self._clean_row_data(row.to_dict())
|
||||
|
||||
# Extract translation fields BEFORE processing product
|
||||
translation_data = self._extract_translation_data(product_data)
|
||||
|
||||
# Add marketplace and vendor information
|
||||
product_data["marketplace"] = marketplace
|
||||
product_data["name"] = vendor_name
|
||||
product_data["vendor_name"] = vendor_name
|
||||
|
||||
# Validate required fields
|
||||
if not product_data.get("marketplace_product_id"):
|
||||
@@ -273,7 +381,8 @@ class CSVProcessor:
|
||||
errors += 1
|
||||
continue
|
||||
|
||||
if not product_data.get("title"):
|
||||
# Title is now required in translation_data
|
||||
if not translation_data.get("title"):
|
||||
logger.warning(f"Row {index}: Missing title, skipping")
|
||||
errors += 1
|
||||
continue
|
||||
@@ -289,20 +398,30 @@ class CSVProcessor:
|
||||
)
|
||||
|
||||
if existing_product:
|
||||
# Update existing product
|
||||
# Update existing product (only non-translation fields)
|
||||
for key, value in product_data.items():
|
||||
if key not in ["id", "created_at"] and hasattr(
|
||||
existing_product, key
|
||||
):
|
||||
setattr(existing_product, key, value)
|
||||
existing_product.updated_at = datetime.now(UTC)
|
||||
|
||||
# Update or create translation
|
||||
self._create_or_update_translation(
|
||||
db,
|
||||
existing_product,
|
||||
translation_data,
|
||||
language=language,
|
||||
source_file=source_file,
|
||||
)
|
||||
|
||||
updated += 1
|
||||
logger.debug(
|
||||
f"Updated product {product_data['marketplace_product_id']} for "
|
||||
f"{marketplace} and vendor {vendor_name}"
|
||||
)
|
||||
else:
|
||||
# Create new product
|
||||
# Create new product (filter to valid model fields)
|
||||
filtered_data = {
|
||||
k: v
|
||||
for k, v in product_data.items()
|
||||
@@ -311,6 +430,17 @@ class CSVProcessor:
|
||||
}
|
||||
new_product = MarketplaceProduct(**filtered_data)
|
||||
db.add(new_product)
|
||||
db.flush() # Get the ID for the translation
|
||||
|
||||
# Create translation for new product
|
||||
self._create_or_update_translation(
|
||||
db,
|
||||
new_product,
|
||||
translation_data,
|
||||
language=language,
|
||||
source_file=source_file,
|
||||
)
|
||||
|
||||
imported += 1
|
||||
logger.debug(
|
||||
f"Imported new product {product_data['marketplace_product_id']} "
|
||||
|
||||
Reference in New Issue
Block a user