feat: update CSV import to support multi-language translations

- Add language parameter to import endpoints and background tasks
- Extract translation fields (title, description, short_description)
- Create/update MarketplaceProductTranslation records during import
- Add MarketplaceProductTranslationSchema for API responses
- Map product_type column to product_type_raw to avoid enum conflict
- Parse prices to numeric format (price_numeric, sale_price_numeric)
- Update marketplace product service for translation-based lookups
- Update CSV export to retrieve titles from translations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-11 17:29:13 +01:00
parent 92a1c0249f
commit f2af3aae29
7 changed files with 535 additions and 103 deletions

View File

@@ -22,6 +22,7 @@ from models.schema.marketplace_import_job import (
MarketplaceImportJobRequest,
MarketplaceImportJobResponse,
)
from models.schema.stats import ImportStatsResponse
router = APIRouter(prefix="/marketplace-import-jobs")
logger = logging.getLogger(__name__)
@@ -68,6 +69,9 @@ async def create_marketplace_import_job(
Admins can trigger imports for any vendor by specifying vendor_id.
The import is processed asynchronously in the background.
The `language` parameter specifies the language code for product
translations (e.g., 'en', 'fr', 'de'). Default is 'en'.
"""
vendor = vendor_service.get_vendor_by_id(db, request.vendor_id)
@@ -75,6 +79,7 @@ async def create_marketplace_import_job(
source_url=request.source_url,
marketplace=request.marketplace,
batch_size=request.batch_size,
language=request.language,
)
job = marketplace_import_job_service.create_import_job(
@@ -87,7 +92,7 @@ async def create_marketplace_import_job(
logger.info(
f"Admin {current_admin.username} created import job {job.id} "
f"for vendor {vendor.vendor_code}"
f"for vendor {vendor.vendor_code} (language={request.language})"
)
background_tasks.add_task(
@@ -97,19 +102,21 @@ async def create_marketplace_import_job(
request.marketplace,
vendor.id,
request.batch_size or 1000,
request.language, # Pass language to background task
)
return marketplace_import_job_service.convert_to_response_model(job)
# NOTE: /stats must be defined BEFORE /{job_id} to avoid route conflicts
@router.get("/stats")
@router.get("/stats", response_model=ImportStatsResponse)
def get_import_statistics(
db: Session = Depends(get_db),
current_admin: User = Depends(get_current_admin_api),
):
"""Get marketplace import statistics (Admin only)."""
return stats_service.get_import_statistics(db)
stats = stats_service.get_import_statistics(db)
return ImportStatsResponse(**stats)
@router.get("/{job_id}", response_model=AdminMarketplaceImportJobResponse)

View File

@@ -35,12 +35,19 @@ async def import_products_from_marketplace(
current_user: User = Depends(get_current_vendor_api),
db: Session = Depends(get_db),
):
"""Import products from marketplace CSV with background processing (Protected)."""
"""Import products from marketplace CSV with background processing (Protected).
The `language` parameter specifies the language code for product
translations (e.g., 'en', 'fr', 'de'). Default is 'en'.
For multi-language imports, call this endpoint multiple times with
different language codes and CSV files containing translations.
"""
vendor = vendor_service.get_vendor_by_id(db, current_user.token_vendor_id)
logger.info(
f"Starting marketplace import: {request.marketplace} for vendor {vendor.vendor_code} "
f"by user {current_user.username}"
f"by user {current_user.username} (language={request.language})"
)
# Create import job (vendor comes from token)
@@ -49,7 +56,7 @@ async def import_products_from_marketplace(
)
db.commit()
# Process in background
# Process in background with language parameter
background_tasks.add_task(
process_marketplace_import,
import_job.id,
@@ -57,6 +64,7 @@ async def import_products_from_marketplace(
request.marketplace,
vendor.id,
request.batch_size or 1000,
request.language, # Pass language to background task
)
return MarketplaceImportJobResponse(
@@ -67,6 +75,7 @@ async def import_products_from_marketplace(
vendor_code=vendor.vendor_code,
vendor_name=vendor.name,
source_url=request.source_url,
language=request.language,
message=f"Marketplace import started from {request.marketplace}. "
f"Check status with /import-status/{import_job.id}",
imported=0,

View File

@@ -7,6 +7,9 @@ This module provides classes and functions for:
- Advanced product filtering and search
- Inventory information integration
- CSV export functionality
Note: Title and description are now stored in MarketplaceProductTranslation table.
Use get_title(language) and get_description(language) methods on the model.
"""
import csv
@@ -15,8 +18,9 @@ from collections.abc import Generator
from datetime import UTC, datetime
from io import StringIO
from sqlalchemy import or_
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
from sqlalchemy.orm import Session, joinedload
from app.exceptions import (
InvalidMarketplaceProductDataException,
@@ -28,6 +32,7 @@ from app.exceptions import (
from app.utils.data_processing import GTINProcessor, PriceProcessor
from models.database.inventory import Inventory
from models.database.marketplace_product import MarketplaceProduct
from models.database.marketplace_product_translation import MarketplaceProductTranslation
from models.schema.inventory import InventoryLocationResponse, InventorySummaryResponse
from models.schema.marketplace_product import (
MarketplaceProductCreate,
@@ -46,9 +51,25 @@ class MarketplaceProductService:
self.price_processor = PriceProcessor()
def create_product(
self, db: Session, product_data: MarketplaceProductCreate
self,
db: Session,
product_data: MarketplaceProductCreate,
title: str | None = None,
description: str | None = None,
language: str = "en",
) -> MarketplaceProduct:
"""Create a new product with validation."""
"""Create a new product with validation.
Args:
db: Database session
product_data: Product data from schema
title: Product title (stored in translations table)
description: Product description (stored in translations table)
language: Language code for translation (default: 'en')
Returns:
Created MarketplaceProduct instance
"""
try:
# Process and validate GTIN if provided
if product_data.gtin:
@@ -85,13 +106,26 @@ class MarketplaceProductService:
"MarketplaceProduct ID is required", field="marketplace_product_id"
)
if not product_data.title or not product_data.title.strip():
raise MarketplaceProductValidationException(
"MarketplaceProduct title is required", field="title"
)
# Create the product (without title/description - those go in translations)
product_dict = product_data.model_dump()
# Remove any title/description if present in schema (for backwards compatibility)
product_dict.pop("title", None)
product_dict.pop("description", None)
db_product = MarketplaceProduct(**product_data.model_dump())
db_product = MarketplaceProduct(**product_dict)
db.add(db_product)
db.flush() # Get the ID
# Create translation if title is provided
if title and title.strip():
translation = MarketplaceProductTranslation(
marketplace_product_id=db_product.id,
language=language,
title=title.strip(),
description=description.strip() if description else None,
)
db.add(translation)
db.flush()
db.refresh(db_product)
@@ -123,6 +157,7 @@ class MarketplaceProductService:
try:
return (
db.query(MarketplaceProduct)
.options(joinedload(MarketplaceProduct.translations))
.filter(
MarketplaceProduct.marketplace_product_id == marketplace_product_id
)
@@ -164,6 +199,7 @@ class MarketplaceProductService:
marketplace: str | None = None,
vendor_name: str | None = None,
search: str | None = None,
language: str = "en",
) -> tuple[list[MarketplaceProduct], int]:
"""
Get products with filtering and pagination.
@@ -177,13 +213,16 @@ class MarketplaceProductService:
availability: Availability filter
marketplace: Marketplace filter
vendor_name: Vendor name filter
search: Search term
search: Search term (searches in translations too)
language: Language for search (default: 'en')
Returns:
Tuple of (products_list, total_count)
"""
try:
query = db.query(MarketplaceProduct)
query = db.query(MarketplaceProduct).options(
joinedload(MarketplaceProduct.translations)
)
# Apply filters
if brand:
@@ -203,14 +242,22 @@ class MarketplaceProductService:
MarketplaceProduct.vendor_name.ilike(f"%{vendor_name}%")
)
if search:
# Search in title, description, marketplace, and name
# Search in marketplace, vendor_name, brand, and translations
search_term = f"%{search}%"
query = query.filter(
(MarketplaceProduct.title.ilike(search_term))
| (MarketplaceProduct.description.ilike(search_term))
| (MarketplaceProduct.marketplace.ilike(search_term))
| (MarketplaceProduct.vendor_name.ilike(search_term))
# Join with translations for title/description search
query = query.outerjoin(MarketplaceProductTranslation).filter(
or_(
MarketplaceProduct.marketplace.ilike(search_term),
MarketplaceProduct.vendor_name.ilike(search_term),
MarketplaceProduct.brand.ilike(search_term),
MarketplaceProduct.gtin.ilike(search_term),
MarketplaceProduct.marketplace_product_id.ilike(search_term),
MarketplaceProductTranslation.title.ilike(search_term),
MarketplaceProductTranslation.description.ilike(search_term),
)
)
# Remove duplicates from join
query = query.distinct()
total = query.count()
products = query.offset(skip).limit(limit).all()
@@ -226,14 +273,33 @@ class MarketplaceProductService:
db: Session,
marketplace_product_id: str,
product_update: MarketplaceProductUpdate,
title: str | None = None,
description: str | None = None,
language: str = "en",
) -> MarketplaceProduct:
"""Update product with validation."""
"""Update product with validation.
Args:
db: Database session
marketplace_product_id: ID of product to update
product_update: Product update data from schema
title: Updated title (stored in translations table)
description: Updated description (stored in translations table)
language: Language code for translation (default: 'en')
Returns:
Updated MarketplaceProduct instance
"""
try:
product = self.get_product_by_id_or_raise(db, marketplace_product_id)
# Update fields
update_data = product_update.model_dump(exclude_unset=True)
# Remove title/description from update data (handled separately)
update_data.pop("title", None)
update_data.pop("description", None)
# Validate GTIN if being updated
if "gtin" in update_data and update_data["gtin"]:
normalized_gtin = self.gtin_processor.normalize(update_data["gtin"])
@@ -256,18 +322,19 @@ class MarketplaceProductService:
# Convert ValueError to domain-specific exception
raise InvalidMarketplaceProductDataException(str(e), field="price")
# Validate required fields if being updated
if "title" in update_data and (
not update_data["title"] or not update_data["title"].strip()
):
raise MarketplaceProductValidationException(
"MarketplaceProduct title cannot be empty", field="title"
)
# Apply updates to product
for key, value in update_data.items():
setattr(product, key, value)
if hasattr(product, key):
setattr(product, key, value)
product.updated_at = datetime.now(UTC)
# Update or create translation if title/description provided
if title is not None or description is not None:
self._update_or_create_translation(
db, product, title, description, language
)
db.flush()
db.refresh(product)
@@ -284,6 +351,41 @@ class MarketplaceProductService:
logger.error(f"Error updating product {marketplace_product_id}: {str(e)}")
raise ValidationException("Failed to update product")
def _update_or_create_translation(
    self,
    db: Session,
    product: MarketplaceProduct,
    title: str | None,
    description: str | None,
    language: str,
) -> None:
    """Update the product's translation for ``language`` or create one.

    Args:
        db: Database session used for the lookup and for adding a new row.
        product: Product whose translation is being written.
        title: New title, or None/blank to leave an existing title untouched.
        description: New description. None leaves an existing description
            untouched; an empty or whitespace-only value clears it.
        language: Language code identifying the translation row.

    A new translation is only created when a non-blank title is supplied.
    Nothing is flushed or committed here.
    """
    # Normalise once so the update and create paths apply the same rule:
    # a blank or whitespace-only title never replaces or creates a title.
    cleaned_title = title.strip() if title else ""

    existing = (
        db.query(MarketplaceProductTranslation)
        .filter(
            MarketplaceProductTranslation.marketplace_product_id == product.id,
            MarketplaceProductTranslation.language == language,
        )
        .first()
    )

    if existing:
        if cleaned_title:
            existing.title = cleaned_title
        if description is not None:
            # Store None rather than "" for blank descriptions.
            existing.description = description.strip() or None
        existing.updated_at = datetime.now(UTC)
    elif cleaned_title:
        cleaned_description = description.strip() if description else ""
        db.add(
            MarketplaceProductTranslation(
                marketplace_product_id=product.id,
                language=language,
                title=cleaned_title,
                description=cleaned_description or None,
            )
        )
def delete_product(self, db: Session, marketplace_product_id: str) -> bool:
"""
Delete product and associated inventory.
@@ -305,6 +407,7 @@ class MarketplaceProductService:
if product.gtin:
db.query(Inventory).filter(Inventory.gtin == product.gtin).delete()
# Translations will be cascade deleted
db.delete(product)
db.flush()
@@ -354,16 +457,12 @@ class MarketplaceProductService:
logger.error(f"Error getting inventory info for GTIN {gtin}: {str(e)}")
return None
import csv
from io import StringIO
from sqlalchemy.orm import Session
def generate_csv_export(
self,
db: Session,
marketplace: str | None = None,
vendor_name: str | None = None,
language: str = "en",
) -> Generator[str, None, None]:
"""
Generate CSV export with streaming for memory efficiency and proper CSV escaping.
@@ -372,6 +471,7 @@ class MarketplaceProductService:
db: Database session
marketplace: Optional marketplace filter
vendor_name: Optional vendor name filter
language: Language code for title/description (default: 'en')
Yields:
CSV content as strings with proper escaping
@@ -394,7 +494,7 @@ class MarketplaceProductService:
"brand",
"gtin",
"marketplace",
"name",
"vendor_name",
]
writer.writerow(headers)
yield output.getvalue()
@@ -407,7 +507,9 @@ class MarketplaceProductService:
offset = 0
while True:
query = db.query(MarketplaceProduct)
query = db.query(MarketplaceProduct).options(
joinedload(MarketplaceProduct.translations)
)
# Apply marketplace filters
if marketplace:
@@ -424,11 +526,15 @@ class MarketplaceProductService:
break
for product in products:
# Get title and description from translations
title = product.get_title(language) or ""
description = product.get_description(language) or ""
# Create CSV row with proper escaping
row_data = [
product.marketplace_product_id or "",
product.title or "",
product.description or "",
title,
description,
product.link or "",
product.image_link or "",
product.availability or "",
@@ -471,7 +577,7 @@ class MarketplaceProductService:
# Private helper methods
def _validate_product_data(self, product_data: dict) -> None:
"""Validate product data structure."""
required_fields = ["marketplace_product_id", "title"]
required_fields = ["marketplace_product_id"]
for field in required_fields:
if field not in product_data or not product_data[field]:
@@ -486,11 +592,9 @@ class MarketplaceProductService:
# Trim whitespace from string fields
string_fields = [
"marketplace_product_id",
"title",
"description",
"brand",
"marketplace",
"name",
"vendor_name",
]
for field in string_fields:
if field in normalized and normalized[field]:

View File

@@ -1,4 +1,6 @@
# app/tasks/background_tasks.py
"""Background tasks for marketplace imports."""
import logging
from datetime import UTC, datetime
@@ -14,10 +16,20 @@ async def process_marketplace_import(
job_id: int,
url: str,
marketplace: str,
vendor_id: int, # FIXED: Changed from vendor_name to vendor_id
vendor_id: int,
batch_size: int = 1000,
language: str = "en",
):
"""Background task to process marketplace CSV import."""
"""Background task to process marketplace CSV import.
Args:
job_id: ID of the MarketplaceImportJob record
url: URL to the CSV file
marketplace: Name of the marketplace (e.g., 'Letzshop')
vendor_id: ID of the vendor
batch_size: Number of rows to process per batch
language: Language code for translations (default: 'en')
"""
db = SessionLocal()
csv_processor = CSVProcessor()
job = None
@@ -50,16 +62,17 @@ async def process_marketplace_import(
logger.info(
f"Processing import: Job {job_id}, Marketplace: {marketplace}, "
f"Vendor: {vendor.name} ({vendor.vendor_code})"
f"Vendor: {vendor.name} ({vendor.vendor_code}), Language: {language}"
)
# Process CSV with vendor_id
# Process CSV with vendor name and language
result = await csv_processor.process_marketplace_csv_from_url(
url,
marketplace,
vendor_id, # FIXED: Pass vendor_id instead of vendor_name
batch_size,
db,
url=url,
marketplace=marketplace,
vendor_name=vendor.name, # Pass vendor name to CSV processor
batch_size=batch_size,
db=db,
language=language, # Pass language for translations
)
# Update job with results

View File

@@ -1,13 +1,14 @@
# app/utils/csv_processor.py
"""CSV processor utilities ....
"""CSV processor utilities for marketplace product imports.
This module provides classes and functions for:
- ....
- ....
- ....
- Downloading and parsing CSV files with multiple encoding support
- Normalizing column names to match database schema
- Creating/updating MarketplaceProduct records with translations
"""
import logging
import re
from datetime import UTC, datetime
from io import StringIO
from typing import Any
@@ -18,6 +19,7 @@ from sqlalchemy import literal
from sqlalchemy.orm import Session
from models.database.marketplace_product import MarketplaceProduct
from models.database.marketplace_product_translation import MarketplaceProductTranslation
logger = logging.getLogger(__name__)
@@ -38,6 +40,9 @@ class CSVProcessor:
{"sep": "\t", "engine": "python"},
]
# Fields that belong to the translation table, not MarketplaceProduct
TRANSLATION_FIELDS = {"title", "description", "short_description"}
COLUMN_MAPPING = {
# Standard variations
"id": "marketplace_product_id",
@@ -72,7 +77,8 @@ class CSVProcessor:
"g:size_system": "size_system",
"g:item_group_id": "item_group_id",
"g:google_product_category": "google_product_category",
"g:product_type": "product_type",
"g:product_type": "product_type_raw", # Maps to product_type_raw (renamed)
"product_type": "product_type_raw", # Also map plain product_type
"g:custom_label_0": "custom_label_0",
"g:custom_label_1": "custom_label_1",
"g:custom_label_2": "custom_label_2",
@@ -145,6 +151,21 @@ class CSVProcessor:
logger.info(f"Normalized columns: {list(df.columns)}")
return df
def _parse_price_to_numeric(self, price_str: str | None) -> float | None:
"""Parse price string like '19.99 EUR' to float."""
if not price_str:
return None
# Extract numeric value
numbers = re.findall(r"[\d.,]+", str(price_str))
if numbers:
num_str = numbers[0].replace(",", ".")
try:
return float(num_str)
except ValueError:
pass
return None
def _clean_row_data(self, row_data: dict[str, Any]) -> dict[str, Any]:
"""Process a single row with data normalization."""
# Handle NaN values
@@ -161,15 +182,22 @@ class CSVProcessor:
parsed_price, currency = self.price_processor.parse_price_currency(
processed_data["price"]
)
# Store both raw price string and numeric value
raw_price = processed_data["price"]
processed_data["price"] = parsed_price
processed_data["price_numeric"] = self._parse_price_to_numeric(raw_price)
processed_data["currency"] = currency
# Process sale_price
if processed_data.get("sale_price"):
raw_sale_price = processed_data["sale_price"]
parsed_sale_price, _ = self.price_processor.parse_price_currency(
processed_data["sale_price"]
)
processed_data["sale_price"] = parsed_sale_price
processed_data["sale_price_numeric"] = self._parse_price_to_numeric(
raw_sale_price
)
# Clean MPN (remove .0 endings)
if processed_data.get("mpn"):
@@ -186,8 +214,72 @@ class CSVProcessor:
return processed_data
def _extract_translation_data(
self, product_data: dict[str, Any]
) -> dict[str, Any]:
"""Extract translation fields from product data.
Returns a dict with title, description, etc. that belong
in the translation table. Removes these fields from product_data in place.
"""
translation_data = {}
for field in self.TRANSLATION_FIELDS:
if field in product_data:
translation_data[field] = product_data.pop(field)
return translation_data
def _create_or_update_translation(
    self,
    db: Session,
    marketplace_product: MarketplaceProduct,
    translation_data: dict[str, Any],
    language: str = "en",
    source_file: str | None = None,
) -> None:
    """Write the translation row for a product in the given language.

    Does nothing when ``translation_data`` has no truthy ``title``.
    Otherwise the row matching (product id, language) is updated in place,
    or a new row is added to the session when none exists. Nothing is
    flushed or committed here.
    """
    # A translation without a title is not stored.
    if not translation_data.get("title"):
        return

    product_id = marketplace_product.id
    lookup = db.query(MarketplaceProductTranslation).filter(
        MarketplaceProductTranslation.marketplace_product_id == product_id,
        MarketplaceProductTranslation.language == language,
    )
    record = lookup.first()

    if not record:
        # No row yet for this product/language pair: add one.
        db.add(
            MarketplaceProductTranslation(
                marketplace_product_id=marketplace_product.id,
                language=language,
                title=translation_data.get("title"),
                description=translation_data.get("description"),
                short_description=translation_data.get("short_description"),
                source_file=source_file,
            )
        )
        return

    # Copy over every supplied field the model actually has.
    for field_name, field_value in translation_data.items():
        if hasattr(record, field_name):
            setattr(record, field_name, field_value)
    record.updated_at = datetime.now(UTC)
    if source_file:
        record.source_file = source_file
async def process_marketplace_csv_from_url(
self, url: str, marketplace: str, vendor_name: str, batch_size: int, db: Session
self,
url: str,
marketplace: str,
vendor_name: str,
batch_size: int,
db: Session,
language: str = "en",
) -> dict[str, Any]:
"""
Process CSV from URL with marketplace and vendor information.
@@ -198,12 +290,13 @@ class CSVProcessor:
vendor_name: Name of the vendor
batch_size: Number of rows to process in each batch
db: Database session
language: Language code for translations (default: 'en')
Returns:
Dictionary with processing results
"""
logger.info(
f"Starting marketplace CSV import from {url} for {marketplace} -> {vendor_name}"
f"Starting marketplace CSV import from {url} for {marketplace} -> {vendor_name} (lang={language})"
)
# Download and parse CSV
csv_content = self.download_csv(url)
@@ -216,11 +309,20 @@ class CSVProcessor:
updated = 0
errors = 0
# Extract source file name from URL
source_file = url.split("/")[-1] if "/" in url else url
# Process in batches
for i in range(0, len(df), batch_size):
batch_df = df.iloc[i : i + batch_size]
batch_result = await self._process_marketplace_batch(
batch_df, marketplace, vendor_name, db, i // batch_size + 1
batch_df,
marketplace,
vendor_name,
db,
i // batch_size + 1,
language=language,
source_file=source_file,
)
imported += batch_result["imported"]
@@ -235,7 +337,8 @@ class CSVProcessor:
"updated": updated,
"errors": errors,
"marketplace": marketplace,
"name": vendor_name,
"vendor_name": vendor_name,
"language": language,
}
async def _process_marketplace_batch(
@@ -245,6 +348,8 @@ class CSVProcessor:
vendor_name: str,
db: Session,
batch_num: int,
language: str = "en",
source_file: str | None = None,
) -> dict[str, int]:
"""Process a batch of CSV rows with marketplace information."""
imported = 0
@@ -261,9 +366,12 @@ class CSVProcessor:
# Convert row to dictionary and clean up
product_data = self._clean_row_data(row.to_dict())
# Extract translation fields BEFORE processing product
translation_data = self._extract_translation_data(product_data)
# Add marketplace and vendor information
product_data["marketplace"] = marketplace
product_data["name"] = vendor_name
product_data["vendor_name"] = vendor_name
# Validate required fields
if not product_data.get("marketplace_product_id"):
@@ -273,7 +381,8 @@ class CSVProcessor:
errors += 1
continue
if not product_data.get("title"):
# Title is now required in translation_data
if not translation_data.get("title"):
logger.warning(f"Row {index}: Missing title, skipping")
errors += 1
continue
@@ -289,20 +398,30 @@ class CSVProcessor:
)
if existing_product:
# Update existing product
# Update existing product (only non-translation fields)
for key, value in product_data.items():
if key not in ["id", "created_at"] and hasattr(
existing_product, key
):
setattr(existing_product, key, value)
existing_product.updated_at = datetime.now(UTC)
# Update or create translation
self._create_or_update_translation(
db,
existing_product,
translation_data,
language=language,
source_file=source_file,
)
updated += 1
logger.debug(
f"Updated product {product_data['marketplace_product_id']} for "
f"{marketplace} and vendor {vendor_name}"
)
else:
# Create new product
# Create new product (filter to valid model fields)
filtered_data = {
k: v
for k, v in product_data.items()
@@ -311,6 +430,17 @@ class CSVProcessor:
}
new_product = MarketplaceProduct(**filtered_data)
db.add(new_product)
db.flush() # Get the ID for the translation
# Create translation for new product
self._create_or_update_translation(
db,
new_product,
translation_data,
language=language,
source_file=source_file,
)
imported += 1
logger.debug(
f"Imported new product {product_data['marketplace_product_id']} "