- Auto-fixed 4,496 lint issues (import sorting, modern syntax, etc.) - Added ignore rules for patterns intentional in this codebase: E402 (late imports), E712 (SQLAlchemy filters), B904 (raise from), SIM108/SIM105/SIM117 (readability preferences) - Added per-file ignores for tests and scripts - Excluded broken scripts/rename_terminology.py (has curly quotes) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
463 lines
16 KiB
Python
463 lines
16 KiB
Python
# tests/unit/utils/test_csv_processor.py
|
|
"""Unit tests for CSV processor with translation support."""
|
|
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
import requests
|
|
import requests.exceptions
|
|
|
|
from app.modules.marketplace.models import MarketplaceProduct
|
|
from app.utils.csv_processor import CSVProcessor
|
|
|
|
|
|
@pytest.mark.unit
class TestCSVProcessor:
    """Unit tests for ``CSVProcessor``: download, parsing, cleaning and batch import.

    The download tests patch ``requests.get``; the batch tests run against the
    ``db`` fixture and remove the rows they create, even when an assertion fails.
    """

    def setup_method(self):
        """Create a fresh processor for every test."""
        self.processor = CSVProcessor()

    @staticmethod
    def _purge_products(db, product_ids):
        """Delete every ``MarketplaceProduct`` whose ID is in *product_ids* and commit.

        Rows are deleted one by one through the session (not with a bulk
        ``DELETE``), matching how the tests originally removed their data.
        """
        stale = (
            db.query(MarketplaceProduct)
            .filter(MarketplaceProduct.marketplace_product_id.in_(list(product_ids)))
            .all()
        )
        for product in stale:
            db.delete(product)
        db.commit()

    @patch("requests.get")
    def test_download_csv_encoding_fallback(self, mock_get):
        """Test CSV download with encoding fallback"""
        # Content with a non-ASCII character; its latin-1 bytes are not valid UTF-8.
        special_content = (
            "marketplace_product_id,title,price\nTEST001,Café MarketplaceProduct,10.99"
        )

        mock_response = Mock()
        mock_response.status_code = 200
        # latin-1 is an encoding download_csv is expected to fall back to.
        mock_response.content = special_content.encode("latin-1")
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        csv_content = self.processor.download_csv("http://example.com/test.csv")

        mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
        assert isinstance(csv_content, str)
        assert "Café MarketplaceProduct" in csv_content

    @patch("requests.get")
    def test_download_csv_encoding_ignore_fallback(self, mock_get):
        """Test CSV download falls back to UTF-8 with error ignoring"""
        mock_response = Mock()
        mock_response.status_code = 200
        # \xff\xfe is not valid UTF-8, so a strict UTF-8 decode must fail.
        mock_response.content = b"marketplace_product_id,title,price\nTEST001,\xff\xfe MarketplaceProduct,10.99"
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        csv_content = self.processor.download_csv("http://example.com/test.csv")

        mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
        assert isinstance(csv_content, str)
        # Should still contain basic content even with ignored errors
        assert "marketplace_product_id,title,price" in csv_content
        assert "TEST001" in csv_content

    @patch("requests.get")
    def test_download_csv_request_exception(self, mock_get):
        """Test CSV download with request exception"""
        mock_get.side_effect = requests.exceptions.RequestException("Connection error")

        with pytest.raises(requests.exceptions.RequestException):
            self.processor.download_csv("http://example.com/test.csv")

    @patch("requests.get")
    def test_download_csv_http_error(self, mock_get):
        """Test CSV download with HTTP error"""
        mock_response = Mock()
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
            "404 Not Found"
        )
        mock_get.return_value = mock_response

        with pytest.raises(requests.exceptions.HTTPError):
            self.processor.download_csv("http://example.com/nonexistent.csv")

    @patch("requests.get")
    def test_download_csv_failure(self, mock_get):
        """Test CSV download failure"""
        # NOTE(review): this test is identical to test_download_csv_http_error;
        # consider removing one of them or giving this one a distinct scenario.
        # raise_for_status() must raise for the failure to propagate.
        mock_response = Mock()
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
            "404 Not Found"
        )
        mock_get.return_value = mock_response

        with pytest.raises(requests.exceptions.HTTPError):
            self.processor.download_csv("http://example.com/nonexistent.csv")

    def test_parse_csv_content(self):
        """Test CSV content parsing"""
        csv_content = (
            "marketplace_product_id,title,price,marketplace\n"
            "TEST001,Test MarketplaceProduct 1,10.99,TestMarket\n"
            "TEST002,Test MarketplaceProduct 2,15.99,TestMarket"
        )

        df = self.processor.parse_csv(csv_content)

        assert len(df) == 2
        assert "marketplace_product_id" in df.columns
        assert df.iloc[0]["marketplace_product_id"] == "TEST001"
        assert df.iloc[1]["price"] == 15.99

    def test_normalize_columns_google_shopping(self):
        """Test column normalization for Google Shopping feed format"""
        csv_content = (
            "g:id,g:title,g:description,g:price,g:brand,g:product_type\n"
            "TEST001,Product 1,Description 1,19.99 EUR,Brand1,Category1"
        )

        df = self.processor.parse_csv(csv_content)
        df = self.processor.normalize_columns(df)

        assert "marketplace_product_id" in df.columns
        assert "title" in df.columns
        assert "description" in df.columns
        assert "product_type_raw" in df.columns  # Renamed from product_type
        assert df.iloc[0]["marketplace_product_id"] == "TEST001"
        assert df.iloc[0]["title"] == "Product 1"

    def test_extract_translation_data(self):
        """Test extraction of translation fields from product data"""
        product_data = {
            "marketplace_product_id": "TEST001",
            "title": "Test Product",
            "description": "Test Description",
            "short_description": "Short desc",
            "price": "19.99",
            "brand": "TestBrand",
        }

        translation_data = self.processor._extract_translation_data(product_data)

        # Translation fields should be extracted
        assert translation_data["title"] == "Test Product"
        assert translation_data["description"] == "Test Description"
        assert translation_data["short_description"] == "Short desc"

        # Product data should no longer have translation fields
        assert "title" not in product_data
        assert "description" not in product_data
        assert "short_description" not in product_data

        # Non-translation fields should remain
        assert product_data["marketplace_product_id"] == "TEST001"
        assert product_data["price"] == "19.99"
        assert product_data["brand"] == "TestBrand"

    def test_parse_price_to_cents(self):
        """Test price string to cents conversion"""
        assert self.processor._parse_price_to_cents("19.99 EUR") == 1999
        assert self.processor._parse_price_to_cents("19,99 EUR") == 1999
        assert self.processor._parse_price_to_cents("$29.99") == 2999
        assert self.processor._parse_price_to_cents("100") == 10000
        assert self.processor._parse_price_to_cents(None) is None
        assert self.processor._parse_price_to_cents("") is None

    def test_clean_row_data_with_prices(self):
        """Test row data cleaning with price parsing to cents"""
        row_data = {
            "marketplace_product_id": "TEST001",
            "title": "Test Product",
            "price": "19.99 EUR",
            "sale_price": "14.99 EUR",
            "gtin": "1234567890123",
        }

        cleaned = self.processor._clean_row_data(row_data)

        assert cleaned["price_cents"] == 1999  # Integer cents
        assert cleaned["sale_price_cents"] == 1499  # Integer cents
        assert cleaned["currency"] == "EUR"

    @pytest.mark.asyncio
    async def test_process_marketplace_csv_from_url(self, db):
        """Test complete marketplace CSV processing"""
        with (
            patch.object(self.processor, "download_csv") as mock_download,
            patch.object(self.processor, "parse_csv") as mock_parse,
        ):
            # Mock successful download and parsing
            mock_download.return_value = "csv_content"
            mock_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["TEST001", "TEST002"],
                    "title": ["MarketplaceProduct 1", "MarketplaceProduct 2"],
                    "price": ["10.99", "15.99"],
                    "marketplace": ["TestMarket", "TestMarket"],
                    "store_name": ["TestStore", "TestStore"],
                }
            )
            mock_parse.return_value = mock_df

            result = await self.processor.process_marketplace_csv_from_url(
                "http://example.com/test.csv",
                "TestMarket",
                "TestStore",
                1000,
                db,
                language="en",
            )

            assert "imported" in result
            assert "updated" in result
            assert "total_processed" in result

            assert "language" in result
            assert result["language"] == "en"

    @pytest.mark.asyncio
    async def test_process_batch_creates_translations(self, db):
        """Test that batch processing creates translation records"""
        product_ids = ["TRANS_TEST_001", "TRANS_TEST_002"]

        # Clean up any existing test data
        self._purge_products(db, product_ids)

        # Create test DataFrame
        batch_df = pd.DataFrame(
            {
                "marketplace_product_id": ["TRANS_TEST_001", "TRANS_TEST_002"],
                "title": ["Product One", "Product Two"],
                "description": ["Description One", "Description Two"],
                "price": ["19.99 EUR", "29.99 EUR"],
                "brand": ["Brand1", "Brand2"],
            }
        )

        try:
            result = await self.processor._process_marketplace_batch(
                batch_df,
                "TestMarket",
                "TestStore",
                db,
                batch_num=1,
                language="en",
                source_file="test.csv",
            )

            assert result["imported"] == 2
            assert result["errors"] == 0

            # Verify products were created
            products = (
                db.query(MarketplaceProduct)
                .filter(MarketplaceProduct.marketplace_product_id.in_(product_ids))
                .all()
            )
            assert len(products) == 2

            # Verify translations were created
            for product in products:
                assert len(product.translations) == 1
                translation = product.translations[0]
                assert translation.language == "en"
                assert translation.title is not None
                assert translation.source_file == "test.csv"

            # Verify get_title method works
            product1 = next(
                p for p in products if p.marketplace_product_id == "TRANS_TEST_001"
            )
            assert product1.get_title("en") == "Product One"
            assert product1.get_description("en") == "Description One"
        finally:
            # Clean up even when an assertion above fails
            self._purge_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_updates_existing_translations(self, db):
        """Test that batch processing updates existing translation records"""
        product_ids = ["UPDATE_TEST_001"]

        # Clean up any existing test data
        self._purge_products(db, product_ids)

        # Create initial product with translation
        batch_df = pd.DataFrame(
            {
                "marketplace_product_id": ["UPDATE_TEST_001"],
                "title": ["Original Title"],
                "description": ["Original Description"],
                "price": ["19.99 EUR"],
            }
        )

        # Update with new data
        update_df = pd.DataFrame(
            {
                "marketplace_product_id": ["UPDATE_TEST_001"],
                "title": ["Updated Title"],
                "description": ["Updated Description"],
                "price": ["24.99 EUR"],
            }
        )

        try:
            await self.processor._process_marketplace_batch(
                batch_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            result = await self.processor._process_marketplace_batch(
                update_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            assert result["updated"] == 1
            assert result["imported"] == 0

            # Verify translation was updated
            product = (
                db.query(MarketplaceProduct)
                .filter(MarketplaceProduct.marketplace_product_id == "UPDATE_TEST_001")
                .first()
            )
            # Fail with a clear message instead of an AttributeError on None.
            assert product is not None
            assert product.get_title("en") == "Updated Title"
            assert product.get_description("en") == "Updated Description"
        finally:
            # Clean up even when an assertion above fails
            self._purge_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_multi_language(self, db):
        """Test importing same product in multiple languages"""
        product_ids = ["MULTI_LANG_001"]

        # Clean up
        self._purge_products(db, product_ids)

        # English version
        en_df = pd.DataFrame(
            {
                "marketplace_product_id": ["MULTI_LANG_001"],
                "title": ["English Title"],
                "description": ["English Description"],
                "price": ["19.99 EUR"],
                "brand": ["TestBrand"],
            }
        )

        # French version (same product, different language)
        fr_df = pd.DataFrame(
            {
                "marketplace_product_id": ["MULTI_LANG_001"],
                "title": ["Titre Français"],
                "description": ["Description Française"],
                "price": ["19.99 EUR"],
                "brand": ["TestBrand"],
            }
        )

        try:
            await self.processor._process_marketplace_batch(
                en_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            result = await self.processor._process_marketplace_batch(
                fr_df, "TestMarket", "TestStore", db, 1, language="fr"
            )

            assert result["updated"] == 1  # Product existed, so it's an update

            # Verify both translations exist
            product = (
                db.query(MarketplaceProduct)
                .filter(MarketplaceProduct.marketplace_product_id == "MULTI_LANG_001")
                .first()
            )
            # Fail with a clear message instead of an AttributeError on None.
            assert product is not None
            assert len(product.translations) == 2

            # Verify each language
            assert product.get_title("en") == "English Title"
            assert product.get_title("fr") == "Titre Français"
            assert product.get_description("en") == "English Description"
            assert product.get_description("fr") == "Description Française"

            # Test fallback to English for unknown language
            assert product.get_title("de") == "English Title"
        finally:
            # Clean up even when an assertion above fails
            self._purge_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_skips_missing_title(self, db):
        """Test that rows without title are skipped"""
        product_ids = ["NO_TITLE_001", "HAS_TITLE_001"]

        # A leftover row from an aborted run would turn the import into an update.
        self._purge_products(db, product_ids)

        batch_df = pd.DataFrame(
            {
                "marketplace_product_id": ["NO_TITLE_001", "HAS_TITLE_001"],
                "title": [None, "Has Title"],
                "price": ["19.99", "29.99"],
            }
        )

        try:
            result = await self.processor._process_marketplace_batch(
                batch_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            assert result["imported"] == 1
            assert result["errors"] == 1  # Missing title is an error
        finally:
            # Clean up even when an assertion above fails
            self._purge_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_skips_missing_product_id(self, db):
        """Test that rows without marketplace_product_id are skipped"""
        product_ids = ["HAS_ID_001"]

        # A leftover row from an aborted run would turn the import into an update.
        self._purge_products(db, product_ids)

        batch_df = pd.DataFrame(
            {
                "marketplace_product_id": [None, "HAS_ID_001"],
                "title": ["No ID Product", "Has ID Product"],
                "price": ["19.99", "29.99"],
            }
        )

        try:
            result = await self.processor._process_marketplace_batch(
                batch_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            assert result["imported"] == 1
            assert result["errors"] == 1  # Missing ID is an error
        finally:
            # Clean up even when an assertion above fails
            self._purge_products(db, product_ids)