Files
orion/tests/unit/utils/test_csv_processor.py
Samir Boulahtit f20266167d
Some checks failed
CI / ruff (push) Failing after 7s
CI / pytest (push) Failing after 1s
CI / architecture (push) Failing after 9s
CI / dependency-scanning (push) Successful in 27s
CI / audit (push) Successful in 8s
CI / docs (push) Has been skipped
fix(lint): auto-fix ruff violations and tune lint rules
- Auto-fixed 4,496 lint issues (import sorting, modern syntax, etc.)
- Added ignore rules for patterns intentional in this codebase:
  E402 (late imports), E712 (SQLAlchemy filters), B904 (raise from),
  SIM108/SIM105/SIM117 (readability preferences)
- Added per-file ignores for tests and scripts
- Excluded broken scripts/rename_terminology.py (has curly quotes)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 23:10:42 +01:00

463 lines
16 KiB
Python

# tests/unit/utils/test_csv_processor.py
"""Unit tests for CSV processor with translation support."""
from unittest.mock import Mock, patch
import pandas as pd
import pytest
import requests
import requests.exceptions
from app.modules.marketplace.models import MarketplaceProduct
from app.utils.csv_processor import CSVProcessor
@pytest.mark.unit
class TestCSVProcessor:
    """Unit tests for ``CSVProcessor``.

    Covers CSV download (encoding fallbacks and HTTP failures), parsing,
    column normalization, price/translation helpers and the marketplace
    batch import, including translation records.
    """

    def setup_method(self):
        """Give every test its own ``CSVProcessor`` instance."""
        self.processor = CSVProcessor()

    # ------------------------------------------------------------------
    # Private helpers (not collected by pytest: names do not start with
    # ``test``)
    # ------------------------------------------------------------------

    @staticmethod
    def _ok_response(content):
        """Return a mocked successful HTTP response carrying ``content`` bytes."""
        response = Mock()
        response.status_code = 200
        response.content = content
        response.raise_for_status.return_value = None
        return response

    @staticmethod
    def _error_response(status_code, message):
        """Return a mocked response whose ``raise_for_status()`` raises ``HTTPError``."""
        response = Mock()
        response.status_code = status_code
        response.raise_for_status.side_effect = requests.exceptions.HTTPError(message)
        return response

    @staticmethod
    def _purge_products(db, product_ids):
        """Delete all ``MarketplaceProduct`` rows with the given marketplace IDs.

        Used both before a test (rows leaked by an earlier failed run) and
        after it, from a ``finally`` block, so a failing assertion cannot
        leave data behind for later tests.

        Args:
            db: Database session fixture (exposes ``query``/``delete``/``commit``).
            product_ids: Iterable of ``marketplace_product_id`` values to remove.
        """
        stale = (
            db.query(MarketplaceProduct)
            .filter(MarketplaceProduct.marketplace_product_id.in_(list(product_ids)))
            .all()
        )
        # Session-level delete per object, as the tests have always done;
        # presumably this lets related translation rows go with the product
        # -- TODO confirm against the model's cascade configuration.
        for product in stale:
            db.delete(product)
        if stale:
            db.commit()

    # ------------------------------------------------------------------
    # download_csv
    # ------------------------------------------------------------------

    @patch("requests.get")
    def test_download_csv_encoding_fallback(self, mock_get):
        """Test CSV download with encoding fallback."""
        # "é" is the single byte 0xE9 in latin-1, which is not valid UTF-8 here,
        # so the accented title only survives if another decoding is attempted.
        special_content = (
            "marketplace_product_id,title,price\nTEST001,Café MarketplaceProduct,10.99"
        )
        mock_get.return_value = self._ok_response(special_content.encode("latin-1"))

        csv_content = self.processor.download_csv("http://example.com/test.csv")

        mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
        assert isinstance(csv_content, str)
        assert "Café MarketplaceProduct" in csv_content

    @patch("requests.get")
    def test_download_csv_encoding_ignore_fallback(self, mock_get):
        """Test CSV download still returns readable text for undecodable bytes."""
        # 0xFF and 0xFE can never appear in well-formed UTF-8.
        mock_get.return_value = self._ok_response(
            b"marketplace_product_id,title,price\nTEST001,\xff\xfe MarketplaceProduct,10.99"
        )

        csv_content = self.processor.download_csv("http://example.com/test.csv")

        mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
        assert isinstance(csv_content, str)
        # The plain-ASCII parts must survive whatever happens to the bad bytes.
        assert "marketplace_product_id,title,price" in csv_content
        assert "TEST001" in csv_content

    @patch("requests.get")
    def test_download_csv_request_exception(self, mock_get):
        """Test CSV download when the request itself raises."""
        mock_get.side_effect = requests.exceptions.RequestException("Connection error")

        with pytest.raises(requests.exceptions.RequestException):
            self.processor.download_csv("http://example.com/test.csv")

    @patch("requests.get")
    def test_download_csv_http_error(self, mock_get):
        """Test CSV download with a client-side HTTP error (404)."""
        mock_get.return_value = self._error_response(404, "404 Not Found")

        with pytest.raises(requests.exceptions.HTTPError):
            self.processor.download_csv("http://example.com/nonexistent.csv")

    @patch("requests.get")
    def test_download_csv_failure(self, mock_get):
        """Test CSV download with a server-side HTTP error (500).

        Previously this was an exact copy of ``test_download_csv_http_error``;
        it now exercises a different status class so the two tests are not
        redundant.
        """
        mock_get.return_value = self._error_response(500, "500 Server Error")

        with pytest.raises(requests.exceptions.HTTPError):
            self.processor.download_csv("http://example.com/nonexistent.csv")

    # ------------------------------------------------------------------
    # Parsing and row helpers
    # ------------------------------------------------------------------

    def test_parse_csv_content(self):
        """Test CSV content parsing."""
        csv_content = (
            "marketplace_product_id,title,price,marketplace\n"
            "TEST001,Test MarketplaceProduct 1,10.99,TestMarket\n"
            "TEST002,Test MarketplaceProduct 2,15.99,TestMarket"
        )

        df = self.processor.parse_csv(csv_content)

        assert len(df) == 2
        assert "marketplace_product_id" in df.columns
        assert df.iloc[0]["marketplace_product_id"] == "TEST001"
        # Parsed prices are floats; compare with a tolerance rather than ``==``.
        assert df.iloc[1]["price"] == pytest.approx(15.99)

    def test_normalize_columns_google_shopping(self):
        """Test column normalization for Google Shopping feed format."""
        csv_content = (
            "g:id,g:title,g:description,g:price,g:brand,g:product_type\n"
            "TEST001,Product 1,Description 1,19.99 EUR,Brand1,Category1"
        )

        df = self.processor.parse_csv(csv_content)
        df = self.processor.normalize_columns(df)

        assert "marketplace_product_id" in df.columns
        assert "title" in df.columns
        assert "description" in df.columns
        assert "product_type_raw" in df.columns  # Renamed from product_type
        assert df.iloc[0]["marketplace_product_id"] == "TEST001"
        assert df.iloc[0]["title"] == "Product 1"

    def test_extract_translation_data(self):
        """Test extraction of translation fields from product data."""
        product_data = {
            "marketplace_product_id": "TEST001",
            "title": "Test Product",
            "description": "Test Description",
            "short_description": "Short desc",
            "price": "19.99",
            "brand": "TestBrand",
        }

        translation_data = self.processor._extract_translation_data(product_data)

        # Translation fields are returned...
        assert translation_data["title"] == "Test Product"
        assert translation_data["description"] == "Test Description"
        assert translation_data["short_description"] == "Short desc"
        # ...and removed from the dict that was passed in.
        assert "title" not in product_data
        assert "description" not in product_data
        assert "short_description" not in product_data
        # Non-translation fields stay where they were.
        assert product_data["marketplace_product_id"] == "TEST001"
        assert product_data["price"] == "19.99"
        assert product_data["brand"] == "TestBrand"

    def test_parse_price_to_cents(self):
        """Test price string to cents conversion."""
        assert self.processor._parse_price_to_cents("19.99 EUR") == 1999
        assert self.processor._parse_price_to_cents("19,99 EUR") == 1999
        assert self.processor._parse_price_to_cents("$29.99") == 2999
        assert self.processor._parse_price_to_cents("100") == 10000
        assert self.processor._parse_price_to_cents(None) is None
        assert self.processor._parse_price_to_cents("") is None

    def test_clean_row_data_with_prices(self):
        """Test row data cleaning with price parsing to cents."""
        row_data = {
            "marketplace_product_id": "TEST001",
            "title": "Test Product",
            "price": "19.99 EUR",
            "sale_price": "14.99 EUR",
            "gtin": "1234567890123",
        }

        cleaned = self.processor._clean_row_data(row_data)

        assert cleaned["price_cents"] == 1999  # Integer cents
        assert cleaned["sale_price_cents"] == 1499  # Integer cents
        assert cleaned["currency"] == "EUR"

    # ------------------------------------------------------------------
    # End-to-end processing and batch import
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_process_marketplace_csv_from_url(self, db):
        """Test complete marketplace CSV processing."""
        with (
            patch.object(self.processor, "download_csv") as mock_download,
            patch.object(self.processor, "parse_csv") as mock_parse,
        ):
            # Download and parsing are stubbed; only the orchestration runs.
            mock_download.return_value = "csv_content"
            mock_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["TEST001", "TEST002"],
                    "title": ["MarketplaceProduct 1", "MarketplaceProduct 2"],
                    "price": ["10.99", "15.99"],
                    "marketplace": ["TestMarket", "TestMarket"],
                    "store_name": ["TestStore", "TestStore"],
                }
            )
            mock_parse.return_value = mock_df

            result = await self.processor.process_marketplace_csv_from_url(
                "http://example.com/test.csv",
                "TestMarket",
                "TestStore",
                1000,
                db,
                language="en",
            )

            assert "imported" in result
            assert "updated" in result
            assert "total_processed" in result
            assert "language" in result
            assert result["language"] == "en"

    @pytest.mark.asyncio
    async def test_process_batch_creates_translations(self, db):
        """Test that batch processing creates translation records."""
        product_ids = ["TRANS_TEST_001", "TRANS_TEST_002"]
        # Remove rows a previous failed run may have left behind.
        self._purge_products(db, product_ids)
        try:
            batch_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["TRANS_TEST_001", "TRANS_TEST_002"],
                    "title": ["Product One", "Product Two"],
                    "description": ["Description One", "Description Two"],
                    "price": ["19.99 EUR", "29.99 EUR"],
                    "brand": ["Brand1", "Brand2"],
                }
            )

            result = await self.processor._process_marketplace_batch(
                batch_df,
                "TestMarket",
                "TestStore",
                db,
                batch_num=1,
                language="en",
                source_file="test.csv",
            )

            assert result["imported"] == 2
            assert result["errors"] == 0

            # Verify products were created
            products = (
                db.query(MarketplaceProduct)
                .filter(
                    MarketplaceProduct.marketplace_product_id.in_(
                        ["TRANS_TEST_001", "TRANS_TEST_002"]
                    )
                )
                .all()
            )
            assert len(products) == 2

            # Verify translations were created
            for product in products:
                assert len(product.translations) == 1
                translation = product.translations[0]
                assert translation.language == "en"
                assert translation.title is not None
                assert translation.source_file == "test.csv"

            # Verify the per-language accessors
            product1 = next(
                p for p in products if p.marketplace_product_id == "TRANS_TEST_001"
            )
            assert product1.get_title("en") == "Product One"
            assert product1.get_description("en") == "Description One"
        finally:
            # Runs even when an assertion above fails.
            self._purge_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_updates_existing_translations(self, db):
        """Test that batch processing updates existing translation records."""
        product_ids = ["UPDATE_TEST_001"]
        self._purge_products(db, product_ids)
        try:
            # Create initial product with translation
            batch_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["UPDATE_TEST_001"],
                    "title": ["Original Title"],
                    "description": ["Original Description"],
                    "price": ["19.99 EUR"],
                }
            )
            await self.processor._process_marketplace_batch(
                batch_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            # Re-import the same product with new data
            update_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["UPDATE_TEST_001"],
                    "title": ["Updated Title"],
                    "description": ["Updated Description"],
                    "price": ["24.99 EUR"],
                }
            )
            result = await self.processor._process_marketplace_batch(
                update_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            assert result["updated"] == 1
            assert result["imported"] == 0

            # Verify translation was updated
            product = (
                db.query(MarketplaceProduct)
                .filter(MarketplaceProduct.marketplace_product_id == "UPDATE_TEST_001")
                .first()
            )
            assert product.get_title("en") == "Updated Title"
            assert product.get_description("en") == "Updated Description"
        finally:
            self._purge_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_multi_language(self, db):
        """Test importing same product in multiple languages."""
        product_ids = ["MULTI_LANG_001"]
        self._purge_products(db, product_ids)
        try:
            # Import English version
            en_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["MULTI_LANG_001"],
                    "title": ["English Title"],
                    "description": ["English Description"],
                    "price": ["19.99 EUR"],
                    "brand": ["TestBrand"],
                }
            )
            await self.processor._process_marketplace_batch(
                en_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            # Import French version (same product, different language)
            fr_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["MULTI_LANG_001"],
                    "title": ["Titre Français"],
                    "description": ["Description Française"],
                    "price": ["19.99 EUR"],
                    "brand": ["TestBrand"],
                }
            )
            result = await self.processor._process_marketplace_batch(
                fr_df, "TestMarket", "TestStore", db, 1, language="fr"
            )

            assert result["updated"] == 1  # Product existed, so it's an update

            # Verify both translations exist
            product = (
                db.query(MarketplaceProduct)
                .filter(MarketplaceProduct.marketplace_product_id == "MULTI_LANG_001")
                .first()
            )
            assert len(product.translations) == 2

            # Verify each language
            assert product.get_title("en") == "English Title"
            assert product.get_title("fr") == "Titre Français"
            assert product.get_description("en") == "English Description"
            assert product.get_description("fr") == "Description Française"

            # Test fallback to English for unknown language
            assert product.get_title("de") == "English Title"
        finally:
            self._purge_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_skips_missing_title(self, db):
        """Test that rows without title are skipped."""
        product_ids = ["NO_TITLE_001", "HAS_TITLE_001"]
        # Without this, a leftover HAS_TITLE_001 row would be counted as an
        # update instead of an import and fail the assertion below.
        self._purge_products(db, product_ids)
        try:
            batch_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["NO_TITLE_001", "HAS_TITLE_001"],
                    "title": [None, "Has Title"],
                    "price": ["19.99", "29.99"],
                }
            )

            result = await self.processor._process_marketplace_batch(
                batch_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            assert result["imported"] == 1
            assert result["errors"] == 1  # Missing title is an error
        finally:
            self._purge_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_skips_missing_product_id(self, db):
        """Test that rows without marketplace_product_id are skipped."""
        product_ids = ["HAS_ID_001"]
        # Same reasoning as above: start from a state where the row is absent.
        self._purge_products(db, product_ids)
        try:
            batch_df = pd.DataFrame(
                {
                    "marketplace_product_id": [None, "HAS_ID_001"],
                    "title": ["No ID Product", "Has ID Product"],
                    "price": ["19.99", "29.99"],
                }
            )

            result = await self.processor._process_marketplace_batch(
                batch_df, "TestMarket", "TestStore", db, 1, language="en"
            )

            assert result["imported"] == 1
            assert result["errors"] == 1  # Missing ID is an error
        finally:
            self._purge_products(db, product_ids)