# tests/unit/utils/test_csv_processor.py
"""Unit tests for CSV processor with translation support."""

from unittest.mock import Mock, patch

import pandas as pd
import pytest
import requests
import requests.exceptions

from app.utils.csv_processor import CSVProcessor
from models.database.marketplace_product import MarketplaceProduct


def _delete_marketplace_products(db, product_ids):
    """Delete all MarketplaceProduct rows matching *product_ids* and commit.

    Used both before a test (to drop rows left behind by an earlier,
    aborted run) and in ``finally`` blocks (so rows are removed even when
    an assertion fails part-way through the test).

    Args:
        db: Database session exposing ``query``, ``delete`` and ``commit``.
        product_ids: Iterable of ``marketplace_product_id`` values to remove.
    """
    stale_products = (
        db.query(MarketplaceProduct)
        .filter(MarketplaceProduct.marketplace_product_id.in_(product_ids))
        .all()
    )
    for stale_product in stale_products:
        db.delete(stale_product)
    db.commit()


@pytest.mark.unit
class TestCSVProcessor:
    """Tests for CSVProcessor download, parsing, cleaning and batch import."""

    def setup_method(self):
        """Create a fresh CSVProcessor for every test."""
        self.processor = CSVProcessor()

    @patch("requests.get")
    def test_download_csv_encoding_fallback(self, mock_get):
        """Test CSV download with encoding fallback"""
        # "é" encoded as latin-1 is not valid UTF-8, so a UTF-8-only decode
        # of this payload would fail.
        special_content = (
            "marketplace_product_id,title,price\nTEST001,Café MarketplaceProduct,10.99"
        )

        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.content = special_content.encode("latin-1")
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        csv_content = self.processor.download_csv("http://example.com/test.csv")

        mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
        assert isinstance(csv_content, str)
        assert "Café MarketplaceProduct" in csv_content

    @patch("requests.get")
    def test_download_csv_encoding_ignore_fallback(self, mock_get):
        """Test CSV download falls back to UTF-8 with error ignoring"""
        mock_response = Mock()
        mock_response.status_code = 200
        # \xff\xfe is not valid UTF-8; the surrounding ASCII must survive
        # whichever fallback the processor ends up using.
        mock_response.content = b"marketplace_product_id,title,price\nTEST001,\xff\xfe MarketplaceProduct,10.99"
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        csv_content = self.processor.download_csv("http://example.com/test.csv")

        mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
        assert isinstance(csv_content, str)
        # Should still contain basic content even with ignored errors
        assert "marketplace_product_id,title,price" in csv_content
        assert "TEST001" in csv_content

    @patch("requests.get")
    def test_download_csv_request_exception(self, mock_get):
        """Test CSV download with request exception"""
        mock_get.side_effect = requests.exceptions.RequestException("Connection error")

        with pytest.raises(requests.exceptions.RequestException):
            self.processor.download_csv("http://example.com/test.csv")

    @patch("requests.get")
    def test_download_csv_http_error(self, mock_get):
        """Test CSV download with HTTP error"""
        mock_response = Mock()
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
            "404 Not Found"
        )
        mock_get.return_value = mock_response

        with pytest.raises(requests.exceptions.HTTPError):
            self.processor.download_csv("http://example.com/nonexistent.csv")

    @patch("requests.get")
    def test_download_csv_failure(self, mock_get):
        """Test CSV download failure

        NOTE(review): this exercises the same scenario as
        test_download_csv_http_error; it is kept so the existing test ID
        stays valid. Consider merging the two.
        """
        # raise_for_status() must raise for the failure to surface.
        mock_response = Mock()
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
            "404 Not Found"
        )
        mock_get.return_value = mock_response

        with pytest.raises(requests.exceptions.HTTPError):
            self.processor.download_csv("http://example.com/nonexistent.csv")

    def test_parse_csv_content(self):
        """Test CSV content parsing"""
        csv_content = (
            "marketplace_product_id,title,price,marketplace\n"
            "TEST001,Test MarketplaceProduct 1,10.99,TestMarket\n"
            "TEST002,Test MarketplaceProduct 2,15.99,TestMarket"
        )

        df = self.processor.parse_csv(csv_content)

        assert len(df) == 2
        assert "marketplace_product_id" in df.columns
        assert df.iloc[0]["marketplace_product_id"] == "TEST001"
        assert df.iloc[1]["price"] == 15.99

    def test_normalize_columns_google_shopping(self):
        """Test column normalization for Google Shopping feed format"""
        csv_content = (
            "g:id,g:title,g:description,g:price,g:brand,g:product_type\n"
            "TEST001,Product 1,Description 1,19.99 EUR,Brand1,Category1"
        )

        df = self.processor.parse_csv(csv_content)
        df = self.processor.normalize_columns(df)

        assert "marketplace_product_id" in df.columns
        assert "title" in df.columns
        assert "description" in df.columns
        assert "product_type_raw" in df.columns  # Renamed from product_type
        assert df.iloc[0]["marketplace_product_id"] == "TEST001"
        assert df.iloc[0]["title"] == "Product 1"

    def test_extract_translation_data(self):
        """Test extraction of translation fields from product data"""
        product_data = {
            "marketplace_product_id": "TEST001",
            "title": "Test Product",
            "description": "Test Description",
            "short_description": "Short desc",
            "price": "19.99",
            "brand": "TestBrand",
        }

        translation_data = self.processor._extract_translation_data(product_data)

        # Translation fields should be extracted
        assert translation_data["title"] == "Test Product"
        assert translation_data["description"] == "Test Description"
        assert translation_data["short_description"] == "Short desc"

        # Product data should no longer have translation fields
        assert "title" not in product_data
        assert "description" not in product_data
        assert "short_description" not in product_data

        # Non-translation fields should remain
        assert product_data["marketplace_product_id"] == "TEST001"
        assert product_data["price"] == "19.99"
        assert product_data["brand"] == "TestBrand"

    def test_parse_price_to_cents(self):
        """Test price string to cents conversion"""
        assert self.processor._parse_price_to_cents("19.99 EUR") == 1999
        assert self.processor._parse_price_to_cents("19,99 EUR") == 1999
        assert self.processor._parse_price_to_cents("$29.99") == 2999
        assert self.processor._parse_price_to_cents("100") == 10000
        assert self.processor._parse_price_to_cents(None) is None
        assert self.processor._parse_price_to_cents("") is None

    def test_clean_row_data_with_prices(self):
        """Test row data cleaning with price parsing to cents"""
        row_data = {
            "marketplace_product_id": "TEST001",
            "title": "Test Product",
            "price": "19.99 EUR",
            "sale_price": "14.99 EUR",
            "gtin": "1234567890123",
        }

        cleaned = self.processor._clean_row_data(row_data)

        assert cleaned["price_cents"] == 1999  # Integer cents
        assert cleaned["sale_price_cents"] == 1499  # Integer cents
        assert cleaned["currency"] == "EUR"

    @pytest.mark.asyncio
    async def test_process_marketplace_csv_from_url(self, db):
        """Test complete marketplace CSV processing"""
        with (
            patch.object(self.processor, "download_csv") as mock_download,
            patch.object(self.processor, "parse_csv") as mock_parse,
        ):
            # Mock successful download and parsing
            mock_download.return_value = "csv_content"
            mock_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["TEST001", "TEST002"],
                    "title": ["MarketplaceProduct 1", "MarketplaceProduct 2"],
                    "price": ["10.99", "15.99"],
                    "marketplace": ["TestMarket", "TestMarket"],
                    "vendor_name": ["TestVendor", "TestVendor"],
                }
            )
            mock_parse.return_value = mock_df

            result = await self.processor.process_marketplace_csv_from_url(
                "http://example.com/test.csv",
                "TestMarket",
                "TestVendor",
                1000,
                db,
                language="en",
            )

            assert "imported" in result
            assert "updated" in result
            assert "total_processed" in result
            assert "language" in result
            assert result["language"] == "en"

    @pytest.mark.asyncio
    async def test_process_batch_creates_translations(self, db):
        """Test that batch processing creates translation records"""
        product_ids = ["TRANS_TEST_001", "TRANS_TEST_002"]

        # Clean up any existing test data
        _delete_marketplace_products(db, product_ids)

        # Create test DataFrame
        batch_df = pd.DataFrame(
            {
                "marketplace_product_id": ["TRANS_TEST_001", "TRANS_TEST_002"],
                "title": ["Product One", "Product Two"],
                "description": ["Description One", "Description Two"],
                "price": ["19.99 EUR", "29.99 EUR"],
                "brand": ["Brand1", "Brand2"],
            }
        )

        try:
            result = await self.processor._process_marketplace_batch(
                batch_df,
                "TestMarket",
                "TestVendor",
                db,
                batch_num=1,
                language="en",
                source_file="test.csv",
            )

            assert result["imported"] == 2
            assert result["errors"] == 0

            # Verify products were created
            products = (
                db.query(MarketplaceProduct)
                .filter(MarketplaceProduct.marketplace_product_id.in_(product_ids))
                .all()
            )
            assert len(products) == 2

            # Verify translations were created
            for product in products:
                assert len(product.translations) == 1
                translation = product.translations[0]
                assert translation.language == "en"
                assert translation.title is not None
                assert translation.source_file == "test.csv"

            # Verify get_title method works
            product1 = next(
                p for p in products if p.marketplace_product_id == "TRANS_TEST_001"
            )
            assert product1.get_title("en") == "Product One"
            assert product1.get_description("en") == "Description One"
        finally:
            # Clean up even when an assertion above fails
            _delete_marketplace_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_updates_existing_translations(self, db):
        """Test that batch processing updates existing translation records"""
        product_ids = ["UPDATE_TEST_001"]

        # Clean up any existing test data
        _delete_marketplace_products(db, product_ids)

        # Create initial product with translation
        batch_df = pd.DataFrame(
            {
                "marketplace_product_id": ["UPDATE_TEST_001"],
                "title": ["Original Title"],
                "description": ["Original Description"],
                "price": ["19.99 EUR"],
            }
        )

        # Update with new data
        update_df = pd.DataFrame(
            {
                "marketplace_product_id": ["UPDATE_TEST_001"],
                "title": ["Updated Title"],
                "description": ["Updated Description"],
                "price": ["24.99 EUR"],
            }
        )

        try:
            await self.processor._process_marketplace_batch(
                batch_df, "TestMarket", "TestVendor", db, 1, language="en"
            )

            result = await self.processor._process_marketplace_batch(
                update_df, "TestMarket", "TestVendor", db, 1, language="en"
            )

            assert result["updated"] == 1
            assert result["imported"] == 0

            # Verify translation was updated
            product = (
                db.query(MarketplaceProduct)
                .filter(MarketplaceProduct.marketplace_product_id == "UPDATE_TEST_001")
                .first()
            )
            assert product.get_title("en") == "Updated Title"
            assert product.get_description("en") == "Updated Description"
        finally:
            # Clean up even when an assertion above fails
            _delete_marketplace_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_multi_language(self, db):
        """Test importing same product in multiple languages"""
        product_ids = ["MULTI_LANG_001"]

        # Clean up
        _delete_marketplace_products(db, product_ids)

        # English version
        en_df = pd.DataFrame(
            {
                "marketplace_product_id": ["MULTI_LANG_001"],
                "title": ["English Title"],
                "description": ["English Description"],
                "price": ["19.99 EUR"],
                "brand": ["TestBrand"],
            }
        )

        # French version (same product, different language)
        fr_df = pd.DataFrame(
            {
                "marketplace_product_id": ["MULTI_LANG_001"],
                "title": ["Titre Français"],
                "description": ["Description Française"],
                "price": ["19.99 EUR"],
                "brand": ["TestBrand"],
            }
        )

        try:
            await self.processor._process_marketplace_batch(
                en_df, "TestMarket", "TestVendor", db, 1, language="en"
            )

            result = await self.processor._process_marketplace_batch(
                fr_df, "TestMarket", "TestVendor", db, 1, language="fr"
            )

            assert result["updated"] == 1  # Product existed, so it's an update

            # Verify both translations exist
            product = (
                db.query(MarketplaceProduct)
                .filter(MarketplaceProduct.marketplace_product_id == "MULTI_LANG_001")
                .first()
            )
            assert len(product.translations) == 2

            # Verify each language
            assert product.get_title("en") == "English Title"
            assert product.get_title("fr") == "Titre Français"
            assert product.get_description("en") == "English Description"
            assert product.get_description("fr") == "Description Française"

            # Test fallback to English for unknown language
            assert product.get_title("de") == "English Title"
        finally:
            # Clean up even when an assertion above fails
            _delete_marketplace_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_skips_missing_title(self, db):
        """Test that rows without title are skipped"""
        product_ids = ["HAS_TITLE_001"]

        # A leftover row would be counted as "updated" instead of
        # "imported", so start from a clean slate.
        _delete_marketplace_products(db, product_ids)

        batch_df = pd.DataFrame(
            {
                "marketplace_product_id": ["NO_TITLE_001", "HAS_TITLE_001"],
                "title": [None, "Has Title"],
                "price": ["19.99", "29.99"],
            }
        )

        try:
            result = await self.processor._process_marketplace_batch(
                batch_df, "TestMarket", "TestVendor", db, 1, language="en"
            )

            assert result["imported"] == 1
            assert result["errors"] == 1  # Missing title is an error
        finally:
            # Clean up even when an assertion above fails
            _delete_marketplace_products(db, product_ids)

    @pytest.mark.asyncio
    async def test_process_batch_skips_missing_product_id(self, db):
        """Test that rows without marketplace_product_id are skipped"""
        product_ids = ["HAS_ID_001"]

        # A leftover row would be counted as "updated" instead of
        # "imported", so start from a clean slate.
        _delete_marketplace_products(db, product_ids)

        batch_df = pd.DataFrame(
            {
                "marketplace_product_id": [None, "HAS_ID_001"],
                "title": ["No ID Product", "Has ID Product"],
                "price": ["19.99", "29.99"],
            }
        )

        try:
            result = await self.processor._process_marketplace_batch(
                batch_df, "TestMarket", "TestVendor", db, 1, language="en"
            )

            assert result["imported"] == 1
            assert result["errors"] == 1  # Missing ID is an error
        finally:
            # Clean up even when an assertion above fails
            _delete_marketplace_products(db, product_ids)