# tests/test_csv_processor.py
"""Unit tests for ``CSVProcessor``: CSV download, parsing and URL import."""

from unittest.mock import Mock, patch

import pandas as pd
import pytest
import requests
import requests.exceptions

from app.utils.csv_processor import CSVProcessor


@pytest.mark.unit
class TestCSVProcessor:
    """Exercise ``CSVProcessor`` with ``requests.get`` and its own helpers mocked.

    No test in this class performs real network I/O: HTTP access is replaced
    by ``unittest.mock`` doubles.
    """

    def setup_method(self):
        """Create a fresh ``CSVProcessor`` before every test method."""
        self.processor = CSVProcessor()

    @patch("requests.get")
    def test_download_csv_encoding_fallback(self, mock_get):
        """Test CSV download with encoding fallback.

        The response body is latin-1 encoded, so "é" is the single byte 0xE9,
        which is not valid UTF-8 in this position. The download must still
        return the original text as ``str``.
        """
        special_content = (
            "marketplace_product_id,title,price\nTEST001,Café MarketplaceProduct,10.99"
        )

        mock_response = Mock()
        mock_response.status_code = 200
        # latin-1 is expected to be among the encodings download_csv tries.
        mock_response.content = special_content.encode("latin-1")
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        csv_content = self.processor.download_csv("http://example.com/test.csv")

        mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
        assert isinstance(csv_content, str)
        assert "Café MarketplaceProduct" in csv_content

    @patch("requests.get")
    def test_download_csv_encoding_ignore_fallback(self, mock_get):
        """Test CSV download falls back to UTF-8 with error ignoring.

        The body contains the bytes 0xFF 0xFE, which are never valid UTF-8.
        Only the plain-ASCII parts of the payload are asserted, because how
        the undecodable bytes are rendered depends on the fallback taken.
        """
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.content = (
            b"marketplace_product_id,title,price\nTEST001,\xff\xfe MarketplaceProduct,10.99"
        )
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response

        csv_content = self.processor.download_csv("http://example.com/test.csv")

        mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
        assert isinstance(csv_content, str)
        # Should still contain basic content even with ignored errors.
        assert "marketplace_product_id,title,price" in csv_content
        assert "TEST001" in csv_content

    @patch("requests.get")
    def test_download_csv_request_exception(self, mock_get):
        """Test CSV download with request exception.

        ``requests.get`` itself raises, and the exception must propagate to
        the caller unchanged in type.
        """
        mock_get.side_effect = requests.exceptions.RequestException("Connection error")

        with pytest.raises(requests.exceptions.RequestException):
            self.processor.download_csv("http://example.com/test.csv")

    @patch("requests.get")
    def test_download_csv_http_error(self, mock_get):
        """Test CSV download with HTTP error.

        The mocked response raises ``HTTPError`` from ``raise_for_status()``;
        the error must reach the caller.
        """
        mock_response = Mock()
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
            "404 Not Found"
        )
        mock_get.return_value = mock_response

        with pytest.raises(requests.exceptions.HTTPError):
            self.processor.download_csv("http://example.com/nonexistent.csv")

    @patch("requests.get")
    def test_download_csv_failure(self, mock_get):
        """Test CSV download failure.

        NOTE(review): this is currently identical to
        ``test_download_csv_http_error``; it is kept so the existing test ID
        stays stable. Consider merging the two or covering a different
        failure mode here.
        """
        # A Mock does not raise by itself, so raise_for_status() must be
        # configured explicitly to simulate the failed HTTP response.
        mock_response = Mock()
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
            "404 Not Found"
        )
        mock_get.return_value = mock_response

        with pytest.raises(requests.exceptions.HTTPError):
            self.processor.download_csv("http://example.com/nonexistent.csv")

    def test_parse_csv_content(self):
        """Test CSV content parsing.

        Parses a header plus two data rows and checks the row count, the
        presence of the id column, and one value from each row.
        """
        # Explicit "\n" separators keep the fixture independent of how this
        # file is physically laid out; each row must be on its own line for
        # the two-row assertion below to hold.
        csv_content = (
            "marketplace_product_id,title,price,marketplace\n"
            "TEST001,Test MarketplaceProduct 1,10.99,TestMarket\n"
            "TEST002,Test MarketplaceProduct 2,15.99,TestMarket"
        )

        df = self.processor.parse_csv(csv_content)

        assert len(df) == 2
        assert "marketplace_product_id" in df.columns
        assert df.iloc[0]["marketplace_product_id"] == "TEST001"
        assert df.iloc[1]["price"] == 15.99

    @pytest.mark.asyncio
    async def test_process_marketplace_csv_from_url(self, db):
        """Test complete marketplace CSV processing.

        ``download_csv`` and ``parse_csv`` are patched on the instance so only
        the orchestration in ``process_marketplace_csv_from_url`` runs. The
        result must expose the ``imported``, ``updated`` and
        ``total_processed`` keys.

        ``db`` is a fixture defined outside this file (presumably a database
        session; confirm against conftest).
        """
        with (
            patch.object(self.processor, "download_csv") as mock_download,
            patch.object(self.processor, "parse_csv") as mock_parse,
        ):
            # Mock successful download and parsing.
            mock_download.return_value = "csv_content"
            mock_df = pd.DataFrame(
                {
                    "marketplace_product_id": ["TEST001", "TEST002"],
                    "title": ["MarketplaceProduct 1", "MarketplaceProduct 2"],
                    "price": ["10.99", "15.99"],
                    "marketplace": ["TestMarket", "TestMarket"],
                    "name": ["TestVendor", "TestVendor"],
                }
            )
            mock_parse.return_value = mock_df

            result = await self.processor.process_marketplace_csv_from_url(
                "http://example.com/test.csv", "TestMarket", "TestVendor", 1000, db
            )

            assert "imported" in result
            assert "updated" in result
            assert "total_processed" in result