Tests restructuring

This commit is contained in:
2025-09-19 21:23:57 +02:00
parent d0924f90c4
commit 366093bbc6
70 changed files with 212 additions and 1957 deletions

View File

@@ -0,0 +1,3 @@
# tests/unit/utils/__init__.py
"""Utility function unit tests."""

View File

@@ -0,0 +1,126 @@
# tests/test_csv_processor.py
from unittest.mock import Mock, patch
import pandas as pd
import pytest
import requests
import requests.exceptions
from utils.csv_processor import CSVProcessor
@pytest.mark.unit
class TestCSVProcessor:
def setup_method(self):
self.processor = CSVProcessor()
@patch("requests.get")
def test_download_csv_encoding_fallback(self, mock_get):
"""Test CSV download with encoding fallback"""
# Create content with special characters that would fail UTF-8 if not properly encoded
special_content = "product_id,title,price\nTEST001,Café Product,10.99"
mock_response = Mock()
mock_response.status_code = 200
# Use latin-1 encoding which your method should try
mock_response.content = special_content.encode("latin-1")
mock_response.raise_for_status.return_value = None
mock_get.return_value = mock_response
csv_content = self.processor.download_csv("http://example.com/test.csv")
mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
assert isinstance(csv_content, str)
assert "Café Product" in csv_content
@patch("requests.get")
def test_download_csv_encoding_ignore_fallback(self, mock_get):
"""Test CSV download falls back to UTF-8 with error ignoring"""
# Create problematic bytes that would fail most encoding attempts
mock_response = Mock()
mock_response.status_code = 200
# Create bytes that will fail most encodings
mock_response.content = (
b"product_id,title,price\nTEST001,\xff\xfe Product,10.99"
)
mock_response.raise_for_status.return_value = None
mock_get.return_value = mock_response
csv_content = self.processor.download_csv("http://example.com/test.csv")
mock_get.assert_called_once_with("http://example.com/test.csv", timeout=30)
assert isinstance(csv_content, str)
# Should still contain basic content even with ignored errors
assert "product_id,title,price" in csv_content
assert "TEST001" in csv_content
@patch("requests.get")
def test_download_csv_request_exception(self, mock_get):
"""Test CSV download with request exception"""
mock_get.side_effect = requests.exceptions.RequestException("Connection error")
with pytest.raises(requests.exceptions.RequestException):
self.processor.download_csv("http://example.com/test.csv")
@patch("requests.get")
def test_download_csv_http_error(self, mock_get):
"""Test CSV download with HTTP error"""
mock_response = Mock()
mock_response.status_code = 404
mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
"404 Not Found"
)
mock_get.return_value = mock_response
with pytest.raises(requests.exceptions.HTTPError):
self.processor.download_csv("http://example.com/nonexistent.csv")
@patch("requests.get")
def test_download_csv_failure(self, mock_get):
"""Test CSV download failure"""
# Mock failed HTTP response
mock_response = Mock()
mock_response.status_code = 404
mock_get.return_value = mock_response
with pytest.raises(Exception):
self.processor._download_csv("http://example.com/nonexistent.csv")
def test_parse_csv_content(self):
"""Test CSV content parsing"""
csv_content = """product_id,title,price,marketplace
TEST001,Test Product 1,10.99,TestMarket
TEST002,Test Product 2,15.99,TestMarket"""
df = self.processor.parse_csv(csv_content)
assert len(df) == 2
assert "product_id" in df.columns
assert df.iloc[0]["product_id"] == "TEST001"
assert df.iloc[1]["price"] == 15.99
@pytest.mark.asyncio
async def test_process_marketplace_csv_from_url(self, db):
"""Test complete marketplace CSV processing"""
with patch.object(
self.processor, "download_csv"
) as mock_download, patch.object(self.processor, "parse_csv") as mock_parse:
# Mock successful download and parsing
mock_download.return_value = "csv_content"
mock_df = pd.DataFrame(
{
"product_id": ["TEST001", "TEST002"],
"title": ["Product 1", "Product 2"],
"price": ["10.99", "15.99"],
"marketplace": ["TestMarket", "TestMarket"],
"shop_name": ["TestShop", "TestShop"],
}
)
mock_parse.return_value = mock_df
result = await self.processor.process_marketplace_csv_from_url(
"http://example.com/test.csv", "TestMarket", "TestShop", 1000, db
)
assert "imported" in result
assert "updated" in result
assert "total_processed" in result

View File

@@ -0,0 +1,121 @@
# tests/unit/utils/test_data_processing.py
import pytest
from utils.data_processing import GTINProcessor, PriceProcessor
@pytest.mark.unit
class TestGTINProcessor:
def setup_method(self):
self.processor = GTINProcessor()
def test_normalize_valid_gtin(self):
"""Test GTIN normalization with valid inputs"""
# Test EAN-13
assert self.processor.normalize("1234567890123") == "1234567890123"
# Test UPC-A (12 digits)
assert self.processor.normalize("123456789012") == "123456789012"
# Test with decimal point
assert self.processor.normalize("123456789012.0") == "123456789012"
# Test EAN-8
assert self.processor.normalize("12345678") == "12345678"
def test_normalize_invalid_gtin(self):
"""Test GTIN normalization with invalid inputs"""
assert self.processor.normalize("") is None
assert self.processor.normalize(None) is None
assert self.processor.normalize("abc") is None
# Test short number (gets padded)
assert self.processor.normalize("123") == "0000000000123"
def test_normalize_gtin_with_formatting(self):
"""Test GTIN normalization with various formatting"""
# Test with spaces
assert self.processor.normalize("123 456 789 012") == "123456789012"
# Test with dashes
assert self.processor.normalize("123-456-789-012") == "123456789012"
# Test with mixed formatting
assert self.processor.normalize("123 456-789 012") == "123456789012"
def test_validate_gtin(self):
"""Test GTIN validation"""
assert self.processor.validate("1234567890123") is True
assert self.processor.validate("123456789012") is True
assert self.processor.validate("12345678") is True
assert self.processor.validate("123") is False
assert self.processor.validate("") is False
assert self.processor.validate(None) is False
def test_gtin_checksum_validation(self):
"""Test GTIN checksum validation if implemented"""
# This test would verify checksum calculation if your GTINProcessor implements it
# For now, we'll test the structure validation
assert self.processor.validate("1234567890123") is True
assert self.processor.validate("12345678901234") is True # 14 digits
assert self.processor.validate("123456789012345") is False # 15 digits
@pytest.mark.unit
class TestPriceProcessor:
def setup_method(self):
self.processor = PriceProcessor()
def test_parse_price_currency_eur(self):
"""Test EUR price parsing"""
price, currency = self.processor.parse_price_currency("8.26 EUR")
assert price == "8.26"
assert currency == "EUR"
# Test with euro symbol
price, currency = self.processor.parse_price_currency("8.26 €")
assert price == "8.26"
assert currency == "EUR"
def test_parse_price_currency_usd(self):
"""Test USD price parsing"""
price, currency = self.processor.parse_price_currency("$12.50")
assert price == "12.50"
assert currency == "USD"
price, currency = self.processor.parse_price_currency("12.50 USD")
assert price == "12.50"
assert currency == "USD"
def test_parse_price_currency_comma_decimal(self):
"""Test price parsing with comma as decimal separator"""
price, currency = self.processor.parse_price_currency("8,26 EUR")
assert price == "8.26"
assert currency == "EUR"
def test_parse_invalid_price(self):
"""Test invalid price parsing"""
price, currency = self.processor.parse_price_currency("")
assert price is None
assert currency is None
price, currency = self.processor.parse_price_currency(None)
assert price is None
assert currency is None
def test_parse_price_edge_cases(self):
"""Test edge cases in price parsing"""
# Test price without currency
price, currency = self.processor.parse_price_currency("15.99")
assert price == "15.99"
# currency might be None or default value
# Test currency before price
price, currency = self.processor.parse_price_currency("EUR 25.50")
assert price == "25.50"
assert currency == "EUR"
# Test with multiple decimal places
price, currency = self.processor.parse_price_currency("12.999 USD")
assert price == "12.999"
assert currency == "USD"

View File

@@ -0,0 +1,47 @@
# tests/test_data_validation.py
import pytest
from utils.data_processing import GTINProcessor, PriceProcessor
@pytest.mark.unit
class TestDataValidation:
def test_gtin_normalization_edge_cases(self):
"""Test GTIN normalization with edge cases"""
processor = GTINProcessor()
# Test with leading zeros
assert processor.normalize("000123456789") == "000123456789"
# Test with spaces
assert processor.normalize("123 456 789 012") == "123456789012"
# Test with dashes
assert processor.normalize("123-456-789-012") == "123456789012"
# Test very long numbers
long_number = "1234567890123456789"
normalized = processor.normalize(long_number)
assert len(normalized) <= 14 # Should be truncated
def test_price_parsing_edge_cases(self):
"""Test price parsing with edge cases"""
processor = PriceProcessor()
# Test with multiple decimal places
price, currency = processor.parse_price_currency("12.999 EUR")
assert price == "12.999"
# Test with no currency
price, currency = processor.parse_price_currency("15.50")
assert price == "15.50"
# Test with unusual formatting
price, currency = processor.parse_price_currency("EUR 25,50")
assert currency == "EUR"
assert price == "25.50" # Comma should be converted to dot
def test_input_sanitization(self):
"""Test input sanitization"""
# These tests would verify that inputs are properly sanitized
# to prevent SQL injection, XSS, etc.
pass # Implementation would depend on your sanitization logic