"""Unit tests for enhanced cache manager.""" import pytest import pytest_asyncio import asyncio import json from datetime import datetime, timedelta from unittest.mock import Mock, AsyncMock, patch, MagicMock from backend.services.enhanced_cache_manager import ( EnhancedCacheManager, CacheConfig, CachePolicy, CacheMetrics ) @pytest.fixture def cache_config(): """Create test cache configuration.""" return CacheConfig( transcript_ttl_hours=1, summary_ttl_hours=1, memory_max_size_mb=10, redis_url=None, # Use memory cache for tests enable_warming=False, enable_analytics=True ) @pytest_asyncio.fixture async def cache_manager(cache_config): """Create cache manager instance.""" manager = EnhancedCacheManager(cache_config) await manager.initialize() yield manager await manager.close() @pytest_asyncio.fixture async def cache_manager_with_redis(cache_config): """Create cache manager with mocked Redis.""" cache_config.redis_url = "redis://localhost:6379/0" manager = EnhancedCacheManager(cache_config) # Mock Redis client mock_redis = AsyncMock() mock_redis.ping = AsyncMock(return_value=True) mock_redis.get = AsyncMock(return_value=None) mock_redis.setex = AsyncMock(return_value=True) mock_redis.delete = AsyncMock(return_value=1) mock_redis.info = AsyncMock(return_value={'used_memory': 1024 * 1024 * 5}) # 5MB mock_redis.scan = AsyncMock(return_value=(0, [])) mock_redis.flushdb = AsyncMock(return_value=True) with patch('backend.services.enhanced_cache_manager.aioredis.from_url', AsyncMock(return_value=mock_redis)): await manager.initialize() manager.redis_client = mock_redis yield manager await manager.close() class TestCacheKeyGeneration: """Test cache key generation methods.""" def test_transcript_key_generation(self, cache_config): """Test transcript cache key generation.""" manager = EnhancedCacheManager(cache_config) key = manager._generate_transcript_key("abc123", "en") assert key == "yt:transcript:abc123:en" key = manager._generate_transcript_key("xyz789", "es") assert key == "yt:transcript:xyz789:es" def test_summary_key_generation(self, cache_config): """Test summary cache key generation.""" manager = EnhancedCacheManager(cache_config) key = manager._generate_summary_key("hash123", "confighash456") assert key == "yt:summary:hash123:confighash456" def test_content_hash_generation(self, cache_config): """Test content hash generation.""" manager = EnhancedCacheManager(cache_config) hash1 = manager.generate_content_hash("test content") hash2 = manager.generate_content_hash("test content") hash3 = manager.generate_content_hash("different content") assert hash1 == hash2 # Same content produces same hash assert hash1 != hash3 # Different content produces different hash assert len(hash1) == 16 # Hash is truncated to 16 chars def test_config_hash_generation(self, cache_config): """Test configuration hash generation.""" manager = EnhancedCacheManager(cache_config) config1 = {"model": "gpt-4", "temperature": 0.7} config2 = {"temperature": 0.7, "model": "gpt-4"} # Different order config3 = {"model": "gpt-3.5", "temperature": 0.7} hash1 = manager.generate_config_hash(config1) hash2 = manager.generate_config_hash(config2) hash3 = manager.generate_config_hash(config3) assert hash1 == hash2 # Order doesn't matter assert hash1 != hash3 # Different values produce different hash class TestTranscriptCaching: """Test transcript caching functionality.""" @pytest.mark.asyncio async def test_cache_and_retrieve_transcript(self, cache_manager): """Test caching and retrieving a transcript.""" video_id = "test123" language = "en" transcript_data = { "transcript": "This is a test transcript", "metadata": {"duration": 300} } # Cache transcript success = await cache_manager.cache_transcript(video_id, language, transcript_data) assert success is True # Retrieve transcript cached = await cache_manager.get_cached_transcript(video_id, language) assert cached is not None assert cached["transcript"] == transcript_data["transcript"] assert cached["metadata"] == transcript_data["metadata"] @pytest.mark.asyncio async def test_transcript_cache_miss(self, cache_manager): """Test cache miss for non-existent transcript.""" cached = await cache_manager.get_cached_transcript("nonexistent", "en") assert cached is None assert cache_manager.metrics.misses == 1 assert cache_manager.metrics.hits == 0 @pytest.mark.asyncio async def test_transcript_cache_expiration(self, cache_manager): """Test transcript cache expiration.""" video_id = "test123" transcript_data = {"transcript": "Test"} # Cache with very short TTL cache_manager.config.transcript_ttl_hours = 0.0001 # Very short TTL await cache_manager.cache_transcript(video_id, "en", transcript_data) # Wait for expiration await asyncio.sleep(0.5) # Should return None due to expiration cached = await cache_manager.get_cached_transcript(video_id, "en") assert cached is None @pytest.mark.asyncio async def test_transcript_caching_with_redis(self, cache_manager_with_redis): """Test transcript caching with Redis backend.""" video_id = "test123" transcript_data = {"transcript": "Test with Redis"} # Mock Redis get to return cached data cached_data = json.dumps(transcript_data) cache_manager_with_redis.redis_client.get = AsyncMock(return_value=cached_data) # Retrieve should hit Redis cached = await cache_manager_with_redis.get_cached_transcript(video_id, "en") assert cached == transcript_data assert cache_manager_with_redis.metrics.hits == 1 # Verify Redis was called cache_manager_with_redis.redis_client.get.assert_called_once() class TestSummaryCaching: """Test summary caching functionality.""" @pytest.mark.asyncio async def test_cache_and_retrieve_summary(self, cache_manager): """Test caching and retrieving a summary.""" transcript_hash = "hash123" config_hash = "confighash456" summary_data = { "summary": "This is a test summary", "key_points": ["point1", "point2"], "model": "claude-3-5-haiku-20241022" } # Cache summary success = await cache_manager.cache_summary(transcript_hash, config_hash, summary_data) assert success is True # Retrieve summary cached = await cache_manager.get_cached_summary(transcript_hash, config_hash) assert cached is not None assert cached["summary"] == summary_data["summary"] assert cached["key_points"] == summary_data["key_points"] assert "_cache_metadata" in cached @pytest.mark.asyncio async def test_summary_cache_validation(self, cache_manager): """Test summary cache validation based on version.""" transcript_hash = "hash123" config_hash = "config456" # Create summary with outdated cache version summary_data = { "summary": "Test summary", "_cache_metadata": { "cache_version": "0.5", # Outdated version "cached_at": datetime.utcnow().isoformat() } } # Manually insert into memory cache cache_key = cache_manager._generate_summary_key(transcript_hash, config_hash) cache_manager._memory_cache[cache_key] = { "data": summary_data, "expires_at": (datetime.utcnow() + timedelta(hours=1)).isoformat() } # Should return None due to version mismatch cached = await cache_manager.get_cached_summary(transcript_hash, config_hash) assert cached is None class TestCacheMetrics: """Test cache metrics and analytics.""" def test_metrics_initialization(self): """Test metrics are properly initialized.""" metrics = CacheMetrics() assert metrics.hits == 0 assert metrics.misses == 0 assert metrics.hit_rate == 0.0 assert metrics.total_operations == 0 def test_hit_rate_calculation(self): """Test hit rate calculation.""" metrics = CacheMetrics(hits=75, misses=25) assert metrics.hit_rate == 0.75 metrics = CacheMetrics(hits=0, misses=100) assert metrics.hit_rate == 0.0 metrics = CacheMetrics(hits=100, misses=0) assert metrics.hit_rate == 1.0 @pytest.mark.asyncio async def test_cache_analytics(self, cache_manager): """Test cache analytics generation.""" # Generate some cache activity await cache_manager.cache_transcript("video1", "en", {"transcript": "test"}) await cache_manager.get_cached_transcript("video1", "en") # Hit await cache_manager.get_cached_transcript("video2", "en") # Miss analytics = await cache_manager.get_cache_analytics() assert "performance_metrics" in analytics assert analytics["performance_metrics"]["hits"] == 1 assert analytics["performance_metrics"]["misses"] == 1 assert analytics["performance_metrics"]["write_operations"] == 1 assert analytics["performance_metrics"]["hit_rate"] == 0.5 assert "memory_cache_usage" in analytics assert analytics["memory_cache_usage"]["entries"] == 1 assert "configuration" in analytics assert analytics["configuration"]["using_redis"] is False class TestCacheInvalidation: """Test cache invalidation functionality.""" @pytest.mark.asyncio async def test_invalidate_all_cache(self, cache_manager): """Test invalidating all cache entries.""" # Add some cache entries await cache_manager.cache_transcript("video1", "en", {"transcript": "test1"}) await cache_manager.cache_transcript("video2", "es", {"transcript": "test2"}) # Invalidate all count = await cache_manager.invalidate_cache() assert count == 2 assert len(cache_manager._memory_cache) == 0 @pytest.mark.asyncio async def test_invalidate_pattern_match(self, cache_manager): """Test invalidating cache entries by pattern.""" # Add cache entries with different patterns cache_manager._memory_cache["yt:transcript:video1:en"] = {"data": {}, "expires_at": "2025-01-01"} cache_manager._memory_cache["yt:transcript:video2:en"] = {"data": {}, "expires_at": "2025-01-01"} cache_manager._memory_cache["yt:summary:hash1:config1"] = {"data": {}, "expires_at": "2025-01-01"} # Invalidate only transcript entries count = await cache_manager.invalidate_cache("transcript") assert count == 2 assert len(cache_manager._memory_cache) == 1 assert "yt:summary:hash1:config1" in cache_manager._memory_cache class TestCacheWritePolicies: """Test different cache write policies.""" @pytest.mark.asyncio async def test_write_through_policy(self, cache_manager): """Test WRITE_THROUGH policy.""" video_id = "test123" transcript_data = {"transcript": "test"} success = await cache_manager.cache_transcript( video_id, "en", transcript_data, policy=CachePolicy.WRITE_THROUGH ) assert success is True # Data should be in cache cached = await cache_manager.get_cached_transcript(video_id, "en") assert cached is not None @pytest.mark.asyncio async def test_write_back_policy(self, cache_manager): """Test WRITE_BACK policy.""" video_id = "test456" transcript_data = {"transcript": "test"} success = await cache_manager.cache_transcript( video_id, "en", transcript_data, policy=CachePolicy.WRITE_BACK ) assert success is True # Data should still be in memory cache cached = await cache_manager.get_cached_transcript(video_id, "en") assert cached is not None class TestBackgroundTasks: """Test background tasks functionality.""" @pytest.mark.asyncio async def test_memory_cache_cleanup(self, cache_manager): """Test expired entry cleanup from memory cache.""" # Add expired entry cache_manager._memory_cache["test_key"] = { "data": {}, "expires_at": (datetime.utcnow() - timedelta(hours=1)).isoformat() } # Add valid entry cache_manager._memory_cache["valid_key"] = { "data": {}, "expires_at": (datetime.utcnow() + timedelta(hours=1)).isoformat() } # Run cleanup await cache_manager._cleanup_memory_cache() assert "test_key" not in cache_manager._memory_cache assert "valid_key" in cache_manager._memory_cache @pytest.mark.asyncio async def test_background_task_lifecycle(self, cache_config): """Test starting and stopping background tasks.""" manager = EnhancedCacheManager(cache_config) # Initialize starts background tasks await manager.initialize() assert manager._cleanup_task is not None assert not manager._cleanup_task.done() # Close stops background tasks await manager.close() assert manager._cleanup_task.cancelled() class TestCompatibilityMethods: """Test compatibility with existing CacheManager.""" @pytest.mark.asyncio async def test_cache_pipeline_result(self, cache_manager): """Test caching pipeline result (compatibility method).""" job_id = "job123" result = {"status": "completed", "summary": "test"} success = await cache_manager.cache_pipeline_result(job_id, result) assert success is True cached = await cache_manager.get_cached_pipeline_result(job_id) assert cached == result @pytest.mark.asyncio async def test_get_cache_stats(self, cache_manager): """Test getting cache stats (compatibility method).""" stats = await cache_manager.get_cache_stats() assert "performance_metrics" in stats assert "configuration" in stats