# File: youtube-summarizer/backend/tests/unit/test_enhanced_cache_manager.py
#
# Listing metadata: 402 lines, 15 KiB, Python

"""Unit tests for enhanced cache manager."""
import pytest
import pytest_asyncio
import asyncio
import json
from datetime import datetime, timedelta
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from backend.services.enhanced_cache_manager import (
EnhancedCacheManager,
CacheConfig,
CachePolicy,
CacheMetrics
)
@pytest.fixture
def cache_config():
    """Return the CacheConfig shared by the tests in this module."""
    options = dict(
        transcript_ttl_hours=1,
        summary_ttl_hours=1,
        memory_max_size_mb=10,
        redis_url=None,  # no Redis URL: tests use the memory cache
        enable_warming=False,
        enable_analytics=True,
    )
    return CacheConfig(**options)
@pytest_asyncio.fixture
async def cache_manager(cache_config):
    """Yield an initialized EnhancedCacheManager; close it after the test."""
    instance = EnhancedCacheManager(cache_config)
    await instance.initialize()

    yield instance

    # Teardown: runs once the requesting test has finished.
    await instance.close()
@pytest_asyncio.fixture
async def cache_manager_with_redis(cache_config):
    """Yield a cache manager whose Redis client is an AsyncMock stand-in."""
    cache_config.redis_url = "redis://localhost:6379/0"
    manager = EnhancedCacheManager(cache_config)

    # Canned return value for each Redis coroutine the fixture stubs out.
    canned_responses = {
        "ping": True,
        "get": None,
        "setex": True,
        "delete": 1,
        "info": {'used_memory': 1024 * 1024 * 5},  # 5MB
        "scan": (0, []),
        "flushdb": True,
    }
    fake_redis = AsyncMock()
    for method_name, response in canned_responses.items():
        setattr(fake_redis, method_name, AsyncMock(return_value=response))

    connect_stub = AsyncMock(return_value=fake_redis)
    # The patch stays active for the fixture's whole lifetime so that no
    # from_url call made while the manager is alive reaches a real server.
    with patch('backend.services.enhanced_cache_manager.aioredis.from_url', connect_stub):
        await manager.initialize()
        # Pin the client explicitly, whatever initialize() stored.
        manager.redis_client = fake_redis

        yield manager

        await manager.close()
class TestCacheKeyGeneration:
    """Checks for the cache-key and hash helper methods."""

    def test_transcript_key_generation(self, cache_config):
        """Transcript keys have the form ``yt:transcript:<video_id>:<language>``."""
        mgr = EnhancedCacheManager(cache_config)
        expectations = (
            ("abc123", "en", "yt:transcript:abc123:en"),
            ("xyz789", "es", "yt:transcript:xyz789:es"),
        )
        for video_id, language, expected_key in expectations:
            assert mgr._generate_transcript_key(video_id, language) == expected_key

    def test_summary_key_generation(self, cache_config):
        """Summary keys have the form ``yt:summary:<transcript_hash>:<config_hash>``."""
        mgr = EnhancedCacheManager(cache_config)
        generated = mgr._generate_summary_key("hash123", "confighash456")
        assert generated == "yt:summary:hash123:confighash456"

    def test_content_hash_generation(self, cache_config):
        """Content hashes are repeatable, content-sensitive and 16 characters long."""
        mgr = EnhancedCacheManager(cache_config)
        first = mgr.generate_content_hash("test content")
        repeat = mgr.generate_content_hash("test content")
        other = mgr.generate_content_hash("different content")

        assert first == repeat  # identical content -> identical hash
        assert first != other  # different content -> different hash
        assert len(first) == 16  # expected hash length

    def test_config_hash_generation(self, cache_config):
        """Config hashes ignore key order but react to changed values."""
        mgr = EnhancedCacheManager(cache_config)
        baseline = {"model": "gpt-4", "temperature": 0.7}
        reordered = {"temperature": 0.7, "model": "gpt-4"}  # same items, other order
        changed = {"model": "gpt-3.5", "temperature": 0.7}

        baseline_hash = mgr.generate_config_hash(baseline)
        reordered_hash = mgr.generate_config_hash(reordered)
        changed_hash = mgr.generate_config_hash(changed)

        assert baseline_hash == reordered_hash  # key order is irrelevant
        assert baseline_hash != changed_hash  # a different value changes the hash
class TestTranscriptCaching:
    """Checks for storing and fetching transcripts through the cache."""

    @pytest.mark.asyncio
    async def test_cache_and_retrieve_transcript(self, cache_manager):
        """A cached transcript can be read back with the same fields."""
        video_id, language = "test123", "en"
        payload = {
            "transcript": "This is a test transcript",
            "metadata": {"duration": 300},
        }

        # Store the transcript.
        stored = await cache_manager.cache_transcript(video_id, language, payload)
        assert stored is True

        # Fetch it back and compare field by field.
        fetched = await cache_manager.get_cached_transcript(video_id, language)
        assert fetched is not None
        for field in ("transcript", "metadata"):
            assert fetched[field] == payload[field]

    @pytest.mark.asyncio
    async def test_transcript_cache_miss(self, cache_manager):
        """Looking up an unknown transcript returns None and records one miss."""
        fetched = await cache_manager.get_cached_transcript("nonexistent", "en")

        assert fetched is None
        assert cache_manager.metrics.misses == 1
        assert cache_manager.metrics.hits == 0

    @pytest.mark.asyncio
    async def test_transcript_cache_expiration(self, cache_manager):
        """A transcript is no longer returned once its TTL has elapsed."""
        video_id = "test123"
        payload = {"transcript": "Test"}

        # 0.0001 h is 0.36 s, which is shorter than the 0.5 s sleep below.
        cache_manager.config.transcript_ttl_hours = 0.0001
        await cache_manager.cache_transcript(video_id, "en", payload)

        # Let the entry expire.
        await asyncio.sleep(0.5)

        # The expired entry must not be served.
        fetched = await cache_manager.get_cached_transcript(video_id, "en")
        assert fetched is None

    @pytest.mark.asyncio
    async def test_transcript_caching_with_redis(self, cache_manager_with_redis):
        """A value returned by the (mocked) Redis client is served as a hit."""
        video_id = "test123"
        payload = {"transcript": "Test with Redis"}

        # Make the mocked Redis GET return the JSON-encoded transcript.
        redis_client = cache_manager_with_redis.redis_client
        redis_client.get = AsyncMock(return_value=json.dumps(payload))

        # The lookup should be answered from Redis.
        fetched = await cache_manager_with_redis.get_cached_transcript(video_id, "en")
        assert fetched == payload
        assert cache_manager_with_redis.metrics.hits == 1

        # Redis GET must have been called exactly once.
        cache_manager_with_redis.redis_client.get.assert_called_once()
class TestSummaryCaching:
    """Checks for storing and fetching summaries through the cache."""

    @pytest.mark.asyncio
    async def test_cache_and_retrieve_summary(self, cache_manager):
        """A cached summary is returned with its fields plus cache metadata."""
        transcript_hash, config_hash = "hash123", "confighash456"
        payload = {
            "summary": "This is a test summary",
            "key_points": ["point1", "point2"],
            "model": "claude-3-5-haiku-20241022",
        }

        # Store the summary.
        stored = await cache_manager.cache_summary(transcript_hash, config_hash, payload)
        assert stored is True

        # Fetch it back.
        fetched = await cache_manager.get_cached_summary(transcript_hash, config_hash)
        assert fetched is not None
        for field in ("summary", "key_points"):
            assert fetched[field] == payload[field]
        assert "_cache_metadata" in fetched

    @pytest.mark.asyncio
    async def test_summary_cache_validation(self, cache_manager):
        """An entry whose metadata carries cache version "0.5" is not served."""
        transcript_hash, config_hash = "hash123", "config456"

        # Summary tagged with a cache version this test treats as outdated.
        stale_summary = {
            "summary": "Test summary",
            "_cache_metadata": {
                "cache_version": "0.5",
                "cached_at": datetime.utcnow().isoformat(),
            },
        }

        # Write straight into the memory cache, bypassing cache_summary().
        key = cache_manager._generate_summary_key(transcript_hash, config_hash)
        not_yet_expired = datetime.utcnow() + timedelta(hours=1)
        cache_manager._memory_cache[key] = {
            "data": stale_summary,
            "expires_at": not_yet_expired.isoformat(),
        }

        # The entry has not expired, so a None result comes from the version check.
        fetched = await cache_manager.get_cached_summary(transcript_hash, config_hash)
        assert fetched is None
class TestCacheMetrics:
    """Checks for CacheMetrics and the analytics report."""

    def test_metrics_initialization(self):
        """A default CacheMetrics starts with every counter at zero."""
        fresh = CacheMetrics()

        assert fresh.hits == 0
        assert fresh.misses == 0
        assert fresh.hit_rate == 0.0
        assert fresh.total_operations == 0

    def test_hit_rate_calculation(self):
        """hit_rate equals hits / (hits + misses) for the sampled counts."""
        samples = (
            (75, 25, 0.75),
            (0, 100, 0.0),
            (100, 0, 1.0),
        )
        for hit_count, miss_count, expected_rate in samples:
            measured = CacheMetrics(hits=hit_count, misses=miss_count)
            assert measured.hit_rate == expected_rate

    @pytest.mark.asyncio
    async def test_cache_analytics(self, cache_manager):
        """Analytics reflect one write, one hit and one miss."""
        # Produce one write, one hit and one miss.
        await cache_manager.cache_transcript("video1", "en", {"transcript": "test"})
        await cache_manager.get_cached_transcript("video1", "en")  # hit
        await cache_manager.get_cached_transcript("video2", "en")  # miss

        report = await cache_manager.get_cache_analytics()

        assert "performance_metrics" in report
        performance = report["performance_metrics"]
        assert performance["hits"] == 1
        assert performance["misses"] == 1
        assert performance["write_operations"] == 1
        assert performance["hit_rate"] == 0.5

        assert "memory_cache_usage" in report
        assert report["memory_cache_usage"]["entries"] == 1

        assert "configuration" in report
        assert report["configuration"]["using_redis"] is False
class TestCacheInvalidation:
    """Test cache invalidation functionality."""

    @pytest.mark.asyncio
    async def test_invalidate_all_cache(self, cache_manager):
        """Invalidating without a pattern removes every entry and reports the count."""
        # Add some cache entries
        await cache_manager.cache_transcript("video1", "en", {"transcript": "test1"})
        await cache_manager.cache_transcript("video2", "es", {"transcript": "test2"})

        # Invalidate all
        count = await cache_manager.invalidate_cache()

        assert count == 2
        assert len(cache_manager._memory_cache) == 0

    @pytest.mark.asyncio
    async def test_invalidate_pattern_match(self, cache_manager):
        """Invalidating with "transcript" removes only the transcript entries."""
        # Use an expiry relative to "now" rather than a fixed calendar date:
        # a hard-coded date eventually lies in the past, at which point the
        # seeded entries count as expired and the manager's cleanup could
        # evict them before the assertions below run.
        expires_at = (datetime.utcnow() + timedelta(hours=1)).isoformat()

        # Add cache entries with different key prefixes
        seeded_keys = (
            "yt:transcript:video1:en",
            "yt:transcript:video2:en",
            "yt:summary:hash1:config1",
        )
        for key in seeded_keys:
            cache_manager._memory_cache[key] = {"data": {}, "expires_at": expires_at}

        # Invalidate only transcript entries
        count = await cache_manager.invalidate_cache("transcript")

        assert count == 2
        assert len(cache_manager._memory_cache) == 1
        assert "yt:summary:hash1:config1" in cache_manager._memory_cache
class TestCacheWritePolicies:
    """Checks that each write policy leaves the entry readable."""

    @pytest.mark.asyncio
    async def test_write_through_policy(self, cache_manager):
        """A WRITE_THROUGH write succeeds and the entry can be read back."""
        video_id = "test123"
        payload = {"transcript": "test"}

        stored = await cache_manager.cache_transcript(
            video_id,
            "en",
            payload,
            policy=CachePolicy.WRITE_THROUGH,
        )
        assert stored is True

        # The entry must be retrievable afterwards.
        fetched = await cache_manager.get_cached_transcript(video_id, "en")
        assert fetched is not None

    @pytest.mark.asyncio
    async def test_write_back_policy(self, cache_manager):
        """A WRITE_BACK write succeeds and the entry can be read back."""
        video_id = "test456"
        payload = {"transcript": "test"}

        stored = await cache_manager.cache_transcript(
            video_id,
            "en",
            payload,
            policy=CachePolicy.WRITE_BACK,
        )
        assert stored is True

        # The entry must be retrievable afterwards.
        fetched = await cache_manager.get_cached_transcript(video_id, "en")
        assert fetched is not None
class TestBackgroundTasks:
    """Test background tasks functionality."""

    @pytest.mark.asyncio
    async def test_memory_cache_cleanup(self, cache_manager):
        """Cleanup drops the expired entry and keeps the unexpired one."""
        now = datetime.utcnow()

        # Add expired entry
        cache_manager._memory_cache["test_key"] = {
            "data": {},
            "expires_at": (now - timedelta(hours=1)).isoformat(),
        }
        # Add valid entry
        cache_manager._memory_cache["valid_key"] = {
            "data": {},
            "expires_at": (now + timedelta(hours=1)).isoformat(),
        }

        # Run cleanup
        await cache_manager._cleanup_memory_cache()

        assert "test_key" not in cache_manager._memory_cache
        assert "valid_key" in cache_manager._memory_cache

    @pytest.mark.asyncio
    async def test_background_task_lifecycle(self, cache_config):
        """initialize() starts the cleanup task and close() cancels it."""
        manager = EnhancedCacheManager(cache_config)

        # Initialize starts background tasks
        await manager.initialize()
        try:
            assert manager._cleanup_task is not None
            assert not manager._cleanup_task.done()
        finally:
            # This test builds its own manager, so no fixture will close it.
            # Closing in ``finally`` keeps a failed assertion above from
            # leaving the background task running.
            await manager.close()

        # Close stops background tasks
        assert manager._cleanup_task.cancelled()
class TestCompatibilityMethods:
    """Checks for the methods kept for compatibility with the existing CacheManager."""

    @pytest.mark.asyncio
    async def test_cache_pipeline_result(self, cache_manager):
        """A pipeline result stored under a job id is returned unchanged."""
        job_id = "job123"
        pipeline_result = {"status": "completed", "summary": "test"}

        stored = await cache_manager.cache_pipeline_result(job_id, pipeline_result)
        assert stored is True

        fetched = await cache_manager.get_cached_pipeline_result(job_id)
        assert fetched == pipeline_result

    @pytest.mark.asyncio
    async def test_get_cache_stats(self, cache_manager):
        """get_cache_stats() exposes performance metrics and configuration."""
        stats = await cache_manager.get_cache_stats()

        for section in ("performance_metrics", "configuration"):
            assert section in stats