"""Unit tests for enhanced cache manager."""
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
import asyncio
|
|
import json
|
|
from datetime import datetime, timedelta
|
|
from unittest.mock import Mock, AsyncMock, patch, MagicMock
|
|
|
|
from backend.services.enhanced_cache_manager import (
|
|
EnhancedCacheManager,
|
|
CacheConfig,
|
|
CachePolicy,
|
|
CacheMetrics
|
|
)
|
|
|
|
|
|


@pytest.fixture
def cache_config():
    """Create test cache configuration."""
    return CacheConfig(
        transcript_ttl_hours=1,
        summary_ttl_hours=1,
        memory_max_size_mb=10,
        redis_url=None,  # Use memory cache for tests
        enable_warming=False,
        enable_analytics=True
    )


@pytest_asyncio.fixture
async def cache_manager(cache_config):
    """Create cache manager instance."""
    manager = EnhancedCacheManager(cache_config)
    await manager.initialize()
    yield manager
    await manager.close()


@pytest_asyncio.fixture
async def cache_manager_with_redis(cache_config):
    """Create cache manager with mocked Redis."""
    cache_config.redis_url = "redis://localhost:6379/0"
    manager = EnhancedCacheManager(cache_config)

    # Mock Redis client
    mock_redis = AsyncMock()
    mock_redis.ping = AsyncMock(return_value=True)
    mock_redis.get = AsyncMock(return_value=None)
    mock_redis.setex = AsyncMock(return_value=True)
    mock_redis.delete = AsyncMock(return_value=1)
    mock_redis.info = AsyncMock(return_value={'used_memory': 1024 * 1024 * 5})  # 5MB
    mock_redis.scan = AsyncMock(return_value=(0, []))
    mock_redis.flushdb = AsyncMock(return_value=True)
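
    # Patching aioredis.from_url keeps initialize() from opening a real Redis
    # connection; redis_client is then pointed at the mock directly so the tests
    # can inspect its calls.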
    with patch('backend.services.enhanced_cache_manager.aioredis.from_url', AsyncMock(return_value=mock_redis)):
        await manager.initialize()
        manager.redis_client = mock_redis
        yield manager

    await manager.close()


class TestCacheKeyGeneration:
    """Test cache key generation methods."""

    def test_transcript_key_generation(self, cache_config):
        """Test transcript cache key generation."""
        manager = EnhancedCacheManager(cache_config)

        key = manager._generate_transcript_key("abc123", "en")
        assert key == "yt:transcript:abc123:en"

        key = manager._generate_transcript_key("xyz789", "es")
        assert key == "yt:transcript:xyz789:es"

    def test_summary_key_generation(self, cache_config):
        """Test summary cache key generation."""
        manager = EnhancedCacheManager(cache_config)

        key = manager._generate_summary_key("hash123", "confighash456")
        assert key == "yt:summary:hash123:confighash456"

    def test_content_hash_generation(self, cache_config):
        """Test content hash generation."""
        manager = EnhancedCacheManager(cache_config)

        hash1 = manager.generate_content_hash("test content")
        hash2 = manager.generate_content_hash("test content")
        hash3 = manager.generate_content_hash("different content")

        assert hash1 == hash2  # Same content produces same hash
        assert hash1 != hash3  # Different content produces different hash
        assert len(hash1) == 16  # Hash is truncated to 16 chars

    def test_config_hash_generation(self, cache_config):
        """Test configuration hash generation."""
        manager = EnhancedCacheManager(cache_config)

        config1 = {"model": "gpt-4", "temperature": 0.7}
        config2 = {"temperature": 0.7, "model": "gpt-4"}  # Different order
        config3 = {"model": "gpt-3.5", "temperature": 0.7}

        hash1 = manager.generate_config_hash(config1)
        hash2 = manager.generate_config_hash(config2)
        hash3 = manager.generate_config_hash(config3)

        assert hash1 == hash2  # Order doesn't matter
        assert hash1 != hash3  # Different values produce different hash
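

# The hash tests above only pin down observable properties: identical input gives
# an identical digest, the digest is 16 characters long, and config hashing
# ignores key order. A minimal sketch consistent with those properties (an
# assumption, not the actual enhanced_cache_manager implementation) would be:
#
#     import hashlib
#
#     def generate_content_hash(content: str) -> str:
#         return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]
#
#     def generate_config_hash(config: dict) -> str:
#         canonical = json.dumps(config, sort_keys=True)
#         return hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:16]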


class TestTranscriptCaching:
    """Test transcript caching functionality."""

    @pytest.mark.asyncio
    async def test_cache_and_retrieve_transcript(self, cache_manager):
        """Test caching and retrieving a transcript."""
        video_id = "test123"
        language = "en"
        transcript_data = {
            "transcript": "This is a test transcript",
            "metadata": {"duration": 300}
        }

        # Cache transcript
        success = await cache_manager.cache_transcript(video_id, language, transcript_data)
        assert success is True

        # Retrieve transcript
        cached = await cache_manager.get_cached_transcript(video_id, language)
        assert cached is not None
        assert cached["transcript"] == transcript_data["transcript"]
        assert cached["metadata"] == transcript_data["metadata"]

    @pytest.mark.asyncio
    async def test_transcript_cache_miss(self, cache_manager):
        """Test cache miss for non-existent transcript."""
        cached = await cache_manager.get_cached_transcript("nonexistent", "en")
        assert cached is None
        assert cache_manager.metrics.misses == 1
        assert cache_manager.metrics.hits == 0

    @pytest.mark.asyncio
    async def test_transcript_cache_expiration(self, cache_manager):
        """Test transcript cache expiration."""
        video_id = "test123"
        transcript_data = {"transcript": "Test"}

        # Cache with very short TTL
        cache_manager.config.transcript_ttl_hours = 0.0001  # Very short TTL
        await cache_manager.cache_transcript(video_id, "en", transcript_data)
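
        # Note: 0.0001 hours is roughly 0.36 seconds, so the 0.5 second sleep
        # below comfortably outlives the TTL.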
        # Wait for expiration
        await asyncio.sleep(0.5)

        # Should return None due to expiration
        cached = await cache_manager.get_cached_transcript(video_id, "en")
        assert cached is None

    @pytest.mark.asyncio
    async def test_transcript_caching_with_redis(self, cache_manager_with_redis):
        """Test transcript caching with Redis backend."""
        video_id = "test123"
        transcript_data = {"transcript": "Test with Redis"}

        # Mock Redis get to return cached data
        cached_data = json.dumps(transcript_data)
        cache_manager_with_redis.redis_client.get = AsyncMock(return_value=cached_data)

        # Retrieve should hit Redis
        cached = await cache_manager_with_redis.get_cached_transcript(video_id, "en")
        assert cached == transcript_data
        assert cache_manager_with_redis.metrics.hits == 1

        # Verify Redis was called
        cache_manager_with_redis.redis_client.get.assert_called_once()


class TestSummaryCaching:
    """Test summary caching functionality."""

    @pytest.mark.asyncio
    async def test_cache_and_retrieve_summary(self, cache_manager):
        """Test caching and retrieving a summary."""
        transcript_hash = "hash123"
        config_hash = "confighash456"
        summary_data = {
            "summary": "This is a test summary",
            "key_points": ["point1", "point2"],
            "model": "claude-3-5-haiku-20241022"
        }

        # Cache summary
        success = await cache_manager.cache_summary(transcript_hash, config_hash, summary_data)
        assert success is True

        # Retrieve summary
        cached = await cache_manager.get_cached_summary(transcript_hash, config_hash)
        assert cached is not None
        assert cached["summary"] == summary_data["summary"]
        assert cached["key_points"] == summary_data["key_points"]
        assert "_cache_metadata" in cached

    @pytest.mark.asyncio
    async def test_summary_cache_validation(self, cache_manager):
        """Test summary cache validation based on version."""
        transcript_hash = "hash123"
        config_hash = "config456"

        # Create summary with outdated cache version
        summary_data = {
            "summary": "Test summary",
            "_cache_metadata": {
                "cache_version": "0.5",  # Outdated version
                "cached_at": datetime.utcnow().isoformat()
            }
        }

        # Manually insert into memory cache
        cache_key = cache_manager._generate_summary_key(transcript_hash, config_hash)
        cache_manager._memory_cache[cache_key] = {
            "data": summary_data,
            "expires_at": (datetime.utcnow() + timedelta(hours=1)).isoformat()
        }

        # Should return None due to version mismatch
        cached = await cache_manager.get_cached_summary(transcript_hash, config_hash)
        assert cached is None
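

# test_summary_cache_validation above relies on the manager comparing the stored
# "_cache_metadata"["cache_version"] against its current cache version and
# treating any mismatch as a miss; that contract is inferred from the test, and
# the actual version constant lives in enhanced_cache_manager itself.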


class TestCacheMetrics:
    """Test cache metrics and analytics."""

    def test_metrics_initialization(self):
        """Test metrics are properly initialized."""
        metrics = CacheMetrics()
        assert metrics.hits == 0
        assert metrics.misses == 0
        assert metrics.hit_rate == 0.0
        assert metrics.total_operations == 0

    def test_hit_rate_calculation(self):
        """Test hit rate calculation."""
        metrics = CacheMetrics(hits=75, misses=25)
        assert metrics.hit_rate == 0.75

        metrics = CacheMetrics(hits=0, misses=100)
        assert metrics.hit_rate == 0.0

        metrics = CacheMetrics(hits=100, misses=0)
        assert metrics.hit_rate == 1.0
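
    # test_hit_rate_calculation above is consistent with hit_rate being computed
    # as hits / (hits + misses), returning 0.0 when nothing has been recorded
    # (an inference from these assertions, not from the CacheMetrics source).
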
    @pytest.mark.asyncio
    async def test_cache_analytics(self, cache_manager):
        """Test cache analytics generation."""
        # Generate some cache activity
        await cache_manager.cache_transcript("video1", "en", {"transcript": "test"})
        await cache_manager.get_cached_transcript("video1", "en")  # Hit
        await cache_manager.get_cached_transcript("video2", "en")  # Miss

        analytics = await cache_manager.get_cache_analytics()

        assert "performance_metrics" in analytics
        assert analytics["performance_metrics"]["hits"] == 1
        assert analytics["performance_metrics"]["misses"] == 1
        assert analytics["performance_metrics"]["write_operations"] == 1
        assert analytics["performance_metrics"]["hit_rate"] == 0.5

        assert "memory_cache_usage" in analytics
        assert analytics["memory_cache_usage"]["entries"] == 1

        assert "configuration" in analytics
        assert analytics["configuration"]["using_redis"] is False


class TestCacheInvalidation:
    """Test cache invalidation functionality."""

    @pytest.mark.asyncio
    async def test_invalidate_all_cache(self, cache_manager):
        """Test invalidating all cache entries."""
        # Add some cache entries
        await cache_manager.cache_transcript("video1", "en", {"transcript": "test1"})
        await cache_manager.cache_transcript("video2", "es", {"transcript": "test2"})

        # Invalidate all
        count = await cache_manager.invalidate_cache()
        assert count == 2
        assert len(cache_manager._memory_cache) == 0

    @pytest.mark.asyncio
    async def test_invalidate_pattern_match(self, cache_manager):
        """Test invalidating cache entries by pattern."""
        # Add cache entries with different patterns
        cache_manager._memory_cache["yt:transcript:video1:en"] = {"data": {}, "expires_at": "2025-01-01"}
        cache_manager._memory_cache["yt:transcript:video2:en"] = {"data": {}, "expires_at": "2025-01-01"}
        cache_manager._memory_cache["yt:summary:hash1:config1"] = {"data": {}, "expires_at": "2025-01-01"}

        # Invalidate only transcript entries
        count = await cache_manager.invalidate_cache("transcript")
        assert count == 2
        assert len(cache_manager._memory_cache) == 1
        assert "yt:summary:hash1:config1" in cache_manager._memory_cache
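

# test_invalidate_pattern_match above only pins down that keys containing
# "transcript" are removed while the summary key survives; whether matching is
# substring- or glob-based (and how it maps onto Redis SCAN when Redis is
# enabled) is left to the implementation.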


class TestCacheWritePolicies:
    """Test different cache write policies."""

    @pytest.mark.asyncio
    async def test_write_through_policy(self, cache_manager):
        """Test WRITE_THROUGH policy."""
        video_id = "test123"
        transcript_data = {"transcript": "test"}

        success = await cache_manager.cache_transcript(
            video_id, "en", transcript_data,
            policy=CachePolicy.WRITE_THROUGH
        )
        assert success is True

        # Data should be in cache
        cached = await cache_manager.get_cached_transcript(video_id, "en")
        assert cached is not None

    @pytest.mark.asyncio
    async def test_write_back_policy(self, cache_manager):
        """Test WRITE_BACK policy."""
        video_id = "test456"
        transcript_data = {"transcript": "test"}

        success = await cache_manager.cache_transcript(
            video_id, "en", transcript_data,
            policy=CachePolicy.WRITE_BACK
        )
        assert success is True

        # Data should still be in memory cache
        cached = await cache_manager.get_cached_transcript(video_id, "en")
        assert cached is not None
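

# With the memory-only configuration used in these tests, both policies leave the
# entry readable immediately after the call, so the assertions above do not
# distinguish when a backing store would be updated under WRITE_THROUGH versus
# WRITE_BACK.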


class TestBackgroundTasks:
    """Test background tasks functionality."""

    @pytest.mark.asyncio
    async def test_memory_cache_cleanup(self, cache_manager):
        """Test expired entry cleanup from memory cache."""
        # Add expired entry
        cache_manager._memory_cache["test_key"] = {
            "data": {},
            "expires_at": (datetime.utcnow() - timedelta(hours=1)).isoformat()
        }

        # Add valid entry
        cache_manager._memory_cache["valid_key"] = {
            "data": {},
            "expires_at": (datetime.utcnow() + timedelta(hours=1)).isoformat()
        }

        # Run cleanup
        await cache_manager._cleanup_memory_cache()

        assert "test_key" not in cache_manager._memory_cache
        assert "valid_key" in cache_manager._memory_cache

    @pytest.mark.asyncio
    async def test_background_task_lifecycle(self, cache_config):
        """Test starting and stopping background tasks."""
        manager = EnhancedCacheManager(cache_config)

        # Initialize starts background tasks
        await manager.initialize()
        assert manager._cleanup_task is not None
        assert not manager._cleanup_task.done()

        # Close stops background tasks
        await manager.close()
        assert manager._cleanup_task.cancelled()


class TestCompatibilityMethods:
    """Test compatibility with existing CacheManager."""

    @pytest.mark.asyncio
    async def test_cache_pipeline_result(self, cache_manager):
        """Test caching pipeline result (compatibility method)."""
        job_id = "job123"
        result = {"status": "completed", "summary": "test"}

        success = await cache_manager.cache_pipeline_result(job_id, result)
        assert success is True

        cached = await cache_manager.get_cached_pipeline_result(job_id)
        assert cached == result

    @pytest.mark.asyncio
    async def test_get_cache_stats(self, cache_manager):
        """Test getting cache stats (compatibility method)."""
        stats = await cache_manager.get_cache_stats()

        assert "performance_metrics" in stats
        assert "configuration" in stats