# youtube-summarizer/backend/tests/unit/test_multi_model_service.py

"""Unit tests for multi-model AI service."""
import pytest
import pytest_asyncio
from unittest.mock import Mock, AsyncMock, patch
from datetime import datetime
from backend.services.multi_model_service import MultiModelService
from backend.services.ai_model_registry import (
AIModelRegistry,
ModelProvider,
ModelConfig,
ModelSelectionContext,
ModelSelectionStrategy,
ModelCapability,
ModelMetrics
)
from backend.services.ai_service import SummaryRequest, SummaryResult, SummaryLength, ModelUsage
@pytest.fixture
def mock_summary_result():
"""Create a mock summary result."""
return SummaryResult(
summary="Test summary",
key_points=["Point 1", "Point 2"],
main_themes=["Theme 1"],
actionable_insights=["Insight 1"],
confidence_score=0.9,
processing_metadata={"model": "test"},
usage=ModelUsage(
input_tokens=100,
output_tokens=50,
total_tokens=150,
model="test-model"
),
cost_data={
"input_cost": 0.01,
"output_cost": 0.02,
"total_cost": 0.03
}
)
@pytest.fixture
def mock_openai_service(mock_summary_result):
"""Create a mock OpenAI service."""
service = Mock()
service.generate_summary = AsyncMock(return_value=mock_summary_result)
return service
@pytest.fixture
def mock_anthropic_service(mock_summary_result):
"""Create a mock Anthropic service."""
service = Mock()
service.generate_summary = AsyncMock(return_value=mock_summary_result)
return service
@pytest.fixture
def mock_deepseek_service(mock_summary_result):
"""Create a mock DeepSeek service."""
service = Mock()
service.generate_summary = AsyncMock(return_value=mock_summary_result)
return service


class TestAIModelRegistry:
    """Test AI model registry functionality."""

    def test_register_model(self):
        """Test registering a model configuration."""
        registry = AIModelRegistry()
        config = ModelConfig(
            provider=ModelProvider.OPENAI,
            model_name="test-model",
            display_name="Test Model",
            max_tokens=1000,
            context_window=10000,
            input_cost_per_1k=0.001,
            output_cost_per_1k=0.002
        )
        registry.register_model(config)
        assert ModelProvider.OPENAI in registry.models
        assert registry.models[ModelProvider.OPENAI] == config

    def test_model_selection_cost_optimized(self):
        """Test cost-optimized model selection."""
        registry = AIModelRegistry()
        # Registry already has default models
        context = ModelSelectionContext(
            content_length=1000,
            strategy=ModelSelectionStrategy.COST_OPTIMIZED
        )
        # Should select cheapest available model
        selected = registry.select_model(context)
        assert selected is not None
        assert selected == ModelProvider.DEEPSEEK  # DeepSeek is cheapest

    def test_model_selection_quality_optimized(self):
        """Test quality-optimized model selection."""
        registry = AIModelRegistry()
        context = ModelSelectionContext(
            content_length=1000,
            strategy=ModelSelectionStrategy.QUALITY_OPTIMIZED
        )
        # Should select highest quality model
        selected = registry.select_model(context)
        assert selected is not None
        assert selected == ModelProvider.ANTHROPIC  # Anthropic has highest quality

    def test_model_selection_with_constraints(self):
        """Test model selection with constraints."""
        registry = AIModelRegistry()
        # Very low cost constraint
        context = ModelSelectionContext(
            content_length=10000,  # Long content
            max_cost=0.0001,  # Very low budget
            strategy=ModelSelectionStrategy.BALANCED
        )
        # May not find a suitable model; the exact outcome depends on the cost calculations,
        # so only check that the result is either None or a registered provider.
        selected = registry.select_model(context)
        assert selected is None or selected in registry.models

    def test_cost_comparison(self):
        """Test cost comparison across models."""
        registry = AIModelRegistry()
        comparison = registry.get_cost_comparison(1000)
        assert "openai" in comparison
        assert "anthropic" in comparison
        assert "deepseek" in comparison
        for provider, data in comparison.items():
            assert "cost_usd" in data
            assert "model" in data
            assert "quality_score" in data

    @pytest.mark.asyncio
    async def test_execute_with_fallback_success(self, mock_openai_service, mock_summary_result):
        """Test successful execution with fallback."""
        registry = AIModelRegistry()
        registry.register_service(ModelProvider.OPENAI, mock_openai_service)
        request = SummaryRequest(transcript="Test transcript")
        result, provider = await registry.execute_with_fallback(request)
        assert result == mock_summary_result
        assert provider == ModelProvider.OPENAI
        mock_openai_service.generate_summary.assert_called_once()

    @pytest.mark.asyncio
    async def test_execute_with_fallback_retry(self, mock_anthropic_service, mock_summary_result):
        """Test execution with retry on failure."""
        registry = AIModelRegistry()
        # First call fails, second succeeds
        mock_anthropic_service.generate_summary = AsyncMock(
            side_effect=[Exception("Network error"), mock_summary_result]
        )
        registry.register_service(ModelProvider.ANTHROPIC, mock_anthropic_service)
        request = SummaryRequest(transcript="Test transcript")
        result, provider = await registry.execute_with_fallback(request, max_retries=2)
        assert result == mock_summary_result
        assert provider == ModelProvider.ANTHROPIC
        assert mock_anthropic_service.generate_summary.call_count == 2
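
    # Sketch of the exhaustion case, not part of the original suite. It assumes that
    # execute_with_fallback surfaces an error once every attempt has failed rather than
    # returning a result; the exact exception type is not pinned down, so the test only
    # checks that some exception propagates.
    @pytest.mark.asyncio
    async def test_execute_with_fallback_exhausted(self, mock_openai_service):
        """Sketch: if every attempt fails, an error is expected to propagate."""
        registry = AIModelRegistry()
        # Every call fails
        mock_openai_service.generate_summary = AsyncMock(side_effect=Exception("Network error"))
        registry.register_service(ModelProvider.OPENAI, mock_openai_service)
        request = SummaryRequest(transcript="Test transcript")
        with pytest.raises(Exception):
            await registry.execute_with_fallback(request, max_retries=1)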

    def test_metrics_tracking(self):
        """Test metrics tracking."""
        registry = AIModelRegistry()
        # Get initial metrics
        metrics = registry.get_metrics(ModelProvider.OPENAI)
        assert metrics["total_requests"] == 0
        assert metrics["success_rate"] == 0.0


class TestMultiModelService:
    """Test multi-model service functionality."""

    @pytest.mark.asyncio
    async def test_initialization_with_api_keys(self, mock_openai_service, mock_anthropic_service):
        """Test service initialization with API keys."""
        with patch('backend.services.multi_model_service.OpenAISummarizer', return_value=mock_openai_service):
            with patch('backend.services.multi_model_service.AnthropicSummarizer', return_value=mock_anthropic_service):
                service = MultiModelService(
                    openai_api_key="test-key",
                    anthropic_api_key="test-key"
                )
                assert ModelProvider.OPENAI in service.active_providers
                assert ModelProvider.ANTHROPIC in service.active_providers
                assert len(service.active_providers) == 2

    def test_initialization_no_keys(self):
        """Test initialization fails without API keys."""
        with pytest.raises(ValueError, match="No AI service API keys provided"):
            MultiModelService()

    def test_content_type_detection_technical(self):
        """Test technical content type detection."""
        service = MultiModelService(openai_api_key="test")
        transcript = "Let's debug this code. The function has an algorithm that compiles the API database queries."
        content_type = service._determine_content_type(transcript)
        assert content_type == ModelCapability.TECHNICAL

    def test_content_type_detection_educational(self):
        """Test educational content type detection."""
        service = MultiModelService(openai_api_key="test")
        transcript = "In this lesson, we'll learn and explain the tutorial to help you understand the course material."
        content_type = service._determine_content_type(transcript)
        assert content_type == ModelCapability.EDUCATIONAL

    def test_content_type_detection_by_length(self):
        """Test content type detection by length."""
        service = MultiModelService(openai_api_key="test")
        # Short form
        short_transcript = "Short video " * 50  # ~100 words
        assert service._determine_content_type(short_transcript) == ModelCapability.SHORT_FORM
        # Medium form
        medium_transcript = "Medium video " * 500  # ~1000 words
        assert service._determine_content_type(medium_transcript) == ModelCapability.MEDIUM_FORM
        # Long form
        long_transcript = "Long video " * 3000  # ~6000 words
        assert service._determine_content_type(long_transcript) == ModelCapability.LONG_FORM

    @pytest.mark.asyncio
    async def test_generate_summary_with_strategy(self, mock_openai_service, mock_summary_result):
        """Test summary generation with specific strategy."""
        with patch('backend.services.multi_model_service.OpenAISummarizer', return_value=mock_openai_service):
            service = MultiModelService(openai_api_key="test-key")
            # Mock the registry's execute_with_fallback
            service.registry.execute_with_fallback = AsyncMock(
                return_value=(mock_summary_result, ModelProvider.OPENAI)
            )
            request = SummaryRequest(transcript="Test transcript")
            result, provider = await service.generate_summary(
                request,
                strategy=ModelSelectionStrategy.COST_OPTIMIZED
            )
            assert result == mock_summary_result
            assert provider == ModelProvider.OPENAI
            assert result.processing_metadata["provider"] == "openai"
            assert result.processing_metadata["strategy"] == "cost_optimized"

    @pytest.mark.asyncio
    async def test_generate_summary_simple(self, mock_anthropic_service, mock_summary_result):
        """Test simple summary generation (AIService interface)."""
        with patch('backend.services.multi_model_service.AnthropicSummarizer', return_value=mock_anthropic_service):
            service = MultiModelService(anthropic_api_key="test-key")
            service.registry.execute_with_fallback = AsyncMock(
                return_value=(mock_summary_result, ModelProvider.ANTHROPIC)
            )
            request = SummaryRequest(transcript="Test transcript")
            result = await service.generate_summary_simple(request)
            assert result == mock_summary_result

    def test_estimate_cost(self):
        """Test cost estimation."""
        service = MultiModelService(openai_api_key="test")
        estimates = service.estimate_cost(4000)  # 4000 chars ≈ 1000 tokens
        assert "estimated_tokens" in estimates
        assert estimates["estimated_tokens"] == 1000
        assert "comparison" in estimates
        assert "recommendations" in estimates
        # Check recommendations
        recs = estimates["recommendations"]
        assert len(recs) >= 3
        assert any(r["type"] == "cost_optimized" for r in recs)
        assert any(r["type"] == "quality_optimized" for r in recs)
        assert any(r["type"] == "speed_optimized" for r in recs)

    def test_get_metrics(self):
        """Test getting metrics."""
        service = MultiModelService(openai_api_key="test")
        metrics = service.get_metrics()
        assert isinstance(metrics, dict)
        # Should have metrics for each provider
        if "openai" in metrics:
            assert "total_requests" in metrics["openai"]
            assert "success_rate" in metrics["openai"]

    def test_reset_availability(self):
        """Test resetting model availability."""
        service = MultiModelService(openai_api_key="test")
        # Mark a model as unavailable
        service.registry.models[ModelProvider.OPENAI].is_available = False
        # Reset availability
        service.reset_model_availability(ModelProvider.OPENAI)
        # Should be available again
        assert service.registry.models[ModelProvider.OPENAI].is_available

    def test_set_default_strategy(self):
        """Test setting default strategy."""
        service = MultiModelService(openai_api_key="test")
        # Initially balanced
        assert service.default_strategy == ModelSelectionStrategy.BALANCED
        # Change to cost optimized
        service.set_default_strategy(ModelSelectionStrategy.COST_OPTIMIZED)
        assert service.default_strategy == ModelSelectionStrategy.COST_OPTIMIZED


class TestModelSelection:
    """Test model selection logic."""

    def test_score_model_balanced(self):
        """Test model scoring with balanced strategy."""
        registry = AIModelRegistry()
        context = ModelSelectionContext(
            content_length=1000,
            strategy=ModelSelectionStrategy.BALANCED
        )
        # Score each model
        openai_score = registry._score_model(ModelProvider.OPENAI, context)
        anthropic_score = registry._score_model(ModelProvider.ANTHROPIC, context)
        deepseek_score = registry._score_model(ModelProvider.DEEPSEEK, context)
        # All should have reasonable scores
        assert 0 <= openai_score <= 100
        assert 0 <= anthropic_score <= 100
        assert 0 <= deepseek_score <= 100
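
    # Sketch comparing scores across strategies, not part of the original suite. It assumes
    # select_model picks the highest-scoring provider, so the selections asserted in
    # TestAIModelRegistry (DeepSeek under COST_OPTIMIZED, Anthropic under QUALITY_OPTIMIZED)
    # should be mirrored by _score_model's relative ordering.
    def test_score_model_strategy_ordering(self):
        """Sketch: the strategy should shift the relative ordering of model scores."""
        registry = AIModelRegistry()
        cost_context = ModelSelectionContext(
            content_length=1000,
            strategy=ModelSelectionStrategy.COST_OPTIMIZED
        )
        quality_context = ModelSelectionContext(
            content_length=1000,
            strategy=ModelSelectionStrategy.QUALITY_OPTIMIZED
        )
        # Cheapest provider should not score below the premium one under cost optimization
        deepseek_cost = registry._score_model(ModelProvider.DEEPSEEK, cost_context)
        anthropic_cost = registry._score_model(ModelProvider.ANTHROPIC, cost_context)
        assert deepseek_cost >= anthropic_cost
        # Highest-quality provider should not score below the cheapest one under quality optimization
        deepseek_quality = registry._score_model(ModelProvider.DEEPSEEK, quality_context)
        anthropic_quality = registry._score_model(ModelProvider.ANTHROPIC, quality_context)
        assert anthropic_quality >= deepseek_quality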

    def test_capability_matching(self):
        """Test capability matching in selection."""
        registry = AIModelRegistry()
        # Technical content should prefer models with technical capability
        context = ModelSelectionContext(
            content_length=1000,
            content_type=ModelCapability.TECHNICAL,
            strategy=ModelSelectionStrategy.QUALITY_OPTIMIZED
        )
        selected = registry.select_model(context)
        assert selected is not None
        # Selected model should support technical content
        config = registry.models[selected]
        assert ModelCapability.TECHNICAL in config.capabilities