"""Unit tests for multi-model AI service."""

import pytest
import pytest_asyncio
from unittest.mock import Mock, AsyncMock, patch
from datetime import datetime

from backend.services.multi_model_service import MultiModelService
from backend.services.ai_model_registry import (
    AIModelRegistry,
    ModelProvider,
    ModelConfig,
    ModelSelectionContext,
    ModelSelectionStrategy,
    ModelCapability,
    ModelMetrics
)
from backend.services.ai_service import SummaryRequest, SummaryResult, SummaryLength, ModelUsage


@pytest.fixture
def mock_summary_result():
    """Create a mock summary result."""
    return SummaryResult(
        summary="Test summary",
        key_points=["Point 1", "Point 2"],
        main_themes=["Theme 1"],
        actionable_insights=["Insight 1"],
        confidence_score=0.9,
        processing_metadata={"model": "test"},
        usage=ModelUsage(
            input_tokens=100,
            output_tokens=50,
            total_tokens=150,
            model="test-model"
        ),
        cost_data={
            "input_cost": 0.01,
            "output_cost": 0.02,
            "total_cost": 0.03
        }
    )


@pytest.fixture
def mock_openai_service(mock_summary_result):
    """Create a mock OpenAI service."""
    service = Mock()
    service.generate_summary = AsyncMock(return_value=mock_summary_result)
    return service


@pytest.fixture
def mock_anthropic_service(mock_summary_result):
    """Create a mock Anthropic service."""
    service = Mock()
    service.generate_summary = AsyncMock(return_value=mock_summary_result)
    return service


@pytest.fixture
def mock_deepseek_service(mock_summary_result):
    """Create a mock DeepSeek service."""
    service = Mock()
    service.generate_summary = AsyncMock(return_value=mock_summary_result)
    return service


class TestAIModelRegistry:
    """Test AI model registry functionality."""

    def test_register_model(self):
        """Test registering a model configuration."""
        registry = AIModelRegistry()

        config = ModelConfig(
            provider=ModelProvider.OPENAI,
            model_name="test-model",
            display_name="Test Model",
            max_tokens=1000,
            context_window=10000,
            input_cost_per_1k=0.001,
            output_cost_per_1k=0.002
        )

        registry.register_model(config)

        assert ModelProvider.OPENAI in registry.models
        assert registry.models[ModelProvider.OPENAI] == config

    def test_model_selection_cost_optimized(self):
        """Test cost-optimized model selection."""
        registry = AIModelRegistry()

        # Registry already has default models
        context = ModelSelectionContext(
            content_length=1000,
            strategy=ModelSelectionStrategy.COST_OPTIMIZED
        )

        # Should select the cheapest available model
        selected = registry.select_model(context)
        assert selected is not None
        assert selected == ModelProvider.DEEPSEEK  # DeepSeek is cheapest

    def test_model_selection_quality_optimized(self):
        """Test quality-optimized model selection."""
        registry = AIModelRegistry()

        context = ModelSelectionContext(
            content_length=1000,
            strategy=ModelSelectionStrategy.QUALITY_OPTIMIZED
        )

        # Should select the highest-quality model
        selected = registry.select_model(context)
        assert selected is not None
        assert selected == ModelProvider.ANTHROPIC  # Anthropic has highest quality

    def test_model_selection_with_constraints(self):
        """Test model selection with constraints."""
        registry = AIModelRegistry()

        # Very low cost constraint
        context = ModelSelectionContext(
            content_length=10000,  # Long content
            max_cost=0.0001,  # Very low budget
            strategy=ModelSelectionStrategy.BALANCED
        )

        # May not find a suitable model; the exact result depends on the cost
        # calculations, so only assert the selection is None or a registered provider.
        selected = registry.select_model(context)
        assert selected is None or selected in registry.models

    def test_cost_comparison(self):
        """Test cost comparison across models."""
        registry = AIModelRegistry()

        comparison = registry.get_cost_comparison(1000)

        assert "openai" in comparison
        assert "anthropic" in comparison
        assert "deepseek" in comparison

        for provider, data in comparison.items():
            assert "cost_usd" in data
            assert "model" in data
            assert "quality_score" in data

    @pytest.mark.asyncio
    async def test_execute_with_fallback_success(self, mock_openai_service, mock_summary_result):
        """Test successful execution with fallback."""
        registry = AIModelRegistry()
        registry.register_service(ModelProvider.OPENAI, mock_openai_service)

        request = SummaryRequest(transcript="Test transcript")
        result, provider = await registry.execute_with_fallback(request)

        assert result == mock_summary_result
        assert provider == ModelProvider.OPENAI
        mock_openai_service.generate_summary.assert_called_once()

    @pytest.mark.asyncio
    async def test_execute_with_fallback_retry(self, mock_anthropic_service, mock_summary_result):
        """Test execution with retry on failure."""
        registry = AIModelRegistry()

        # First call fails, second succeeds
        mock_anthropic_service.generate_summary = AsyncMock(
            side_effect=[Exception("Network error"), mock_summary_result]
        )
        registry.register_service(ModelProvider.ANTHROPIC, mock_anthropic_service)

        request = SummaryRequest(transcript="Test transcript")
        result, provider = await registry.execute_with_fallback(request, max_retries=2)

        assert result == mock_summary_result
        assert provider == ModelProvider.ANTHROPIC
        assert mock_anthropic_service.generate_summary.call_count == 2
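
    # A minimal sketch of the exhaustion path, assuming execute_with_fallback
    # surfaces an exception once all providers and retries fail; adjust the
    # expected exception type to match the actual implementation.
    @pytest.mark.asyncio
    async def test_execute_with_fallback_exhausted(self, mock_anthropic_service):
        """Sketch: every attempt fails, so the call should raise."""
        registry = AIModelRegistry()

        mock_anthropic_service.generate_summary = AsyncMock(
            side_effect=Exception("Network error")
        )
        registry.register_service(ModelProvider.ANTHROPIC, mock_anthropic_service)

        request = SummaryRequest(transcript="Test transcript")
        with pytest.raises(Exception):
            await registry.execute_with_fallback(request, max_retries=2)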

    def test_metrics_tracking(self):
        """Test metrics tracking."""
        registry = AIModelRegistry()

        # Get initial metrics
        metrics = registry.get_metrics(ModelProvider.OPENAI)
        assert metrics["total_requests"] == 0
        assert metrics["success_rate"] == 0.0


class TestMultiModelService:
    """Test multi-model service functionality."""

    @pytest.mark.asyncio
    async def test_initialization_with_api_keys(self, mock_openai_service, mock_anthropic_service):
        """Test service initialization with API keys."""
        with patch('backend.services.multi_model_service.OpenAISummarizer', return_value=mock_openai_service):
            with patch('backend.services.multi_model_service.AnthropicSummarizer', return_value=mock_anthropic_service):
                service = MultiModelService(
                    openai_api_key="test-key",
                    anthropic_api_key="test-key"
                )

                assert ModelProvider.OPENAI in service.active_providers
                assert ModelProvider.ANTHROPIC in service.active_providers
                assert len(service.active_providers) == 2

    def test_initialization_no_keys(self):
        """Test initialization fails without API keys."""
        with pytest.raises(ValueError, match="No AI service API keys provided"):
            MultiModelService()

    def test_content_type_detection_technical(self):
        """Test technical content type detection."""
        service = MultiModelService(openai_api_key="test")

        transcript = "Let's debug this code. The function has an algorithm that compiles the API database queries."
        content_type = service._determine_content_type(transcript)

        assert content_type == ModelCapability.TECHNICAL

    def test_content_type_detection_educational(self):
        """Test educational content type detection."""
        service = MultiModelService(openai_api_key="test")

        transcript = "In this lesson, we'll learn and explain the tutorial to help you understand the course material."
        content_type = service._determine_content_type(transcript)

        assert content_type == ModelCapability.EDUCATIONAL

    def test_content_type_detection_by_length(self):
        """Test content type detection by length."""
        service = MultiModelService(openai_api_key="test")

        # Short form
        short_transcript = "Short video " * 50  # ~100 words
        assert service._determine_content_type(short_transcript) == ModelCapability.SHORT_FORM

        # Medium form
        medium_transcript = "Medium video " * 500  # ~1000 words
        assert service._determine_content_type(medium_transcript) == ModelCapability.MEDIUM_FORM

        # Long form
        long_transcript = "Long video " * 3000  # ~6000 words
        assert service._determine_content_type(long_transcript) == ModelCapability.LONG_FORM

    @pytest.mark.asyncio
    async def test_generate_summary_with_strategy(self, mock_openai_service, mock_summary_result):
        """Test summary generation with a specific strategy."""
        with patch('backend.services.multi_model_service.OpenAISummarizer', return_value=mock_openai_service):
            service = MultiModelService(openai_api_key="test-key")

            # Mock the registry's execute_with_fallback
            service.registry.execute_with_fallback = AsyncMock(
                return_value=(mock_summary_result, ModelProvider.OPENAI)
            )

            request = SummaryRequest(transcript="Test transcript")
            result, provider = await service.generate_summary(
                request,
                strategy=ModelSelectionStrategy.COST_OPTIMIZED
            )

            assert result == mock_summary_result
            assert provider == ModelProvider.OPENAI
            assert result.processing_metadata["provider"] == "openai"
            assert result.processing_metadata["strategy"] == "cost_optimized"

    @pytest.mark.asyncio
    async def test_generate_summary_simple(self, mock_anthropic_service, mock_summary_result):
        """Test simple summary generation (AIService interface)."""
        with patch('backend.services.multi_model_service.AnthropicSummarizer', return_value=mock_anthropic_service):
            service = MultiModelService(anthropic_api_key="test-key")

            service.registry.execute_with_fallback = AsyncMock(
                return_value=(mock_summary_result, ModelProvider.ANTHROPIC)
            )

            request = SummaryRequest(transcript="Test transcript")
            result = await service.generate_summary_simple(request)

            assert result == mock_summary_result

    def test_estimate_cost(self):
        """Test cost estimation."""
        service = MultiModelService(openai_api_key="test")

        estimates = service.estimate_cost(4000)  # 4000 chars ≈ 1000 tokens

        assert "estimated_tokens" in estimates
        assert estimates["estimated_tokens"] == 1000
        assert "comparison" in estimates
        assert "recommendations" in estimates

        # Check recommendations
        recs = estimates["recommendations"]
        assert len(recs) >= 3
        assert any(r["type"] == "cost_optimized" for r in recs)
        assert any(r["type"] == "quality_optimized" for r in recs)
        assert any(r["type"] == "speed_optimized" for r in recs)

    def test_get_metrics(self):
        """Test getting metrics."""
        service = MultiModelService(openai_api_key="test")

        metrics = service.get_metrics()
        assert isinstance(metrics, dict)

        # Should have metrics for each provider
        if "openai" in metrics:
            assert "total_requests" in metrics["openai"]
            assert "success_rate" in metrics["openai"]

    def test_reset_availability(self):
        """Test resetting model availability."""
        service = MultiModelService(openai_api_key="test")

        # Mark a model as unavailable
        service.registry.models[ModelProvider.OPENAI].is_available = False

        # Reset availability
        service.reset_model_availability(ModelProvider.OPENAI)

        # Should be available again
        assert service.registry.models[ModelProvider.OPENAI].is_available

    def test_set_default_strategy(self):
        """Test setting the default strategy."""
        service = MultiModelService(openai_api_key="test")

        # Initially balanced
        assert service.default_strategy == ModelSelectionStrategy.BALANCED

        # Change to cost-optimized
        service.set_default_strategy(ModelSelectionStrategy.COST_OPTIMIZED)
        assert service.default_strategy == ModelSelectionStrategy.COST_OPTIMIZED


class TestModelSelection:
    """Test model selection logic."""

    def test_score_model_balanced(self):
        """Test model scoring with balanced strategy."""
        registry = AIModelRegistry()

        context = ModelSelectionContext(
            content_length=1000,
            strategy=ModelSelectionStrategy.BALANCED
        )

        # Score each model
        openai_score = registry._score_model(ModelProvider.OPENAI, context)
        anthropic_score = registry._score_model(ModelProvider.ANTHROPIC, context)
        deepseek_score = registry._score_model(ModelProvider.DEEPSEEK, context)

        # All should have reasonable scores
        assert 0 <= openai_score <= 100
        assert 0 <= anthropic_score <= 100
        assert 0 <= deepseek_score <= 100
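
    # A minimal sketch, assuming select_model picks the provider with the highest
    # _score_model value and that DeepSeek wins under COST_OPTIMIZED, as the
    # cost-optimized selection test above asserts.
    def test_score_model_cost_optimized_favors_deepseek(self):
        """Sketch: DeepSeek should score at least as high as the others when cost-optimized."""
        registry = AIModelRegistry()

        context = ModelSelectionContext(
            content_length=1000,
            strategy=ModelSelectionStrategy.COST_OPTIMIZED
        )

        deepseek_score = registry._score_model(ModelProvider.DEEPSEEK, context)
        openai_score = registry._score_model(ModelProvider.OPENAI, context)
        anthropic_score = registry._score_model(ModelProvider.ANTHROPIC, context)

        assert deepseek_score >= openai_score
        assert deepseek_score >= anthropic_score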

    def test_capability_matching(self):
        """Test capability matching in selection."""
        registry = AIModelRegistry()

        # Technical content should prefer models with technical capability
        context = ModelSelectionContext(
            content_length=1000,
            content_type=ModelCapability.TECHNICAL,
            strategy=ModelSelectionStrategy.QUALITY_OPTIMIZED
        )

        selected = registry.select_model(context)
        assert selected is not None

        # Selected model should support technical content
        config = registry.models[selected]
        assert ModelCapability.TECHNICAL in config.capabilities