227 lines
9.7 KiB
Python
227 lines
9.7 KiB
Python
"""Unit tests for Anthropic summarizer service."""
|
|
import json
|
|
import pytest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
from backend.services.anthropic_summarizer import AnthropicSummarizer
|
|
from backend.services.ai_service import SummaryRequest, SummaryLength
|
|
from backend.core.exceptions import AIServiceError
|
|
|
|
|
|
class TestAnthropicSummarizer:
    """Unit tests covering the Anthropic-backed summarization service."""

    @pytest.fixture
    def summarizer(self):
        """Provide an AnthropicSummarizer wired to a dummy API key."""
        return AnthropicSummarizer(api_key="test-key")

    @pytest.fixture
    def mock_anthropic_response(self):
        """Build a fake messages.create response whose text is valid summary JSON."""
        payload = {
            "summary": "This is a test summary of the video content.",
            "key_points": ["Point 1", "Point 2", "Point 3"],
            "main_themes": ["Theme 1", "Theme 2"],
            "actionable_insights": ["Insight 1", "Insight 2"],
            "confidence_score": 0.92
        }
        response = MagicMock()
        response.content = [MagicMock()]
        response.content[0].text = json.dumps(payload)
        response.usage.input_tokens = 100
        response.usage.output_tokens = 50
        return response

    @pytest.mark.asyncio
    async def test_generate_summary_success(self, summarizer, mock_anthropic_response):
        """A short transcript produces a parsed summary plus cost/processing metadata."""
        req = SummaryRequest(
            transcript="This is a test transcript with some content to summarize.",
            length=SummaryLength.STANDARD
        )

        with patch.object(
            summarizer.client.messages,
            'create',
            new_callable=AsyncMock,
            return_value=mock_anthropic_response,
        ) as create_mock:
            outcome = await summarizer.generate_summary(req)

        # Exactly one API call, configured with the expected model settings.
        create_mock.assert_called_once()
        sent_kwargs = create_mock.call_args.kwargs
        assert sent_kwargs['model'] == "claude-3-5-haiku-20241022"
        assert sent_kwargs['temperature'] == 0.3

        # The mocked JSON payload should round-trip into the result object.
        assert outcome.summary == "This is a test summary of the video content."
        assert len(outcome.key_points) == 3
        assert len(outcome.main_themes) == 2
        assert len(outcome.actionable_insights) == 2
        assert outcome.confidence_score == 0.92
        assert outcome.cost_data["total_cost_usd"] > 0
        assert outcome.processing_metadata["model"] == "claude-3-5-haiku-20241022"

    @pytest.mark.asyncio
    async def test_generate_summary_with_focus_areas(self, summarizer, mock_anthropic_response):
        """Focus areas supplied on the request must surface in the outgoing prompt."""
        req = SummaryRequest(
            transcript="This is a technical tutorial about Python programming.",
            length=SummaryLength.DETAILED,
            focus_areas=["technical", "educational"]
        )

        with patch.object(
            summarizer.client.messages,
            'create',
            new_callable=AsyncMock,
            return_value=mock_anthropic_response,
        ) as create_mock:
            await summarizer.generate_summary(req)

        sent_prompt = create_mock.call_args.kwargs['messages'][0]['content']
        assert "technical, educational" in sent_prompt

    @pytest.mark.asyncio
    async def test_chunked_processing_triggered(self, summarizer):
        """Transcripts approaching Claude's 200k-token context must be chunked."""
        # ~160k tokens of repeated text, enough to exceed the single-call budget.
        oversized_transcript = "This is a sentence. " * 40000
        req = SummaryRequest(
            transcript=oversized_transcript,
            length=SummaryLength.STANDARD
        )

        with patch.object(summarizer, '_generate_chunked_summary') as chunked_mock:
            chunked_mock.return_value = MagicMock()
            await summarizer.generate_summary(req)

        chunked_mock.assert_called_once_with(req)

    def test_cost_estimation(self, summarizer):
        """Estimates are positive floats and near-free for short transcripts."""
        text = "Test transcript for cost estimation. This should have a reasonable cost."

        estimate = summarizer.estimate_cost(text, SummaryLength.STANDARD)

        assert isinstance(estimate, float)
        assert 0 < estimate < 0.01  # short input should be very cheap

    def test_cost_estimation_different_lengths(self, summarizer):
        """Each longer summary tier should estimate a strictly higher cost."""
        text = "Test transcript for cost estimation."

        tiers = (SummaryLength.BRIEF, SummaryLength.STANDARD, SummaryLength.DETAILED)
        brief, standard, detailed = (summarizer.estimate_cost(text, tier) for tier in tiers)

        assert brief < standard < detailed

    def test_token_counting(self, summarizer):
        """Token counts are positive ints and plausible for a short string."""
        count = summarizer.get_token_count("Hello world, this is a test.")

        assert isinstance(count, int)
        assert 0 < count < 20  # short sentence must stay well under 20 tokens

    def test_split_transcript_intelligently(self, summarizer):
        """An over-long single paragraph is split into multiple non-empty chunks."""
        # Repeat one long sentence so sentence-level splitting is forced.
        blob = "This is a very long sentence with many words that will definitely exceed our token limit. " * 50

        pieces = summarizer._split_transcript_intelligently(blob, max_tokens=1000)

        assert len(pieces) > 1  # must split rather than return one chunk
        for piece in pieces:
            assert isinstance(piece, str)
            assert piece.strip()  # no empty/whitespace-only chunks

    def test_get_max_tokens(self, summarizer):
        """Output-token budgets are fixed per tier and grow with summary length."""
        brief = summarizer._get_max_tokens(SummaryLength.BRIEF)
        standard = summarizer._get_max_tokens(SummaryLength.STANDARD)
        detailed = summarizer._get_max_tokens(SummaryLength.DETAILED)

        assert brief == 400
        assert standard == 800
        assert detailed == 1500
        assert brief < standard < detailed

    @pytest.mark.asyncio
    async def test_api_error_handling(self, summarizer):
        """API failures are wrapped in AIServiceError with error-type details."""
        req = SummaryRequest(
            transcript="Test transcript",
            length=SummaryLength.STANDARD
        )

        with patch.object(
            summarizer.client.messages,
            'create',
            new_callable=AsyncMock,
            side_effect=Exception("API Error"),
        ):
            with pytest.raises(AIServiceError) as exc_info:
                await summarizer.generate_summary(req)

        assert "Anthropic summarization failed" in str(exc_info.value)
        assert exc_info.value.details["error_type"] == "Exception"

    def test_build_summary_prompt(self, summarizer):
        """The prompt embeds length guidance, focus areas, transcript, and JSON schema hints."""
        req = SummaryRequest(
            transcript="Test transcript content here.",
            length=SummaryLength.STANDARD,
            focus_areas=["technical", "educational"]
        )

        prompt = summarizer._build_summary_prompt(req)

        for fragment in (
            "comprehensive summary in 300-500 words",
            "technical, educational",
            "Test transcript content here.",
            "JSON format",
            "confidence_score",
        ):
            assert fragment in prompt

    def test_extract_json_from_response(self, summarizer):
        """JSON payloads are recovered whether returned bare or wrapped in prose."""
        # Case 1: the whole response is a JSON document.
        bare = '{"summary": "Test summary", "key_points": ["Point 1"], "main_themes": ["Theme 1"], "actionable_insights": ["Insight 1"], "confidence_score": 0.9}'
        parsed = summarizer._extract_json_from_response(bare)

        assert parsed["summary"] == "Test summary"
        assert len(parsed["key_points"]) == 1

        # Case 2: the JSON object is embedded inside surrounding chatter.
        wrapped = 'Here is the analysis: {"summary": "Embedded summary", "key_points": ["Point 1"], "main_themes": ["Theme 1"], "actionable_insights": ["Insight 1"], "confidence_score": 0.85} Hope this helps!'
        parsed = summarizer._extract_json_from_response(wrapped)

        assert parsed["summary"] == "Embedded summary"

    def test_parse_structured_response(self, summarizer):
        """Plain-text (non-JSON) responses fall back to section-by-section parsing."""
        response_text = """
        Summary: This is the main summary of the content.

        Key Points:
        - First important point
        - Second important point

        Main Themes:
        - Primary theme
        - Secondary theme

        Actionable Insights:
        - First actionable insight
        - Second actionable insight

        Confidence: 0.88
        """

        parsed = summarizer._parse_structured_response(response_text)

        assert "main summary" in parsed["summary"]
        assert len(parsed["key_points"]) == 2
        assert len(parsed["main_themes"]) == 2
        assert len(parsed["actionable_insights"]) == 2
        assert parsed["confidence_score"] == 0.88