youtube-summarizer/backend/tests/unit/test_anthropic_summarizer.py

"""Unit tests for Anthropic summarizer service."""
import json
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from backend.services.anthropic_summarizer import AnthropicSummarizer
from backend.services.ai_service import SummaryRequest, SummaryLength
from backend.core.exceptions import AIServiceError
class TestAnthropicSummarizer:
"""Test cases for Anthropic summarization service."""
@pytest.fixture
def summarizer(self):
"""Create test summarizer instance."""
return AnthropicSummarizer(api_key="test-key")
@pytest.fixture
def mock_anthropic_response(self):
"""Create mock Anthropic API response."""
mock_response = MagicMock()
mock_response.content = [MagicMock()]
mock_response.content[0].text = json.dumps({
"summary": "This is a test summary of the video content.",
"key_points": ["Point 1", "Point 2", "Point 3"],
"main_themes": ["Theme 1", "Theme 2"],
"actionable_insights": ["Insight 1", "Insight 2"],
"confidence_score": 0.92
})
mock_response.usage.input_tokens = 100
mock_response.usage.output_tokens = 50
return mock_response

    @pytest.mark.asyncio
    async def test_generate_summary_success(self, summarizer, mock_anthropic_response):
        """Test successful summary generation."""
        with patch.object(
            summarizer.client.messages,
            'create',
            new_callable=AsyncMock,
            return_value=mock_anthropic_response,
        ) as mock_create:
            request = SummaryRequest(
                transcript="This is a test transcript with some content to summarize.",
                length=SummaryLength.STANDARD
            )
            result = await summarizer.generate_summary(request)

            # Verify the request was made
            mock_create.assert_called_once()
            call_args = mock_create.call_args
            assert call_args[1]['model'] == "claude-3-5-haiku-20241022"
            assert call_args[1]['temperature'] == 0.3

            # Verify the result
            assert result.summary == "This is a test summary of the video content."
            assert len(result.key_points) == 3
            assert len(result.main_themes) == 2
            assert len(result.actionable_insights) == 2
            assert result.confidence_score == 0.92
            assert result.cost_data["total_cost_usd"] > 0
            assert result.processing_metadata["model"] == "claude-3-5-haiku-20241022"

    @pytest.mark.asyncio
    async def test_generate_summary_with_focus_areas(self, summarizer, mock_anthropic_response):
        """Test summary generation with focus areas."""
        with patch.object(
            summarizer.client.messages,
            'create',
            new_callable=AsyncMock,
            return_value=mock_anthropic_response,
        ) as mock_create:
            request = SummaryRequest(
                transcript="This is a technical tutorial about Python programming.",
                length=SummaryLength.DETAILED,
                focus_areas=["technical", "educational"]
            )
            await summarizer.generate_summary(request)

            # Verify focus areas were included in the prompt
            call_args = mock_create.call_args
            prompt = call_args[1]['messages'][0]['content']
            assert "technical, educational" in prompt

    @pytest.mark.asyncio
    async def test_chunked_processing_triggered(self, summarizer):
        """Test that chunked processing is triggered for long transcripts."""
        # Create a very long transcript (Claude has a 200k-token context limit)
        long_transcript = "This is a sentence. " * 40000  # ~160k tokens
        with patch.object(
            summarizer, '_generate_chunked_summary', new_callable=AsyncMock
        ) as mock_chunked:
            mock_chunked.return_value = MagicMock()
            request = SummaryRequest(
                transcript=long_transcript,
                length=SummaryLength.STANDARD
            )
            await summarizer.generate_summary(request)

            # Should have triggered chunked processing
            mock_chunked.assert_called_once_with(request)
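
    # A hedged companion check (sketch, not from the original suite): assumes,
    # as test_generate_summary_success suggests, that generate_summary only
    # delegates to _generate_chunked_summary for long inputs and calls the
    # API directly otherwise.
    @pytest.mark.asyncio
    async def test_chunked_processing_not_triggered_for_short_transcript(
        self, summarizer, mock_anthropic_response
    ):
        """Sketch: a short transcript should stay on the single-pass path."""
        with patch.object(
            summarizer.client.messages,
            'create',
            new_callable=AsyncMock,
            return_value=mock_anthropic_response,
        ), patch.object(
            summarizer, '_generate_chunked_summary', new_callable=AsyncMock
        ) as mock_chunked:
            request = SummaryRequest(
                transcript="A short transcript.",
                length=SummaryLength.STANDARD
            )
            await summarizer.generate_summary(request)
            mock_chunked.assert_not_called()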

    def test_cost_estimation(self, summarizer):
        """Test cost estimation accuracy."""
        transcript = "Test transcript for cost estimation. This should have a reasonable cost."
        cost = summarizer.estimate_cost(transcript, SummaryLength.STANDARD)
        assert isinstance(cost, float)
        assert cost > 0
        assert cost < 0.01  # Should be very cheap for a short transcript

    def test_cost_estimation_different_lengths(self, summarizer):
        """Test that longer summaries cost more."""
        transcript = "Test transcript for cost estimation."
        brief_cost = summarizer.estimate_cost(transcript, SummaryLength.BRIEF)
        standard_cost = summarizer.estimate_cost(transcript, SummaryLength.STANDARD)
        detailed_cost = summarizer.estimate_cost(transcript, SummaryLength.DETAILED)
        assert brief_cost < standard_cost < detailed_cost
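
    # A hedged extra property check (sketch, not from the original suite):
    # per-token pricing implies a longer transcript should produce a higher
    # estimate at the same summary length. Assumes estimate_cost scales with
    # input token count.
    def test_cost_estimation_scales_with_transcript_length(self, summarizer):
        """Sketch: a longer transcript should yield a higher cost estimate."""
        short_cost = summarizer.estimate_cost("Short text.", SummaryLength.STANDARD)
        long_cost = summarizer.estimate_cost("Short text. " * 500, SummaryLength.STANDARD)
        assert short_cost < long_cost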

    def test_token_counting(self, summarizer):
        """Test token counting accuracy."""
        text = "Hello world, this is a test."
        token_count = summarizer.get_token_count(text)
        assert isinstance(token_count, int)
        assert token_count > 0
        assert token_count < 20  # Should be reasonable for short text

    def test_split_transcript_intelligently(self, summarizer):
        """Test intelligent transcript splitting."""
        # A long single paragraph that exceeds the token limit forces sentence splitting
        very_long_paragraph = (
            "This is a very long sentence with many words that will definitely exceed our token limit. " * 50
        )
        chunks = summarizer._split_transcript_intelligently(very_long_paragraph, max_tokens=1000)
        assert len(chunks) > 1  # Should split into multiple chunks
        assert all(isinstance(chunk, str) for chunk in chunks)
        assert all(len(chunk.strip()) > 0 for chunk in chunks)
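
    # A hedged follow-up (sketch, not from the original suite): assumes
    # _split_transcript_intelligently keeps each chunk at or under the
    # requested budget, measured with the service's own get_token_count.
    def test_split_chunks_respect_token_budget(self, summarizer):
        """Sketch: each chunk should stay within the max_tokens budget."""
        very_long_paragraph = "This sentence pads out the transcript for splitting purposes. " * 100
        chunks = summarizer._split_transcript_intelligently(very_long_paragraph, max_tokens=500)
        assert all(summarizer.get_token_count(chunk) <= 500 for chunk in chunks)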

    def test_get_max_tokens(self, summarizer):
        """Test max token calculation for different lengths."""
        brief_tokens = summarizer._get_max_tokens(SummaryLength.BRIEF)
        standard_tokens = summarizer._get_max_tokens(SummaryLength.STANDARD)
        detailed_tokens = summarizer._get_max_tokens(SummaryLength.DETAILED)
        assert brief_tokens < standard_tokens < detailed_tokens
        assert brief_tokens == 400
        assert standard_tokens == 800
        assert detailed_tokens == 1500

    @pytest.mark.asyncio
    async def test_api_error_handling(self, summarizer):
        """Test handling of Anthropic API errors."""
        with patch.object(
            summarizer.client.messages,
            'create',
            new_callable=AsyncMock,
            side_effect=Exception("API Error"),
        ):
            request = SummaryRequest(
                transcript="Test transcript",
                length=SummaryLength.STANDARD
            )
            with pytest.raises(AIServiceError) as exc_info:
                await summarizer.generate_summary(request)
            assert "Anthropic summarization failed" in str(exc_info.value)
            assert exc_info.value.details["error_type"] == "Exception"

    def test_build_summary_prompt(self, summarizer):
        """Test prompt building for different configurations."""
        request = SummaryRequest(
            transcript="Test transcript content here.",
            length=SummaryLength.STANDARD,
            focus_areas=["technical", "educational"]
        )
        prompt = summarizer._build_summary_prompt(request)
        assert "comprehensive summary in 300-500 words" in prompt
        assert "technical, educational" in prompt
        assert "Test transcript content here." in prompt
        assert "JSON format" in prompt
        assert "confidence_score" in prompt

    def test_extract_json_from_response(self, summarizer):
        """Test JSON extraction from Claude responses."""
        # Test direct JSON response
        json_response = (
            '{"summary": "Test summary", "key_points": ["Point 1"], '
            '"main_themes": ["Theme 1"], "actionable_insights": ["Insight 1"], '
            '"confidence_score": 0.9}'
        )
        result = summarizer._extract_json_from_response(json_response)
        assert result["summary"] == "Test summary"
        assert len(result["key_points"]) == 1

        # Test JSON embedded in text
        text_with_json = (
            'Here is the analysis: {"summary": "Embedded summary", '
            '"key_points": ["Point 1"], "main_themes": ["Theme 1"], '
            '"actionable_insights": ["Insight 1"], "confidence_score": 0.85} '
            'Hope this helps!'
        )
        result = summarizer._extract_json_from_response(text_with_json)
        assert result["summary"] == "Embedded summary"

    def test_parse_structured_response(self, summarizer):
        """Test fallback parsing for non-JSON responses."""
        response_text = """
        Summary: This is the main summary of the content.
        Key Points:
        - First important point
        - Second important point
        Main Themes:
        - Primary theme
        - Secondary theme
        Actionable Insights:
        - First actionable insight
        - Second actionable insight
        Confidence: 0.88
        """
        result = summarizer._parse_structured_response(response_text)
        assert "main summary" in result["summary"]
        assert len(result["key_points"]) == 2
        assert len(result["main_themes"]) == 2
        assert len(result["actionable_insights"]) == 2
        assert result["confidence_score"] == 0.88