"""Unit tests for OpenAI summarizer service.""" import json import pytest from unittest.mock import AsyncMock, MagicMock, patch from backend.services.openai_summarizer import OpenAISummarizer from backend.services.ai_service import SummaryRequest, SummaryLength from backend.core.exceptions import AIServiceError class TestOpenAISummarizer: """Test cases for OpenAI summarization service.""" @pytest.fixture def summarizer(self): """Create test summarizer instance.""" return OpenAISummarizer(api_key="test-key") @pytest.fixture def mock_openai_response(self): """Create mock OpenAI API response.""" mock_response = MagicMock() mock_response.choices[0].message.content = json.dumps({ "summary": "This is a test summary of the video content.", "key_points": ["Point 1", "Point 2", "Point 3"], "main_themes": ["Theme 1", "Theme 2"], "actionable_insights": ["Insight 1", "Insight 2"], "confidence_score": 0.92 }) mock_response.usage.prompt_tokens = 100 mock_response.usage.completion_tokens = 50 mock_response.usage.total_tokens = 150 return mock_response @pytest.mark.asyncio async def test_generate_summary_success(self, summarizer, mock_openai_response): """Test successful summary generation.""" with patch.object(summarizer.client.chat.completions, 'create', new_callable=AsyncMock, return_value=mock_openai_response) as mock_create: request = SummaryRequest( transcript="This is a test transcript with some content to summarize.", length=SummaryLength.STANDARD ) result = await summarizer.generate_summary(request) # Verify the request was made mock_create.assert_called_once() call_args = mock_create.call_args assert call_args[1]['model'] == "gpt-4o-mini" assert call_args[1]['temperature'] == 0.3 assert call_args[1]['response_format'] == {"type": "json_object"} # Verify the result assert result.summary == "This is a test summary of the video content." assert len(result.key_points) == 3 assert len(result.main_themes) == 2 assert len(result.actionable_insights) == 2 assert result.confidence_score == 0.92 assert result.cost_data["total_cost_usd"] > 0 assert result.processing_metadata["model"] == "gpt-4o-mini" @pytest.mark.asyncio async def test_generate_summary_with_focus_areas(self, summarizer, mock_openai_response): """Test summary generation with focus areas.""" with patch.object(summarizer.client.chat.completions, 'create', new_callable=AsyncMock, return_value=mock_openai_response) as mock_create: request = SummaryRequest( transcript="This is a technical tutorial about Python programming.", length=SummaryLength.DETAILED, focus_areas=["technical", "educational"] ) await summarizer.generate_summary(request) # Verify focus areas were included in the prompt call_args = mock_create.call_args prompt = call_args[1]['messages'][1]['content'] assert "technical, educational" in prompt @pytest.mark.asyncio async def test_chunked_processing_triggered(self, summarizer): """Test that chunked processing is triggered for long transcripts.""" # Create a very long transcript long_transcript = "This is a sentence. " * 3000 # ~6000 tokens with patch.object(summarizer, '_generate_chunked_summary') as mock_chunked: mock_chunked.return_value = MagicMock() request = SummaryRequest( transcript=long_transcript, length=SummaryLength.STANDARD ) await summarizer.generate_summary(request) # Should have triggered chunked processing mock_chunked.assert_called_once_with(request) def test_cost_estimation(self, summarizer): """Test cost estimation accuracy.""" transcript = "Test transcript for cost estimation. 
This should have a reasonable cost." cost = summarizer.estimate_cost(transcript, SummaryLength.STANDARD) assert isinstance(cost, float) assert cost > 0 assert cost < 0.01 # Should be very cheap for short transcript def test_cost_estimation_different_lengths(self, summarizer): """Test that longer summaries cost more.""" transcript = "Test transcript for cost estimation." brief_cost = summarizer.estimate_cost(transcript, SummaryLength.BRIEF) standard_cost = summarizer.estimate_cost(transcript, SummaryLength.STANDARD) detailed_cost = summarizer.estimate_cost(transcript, SummaryLength.DETAILED) assert brief_cost < standard_cost < detailed_cost def test_token_counting(self, summarizer): """Test token counting accuracy.""" text = "Hello world, this is a test." token_count = summarizer.get_token_count(text) assert isinstance(token_count, int) assert token_count > 0 assert token_count < 20 # Should be reasonable for short text def test_split_transcript_intelligently(self, summarizer): """Test intelligent transcript splitting.""" # Create a very long single paragraph that exceeds token limit to force sentence splitting very_long_paragraph = "This is a very long sentence with many words that will definitely exceed our token limit. " * 20 chunks = summarizer._split_transcript_intelligently(very_long_paragraph, max_tokens=50) assert len(chunks) > 1 # Should split into multiple chunks assert all(isinstance(chunk, str) for chunk in chunks) assert all(len(chunk.strip()) > 0 for chunk in chunks) def test_get_max_tokens(self, summarizer): """Test max token calculation for different lengths.""" brief_tokens = summarizer._get_max_tokens(SummaryLength.BRIEF) standard_tokens = summarizer._get_max_tokens(SummaryLength.STANDARD) detailed_tokens = summarizer._get_max_tokens(SummaryLength.DETAILED) assert brief_tokens < standard_tokens < detailed_tokens assert brief_tokens == 300 assert standard_tokens == 700 assert detailed_tokens == 1200 @pytest.mark.asyncio async def test_api_error_handling(self, summarizer): """Test handling of OpenAI API errors.""" with patch.object(summarizer.client.chat.completions, 'create', new_callable=AsyncMock, side_effect=Exception("API Error")): request = SummaryRequest( transcript="Test transcript", length=SummaryLength.STANDARD ) with pytest.raises(AIServiceError) as exc_info: await summarizer.generate_summary(request) assert "OpenAI summarization failed" in str(exc_info.value) assert exc_info.value.details["error_type"] == "Exception" def test_build_summary_prompt(self, summarizer): """Test prompt building for different configurations.""" request = SummaryRequest( transcript="Test transcript content here.", length=SummaryLength.STANDARD, focus_areas=["technical", "educational"] ) prompt = summarizer._build_summary_prompt(request) assert "comprehensive summary in 300-500 words" in prompt assert "technical, educational" in prompt assert "Test transcript content here." in prompt assert "JSON format" in prompt assert "confidence_score" in prompt
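

# ---------------------------------------------------------------------------
# Illustrative sketch, not exercised by this suite: test_split_transcript_
# intelligently assumes paragraph-first, sentence-fallback chunking. A minimal
# helper with that contract might look like the function below. The
# whitespace-based token estimate is an assumption for illustration; the real
# service presumably counts tokens with a proper tokenizer such as tiktoken.
# ---------------------------------------------------------------------------
import re


def _example_split_transcript(text: str, max_tokens: int) -> list:
    """Split on blank lines first, then on sentences when a paragraph is too long."""

    def rough_tokens(s: str) -> int:
        return len(s.split())  # crude stand-in for a real tokenizer

    chunks = []
    for paragraph in text.split("\n\n"):
        if not paragraph.strip():
            continue
        if rough_tokens(paragraph) <= max_tokens:
            chunks.append(paragraph)
            continue
        # Paragraph exceeds the budget: accumulate sentences until full,
        # then start a new chunk.
        current = []
        for sentence in re.split(r"(?<=[.!?])\s+", paragraph):
            if not sentence:
                continue
            if current and rough_tokens(" ".join(current + [sentence])) > max_tokens:
                chunks.append(" ".join(current))
                current = [sentence]
            else:
                current.append(sentence)
        if current:
            chunks.append(" ".join(current))
    return chunks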
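

# ---------------------------------------------------------------------------
# Illustrative sketch of the cost math the estimation tests rely on: cost
# scales with input tokens plus the per-length output budget (300/700/1200
# per test_get_max_tokens), which is what makes brief < standard < detailed.
# The per-token rates below are assumptions based on published gpt-4o-mini
# pricing and may be out of date; the service owns the authoritative numbers.
# ---------------------------------------------------------------------------
_INPUT_COST_PER_TOKEN = 0.15 / 1_000_000   # assumed $0.15 per 1M input tokens
_OUTPUT_COST_PER_TOKEN = 0.60 / 1_000_000  # assumed $0.60 per 1M output tokens
_MAX_OUTPUT_TOKENS = {"brief": 300, "standard": 700, "detailed": 1200}


def _example_estimate_cost(input_tokens: int, length: str) -> float:
    """Upper-bound estimate: all input tokens plus the full output budget."""
    return (input_tokens * _INPUT_COST_PER_TOKEN
            + _MAX_OUTPUT_TOKENS[length] * _OUTPUT_COST_PER_TOKEN)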