"""Unit tests for SummaryPipeline orchestration service.""" import pytest import asyncio from unittest.mock import Mock, AsyncMock, patch from datetime import datetime, timedelta from backend.services.summary_pipeline import SummaryPipeline from backend.models.pipeline import ( PipelineStage, PipelineConfig, PipelineResult, ContentAnalysis ) from backend.services.ai_service import SummaryResult @pytest.fixture def mock_video_service(): """Mock VideoService for testing.""" service = Mock() service.extract_video_id = AsyncMock(return_value="test_video_id") service.get_video_metadata = AsyncMock(return_value={ "title": "Test Video", "description": "Test description", "duration": "PT10M30S", "category": "Education", "tags": ["test", "tutorial"], "language": "en" }) return service @pytest.fixture def mock_transcript_service(): """Mock TranscriptService for testing.""" service = Mock() service.extract_transcript = AsyncMock() service.extract_transcript.return_value = Mock( transcript="This is a test transcript with educational content. " "We will learn about important concepts and examples." ) return service @pytest.fixture def mock_ai_service(): """Mock AnthropicSummarizer for testing.""" service = Mock() service.generate_summary = AsyncMock() service.generate_summary.return_value = SummaryResult( summary="Test summary of the video content", key_points=["Point 1", "Point 2", "Point 3"], main_themes=["Theme 1", "Theme 2"], actionable_insights=["Insight 1"], confidence_score=0.85, processing_metadata={"tokens_used": 1000}, cost_data={"total_cost": 0.01} ) return service @pytest.fixture def mock_cache_manager(): """Mock CacheManager for testing.""" cache = Mock() cache.get_cached_pipeline_result = AsyncMock(return_value=None) cache.cache_pipeline_result = AsyncMock(return_value=True) cache.get_cached_video_metadata = AsyncMock(return_value=None) cache.cache_video_metadata = AsyncMock(return_value=True) return cache @pytest.fixture def mock_notification_service(): """Mock NotificationService for testing.""" service = Mock() service.send_completion_notification = AsyncMock(return_value=True) service.send_error_notification = AsyncMock(return_value=True) service.send_progress_notification = AsyncMock(return_value=True) return service @pytest.fixture def pipeline(mock_video_service, mock_transcript_service, mock_ai_service, mock_cache_manager, mock_notification_service): """Create SummaryPipeline instance with mocked dependencies.""" return SummaryPipeline( video_service=mock_video_service, transcript_service=mock_transcript_service, ai_service=mock_ai_service, cache_manager=mock_cache_manager, notification_service=mock_notification_service ) class TestSummaryPipeline: """Test suite for SummaryPipeline class.""" @pytest.mark.asyncio async def test_process_video_initialization(self, pipeline): """Test video processing initialization.""" video_url = "https://youtube.com/watch?v=test123" config = PipelineConfig(summary_length="standard") job_id = await pipeline.process_video(video_url, config) # Verify job ID is generated assert job_id is not None assert len(job_id) > 0 # Verify job is tracked assert job_id in pipeline.active_jobs # Verify initial state result = pipeline.active_jobs[job_id] assert result.job_id == job_id assert result.video_url == video_url assert result.status == PipelineStage.INITIALIZED assert result.started_at is not None @pytest.mark.asyncio async def test_process_video_with_progress_callback(self, pipeline): """Test video processing with progress callback.""" video_url = "https://youtube.com/watch?v=test123" progress_updates = [] async def progress_callback(job_id, progress): progress_updates.append((job_id, progress)) job_id = await pipeline.process_video( video_url, progress_callback=progress_callback ) # Allow some processing time await asyncio.sleep(0.1) # Verify callback is registered assert job_id in pipeline.progress_callbacks assert len(pipeline.progress_callbacks[job_id]) == 1 @pytest.mark.asyncio async def test_successful_pipeline_execution(self, pipeline): """Test complete successful pipeline execution.""" video_url = "https://youtube.com/watch?v=test123" config = PipelineConfig( summary_length="standard", enable_notifications=True ) # Start processing job_id = await pipeline.process_video(video_url, config) # Wait for completion (with timeout) max_wait = 5.0 waited = 0.0 while waited < max_wait: result = pipeline.active_jobs.get(job_id) if result and result.status in [PipelineStage.COMPLETED, PipelineStage.FAILED]: break await asyncio.sleep(0.1) waited += 0.1 # Verify completion result = pipeline.active_jobs[job_id] assert result.status == PipelineStage.COMPLETED assert result.video_id == "test_video_id" assert result.summary is not None assert result.key_points is not None assert result.main_themes is not None assert result.quality_score is not None assert result.completed_at is not None # Verify services were called pipeline.video_service.extract_video_id.assert_called_once() pipeline.transcript_service.extract_transcript.assert_called_once() pipeline.ai_service.generate_summary.assert_called_once() pipeline.cache_manager.cache_pipeline_result.assert_called_once() pipeline.notification_service.send_completion_notification.assert_called_once() @pytest.mark.asyncio async def test_pipeline_error_handling(self, pipeline): """Test pipeline error handling and retry logic.""" # Make video service fail pipeline.video_service.extract_video_id.side_effect = Exception("Video not found") video_url = "https://youtube.com/watch?v=invalid" config = PipelineConfig(max_retries=1) job_id = await pipeline.process_video(video_url, config) # Wait for failure max_wait = 5.0 waited = 0.0 while waited < max_wait: result = pipeline.active_jobs.get(job_id) if result and result.status == PipelineStage.FAILED: break await asyncio.sleep(0.1) waited += 0.1 # Verify failure handling result = pipeline.active_jobs[job_id] assert result.status == PipelineStage.FAILED assert result.error is not None assert "Video not found" in result.error["message"] assert result.retry_count <= config.max_retries @pytest.mark.asyncio async def test_content_analysis(self, pipeline): """Test content analysis functionality.""" transcript = """ This is a technical tutorial about programming concepts. We will learn about algorithms, functions, and databases. The implementation details are complex but important to understand. """ metadata = { "title": "Programming Tutorial", "category": "Education", "tags": ["programming", "tutorial", "technical"], "language": "en" } analysis = await pipeline._analyze_content_characteristics(transcript, metadata) assert isinstance(analysis, ContentAnalysis) assert analysis.content_type == "technical" assert analysis.word_count > 0 assert analysis.language == "en" assert len(analysis.technical_indicators) > 0 assert "algorithm" in analysis.technical_indicators assert "function" in analysis.technical_indicators def test_config_optimization(self, pipeline): """Test configuration optimization based on content analysis.""" base_config = PipelineConfig( summary_length="standard", focus_areas=[], quality_threshold=0.7 ) # Test technical content optimization technical_analysis = ContentAnalysis( transcript_length=5000, word_count=1000, estimated_reading_time=4.0, complexity_score=0.8, content_type="technical", language="en", technical_indicators=["algorithm", "function", "code"], educational_indicators=[], entertainment_indicators=[] ) optimized_config = pipeline._optimize_config_for_content( base_config, technical_analysis ) assert optimized_config.summary_length == "standard" # Not changed for 1000 words assert "technical concepts" in optimized_config.focus_areas assert optimized_config.quality_threshold < base_config.quality_threshold # Lowered due to complexity @pytest.mark.asyncio async def test_quality_validation(self, pipeline): """Test summary quality validation.""" result = PipelineResult( job_id="test", video_url="test", video_id="test", status=PipelineStage.COMPLETED, summary="This is a good summary with appropriate length and detail.", key_points=["Point 1", "Point 2", "Point 3", "Point 4"], main_themes=["Theme 1", "Theme 2"], actionable_insights=["Insight 1"], confidence_score=0.9 ) analysis = ContentAnalysis( transcript_length=10000, word_count=2000, estimated_reading_time=8.0, complexity_score=0.5, content_type="general", language="en", technical_indicators=[], educational_indicators=[], entertainment_indicators=[] ) quality_score = await pipeline._validate_summary_quality(result, analysis) assert 0.0 <= quality_score <= 1.0 assert quality_score > 0.5 # Should be reasonably high for good summary @pytest.mark.asyncio async def test_pipeline_cancellation(self, pipeline): """Test pipeline cancellation functionality.""" video_url = "https://youtube.com/watch?v=test123" # Start processing job_id = await pipeline.process_video(video_url) # Verify job is active assert job_id in pipeline.active_jobs # Cancel the job success = await pipeline.cancel_pipeline(job_id) assert success is True # Verify job is cancelled result = pipeline.active_jobs[job_id] assert result.status == PipelineStage.CANCELLED assert result.completed_at is not None @pytest.mark.asyncio async def test_get_pipeline_result(self, pipeline): """Test getting pipeline result.""" video_url = "https://youtube.com/watch?v=test123" # Test non-existent job result = await pipeline.get_pipeline_result("non_existent") assert result is None # Test active job job_id = await pipeline.process_video(video_url) result = await pipeline.get_pipeline_result(job_id) assert result is not None assert result.job_id == job_id def test_iso_duration_parsing(self, pipeline): """Test ISO 8601 duration parsing.""" # Test various duration formats assert pipeline._parse_iso_duration("PT10M30S") == 630 # 10:30 assert pipeline._parse_iso_duration("PT1H5M") == 3900 # 1:05:00 assert pipeline._parse_iso_duration("PT45S") == 45 # 0:45 assert pipeline._parse_iso_duration("PT2H") == 7200 # 2:00:00 assert pipeline._parse_iso_duration("invalid") == 0 # Invalid format @pytest.mark.asyncio async def test_cache_integration(self, pipeline): """Test cache integration in pipeline.""" # Setup cache to return existing result cached_result = { "job_id": "cached_job", "video_url": "https://youtube.com/watch?v=cached", "video_id": "cached_id", "status": PipelineStage.COMPLETED.value, "summary": "Cached summary", "key_points": ["Cached point 1", "Cached point 2"], "quality_score": 0.8, "completed_at": datetime.utcnow().isoformat() } pipeline.cache_manager.get_cached_pipeline_result.return_value = cached_result video_url = "https://youtube.com/watch?v=cached" job_id = await pipeline.process_video(video_url) # Wait for cache restoration await asyncio.sleep(0.2) # Verify cache was checked pipeline.cache_manager.get_cached_pipeline_result.assert_called() @pytest.mark.asyncio async def test_notification_integration(self, pipeline): """Test notification service integration.""" video_url = "https://youtube.com/watch?v=test123" config = PipelineConfig(enable_notifications=True) job_id = await pipeline.process_video(video_url, config) # Wait for completion max_wait = 5.0 waited = 0.0 while waited < max_wait: result = pipeline.active_jobs.get(job_id) if result and result.status == PipelineStage.COMPLETED: break await asyncio.sleep(0.1) waited += 0.1 # Verify notifications were sent pipeline.notification_service.send_progress_notification.assert_called() pipeline.notification_service.send_completion_notification.assert_called_once() @pytest.mark.asyncio async def test_cleanup_completed_jobs(self, pipeline): """Test cleanup of old completed jobs.""" # Create some old completed jobs old_time = datetime.utcnow() - timedelta(hours=25) pipeline.active_jobs["old_job"] = PipelineResult( job_id="old_job", video_url="test", video_id="test", status=PipelineStage.COMPLETED, completed_at=old_time ) pipeline.active_jobs["recent_job"] = PipelineResult( job_id="recent_job", video_url="test", video_id="test", status=PipelineStage.COMPLETED, completed_at=datetime.utcnow() - timedelta(hours=1) ) # Cleanup jobs older than 24 hours await pipeline.cleanup_completed_jobs(max_age_hours=24) # Verify old job was removed but recent job remains assert "old_job" not in pipeline.active_jobs assert "recent_job" in pipeline.active_jobs def test_get_active_jobs(self, pipeline): """Test getting list of active jobs.""" # Initially no jobs assert pipeline.get_active_jobs() == [] # Add some jobs manually for testing pipeline.active_jobs["job1"] = Mock() pipeline.active_jobs["job2"] = Mock() active_jobs = pipeline.get_active_jobs() assert len(active_jobs) == 2 assert "job1" in active_jobs assert "job2" in active_jobs class TestPipelineEdgeCases: """Test edge cases and error conditions.""" @pytest.mark.asyncio async def test_empty_transcript_handling(self, pipeline): """Test handling of empty transcript.""" pipeline.transcript_service.extract_transcript.return_value = Mock( transcript="" ) video_url = "https://youtube.com/watch?v=empty" job_id = await pipeline.process_video(video_url) # Wait for processing await asyncio.sleep(0.2) # Should handle empty transcript gracefully result = pipeline.active_jobs.get(job_id) assert result is not None @pytest.mark.asyncio async def test_invalid_video_url_handling(self, pipeline): """Test handling of invalid video URLs.""" pipeline.video_service.extract_video_id.side_effect = ValueError("Invalid URL") video_url = "https://not-youtube.com/watch?v=invalid" config = PipelineConfig(max_retries=0) # No retries for faster test job_id = await pipeline.process_video(video_url, config) # Wait for failure await asyncio.sleep(0.2) result = pipeline.active_jobs.get(job_id) assert result.status == PipelineStage.FAILED assert "Invalid URL" in result.error["message"] @pytest.mark.asyncio async def test_ai_service_failure_with_retry(self, pipeline): """Test AI service failure with retry mechanism.""" # Make AI service fail initially, then succeed call_count = 0 async def failing_generate_summary(*args, **kwargs): nonlocal call_count call_count += 1 if call_count == 1: raise Exception("API rate limit exceeded") return SummaryResult( summary="Retry successful", key_points=["Point 1"], main_themes=["Theme 1"], actionable_insights=[], confidence_score=0.7, processing_metadata={}, cost_data={} ) pipeline.ai_service.generate_summary.side_effect = failing_generate_summary video_url = "https://youtube.com/watch?v=retry_test" config = PipelineConfig(max_retries=1) job_id = await pipeline.process_video(video_url, config) # Wait for retry and completion max_wait = 10.0 waited = 0.0 while waited < max_wait: result = pipeline.active_jobs.get(job_id) if result and result.status in [PipelineStage.COMPLETED, PipelineStage.FAILED]: break await asyncio.sleep(0.1) waited += 0.1 result = pipeline.active_jobs[job_id] # Should eventually succeed after retry if result.status == PipelineStage.COMPLETED: assert result.retry_count > 0 assert call_count > 1