"""Unit tests for the intelligent video downloader orchestrator.

All downloader back-ends are replaced with ``AsyncMock`` objects through a
patched ``DownloaderFactory``, so nothing in this module touches the network.
The fixtures live at module level so that both test classes can request them.
"""

import asyncio
import os
import threading
import uuid
from datetime import datetime, timedelta
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.config.video_download_config import VideoDownloadConfig
from backend.models.video_download import (
    AllMethodsFailedError,
    DownloadJobStatus,
    DownloadMethod,
    DownloadMetrics,
    DownloadPreferences,
    DownloadStatus,
    DownloaderException,
    HealthCheckResult,
    NetworkError,
    VideoDownloadResult,
    VideoNotAvailableError,
)
from backend.services.intelligent_video_downloader import IntelligentVideoDownloader


def _build_test_config(storage_path, **overrides):
    """Return the ``VideoDownloadConfig`` shared by the tests.

    Args:
        storage_path: Directory handed to the config as ``storage_path``.
        **overrides: Keyword arguments that replace the default settings
            below (for example ``method_timeout_seconds=1``).
    """
    settings = {
        "storage_path": storage_path,
        "max_storage_gb": 1.0,
        "enabled_methods": [DownloadMethod.PYTUBEFIX, DownloadMethod.TRANSCRIPT_ONLY],
        "method_timeout_seconds": 30,
        "max_retries_per_method": 2,
        "max_concurrent_downloads": 2,
    }
    settings.update(overrides)
    return VideoDownloadConfig(**settings)


@pytest.fixture
def mock_config(tmp_path):
    """Mock configuration for testing, rooted in pytest's ``tmp_path``."""
    return _build_test_config(tmp_path)


@pytest.fixture
def mock_downloader_factory():
    """Patch ``DownloaderFactory`` so it hands out ``AsyncMock`` downloaders.

    PYTUBEFIX maps to a downloader returning a COMPLETED result,
    TRANSCRIPT_ONLY to one returning a PARTIAL result, and any other method to
    a downloader whose ``download_video`` raises ``DownloaderException``.
    The patch stays active for the duration of the requesting test.
    """
    with patch('backend.services.intelligent_video_downloader.DownloaderFactory') as mock_factory:
        # Mock successful downloader
        mock_successful_downloader = AsyncMock()
        mock_successful_downloader.download_video.return_value = VideoDownloadResult(
            video_id="test123",
            video_url="https://youtube.com/watch?v=test123",
            status=DownloadStatus.COMPLETED,
            method=DownloadMethod.PYTUBEFIX,
        )
        mock_successful_downloader.test_connection.return_value = True

        # Mock failing downloader
        mock_failing_downloader = AsyncMock()
        mock_failing_downloader.download_video.side_effect = DownloaderException("Download failed")
        mock_failing_downloader.test_connection.return_value = False

        # Mock transcript downloader
        mock_transcript_downloader = AsyncMock()
        mock_transcript_downloader.download_video.return_value = VideoDownloadResult(
            video_id="test123",
            video_url="https://youtube.com/watch?v=test123",
            status=DownloadStatus.PARTIAL,
            method=DownloadMethod.TRANSCRIPT_ONLY,
            is_partial=True,
        )
        mock_transcript_downloader.test_connection.return_value = True

        mock_factory.get_available_methods.return_value = [
            DownloadMethod.PYTUBEFIX,
            DownloadMethod.TRANSCRIPT_ONLY,
        ]

        def create_side_effect(method, config):
            if method == DownloadMethod.PYTUBEFIX:
                return mock_successful_downloader
            elif method == DownloadMethod.TRANSCRIPT_ONLY:
                return mock_transcript_downloader
            else:
                return mock_failing_downloader

        mock_factory.create.side_effect = create_side_effect

        yield mock_factory


@pytest.fixture
def downloader(mock_config, mock_downloader_factory):
    """Create intelligent downloader instance for testing.

    ``mock_downloader_factory`` is requested so the factory patch is active
    while the downloader is constructed and used.
    """
    return IntelligentVideoDownloader(mock_config)


class TestIntelligentVideoDownloader:
    """Test intelligent video downloader orchestration"""

    def test_initialization(self, downloader, mock_config):
        """Test downloader initialization"""
        assert downloader.config == mock_config
        assert len(downloader.downloaders) == 2  # PYTUBEFIX and TRANSCRIPT_ONLY
        assert DownloadMethod.PYTUBEFIX in downloader.downloaders
        assert DownloadMethod.TRANSCRIPT_ONLY in downloader.downloaders
        assert isinstance(downloader.metrics, DownloadMetrics)
        assert downloader.success_cache == {}
        assert downloader.active_jobs == {}

    @pytest.mark.asyncio
    async def test_successful_download_first_method(self, downloader):
        """Test successful download on first method attempt"""
        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert result.video_id == "test123"
        assert result.method == DownloadMethod.PYTUBEFIX
        assert downloader.metrics.successful_downloads == 1
        assert downloader.metrics.total_attempts == 1

    @pytest.mark.asyncio
    async def test_fallback_to_transcript_only(self, downloader):
        """Test fallback to transcript-only when primary methods fail"""
        # Make pytubefix fail
        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video.side_effect = NetworkError("403 Forbidden")

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.PARTIAL
        assert result.method == DownloadMethod.TRANSCRIPT_ONLY
        assert result.is_partial is True

    @pytest.mark.asyncio
    async def test_all_methods_fail(self, downloader):
        """Test when all methods fail"""
        # Make all downloaders fail
        for downloader_instance in downloader.downloaders.values():
            downloader_instance.download_video.side_effect = DownloaderException("All failed")

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        with pytest.raises(AllMethodsFailedError):
            await downloader.download_video(url, preferences)

        # Checked outside the ``raises`` block; inside it the line would be
        # unreachable once the call above raises.
        assert downloader.metrics.failed_downloads == 1

    @pytest.mark.asyncio
    async def test_video_not_available_error(self, downloader):
        """Test handling of permanent video unavailability"""
        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video.side_effect = VideoNotAvailableError("Private video")
        downloader.downloaders[DownloadMethod.TRANSCRIPT_ONLY].download_video.side_effect = VideoNotAvailableError("Private video")

        url = "https://youtube.com/watch?v=private123"
        preferences = DownloadPreferences()

        with pytest.raises(AllMethodsFailedError):
            await downloader.download_video(url, preferences)

    @pytest.mark.asyncio
    async def test_retry_logic(self, downloader):
        """Test retry logic for transient failures"""
        # First call fails, second succeeds
        call_count = 0

        async def side_effect(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                raise DownloaderException("Transient error")
            else:
                return VideoDownloadResult(
                    video_id="test123",
                    video_url="https://youtube.com/watch?v=test123",
                    status=DownloadStatus.COMPLETED,
                    method=DownloadMethod.PYTUBEFIX,
                )

        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video.side_effect = side_effect

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert call_count == 2  # One retry occurred

    @pytest.mark.asyncio
    async def test_concurrent_download_limit(self, downloader):
        """Test concurrent download limiting"""
        # Create multiple download tasks
        urls = [
            "https://youtube.com/watch?v=test1",
            "https://youtube.com/watch?v=test2",
            "https://youtube.com/watch?v=test3",
            "https://youtube.com/watch?v=test4",
        ]
        preferences = DownloadPreferences()

        # Start all downloads concurrently
        tasks = [downloader.download_video(url, preferences) for url in urls]
        results = await asyncio.gather(*tasks)

        # All should complete successfully
        assert len(results) == 4
        assert all(result.status == DownloadStatus.COMPLETED for result in results)

        # Check that semaphore was respected (indirectly via successful completion)
        assert downloader.metrics.successful_downloads == 4

    @pytest.mark.asyncio
    async def test_get_prioritized_methods(self, downloader):
        """Test method prioritization logic"""
        preferences = DownloadPreferences(prefer_audio_only=True)

        # Mock supports_audio_only for testing
        downloader.downloaders[DownloadMethod.PYTUBEFIX].supports_audio_only.return_value = True
        downloader.downloaders[DownloadMethod.TRANSCRIPT_ONLY].supports_audio_only.return_value = False

        prioritized = await downloader._get_prioritized_methods("test123", preferences)

        # Should prioritize audio-capable methods
        assert len(prioritized) >= 2
        # TRANSCRIPT_ONLY should be last as ultimate fallback
        assert prioritized[-1] == DownloadMethod.TRANSCRIPT_ONLY

    @pytest.mark.asyncio
    async def test_job_status_tracking(self, downloader):
        """Test job status tracking during download"""
        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        # Mock slow download to test job tracking
        async def slow_download(*args, **kwargs):
            await asyncio.sleep(0.1)  # Small delay
            return VideoDownloadResult(
                video_id="test123",
                video_url=url,
                status=DownloadStatus.COMPLETED,
                method=DownloadMethod.PYTUBEFIX,
            )

        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video = slow_download

        # Start download
        download_task = asyncio.create_task(downloader.download_video(url, preferences))

        # Give it a moment to start
        await asyncio.sleep(0.01)

        # Check that job was created
        active_jobs = downloader.get_active_jobs()
        assert len(active_jobs) == 1

        job_id = list(active_jobs.keys())[0]
        job_status = await downloader.get_job_status(job_id)
        assert job_status is not None
        assert job_status.video_url == url
        assert job_status.status in [DownloadStatus.IN_PROGRESS, DownloadStatus.COMPLETED]

        # Wait for completion
        await download_task

        # Job should be cleaned up
        assert len(downloader.get_active_jobs()) == 0

    @pytest.mark.asyncio
    async def test_job_cancellation(self, downloader):
        """Test job cancellation functionality"""
        # Create a mock job
        job_id = str(uuid.uuid4())
        job_status = DownloadJobStatus(
            job_id=job_id,
            video_url="https://youtube.com/watch?v=test123",
            status=DownloadStatus.IN_PROGRESS,
        )
        downloader.active_jobs[job_id] = job_status

        # Cancel job
        success = await downloader.cancel_job(job_id)
        assert success is True
        assert downloader.active_jobs[job_id].status == DownloadStatus.CANCELLED

        # Try to cancel non-existent job
        success = await downloader.cancel_job("nonexistent")
        assert success is False

    @pytest.mark.asyncio
    async def test_health_check(self, downloader):
        """Test health check functionality"""
        health_result = await downloader.health_check()

        assert isinstance(health_result, HealthCheckResult)
        assert health_result.total_methods == 2
        assert health_result.healthy_methods >= 1  # At least transcript should work
        assert health_result.overall_status in ['healthy', 'degraded', 'unhealthy']
        assert isinstance(health_result.method_details, dict)
        assert len(health_result.method_details) == 2

    def test_metrics_tracking(self, downloader):
        """Test metrics tracking functionality"""
        # Update some metrics
        downloader._update_success_metrics(DownloadMethod.PYTUBEFIX, "test123", True, 0)
        downloader._update_success_metrics(DownloadMethod.PYTUBEFIX, "test456", False, 1)
        downloader._update_success_metrics(DownloadMethod.TRANSCRIPT_ONLY, "test789", True, 0)

        metrics = downloader.get_metrics()

        assert metrics.total_attempts == 3
        assert metrics.successful_downloads == 2
        assert metrics.failed_downloads == 1

        # Check method-specific success rates
        pytubefix_rate = metrics.method_success_rates.get(DownloadMethod.PYTUBEFIX.value, 0)
        transcript_rate = metrics.method_success_rates.get(DownloadMethod.TRANSCRIPT_ONLY.value, 0)

        assert 0 <= pytubefix_rate <= 1
        assert transcript_rate == 1.0  # 100% success for transcript

    @pytest.mark.asyncio
    async def test_extract_video_id(self, downloader):
        """Test video ID extraction"""
        test_cases = [
            ("https://youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
            ("https://youtu.be/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
            ("https://youtube.com/embed/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
        ]

        for url, expected_id in test_cases:
            video_id = await downloader._extract_video_id(url)
            assert video_id == expected_id

        # Test invalid URL
        with pytest.raises(DownloaderException):
            await downloader._extract_video_id("https://example.com/not-youtube")

    @pytest.mark.asyncio
    async def test_cleanup_old_files(self, downloader, tmp_path):
        """Test old file cleanup functionality"""
        # Create some test files with old timestamps
        old_file = tmp_path / "videos" / "old_video.mp4"
        old_file.parent.mkdir(parents=True, exist_ok=True)
        old_file.write_text("old video content")

        new_file = tmp_path / "videos" / "new_video.mp4"
        new_file.write_text("new video content")

        # Artificially age the old file
        old_time = datetime.now() - timedelta(days=35)
        old_timestamp = old_time.timestamp()
        os.utime(old_file, (old_timestamp, old_timestamp))

        # Run cleanup (30 day default)
        stats = await downloader.cleanup_old_files(30)

        assert stats['files_deleted'] == 1
        assert stats['bytes_freed'] > 0
        assert not old_file.exists()
        assert new_file.exists()

    @pytest.mark.asyncio
    async def test_method_timeout_handling(self, tmp_path, mock_downloader_factory):
        """Test timeout handling for download methods.

        The shared fixture configures a 30 second method timeout, which a
        2 second delay can never exceed. This test therefore builds its own
        downloader with a 1 second timeout.
        NOTE(review): assumes the downloader enforces
        ``config.method_timeout_seconds`` on each method attempt, and that the
        config accepts a value of 1 -- confirm against the implementation.
        """
        config = _build_test_config(tmp_path, method_timeout_seconds=1)
        downloader = IntelligentVideoDownloader(config)

        # Mock a downloader that takes too long
        async def slow_download(*args, **kwargs):
            await asyncio.sleep(config.method_timeout_seconds + 1)  # Longer than config timeout
            return VideoDownloadResult(
                video_id="test123",
                video_url="https://youtube.com/watch?v=test123",
                status=DownloadStatus.COMPLETED,
                method=DownloadMethod.PYTUBEFIX,
            )

        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video = slow_download

        # This should timeout and fallback to transcript-only
        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        # Should fallback to transcript method
        assert result.status == DownloadStatus.PARTIAL
        assert result.method == DownloadMethod.TRANSCRIPT_ONLY


@pytest.mark.integration
class TestIntelligentVideoDownloaderIntegration:
    """Integration tests for intelligent video downloader"""

    @pytest.mark.asyncio
    async def test_real_config_initialization(self, tmp_path):
        """Test initialization with real configuration"""
        config = VideoDownloadConfig(
            storage_path=tmp_path,
            enabled_methods=[DownloadMethod.TRANSCRIPT_ONLY],  # Use only reliable method
        )

        # This would normally fail without mocking, but we're testing config handling
        with patch('backend.services.intelligent_video_downloader.DownloaderFactory'):
            downloader = IntelligentVideoDownloader(config)
            assert downloader.config == config

    @pytest.mark.asyncio
    async def test_storage_directories_creation(self, tmp_path):
        """Test that storage directories are created properly"""
        config = VideoDownloadConfig(storage_path=tmp_path)

        storage_dirs = config.get_storage_dirs()

        # Directories should be created by config.ensure_directories()
        for dir_path in storage_dirs.values():
            assert dir_path.exists()
            assert dir_path.is_dir()

    def test_metrics_update_thread_safety(self, downloader):
        """Test thread safety of metrics updates.

        Uses the module-level ``downloader`` fixture, which is backed by the
        patched factory.
        """

        def update_metrics():
            for i in range(10):
                downloader._update_success_metrics(
                    DownloadMethod.PYTUBEFIX,
                    f"test{i}",
                    i % 2 == 0,  # Alternate success/failure
                    0,
                )

        # Run concurrent updates
        threads = [threading.Thread(target=update_metrics) for _ in range(3)]

        for thread in threads:
            thread.start()

        for thread in threads:
            thread.join()

        # Verify metrics consistency
        metrics = downloader.get_metrics()
        assert metrics.total_attempts == 30  # 3 threads * 10 updates each
        assert 0 <= metrics.successful_downloads <= 30
        assert 0 <= metrics.failed_downloads <= 30
        assert metrics.successful_downloads + metrics.failed_downloads == 30