""" Unit tests for transcript-only downloader """ import pytest from unittest.mock import Mock, AsyncMock, patch, MagicMock from pathlib import Path from backend.models.video_download import ( DownloadMethod, DownloadPreferences, VideoDownloadResult, DownloadStatus, VideoMetadata, TranscriptData, DownloaderException ) from backend.services.video_downloaders.transcript_downloader import TranscriptOnlyDownloader class TestTranscriptOnlyDownloader: """Test transcript-only downloader functionality""" @pytest.fixture def mock_config(self, tmp_path): """Mock configuration for testing""" return { 'youtube_api_key': 'test_api_key', 'output_dir': str(tmp_path), 'timeout': 30 } @pytest.fixture def downloader(self, mock_config): """Create downloader instance for testing""" return TranscriptOnlyDownloader(config=mock_config) def test_initialization(self, downloader, mock_config): """Test downloader initialization""" assert downloader.method == DownloadMethod.TRANSCRIPT_ONLY assert downloader.youtube_api_key == mock_config['youtube_api_key'] assert downloader.output_dir == Path(mock_config['output_dir']) def test_initialization_no_api_key(self, tmp_path): """Test initialization without API key""" config = {'output_dir': str(tmp_path)} downloader = TranscriptOnlyDownloader(config=config) assert downloader.youtube_api_key is None def test_capabilities(self, downloader): """Test downloader capabilities""" assert downloader.supports_audio_only() is False assert downloader.supports_quality_selection() is False assert downloader.get_supported_formats() == ["json", "txt"] @patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') @pytest.mark.asyncio async def test_successful_transcript_download(self, mock_transcript_api, downloader): """Test successful transcript extraction""" # Mock transcript API response mock_api_instance = Mock() mock_transcript_list = [ {'text': 'Hello world', 'start': 0.0, 'duration': 2.0}, {'text': 'This is a test', 'start': 2.0, 'duration': 3.0}, {'text': 'Video transcript', 'start': 5.0, 'duration': 2.5} ] mock_api_instance.get_transcript.return_value = mock_transcript_list mock_transcript_api.return_value = mock_api_instance url = "https://youtube.com/watch?v=test123" preferences = DownloadPreferences() result = await downloader.download_video(url, preferences) assert result.status == DownloadStatus.PARTIAL assert result.video_id == "test123" assert result.method == DownloadMethod.TRANSCRIPT_ONLY assert result.is_partial is True assert result.video_path is None assert result.audio_path is None # Check transcript data assert result.transcript is not None assert result.transcript.text == "Hello world This is a test Video transcript" assert result.transcript.language == 'en' assert result.transcript.is_auto_generated is False assert len(result.transcript.segments) == 3 assert result.transcript.source == "youtube-transcript-api" @patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') @patch('backend.services.video_downloaders.transcript_downloader.build') @pytest.mark.asyncio async def test_with_metadata_extraction(self, mock_build, mock_transcript_api, downloader): """Test transcript download with metadata extraction""" # Mock YouTube API mock_service = Mock() mock_video_response = { 'items': [{ 'id': 'test123', 'snippet': { 'title': 'Test Video', 'description': 'Test description', 'publishedAt': '2024-01-01T00:00:00Z', 'channelTitle': 'Test Channel', 'tags': ['test', 'video'], 'defaultLanguage': 'en', 'thumbnails': { 'high': {'url': 'http://example.com/thumb.jpg'} } }, 'contentDetails': { 'duration': 'PT4M30S' # 4 minutes 30 seconds }, 'statistics': { 'viewCount': '1000000' }, 'status': { 'privacyStatus': 'public' } }] } mock_service.videos.return_value.list.return_value.execute.return_value = mock_video_response mock_build.return_value = mock_service # Mock transcript API mock_api_instance = Mock() mock_transcript_list = [ {'text': 'Test transcript', 'start': 0.0, 'duration': 2.0} ] mock_api_instance.get_transcript.return_value = mock_transcript_list mock_transcript_api.return_value = mock_api_instance url = "https://youtube.com/watch?v=test123" preferences = DownloadPreferences() result = await downloader.download_video(url, preferences) assert result.status == DownloadStatus.PARTIAL assert result.metadata is not None assert result.metadata.title == "Test Video" assert result.metadata.description == "Test description" assert result.metadata.duration_seconds == 270 # 4m30s assert result.metadata.view_count == 1000000 assert result.metadata.uploader == "Test Channel" @patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') @pytest.mark.asyncio async def test_transcript_unavailable(self, mock_transcript_api, downloader): """Test handling when transcript is unavailable""" mock_transcript_api.side_effect = Exception("No transcript available") url = "https://youtube.com/watch?v=notranscript123" preferences = DownloadPreferences() result = await downloader.download_video(url, preferences) assert result.status == DownloadStatus.FAILED assert "No transcript available" in result.error_message assert result.transcript is None @patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') @pytest.mark.asyncio async def test_auto_generated_transcript(self, mock_transcript_api, downloader): """Test handling of auto-generated transcripts""" # Mock transcript API to return auto-generated transcript mock_api_instance = Mock() mock_transcript_list = [ {'text': 'Auto generated text', 'start': 0.0, 'duration': 2.0} ] mock_api_instance.get_transcript.return_value = mock_transcript_list # Mock list_transcripts to show it's auto-generated mock_transcript_entry = Mock() mock_transcript_entry.is_generated = True mock_transcript_entry.language_code = 'en' mock_api_instance.list_transcripts.return_value = [mock_transcript_entry] mock_transcript_api.return_value = mock_api_instance url = "https://youtube.com/watch?v=auto123" preferences = DownloadPreferences() result = await downloader.download_video(url, preferences) assert result.status == DownloadStatus.PARTIAL assert result.transcript.is_auto_generated is True @patch('backend.services.video_downloaders.transcript_downloader.build') @pytest.mark.asyncio async def test_metadata_only_extraction(self, mock_build, downloader): """Test metadata-only extraction without transcript""" # Mock YouTube API for metadata mock_service = Mock() mock_video_response = { 'items': [{ 'id': 'test123', 'snippet': { 'title': 'Metadata Only Video', 'description': 'Just metadata', 'publishedAt': '2024-01-01T00:00:00Z', 'channelTitle': 'Test Channel' }, 'contentDetails': { 'duration': 'PT2M15S' }, 'statistics': { 'viewCount': '500' } }] } mock_service.videos.return_value.list.return_value.execute.return_value = mock_video_response mock_build.return_value = mock_service metadata = await downloader.get_video_metadata("test123") assert metadata is not None assert metadata.video_id == "test123" assert metadata.title == "Metadata Only Video" assert metadata.duration_seconds == 135 # 2m15s assert metadata.view_count == 500 @pytest.mark.asyncio async def test_get_transcript_direct(self, downloader): """Test direct transcript extraction""" with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') as mock_api: mock_api_instance = Mock() mock_transcript = [ {'text': 'Direct transcript', 'start': 0.0, 'duration': 2.0} ] mock_api_instance.get_transcript.return_value = mock_transcript mock_api.return_value = mock_api_instance transcript = await downloader.get_transcript("test123") assert transcript is not None assert transcript.text == "Direct transcript" assert len(transcript.segments) == 1 @pytest.mark.asyncio async def test_connection_test_success(self, downloader): """Test successful connection test""" with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi'): result = await downloader.test_connection() assert result is True @pytest.mark.asyncio async def test_connection_test_failure(self, downloader): """Test failed connection test""" with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi', side_effect=Exception("Connection failed")): result = await downloader.test_connection() assert result is False def test_parse_duration(self, downloader): """Test ISO 8601 duration parsing""" test_cases = [ ("PT1M30S", 90), # 1 minute 30 seconds ("PT2H15M", 8100), # 2 hours 15 minutes ("PT45S", 45), # 45 seconds ("PT1H", 3600), # 1 hour ("PT10M", 600), # 10 minutes ("P1DT2H3M4S", 93784), # 1 day 2 hours 3 minutes 4 seconds ("", 0), # Empty string ("invalid", 0) # Invalid format ] for duration_str, expected_seconds in test_cases: result = downloader._parse_duration(duration_str) assert result == expected_seconds, f"Failed for {duration_str}: expected {expected_seconds}, got {result}" @pytest.mark.asyncio async def test_language_preference(self, downloader): """Test transcript language preference""" with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') as mock_api: mock_api_instance = Mock() # Mock transcript list with multiple languages mock_transcripts = Mock() # Mock English transcript english_transcript = Mock() english_transcript.language_code = 'en' english_transcript.fetch.return_value = [ {'text': 'English transcript', 'start': 0.0, 'duration': 2.0} ] # Mock Spanish transcript spanish_transcript = Mock() spanish_transcript.language_code = 'es' spanish_transcript.fetch.return_value = [ {'text': 'Spanish transcript', 'start': 0.0, 'duration': 2.0} ] mock_transcripts.__iter__ = Mock(return_value=iter([english_transcript, spanish_transcript])) mock_transcripts.find_transcript.return_value = spanish_transcript mock_api_instance.list_transcripts.return_value = mock_transcripts mock_api.return_value = mock_api_instance # Request Spanish transcript preferences = DownloadPreferences() # Note: This test assumes language preference would be implemented # Currently the downloader uses default language preference url = "https://youtube.com/watch?v=multilang123" result = await downloader.download_video(url, preferences) assert result.status == DownloadStatus.PARTIAL # The actual language returned depends on the implementation assert result.transcript is not None @patch('backend.services.video_downloaders.transcript_downloader.build') @pytest.mark.asyncio async def test_api_quota_exceeded(self, mock_build, downloader): """Test handling of YouTube API quota exceeded""" mock_service = Mock() mock_service.videos.return_value.list.return_value.execute.side_effect = Exception("Quota exceeded") mock_build.return_value = mock_service # Should still work without metadata if transcript is available with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') as mock_transcript_api: mock_api_instance = Mock() mock_transcript = [{'text': 'Transcript without metadata', 'start': 0.0, 'duration': 2.0}] mock_api_instance.get_transcript.return_value = mock_transcript mock_transcript_api.return_value = mock_api_instance url = "https://youtube.com/watch?v=quota123" preferences = DownloadPreferences() result = await downloader.download_video(url, preferences) assert result.status == DownloadStatus.PARTIAL assert result.transcript is not None assert result.metadata is None # Metadata extraction failed due to quota @pytest.mark.asyncio async def test_invalid_video_id(self, downloader): """Test handling of invalid video ID""" with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') as mock_api: mock_api.side_effect = Exception("Video not found") url = "https://youtube.com/watch?v=invalidid123" preferences = DownloadPreferences() result = await downloader.download_video(url, preferences) assert result.status == DownloadStatus.FAILED assert "Video not found" in result.error_message