"""
Unit tests for transcript-only downloader
"""

from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from backend.models.video_download import (
    DownloadMethod,
    DownloadPreferences,
    VideoDownloadResult,
    DownloadStatus,
    VideoMetadata,
    TranscriptData,
    DownloaderException
)
from backend.services.video_downloaders.transcript_downloader import TranscriptOnlyDownloader


class TestTranscriptOnlyDownloader:
    """Test transcript-only downloader functionality.

    Every test patches names inside
    ``backend.services.video_downloaders.transcript_downloader`` (the module
    under test) rather than the third-party packages themselves, so the
    replacement is what the downloader module actually looks up.
    """

    @pytest.fixture
    def mock_config(self, tmp_path):
        """Mock configuration for testing.

        ``output_dir`` points at pytest's per-test ``tmp_path`` so nothing is
        written outside the temporary directory.
        """
        return {
            'youtube_api_key': 'test_api_key',
            'output_dir': str(tmp_path),
            'timeout': 30
        }

    @pytest.fixture
    def downloader(self, mock_config):
        """Create downloader instance for testing"""
        return TranscriptOnlyDownloader(config=mock_config)

    def test_initialization(self, downloader, mock_config):
        """Test downloader initialization.

        The configured API key and output directory must be exposed as
        attributes, with the output directory converted to a ``Path``.
        """
        assert downloader.method == DownloadMethod.TRANSCRIPT_ONLY
        assert downloader.youtube_api_key == mock_config['youtube_api_key']
        assert downloader.output_dir == Path(mock_config['output_dir'])

    def test_initialization_no_api_key(self, tmp_path):
        """Test initialization without API key"""
        config = {'output_dir': str(tmp_path)}
        downloader = TranscriptOnlyDownloader(config=config)
        assert downloader.youtube_api_key is None

    def test_capabilities(self, downloader):
        """Test downloader capabilities"""
        assert downloader.supports_audio_only() is False
        assert downloader.supports_quality_selection() is False
        assert downloader.get_supported_formats() == ["json", "txt"]

    @patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi')
    @pytest.mark.asyncio
    async def test_successful_transcript_download(self, mock_transcript_api, downloader):
        """Test successful transcript extraction.

        Expects a PARTIAL result (transcript but no media files) whose text
        is the three mocked segments joined with single spaces.
        """
        # Mock transcript API response
        mock_api_instance = Mock()
        mock_transcript_list = [
            {'text': 'Hello world', 'start': 0.0, 'duration': 2.0},
            {'text': 'This is a test', 'start': 2.0, 'duration': 3.0},
            {'text': 'Video transcript', 'start': 5.0, 'duration': 2.5}
        ]
        mock_api_instance.get_transcript.return_value = mock_transcript_list
        mock_transcript_api.return_value = mock_api_instance

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.PARTIAL
        assert result.video_id == "test123"
        assert result.method == DownloadMethod.TRANSCRIPT_ONLY
        assert result.is_partial is True
        assert result.video_path is None
        assert result.audio_path is None

        # Check transcript data
        assert result.transcript is not None
        assert result.transcript.text == "Hello world This is a test Video transcript"
        assert result.transcript.language == 'en'
        assert result.transcript.is_auto_generated is False
        assert len(result.transcript.segments) == 3
        assert result.transcript.source == "youtube-transcript-api"

    # NOTE: stacked @patch decorators inject their mocks bottom-up, so the
    # innermost patch (``build``) is the first mock argument.
    @patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi')
    @patch('backend.services.video_downloaders.transcript_downloader.build')
    @pytest.mark.asyncio
    async def test_with_metadata_extraction(self, mock_build, mock_transcript_api, downloader):
        """Test transcript download with metadata extraction.

        The mocked YouTube Data API response must be mapped onto the result's
        metadata, including ISO 8601 duration and string view count
        conversion to integers.
        """
        # Mock YouTube API
        mock_service = Mock()
        mock_video_response = {
            'items': [{
                'id': 'test123',
                'snippet': {
                    'title': 'Test Video',
                    'description': 'Test description',
                    'publishedAt': '2024-01-01T00:00:00Z',
                    'channelTitle': 'Test Channel',
                    'tags': ['test', 'video'],
                    'defaultLanguage': 'en',
                    'thumbnails': {
                        'high': {'url': 'http://example.com/thumb.jpg'}
                    }
                },
                'contentDetails': {
                    'duration': 'PT4M30S'  # 4 minutes 30 seconds
                },
                'statistics': {
                    'viewCount': '1000000'
                },
                'status': {
                    'privacyStatus': 'public'
                }
            }]
        }

        mock_service.videos.return_value.list.return_value.execute.return_value = mock_video_response
        mock_build.return_value = mock_service

        # Mock transcript API
        mock_api_instance = Mock()
        mock_transcript_list = [
            {'text': 'Test transcript', 'start': 0.0, 'duration': 2.0}
        ]
        mock_api_instance.get_transcript.return_value = mock_transcript_list
        mock_transcript_api.return_value = mock_api_instance

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.PARTIAL
        assert result.metadata is not None
        assert result.metadata.title == "Test Video"
        assert result.metadata.description == "Test description"
        assert result.metadata.duration_seconds == 270  # 4m30s
        assert result.metadata.view_count == 1000000
        assert result.metadata.uploader == "Test Channel"

    @patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi')
    @pytest.mark.asyncio
    async def test_transcript_unavailable(self, mock_transcript_api, downloader):
        """Test handling when transcript is unavailable.

        Constructing the transcript API raises; the downloader is expected to
        report FAILED with the exception text instead of propagating it.
        """
        mock_transcript_api.side_effect = Exception("No transcript available")

        url = "https://youtube.com/watch?v=notranscript123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.FAILED
        # Guard first so a missing message fails as a clear assertion rather
        # than a TypeError from the ``in`` check below.
        assert result.error_message is not None
        assert "No transcript available" in result.error_message
        assert result.transcript is None

    @patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi')
    @pytest.mark.asyncio
    async def test_auto_generated_transcript(self, mock_transcript_api, downloader):
        """Test handling of auto-generated transcripts"""
        # Mock transcript API to return auto-generated transcript
        mock_api_instance = Mock()
        mock_transcript_list = [
            {'text': 'Auto generated text', 'start': 0.0, 'duration': 2.0}
        ]
        mock_api_instance.get_transcript.return_value = mock_transcript_list

        # Mock list_transcripts to show it's auto-generated
        mock_transcript_entry = Mock()
        mock_transcript_entry.is_generated = True
        mock_transcript_entry.language_code = 'en'
        mock_api_instance.list_transcripts.return_value = [mock_transcript_entry]

        mock_transcript_api.return_value = mock_api_instance

        url = "https://youtube.com/watch?v=auto123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.PARTIAL
        assert result.transcript.is_auto_generated is True

    @patch('backend.services.video_downloaders.transcript_downloader.build')
    @pytest.mark.asyncio
    async def test_metadata_only_extraction(self, mock_build, downloader):
        """Test metadata-only extraction without transcript"""
        # Mock YouTube API for metadata
        mock_service = Mock()
        mock_video_response = {
            'items': [{
                'id': 'test123',
                'snippet': {
                    'title': 'Metadata Only Video',
                    'description': 'Just metadata',
                    'publishedAt': '2024-01-01T00:00:00Z',
                    'channelTitle': 'Test Channel'
                },
                'contentDetails': {
                    'duration': 'PT2M15S'
                },
                'statistics': {
                    'viewCount': '500'
                }
            }]
        }
        mock_service.videos.return_value.list.return_value.execute.return_value = mock_video_response
        mock_build.return_value = mock_service

        metadata = await downloader.get_video_metadata("test123")

        assert metadata is not None
        assert metadata.video_id == "test123"
        assert metadata.title == "Metadata Only Video"
        assert metadata.duration_seconds == 135  # 2m15s
        assert metadata.view_count == 500

    @pytest.mark.asyncio
    async def test_get_transcript_direct(self, downloader):
        """Test direct transcript extraction"""
        with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') as mock_api:
            mock_api_instance = Mock()
            mock_transcript = [
                {'text': 'Direct transcript', 'start': 0.0, 'duration': 2.0}
            ]
            mock_api_instance.get_transcript.return_value = mock_transcript
            mock_api.return_value = mock_api_instance

            # The call must happen while the patch is still active.
            transcript = await downloader.get_transcript("test123")

            assert transcript is not None
            assert transcript.text == "Direct transcript"
            assert len(transcript.segments) == 1

    @pytest.mark.asyncio
    async def test_connection_test_success(self, downloader):
        """Test successful connection test"""
        with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi'):
            result = await downloader.test_connection()
            assert result is True

    @pytest.mark.asyncio
    async def test_connection_test_failure(self, downloader):
        """Test failed connection test"""
        with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi', side_effect=Exception("Connection failed")):
            result = await downloader.test_connection()
            assert result is False

    def test_parse_duration(self, downloader):
        """Test ISO 8601 duration parsing.

        Covers minute/second, hour-only, day-prefixed durations, and the
        fallback to 0 for empty or unparseable input.
        """
        test_cases = [
            ("PT1M30S", 90),  # 1 minute 30 seconds
            ("PT2H15M", 8100),  # 2 hours 15 minutes
            ("PT45S", 45),  # 45 seconds
            ("PT1H", 3600),  # 1 hour
            ("PT10M", 600),  # 10 minutes
            ("P1DT2H3M4S", 93784),  # 1 day 2 hours 3 minutes 4 seconds
            ("", 0),  # Empty string
            ("invalid", 0)  # Invalid format
        ]

        for duration_str, expected_seconds in test_cases:
            result = downloader._parse_duration(duration_str)
            assert result == expected_seconds, f"Failed for {duration_str}: expected {expected_seconds}, got {result}"

    @pytest.mark.asyncio
    async def test_language_preference(self, downloader):
        """Test transcript language preference.

        Only checks that a transcript comes back when several languages are
        listed; which language is selected is left to the implementation.
        """
        with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') as mock_api:
            mock_api_instance = Mock()

            # Mock transcript list with multiple languages
            mock_transcripts = Mock()

            # Mock English transcript
            english_transcript = Mock()
            english_transcript.language_code = 'en'
            english_transcript.fetch.return_value = [
                {'text': 'English transcript', 'start': 0.0, 'duration': 2.0}
            ]

            # Mock Spanish transcript
            spanish_transcript = Mock()
            spanish_transcript.language_code = 'es'
            spanish_transcript.fetch.return_value = [
                {'text': 'Spanish transcript', 'start': 0.0, 'duration': 2.0}
            ]

            # Build a fresh iterator on every call: a single shared
            # ``iter(...)`` would be exhausted after the first pass and any
            # later iteration over the transcript list would see nothing.
            mock_transcripts.__iter__ = Mock(
                side_effect=lambda: iter([english_transcript, spanish_transcript])
            )
            mock_transcripts.find_transcript.return_value = spanish_transcript

            mock_api_instance.list_transcripts.return_value = mock_transcripts
            mock_api.return_value = mock_api_instance

            # Request Spanish transcript
            preferences = DownloadPreferences()
            # Note: This test assumes language preference would be implemented
            # Currently the downloader uses default language preference

            url = "https://youtube.com/watch?v=multilang123"
            result = await downloader.download_video(url, preferences)

            assert result.status == DownloadStatus.PARTIAL
            # The actual language returned depends on the implementation
            assert result.transcript is not None

    @patch('backend.services.video_downloaders.transcript_downloader.build')
    @pytest.mark.asyncio
    async def test_api_quota_exceeded(self, mock_build, downloader):
        """Test handling of YouTube API quota exceeded.

        The metadata request raises, but the transcript mock succeeds, so the
        result is expected to be PARTIAL with a transcript and no metadata.
        """
        mock_service = Mock()
        mock_service.videos.return_value.list.return_value.execute.side_effect = Exception("Quota exceeded")
        mock_build.return_value = mock_service

        # Should still work without metadata if transcript is available
        with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') as mock_transcript_api:
            mock_api_instance = Mock()
            mock_transcript = [{'text': 'Transcript without metadata', 'start': 0.0, 'duration': 2.0}]
            mock_api_instance.get_transcript.return_value = mock_transcript
            mock_transcript_api.return_value = mock_api_instance

            url = "https://youtube.com/watch?v=quota123"
            preferences = DownloadPreferences()

            result = await downloader.download_video(url, preferences)

            assert result.status == DownloadStatus.PARTIAL
            assert result.transcript is not None
            assert result.metadata is None  # Metadata extraction failed due to quota

    @pytest.mark.asyncio
    async def test_invalid_video_id(self, downloader):
        """Test handling of invalid video ID"""
        with patch('backend.services.video_downloaders.transcript_downloader.YouTubeTranscriptApi') as mock_api:
            mock_api.side_effect = Exception("Video not found")

            url = "https://youtube.com/watch?v=invalidid123"
            preferences = DownloadPreferences()

            result = await downloader.download_video(url, preferences)

            assert result.status == DownloadStatus.FAILED
            # Guard first so a missing message fails as a clear assertion
            # rather than a TypeError from the ``in`` check below.
            assert result.error_message is not None
            assert "Video not found" in result.error_message