youtube-summarizer/backend/tests/unit/test_pytubefix_downloader.py

332 lines
13 KiB
Python

"""
Unit tests for pytubefix downloader
"""
import pytest
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from pathlib import Path
import asyncio
from backend.models.video_download import (
DownloadMethod,
DownloadPreferences,
VideoDownloadResult,
DownloadStatus,
VideoQuality,
VideoMetadata,
TranscriptData,
DownloaderException,
VideoNotAvailableError
)
from backend.services.video_downloaders.pytubefix_downloader import PytubefixDownloader
class TestPytubefixDownloader:
    """Unit tests for ``PytubefixDownloader``.

    Tests that exercise downloading patch ``pytubefix.YouTube`` with a mock,
    and the transcript tests patch
    ``youtube_transcript_api.YouTubeTranscriptApi``. Output files are written
    under pytest's ``tmp_path`` via the ``mock_config`` fixture.
    """

    @pytest.fixture
    def mock_config(self, tmp_path):
        """Return a downloader config dict rooted at pytest's ``tmp_path``."""
        return {
            'output_dir': str(tmp_path),
            'timeout': 60,
        }

    @pytest.fixture
    def downloader(self, mock_config):
        """Create the downloader instance under test from ``mock_config``."""
        return PytubefixDownloader(config=mock_config)

    def test_initialization(self, downloader, mock_config):
        """The downloader reports its method and has an existing output dir."""
        assert downloader.method == DownloadMethod.PYTUBEFIX
        assert downloader.output_dir == Path(mock_config['output_dir'])
        assert downloader.output_dir.exists()

    def test_capabilities(self, downloader):
        """The downloader advertises audio-only, quality selection, mp4/mp3."""
        assert downloader.supports_audio_only() is True
        assert downloader.supports_quality_selection() is True
        supported_formats = downloader.get_supported_formats()
        assert "mp4" in supported_formats
        assert "mp3" in supported_formats

    @patch('pytubefix.YouTube')
    @pytest.mark.asyncio
    async def test_successful_audio_download(self, mock_youtube_class, downloader):
        """An audio-only download completes and exposes audio path + metadata."""
        # Mocked YouTube object carrying the metadata fields read by the test.
        mock_yt = Mock()
        mock_yt.title = "Test Video"
        mock_yt.description = "Test description"
        mock_yt.length = 240  # 4 minutes
        mock_yt.views = 1000000
        mock_yt.publish_date = None
        mock_yt.author = "Test Author"
        mock_yt.thumbnail_url = "http://example.com/thumb.jpg"
        mock_yt.keywords = ["test", "video"]

        # The "downloaded" file must really exist, and the mocked stream must
        # report that same path (previously it reported a different,
        # non-existent file name).
        temp_audio_file = downloader.output_dir / "test123_temp_audio.mp4"
        temp_audio_file.write_text("fake audio content")

        mock_stream = Mock()
        mock_stream.download.return_value = str(temp_audio_file)

        # streams.filter(...).order_by(...).desc().first() -> mock_stream
        mock_streams = Mock()
        mock_streams.filter.return_value.order_by.return_value.desc.return_value.first.return_value = mock_stream
        mock_yt.streams = mock_streams
        mock_youtube_class.return_value = mock_yt

        # Simulate ffmpeg being unavailable: a None entry in sys.modules makes
        # ``import ffmpeg`` raise ImportError. ``patch('ffmpeg')`` is not a
        # valid target (patch needs a dotted ``package.attr`` path) and raises
        # TypeError before the test body runs.
        # NOTE(review): this only has an effect if the downloader imports
        # ffmpeg lazily at call time -- confirm against the implementation.
        with patch.dict('sys.modules', {'ffmpeg': None}):
            url = "https://youtube.com/watch?v=test123"
            preferences = DownloadPreferences(prefer_audio_only=True)
            result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert result.video_id == "test123"
        assert result.method == DownloadMethod.PYTUBEFIX
        assert result.audio_path is not None
        assert result.video_path is None
        assert result.metadata.title == "Test Video"
        assert result.metadata.duration_seconds == 240

    @patch('pytubefix.YouTube')
    @pytest.mark.asyncio
    async def test_video_too_long_rejection(self, mock_youtube_class, downloader):
        """A video longer than ``max_duration_minutes`` yields a FAILED result."""
        mock_yt = Mock()
        mock_yt.title = "Long Video"
        mock_yt.length = 7200  # 2 hours
        mock_yt.views = 1000
        mock_yt.author = "Test Author"
        mock_youtube_class.return_value = mock_yt

        url = "https://youtube.com/watch?v=long123"
        preferences = DownloadPreferences(max_duration_minutes=60)  # 1 hour limit
        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.FAILED
        assert "too long" in result.error_message.lower()

    @patch('pytubefix.YouTube')
    @pytest.mark.asyncio
    async def test_private_video_error(self, mock_youtube_class, downloader):
        """A "private" failure from YouTube raises ``VideoNotAvailableError``."""
        mock_youtube_class.side_effect = Exception("Video is private")

        url = "https://youtube.com/watch?v=private123"
        preferences = DownloadPreferences()

        with pytest.raises(VideoNotAvailableError, match="Video not available"):
            await downloader.download_video(url, preferences)

    @patch('pytubefix.YouTube')
    @pytest.mark.asyncio
    async def test_age_restricted_video_error(self, mock_youtube_class, downloader):
        """An age-restriction failure raises ``VideoNotAvailableError``."""
        mock_youtube_class.side_effect = Exception("Age restricted content")

        url = "https://youtube.com/watch?v=restricted123"
        preferences = DownloadPreferences()

        with pytest.raises(VideoNotAvailableError, match="Age-restricted video"):
            await downloader.download_video(url, preferences)

    @patch('pytubefix.YouTube')
    @pytest.mark.asyncio
    async def test_generic_pytubefix_error(self, mock_youtube_class, downloader):
        """Any other YouTube failure raises ``DownloaderException``."""
        mock_youtube_class.side_effect = Exception("Network error")

        url = "https://youtube.com/watch?v=error123"
        preferences = DownloadPreferences()

        with pytest.raises(DownloaderException, match="Pytubefix error"):
            await downloader.download_video(url, preferences)

    @patch('youtube_transcript_api.YouTubeTranscriptApi')
    @pytest.mark.asyncio
    async def test_transcript_extraction_success(self, mock_transcript_api, downloader):
        """Snippets from the transcript API are joined into a transcript."""
        # Fetched transcript with two manually-created English snippets.
        mock_transcript = Mock()
        mock_transcript.snippets = [
            Mock(text="Hello world", start=0.0, duration=2.0),
            Mock(text="This is a test", start=2.0, duration=3.0),
        ]
        mock_transcript.is_generated = False
        mock_transcript.language_code = 'en'

        # YouTubeTranscriptApi().fetch(...) -> mock_transcript
        mock_api_instance = Mock()
        mock_api_instance.fetch.return_value = mock_transcript
        mock_transcript_api.return_value = mock_api_instance

        transcript = await downloader._extract_transcript(Mock(), "test123")

        assert transcript is not None
        assert transcript.text == "Hello world This is a test"
        assert transcript.language == 'en'
        assert transcript.is_auto_generated is False
        assert len(transcript.segments) == 2
        assert transcript.source == "youtube-transcript-api"

    @patch('youtube_transcript_api.YouTubeTranscriptApi')
    @pytest.mark.asyncio
    async def test_transcript_extraction_failure(self, mock_transcript_api, downloader):
        """A transcript API failure results in ``None`` rather than an error."""
        mock_transcript_api.side_effect = Exception("No transcript available")

        transcript = await downloader._extract_transcript(Mock(), "test123")

        assert transcript is None

    @patch('pytubefix.YouTube')
    @pytest.mark.asyncio
    async def test_video_and_audio_download(self, mock_youtube_class, downloader):
        """With ``save_video=True`` both a video and an audio path are set."""
        mock_yt = Mock()
        mock_yt.title = "Test Video"
        mock_yt.length = 120
        mock_yt.views = 1000
        mock_yt.author = "Test Author"

        # Files the mocked streams claim to have written; they must exist.
        temp_video_file = downloader.output_dir / "test123_temp_video.mp4"
        temp_audio_file = downloader.output_dir / "test123_temp_audio.mp4"
        temp_video_file.write_text("fake video")
        temp_audio_file.write_text("fake audio")

        mock_video_stream = Mock()
        mock_video_stream.download.return_value = str(temp_video_file)

        mock_audio_stream = Mock()
        mock_audio_stream.download.return_value = str(temp_audio_file)

        def mock_filter(**kwargs):
            """Return the video stream for ``only_video`` queries, else audio."""
            chosen_stream = (
                mock_video_stream if kwargs.get('only_video') else mock_audio_stream
            )
            filter_mock = Mock()
            filter_mock.order_by.return_value.desc.return_value.first.return_value = chosen_stream
            return filter_mock

        mock_streams = Mock()
        mock_streams.filter = mock_filter
        mock_yt.streams = mock_streams
        mock_youtube_class.return_value = mock_yt

        # Simulate ffmpeg being unavailable: a None entry in sys.modules makes
        # ``import ffmpeg`` raise ImportError. ``patch('ffmpeg')`` is not a
        # valid target (patch needs a dotted ``package.attr`` path) and raises
        # TypeError before the test body runs.
        # NOTE(review): this only has an effect if the downloader imports
        # ffmpeg lazily at call time -- confirm against the implementation.
        with patch.dict('sys.modules', {'ffmpeg': None}):
            url = "https://youtube.com/watch?v=test123"
            preferences = DownloadPreferences(prefer_audio_only=False, save_video=True)
            result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert result.video_path is not None
        assert result.audio_path is not None

    @patch('pytubefix.YouTube')
    @pytest.mark.asyncio
    async def test_no_streams_available(self, mock_youtube_class, downloader):
        """When stream selection yields ``None`` the result has no audio path."""
        mock_yt = Mock()
        mock_yt.title = "Test Video"
        mock_yt.length = 120

        # streams.filter(...).order_by(...).desc().first() -> None
        mock_streams = Mock()
        mock_streams.filter.return_value.order_by.return_value.desc.return_value.first.return_value = None
        mock_yt.streams = mock_streams
        mock_youtube_class.return_value = mock_yt

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences(prefer_audio_only=True)
        result = await downloader.download_video(url, preferences)

        # Should still complete but with no audio file
        assert result.status == DownloadStatus.COMPLETED
        assert result.audio_path is None

    @patch('pytubefix.YouTube')
    @pytest.mark.asyncio
    async def test_connection_test_success(self, mock_youtube_class, downloader):
        """``test_connection`` returns True when YouTube can be constructed."""
        mock_yt = Mock()
        mock_yt.title = "Test Video"
        mock_youtube_class.return_value = mock_yt

        result = await downloader.test_connection()

        assert result is True

    @patch('pytubefix.YouTube')
    @pytest.mark.asyncio
    async def test_connection_test_failure(self, mock_youtube_class, downloader):
        """``test_connection`` returns False when YouTube raises."""
        mock_youtube_class.side_effect = Exception("Connection failed")

        result = await downloader.test_connection()

        assert result is False

    @pytest.mark.asyncio
    async def test_metadata_extraction(self, downloader):
        """All populated YouTube attributes are mapped onto the metadata."""
        mock_yt = Mock()
        mock_yt.title = "Test Video Title"
        mock_yt.description = "This is a test video description"
        mock_yt.length = 300  # 5 minutes
        mock_yt.views = 1500000
        # publish_date is only consumed through isoformat() in this test.
        mock_yt.publish_date = Mock()
        mock_yt.publish_date.isoformat.return_value = "2024-01-01T00:00:00"
        mock_yt.author = "Test Channel"
        mock_yt.thumbnail_url = "https://example.com/thumbnail.jpg"
        mock_yt.keywords = ["test", "video", "example"]

        metadata = await downloader._extract_metadata(mock_yt, "test123")

        assert metadata.video_id == "test123"
        assert metadata.title == "Test Video Title"
        assert metadata.description == "This is a test video description"
        assert metadata.duration_seconds == 300
        assert metadata.view_count == 1500000
        assert metadata.upload_date == "2024-01-01T00:00:00"
        assert metadata.uploader == "Test Channel"
        assert metadata.thumbnail_url == "https://example.com/thumbnail.jpg"
        assert metadata.tags == ["test", "video", "example"]

    @pytest.mark.asyncio
    async def test_metadata_extraction_partial(self, downloader):
        """Missing YouTube attributes come through as ``None`` / empty tags."""
        mock_yt = Mock()
        mock_yt.title = "Minimal Video"
        mock_yt.description = None
        mock_yt.length = None
        mock_yt.views = None
        mock_yt.publish_date = None
        mock_yt.author = None
        mock_yt.thumbnail_url = None
        mock_yt.keywords = []

        metadata = await downloader._extract_metadata(mock_yt, "minimal123")

        assert metadata.video_id == "minimal123"
        assert metadata.title == "Minimal Video"
        assert metadata.description is None
        assert metadata.duration_seconds is None
        assert metadata.view_count is None
        assert metadata.upload_date is None
        assert metadata.uploader is None
        assert metadata.thumbnail_url is None
        assert metadata.tags == []