# youtube-summarizer/backend/tests/unit/test_playwright_downloader.py
#
# 434 lines
# 16 KiB
# Python

"""
Unit tests for the Playwright browser-based downloader.
"""
import pytest
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from pathlib import Path
import json
from backend.models.video_download import (
DownloadMethod,
DownloadPreferences,
VideoDownloadResult,
DownloadStatus,
VideoQuality,
VideoMetadata,
TranscriptData,
DownloaderException,
NetworkError
)
from backend.services.video_downloaders.playwright_downloader import PlaywrightDownloader
class TestPlaywrightDownloader:
    """Test Playwright browser-based downloader functionality.

    Each test drives a ``PlaywrightDownloader`` whose MCP client has been
    replaced by an ``AsyncMock``. The canned ``call_tool`` responses stand in
    for the browser operations; per the inline comments, the download flow
    is expected to issue them in the order navigate -> extract -> download.
    """

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def mock_config(self, tmp_path):
        """Return a downloader configuration rooted in pytest's ``tmp_path``."""
        return {
            'output_dir': str(tmp_path),
            'timeout': 60,
            'headless': True,
            'user_data_dir': None,
        }

    @pytest.fixture
    def mock_mcp_client(self):
        """Return an ``AsyncMock`` standing in for the MCP client."""
        return AsyncMock()

    @pytest.fixture
    def downloader(self, mock_config, mock_mcp_client):
        """Yield a ``PlaywrightDownloader`` wired to the mocked MCP client.

        The fixture yields from inside the ``patch`` block so ``MCPClient``
        stays mocked for the whole test, not only while the downloader is
        being constructed.
        """
        with patch(
            'backend.services.video_downloaders.playwright_downloader.MCPClient',
            return_value=mock_mcp_client,
        ):
            yield PlaywrightDownloader(config=mock_config)

    # ------------------------------------------------------------------
    # Construction and static capabilities
    # ------------------------------------------------------------------

    def test_initialization(self, downloader, mock_config):
        """Test downloader initialization from the supplied config."""
        assert downloader.method == DownloadMethod.PLAYWRIGHT
        assert downloader.output_dir == Path(mock_config['output_dir'])
        assert downloader.timeout == mock_config['timeout']
        assert downloader.headless is True

    def test_capabilities(self, downloader):
        """Test the capabilities advertised by the downloader."""
        assert downloader.supports_audio_only() is True
        assert downloader.supports_quality_selection() is True

        # Query the format list once and check each expected entry against it.
        supported_formats = downloader.get_supported_formats()
        assert "mp4" in supported_formats
        assert "webm" in supported_formats
        assert "mp3" in supported_formats

    # ------------------------------------------------------------------
    # Successful downloads
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_successful_video_download(self, downloader):
        """Test successful video download via browser automation."""
        mock_client = downloader.mcp_client
        downloaded_file = downloader.output_dir / "test123.mp4"

        # Canned MCP responses, consumed one per ``call_tool`` invocation.
        mock_client.call_tool.side_effect = [
            {"success": True},  # navigate
            {  # extract video info
                "success": True,
                "video_info": {
                    "title": "Test Video",
                    "duration": 240,
                    "video_url": "https://example.com/video.mp4",
                    "audio_url": "https://example.com/audio.mp4",
                    "quality": "720p",
                },
            },
            {"success": True, "file_path": str(downloaded_file)},  # download
        ]

        # Create the file the mocked download step claims to have written.
        downloaded_file.write_text("fake video content")

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences(save_video=True)

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert result.video_id == "test123"
        assert result.method == DownloadMethod.PLAYWRIGHT
        assert result.video_path is not None
        assert result.metadata is not None
        assert result.metadata.title == "Test Video"
        assert result.metadata.duration_seconds == 240

    @pytest.mark.asyncio
    async def test_audio_only_download(self, downloader):
        """Test audio-only download via browser."""
        mock_client = downloader.mcp_client
        downloaded_file = downloader.output_dir / "audio123.mp4"

        # The extracted info carries only an audio URL (no ``video_url``).
        mock_client.call_tool.side_effect = [
            {"success": True},  # navigate
            {  # extract audio info
                "success": True,
                "video_info": {
                    "title": "Audio Test",
                    "duration": 180,
                    "audio_url": "https://example.com/audio.mp4",
                    "quality": "medium",
                },
            },
            {"success": True, "file_path": str(downloaded_file)},  # download
        ]

        # Create the file the mocked download step claims to have written.
        downloaded_file.write_text("fake audio content")

        url = "https://youtube.com/watch?v=audio123"
        preferences = DownloadPreferences(prefer_audio_only=True)

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert result.audio_path is not None
        assert result.video_path is None  # Audio-only mode

    # ------------------------------------------------------------------
    # Failure of an individual step
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_navigation_failure(self, downloader):
        """Test handling of browser navigation failure."""
        mock_client = downloader.mcp_client
        # ``return_value`` (not ``side_effect``): every call reports failure.
        mock_client.call_tool.return_value = {
            "success": False,
            "error": "Navigation timeout",
        }

        url = "https://youtube.com/watch?v=timeout123"
        preferences = DownloadPreferences()

        with pytest.raises(DownloaderException, match="Browser navigation failed"):
            await downloader.download_video(url, preferences)

    @pytest.mark.asyncio
    async def test_video_info_extraction_failure(self, downloader):
        """Test handling of video info extraction failure."""
        mock_client = downloader.mcp_client
        mock_client.call_tool.side_effect = [
            {"success": True},  # navigate successful
            {  # extract info fails
                "success": False,
                "error": "Could not find video player",
            },
        ]

        url = "https://youtube.com/watch?v=noinfo123"
        preferences = DownloadPreferences()

        with pytest.raises(
            DownloaderException, match="Failed to extract video information"
        ):
            await downloader.download_video(url, preferences)

    @pytest.mark.asyncio
    async def test_download_failure(self, downloader):
        """Test handling of file download failure."""
        mock_client = downloader.mcp_client
        mock_client.call_tool.side_effect = [
            {"success": True},  # navigate
            {  # extract info
                "success": True,
                "video_info": {
                    "title": "Test Video",
                    "video_url": "https://example.com/video.mp4",
                },
            },
            {  # download fails
                "success": False,
                "error": "Download interrupted",
            },
        ]

        url = "https://youtube.com/watch?v=downloadfail123"
        preferences = DownloadPreferences()

        with pytest.raises(DownloaderException, match="Download failed"):
            await downloader.download_video(url, preferences)

    # ------------------------------------------------------------------
    # Quality selection
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_quality_selection(self, downloader):
        """Test that the requested quality is forwarded to the extract call."""
        mock_client = downloader.mcp_client
        downloaded_file = downloader.output_dir / "hd123.mp4"

        mock_client.call_tool.side_effect = [
            {"success": True},  # navigate
            {  # extract info with quality preference
                "success": True,
                "video_info": {
                    "title": "HD Video",
                    "video_url": "https://example.com/video_1080p.mp4",
                    "quality": "1080p",
                },
            },
            {"success": True, "file_path": str(downloaded_file)},  # download
        ]

        # Create the file the mocked download step claims to have written.
        downloaded_file.write_text("fake HD video")

        url = "https://youtube.com/watch?v=hd123"
        preferences = DownloadPreferences(quality=VideoQuality.HIGH_1080P)

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED

        # The second recorded call is the extract-info step; its keyword
        # arguments must carry the formatted quality preference.
        extract_call = mock_client.call_tool.call_args_list[1]
        assert extract_call.kwargs["args"]["quality"] == "1080p"

    # ------------------------------------------------------------------
    # Connection test
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_connection_test_success(self, downloader):
        """Test successful connection test."""
        mock_client = downloader.mcp_client
        mock_client.call_tool.return_value = {"success": True}

        result = await downloader.test_connection()

        assert result is True
        mock_client.call_tool.assert_called_once()

    @pytest.mark.asyncio
    async def test_connection_test_failure(self, downloader):
        """Test failed connection test."""
        mock_client = downloader.mcp_client
        mock_client.call_tool.return_value = {
            "success": False,
            "error": "Browser not available",
        }

        result = await downloader.test_connection()

        assert result is False

    @pytest.mark.asyncio
    async def test_mcp_client_exception(self, downloader):
        """Test that an exception raised by the MCP client is wrapped."""
        mock_client = downloader.mcp_client
        mock_client.call_tool.side_effect = Exception("MCP connection lost")

        url = "https://youtube.com/watch?v=mcpfail123"
        preferences = DownloadPreferences()

        with pytest.raises(DownloaderException, match="Browser automation error"):
            await downloader.download_video(url, preferences)

    # ------------------------------------------------------------------
    # Helper methods
    # ------------------------------------------------------------------

    def test_get_browser_options(self, downloader):
        """Test browser options generation."""
        options = downloader._get_browser_options()

        assert isinstance(options, dict)
        assert "headless" in options
        assert options["headless"] is True
        assert "timeout" in options
        assert options["timeout"] == 60000  # Converted to milliseconds

    def test_get_browser_options_with_user_data(self, tmp_path):
        """Test browser options with a user data directory configured."""
        user_data_dir = tmp_path / "browser_data"
        user_data_dir.mkdir()

        config = {
            'output_dir': str(tmp_path),
            'headless': False,
            'user_data_dir': str(user_data_dir),
        }

        # Build the downloader and query its options while MCPClient is
        # still patched, so no real client is involved at either step.
        with patch('backend.services.video_downloaders.playwright_downloader.MCPClient'):
            downloader = PlaywrightDownloader(config=config)
            options = downloader._get_browser_options()

        assert options["headless"] is False
        assert "userDataDir" in options
        assert options["userDataDir"] == str(user_data_dir)

    def test_format_quality_preference(self, downloader):
        """Test quality preference formatting for every covered quality."""
        test_cases = [
            (VideoQuality.LOW_480P, "480p"),
            (VideoQuality.MEDIUM_720P, "720p"),
            (VideoQuality.HIGH_1080P, "1080p"),
            (VideoQuality.AUTO, "best"),
        ]

        for quality, expected in test_cases:
            result = downloader._format_quality_preference(quality)
            # The message names the failing case, which a bare assert in a
            # loop would not.
            assert result == expected, (
                f"{quality!r} formatted as {result!r}, expected {expected!r}"
            )

    @pytest.mark.asyncio
    async def test_extract_metadata_from_video_info(self, downloader):
        """Test metadata extraction from a fully populated video info dict."""
        video_info = {
            "title": "Browser Test Video",
            "description": "Video extracted via browser",
            "duration": 300,
            "view_count": 50000,
            "upload_date": "2024-01-01",
            "uploader": "Browser Channel",
            "thumbnail_url": "https://example.com/thumb.jpg",
            "tags": ["browser", "automation", "test"],
        }

        metadata = await downloader._extract_metadata_from_video_info(
            video_info, "browser123"
        )

        assert metadata.video_id == "browser123"
        assert metadata.title == "Browser Test Video"
        assert metadata.description == "Video extracted via browser"
        assert metadata.duration_seconds == 300
        assert metadata.view_count == 50000
        assert metadata.upload_date == "2024-01-01"
        assert metadata.uploader == "Browser Channel"
        assert metadata.thumbnail_url == "https://example.com/thumb.jpg"
        assert metadata.tags == ["browser", "automation", "test"]

    @pytest.mark.asyncio
    async def test_extract_metadata_minimal_info(self, downloader):
        """Test metadata extraction when only the title is available."""
        video_info = {
            "title": "Minimal Video",
        }

        metadata = await downloader._extract_metadata_from_video_info(
            video_info, "minimal123"
        )

        assert metadata.video_id == "minimal123"
        assert metadata.title == "Minimal Video"
        # Fields absent from the input are expected to come back as None.
        assert metadata.description is None
        assert metadata.duration_seconds is None
        assert metadata.view_count is None

    # ------------------------------------------------------------------
    # Restricted content and transport errors
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_private_video_detection(self, downloader):
        """Test that a private video surfaces as an extraction failure."""
        mock_client = downloader.mcp_client
        mock_client.call_tool.side_effect = [
            {"success": True},  # navigate
            {  # extract fails due to privacy
                "success": False,
                "error": "Video is private",
            },
        ]

        url = "https://youtube.com/watch?v=private123"
        preferences = DownloadPreferences()

        with pytest.raises(
            DownloaderException, match="Failed to extract video information"
        ):
            await downloader.download_video(url, preferences)

    @pytest.mark.asyncio
    async def test_age_restricted_video_handling(self, downloader):
        """Test that an age-restricted video surfaces as an extraction failure."""
        mock_client = downloader.mcp_client
        mock_client.call_tool.side_effect = [
            {"success": True},  # navigate
            {  # extract fails due to age restriction
                "success": False,
                "error": "Sign in to confirm your age",
            },
        ]

        url = "https://youtube.com/watch?v=restricted123"
        preferences = DownloadPreferences()

        with pytest.raises(
            DownloaderException, match="Failed to extract video information"
        ):
            await downloader.download_video(url, preferences)

    @pytest.mark.asyncio
    async def test_network_timeout_handling(self, downloader):
        """Test that a timeout raised by the MCP client is wrapped."""
        mock_client = downloader.mcp_client
        mock_client.call_tool.side_effect = Exception("Request timeout")

        url = "https://youtube.com/watch?v=timeout123"
        preferences = DownloadPreferences()

        with pytest.raises(DownloaderException, match="Browser automation error"):
            await downloader.download_video(url, preferences)

    # ------------------------------------------------------------------
    # Housekeeping and large files
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_cleanup_temp_files(self, downloader):
        """Test that cleanup removes a temporary directory and its contents."""
        # Create a temporary directory containing one file.
        temp_dir = downloader.output_dir / "temp_browser"
        temp_dir.mkdir()
        temp_file = temp_dir / "temp_download.mp4"
        temp_file.write_text("temporary content")

        # Sanity-check the setup before exercising the cleanup.
        assert temp_dir.exists()
        assert temp_file.exists()

        await downloader.cleanup_temp_files(temp_dir)

        assert not temp_dir.exists()

    @pytest.mark.asyncio
    async def test_large_file_download(self, downloader):
        """Test handling of a download whose reported size is large."""
        mock_client = downloader.mcp_client
        downloaded_file = downloader.output_dir / "large123.mp4"

        mock_client.call_tool.side_effect = [
            {"success": True},  # navigate
            {  # extract info
                "success": True,
                "video_info": {
                    "title": "Large Video",
                    "duration": 3600,  # 1 hour
                    "video_url": "https://example.com/large_video.mp4",
                    "file_size": 1073741824,  # 1 GB
                },
            },
            {"success": True, "file_path": str(downloaded_file)},  # download
        ]

        # Only the reported size is large; the file on disk stays tiny so
        # the test remains fast.
        downloaded_file.write_text("fake large video content")

        url = "https://youtube.com/watch?v=large123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert result.metadata.duration_seconds == 3600