# youtube-summarizer/backend/tests/unit/test_playwright_downloader.py

"""
Unit tests for Playwright browser-based downloader
"""
import pytest
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from pathlib import Path
import json

from backend.models.video_download import (
    DownloadMethod,
    DownloadPreferences,
    VideoDownloadResult,
    DownloadStatus,
    VideoQuality,
    VideoMetadata,
    TranscriptData,
    DownloaderException,
    NetworkError
)
from backend.services.video_downloaders.playwright_downloader import PlaywrightDownloader


class TestPlaywrightDownloader:
    """Test Playwright browser-based downloader functionality"""

    @pytest.fixture
    def mock_config(self, tmp_path):
        """Build the downloader configuration used by these tests.

        Returns:
            A dict whose ``output_dir`` is pytest's ``tmp_path`` (as a string),
            with ``timeout`` set to 60, ``headless`` enabled and
            ``user_data_dir`` left unset.
        """
        settings = dict(
            output_dir=str(tmp_path),
            timeout=60,
            headless=True,
            user_data_dir=None,
        )
        return settings

    @pytest.fixture
    def mock_mcp_client(self):
        """Provide an ``AsyncMock`` that stands in for the MCP client."""
        return AsyncMock()

    @pytest.fixture
    def downloader(self, mock_config, mock_mcp_client):
        """Yield a ``PlaywrightDownloader`` built with ``MCPClient`` patched out.

        ``MCPClient`` in the downloader module is replaced so that
        instantiating it returns ``mock_mcp_client``.

        The fixture yields from inside the ``patch`` context instead of
        returning. Returning would undo the patch as soon as the downloader
        was constructed, leaving the real ``MCPClient`` in place for the test
        body. Yielding keeps the replacement active until fixture teardown.
        """
        target = 'backend.services.video_downloaders.playwright_downloader.MCPClient'
        with patch(target, return_value=mock_mcp_client):
            yield PlaywrightDownloader(config=mock_config)

    def test_initialization(self, downloader, mock_config):
        """The downloader exposes the method, output dir, timeout and headless flag from its config."""
        expected_dir = Path(mock_config['output_dir'])
        expected_timeout = mock_config['timeout']

        assert downloader.method == DownloadMethod.PLAYWRIGHT
        assert downloader.output_dir == expected_dir
        assert downloader.timeout == expected_timeout
        assert downloader.headless is True

    def test_capabilities(self, downloader):
        """The downloader reports audio-only, quality selection and mp4/webm/mp3 support."""
        assert downloader.supports_audio_only() is True
        assert downloader.supports_quality_selection() is True

        # Query the format list once per expected entry.
        for fmt in ("mp4", "webm", "mp3"):
            assert fmt in downloader.get_supported_formats()

    @pytest.mark.asyncio
    async def test_successful_video_download(self, downloader):
        """A navigate / extract / download sequence produces a completed result.

        ``call_tool`` is primed with three replies, in the order the test
        expects them to be consumed, and a placeholder file is written at the
        path reported by the download reply.
        """
        client = downloader.mcp_client
        saved_file = downloader.output_dir / "test123.mp4"

        navigate_reply = {"success": True}
        info_reply = {
            "success": True,
            "video_info": {
                "title": "Test Video",
                "duration": 240,
                "video_url": "https://example.com/video.mp4",
                "audio_url": "https://example.com/audio.mp4",
                "quality": "720p",
            },
        }
        download_reply = {"success": True, "file_path": str(saved_file)}
        client.call_tool.side_effect = [navigate_reply, info_reply, download_reply]

        # Put a real file on disk at the path the download reply reports.
        saved_file.write_text("fake video content")

        prefs = DownloadPreferences(save_video=True)
        outcome = await downloader.download_video("https://youtube.com/watch?v=test123", prefs)

        assert outcome.status == DownloadStatus.COMPLETED
        assert outcome.video_id == "test123"
        assert outcome.method == DownloadMethod.PLAYWRIGHT
        assert outcome.video_path is not None

        meta = outcome.metadata
        assert meta is not None
        assert meta.title == "Test Video"
        assert meta.duration_seconds == 240

    @pytest.mark.asyncio
    async def test_audio_only_download(self, downloader):
        """With ``prefer_audio_only`` set, the result has an audio path and no video path."""
        client = downloader.mcp_client
        saved_file = downloader.output_dir / "audio123.mp4"

        # Replies queued in the expected call order: navigate, extract, download.
        navigate_reply = {"success": True}
        info_reply = {
            "success": True,
            "video_info": {
                "title": "Audio Test",
                "duration": 180,
                "audio_url": "https://example.com/audio.mp4",
                "quality": "medium",
            },
        }
        download_reply = {"success": True, "file_path": str(saved_file)}
        client.call_tool.side_effect = [navigate_reply, info_reply, download_reply]

        # Placeholder for the file the download step reports.
        saved_file.write_text("fake audio content")

        prefs = DownloadPreferences(prefer_audio_only=True)
        outcome = await downloader.download_video("https://youtube.com/watch?v=audio123", prefs)

        assert outcome.status == DownloadStatus.COMPLETED
        assert outcome.audio_path is not None
        # Audio-only mode must not report a video file.
        assert outcome.video_path is None

    @pytest.mark.asyncio
    async def test_navigation_failure(self, downloader):
        """An unsuccessful ``call_tool`` reply raises "Browser navigation failed"."""
        failure_reply = {"success": False, "error": "Navigation timeout"}
        downloader.mcp_client.call_tool.return_value = failure_reply

        prefs = DownloadPreferences()
        target_url = "https://youtube.com/watch?v=timeout123"

        with pytest.raises(DownloaderException, match="Browser navigation failed"):
            await downloader.download_video(target_url, prefs)

    @pytest.mark.asyncio
    async def test_video_info_extraction_failure(self, downloader):
        """A failed info-extraction reply raises "Failed to extract video information"."""
        navigate_ok = {"success": True}
        extract_failed = {"success": False, "error": "Could not find video player"}
        downloader.mcp_client.call_tool.side_effect = [navigate_ok, extract_failed]

        prefs = DownloadPreferences()
        target_url = "https://youtube.com/watch?v=noinfo123"

        with pytest.raises(DownloaderException, match="Failed to extract video information"):
            await downloader.download_video(target_url, prefs)

    @pytest.mark.asyncio
    async def test_download_failure(self, downloader):
        """A failed download reply, after successful navigation and extraction, raises "Download failed"."""
        navigate_ok = {"success": True}
        extract_ok = {
            "success": True,
            "video_info": {
                "title": "Test Video",
                "video_url": "https://example.com/video.mp4",
            },
        }
        download_failed = {"success": False, "error": "Download interrupted"}
        downloader.mcp_client.call_tool.side_effect = [navigate_ok, extract_ok, download_failed]

        prefs = DownloadPreferences()
        target_url = "https://youtube.com/watch?v=downloadfail123"

        with pytest.raises(DownloaderException, match="Download failed"):
            await downloader.download_video(target_url, prefs)

    @pytest.mark.asyncio
    async def test_quality_selection(self, downloader):
        """A ``HIGH_1080P`` preference is passed as ``"1080p"`` in the second ``call_tool`` call."""
        client = downloader.mcp_client
        saved_file = downloader.output_dir / "hd123.mp4"

        navigate_reply = {"success": True}
        info_reply = {
            "success": True,
            "video_info": {
                "title": "HD Video",
                "video_url": "https://example.com/video_1080p.mp4",
                "quality": "1080p",
            },
        }
        download_reply = {"success": True, "file_path": str(saved_file)}
        client.call_tool.side_effect = [navigate_reply, info_reply, download_reply]

        # Placeholder for the file the download step reports.
        saved_file.write_text("fake HD video")

        prefs = DownloadPreferences(quality=VideoQuality.HIGH_1080P)
        outcome = await downloader.download_video("https://youtube.com/watch?v=hd123", prefs)

        assert outcome.status == DownloadStatus.COMPLETED

        # Each recorded call unpacks to (positional args, keyword args); the
        # second call is the info-extraction step.
        _positional, extract_kwargs = client.call_tool.call_args_list[1]
        assert extract_kwargs["args"]["quality"] == "1080p"

    @pytest.mark.asyncio
    async def test_connection_test_success(self, downloader):
        """``test_connection`` returns True on a successful reply and calls ``call_tool`` exactly once."""
        client = downloader.mcp_client
        client.call_tool.return_value = {"success": True}

        connected = await downloader.test_connection()

        assert connected is True
        client.call_tool.assert_called_once()

    @pytest.mark.asyncio
    async def test_connection_test_failure(self, downloader):
        """``test_connection`` returns False when ``call_tool`` reports failure."""
        failure_reply = {"success": False, "error": "Browser not available"}
        downloader.mcp_client.call_tool.return_value = failure_reply

        connected = await downloader.test_connection()

        assert connected is False

    @pytest.mark.asyncio
    async def test_mcp_client_exception(self, downloader):
        """An exception raised by ``call_tool`` surfaces as "Browser automation error"."""
        downloader.mcp_client.call_tool.side_effect = Exception("MCP connection lost")

        prefs = DownloadPreferences()
        target_url = "https://youtube.com/watch?v=mcpfail123"

        with pytest.raises(DownloaderException, match="Browser automation error"):
            await downloader.download_video(target_url, prefs)

    def test_get_browser_options(self, downloader):
        """``_get_browser_options`` returns a dict with the headless flag and a millisecond timeout."""
        browser_opts = downloader._get_browser_options()

        assert isinstance(browser_opts, dict)
        assert "headless" in browser_opts and browser_opts["headless"] is True
        # The configured timeout of 60 is expected back as 60000 (milliseconds).
        assert "timeout" in browser_opts and browser_opts["timeout"] == 60000

    def test_get_browser_options_with_user_data(self, tmp_path):
        """A configured ``user_data_dir`` shows up as ``userDataDir`` in the browser options."""
        profile_dir = tmp_path / "browser_data"
        profile_dir.mkdir()

        settings = dict(
            output_dir=str(tmp_path),
            headless=False,
            user_data_dir=str(profile_dir),
        )

        with patch('backend.services.video_downloaders.playwright_downloader.MCPClient'):
            instance = PlaywrightDownloader(config=settings)
            browser_opts = instance._get_browser_options()

            assert browser_opts["headless"] is False
            assert "userDataDir" in browser_opts
            assert browser_opts["userDataDir"] == str(profile_dir)

    def test_format_quality_preference(self, downloader):
        """Each ``VideoQuality`` member formats to its expected label string."""
        qualities = (
            VideoQuality.LOW_480P,
            VideoQuality.MEDIUM_720P,
            VideoQuality.HIGH_1080P,
            VideoQuality.AUTO,
        )
        labels = ("480p", "720p", "1080p", "best")

        for quality, label in zip(qualities, labels):
            formatted = downloader._format_quality_preference(quality)
            assert formatted == label

    @pytest.mark.asyncio
    async def test_extract_metadata_from_video_info(self, downloader):
        """A fully populated video-info dict is mapped onto the metadata attributes."""
        browser_info = {
            "title": "Browser Test Video",
            "description": "Video extracted via browser",
            "duration": 300,
            "view_count": 50000,
            "upload_date": "2024-01-01",
            "uploader": "Browser Channel",
            "thumbnail_url": "https://example.com/thumb.jpg",
            "tags": ["browser", "automation", "test"],
        }

        metadata = await downloader._extract_metadata_from_video_info(browser_info, "browser123")

        # Attribute name -> expected value. The input's "duration" key is
        # expected to appear as ``duration_seconds``.
        expected_attrs = {
            "video_id": "browser123",
            "title": "Browser Test Video",
            "description": "Video extracted via browser",
            "duration_seconds": 300,
            "view_count": 50000,
            "upload_date": "2024-01-01",
            "uploader": "Browser Channel",
            "thumbnail_url": "https://example.com/thumb.jpg",
            "tags": ["browser", "automation", "test"],
        }
        for attr_name, expected in expected_attrs.items():
            assert getattr(metadata, attr_name) == expected

    @pytest.mark.asyncio
    async def test_extract_metadata_minimal_info(self, downloader):
        """With only a title supplied, the other checked metadata fields are None."""
        metadata = await downloader._extract_metadata_from_video_info(
            {"title": "Minimal Video"},
            "minimal123",
        )

        assert metadata.video_id == "minimal123"
        assert metadata.title == "Minimal Video"
        for missing_attr in ("description", "duration_seconds", "view_count"):
            assert getattr(metadata, missing_attr) is None

    @pytest.mark.asyncio
    async def test_private_video_detection(self, downloader):
        """A "Video is private" extraction error raises "Failed to extract video information"."""
        navigate_ok = {"success": True}
        private_error = {"success": False, "error": "Video is private"}
        downloader.mcp_client.call_tool.side_effect = [navigate_ok, private_error]

        prefs = DownloadPreferences()
        target_url = "https://youtube.com/watch?v=private123"

        with pytest.raises(DownloaderException, match="Failed to extract video information"):
            await downloader.download_video(target_url, prefs)

    @pytest.mark.asyncio
    async def test_age_restricted_video_handling(self, downloader):
        """An age-confirmation extraction error raises "Failed to extract video information"."""
        navigate_ok = {"success": True}
        age_gate_error = {"success": False, "error": "Sign in to confirm your age"}
        downloader.mcp_client.call_tool.side_effect = [navigate_ok, age_gate_error]

        prefs = DownloadPreferences()
        target_url = "https://youtube.com/watch?v=restricted123"

        with pytest.raises(DownloaderException, match="Failed to extract video information"):
            await downloader.download_video(target_url, prefs)

    @pytest.mark.asyncio
    async def test_network_timeout_handling(self, downloader):
        """A "Request timeout" exception from ``call_tool`` surfaces as "Browser automation error"."""
        downloader.mcp_client.call_tool.side_effect = Exception("Request timeout")

        prefs = DownloadPreferences()
        target_url = "https://youtube.com/watch?v=timeout123"

        with pytest.raises(DownloaderException, match="Browser automation error"):
            await downloader.download_video(target_url, prefs)

    @pytest.mark.asyncio
    async def test_cleanup_temp_files(self, downloader):
        """``cleanup_temp_files`` removes the temporary directory it is given."""
        # Set up a scratch directory holding one file.
        scratch_dir = downloader.output_dir / "temp_browser"
        scratch_dir.mkdir()
        scratch_file = scratch_dir / "temp_download.mp4"
        scratch_file.write_text("temporary content")

        # Check the setup before exercising the cleanup.
        assert scratch_dir.exists() and scratch_file.exists()

        await downloader.cleanup_temp_files(scratch_dir)

        assert not scratch_dir.exists()

    @pytest.mark.asyncio
    async def test_large_file_download(self, downloader):
        """A download whose info reports a 1 GB, one-hour video still completes.

        Only the reported ``file_size`` is large; the file written to disk is
        a small placeholder.
        """
        client = downloader.mcp_client
        saved_file = downloader.output_dir / "large123.mp4"

        navigate_reply = {"success": True}
        info_reply = {
            "success": True,
            "video_info": {
                "title": "Large Video",
                "duration": 3600,  # one hour
                "video_url": "https://example.com/large_video.mp4",
                "file_size": 1073741824,  # 1 GB
            },
        }
        download_reply = {"success": True, "file_path": str(saved_file)}
        client.call_tool.side_effect = [navigate_reply, info_reply, download_reply]

        # Small stand-in for the large file.
        saved_file.write_text("fake large video content")

        prefs = DownloadPreferences()
        outcome = await downloader.download_video("https://youtube.com/watch?v=large123", prefs)

        assert outcome.status == DownloadStatus.COMPLETED
        assert outcome.metadata.duration_seconds == 3600