# youtube-summarizer/backend/tests/unit/test_intelligent_video_down...

# 447 lines
# 18 KiB
# Python

"""
Unit tests for intelligent video downloader orchestrator
"""
import pytest
import asyncio
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from datetime import datetime, timedelta
import uuid
from backend.models.video_download import (
DownloadMethod,
DownloadPreferences,
VideoDownloadResult,
DownloadStatus,
DownloadJobStatus,
DownloadMetrics,
HealthCheckResult,
AllMethodsFailedError,
DownloaderException,
VideoNotAvailableError,
NetworkError
)
from backend.config.video_download_config import VideoDownloadConfig
from backend.services.intelligent_video_downloader import IntelligentVideoDownloader
class TestIntelligentVideoDownloader:
    """Test intelligent video downloader orchestration.

    Every test here runs against an ``IntelligentVideoDownloader`` built while
    ``DownloaderFactory`` is patched, so the per-method downloaders are
    ``AsyncMock`` objects rather than real download backends.
    """

    @pytest.fixture
    def mock_config(self, tmp_path):
        """Mock configuration for testing, with storage rooted at ``tmp_path``."""
        config = VideoDownloadConfig(
            storage_path=tmp_path,
            max_storage_gb=1.0,
            enabled_methods=[DownloadMethod.PYTUBEFIX, DownloadMethod.TRANSCRIPT_ONLY],
            method_timeout_seconds=30,
            max_retries_per_method=2,
            max_concurrent_downloads=2,
        )
        return config

    @pytest.fixture
    def mock_downloader_factory(self):
        """Mock downloader factory.

        Patches ``DownloaderFactory`` in the orchestrator module and yields the
        patched factory. ``create`` hands out a succeeding mock for PYTUBEFIX,
        a partial-result mock for TRANSCRIPT_ONLY, and a failing mock for any
        other method.
        """
        with patch('backend.services.intelligent_video_downloader.DownloaderFactory') as mock_factory:
            # Mock successful downloader
            mock_successful_downloader = AsyncMock()
            mock_successful_downloader.download_video.return_value = VideoDownloadResult(
                video_id="test123",
                video_url="https://youtube.com/watch?v=test123",
                status=DownloadStatus.COMPLETED,
                method=DownloadMethod.PYTUBEFIX,
            )
            mock_successful_downloader.test_connection.return_value = True

            # Mock failing downloader
            mock_failing_downloader = AsyncMock()
            mock_failing_downloader.download_video.side_effect = DownloaderException("Download failed")
            mock_failing_downloader.test_connection.return_value = False

            # Mock transcript downloader
            mock_transcript_downloader = AsyncMock()
            mock_transcript_downloader.download_video.return_value = VideoDownloadResult(
                video_id="test123",
                video_url="https://youtube.com/watch?v=test123",
                status=DownloadStatus.PARTIAL,
                method=DownloadMethod.TRANSCRIPT_ONLY,
                is_partial=True,
            )
            mock_transcript_downloader.test_connection.return_value = True

            mock_factory.get_available_methods.return_value = [
                DownloadMethod.PYTUBEFIX,
                DownloadMethod.TRANSCRIPT_ONLY,
            ]

            def create_side_effect(method, config):
                if method == DownloadMethod.PYTUBEFIX:
                    return mock_successful_downloader
                elif method == DownloadMethod.TRANSCRIPT_ONLY:
                    return mock_transcript_downloader
                else:
                    return mock_failing_downloader

            mock_factory.create.side_effect = create_side_effect

            # Yield (not return) so the patch stays active for the whole test.
            yield mock_factory

    @pytest.fixture
    def downloader(self, mock_config, mock_downloader_factory):
        """Create intelligent downloader instance for testing"""
        return IntelligentVideoDownloader(mock_config)

    def test_initialization(self, downloader, mock_config):
        """Test downloader initialization"""
        assert downloader.config == mock_config
        assert len(downloader.downloaders) == 2  # PYTUBEFIX and TRANSCRIPT_ONLY
        assert DownloadMethod.PYTUBEFIX in downloader.downloaders
        assert DownloadMethod.TRANSCRIPT_ONLY in downloader.downloaders
        assert isinstance(downloader.metrics, DownloadMetrics)
        assert downloader.success_cache == {}
        assert downloader.active_jobs == {}

    @pytest.mark.asyncio
    async def test_successful_download_first_method(self, downloader):
        """Test successful download on first method attempt"""
        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert result.video_id == "test123"
        assert result.method == DownloadMethod.PYTUBEFIX
        assert downloader.metrics.successful_downloads == 1
        assert downloader.metrics.total_attempts == 1

    @pytest.mark.asyncio
    async def test_fallback_to_transcript_only(self, downloader):
        """Test fallback to transcript-only when primary methods fail"""
        # Make pytubefix fail
        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video.side_effect = NetworkError("403 Forbidden")

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.PARTIAL
        assert result.method == DownloadMethod.TRANSCRIPT_ONLY
        assert result.is_partial is True

    @pytest.mark.asyncio
    async def test_all_methods_fail(self, downloader):
        """Test when all methods fail"""
        # Make all downloaders fail
        for downloader_instance in downloader.downloaders.values():
            downloader_instance.download_video.side_effect = DownloaderException("All failed")

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        with pytest.raises(AllMethodsFailedError):
            await downloader.download_video(url, preferences)

        # Checked after the ``raises`` block: inside it, this line would be
        # skipped as soon as the awaited call raises.
        assert downloader.metrics.failed_downloads == 1

    @pytest.mark.asyncio
    async def test_video_not_available_error(self, downloader):
        """Test handling of permanent video unavailability"""
        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video.side_effect = VideoNotAvailableError("Private video")
        downloader.downloaders[DownloadMethod.TRANSCRIPT_ONLY].download_video.side_effect = VideoNotAvailableError("Private video")

        url = "https://youtube.com/watch?v=private123"
        preferences = DownloadPreferences()

        with pytest.raises(AllMethodsFailedError):
            await downloader.download_video(url, preferences)

    @pytest.mark.asyncio
    async def test_retry_logic(self, downloader):
        """Test retry logic for transient failures"""
        # First call fails, second succeeds
        call_count = 0

        async def side_effect(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                raise DownloaderException("Transient error")
            else:
                return VideoDownloadResult(
                    video_id="test123",
                    video_url="https://youtube.com/watch?v=test123",
                    status=DownloadStatus.COMPLETED,
                    method=DownloadMethod.PYTUBEFIX,
                )

        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video.side_effect = side_effect

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert call_count == 2  # One retry occurred

    @pytest.mark.asyncio
    async def test_concurrent_download_limit(self, downloader):
        """Test concurrent download limiting"""
        # Create multiple download tasks
        urls = [
            "https://youtube.com/watch?v=test1",
            "https://youtube.com/watch?v=test2",
            "https://youtube.com/watch?v=test3",
            "https://youtube.com/watch?v=test4",
        ]
        preferences = DownloadPreferences()

        # Start all downloads concurrently
        tasks = [downloader.download_video(url, preferences) for url in urls]
        results = await asyncio.gather(*tasks)

        # All should complete successfully
        assert len(results) == 4
        assert all(result.status == DownloadStatus.COMPLETED for result in results)

        # Check that semaphore was respected (indirectly via successful completion)
        # NOTE(review): this only shows that four downloads finish with a limit
        # of two; it does not measure how many ran at the same time.
        assert downloader.metrics.successful_downloads == 4

    @pytest.mark.asyncio
    async def test_get_prioritized_methods(self, downloader):
        """Test method prioritization logic"""
        preferences = DownloadPreferences(prefer_audio_only=True)

        # Mock supports_audio_only for testing
        # NOTE(review): the downloaders are AsyncMock objects, so calling
        # supports_audio_only() produces a coroutine. If the orchestrator calls
        # it without awaiting, both values are truthy - confirm against the
        # implementation.
        downloader.downloaders[DownloadMethod.PYTUBEFIX].supports_audio_only.return_value = True
        downloader.downloaders[DownloadMethod.TRANSCRIPT_ONLY].supports_audio_only.return_value = False

        prioritized = await downloader._get_prioritized_methods("test123", preferences)

        # Should prioritize audio-capable methods
        assert len(prioritized) >= 2
        # TRANSCRIPT_ONLY should be last as ultimate fallback
        assert prioritized[-1] == DownloadMethod.TRANSCRIPT_ONLY

    @pytest.mark.asyncio
    async def test_job_status_tracking(self, downloader):
        """Test job status tracking during download"""
        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        # Mock slow download to test job tracking
        async def slow_download(*args, **kwargs):
            await asyncio.sleep(0.1)  # Small delay
            return VideoDownloadResult(
                video_id="test123",
                video_url=url,
                status=DownloadStatus.COMPLETED,
                method=DownloadMethod.PYTUBEFIX,
            )

        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video = slow_download

        # Start download
        download_task = asyncio.create_task(downloader.download_video(url, preferences))

        # Give it a moment to start
        await asyncio.sleep(0.01)

        # Check that job was created
        active_jobs = downloader.get_active_jobs()
        assert len(active_jobs) == 1

        job_id = next(iter(active_jobs.keys()))
        job_status = await downloader.get_job_status(job_id)
        assert job_status is not None
        assert job_status.video_url == url
        assert job_status.status in [DownloadStatus.IN_PROGRESS, DownloadStatus.COMPLETED]

        # Wait for completion (the result itself is not inspected here)
        await download_task

        # Job should be cleaned up
        assert len(downloader.get_active_jobs()) == 0

    @pytest.mark.asyncio
    async def test_job_cancellation(self, downloader):
        """Test job cancellation functionality"""
        # Create a mock job
        job_id = str(uuid.uuid4())
        job_status = DownloadJobStatus(
            job_id=job_id,
            video_url="https://youtube.com/watch?v=test123",
            status=DownloadStatus.IN_PROGRESS,
        )
        downloader.active_jobs[job_id] = job_status

        # Cancel job
        success = await downloader.cancel_job(job_id)
        assert success is True
        assert downloader.active_jobs[job_id].status == DownloadStatus.CANCELLED

        # Try to cancel non-existent job
        success = await downloader.cancel_job("nonexistent")
        assert success is False

    @pytest.mark.asyncio
    async def test_health_check(self, downloader):
        """Test health check functionality"""
        health_result = await downloader.health_check()

        assert isinstance(health_result, HealthCheckResult)
        assert health_result.total_methods == 2
        assert health_result.healthy_methods >= 1  # At least transcript should work
        assert health_result.overall_status in ['healthy', 'degraded', 'unhealthy']
        assert isinstance(health_result.method_details, dict)
        assert len(health_result.method_details) == 2

    def test_metrics_tracking(self, downloader):
        """Test metrics tracking functionality"""
        # Update some metrics
        downloader._update_success_metrics(DownloadMethod.PYTUBEFIX, "test123", True, 0)
        downloader._update_success_metrics(DownloadMethod.PYTUBEFIX, "test456", False, 1)
        downloader._update_success_metrics(DownloadMethod.TRANSCRIPT_ONLY, "test789", True, 0)

        metrics = downloader.get_metrics()

        assert metrics.total_attempts == 3
        assert metrics.successful_downloads == 2
        assert metrics.failed_downloads == 1

        # Check method-specific success rates
        pytubefix_rate = metrics.method_success_rates.get(DownloadMethod.PYTUBEFIX.value, 0)
        transcript_rate = metrics.method_success_rates.get(DownloadMethod.TRANSCRIPT_ONLY.value, 0)

        assert 0 <= pytubefix_rate <= 1
        assert transcript_rate == 1.0  # 100% success for transcript

    @pytest.mark.asyncio
    async def test_extract_video_id(self, downloader):
        """Test video ID extraction"""
        test_cases = [
            ("https://youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
            ("https://youtu.be/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
            ("https://youtube.com/embed/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
        ]

        for url, expected_id in test_cases:
            video_id = await downloader._extract_video_id(url)
            assert video_id == expected_id

        # Test invalid URL
        with pytest.raises(DownloaderException):
            await downloader._extract_video_id("https://example.com/not-youtube")

    @pytest.mark.asyncio
    async def test_cleanup_old_files(self, downloader, tmp_path):
        """Test old file cleanup functionality"""
        import os

        # Create some test files with old timestamps
        old_file = tmp_path / "videos" / "old_video.mp4"
        old_file.parent.mkdir(parents=True, exist_ok=True)
        old_file.write_text("old video content")

        new_file = tmp_path / "videos" / "new_video.mp4"
        new_file.write_text("new video content")

        # Artificially age the old file
        old_time = datetime.now() - timedelta(days=35)
        old_timestamp = old_time.timestamp()
        os.utime(old_file, (old_timestamp, old_timestamp))

        # Run cleanup (30 day default)
        stats = await downloader.cleanup_old_files(30)

        assert stats['files_deleted'] == 1
        assert stats['bytes_freed'] > 0
        assert not old_file.exists()
        assert new_file.exists()

    @pytest.mark.asyncio
    async def test_method_timeout_handling(self, downloader):
        """Test timeout handling for download methods"""
        # The fixture configures a 30 second timeout, which a short test delay
        # can never exceed. Lower it for this test so the slow download below
        # really does outlast it.
        # NOTE(review): assumes the orchestrator reads
        # config.method_timeout_seconds when the download runs - confirm.
        short_timeout = 1
        downloader.config.method_timeout_seconds = short_timeout

        # Mock a downloader that takes too long
        async def slow_download(*args, **kwargs):
            await asyncio.sleep(short_timeout + 1)  # Longer than config timeout
            return VideoDownloadResult(
                video_id="test123",
                video_url="https://youtube.com/watch?v=test123",
                status=DownloadStatus.COMPLETED,
                method=DownloadMethod.PYTUBEFIX,
            )

        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video = slow_download

        # This should timeout and fallback to transcript-only
        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        # Should fallback to transcript method
        assert result.status == DownloadStatus.PARTIAL
        assert result.method == DownloadMethod.TRANSCRIPT_ONLY
@pytest.mark.integration
class TestIntelligentVideoDownloaderIntegration:
    """Integration tests for intelligent video downloader"""

    @pytest.fixture
    def downloader(self, tmp_path):
        """Create a downloader instance backed by a patched factory.

        Fixtures defined inside another test class are not visible from this
        class, so the one needed by ``test_metrics_update_thread_safety`` is
        provided here. The patch stays active for the duration of the test.
        """
        methods = [DownloadMethod.PYTUBEFIX, DownloadMethod.TRANSCRIPT_ONLY]
        config = VideoDownloadConfig(
            storage_path=tmp_path,
            enabled_methods=methods,
        )
        with patch('backend.services.intelligent_video_downloader.DownloaderFactory') as mock_factory:
            mock_factory.get_available_methods.return_value = list(methods)
            mock_factory.create.return_value = AsyncMock()
            yield IntelligentVideoDownloader(config)

    @pytest.mark.asyncio
    async def test_real_config_initialization(self, tmp_path):
        """Test initialization with real configuration"""
        config = VideoDownloadConfig(
            storage_path=tmp_path,
            enabled_methods=[DownloadMethod.TRANSCRIPT_ONLY],  # Use only reliable method
        )

        # This would normally fail without mocking, but we're testing config handling
        with patch('backend.services.intelligent_video_downloader.DownloaderFactory'):
            downloader = IntelligentVideoDownloader(config)
            assert downloader.config == config

    @pytest.mark.asyncio
    async def test_storage_directories_creation(self, tmp_path):
        """Test that storage directories are created properly"""
        config = VideoDownloadConfig(storage_path=tmp_path)
        storage_dirs = config.get_storage_dirs()

        # Directories should be created by config.ensure_directories()
        # NOTE(review): ensure_directories() is never called in this test, so
        # the assertions rely on the config creating the directories on its
        # own - confirm against VideoDownloadConfig.
        for dir_path in storage_dirs.values():
            assert dir_path.exists()
            assert dir_path.is_dir()

    def test_metrics_update_thread_safety(self, downloader):
        """Test thread safety of metrics updates"""
        import threading

        thread_count = 3
        updates_per_thread = 10

        def update_metrics():
            for i in range(updates_per_thread):
                downloader._update_success_metrics(
                    DownloadMethod.PYTUBEFIX,
                    f"test{i}",
                    i % 2 == 0,  # Alternate success/failure
                    0,
                )

        # Run concurrent updates
        threads = [threading.Thread(target=update_metrics) for _ in range(thread_count)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        # Verify metrics consistency
        metrics = downloader.get_metrics()
        total_updates = thread_count * updates_per_thread
        assert metrics.total_attempts == total_updates  # 3 threads * 10 updates each
        assert metrics.successful_downloads + metrics.failed_downloads == total_updates
        # Each thread reports success for even i only, so the split is exact;
        # a lost update would show up as a count below the expected value.
        expected_successes = thread_count * len(range(0, updates_per_thread, 2))
        assert metrics.successful_downloads == expected_successes
        assert metrics.failed_downloads == total_updates - expected_successes