# Listing metadata (from the original code viewer): 447 lines, 18 KiB, Python
"""
|
|
Unit tests for intelligent video downloader orchestrator
|
|
"""
|
|
import asyncio
import os
import threading
import uuid
from datetime import datetime, timedelta
from unittest.mock import Mock, AsyncMock, patch, MagicMock

import pytest

from backend.config.video_download_config import VideoDownloadConfig
from backend.models.video_download import (
    DownloadMethod,
    DownloadPreferences,
    VideoDownloadResult,
    DownloadStatus,
    DownloadJobStatus,
    DownloadMetrics,
    HealthCheckResult,
    AllMethodsFailedError,
    DownloaderException,
    VideoNotAvailableError,
    NetworkError,
)
from backend.services.intelligent_video_downloader import IntelligentVideoDownloader


class TestIntelligentVideoDownloader:
    """Test intelligent video downloader orchestration.

    Every test runs against an ``IntelligentVideoDownloader`` whose
    ``DownloaderFactory`` is patched, so the per-method downloaders are
    ``AsyncMock`` objects and no network access takes place.
    """

    @pytest.fixture
    def mock_config(self, tmp_path):
        """Mock configuration for testing.

        Storage lives under pytest's ``tmp_path``; only PYTUBEFIX and
        TRANSCRIPT_ONLY are enabled, with a 30 s per-method timeout,
        2 retries per method and at most 2 concurrent downloads.
        """
        config = VideoDownloadConfig(
            storage_path=tmp_path,
            max_storage_gb=1.0,
            enabled_methods=[DownloadMethod.PYTUBEFIX, DownloadMethod.TRANSCRIPT_ONLY],
            method_timeout_seconds=30,
            max_retries_per_method=2,
            max_concurrent_downloads=2,
        )
        return config

    @pytest.fixture
    def mock_downloader_factory(self):
        """Mock downloader factory.

        Patches ``DownloaderFactory`` inside the orchestrator module for the
        duration of the test and yields the patched object. ``create`` hands
        out a succeeding mock for PYTUBEFIX, a partial-result mock for
        TRANSCRIPT_ONLY and a failing mock for any other method.
        """
        with patch('backend.services.intelligent_video_downloader.DownloaderFactory') as mock_factory:
            # Mock successful downloader
            mock_successful_downloader = AsyncMock()
            mock_successful_downloader.download_video.return_value = VideoDownloadResult(
                video_id="test123",
                video_url="https://youtube.com/watch?v=test123",
                status=DownloadStatus.COMPLETED,
                method=DownloadMethod.PYTUBEFIX,
            )
            mock_successful_downloader.test_connection.return_value = True

            # Mock failing downloader
            mock_failing_downloader = AsyncMock()
            mock_failing_downloader.download_video.side_effect = DownloaderException("Download failed")
            mock_failing_downloader.test_connection.return_value = False

            # Mock transcript downloader
            mock_transcript_downloader = AsyncMock()
            mock_transcript_downloader.download_video.return_value = VideoDownloadResult(
                video_id="test123",
                video_url="https://youtube.com/watch?v=test123",
                status=DownloadStatus.PARTIAL,
                method=DownloadMethod.TRANSCRIPT_ONLY,
                is_partial=True,
            )
            mock_transcript_downloader.test_connection.return_value = True

            mock_factory.get_available_methods.return_value = [
                DownloadMethod.PYTUBEFIX,
                DownloadMethod.TRANSCRIPT_ONLY,
            ]

            def create_side_effect(method, config):
                # Route each requested method to its canned mock.
                if method == DownloadMethod.PYTUBEFIX:
                    return mock_successful_downloader
                elif method == DownloadMethod.TRANSCRIPT_ONLY:
                    return mock_transcript_downloader
                else:
                    return mock_failing_downloader

            mock_factory.create.side_effect = create_side_effect

            # Yield (not return) so the patch stays active while the test runs.
            yield mock_factory

    @pytest.fixture
    def downloader(self, mock_config, mock_downloader_factory):
        """Create intelligent downloader instance for testing.

        Depends on ``mock_downloader_factory`` so the factory patch is in
        place before the orchestrator is constructed.
        """
        return IntelligentVideoDownloader(mock_config)

    def test_initialization(self, downloader, mock_config):
        """Test downloader initialization"""
        assert downloader.config == mock_config
        assert len(downloader.downloaders) == 2  # PYTUBEFIX and TRANSCRIPT_ONLY
        assert DownloadMethod.PYTUBEFIX in downloader.downloaders
        assert DownloadMethod.TRANSCRIPT_ONLY in downloader.downloaders
        assert isinstance(downloader.metrics, DownloadMetrics)
        assert downloader.success_cache == {}
        assert downloader.active_jobs == {}

    @pytest.mark.asyncio
    async def test_successful_download_first_method(self, downloader):
        """Test successful download on first method attempt"""
        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert result.video_id == "test123"
        assert result.method == DownloadMethod.PYTUBEFIX
        assert downloader.metrics.successful_downloads == 1
        assert downloader.metrics.total_attempts == 1

    @pytest.mark.asyncio
    async def test_fallback_to_transcript_only(self, downloader):
        """Test fallback to transcript-only when primary methods fail"""
        # Make pytubefix fail
        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video.side_effect = NetworkError("403 Forbidden")

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.PARTIAL
        assert result.method == DownloadMethod.TRANSCRIPT_ONLY
        assert result.is_partial is True

    @pytest.mark.asyncio
    async def test_all_methods_fail(self, downloader):
        """Test when all methods fail"""
        # Make all downloaders fail
        for downloader_instance in downloader.downloaders.values():
            downloader_instance.download_video.side_effect = DownloaderException("All failed")

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        with pytest.raises(AllMethodsFailedError):
            await downloader.download_video(url, preferences)

        assert downloader.metrics.failed_downloads == 1

    @pytest.mark.asyncio
    async def test_video_not_available_error(self, downloader):
        """Test handling of permanent video unavailability"""
        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video.side_effect = VideoNotAvailableError("Private video")
        downloader.downloaders[DownloadMethod.TRANSCRIPT_ONLY].download_video.side_effect = VideoNotAvailableError("Private video")

        url = "https://youtube.com/watch?v=private123"
        preferences = DownloadPreferences()

        with pytest.raises(AllMethodsFailedError):
            await downloader.download_video(url, preferences)

    @pytest.mark.asyncio
    async def test_retry_logic(self, downloader):
        """Test retry logic for transient failures"""
        # First call fails, second succeeds
        call_count = 0

        async def side_effect(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                raise DownloaderException("Transient error")
            else:
                return VideoDownloadResult(
                    video_id="test123",
                    video_url="https://youtube.com/watch?v=test123",
                    status=DownloadStatus.COMPLETED,
                    method=DownloadMethod.PYTUBEFIX,
                )

        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video.side_effect = side_effect

        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        assert result.status == DownloadStatus.COMPLETED
        assert call_count == 2  # One retry occurred

    @pytest.mark.asyncio
    async def test_concurrent_download_limit(self, downloader):
        """Test concurrent download limiting"""
        # Create multiple download tasks
        urls = [
            "https://youtube.com/watch?v=test1",
            "https://youtube.com/watch?v=test2",
            "https://youtube.com/watch?v=test3",
            "https://youtube.com/watch?v=test4",
        ]

        preferences = DownloadPreferences()

        # Start all downloads concurrently
        tasks = [downloader.download_video(url, preferences) for url in urls]
        results = await asyncio.gather(*tasks)

        # All should complete successfully
        assert len(results) == 4
        assert all(result.status == DownloadStatus.COMPLETED for result in results)

        # Check that semaphore was respected (indirectly via successful completion)
        # NOTE(review): this only proves all four downloads finish; it never
        # observes how many ran at once, so the max_concurrent_downloads=2 cap
        # itself is not verified here.
        assert downloader.metrics.successful_downloads == 4

    @pytest.mark.asyncio
    async def test_get_prioritized_methods(self, downloader):
        """Test method prioritization logic"""
        preferences = DownloadPreferences(prefer_audio_only=True)

        # Mock supports_audio_only for testing
        downloader.downloaders[DownloadMethod.PYTUBEFIX].supports_audio_only.return_value = True
        downloader.downloaders[DownloadMethod.TRANSCRIPT_ONLY].supports_audio_only.return_value = False

        prioritized = await downloader._get_prioritized_methods("test123", preferences)

        # Should prioritize audio-capable methods
        assert len(prioritized) >= 2
        # TRANSCRIPT_ONLY should be last as ultimate fallback
        assert prioritized[-1] == DownloadMethod.TRANSCRIPT_ONLY

    @pytest.mark.asyncio
    async def test_job_status_tracking(self, downloader):
        """Test job status tracking during download"""
        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        # Mock slow download to test job tracking
        async def slow_download(*args, **kwargs):
            await asyncio.sleep(0.1)  # Small delay
            return VideoDownloadResult(
                video_id="test123",
                video_url=url,
                status=DownloadStatus.COMPLETED,
                method=DownloadMethod.PYTUBEFIX,
            )

        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video = slow_download

        # Start download
        download_task = asyncio.create_task(downloader.download_video(url, preferences))

        # Give it a moment to start
        await asyncio.sleep(0.01)

        # Check that job was created
        active_jobs = downloader.get_active_jobs()
        assert len(active_jobs) == 1

        # Exactly one job is active (asserted above), so take its key.
        job_id = next(iter(active_jobs))
        job_status = await downloader.get_job_status(job_id)

        assert job_status is not None
        assert job_status.video_url == url
        assert job_status.status in [DownloadStatus.IN_PROGRESS, DownloadStatus.COMPLETED]

        # Wait for completion (the result itself is not inspected here)
        await download_task

        # Job should be cleaned up
        assert len(downloader.get_active_jobs()) == 0

    @pytest.mark.asyncio
    async def test_job_cancellation(self, downloader):
        """Test job cancellation functionality"""
        # Create a mock job
        job_id = str(uuid.uuid4())
        job_status = DownloadJobStatus(
            job_id=job_id,
            video_url="https://youtube.com/watch?v=test123",
            status=DownloadStatus.IN_PROGRESS,
        )
        downloader.active_jobs[job_id] = job_status

        # Cancel job
        success = await downloader.cancel_job(job_id)

        assert success is True
        assert downloader.active_jobs[job_id].status == DownloadStatus.CANCELLED

        # Try to cancel non-existent job
        success = await downloader.cancel_job("nonexistent")
        assert success is False

    @pytest.mark.asyncio
    async def test_health_check(self, downloader):
        """Test health check functionality"""
        health_result = await downloader.health_check()

        assert isinstance(health_result, HealthCheckResult)
        assert health_result.total_methods == 2
        assert health_result.healthy_methods >= 1  # At least transcript should work
        assert health_result.overall_status in ['healthy', 'degraded', 'unhealthy']
        assert isinstance(health_result.method_details, dict)
        assert len(health_result.method_details) == 2

    def test_metrics_tracking(self, downloader):
        """Test metrics tracking functionality"""
        # Update some metrics
        downloader._update_success_metrics(DownloadMethod.PYTUBEFIX, "test123", True, 0)
        downloader._update_success_metrics(DownloadMethod.PYTUBEFIX, "test456", False, 1)
        downloader._update_success_metrics(DownloadMethod.TRANSCRIPT_ONLY, "test789", True, 0)

        metrics = downloader.get_metrics()

        assert metrics.total_attempts == 3
        assert metrics.successful_downloads == 2
        assert metrics.failed_downloads == 1

        # Check method-specific success rates
        pytubefix_rate = metrics.method_success_rates.get(DownloadMethod.PYTUBEFIX.value, 0)
        transcript_rate = metrics.method_success_rates.get(DownloadMethod.TRANSCRIPT_ONLY.value, 0)

        assert 0 <= pytubefix_rate <= 1
        assert transcript_rate == 1.0  # 100% success for transcript

    @pytest.mark.asyncio
    async def test_extract_video_id(self, downloader):
        """Test video ID extraction"""
        test_cases = [
            ("https://youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
            ("https://youtu.be/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
            ("https://youtube.com/embed/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
        ]

        for url, expected_id in test_cases:
            video_id = await downloader._extract_video_id(url)
            assert video_id == expected_id

        # Test invalid URL
        with pytest.raises(DownloaderException):
            await downloader._extract_video_id("https://example.com/not-youtube")

    @pytest.mark.asyncio
    async def test_cleanup_old_files(self, downloader, tmp_path):
        """Test old file cleanup functionality"""
        # Create some test files with old timestamps
        old_file = tmp_path / "videos" / "old_video.mp4"
        old_file.parent.mkdir(parents=True, exist_ok=True)
        old_file.write_text("old video content")

        new_file = tmp_path / "videos" / "new_video.mp4"
        new_file.write_text("new video content")

        # Artificially age the old file: push atime/mtime 35 days into the past
        old_time = datetime.now() - timedelta(days=35)
        old_timestamp = old_time.timestamp()
        os.utime(old_file, (old_timestamp, old_timestamp))

        # Run cleanup (30 day default)
        stats = await downloader.cleanup_old_files(30)

        assert stats['files_deleted'] == 1
        assert stats['bytes_freed'] > 0
        assert not old_file.exists()
        assert new_file.exists()

    @pytest.mark.asyncio
    async def test_method_timeout_handling(self, downloader):
        """Test timeout handling for download methods"""
        # mock_config sets a 30 s per-method timeout, which the 2 s mock below
        # would never exceed; lower it so the slow download really times out.
        # NOTE(review): assumes the orchestrator reads
        # config.method_timeout_seconds at call time and that the config
        # object permits attribute assignment -- confirm.
        downloader.config.method_timeout_seconds = 1

        # Mock a downloader that takes too long
        async def slow_download(*args, **kwargs):
            await asyncio.sleep(2)  # Longer than the 1 s timeout set above
            return VideoDownloadResult(
                video_id="test123",
                video_url="https://youtube.com/watch?v=test123",
                status=DownloadStatus.COMPLETED,
                method=DownloadMethod.PYTUBEFIX,
            )

        downloader.downloaders[DownloadMethod.PYTUBEFIX].download_video = slow_download

        # This should timeout and fallback to transcript-only
        url = "https://youtube.com/watch?v=test123"
        preferences = DownloadPreferences()

        result = await downloader.download_video(url, preferences)

        # Should fallback to transcript method
        assert result.status == DownloadStatus.PARTIAL
        assert result.method == DownloadMethod.TRANSCRIPT_ONLY


@pytest.mark.integration
class TestIntelligentVideoDownloaderIntegration:
    """Integration tests for intelligent video downloader.

    These tests use a real ``VideoDownloadConfig`` rooted at pytest's
    ``tmp_path``; ``DownloaderFactory`` is still patched wherever an
    orchestrator instance is constructed.
    """

    @pytest.fixture
    def downloader(self, tmp_path):
        """Provide an orchestrator instance for tests in this class.

        Built the same way as in ``test_real_config_initialization``: a real
        config restricted to TRANSCRIPT_ONLY, with ``DownloaderFactory``
        patched. The patch stays active for the duration of the test.
        """
        config = VideoDownloadConfig(
            storage_path=tmp_path,
            enabled_methods=[DownloadMethod.TRANSCRIPT_ONLY],
        )
        with patch('backend.services.intelligent_video_downloader.DownloaderFactory'):
            yield IntelligentVideoDownloader(config)

    @pytest.mark.asyncio
    async def test_real_config_initialization(self, tmp_path):
        """Test initialization with real configuration"""
        config = VideoDownloadConfig(
            storage_path=tmp_path,
            enabled_methods=[DownloadMethod.TRANSCRIPT_ONLY],  # Use only reliable method
        )

        # This would normally fail without mocking, but we're testing config handling
        with patch('backend.services.intelligent_video_downloader.DownloaderFactory'):
            downloader = IntelligentVideoDownloader(config)
            assert downloader.config == config

    @pytest.mark.asyncio
    async def test_storage_directories_creation(self, tmp_path):
        """Test that storage directories are created properly"""
        config = VideoDownloadConfig(storage_path=tmp_path)

        storage_dirs = config.get_storage_dirs()

        # Directories should be created by config.ensure_directories()
        # NOTE(review): nothing in this test calls ensure_directories()
        # explicitly; presumably the config does so on construction or inside
        # get_storage_dirs() -- confirm.
        for dir_path in storage_dirs.values():
            assert dir_path.exists()
            assert dir_path.is_dir()

    def test_metrics_update_thread_safety(self, downloader):
        """Test thread safety of metrics updates"""

        def update_metrics():
            for i in range(10):
                downloader._update_success_metrics(
                    DownloadMethod.PYTUBEFIX,
                    f"test{i}",
                    i % 2 == 0,  # Alternate success/failure
                    0,
                )

        # Run concurrent updates
        threads = [threading.Thread(target=update_metrics) for _ in range(3)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        # Verify metrics consistency
        metrics = downloader.get_metrics()
        assert metrics.total_attempts == 30  # 3 threads * 10 updates each
        assert 0 <= metrics.successful_downloads <= 30
        assert 0 <= metrics.failed_downloads <= 30
        assert metrics.successful_downloads + metrics.failed_downloads == 30