"""Tests for YouTube metadata extraction service.""" import asyncio import json import pytest from unittest.mock import AsyncMock, MagicMock, patch from datetime import datetime, timezone from src.services.youtube_service import YouTubeMetadataService, CurlYouTubeExtractor from src.repositories.youtube_repository import YouTubeRepository from src.database.models import YouTubeVideo class TestCurlYouTubeExtractor: """Test the curl-based YouTube metadata extractor.""" def test_extract_youtube_id_from_various_urls(self): """Test YouTube ID extraction from various URL formats.""" extractor = CurlYouTubeExtractor() test_cases = [ ("https://www.youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"), ("https://youtu.be/dQw4w9WgXcQ", "dQw4w9WgXcQ"), ("https://www.youtube.com/embed/dQw4w9WgXcQ", "dQw4w9WgXcQ"), ("https://www.youtube.com/v/dQw4w9WgXcQ", "dQw4w9WgXcQ"), ("https://youtube.com/watch?v=dQw4w9WgXcQ&t=30s", "dQw4w9WgXcQ"), ] for url, expected_id in test_cases: assert extractor._extract_youtube_id(url) == expected_id def test_extract_youtube_id_invalid_url(self): """Test YouTube ID extraction with invalid URL.""" extractor = CurlYouTubeExtractor() with pytest.raises(ValueError, match="Could not extract YouTube ID"): extractor._extract_youtube_id("https://example.com/video") @pytest.mark.asyncio async def test_extract_metadata_success(self): """Test successful metadata extraction.""" extractor = CurlYouTubeExtractor() # Mock yt-dlp output mock_metadata = { "title": "Test Video Title", "uploader": "Test Channel", "description": "This is a test video description", "duration": 180, # 3 minutes "id": "dQw4w9WgXcQ" } with patch('asyncio.create_subprocess_exec') as mock_subprocess: # Mock successful subprocess execution mock_process = AsyncMock() mock_process.communicate.return_value = ( json.dumps(mock_metadata).encode(), b"" ) mock_process.returncode = 0 mock_subprocess.return_value = mock_process result = await extractor.extract_metadata("https://youtube.com/watch?v=dQw4w9WgXcQ") assert result["youtube_id"] == "dQw4w9WgXcQ" assert result["title"] == "Test Video Title" assert result["channel"] == "Test Channel" assert result["description"] == "This is a test video description" assert result["duration_seconds"] == 180 assert result["url"] == "https://youtube.com/watch?v=dQw4w9WgXcQ" assert "metadata_extracted_at" in result @pytest.mark.asyncio async def test_extract_metadata_failure(self): """Test metadata extraction failure.""" extractor = CurlYouTubeExtractor() with patch('asyncio.create_subprocess_exec') as mock_subprocess: # Mock failed subprocess execution mock_process = AsyncMock() mock_process.communicate.return_value = ( b"", b"Error: Video not found" ) mock_process.returncode = 1 mock_subprocess.return_value = mock_process with pytest.raises(Exception, match="Failed to extract metadata"): await extractor.extract_metadata("https://youtube.com/watch?v=invalid") class TestYouTubeMetadataService: """Test the YouTube metadata service.""" @pytest.mark.asyncio async def test_service_initialization(self): """Test service initialization.""" service = YouTubeMetadataService() await service.initialize() assert service.status.value == "healthy" assert service.name == "youtube_metadata" @pytest.mark.asyncio async def test_extract_and_store_metadata_new_video(self): """Test extracting and storing metadata for a new video.""" service = YouTubeMetadataService() await service.initialize() # Mock the extractor mock_metadata = { "youtube_id": "dQw4w9WgXcQ", "title": "Test Video", "channel": "Test Channel", "description": "Test description", "duration_seconds": 180, "url": "https://youtube.com/watch?v=dQw4w9WgXcQ", "metadata_extracted_at": datetime.now(timezone.utc) } with patch.object(service.extractor, 'extract_metadata', return_value=mock_metadata): with patch('src.database.connection.get_db_session') as mock_session: # Mock database session mock_session_instance = MagicMock() mock_session.return_value.__enter__.return_value = mock_session_instance mock_session_instance.query.return_value.filter.return_value.first.return_value = None # Video doesn't exist video = await service.extract_and_store_metadata("https://youtube.com/watch?v=dQw4w9WgXcQ") assert video.youtube_id == "dQw4w9WgXcQ" assert video.title == "Test Video" assert video.channel == "Test Channel" assert video.duration_seconds == 180 @pytest.mark.asyncio async def test_extract_and_store_metadata_existing_video(self): """Test extracting and storing metadata for an existing video.""" service = YouTubeMetadataService() await service.initialize() # Mock the extractor mock_metadata = { "youtube_id": "dQw4w9WgXcQ", "title": "Updated Video Title", "channel": "Test Channel", "description": "Updated description", "duration_seconds": 180, "url": "https://youtube.com/watch?v=dQw4w9WgXcQ", "metadata_extracted_at": datetime.now(timezone.utc) } # Mock existing video existing_video = YouTubeVideo( youtube_id="dQw4w9WgXcQ", title="Old Title", channel="Test Channel", description="Old description", duration_seconds=180, url="https://youtube.com/watch?v=dQw4w9WgXcQ" ) with patch.object(service.extractor, 'extract_metadata', return_value=mock_metadata): with patch('src.database.connection.get_db_session') as mock_session: # Mock database session mock_session_instance = MagicMock() mock_session.return_value.__enter__.return_value = mock_session_instance mock_session_instance.query.return_value.filter.return_value.first.return_value = existing_video # Video exists video = await service.extract_and_store_metadata("https://youtube.com/watch?v=dQw4w9WgXcQ") assert video.title == "Updated Video Title" assert video.description == "Updated description" def test_health_status(self): """Test service health status.""" service = YouTubeMetadataService() with patch('subprocess.run') as mock_run: # Mock yt-dlp availability check mock_run.return_value.returncode = 0 health = service.get_health_status() assert "status" in health assert "yt_dlp_available" in health assert "cache_dir" in health class TestYouTubeRepository: """Test the YouTube repository.""" @pytest.mark.asyncio async def test_create_video(self): """Test creating a new video record.""" repo = YouTubeRepository() video_data = { "youtube_id": "dQw4w9WgXcQ", "title": "Test Video", "channel": "Test Channel", "description": "Test description", "duration_seconds": 180, "url": "https://youtube.com/watch?v=dQw4w9WgXcQ" } with patch('src.repositories.youtube_repository.get_db_session') as mock_session: mock_session_instance = MagicMock() mock_session.return_value.__enter__.return_value = mock_session_instance # Mock the video object that would be created mock_video = MagicMock() mock_video.youtube_id = "dQw4w9WgXcQ" mock_video.title = "Test Video" video = await repo.create(video_data) assert video.youtube_id == "dQw4w9WgXcQ" assert video.title == "Test Video" mock_session_instance.add.assert_called_once() mock_session_instance.commit.assert_called_once() @pytest.mark.asyncio async def test_get_by_youtube_id(self): """Test getting video by YouTube ID.""" repo = YouTubeRepository() with patch('src.repositories.youtube_repository.get_db_session') as mock_session: mock_session_instance = MagicMock() mock_session.return_value.__enter__.return_value = mock_session_instance # Mock database result mock_video = MagicMock() mock_video.youtube_id = "dQw4w9WgXcQ" mock_video.title = "Test Video" mock_session_instance.query.return_value.filter.return_value.first.return_value = mock_video video = await repo.get_by_youtube_id("dQw4w9WgXcQ") assert video is not None assert video.youtube_id == "dQw4w9WgXcQ" @pytest.mark.asyncio async def test_search_by_title(self): """Test searching videos by title.""" repo = YouTubeRepository() with patch('src.repositories.youtube_repository.get_db_session') as mock_session: mock_session_instance = MagicMock() mock_session.return_value.__enter__.return_value = mock_session_instance # Mock database results mock_videos = [ MagicMock(youtube_id="dQw4w9WgXcQ", title="Test Video 1"), MagicMock(youtube_id="abc123", title="Test Video 2") ] # Set up the query chain properly mock_query = MagicMock() mock_filter = MagicMock() mock_order_by = MagicMock() mock_limit = MagicMock() mock_limit.all.return_value = mock_videos mock_session_instance.query.return_value = mock_query mock_query.filter.return_value = mock_filter mock_filter.order_by.return_value = mock_order_by mock_order_by.limit.return_value = mock_limit videos = await repo.search_by_title("Test", limit=10) assert len(videos) == 2 assert videos[0].youtube_id == "dQw4w9WgXcQ" assert videos[1].youtube_id == "abc123" @pytest.mark.asyncio async def test_get_statistics(self): """Test getting video statistics.""" repo = YouTubeRepository() with patch('src.repositories.youtube_repository.get_db_session') as mock_session: mock_session_instance = MagicMock() mock_session.return_value.__enter__.return_value = mock_session_instance # Mock statistics results - set up separate query chains mock_count_query = MagicMock() mock_count_query.scalar.return_value = 10 mock_duration_query = MagicMock() mock_duration_query.scalar.return_value = 3600 mock_channels_query = MagicMock() mock_channels_query.group_by.return_value.order_by.return_value.limit.return_value.all.return_value = [ MagicMock(channel="Test Channel", count=5) ] # Set up query to return different mocks based on what's being queried def mock_query_side_effect(*args, **kwargs): if 'count' in str(args): return mock_count_query elif 'sum' in str(args): return mock_duration_query else: return mock_channels_query mock_session_instance.query.side_effect = mock_query_side_effect stats = await repo.get_statistics() assert stats["total_videos"] == 10 assert stats["total_duration_seconds"] == 3600 assert stats["total_duration_hours"] == 1.0 @pytest.mark.asyncio async def test_integration_youtube_workflow(): """Test the complete YouTube metadata workflow.""" # This is an integration test that would require a real database # and yt-dlp installation. In a real environment, this would be # run against a test database with actual YouTube URLs. # For now, we'll test the workflow with mocks service = YouTubeMetadataService() repo = YouTubeRepository() await service.initialize() # Mock the entire workflow mock_metadata = { "youtube_id": "dQw4w9WgXcQ", "title": "Integration Test Video", "channel": "Test Channel", "description": "Integration test description", "duration_seconds": 300, "url": "https://youtube.com/watch?v=dQw4w9WgXcQ", "metadata_extracted_at": datetime.now(timezone.utc) } with patch.object(service.extractor, 'extract_metadata', return_value=mock_metadata): with patch('src.database.connection.get_db_session') as mock_session: mock_session_instance = MagicMock() mock_session.return_value.__enter__.return_value = mock_session_instance mock_session_instance.query.return_value.filter.return_value.first.return_value = None # Test the complete workflow video = await service.extract_and_store_metadata("https://youtube.com/watch?v=dQw4w9WgXcQ") assert video.youtube_id == "dQw4w9WgXcQ" assert video.title == "Integration Test Video" assert video.duration_seconds == 300 # Verify database operations were called mock_session_instance.add.assert_called_once() mock_session_instance.commit.assert_called_once() mock_session_instance.refresh.assert_called_once()