# trax/tests/test_youtube_service.py
#
# 354 lines
# 14 KiB
# Python

"""Tests for YouTube metadata extraction service."""
import asyncio
import json
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from datetime import datetime, timezone
from src.services.youtube_service import YouTubeMetadataService, CurlYouTubeExtractor
from src.repositories.youtube_repository import YouTubeRepository
from src.database.models import YouTubeVideo
class TestCurlYouTubeExtractor:
    """Exercise CurlYouTubeExtractor: video-ID parsing and metadata extraction."""

    @staticmethod
    def _fake_process(stdout, stderr, returncode):
        """Return an AsyncMock posing as a finished subprocess.

        ``communicate()`` resolves to ``(stdout, stderr)`` and ``returncode``
        is set to the given exit status.
        """
        process = AsyncMock()
        process.communicate.return_value = (stdout, stderr)
        process.returncode = returncode
        return process

    def test_extract_youtube_id_from_various_urls(self):
        """Each supported URL shape yields the video ID embedded in it."""
        extractor = CurlYouTubeExtractor()
        video_id = "dQw4w9WgXcQ"
        urls = (
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "https://youtu.be/dQw4w9WgXcQ",
            "https://www.youtube.com/embed/dQw4w9WgXcQ",
            "https://www.youtube.com/v/dQw4w9WgXcQ",
            "https://youtube.com/watch?v=dQw4w9WgXcQ&t=30s",
        )

        for url in urls:
            assert extractor._extract_youtube_id(url) == video_id

    def test_extract_youtube_id_invalid_url(self):
        """A URL that carries no YouTube ID raises ValueError."""
        extractor = CurlYouTubeExtractor()
        non_youtube_url = "https://example.com/video"

        with pytest.raises(ValueError, match="Could not extract YouTube ID"):
            extractor._extract_youtube_id(non_youtube_url)

    @pytest.mark.asyncio
    async def test_extract_metadata_success(self):
        """A zero exit status with JSON on stdout is mapped to the result dict."""
        extractor = CurlYouTubeExtractor()
        url = "https://youtube.com/watch?v=dQw4w9WgXcQ"

        # Raw JSON payload the mocked subprocess prints on stdout.
        raw_payload = {
            "title": "Test Video Title",
            "uploader": "Test Channel",
            "description": "This is a test video description",
            "duration": 180,  # 3 minutes
            "id": "dQw4w9WgXcQ",
        }

        with patch('asyncio.create_subprocess_exec') as spawn:
            spawn.return_value = self._fake_process(
                json.dumps(raw_payload).encode(), b"", 0
            )
            result = await extractor.extract_metadata(url)

        # Field names the extractor is expected to emit, with their values.
        expected_fields = {
            "youtube_id": "dQw4w9WgXcQ",
            "title": "Test Video Title",
            "channel": "Test Channel",
            "description": "This is a test video description",
            "duration_seconds": 180,
            "url": url,
        }
        for field, value in expected_fields.items():
            assert result[field] == value
        assert "metadata_extracted_at" in result

    @pytest.mark.asyncio
    async def test_extract_metadata_failure(self):
        """A non-zero exit status surfaces as a 'Failed to extract metadata' error."""
        extractor = CurlYouTubeExtractor()

        with patch('asyncio.create_subprocess_exec') as spawn:
            spawn.return_value = self._fake_process(
                b"", b"Error: Video not found", 1
            )
            with pytest.raises(Exception, match="Failed to extract metadata"):
                await extractor.extract_metadata(
                    "https://youtube.com/watch?v=invalid"
                )
class TestYouTubeMetadataService:
    """Cover YouTubeMetadataService start-up, the store/update flow, and health."""

    @staticmethod
    def _metadata(title, description):
        """Build the dict the patched extractor returns for the sample video."""
        return {
            "youtube_id": "dQw4w9WgXcQ",
            "title": title,
            "channel": "Test Channel",
            "description": description,
            "duration_seconds": 180,
            "url": "https://youtube.com/watch?v=dQw4w9WgXcQ",
            "metadata_extracted_at": datetime.now(timezone.utc),
        }

    @staticmethod
    def _wire_session(session_factory, lookup_result):
        """Make the patched session factory yield a session mock.

        ``query(...).filter(...).first()`` on that session returns
        ``lookup_result``.
        """
        session = MagicMock()
        session_factory.return_value.__enter__.return_value = session
        lookup = session.query.return_value.filter.return_value.first
        lookup.return_value = lookup_result
        return session

    @pytest.mark.asyncio
    async def test_service_initialization(self):
        """After initialize() the service is 'healthy' and named 'youtube_metadata'."""
        service = YouTubeMetadataService()

        await service.initialize()

        assert service.status.value == "healthy"
        assert service.name == "youtube_metadata"

    @pytest.mark.asyncio
    async def test_extract_and_store_metadata_new_video(self):
        """When the lookup finds nothing, the returned video carries the extracted data."""
        service = YouTubeMetadataService()
        await service.initialize()

        extracted = self._metadata("Test Video", "Test description")
        extractor_patch = patch.object(
            service.extractor, 'extract_metadata', return_value=extracted
        )
        session_patch = patch('src.database.connection.get_db_session')

        with extractor_patch, session_patch as session_factory:
            # None from first() means no row exists yet for this video.
            self._wire_session(session_factory, None)
            video = await service.extract_and_store_metadata(
                "https://youtube.com/watch?v=dQw4w9WgXcQ"
            )

        assert video.youtube_id == "dQw4w9WgXcQ"
        assert video.title == "Test Video"
        assert video.channel == "Test Channel"
        assert video.duration_seconds == 180

    @pytest.mark.asyncio
    async def test_extract_and_store_metadata_existing_video(self):
        """When the lookup finds a row, the returned video shows the new title and description."""
        service = YouTubeMetadataService()
        await service.initialize()

        extracted = self._metadata("Updated Video Title", "Updated description")

        # Row the session lookup hands back, still holding the stale values.
        stored_video = YouTubeVideo(
            youtube_id="dQw4w9WgXcQ",
            title="Old Title",
            channel="Test Channel",
            description="Old description",
            duration_seconds=180,
            url="https://youtube.com/watch?v=dQw4w9WgXcQ",
        )

        extractor_patch = patch.object(
            service.extractor, 'extract_metadata', return_value=extracted
        )
        session_patch = patch('src.database.connection.get_db_session')

        with extractor_patch, session_patch as session_factory:
            self._wire_session(session_factory, stored_video)
            video = await service.extract_and_store_metadata(
                "https://youtube.com/watch?v=dQw4w9WgXcQ"
            )

        assert video.title == "Updated Video Title"
        assert video.description == "Updated description"

    def test_health_status(self):
        """The health report exposes status, yt_dlp_available and cache_dir keys."""
        service = YouTubeMetadataService()

        with patch('subprocess.run') as fake_run:
            # Exit code 0 from the mocked subprocess.run (yt-dlp availability probe).
            fake_run.return_value.returncode = 0
            health = service.get_health_status()

        for key in ("status", "yt_dlp_available", "cache_dir"):
            assert key in health
class TestYouTubeRepository:
    """Test the YouTube repository against a mocked database session.

    Every test patches ``get_db_session`` in the repository module so that the
    context manager it returns yields a ``MagicMock`` session.
    """

    @pytest.mark.asyncio
    async def test_create_video(self):
        """create() returns a video with the given fields and adds/commits once."""
        repo = YouTubeRepository()
        video_data = {
            "youtube_id": "dQw4w9WgXcQ",
            "title": "Test Video",
            "channel": "Test Channel",
            "description": "Test description",
            "duration_seconds": 180,
            "url": "https://youtube.com/watch?v=dQw4w9WgXcQ",
        }

        with patch('src.repositories.youtube_repository.get_db_session') as mock_session:
            mock_session_instance = MagicMock()
            mock_session.return_value.__enter__.return_value = mock_session_instance

            # The assertions below inspect whatever create() itself returns;
            # no stand-in video object is injected into the repository.
            video = await repo.create(video_data)

            assert video.youtube_id == "dQw4w9WgXcQ"
            assert video.title == "Test Video"
            mock_session_instance.add.assert_called_once()
            mock_session_instance.commit.assert_called_once()

    @pytest.mark.asyncio
    async def test_get_by_youtube_id(self):
        """get_by_youtube_id() returns the row produced by query().filter().first()."""
        repo = YouTubeRepository()

        with patch('src.repositories.youtube_repository.get_db_session') as mock_session:
            mock_session_instance = MagicMock()
            mock_session.return_value.__enter__.return_value = mock_session_instance

            # Row handed back by the mocked lookup.
            mock_video = MagicMock()
            mock_video.youtube_id = "dQw4w9WgXcQ"
            mock_video.title = "Test Video"
            mock_session_instance.query.return_value.filter.return_value.first.return_value = mock_video

            video = await repo.get_by_youtube_id("dQw4w9WgXcQ")

            assert video is not None
            assert video.youtube_id == "dQw4w9WgXcQ"

    @pytest.mark.asyncio
    async def test_search_by_title(self):
        """search_by_title() returns the rows from the mocked query chain, in order."""
        repo = YouTubeRepository()

        with patch('src.repositories.youtube_repository.get_db_session') as mock_session:
            mock_session_instance = MagicMock()
            mock_session.return_value.__enter__.return_value = mock_session_instance

            mock_videos = [
                MagicMock(youtube_id="dQw4w9WgXcQ", title="Test Video 1"),
                MagicMock(youtube_id="abc123", title="Test Video 2"),
            ]
            # Mocked chain: query(...).filter(...).order_by(...).limit(...).all()
            limited_query = (
                mock_session_instance.query.return_value
                .filter.return_value
                .order_by.return_value
                .limit.return_value
            )
            limited_query.all.return_value = mock_videos

            videos = await repo.search_by_title("Test", limit=10)

            assert len(videos) == 2
            assert videos[0].youtube_id == "dQw4w9WgXcQ"
            assert videos[1].youtube_id == "abc123"

    @pytest.mark.asyncio
    async def test_get_statistics(self):
        """get_statistics() reports the count, total seconds and derived hours."""
        repo = YouTubeRepository()

        with patch('src.repositories.youtube_repository.get_db_session') as mock_session:
            mock_session_instance = MagicMock()
            mock_session.return_value.__enter__.return_value = mock_session_instance

            # One mock per kind of query the statistics call is expected to issue.
            mock_count_query = MagicMock()
            mock_count_query.scalar.return_value = 10

            mock_duration_query = MagicMock()
            mock_duration_query.scalar.return_value = 3600

            mock_channels_query = MagicMock()
            mock_channels_query.group_by.return_value.order_by.return_value.limit.return_value.all.return_value = [
                MagicMock(channel="Test Channel", count=5)
            ]

            def mock_query_side_effect(*args, **kwargs):
                """Pick a query mock from the text of the query() arguments."""
                # NOTE(review): dispatch relies on 'count' / 'sum' appearing in
                # str(args); confirm against how the repository builds its
                # query expressions before changing get_statistics().
                if 'count' in str(args):
                    return mock_count_query
                elif 'sum' in str(args):
                    return mock_duration_query
                else:
                    return mock_channels_query

            mock_session_instance.query.side_effect = mock_query_side_effect

            stats = await repo.get_statistics()

            assert stats["total_videos"] == 10
            assert stats["total_duration_seconds"] == 3600
            assert stats["total_duration_hours"] == 1.0
@pytest.mark.asyncio
async def test_integration_youtube_workflow():
    """Run the extract-and-store workflow end to end with mocked collaborators.

    A true integration test would need a real database and a yt-dlp
    installation. Here both the extractor and the database session are mocked,
    so the test only checks that the service returns the extracted fields and
    calls add, commit and refresh on the session exactly once each.
    """
    service = YouTubeMetadataService()
    await service.initialize()

    mock_metadata = {
        "youtube_id": "dQw4w9WgXcQ",
        "title": "Integration Test Video",
        "channel": "Test Channel",
        "description": "Integration test description",
        "duration_seconds": 300,
        "url": "https://youtube.com/watch?v=dQw4w9WgXcQ",
        "metadata_extracted_at": datetime.now(timezone.utc),
    }

    extractor_patch = patch.object(
        service.extractor, 'extract_metadata', return_value=mock_metadata
    )
    session_patch = patch('src.database.connection.get_db_session')

    with extractor_patch, session_patch as mock_session:
        mock_session_instance = MagicMock()
        mock_session.return_value.__enter__.return_value = mock_session_instance
        # first() returning None makes the lookup report that no row exists yet.
        mock_session_instance.query.return_value.filter.return_value.first.return_value = None

        video = await service.extract_and_store_metadata(
            "https://youtube.com/watch?v=dQw4w9WgXcQ"
        )

        assert video.youtube_id == "dQw4w9WgXcQ"
        assert video.title == "Integration Test Video"
        assert video.duration_seconds == 300

        # Verify database operations were called
        mock_session_instance.add.assert_called_once()
        mock_session_instance.commit.assert_called_once()
        mock_session_instance.refresh.assert_called_once()