354 lines
14 KiB
Python
354 lines
14 KiB
Python
"""Tests for YouTube metadata extraction service."""
|
|
|
|
import asyncio
|
|
import json
|
|
import pytest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
from datetime import datetime, timezone
|
|
|
|
from src.services.youtube_service import YouTubeMetadataService, CurlYouTubeExtractor
|
|
from src.repositories.youtube_repository import YouTubeRepository
|
|
from src.database.models import YouTubeVideo
|
|
|
|
|
|
class TestCurlYouTubeExtractor:
    """Unit tests for ``CurlYouTubeExtractor``: ID parsing and metadata extraction."""

    @staticmethod
    def _fake_process(stdout, stderr, returncode):
        """Return an ``AsyncMock`` standing in for a finished subprocess.

        ``communicate()`` resolves to ``(stdout, stderr)`` and ``returncode``
        is set to the given value.
        """
        process = AsyncMock()
        process.communicate.return_value = (stdout, stderr)
        process.returncode = returncode
        return process

    def test_extract_youtube_id_from_various_urls(self):
        """Each listed URL form must yield the expected video ID."""
        extractor = CurlYouTubeExtractor()

        expected_by_url = {
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ": "dQw4w9WgXcQ",
            "https://youtu.be/dQw4w9WgXcQ": "dQw4w9WgXcQ",
            "https://www.youtube.com/embed/dQw4w9WgXcQ": "dQw4w9WgXcQ",
            "https://www.youtube.com/v/dQw4w9WgXcQ": "dQw4w9WgXcQ",
            "https://youtube.com/watch?v=dQw4w9WgXcQ&t=30s": "dQw4w9WgXcQ",
        }

        for url, expected_id in expected_by_url.items():
            assert extractor._extract_youtube_id(url) == expected_id

    def test_extract_youtube_id_invalid_url(self):
        """A non-YouTube URL must raise ``ValueError``."""
        extractor = CurlYouTubeExtractor()

        with pytest.raises(ValueError, match="Could not extract YouTube ID"):
            extractor._extract_youtube_id("https://example.com/video")

    @pytest.mark.asyncio
    async def test_extract_metadata_success(self):
        """A zero exit code with a JSON payload is mapped onto the result dict."""
        extractor = CurlYouTubeExtractor()
        video_url = "https://youtube.com/watch?v=dQw4w9WgXcQ"

        # Mock yt-dlp output
        raw_payload = {
            "title": "Test Video Title",
            "uploader": "Test Channel",
            "description": "This is a test video description",
            "duration": 180,  # 3 minutes
            "id": "dQw4w9WgXcQ",
        }
        process = self._fake_process(json.dumps(raw_payload).encode(), b"", 0)

        # The subprocess factory is replaced so no external command is run.
        with patch('asyncio.create_subprocess_exec', return_value=process):
            result = await extractor.extract_metadata(video_url)

        expected = {
            "youtube_id": "dQw4w9WgXcQ",
            "title": "Test Video Title",
            "channel": "Test Channel",
            "description": "This is a test video description",
            "duration_seconds": 180,
            "url": video_url,
        }
        assert {key: result[key] for key in expected} == expected
        assert "metadata_extracted_at" in result

    @pytest.mark.asyncio
    async def test_extract_metadata_failure(self):
        """A non-zero exit code must surface as an extraction error."""
        extractor = CurlYouTubeExtractor()
        process = self._fake_process(b"", b"Error: Video not found", 1)

        with patch('asyncio.create_subprocess_exec', return_value=process):
            with pytest.raises(Exception, match="Failed to extract metadata"):
                await extractor.extract_metadata("https://youtube.com/watch?v=invalid")
|
|
|
|
|
|
class TestYouTubeMetadataService:
    """Unit tests for ``YouTubeMetadataService`` with extractor and DB mocked."""

    @staticmethod
    def _session_with_first(result):
        """Return a mock session whose ``query().filter().first()`` yields *result*."""
        session = MagicMock()
        session.query.return_value.filter.return_value.first.return_value = result
        return session

    @pytest.mark.asyncio
    async def test_service_initialization(self):
        """After ``initialize()`` the service reports its status and name."""
        service = YouTubeMetadataService()
        await service.initialize()

        assert service.status.value == "healthy"
        assert service.name == "youtube_metadata"

    @pytest.mark.asyncio
    async def test_extract_and_store_metadata_new_video(self):
        """Metadata for a video absent from the DB comes back on the returned record."""
        service = YouTubeMetadataService()
        await service.initialize()

        video_url = "https://youtube.com/watch?v=dQw4w9WgXcQ"
        extracted = {
            "youtube_id": "dQw4w9WgXcQ",
            "title": "Test Video",
            "channel": "Test Channel",
            "description": "Test description",
            "duration_seconds": 180,
            "url": video_url,
            "metadata_extracted_at": datetime.now(timezone.utc),
        }
        # The lookup returns None: the video doesn't exist yet.
        db_session = self._session_with_first(None)

        extractor_patch = patch.object(service.extractor, 'extract_metadata', return_value=extracted)
        session_patch = patch('src.database.connection.get_db_session')
        with extractor_patch, session_patch as get_session:
            get_session.return_value.__enter__.return_value = db_session
            video = await service.extract_and_store_metadata(video_url)

        assert video.youtube_id == "dQw4w9WgXcQ"
        assert video.title == "Test Video"
        assert video.channel == "Test Channel"
        assert video.duration_seconds == 180

    @pytest.mark.asyncio
    async def test_extract_and_store_metadata_existing_video(self):
        """When the lookup finds a record, title and description are refreshed."""
        service = YouTubeMetadataService()
        await service.initialize()

        video_url = "https://youtube.com/watch?v=dQw4w9WgXcQ"
        extracted = {
            "youtube_id": "dQw4w9WgXcQ",
            "title": "Updated Video Title",
            "channel": "Test Channel",
            "description": "Updated description",
            "duration_seconds": 180,
            "url": video_url,
            "metadata_extracted_at": datetime.now(timezone.utc),
        }

        # Record with stale title/description that the lookup will return.
        stored = YouTubeVideo(
            youtube_id="dQw4w9WgXcQ",
            title="Old Title",
            channel="Test Channel",
            description="Old description",
            duration_seconds=180,
            url="https://youtube.com/watch?v=dQw4w9WgXcQ",
        )
        db_session = self._session_with_first(stored)

        extractor_patch = patch.object(service.extractor, 'extract_metadata', return_value=extracted)
        session_patch = patch('src.database.connection.get_db_session')
        with extractor_patch, session_patch as get_session:
            get_session.return_value.__enter__.return_value = db_session
            video = await service.extract_and_store_metadata(video_url)

        assert video.title == "Updated Video Title"
        assert video.description == "Updated description"

    def test_health_status(self):
        """The health report exposes the status, yt-dlp and cache-dir keys."""
        service = YouTubeMetadataService()

        with patch('subprocess.run') as fake_run:
            # Mock yt-dlp availability check
            fake_run.return_value.returncode = 0
            health = service.get_health_status()

        for key in ("status", "yt_dlp_available", "cache_dir"):
            assert key in health
|
|
|
|
|
|
class TestYouTubeRepository:
    """Unit tests for ``YouTubeRepository`` against a mocked DB session.

    Every test patches ``get_db_session`` in the repository module and makes
    the context manager it returns yield a ``MagicMock`` session.
    """

    @pytest.mark.asyncio
    async def test_create_video(self):
        """``create`` returns a record carrying the input and adds/commits once."""
        repo = YouTubeRepository()

        video_data = {
            "youtube_id": "dQw4w9WgXcQ",
            "title": "Test Video",
            "channel": "Test Channel",
            "description": "Test description",
            "duration_seconds": 180,
            "url": "https://youtube.com/watch?v=dQw4w9WgXcQ",
        }

        with patch('src.repositories.youtube_repository.get_db_session') as mock_session:
            mock_session_instance = MagicMock()
            mock_session.return_value.__enter__.return_value = mock_session_instance

            # No stand-in record is prepared: the assertions below check the
            # object that ``create`` itself returns.
            video = await repo.create(video_data)

            assert video.youtube_id == "dQw4w9WgXcQ"
            assert video.title == "Test Video"
            mock_session_instance.add.assert_called_once()
            mock_session_instance.commit.assert_called_once()

    @pytest.mark.asyncio
    async def test_get_by_youtube_id(self):
        """``get_by_youtube_id`` returns what ``query().filter().first()`` yields."""
        repo = YouTubeRepository()

        with patch('src.repositories.youtube_repository.get_db_session') as mock_session:
            mock_session_instance = MagicMock()
            mock_session.return_value.__enter__.return_value = mock_session_instance

            # Mock database result
            mock_video = MagicMock()
            mock_video.youtube_id = "dQw4w9WgXcQ"
            mock_video.title = "Test Video"
            mock_session_instance.query.return_value.filter.return_value.first.return_value = mock_video

            video = await repo.get_by_youtube_id("dQw4w9WgXcQ")

            assert video is not None
            assert video.youtube_id == "dQw4w9WgXcQ"

    @pytest.mark.asyncio
    async def test_search_by_title(self):
        """``search_by_title`` returns the rows from the mocked query chain, in order."""
        repo = YouTubeRepository()

        with patch('src.repositories.youtube_repository.get_db_session') as mock_session:
            mock_session_instance = MagicMock()
            mock_session.return_value.__enter__.return_value = mock_session_instance

            # Mock database results
            mock_videos = [
                MagicMock(youtube_id="dQw4w9WgXcQ", title="Test Video 1"),
                MagicMock(youtube_id="abc123", title="Test Video 2"),
            ]

            # Wire query -> filter -> order_by -> limit -> all explicitly so
            # only that exact call chain yields the prepared rows.
            mock_query = MagicMock()
            mock_filter = MagicMock()
            mock_order_by = MagicMock()
            mock_limit = MagicMock()
            mock_limit.all.return_value = mock_videos

            mock_session_instance.query.return_value = mock_query
            mock_query.filter.return_value = mock_filter
            mock_filter.order_by.return_value = mock_order_by
            mock_order_by.limit.return_value = mock_limit

            videos = await repo.search_by_title("Test", limit=10)

            assert len(videos) == 2
            assert videos[0].youtube_id == "dQw4w9WgXcQ"
            assert videos[1].youtube_id == "abc123"

    @pytest.mark.asyncio
    async def test_get_statistics(self):
        """``get_statistics`` reports the video count and total duration."""
        repo = YouTubeRepository()

        with patch('src.repositories.youtube_repository.get_db_session') as mock_session:
            mock_session_instance = MagicMock()
            mock_session.return_value.__enter__.return_value = mock_session_instance

            # Mock statistics results - set up separate query chains
            mock_count_query = MagicMock()
            mock_count_query.scalar.return_value = 10

            mock_duration_query = MagicMock()
            mock_duration_query.scalar.return_value = 3600

            mock_channels_query = MagicMock()
            mock_channels_query.group_by.return_value.order_by.return_value.limit.return_value.all.return_value = [
                MagicMock(channel="Test Channel", count=5)
            ]

            # Route each session.query(...) call by the text of its arguments.
            # NOTE(review): this assumes the count/sum expressions stringify
            # with "count"/"sum" in them; confirm against the repository code.
            def mock_query_side_effect(*args, **kwargs):
                if 'count' in str(args):
                    return mock_count_query
                elif 'sum' in str(args):
                    return mock_duration_query
                else:
                    return mock_channels_query

            mock_session_instance.query.side_effect = mock_query_side_effect

            stats = await repo.get_statistics()

            assert stats["total_videos"] == 10
            assert stats["total_duration_seconds"] == 3600
            assert stats["total_duration_hours"] == 1.0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_integration_youtube_workflow():
    """Run the extract-and-store workflow end to end with mocks.

    A true integration run would need a real database and a yt-dlp
    installation, executed against a test database with actual YouTube URLs.
    For now the extractor and the DB session are both mocked.
    """
    service = YouTubeMetadataService()
    repo = YouTubeRepository()  # instantiated only; not used further in this test

    await service.initialize()

    video_url = "https://youtube.com/watch?v=dQw4w9WgXcQ"
    extracted = {
        "youtube_id": "dQw4w9WgXcQ",
        "title": "Integration Test Video",
        "channel": "Test Channel",
        "description": "Integration test description",
        "duration_seconds": 300,
        "url": video_url,
        "metadata_extracted_at": datetime.now(timezone.utc),
    }

    # The lookup returns None, so the service sees no stored record.
    db_session = MagicMock()
    db_session.query.return_value.filter.return_value.first.return_value = None

    extractor_patch = patch.object(service.extractor, 'extract_metadata', return_value=extracted)
    session_patch = patch('src.database.connection.get_db_session')
    with extractor_patch, session_patch as get_session:
        get_session.return_value.__enter__.return_value = db_session

        # Test the complete workflow
        video = await service.extract_and_store_metadata(video_url)

        assert video.youtube_id == "dQw4w9WgXcQ"
        assert video.title == "Integration Test Video"
        assert video.duration_seconds == 300

        # Verify database operations were called
        for operation in (db_session.add, db_session.commit, db_session.refresh):
            operation.assert_called_once()
|