# youtube-summarizer/backend/tests/unit/test_transcript_service.py
#
# File-viewer metadata carried over with the paste: 143 lines, 6.4 KiB, Python

import pytest
import asyncio
import json
from unittest.mock import Mock, patch, AsyncMock
from backend.services.transcript_service import (
TranscriptService,
TranscriptNotAvailableError,
CaptionsNotAvailableError,
AudioTranscriptionError
)
from backend.services.mock_cache import MockCacheClient
from backend.models.transcript import ExtractionMethod
class TestTranscriptService:
    """Unit tests for ``TranscriptService``.

    Exercises the cache-hit path of ``extract_transcript``, each step of the
    extraction fallback order (YouTube API, auto-captions, Whisper audio),
    the all-methods-fail outcome, ``extract_metadata``, and result caching.
    """

    @pytest.fixture
    def cache_client(self):
        """Provide a ``MockCacheClient`` instance."""
        return MockCacheClient()

    @pytest.fixture
    def transcript_service(self, cache_client):
        """Provide a ``TranscriptService`` built on the mock cache client."""
        service = TranscriptService(cache_client)
        return service

    @pytest.mark.asyncio
    async def test_extract_transcript_from_cache(self, transcript_service, cache_client):
        """A pre-seeded cache entry is returned and flagged as a cache hit."""
        cached_metadata = {
            "word_count": 3,
            "language": "en",
            "extraction_method": "youtube_api",
            "processing_time_seconds": 0.5,
            "estimated_reading_time": 1,
            "has_timestamps": True,
        }
        cached_entry = {
            "video_id": "test123",
            "transcript": "Cached transcript content",
            "method": "youtube_api",
            "success": True,
            "metadata": cached_metadata,
        }
        # Seed the key this test expects the service to look up for
        # video "test123" in language "en".
        await cache_client.set("transcript:test123:en", cached_entry)

        outcome = await transcript_service.extract_transcript("test123", "en")

        assert outcome.success is True
        assert outcome.from_cache is True
        assert outcome.transcript == "Cached transcript content"
        assert outcome.method == ExtractionMethod.YOUTUBE_API

    @pytest.mark.asyncio
    async def test_extract_transcript_youtube_api_success(self, transcript_service):
        """The YouTube API path succeeds and is reported as the method used."""
        with patch.object(
            transcript_service,
            '_extract_youtube_transcript',
            return_value="YouTube API transcript",
        ):
            outcome = await transcript_service.extract_transcript("dQw4w9WgXcQ", "en")

            assert outcome.success is True
            assert outcome.method == ExtractionMethod.YOUTUBE_API
            # Either the patched value or the service's own mock data is accepted.
            assert (
                "YouTube API transcript" in outcome.transcript
                or "comprehensive tutorial" in outcome.transcript  # Mock data
            )

    @pytest.mark.asyncio
    async def test_extract_transcript_fallback_to_captions(self, transcript_service):
        """Auto-captions are used when the YouTube API extraction raises."""
        with patch.object(
            transcript_service,
            '_extract_youtube_transcript',
            side_effect=TranscriptNotAvailableError("Not available"),
        ), patch.object(
            transcript_service,
            '_extract_auto_captions',
            return_value="Auto-caption transcript",
        ):
            outcome = await transcript_service.extract_transcript("test123", "en")

            assert outcome.success is True
            assert outcome.method == ExtractionMethod.AUTO_CAPTIONS
            assert "Auto-caption transcript" in outcome.transcript

    @pytest.mark.asyncio
    async def test_extract_transcript_fallback_to_whisper(self, transcript_service):
        """Whisper transcription is used when API and captions both raise."""
        with patch.object(
            transcript_service,
            '_extract_youtube_transcript',
            side_effect=TranscriptNotAvailableError("Not available"),
        ), patch.object(
            transcript_service,
            '_extract_auto_captions',
            side_effect=CaptionsNotAvailableError("No captions"),
        ), patch.object(
            transcript_service,
            '_transcribe_audio',
            return_value="Whisper transcript",
        ):
            outcome = await transcript_service.extract_transcript("test123", "en")

            assert outcome.success is True
            assert outcome.method == ExtractionMethod.WHISPER_AUDIO
            assert "Whisper transcript" in outcome.transcript

    @pytest.mark.asyncio
    async def test_extract_transcript_all_methods_fail(self, transcript_service):
        """A failed result with error details is returned when every method raises."""
        with patch.object(
            transcript_service,
            '_extract_youtube_transcript',
            side_effect=TranscriptNotAvailableError("Not available"),
        ), patch.object(
            transcript_service,
            '_extract_auto_captions',
            side_effect=CaptionsNotAvailableError("No captions"),
        ), patch.object(
            transcript_service,
            '_transcribe_audio',
            side_effect=AudioTranscriptionError("Audio failed"),
        ):
            outcome = await transcript_service.extract_transcript("test123", "en")

            assert outcome.success is False
            assert outcome.method == ExtractionMethod.FAILED
            assert outcome.transcript is None
            assert outcome.error is not None
            assert "attempted_methods" in outcome.error["details"]

    @pytest.mark.asyncio
    async def test_extract_metadata(self, transcript_service):
        """``extract_metadata`` reports word, character and line statistics."""
        sample_text = "This is a test transcript with multiple words for testing."

        stats = transcript_service.extract_metadata(sample_text)

        assert stats["word_count"] == 10
        assert stats["character_count"] == len(sample_text)
        assert stats["line_count"] == 1
        assert stats["estimated_reading_time_seconds"] > 0

    @pytest.mark.asyncio
    async def test_cache_result(self, transcript_service, cache_client):
        """A second extraction of the same video is served from the cache."""
        # Start from an empty cache so the first call cannot be a hit.
        cache_client.clear_all()

        first_outcome = await transcript_service.extract_transcript("test123", "en")
        assert first_outcome.from_cache is False

        second_outcome = await transcript_service.extract_transcript("test123", "en")
        assert second_outcome.from_cache is True
        assert second_outcome.transcript == first_outcome.transcript