143 lines
6.4 KiB
Python
143 lines
6.4 KiB
Python
import pytest
|
|
import asyncio
|
|
import json
|
|
from unittest.mock import Mock, patch, AsyncMock
|
|
from backend.services.transcript_service import (
|
|
TranscriptService,
|
|
TranscriptNotAvailableError,
|
|
CaptionsNotAvailableError,
|
|
AudioTranscriptionError
|
|
)
|
|
from backend.services.mock_cache import MockCacheClient
|
|
from backend.models.transcript import ExtractionMethod
|
|
|
|
|
|
class TestTranscriptService:
    """Tests for TranscriptService.extract_transcript and extract_metadata.

    Covers a pre-seeded cache hit, each step of the extraction fallback
    chain (YouTube API, auto-captions, Whisper audio), the all-methods-fail
    outcome, metadata extraction, and caching of a fresh result.
    """

    @pytest.fixture
    def cache_client(self):
        """Provide a MockCacheClient instance for the test."""
        mock_cache = MockCacheClient()
        return mock_cache

    @pytest.fixture
    def transcript_service(self, cache_client):
        """Provide a TranscriptService constructed with the mock cache client."""
        service = TranscriptService(cache_client)
        return service

    @pytest.mark.asyncio
    async def test_extract_transcript_from_cache(self, transcript_service, cache_client):
        """A transcript already stored in the cache is returned as a cache hit."""
        # Metadata portion of the cached entry.
        seeded_metadata = {
            "word_count": 3,
            "language": "en",
            "extraction_method": "youtube_api",
            "processing_time_seconds": 0.5,
            "estimated_reading_time": 1,
            "has_timestamps": True,
        }
        seeded_entry = {
            "video_id": "test123",
            "transcript": "Cached transcript content",
            "method": "youtube_api",
            "success": True,
            "metadata": seeded_metadata,
        }
        # Seed the cache under the key this test expects the service to read.
        await cache_client.set("transcript:test123:en", seeded_entry)

        outcome = await transcript_service.extract_transcript("test123", "en")

        assert outcome.success is True
        assert outcome.from_cache is True
        assert outcome.transcript == "Cached transcript content"
        assert outcome.method == ExtractionMethod.YOUTUBE_API

    @pytest.mark.asyncio
    async def test_extract_transcript_youtube_api_success(self, transcript_service):
        """Extraction succeeds via the YouTube API path when it returns text."""
        with patch.object(
            transcript_service,
            "_extract_youtube_transcript",
            return_value="YouTube API transcript",
        ):
            outcome = await transcript_service.extract_transcript("dQw4w9WgXcQ", "en")

            assert outcome.success is True
            assert outcome.method == ExtractionMethod.YOUTUBE_API
            # Either the patched value or the service's mock data is accepted.
            assert (
                "YouTube API transcript" in outcome.transcript
                or "comprehensive tutorial" in outcome.transcript  # Mock data
            )

    @pytest.mark.asyncio
    async def test_extract_transcript_fallback_to_captions(self, transcript_service):
        """Auto-captions are used when the YouTube API step raises."""
        with patch.object(
            transcript_service,
            "_extract_youtube_transcript",
            side_effect=TranscriptNotAvailableError("Not available"),
        ), patch.object(
            transcript_service,
            "_extract_auto_captions",
            return_value="Auto-caption transcript",
        ):
            outcome = await transcript_service.extract_transcript("test123", "en")

            assert outcome.success is True
            assert outcome.method == ExtractionMethod.AUTO_CAPTIONS
            assert "Auto-caption transcript" in outcome.transcript

    @pytest.mark.asyncio
    async def test_extract_transcript_fallback_to_whisper(self, transcript_service):
        """Whisper transcription is used when the API and captions both raise."""
        with patch.object(
            transcript_service,
            "_extract_youtube_transcript",
            side_effect=TranscriptNotAvailableError("Not available"),
        ), patch.object(
            transcript_service,
            "_extract_auto_captions",
            side_effect=CaptionsNotAvailableError("No captions"),
        ), patch.object(
            transcript_service,
            "_transcribe_audio",
            return_value="Whisper transcript",
        ):
            outcome = await transcript_service.extract_transcript("test123", "en")

            assert outcome.success is True
            assert outcome.method == ExtractionMethod.WHISPER_AUDIO
            assert "Whisper transcript" in outcome.transcript

    @pytest.mark.asyncio
    async def test_extract_transcript_all_methods_fail(self, transcript_service):
        """A failed result with error details is returned when every step raises."""
        with patch.object(
            transcript_service,
            "_extract_youtube_transcript",
            side_effect=TranscriptNotAvailableError("Not available"),
        ), patch.object(
            transcript_service,
            "_extract_auto_captions",
            side_effect=CaptionsNotAvailableError("No captions"),
        ), patch.object(
            transcript_service,
            "_transcribe_audio",
            side_effect=AudioTranscriptionError("Audio failed"),
        ):
            outcome = await transcript_service.extract_transcript("test123", "en")

            assert outcome.success is False
            assert outcome.method == ExtractionMethod.FAILED
            assert outcome.transcript is None
            assert outcome.error is not None
            assert "attempted_methods" in outcome.error["details"]

    @pytest.mark.asyncio
    async def test_extract_metadata(self, transcript_service):
        """extract_metadata reports word, character and line counts for a text."""
        # NOTE(review): nothing is awaited in this test; the asyncio marker
        # looks unnecessary -- confirm before converting it to a plain test.
        sample_text = "This is a test transcript with multiple words for testing."

        stats = transcript_service.extract_metadata(sample_text)

        assert stats["word_count"] == 10
        assert stats["character_count"] == len(sample_text)
        assert stats["line_count"] == 1
        assert stats["estimated_reading_time_seconds"] > 0

    @pytest.mark.asyncio
    async def test_cache_result(self, transcript_service, cache_client):
        """A second identical request is served from the cache."""
        # Start from an empty cache.
        # NOTE(review): clear_all() is called without await, unlike
        # cache_client.set() in the cache-hit test -- confirm it is synchronous.
        cache_client.clear_all()

        # The first request has nothing cached to read from.
        first_call = await transcript_service.extract_transcript("test123", "en")
        assert first_call.from_cache is False

        # The repeat request must come back from the cache with the same text.
        second_call = await transcript_service.extract_transcript("test123", "en")
        assert second_call.from_cache is True
        assert second_call.transcript == first_call.transcript