import asyncio
import json
from unittest.mock import Mock, patch, AsyncMock

import pytest

from backend.services.transcript_service import (
    TranscriptService,
    TranscriptNotAvailableError,
    CaptionsNotAvailableError,
    AudioTranscriptionError,
)
from backend.services.mock_cache import MockCacheClient
from backend.models.transcript import ExtractionMethod


class TestTranscriptService:
    """Tests for TranscriptService.

    Covers cache hits, the YouTube API -> auto-captions -> Whisper fallback
    order exercised by the tests below, the all-methods-fail result,
    metadata computation, and caching of results between calls.
    """

    @pytest.fixture
    def cache_client(self):
        """Provide a new MockCacheClient instance."""
        return MockCacheClient()

    @pytest.fixture
    def transcript_service(self, cache_client):
        """Provide a TranscriptService built on the ``cache_client`` fixture."""
        return TranscriptService(cache_client)

    @pytest.mark.asyncio
    async def test_extract_transcript_from_cache(self, transcript_service, cache_client):
        """A transcript pre-seeded in the cache is returned and flagged as cached."""
        seeded_metadata = {
            "word_count": 3,
            "language": "en",
            "extraction_method": "youtube_api",
            "processing_time_seconds": 0.5,
            "estimated_reading_time": 1,
            "has_timestamps": True,
        }
        seeded_entry = {
            "video_id": "test123",
            "transcript": "Cached transcript content",
            "method": "youtube_api",
            "success": True,
            "metadata": seeded_metadata,
        }
        # Store the entry under the key this test expects the service to look up.
        await cache_client.set("transcript:test123:en", seeded_entry)

        outcome = await transcript_service.extract_transcript("test123", "en")

        assert outcome.success is True
        assert outcome.from_cache is True
        assert outcome.transcript == "Cached transcript content"
        assert outcome.method == ExtractionMethod.YOUTUBE_API

    @pytest.mark.asyncio
    async def test_extract_transcript_youtube_api_success(self, transcript_service):
        """Extraction succeeds through the YouTube API path."""
        youtube_ok = patch.object(
            transcript_service,
            "_extract_youtube_transcript",
            return_value="YouTube API transcript",
        )
        with youtube_ok:
            outcome = await transcript_service.extract_transcript("dQw4w9WgXcQ", "en")

        assert outcome.success is True
        assert outcome.method == ExtractionMethod.YOUTUBE_API
        # Mock data: the second substring is accepted as an alternative to the
        # patched return value.
        # NOTE(review): presumably the service can return built-in mock data
        # for this video id - confirm, and tighten the assertion if not.
        assert (
            "YouTube API transcript" in outcome.transcript
            or "comprehensive tutorial" in outcome.transcript
        )

    @pytest.mark.asyncio
    async def test_extract_transcript_fallback_to_captions(self, transcript_service):
        """Auto-captions are used when the YouTube API extraction raises."""
        youtube_down = patch.object(
            transcript_service,
            "_extract_youtube_transcript",
            side_effect=TranscriptNotAvailableError("Not available"),
        )
        captions_ok = patch.object(
            transcript_service,
            "_extract_auto_captions",
            return_value="Auto-caption transcript",
        )
        with youtube_down, captions_ok:
            outcome = await transcript_service.extract_transcript("test123", "en")

        assert outcome.success is True
        assert outcome.method == ExtractionMethod.AUTO_CAPTIONS
        assert "Auto-caption transcript" in outcome.transcript

    @pytest.mark.asyncio
    async def test_extract_transcript_fallback_to_whisper(self, transcript_service):
        """Whisper transcription is used when the YouTube API and captions both raise."""
        youtube_down = patch.object(
            transcript_service,
            "_extract_youtube_transcript",
            side_effect=TranscriptNotAvailableError("Not available"),
        )
        captions_down = patch.object(
            transcript_service,
            "_extract_auto_captions",
            side_effect=CaptionsNotAvailableError("No captions"),
        )
        whisper_ok = patch.object(
            transcript_service,
            "_transcribe_audio",
            return_value="Whisper transcript",
        )
        with youtube_down, captions_down, whisper_ok:
            outcome = await transcript_service.extract_transcript("test123", "en")

        assert outcome.success is True
        assert outcome.method == ExtractionMethod.WHISPER_AUDIO
        assert "Whisper transcript" in outcome.transcript

    @pytest.mark.asyncio
    async def test_extract_transcript_all_methods_fail(self, transcript_service):
        """A failed result carrying error details is returned when every method raises."""
        youtube_down = patch.object(
            transcript_service,
            "_extract_youtube_transcript",
            side_effect=TranscriptNotAvailableError("Not available"),
        )
        captions_down = patch.object(
            transcript_service,
            "_extract_auto_captions",
            side_effect=CaptionsNotAvailableError("No captions"),
        )
        whisper_down = patch.object(
            transcript_service,
            "_transcribe_audio",
            side_effect=AudioTranscriptionError("Audio failed"),
        )
        with youtube_down, captions_down, whisper_down:
            outcome = await transcript_service.extract_transcript("test123", "en")

        assert outcome.success is False
        assert outcome.method == ExtractionMethod.FAILED
        assert outcome.transcript is None
        assert outcome.error is not None
        assert "attempted_methods" in outcome.error["details"]

    @pytest.mark.asyncio
    async def test_extract_metadata(self, transcript_service):
        """extract_metadata reports word, character and line counts plus reading time."""
        sample_text = "This is a test transcript with multiple words for testing."

        info = transcript_service.extract_metadata(sample_text)

        assert info["word_count"] == 10
        assert info["character_count"] == len(sample_text)
        assert info["line_count"] == 1
        assert info["estimated_reading_time_seconds"] > 0

    @pytest.mark.asyncio
    async def test_cache_result(self, transcript_service, cache_client):
        """The first extraction is not served from cache; an identical second call is."""
        # Start from an empty cache.
        # NOTE(review): clear_all() is called without await although set() is
        # awaited elsewhere in this class - confirm clear_all is synchronous.
        cache_client.clear_all()

        # No extraction method is patched here, so the service runs as-is.
        first = await transcript_service.extract_transcript("test123", "en")
        assert first.from_cache is False

        second = await transcript_service.extract_transcript("test123", "en")
        assert second.from_cache is True
        assert second.transcript == first.transcript