#!/usr/bin/env python3
"""Test script to verify the complete transcript extraction fallback chain."""

import asyncio
import sys
import traceback
from pathlib import Path

# Add project to path. This has to happen before the `backend` imports below,
# which is why those imports are not grouped with the ones above.
sys.path.insert(0, str(Path(__file__).parent))

from backend.services.transcript_service import TranscriptService
from backend.services.service_factory import ServiceFactory
from backend.core.config import settings

# Horizontal rule used to frame the sections of the printed report.
_SEPARATOR = "=" * 60

# Maximum number of transcript characters shown in the preview.
_PREVIEW_LIMIT = 200

# Human-readable names of the fallback methods, in the order they are listed
# in the report. This is a static list printed for reference only; it is not
# read from the transcript service.
_FALLBACK_METHODS = (
    "YouTube Transcript API (youtube-transcript-api)",
    "Auto-generated Captions",
    "Whisper Audio Transcription",
    "PyTubeFix (alternative YouTube library)",
    "YT-DLP (robust downloader)",
    "Playwright (browser automation)",
    "External Tools",
    "Web Services",
    "Transcript Only (final fallback)",
)


async def test_fallback_chain() -> None:
    """Test the complete fallback chain with a real YouTube video.

    Prints the relevant configuration flags, asks the transcript service
    built by ``ServiceFactory.create_transcript_service()`` for the English
    ("en") transcript of a fixed video ID, and prints a report of the result:
    the method used, transcript length, cache flag, a preview of at most
    ``_PREVIEW_LIMIT`` characters, and the metadata fields when present.

    Any exception raised while extracting or reporting is caught, printed
    together with its traceback, and not re-raised, so the closing list of
    fallback methods is always printed.
    """
    # Report which service mode is configured. This only prints the flags;
    # it does not change or enforce them.
    print(f"USE_MOCK_SERVICES: {settings.USE_MOCK_SERVICES}")
    print(f"ENABLE_REAL_TRANSCRIPT_EXTRACTION: {settings.ENABLE_REAL_TRANSCRIPT_EXTRACTION}")

    # Create transcript service through the project's factory.
    transcript_service = ServiceFactory.create_transcript_service()

    # Test video ID (Rick Astley - Never Gonna Give You Up)
    video_id = "dQw4w9WgXcQ"

    print(f"\n{_SEPARATOR}")
    print(f"Testing fallback chain for video: {video_id}")
    print(f"{_SEPARATOR}\n")

    try:
        result = await transcript_service.extract_transcript(video_id, "en")

        if result.success:
            transcript = result.transcript
            print(f"✅ Success! Method used: {result.method}")
            print(f" Transcript length: {len(transcript) if transcript else 0} characters")
            print(f" From cache: {result.from_cache}")

            if transcript:
                # Show only the start of the transcript, marking truncation
                # with an ellipsis when the text is longer than the limit.
                if len(transcript) > _PREVIEW_LIMIT:
                    preview = transcript[:_PREVIEW_LIMIT] + "..."
                else:
                    preview = transcript
                print(f"\n Preview: {preview}")

            if result.metadata:
                metadata = result.metadata
                print("\n Metadata:")
                print(f" - Word count: {metadata.word_count}")
                print(f" - Language: {metadata.language}")
                print(f" - Has timestamps: {metadata.has_timestamps}")
                print(f" - Extraction method: {metadata.extraction_method}")
        else:
            print("❌ Failed to extract transcript")
            if result.error:
                print(f" Error: {result.error}")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report the failure with
        # its traceback instead of aborting, so the summary below still prints.
        print(f"❌ Exception during extraction: {e}")
        traceback.print_exc()

    print(f"\n{_SEPARATOR}")
    print("Fallback chain methods available:")
    for position, method in enumerate(_FALLBACK_METHODS, start=1):
        print(f"{position}. {method}")
    print(f"{_SEPARATOR}\n")


if __name__ == "__main__":
    asyncio.run(test_fallback_chain())