youtube-summarizer/test_fallback_chain.py

76 lines
2.9 KiB
Python

#!/usr/bin/env python3
"""Test script to verify the complete transcript extraction fallback chain"""
import asyncio
import sys
from pathlib import Path
# Add project to path
sys.path.insert(0, str(Path(__file__).parent))
from backend.services.transcript_service import TranscriptService
from backend.services.service_factory import ServiceFactory
from backend.core.config import settings
def _print_extraction_result(result):
    """Print a human-readable report for one extraction result object."""
    if not result.success:
        print("❌ Failed to extract transcript")
        if result.error:
            print(f" Error: {result.error}")
        return

    print(f"✅ Success! Method used: {result.method}")
    text = result.transcript
    print(f" Transcript length: {len(text) if text else 0} characters")
    print(f" From cache: {result.from_cache}")

    if text:
        # Only the first 200 characters are shown; longer text gets an ellipsis.
        if len(text) > 200:
            preview = text[:200] + "..."
        else:
            preview = text
        print(f"\n Preview: {preview}")

    meta = result.metadata
    if meta:
        print("\n Metadata:")
        print(f" - Word count: {meta.word_count}")
        print(f" - Language: {meta.language}")
        print(f" - Has timestamps: {meta.has_timestamps}")
        print(f" - Extraction method: {meta.extraction_method}")


async def test_fallback_chain():
    """Run one transcript extraction for a fixed video and print the outcome.

    Prints the two settings flags, builds the transcript service through
    ``ServiceFactory.create_transcript_service()``, requests the "en"
    transcript for a hard-coded video ID and reports the result. Any
    exception raised while extracting or reporting is caught and printed
    with its traceback instead of propagating. A static list of the
    fallback methods is printed at the end in every case.
    """
    divider = "=" * 60

    # Show which mode the service layer is configured for.
    print(f"USE_MOCK_SERVICES: {settings.USE_MOCK_SERVICES}")
    print(f"ENABLE_REAL_TRANSCRIPT_EXTRACTION: {settings.ENABLE_REAL_TRANSCRIPT_EXTRACTION}")

    service = ServiceFactory.create_transcript_service()

    # Test video ID (Rick Astley - Never Gonna Give You Up)
    video_id = "dQw4w9WgXcQ"

    print(f"\n{divider}")
    print(f"Testing fallback chain for video: {video_id}")
    print(f"{divider}\n")

    try:
        # Reporting stays inside the try so that a malformed result object is
        # reported like any other extraction failure.
        _print_extraction_result(await service.extract_transcript(video_id, "en"))
    except Exception as e:
        print(f"❌ Exception during extraction: {e}")
        import traceback
        traceback.print_exc()

    fallback_methods = (
        "1. YouTube Transcript API (youtube-transcript-api)",
        "2. Auto-generated Captions",
        "3. Whisper Audio Transcription",
        "4. PyTubeFix (alternative YouTube library)",
        "5. YT-DLP (robust downloader)",
        "6. Playwright (browser automation)",
        "7. External Tools",
        "8. Web Services",
        "9. Transcript Only (final fallback)",
    )
    print(f"\n{divider}")
    print("Fallback chain methods available:")
    for method_line in fallback_methods:
        print(method_line)
    print(f"{divider}\n")
if __name__ == "__main__":
    # Script entry point: create the coroutine, then let asyncio.run() drive
    # it to completion on a fresh event loop.
    entry_coroutine = test_fallback_chain()
    asyncio.run(entry_coroutine)