76 lines
2.9 KiB
Python
76 lines
2.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Test script to verify the complete transcript extraction fallback chain"""
|
|
|
|
import asyncio
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add project to path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from backend.services.transcript_service import TranscriptService
|
|
from backend.services.service_factory import ServiceFactory
|
|
from backend.core.config import settings
|
|
|
|
async def test_fallback_chain() -> None:
    """Run one transcript extraction against a fixed video and print the outcome.

    Builds a transcript service through ``ServiceFactory``, requests the
    English ("en") transcript for a hard-coded YouTube video ID, and prints
    either the details of the successful result or the reported error.
    Any exception raised by the extraction call is caught and printed with
    its traceback, so the closing summary is always shown.

    This is a manual smoke test: it only prints and makes no assertions.
    """
    separator = "=" * 60

    # Show which configuration flags are active for this run.
    print(f"USE_MOCK_SERVICES: {settings.USE_MOCK_SERVICES}")
    print(f"ENABLE_REAL_TRANSCRIPT_EXTRACTION: {settings.ENABLE_REAL_TRANSCRIPT_EXTRACTION}")

    # Create the transcript service via the project's factory.
    transcript_service = ServiceFactory.create_transcript_service()

    # Test video ID (Rick Astley - Never Gonna Give You Up)
    video_id = "dQw4w9WgXcQ"

    print(f"\n{separator}")
    print(f"Testing fallback chain for video: {video_id}")
    print(f"{separator}\n")

    try:
        result = await transcript_service.extract_transcript(video_id, "en")

        if result.success:
            transcript = result.transcript
            # A missing/empty transcript is reported as length 0.
            transcript_length = len(transcript) if transcript else 0

            print(f"✅ Success! Method used: {result.method}")
            print(f" Transcript length: {transcript_length} characters")
            print(f" From cache: {result.from_cache}")

            if transcript:
                # Show the first 200 characters; append an ellipsis only
                # when the transcript was actually truncated.
                if len(transcript) > 200:
                    preview = transcript[:200] + "..."
                else:
                    preview = transcript
                print(f"\n Preview: {preview}")

            if result.metadata:
                metadata = result.metadata
                print("\n Metadata:")
                print(f" - Word count: {metadata.word_count}")
                print(f" - Language: {metadata.language}")
                print(f" - Has timestamps: {metadata.has_timestamps}")
                print(f" - Extraction method: {metadata.extraction_method}")
        else:
            print("❌ Failed to extract transcript")
            if result.error:
                print(f" Error: {result.error}")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything
        # instead of crashing, so the summary below is still printed.
        print(f"❌ Exception during extraction: {e}")
        import traceback

        traceback.print_exc()

    # Static reference list; it is printed regardless of which method
    # (if any) produced the result above.
    print(f"\n{separator}")
    print("Fallback chain methods available:")
    print("1. YouTube Transcript API (youtube-transcript-api)")
    print("2. Auto-generated Captions")
    print("3. Whisper Audio Transcription")
    print("4. PyTubeFix (alternative YouTube library)")
    print("5. YT-DLP (robust downloader)")
    print("6. Playwright (browser automation)")
    print("7. External Tools")
    print("8. Web Services")
    print("9. Transcript Only (final fallback)")
    print(f"{separator}\n")
|
|
|
|
# Script entry point: run the async smoke test on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(test_fallback_chain())