#!/usr/bin/env python3
"""
Test script for faster-whisper with a speech-heavy video and VAD disabled for comparison.

Runs two manual integration checks against ``FasterWhisperTranscriptService``:

1. ``test_with_vad_disabled`` - transcribes a video with the VAD filter off.
2. ``test_with_different_video`` - transcribes a second video with the VAD
   filter on and a smaller model.

The process exits with status 0 only when both checks succeed.
"""
import asyncio
import logging
import sys
import time
from pathlib import Path

# Add backend to path
sys.path.insert(0, str(Path(__file__).parent / "backend"))

from backend.services.faster_whisper_transcript_service import FasterWhisperTranscriptService  # noqa: E402

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def _log_progress(message: str) -> None:
    """Progress callback handed to the service; forwards each message to the log."""
    logger.info(f"📊 {message}")


async def _cleanup_service(service) -> bool:
    """Release the service's resources.

    Returns:
        True when ``service.cleanup()`` completed, False when it raised (the
        error is logged rather than propagated so the caller can still report
        a plain pass/fail result).
    """
    try:
        await service.cleanup()
    except Exception as exc:
        logger.exception(f"❌ Cleanup failed: {exc}")
        return False
    return True


async def test_with_vad_disabled() -> bool:
    """Test with VAD disabled to ensure we get transcription.

    Returns:
        True when the transcription and the service cleanup both finished
        without raising, False otherwise.
    """
    # Use same video but disable VAD for testing
    test_video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Rick Roll
    test_video_id = "dQw4w9WgXcQ"

    logger.info("🚀 Testing FasterWhisperTranscriptService with VAD DISABLED")
    logger.info(f"📹 Test video: {test_video_url}")

    service = None
    passed = False
    try:
        # Create service with VAD disabled
        service = FasterWhisperTranscriptService(
            model_size="large-v3-turbo",
            device="auto",
            compute_type="auto",
            beam_size=5,
            vad_filter=False,  # DISABLE VAD for testing
            word_timestamps=True,
            temperature=0.0,
            best_of=5,
        )

        logger.info(f"⚙️ Configuration: VAD={service.vad_filter}, Model={service.model_size}")

        # Run transcription
        logger.info("🎬 Starting transcription with VAD disabled...")

        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments during a long transcription.
        start_time = time.perf_counter()
        segments, metadata = await service.transcribe_video(
            video_id=test_video_id,
            video_url=test_video_url,
            progress_callback=_log_progress,
        )
        total_time = time.perf_counter() - start_time

        # Report results
        logger.info("✅ Transcription completed!")
        logger.info(f"⏱️ Total time: {total_time:.2f} seconds")
        logger.info("📊 Results:")
        logger.info(f" Segments: {len(segments)}")
        logger.info(f" Word count: {metadata.word_count}")
        logger.info(f" Quality: {metadata.quality_score:.3f}")
        logger.info(f" Confidence: {metadata.confidence_score:.3f}")
        logger.info(f" Processing time: {metadata.processing_time_seconds:.2f}s")

        # Show segments
        if segments:
            logger.info("📝 First 5 segments:")
            for segment in segments[:5]:
                logger.info(f" [{segment.start_time:.1f}s - {segment.end_time:.1f}s] {segment.text}")

            # Calculate speed; the end of the last segment stands in for the
            # video duration. Skip the ratio when either value is missing or
            # zero so a zero processing time cannot raise ZeroDivisionError.
            video_duration = segments[-1].end_time
            processing_seconds = metadata.processing_time_seconds
            if video_duration and processing_seconds:
                speed_ratio = video_duration / processing_seconds
                logger.info(f"🚀 Speed: {speed_ratio:.1f}x faster than realtime")
        else:
            logger.warning("⚠️ No segments generated even with VAD disabled")

        passed = True
    except Exception as exc:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"❌ Test failed: {exc}")
    finally:
        # Always release the service, including when transcription raised.
        if service is not None:
            cleaned_up = await _cleanup_service(service)
            passed = passed and cleaned_up

    if passed:
        logger.info("🎉 Test completed!")
    return passed


async def test_with_different_video() -> bool:
    """Test with a video that has clear speech.

    Returns:
        True when the transcription and the service cleanup both finished
        without raising, False otherwise.
    """
    # Use a TED talk or similar video with clear speech
    # NOTE(review): the original comment labelled this ID "TED talk (example)";
    # confirm it really points at a short, speech-heavy video.
    test_video_url = "https://www.youtube.com/watch?v=9bZkp7q19f0"
    test_video_id = "9bZkp7q19f0"

    logger.info("🚀 Testing with speech-heavy video")
    logger.info(f"📹 Test video: {test_video_url}")

    service = None
    passed = False
    try:
        # Use VAD enabled for speech video
        service = FasterWhisperTranscriptService(
            model_size="base",  # Use smaller model for faster testing
            device="auto",
            compute_type="auto",
            vad_filter=True,
            beam_size=3,  # Faster beam search
        )

        logger.info(f"⚙️ Using {service.model_size} model with VAD enabled")

        start_time = time.perf_counter()
        segments, metadata = await service.transcribe_video(
            video_id=test_video_id,
            video_url=test_video_url,
            progress_callback=_log_progress,
        )
        elapsed = time.perf_counter() - start_time

        logger.info("✅ Speech video test completed!")
        logger.info(f"⏱️ Time: {elapsed:.2f}s")
        logger.info(f"📊 Segments: {len(segments)}")

        if segments:
            logger.info("📝 Sample segments:")
            for segment in segments[:3]:
                logger.info(f" [{segment.start_time:.1f}s] {segment.text[:100]}...")

        passed = True
    except Exception as exc:
        logger.exception(f"❌ Speech video test failed: {exc}")
    finally:
        # Always release the service, including when transcription raised.
        if service is not None:
            cleaned_up = await _cleanup_service(service)
            passed = passed and cleaned_up

    return passed


def main() -> int:
    """Run both checks in sequence and return the process exit code.

    Returns:
        0 when both tests passed, 1 when either of them failed.
    """
    # First test with VAD disabled
    logger.info("=" * 60)
    logger.info("TEST 1: VAD DISABLED")
    logger.info("=" * 60)
    success1 = asyncio.run(test_with_vad_disabled())

    # Then test with a speech video
    logger.info("\n" + "=" * 60)
    logger.info("TEST 2: SPEECH VIDEO")
    logger.info("=" * 60)
    success2 = asyncio.run(test_with_different_video())

    # Both results count: a failure in the second test must not be reported
    # as overall success.
    if success1 and success2:
        logger.info("✅ faster-whisper integration is working!")
        return 0

    logger.error("❌ faster-whisper integration has issues")
    return 1


if __name__ == "__main__":
    sys.exit(main())