126 lines
4.7 KiB
Python
126 lines
4.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test script for faster-whisper implementation.
|
|
Tests the new FasterWhisperTranscriptService with a short YouTube video.
|
|
"""
|
|
|
|
import asyncio
|
|
import sys
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
# Put the sibling "backend" directory at the front of the import search path
# so its modules can be imported when this script is run directly.
sys.path.insert(0, str(Path(__file__).parent.joinpath("backend")))
|
|
|
|
from backend.services.faster_whisper_transcript_service import FasterWhisperTranscriptService
|
|
from backend.config.video_download_config import VideoDownloadConfig
|
|
|
|
# Module-level logger; the root logger is configured to emit INFO and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
async def test_faster_whisper():
    """Run an end-to-end transcription smoke test against one YouTube video.

    Builds a ``FasterWhisperTranscriptService`` from ``VideoDownloadConfig``,
    logs the service's performance info, transcribes the test video, and then
    logs the returned metadata, the first three segments and an estimated
    realtime speed ratio.

    Returns:
        bool: ``True`` when every step (including cleanup) completed,
        ``False`` when any step raised. Exceptions are logged with their
        traceback and never propagate to the caller.
    """
    import time

    # Well-known public video used as a stable test input. It is roughly
    # 3.5 minutes long, so the run is not instantaneous.
    test_video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Rick Roll - well-known
    test_video_id = "dQw4w9WgXcQ"

    logger.info("🚀 Testing FasterWhisperTranscriptService")
    logger.info(f"📹 Test video: {test_video_url}")

    try:
        # Load configuration
        config = VideoDownloadConfig()
        logger.info("📋 Configuration:")
        logger.info(f" Model: {config.whisper_model}")
        logger.info(f" Device: {config.whisper_device}")
        logger.info(f" Compute Type: {config.whisper_compute_type}")
        logger.info(f" VAD Filter: {config.whisper_vad_filter}")
        logger.info(f" Beam Size: {config.whisper_beam_size}")

        # Create service with configuration
        service = FasterWhisperTranscriptService(
            model_size=config.whisper_model,
            device=config.whisper_device,
            compute_type=config.whisper_compute_type,
            beam_size=config.whisper_beam_size,
            vad_filter=config.whisper_vad_filter,
            word_timestamps=config.whisper_word_timestamps,
            temperature=config.whisper_temperature,
            best_of=config.whisper_best_of,
        )

        # From here on the service exists, so it must be cleaned up even if
        # transcription or reporting fails.
        try:
            # Show performance info
            perf_info = service.get_performance_info()
            logger.info("⚡ Performance Info:")
            for key, value in perf_info.items():
                if isinstance(value, list):
                    logger.info(f" {key}:")
                    for item in value:
                        logger.info(f" - {item}")
                else:
                    logger.info(f" {key}: {value}")

            # Progress callback for testing
            async def progress_callback(message: str):
                logger.info(f"📊 Progress: {message}")

            # Run transcription; perf_counter is monotonic, so the measured
            # interval is unaffected by system clock adjustments.
            logger.info("🎬 Starting transcription...")
            start_time = time.perf_counter()

            segments, metadata = await service.transcribe_video(
                video_id=test_video_id,
                video_url=test_video_url,
                progress_callback=progress_callback,
            )

            total_time = time.perf_counter() - start_time

            # Report results
            logger.info("✅ Transcription completed!")
            logger.info(f"⏱️ Total time: {total_time:.2f} seconds")
            logger.info("📊 Metadata:")
            logger.info(f" Video ID: {metadata.video_id}")
            logger.info(f" Language: {metadata.language}")
            logger.info(f" Word count: {metadata.word_count}")
            logger.info(f" Segments: {metadata.total_segments}")
            logger.info(f" Quality score: {metadata.quality_score:.3f}")
            logger.info(f" Confidence score: {metadata.confidence_score:.3f}")
            logger.info(f" Processing time: {metadata.processing_time_seconds:.2f}s")
            logger.info(f" Method: {metadata.extraction_method}")

            # Show first few segments
            logger.info("📝 First 3 segments:")
            for segment in segments[:3]:
                logger.info(f" [{segment.start_time:.1f}s - {segment.end_time:.1f}s] {segment.text}")

            # Calculate speed ratio if possible
            if segments:
                # Estimate video duration from the end of the last segment,
                # falling back to 60s when that end time is missing or zero.
                video_duration = segments[-1].end_time or 60
                processing_seconds = metadata.processing_time_seconds
                if processing_seconds:
                    speed_ratio = video_duration / processing_seconds
                    logger.info(f"🚀 Speed improvement: {speed_ratio:.1f}x faster than realtime")
                else:
                    # A zero or missing processing time would make the ratio
                    # undefined (division by zero), so skip it.
                    logger.info("🚀 Speed ratio unavailable: no processing time reported")
        finally:
            # Cleanup
            await service.cleanup()

        logger.info("🎉 Test completed successfully!")
        return True

    except Exception as e:
        # Top-level boundary of the script: log the failure with its
        # traceback and report it through the return value.
        logger.exception(f"❌ Test failed: {e}")
        return False
|
|
|
|
|
|
if __name__ == "__main__":
    # Map the boolean test outcome to a process exit status: 0 on success,
    # 1 on failure.
    exit_code = 0 if asyncio.run(test_faster_whisper()) else 1
    sys.exit(exit_code)