#!/usr/bin/env python3
"""Simple script to transcribe the BAP South meeting recording."""

import asyncio
import dataclasses
import json
import sys
import traceback
from datetime import date, datetime
from pathlib import Path

from src.services.local_transcription_service import (
    LocalTranscriptionConfig,
    create_local_transcription_service,
)
from src.repositories.transcription_repository import create_transcription_repository

# Use the clean WAV file instead of the problematic M4A
AUDIO_FILE = Path("BAP_South_Meeting_Clean.wav")
TEXT_OUTPUT_FILE = Path("BAP_South_Meeting_Transcript.txt")
JSON_OUTPUT_FILE = Path("BAP_South_Meeting_Transcript.json")


def _json_default(obj):
    """Convert values that the ``json`` module cannot encode on its own.

    ``json.dump`` calls this hook only for objects it does not already know
    how to serialize, so payloads made of plain JSON types are unaffected.

    Args:
        obj: The value ``json.dump`` could not encode.

    Returns:
        An ISO-8601 string for dates/datetimes, or a plain dict for
        dataclass instances.

    Raises:
        TypeError: For any other type, matching the error ``json`` itself
            would raise without this hook.
    """
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    # is_dataclass() is also true for dataclass *types*; asdict() needs an instance.
    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
        return dataclasses.asdict(obj)
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def _write_text_transcript(result, output_file: Path) -> None:
    """Write a short metadata header followed by the transcript text.

    Args:
        result: The object returned by ``service.transcribe_audio``; its
            ``completed_at``, ``model_used``, ``accuracy_estimate``,
            ``processing_time_ms``, ``word_count`` and ``text_content``
            attributes are read.
        output_file: Destination path; overwritten if it already exists.
    """
    header = [
        "BAP South Meeting - August 28, 2025\n",
        f"Transcription completed at: {result.completed_at}\n",
        f"Model: {result.model_used}\n",
        f"Accuracy: {result.accuracy_estimate * 100:.1f}%\n",
        f"Processing time: {result.processing_time_ms / 1000:.1f} seconds\n",
        f"Word count: {result.word_count}\n",
        "=" * 80 + "\n\n",
    ]
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(header)
        f.write(result.text_content)


def _write_json_transcript(result, json_file: Path) -> None:
    """Write the transcript and its metadata as indented JSON.

    Args:
        result: The object returned by ``service.transcribe_audio``.
        json_file: Destination path; overwritten if it already exists.
    """
    json_output = {
        "text": result.text_content,
        "segments": result.segments,
        "accuracy": result.accuracy_estimate,
        "processing_time": result.processing_time_ms / 1000.0,
        "word_count": result.word_count,
        "model_used": result.model_used,
        "completed_at": result.completed_at,
        "quality_warnings": result.quality_warnings,
    }
    # NOTE(review): completed_at is presumably a datetime and segments may be
    # dataclass instances -- confirm against the service's result type. The
    # default hook keeps the dump from failing on those values.
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(json_output, f, indent=2, ensure_ascii=False, default=_json_default)


async def main() -> int:
    """Transcribe the BAP South meeting recording.

    Creates the repository and transcription service, initializes the
    service, transcribes ``AUDIO_FILE`` and saves the result as both a text
    file and a JSON file in the current working directory.

    Returns:
        0 when both output files were written; 1 when the audio file is
        missing or any step raised an exception.
    """
    if not AUDIO_FILE.exists():
        print(f"❌ Audio file not found: {AUDIO_FILE}")
        return 1

    print(f"🎵 Transcribing: {AUDIO_FILE.name}")
    print(f"📁 File size: {AUDIO_FILE.stat().st_size / (1024*1024):.1f} MB")

    try:
        # Create repository and service
        repository = await create_transcription_repository()
        service = create_local_transcription_service(repository)

        # Initialize service
        await service.initialize()

        # Configure transcription
        config = LocalTranscriptionConfig(
            model="distil-large-v3",
            language=None,  # Auto-detect
            temperature=0.0,  # Deterministic
            chunk_size_seconds=600,  # 10 minutes
            enable_m3_preprocessing=True,
        )

        print("🚀 Starting transcription...")

        # Transcribe the audio
        result = await service.transcribe_audio(AUDIO_FILE, config)

        print("✅ Transcription completed!")
        print(f"📝 Text length: {len(result.text_content)} characters")
        print(f"⏱️ Processing time: {result.processing_time_ms / 1000:.1f} seconds")
        print(f"🎯 Accuracy estimate: {result.accuracy_estimate * 100:.1f}%")

        # Save to text file
        _write_text_transcript(result, TEXT_OUTPUT_FILE)
        print(f"💾 Transcript saved to: {TEXT_OUTPUT_FILE}")

        # Also save as JSON for detailed analysis
        _write_json_transcript(result, JSON_OUTPUT_FILE)
        print(f"📊 Detailed data saved to: {JSON_OUTPUT_FILE}")

    except Exception as e:
        # Top-level boundary of the script: report the failure with its
        # traceback and signal it through the exit status instead of
        # finishing "successfully".
        print(f"❌ Transcription failed: {e}")
        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))