trax/simple_transcribe.py

120 lines
4.3 KiB
Python

#!/usr/bin/env python3
"""Simple transcription script using faster-whisper directly."""
import time
from pathlib import Path
from faster_whisper import WhisperModel
def _save_text_transcript(output_file, full_text, info, segment_count,
                          transcription_time, total_time, model_name):
    """Write the plain-text transcript, preceded by a summary header.

    Args:
        output_file: Destination path (overwritten if it exists).
        full_text: The joined transcript text.
        info: Transcription info object; only ``info.language`` is read.
        segment_count: Number of transcribed segments.
        transcription_time: Seconds spent transcribing.
        total_time: Seconds spent loading the model plus transcribing.
        model_name: Name of the Whisper model that produced the text.
    """
    header_lines = [
        "BAP South Meeting - August 28, 2025",
        f"Transcription completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Model: {model_name}",
        f"Language: {info.language}",
        f"Transcription time: {transcription_time:.1f} seconds",
        f"Total time: {total_time:.1f} seconds",
        f"Segments: {segment_count}",
        "=" * 80,
    ]
    with open(output_file, "w", encoding="utf-8") as f:
        # Header, one blank line after the rule, then the transcript body.
        f.write("\n".join(header_lines) + "\n\n")
        f.write(full_text)


def _save_json_transcript(json_file, full_text, segments_list, info,
                          transcription_time, total_time, model_name):
    """Write the detailed transcript (per-segment timing and scores) as JSON.

    Args:
        json_file: Destination path (overwritten if it exists).
        full_text: The joined transcript text.
        segments_list: Materialised segments; ``start``, ``end``, ``text``,
            ``avg_logprob`` and ``no_speech_prob`` are read from each.
        info: Transcription info object; ``language``,
            ``language_probability`` and ``all_language_probs`` are read.
        transcription_time: Seconds spent transcribing.
        total_time: Seconds spent loading the model plus transcribing.
        model_name: Name of the Whisper model that produced the text.
    """
    import json  # local import, as in the original function body

    json_output = {
        "text": full_text,
        "segments": [
            {
                "start": seg.start,
                "end": seg.end,
                # Per-segment text is stored exactly as the model returned it.
                "text": seg.text,
                "avg_logprob": seg.avg_logprob,
                "no_speech_prob": seg.no_speech_prob,
            }
            for seg in segments_list
        ],
        "info": {
            "language": info.language,
            "language_probability": info.language_probability,
            "all_language_probs": info.all_language_probs,
        },
        "processing_time": transcription_time,
        "total_time": total_time,
        "model": model_name,
        "segments_count": len(segments_list),
    }
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(json_output, f, indent=2, ensure_ascii=False)


def main():
    """Transcribe the BAP South meeting recording using faster-whisper directly.

    Looks for ``BAP_South_Meeting_Clean.wav`` in the current working
    directory. If it is missing, a message is printed and the function
    returns immediately. Otherwise the model is loaded on CPU, the audio is
    transcribed with language auto-detection, and the result is written to
    ``BAP_South_Meeting_Transcript.txt`` and
    ``BAP_South_Meeting_Transcript.json``. Progress and a three-segment
    preview are printed along the way.

    Returns:
        None. Any exception raised while loading, transcribing or saving is
        caught, reported with its traceback, and not re-raised.
    """
    model_name = "distil-large-v3"
    audio_file = Path("BAP_South_Meeting_Clean.wav")
    if not audio_file.exists():
        print(f"❌ Audio file not found: {audio_file}")
        return

    print(f"🎵 Transcribing: {audio_file.name}")
    print(f"📁 File size: {audio_file.stat().st_size / (1024*1024):.1f} MB")

    try:
        print(f"🚀 Loading Whisper model ({model_name})...")
        # perf_counter() is monotonic, so the measured durations cannot be
        # skewed by system clock adjustments the way time.time() deltas can.
        start_time = time.perf_counter()
        model = WhisperModel(
            model_name,
            device="cpu",
            compute_type="int8_float32",
        )
        model_load_time = time.perf_counter() - start_time
        print(f"✅ Model loaded in {model_load_time:.1f} seconds")

        print("🎯 Starting transcription...")
        transcription_start = time.perf_counter()
        segments, info = model.transcribe(
            str(audio_file),
            language=None,    # Auto-detect
            temperature=0.0,  # Deterministic
            beam_size=1,
            best_of=1,
        )
        # `segments` is a generator; materialising it here keeps the decoding
        # work inside the timed region and lets the segments be reused below.
        segments_list = list(segments)
        # Segment texts typically carry their own leading space, so trim each
        # one before joining to avoid doubled spaces in the transcript.
        trimmed = (seg.text.strip() for seg in segments_list)
        full_text = " ".join(piece for piece in trimmed if piece)
        now = time.perf_counter()
        transcription_time = now - transcription_start
        total_time = now - start_time

        print("✅ Transcription completed!")
        print(f"📝 Text length: {len(full_text)} characters")
        print(f"⏱️ Transcription time: {transcription_time:.1f} seconds")
        print(f"⏱️ Total time (including model load): {total_time:.1f} seconds")
        print(f"🎯 Language detected: {info.language}")
        print(f"📊 Segments: {len(segments_list)}")

        # Save to text file
        output_file = Path("BAP_South_Meeting_Transcript.txt")
        _save_text_transcript(
            output_file, full_text, info, len(segments_list),
            transcription_time, total_time, model_name,
        )
        print(f"💾 Transcript saved to: {output_file}")

        # Also save as JSON for detailed analysis
        json_file = Path("BAP_South_Meeting_Transcript.json")
        _save_json_transcript(
            json_file, full_text, segments_list, info,
            transcription_time, total_time, model_name,
        )
        print(f"📊 Detailed data saved to: {json_file}")

        # Show first few segments as preview
        print("\n📋 Preview (first 3 segments):")
        for number, seg in enumerate(segments_list[:3], start=1):
            print(f" {number}. [{seg.start:.1f}s - {seg.end:.1f}s] {seg.text}")
        if len(segments_list) > 3:
            print(f" ... and {len(segments_list) - 3} more segments")
    except Exception as e:
        # Top-level boundary of the script: report the failure with its full
        # traceback instead of letting it propagate to the caller.
        print(f"❌ Transcription failed: {e}")
        import traceback
        traceback.print_exc()
# Script entry point: run the transcription only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()