#!/usr/bin/env python3
"""Simple transcription script using faster-whisper directly."""
|
|
|
|
import json
import time
import traceback
from pathlib import Path

from faster_whisper import WhisperModel
|
|
|
|
# Single source of truth for values the original repeated as string literals.
_MODEL_NAME = "distil-large-v3"
_AUDIO_FILE = Path("BAP_South_Meeting_Clean.wav")
_TEXT_OUTPUT = Path("BAP_South_Meeting_Transcript.txt")
_JSON_OUTPUT = Path("BAP_South_Meeting_Transcript.json")
_PREVIEW_COUNT = 3


def _write_text_report(path, full_text, language, transcription_time,
                       total_time, segment_count):
    """Write the plain-text transcript to *path*, preceded by a metadata header."""
    header_lines = [
        "BAP South Meeting - August 28, 2025",
        f"Transcription completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Model: {_MODEL_NAME}",
        f"Language: {language}",
        f"Transcription time: {transcription_time:.1f} seconds",
        f"Total time: {total_time:.1f} seconds",
        f"Segments: {segment_count}",
        "=" * 80,
        "",  # blank line between the separator rule and the transcript body
    ]
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(header_lines) + "\n")
        f.write(full_text)


def _build_json_report(full_text, segments, info, transcription_time,
                       total_time):
    """Return the JSON-serialisable dict holding per-segment and run details."""
    return {
        "text": full_text,
        "segments": [
            {
                "start": seg.start,
                "end": seg.end,
                # Raw segment text is kept untouched here for detailed analysis.
                "text": seg.text,
                "avg_logprob": seg.avg_logprob,
                "no_speech_prob": seg.no_speech_prob,
            }
            for seg in segments
        ],
        "info": {
            "language": info.language,
            "language_probability": info.language_probability,
            "all_language_probs": info.all_language_probs,
        },
        "processing_time": transcription_time,
        "total_time": total_time,
        "model": _MODEL_NAME,
        "segments_count": len(segments),
    }


def _print_preview(segments):
    """Print the first few segments with their start/end timestamps."""
    print(f"\n📋 Preview (first {_PREVIEW_COUNT} segments):")
    for number, seg in enumerate(segments[:_PREVIEW_COUNT], start=1):
        print(f" {number}. [{seg.start:.1f}s - {seg.end:.1f}s] {seg.text}")

    remaining = len(segments) - _PREVIEW_COUNT
    if remaining > 0:
        print(f" ... and {remaining} more segments")


def main():
    """Transcribe the BAP South meeting recording using faster-whisper directly.

    Looks for ``BAP_South_Meeting_Clean.wav`` in the current working
    directory, transcribes it on CPU with the ``distil-large-v3`` model, and
    writes a plain-text transcript plus a JSON file with per-segment details.
    Progress and timing information is printed to stdout.

    Returns:
        None. A missing audio file is reported on stdout and the function
        returns early. Any exception raised while loading the model,
        transcribing or writing the outputs is caught, reported on stdout and
        its traceback printed; it is not re-raised.
    """
    audio_file = _AUDIO_FILE
    if not audio_file.exists():
        print(f"❌ Audio file not found: {audio_file}")
        return

    print(f"🎵 Transcribing: {audio_file.name}")
    print(f"📁 File size: {audio_file.stat().st_size / (1024*1024):.1f} MB")

    try:
        print(f"🚀 Loading Whisper model ({_MODEL_NAME})...")
        # perf_counter is monotonic, so the reported durations cannot be
        # skewed by system clock adjustments during a long transcription.
        start_time = time.perf_counter()

        model = WhisperModel(
            _MODEL_NAME,
            device="cpu",
            compute_type="int8_float32",
        )

        model_load_time = time.perf_counter() - start_time
        print(f"✅ Model loaded in {model_load_time:.1f} seconds")

        print("🎯 Starting transcription...")
        transcription_start = time.perf_counter()

        segments, info = model.transcribe(
            str(audio_file),
            language=None,  # Auto-detect
            temperature=0.0,  # Deterministic
            beam_size=1,
            best_of=1,
        )

        # `segments` is a generator; it is materialised *before* the timers
        # are read so the measured time covers consuming every segment.
        segments_list = list(segments)

        # Segment text normally carries its own leading space (per the
        # faster-whisper output format), so strip each piece before joining
        # to avoid doubled spaces in the transcript.
        full_text = " ".join(seg.text.strip() for seg in segments_list)

        transcription_time = time.perf_counter() - transcription_start
        total_time = time.perf_counter() - start_time

        print("✅ Transcription completed!")
        print(f"📝 Text length: {len(full_text)} characters")
        print(f"⏱️ Transcription time: {transcription_time:.1f} seconds")
        print(f"⏱️ Total time (including model load): {total_time:.1f} seconds")
        print(f"🎯 Language detected: {info.language}")
        print(f"📊 Segments: {len(segments_list)}")

        # Save to text file
        _write_text_report(
            _TEXT_OUTPUT,
            full_text,
            info.language,
            transcription_time,
            total_time,
            len(segments_list),
        )
        print(f"💾 Transcript saved to: {_TEXT_OUTPUT}")

        # Also save as JSON for detailed analysis
        json_output = _build_json_report(
            full_text, segments_list, info, transcription_time, total_time
        )
        with open(_JSON_OUTPUT, "w", encoding="utf-8") as f:
            json.dump(json_output, f, indent=2, ensure_ascii=False)
        print(f"📊 Detailed data saved to: {_JSON_OUTPUT}")

        # Show first few segments as preview
        _print_preview(segments_list)

    except Exception as e:
        # Top-level boundary of a script: report the failure with its full
        # traceback instead of letting it propagate.
        print(f"❌ Transcription failed: {e}")
        traceback.print_exc()
|
|
|
|
# Run the transcription only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|