"""Tests for MultiPassTranscriptionPipeline first pass (subtask 7.1).""" from __future__ import annotations from pathlib import Path import pytest from src.services.multi_pass_transcription import MultiPassTranscriptionPipeline from src.services.model_manager import ModelManager FIXTURES_DIR = Path(__file__).parent / "fixtures" / "audio" @pytest.mark.parametrize( "audio_name", [ "sample_5s.wav", "sample_noisy.wav", "sample_multi.wav", "sample_30s.mp3", "sample_tech.mp3", ], ) def test_first_pass_segments_exist_and_have_fields(audio_name: str): audio_path = FIXTURES_DIR / audio_name assert audio_path.exists(), f"Missing fixture audio: {audio_path}" # Skip if fixture is a placeholder text file (not real audio) try: with open(audio_path, "r", encoding="utf-8") as f: first_line = f.readline().strip() if first_line.startswith("# Test audio placeholder") or first_line.startswith("# Placeholder"): pytest.skip("Placeholder audio fixture; skipping decode-dependent test") except UnicodeDecodeError: # Binary/real audio: proceed pass pipeline = MultiPassTranscriptionPipeline(ModelManager()) segments = pipeline._perform_first_pass(audio_path) # Basic structure checks assert isinstance(segments, list) if segments: # Some very short/no-speech clips may produce 0 segments for seg in segments: assert set(seg.keys()) >= {"start", "end", "text"} assert isinstance(seg["start"], float) assert isinstance(seg["end"], float) assert isinstance(seg["text"], str) def test_first_pass_handles_missing_file(tmp_path: Path): missing = tmp_path / "does_not_exist.wav" pipeline = MultiPassTranscriptionPipeline(ModelManager()) with pytest.raises(FileNotFoundError): pipeline._perform_first_pass(missing)