60 lines
1.9 KiB
Python
60 lines
1.9 KiB
Python
"""Tests for MultiPassTranscriptionPipeline first pass (subtask 7.1)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from src.services.multi_pass_transcription import MultiPassTranscriptionPipeline
|
|
from src.services.model_manager import ModelManager
|
|
|
|
|
|
# Audio fixture directory, resolved relative to this test module
# (<this file's directory>/fixtures/audio).
FIXTURES_DIR = Path(__file__).parent / "fixtures" / "audio"
|
|
|
|
|
|
def _is_placeholder_fixture(path: Path) -> bool:
    """Return True when *path* starts with a known placeholder marker line.

    The file is sniffed in binary mode with a bounded read, so real audio
    data is never pushed through a text decoder and the amount of data
    inspected does not depend on where (or whether) a newline byte occurs.
    """
    placeholder_prefixes = (b"# Test audio placeholder", b"# Placeholder")
    with open(path, "rb") as fh:
        head = fh.read(256)
    # splitlines() handles \n, \r\n and \r, mirroring text-mode readline().
    lines = head.splitlines()
    first_line = lines[0].strip() if lines else b""
    return first_line.startswith(placeholder_prefixes)


@pytest.mark.parametrize(
    "audio_name",
    [
        "sample_5s.wav",
        "sample_noisy.wav",
        "sample_multi.wav",
        "sample_30s.mp3",
        "sample_tech.mp3",
    ],
)
def test_first_pass_segments_exist_and_have_fields(audio_name: str) -> None:
    """Check that the first pass returns a list of well-formed segment dicts.

    For each fixture the test asserts that the file exists, skips when the
    fixture is only a text placeholder, and otherwise runs
    ``_perform_first_pass`` and verifies that every returned segment has
    ``start``/``end`` floats and a ``text`` string.
    """
    audio_path = FIXTURES_DIR / audio_name
    assert audio_path.exists(), f"Missing fixture audio: {audio_path}"

    # Placeholder fixtures are text files, not decodable audio.
    if _is_placeholder_fixture(audio_path):
        pytest.skip("Placeholder audio fixture; skipping decode-dependent test")

    pipeline = MultiPassTranscriptionPipeline(ModelManager())
    segments = pipeline._perform_first_pass(audio_path)

    # Basic structure checks. An empty list is accepted: some very short or
    # no-speech clips may produce 0 segments, in which case the loop is a no-op.
    assert isinstance(segments, list)
    for seg in segments:
        assert set(seg.keys()) >= {"start", "end", "text"}
        assert isinstance(seg["start"], float)
        assert isinstance(seg["end"], float)
        assert isinstance(seg["text"], str)
|
|
|
|
|
|
def test_first_pass_handles_missing_file(tmp_path: Path):
    """Expect ``FileNotFoundError`` when the first pass is given a nonexistent path."""
    # A path inside pytest's fresh temp directory that is never created.
    absent_audio = tmp_path.joinpath("does_not_exist.wav")
    first_pass_pipeline = MultiPassTranscriptionPipeline(ModelManager())

    with pytest.raises(FileNotFoundError):
        first_pass_pipeline._perform_first_pass(absent_audio)
|
|
|
|
|