# trax/tests/test_transcription_integrat...
#
# 148 lines
# 5.7 KiB
# Python

"""Integration tests for transcription service interactions."""
import pytest
from pathlib import Path
from src.services.mocks import create_mock_transcription_service
from src.services.protocols import TranscriptionServiceProtocol, TranscriptionConfig
class TestTranscriptionServiceIntegration:
    """Test transcription service interactions and workflows.

    Every test receives the object returned by
    ``create_mock_transcription_service()`` through the
    ``transcription_service`` fixture and exercises its async API:
    ``transcribe_audio``, ``transcribe_file``, ``create_transcription_job``,
    ``get_job_status`` and ``cancel_job``.
    """

    # Audio path handed to ``transcribe_audio`` by the audio-based tests.
    # NOTE(review): presumably the mock service never opens this path --
    # confirm against the mock implementation.
    _AUDIO_PATH = Path("/tmp/test_audio.wav")

    @pytest.fixture
    def transcription_service(self):
        """Create mock transcription service for testing."""
        return create_mock_transcription_service()

    @pytest.fixture
    def mock_media_file(self):
        """Return a minimal stand-in media file.

        The object is an instance of a throwaway ``MockMediaFile`` class
        that exposes only the ``id`` and ``file_path`` attributes.
        """
        media_file_cls = type('MockMediaFile', (), {
            'id': 'test-media-id',
            'file_path': '/tmp/test_audio.wav',
        })
        return media_file_cls()

    @staticmethod
    def _assert_populated_result(result):
        """Assert the checks shared by the audio and file workflow tests.

        Verifies that content, segments and confidence scores are present,
        that the accuracy estimate exceeds 0.8, and that the word count and
        processing time are positive.
        """
        assert result.raw_content is not None
        assert result.segments is not None
        assert result.confidence_scores is not None
        assert result.accuracy_estimate > 0.8
        assert result.word_count > 0
        assert result.processing_time_ms > 0

    @pytest.mark.asyncio
    async def test_audio_transcription_workflow(self, transcription_service):
        """Test complete audio transcription workflow."""
        result = await transcription_service.transcribe_audio(self._AUDIO_PATH)

        self._assert_populated_result(result)
        # No config is passed here, so this pins the model reported by default.
        assert result.model_used == "whisper-1"

    @pytest.mark.asyncio
    async def test_file_transcription_workflow(
        self, transcription_service, mock_media_file
    ):
        """Test file transcription workflow."""
        result = await transcription_service.transcribe_file(mock_media_file)

        self._assert_populated_result(result)

    @pytest.mark.asyncio
    async def test_transcription_job_management(
        self, transcription_service, mock_media_file
    ):
        """Test transcription job creation and management."""
        # Test job creation
        job = await transcription_service.create_transcription_job(mock_media_file)
        assert job is not None
        for attribute in ('id', 'media_file_id', 'status', 'config', 'created_at'):
            assert hasattr(job, attribute), f"job is missing {attribute!r}"

        # Test job status retrieval
        status = await transcription_service.get_job_status(job.id)
        assert status == "completed"

        # Test job cancellation
        cancelled = await transcription_service.cancel_job(job.id)
        assert cancelled is True

    @pytest.mark.asyncio
    async def test_transcription_with_custom_config(self, transcription_service):
        """Test transcription with custom configuration."""
        custom_config = TranscriptionConfig(
            model="whisper-large-v3",
            language="en",
            task="transcribe",
            temperature=0.0,
        )

        result = await transcription_service.transcribe_audio(
            self._AUDIO_PATH, custom_config
        )

        assert result.raw_content is not None
        # The result must report the model requested in the config.
        assert result.model_used == "whisper-large-v3"
        assert result.accuracy_estimate > 0.8

    @pytest.mark.asyncio
    async def test_transcription_result_structure(self, transcription_service):
        """Test that transcription results have correct structure."""
        result = await transcription_service.transcribe_audio(self._AUDIO_PATH)

        # Check segments structure
        assert isinstance(result.segments, list)
        assert len(result.segments) > 0
        for segment in result.segments:
            assert "start" in segment
            assert "end" in segment
            assert "text" in segment
            assert "confidence" in segment
            assert isinstance(segment["start"], (int, float))
            assert isinstance(segment["end"], (int, float))
            assert isinstance(segment["text"], str)
            assert isinstance(segment["confidence"], (int, float))

        # Check confidence scores: one score per segment, each within [0, 1]
        assert isinstance(result.confidence_scores, list)
        assert len(result.confidence_scores) == len(result.segments)
        for score in result.confidence_scores:
            assert isinstance(score, (int, float))
            assert 0 <= score <= 1

    @pytest.mark.asyncio
    async def test_transcription_accuracy_metrics(self, transcription_service):
        """Test transcription accuracy and quality metrics."""
        result = await transcription_service.transcribe_audio(self._AUDIO_PATH)

        # Check accuracy estimate
        assert 0 <= result.accuracy_estimate <= 1
        assert result.accuracy_estimate > 0.8  # Mock service should provide good accuracy

        # Check word count (type first, so a wrong type fails on the type check)
        assert isinstance(result.word_count, int)
        assert result.word_count > 0

        # Check processing time (type first, for the same reason)
        assert isinstance(result.processing_time_ms, int)
        assert result.processing_time_ms > 0

    @pytest.mark.asyncio
    async def test_service_protocol_compliance(self, transcription_service):
        """Test that transcription service properly implements its protocol."""
        # TranscriptionServiceProtocol is already imported at module level;
        # only the validator needs to be brought into scope here.
        from src.services.protocols import validate_protocol_implementation

        assert validate_protocol_implementation(
            transcription_service, TranscriptionServiceProtocol
        )
if __name__ == "__main__":
    # pytest.main() returns the exit code instead of exiting the process;
    # propagate it so running this file directly reports test failures to
    # the calling shell rather than always exiting with status 0.
    raise SystemExit(pytest.main([__file__]))