148 lines
5.7 KiB
Python
148 lines
5.7 KiB
Python
"""Integration tests for transcription service interactions."""
|
|
|
|
import pytest
|
|
from pathlib import Path
|
|
|
|
from src.services.mocks import create_mock_transcription_service
|
|
from src.services.protocols import TranscriptionServiceProtocol, TranscriptionConfig
|
|
|
|
|
|
class TestTranscriptionServiceIntegration:
    """Test transcription service interactions and workflows.

    Every test receives the object returned by
    ``create_mock_transcription_service()`` through the
    ``transcription_service`` fixture and awaits its coroutine methods, so
    the tests are marked with ``pytest.mark.asyncio``.
    """

    # Audio location handed to the service in every test. The tests never
    # create this file themselves; only the path is passed along.
    AUDIO_FILE = "/tmp/test_audio.wav"

    # Lower bound that the reported accuracy estimate must exceed.
    MIN_ACCURACY = 0.8

    @pytest.fixture
    def transcription_service(self):
        """Create mock transcription service for testing."""
        return create_mock_transcription_service()

    @pytest.fixture
    def mock_media_file(self):
        """Build a stand-in media file exposing only ``id`` and ``file_path``."""
        return type('MockMediaFile', (), {
            'id': 'test-media-id',
            'file_path': self.AUDIO_FILE,
        })()

    @pytest.mark.asyncio
    async def test_audio_transcription_workflow(self, transcription_service):
        """Test complete audio transcription workflow."""
        audio_path = Path(self.AUDIO_FILE)
        result = await transcription_service.transcribe_audio(audio_path)

        assert result.raw_content is not None
        assert result.segments is not None
        assert result.confidence_scores is not None
        assert result.accuracy_estimate > self.MIN_ACCURACY
        assert result.word_count > 0
        assert result.processing_time_ms > 0
        # No config is passed here, so this pins the model reported by default.
        assert result.model_used == "whisper-1"

    @pytest.mark.asyncio
    async def test_file_transcription_workflow(self, transcription_service, mock_media_file):
        """Test file transcription workflow."""
        result = await transcription_service.transcribe_file(mock_media_file)

        assert result.raw_content is not None
        assert result.segments is not None
        assert result.confidence_scores is not None
        assert result.accuracy_estimate > self.MIN_ACCURACY
        assert result.word_count > 0
        assert result.processing_time_ms > 0

    @pytest.mark.asyncio
    async def test_transcription_job_management(self, transcription_service, mock_media_file):
        """Test transcription job creation and management."""
        # Test job creation
        job = await transcription_service.create_transcription_job(mock_media_file)
        assert job is not None
        for attribute in ('id', 'media_file_id', 'status', 'config', 'created_at'):
            assert hasattr(job, attribute), f"job is missing attribute {attribute!r}"

        # Test job status retrieval
        status = await transcription_service.get_job_status(job.id)
        assert status == "completed"

        # Test job cancellation
        cancelled = await transcription_service.cancel_job(job.id)
        assert cancelled is True

    @pytest.mark.asyncio
    async def test_transcription_with_custom_config(self, transcription_service):
        """Test transcription with custom configuration."""
        audio_path = Path(self.AUDIO_FILE)
        custom_config = TranscriptionConfig(
            model="whisper-large-v3",
            language="en",
            task="transcribe",
            temperature=0.0,
        )

        result = await transcription_service.transcribe_audio(audio_path, custom_config)

        assert result.raw_content is not None
        # The result must report the model requested in the config.
        assert result.model_used == "whisper-large-v3"
        assert result.accuracy_estimate > self.MIN_ACCURACY

    @pytest.mark.asyncio
    async def test_transcription_result_structure(self, transcription_service):
        """Test that transcription results have correct structure."""
        audio_path = Path(self.AUDIO_FILE)
        result = await transcription_service.transcribe_audio(audio_path)

        # Check segments structure
        assert isinstance(result.segments, list)
        assert len(result.segments) > 0

        for segment in result.segments:
            assert "start" in segment
            assert "end" in segment
            assert "text" in segment
            assert "confidence" in segment
            assert isinstance(segment["start"], (int, float))
            assert isinstance(segment["end"], (int, float))
            assert isinstance(segment["text"], str)
            assert isinstance(segment["confidence"], (int, float))

        # Check confidence scores: one score per segment, each within [0, 1]
        assert isinstance(result.confidence_scores, list)
        assert len(result.confidence_scores) == len(result.segments)
        for score in result.confidence_scores:
            assert isinstance(score, (int, float))
            assert 0 <= score <= 1

    @pytest.mark.asyncio
    async def test_transcription_accuracy_metrics(self, transcription_service):
        """Test transcription accuracy and quality metrics."""
        audio_path = Path(self.AUDIO_FILE)
        result = await transcription_service.transcribe_audio(audio_path)

        # Check accuracy estimate
        assert 0 <= result.accuracy_estimate <= 1
        assert result.accuracy_estimate > self.MIN_ACCURACY  # Mock service should provide good accuracy

        # Check word count
        assert result.word_count > 0
        assert isinstance(result.word_count, int)

        # Check processing time
        assert result.processing_time_ms > 0
        assert isinstance(result.processing_time_ms, int)

    @pytest.mark.asyncio
    async def test_service_protocol_compliance(self, transcription_service):
        """Test that transcription service properly implements its protocol."""
        # TranscriptionServiceProtocol comes from the module-level import;
        # only the validator helper is imported here.
        from src.services.protocols import validate_protocol_implementation

        assert validate_protocol_implementation(transcription_service, TranscriptionServiceProtocol)
|
|
|
|
|
|
if __name__ == "__main__":
    # pytest.main() returns the run's exit code. Raise it as SystemExit so that
    # running this file directly reports test failures to the calling shell
    # instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__]))
|