# trax/tests/test_transcription_integrat...

"""Integration tests for transcription service interactions."""

import pytest
from pathlib import Path

from src.services.mocks import create_mock_transcription_service
from src.services.protocols import TranscriptionServiceProtocol, TranscriptionConfig


class TestTranscriptionServiceIntegration:
    """Test transcription service interactions and workflows.

    Every test runs against the object returned by
    ``create_mock_transcription_service()``. Shared inputs (the audio path
    and the stand-in media file) are provided by class-level fixtures so
    that all tests use the same values.
    """

    @pytest.fixture
    def transcription_service(self):
        """Create mock transcription service for testing."""
        return create_mock_transcription_service()

    @pytest.fixture
    def audio_path(self):
        """Return the audio path passed to ``transcribe_audio`` in these tests."""
        return Path("/tmp/test_audio.wav")

    @pytest.fixture
    def mock_media_file(self):
        """Build a minimal stand-in media file exposing ``id`` and ``file_path``."""
        # An ad-hoc class is enough here: the tests only hand this object to
        # the service and never inspect it themselves.
        return type('MockMediaFile', (), {
            'id': 'test-media-id',
            'file_path': '/tmp/test_audio.wav'
        })()

    @pytest.mark.asyncio
    async def test_audio_transcription_workflow(self, transcription_service, audio_path):
        """Test complete audio transcription workflow.

        Transcribes with the default configuration and checks that every
        result field is populated and that the default model is reported.
        """
        result = await transcription_service.transcribe_audio(audio_path)

        assert result.raw_content is not None
        assert result.segments is not None
        assert result.confidence_scores is not None
        assert result.accuracy_estimate > 0.8
        assert result.word_count > 0
        assert result.processing_time_ms > 0
        assert result.model_used == "whisper-1"

    @pytest.mark.asyncio
    async def test_file_transcription_workflow(self, transcription_service, mock_media_file):
        """Test file transcription workflow.

        Passes a media-file object (rather than a path) to ``transcribe_file``
        and checks that the result fields are populated.
        """
        result = await transcription_service.transcribe_file(mock_media_file)

        assert result.raw_content is not None
        assert result.segments is not None
        assert result.confidence_scores is not None
        assert result.accuracy_estimate > 0.8
        assert result.word_count > 0
        assert result.processing_time_ms > 0

    @pytest.mark.asyncio
    async def test_transcription_job_management(self, transcription_service, mock_media_file):
        """Test transcription job creation and management.

        Creates a job, then queries its status and cancels it using the
        job's ``id``.
        """
        # Test job creation
        job = await transcription_service.create_transcription_job(mock_media_file)
        assert job is not None
        for attribute in ('id', 'media_file_id', 'status', 'config', 'created_at'):
            assert hasattr(job, attribute), f"job is missing attribute {attribute!r}"

        # Test job status retrieval
        status = await transcription_service.get_job_status(job.id)
        assert status == "completed"

        # Test job cancellation
        cancelled = await transcription_service.cancel_job(job.id)
        assert cancelled is True

    @pytest.mark.asyncio
    async def test_transcription_with_custom_config(self, transcription_service, audio_path):
        """Test transcription with custom configuration.

        The model named in the supplied ``TranscriptionConfig`` must be the
        one reported back in ``result.model_used``.
        """
        custom_config = TranscriptionConfig(
            model="whisper-large-v3",
            language="en",
            task="transcribe",
            temperature=0.0
        )

        result = await transcription_service.transcribe_audio(audio_path, custom_config)

        assert result.raw_content is not None
        assert result.model_used == "whisper-large-v3"
        assert result.accuracy_estimate > 0.8

    @pytest.mark.asyncio
    async def test_transcription_result_structure(self, transcription_service, audio_path):
        """Test that transcription results have correct structure.

        Each segment must carry ``start``, ``end``, ``text`` and
        ``confidence`` entries of the expected types, and there must be one
        confidence score in ``[0, 1]`` per segment.
        """
        result = await transcription_service.transcribe_audio(audio_path)

        # Check segments structure
        assert isinstance(result.segments, list)
        assert len(result.segments) > 0

        # Required segment keys mapped to the type(s) their value may have.
        expected_segment_fields = {
            "start": (int, float),
            "end": (int, float),
            "text": str,
            "confidence": (int, float),
        }
        for segment in result.segments:
            for field, expected_type in expected_segment_fields.items():
                assert field in segment, f"segment is missing key {field!r}"
                assert isinstance(segment[field], expected_type), (
                    f"segment[{field!r}] has unexpected type "
                    f"{type(segment[field]).__name__}"
                )

        # Check confidence scores
        assert isinstance(result.confidence_scores, list)
        assert len(result.confidence_scores) == len(result.segments)
        for score in result.confidence_scores:
            assert isinstance(score, (int, float))
            assert 0 <= score <= 1

    @pytest.mark.asyncio
    async def test_transcription_accuracy_metrics(self, transcription_service, audio_path):
        """Test transcription accuracy and quality metrics.

        Checks the value range of ``accuracy_estimate`` and that
        ``word_count`` and ``processing_time_ms`` are positive integers.
        """
        result = await transcription_service.transcribe_audio(audio_path)

        # Check accuracy estimate
        assert 0 <= result.accuracy_estimate <= 1
        assert result.accuracy_estimate > 0.8  # Mock service should provide good accuracy

        # Check word count
        assert result.word_count > 0
        assert isinstance(result.word_count, int)

        # Check processing time
        assert result.processing_time_ms > 0
        assert isinstance(result.processing_time_ms, int)

    @pytest.mark.asyncio
    async def test_service_protocol_compliance(self, transcription_service):
        """Test that transcription service properly implements its protocol."""
        # TranscriptionServiceProtocol is already available from the
        # module-level import; only the validator needs importing here.
        from src.services.protocols import validate_protocol_implementation

        assert validate_protocol_implementation(transcription_service, TranscriptionServiceProtocol)


if __name__ == "__main__":
    # Allow running this test module directly: hand pytest this file only.
    cli_args = [__file__]
    pytest.main(cli_args)