trax/tests/test_batch_processor.py

"""
Unit tests for batch processing system.

Tests cover:
- Async worker pool functionality
- Queue management and priority handling
- Progress tracking and reporting
- Error recovery and retry logic
- Resource monitoring
- Task processing for different types
- Pause/resume functionality
"""

import asyncio
import pytest
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import tempfile
import os

from src.services.batch_processor import (
    BatchProcessor,
    BatchTask,
    BatchProgress,
    BatchResult,
    TaskType,
    create_batch_processor
)
from src.services.transcription_service import TranscriptionConfig


class TestBatchTask:
    """Test BatchTask dataclass functionality."""

    def test_batch_task_creation(self):
        """Test creating a batch task with all fields."""
        task = BatchTask(
            id="test_task_1",
            task_type=TaskType.TRANSCRIBE,
            data={"file_path": "/test/file.mp3"},
            priority=1,
            max_retries=5
        )

        assert task.id == "test_task_1"
        assert task.task_type == TaskType.TRANSCRIBE
        assert task.data["file_path"] == "/test/file.mp3"
        assert task.priority == 1
        assert task.max_retries == 5
        assert task.retry_count == 0
        assert task.created_at is not None
        assert task.started_at is None
        assert task.completed_at is None
        assert task.error is None
        assert task.result is None

    def test_batch_task_defaults(self):
        """Test batch task creation with default values."""
        task = BatchTask(
            id="test_task_2",
            task_type=TaskType.ENHANCE,
            data={"transcript_id": "123"}
        )

        assert task.priority == 0
        assert task.max_retries == 3
        assert task.retry_count == 0


class TestBatchProgress:
    """Test BatchProgress dataclass functionality."""

    def test_batch_progress_creation(self):
        """Test creating batch progress with initial values."""
        progress = BatchProgress(total_tasks=10)

        assert progress.total_tasks == 10
        assert progress.completed_tasks == 0
        assert progress.failed_tasks == 0
        assert progress.in_progress_tasks == 0
        assert progress.queued_tasks == 0
        assert progress.start_time is None
        assert progress.estimated_completion is None
        assert progress.current_worker_count == 0
        assert progress.memory_usage_mb == 0.0
        assert progress.cpu_usage_percent == 0.0

    def test_success_rate_calculation(self):
        """Test success rate calculation."""
        progress = BatchProgress(total_tasks=10)
        progress.completed_tasks = 7
        progress.failed_tasks = 2

        assert progress.success_rate == 70.0

    def test_success_rate_zero_total(self):
        """Test success rate with zero total tasks."""
        progress = BatchProgress(total_tasks=0)
        assert progress.success_rate == 0.0

    def test_failure_rate_calculation(self):
        """Test failure rate calculation."""
        progress = BatchProgress(total_tasks=10)
        progress.failed_tasks = 3

        assert progress.failure_rate == 30.0

    def test_elapsed_time_calculation(self):
        """Test elapsed time calculation."""
        start_time = datetime.now(timezone.utc)
        progress = BatchProgress(total_tasks=5)
        progress.start_time = start_time

        # Should be close to 0 since we just set it
        elapsed = progress.elapsed_time
        assert elapsed is not None
        assert elapsed >= 0.0
        assert elapsed < 1.0  # Should be very small

    def test_elapsed_time_no_start(self):
        """Test elapsed time when start_time is None."""
        progress = BatchProgress(total_tasks=5)
        assert progress.elapsed_time is None


class TestBatchResult:
    """Test BatchResult dataclass functionality."""

    def test_batch_result_creation(self):
        """Test creating batch result with all fields."""
        result = BatchResult(
            success_count=8,
            failure_count=2,
            total_count=10,
            results=[{"status": "completed"}],
            failures=[{"task_id": "1", "error": "test error"}],
            processing_time=120.5,
            memory_peak_mb=512.0,
            cpu_peak_percent=75.0,
            quality_metrics={"avg_accuracy": 95.5}
        )

        assert result.success_count == 8
        assert result.failure_count == 2
        assert result.total_count == 10
        assert len(result.results) == 1
        assert len(result.failures) == 1
        assert result.processing_time == 120.5
        assert result.memory_peak_mb == 512.0
        assert result.cpu_peak_percent == 75.0
        assert result.quality_metrics["avg_accuracy"] == 95.5

    def test_success_rate_calculation(self):
        """Test success rate calculation in batch result."""
        result = BatchResult(
            success_count=9,
            failure_count=1,
            total_count=10,
            results=[],
            failures=[],
            processing_time=0.0,
            memory_peak_mb=0.0,
            cpu_peak_percent=0.0,
            quality_metrics={}
        )

        assert result.success_rate == 90.0

    def test_success_rate_zero_total(self):
        """Test success rate with zero total count."""
        result = BatchResult(
            success_count=0,
            failure_count=0,
            total_count=0,
            results=[],
            failures=[],
            processing_time=0.0,
            memory_peak_mb=0.0,
            cpu_peak_percent=0.0,
            quality_metrics={}
        )

        assert result.success_rate == 0.0


class TestBatchProcessor:
    """Test BatchProcessor functionality."""

    @pytest.fixture
    def batch_processor(self):
        """Create a batch processor for testing."""
        return BatchProcessor(max_workers=2, progress_interval=0.1)

    @pytest.fixture
    def mock_services(self):
        """Mock all required services."""
        with patch('src.services.batch_processor.create_transcription_service') as mock_trans, \
             patch('src.services.batch_processor.create_enhancement_service') as mock_enhance, \
             patch('src.services.batch_processor.create_media_service') as mock_media, \
             patch('src.services.batch_processor.create_media_repository') as mock_repo:

            mock_trans.return_value = AsyncMock()
            mock_enhance.return_value = AsyncMock()
            mock_media.return_value = AsyncMock()
            mock_repo.return_value = AsyncMock()

            yield {
                'transcription': mock_trans.return_value,
                'enhancement': mock_enhance.return_value,
                'media': mock_media.return_value,
                'repository': mock_repo.return_value
            }

    @pytest.mark.asyncio
    async def test_batch_processor_initialization(self, batch_processor):
        """Test batch processor initialization."""
        assert batch_processor.max_workers == 2
        assert batch_processor.progress_interval == 0.1
        assert not batch_processor.running
        assert not batch_processor.paused
        assert not batch_processor.stopped
        assert batch_processor.progress.total_tasks == 0
        assert len(batch_processor.workers) == 0

    @pytest.mark.asyncio
    async def test_add_task(self, batch_processor):
        """Test adding tasks to the queue."""
        task_id = await batch_processor.add_task(
            TaskType.TRANSCRIBE,
            {"file_path": "/test/file.mp3"},
            priority=1
        )

        assert task_id.startswith("task_1_transcribe")
        assert batch_processor.progress.total_tasks == 1
        assert batch_processor.progress.queued_tasks == 1
        assert not batch_processor.task_queue.empty()

    @pytest.mark.asyncio
    async def test_add_multiple_tasks(self, batch_processor):
        """Test adding multiple tasks with different priorities."""
        # Add tasks with different priorities
        await batch_processor.add_task(TaskType.TRANSCRIBE, {"file": "1.mp3"}, priority=2)
        await batch_processor.add_task(TaskType.ENHANCE, {"id": "123"}, priority=1)
        await batch_processor.add_task(TaskType.YOUTUBE, {"url": "test.com"}, priority=0)

        assert batch_processor.progress.total_tasks == 3
        assert batch_processor.progress.queued_tasks == 3

        # Check that tasks are ordered by priority (lower = higher priority)
        tasks = []
        while not batch_processor.task_queue.empty():
            priority, task = await batch_processor.task_queue.get()
            tasks.append((priority, task.task_type))

        # Should be ordered by priority (0, 1, 2)
        assert tasks[0][0] == 0  # YouTube task
        assert tasks[1][0] == 1  # Enhance task
        assert tasks[2][0] == 2  # Transcribe task

    @pytest.mark.asyncio
    async def test_initialize_services(self, batch_processor, mock_services):
        """Test service initialization."""
        await batch_processor._initialize_services()

        assert batch_processor.transcription_service is not None
        assert batch_processor.enhancement_service is not None
        assert batch_processor.media_service is not None

        # Verify services were initialized
        mock_services['transcription'].initialize.assert_called_once()

    @pytest.mark.asyncio
    async def test_process_transcription_task(self, batch_processor, mock_services):
        """Test processing a transcription task."""
        await batch_processor._initialize_services()

        task = BatchTask(
            id="test_task",
            task_type=TaskType.TRANSCRIBE,
            data={
                "file_path": "/test/file.mp3",
                "config": {"model": "whisper-1"}
            }
        )

        # Mock transcription result
        mock_result = MagicMock()
        mock_result.text_content = "Test transcript"
        mock_result.segments = [{"text": "Test", "start": 0, "end": 1}]
        mock_result.accuracy = 95.5
        mock_result.processing_time = 10.0
        mock_result.quality_warnings = []

        mock_services['transcription'].transcribe_file.return_value = mock_result

        result = await batch_processor._process_transcription(task)

        assert result["status"] == "completed"
        assert result["file_path"] == "/test/file.mp3"
        assert result["transcript"] == "Test transcript"
        assert result["accuracy"] == 95.5
        assert result["processing_time"] == 10.0

        mock_services['transcription'].transcribe_file.assert_called_once()

    @pytest.mark.asyncio
    async def test_process_enhancement_task(self, batch_processor, mock_services):
        """Test processing an enhancement task."""
        await batch_processor._initialize_services()

        task = BatchTask(
            id="test_task",
            task_type=TaskType.ENHANCE,
            data={"transcript_id": "123"}
        )

        # Mock enhancement result
        mock_result = MagicMock()
        mock_result.enhanced_content = "Enhanced transcript"
        mock_result.accuracy_improvement = 2.5
        mock_result.processing_time = 5.0

        mock_services['enhancement'].enhance_transcript.return_value = mock_result

        result = await batch_processor._process_enhancement(task)

        assert result["status"] == "completed"
        assert result["transcript_id"] == "123"
        assert result["enhanced_content"] == "Enhanced transcript"
        assert result["accuracy_improvement"] == 2.5

        mock_services['enhancement'].enhance_transcript.assert_called_once_with("123")

    @pytest.mark.asyncio
    async def test_task_retry_on_failure(self, batch_processor, mock_services):
        """Test task retry mechanism on failure."""
        await batch_processor._initialize_services()

        task = BatchTask(
            id="test_task",
            task_type=TaskType.TRANSCRIBE,
            data={"file_path": "/test/file.mp3"},
            max_retries=2
        )

        # Mock service to fail twice, then succeed
        mock_services['transcription'].transcribe_file.side_effect = [
            Exception("First failure"),
            Exception("Second failure"),
            MagicMock(text_content="Success", segments=[], accuracy=95.0, processing_time=10.0, quality_warnings=[])
        ]

        # First attempt should fail and retry
        result1 = await batch_processor._process_transcription(task)
        assert result1["status"] == "retrying"
        assert result1["retry_count"] == 1

        # Second attempt should fail and retry
        result2 = await batch_processor._process_transcription(task)
        assert result2["status"] == "retrying"
        assert result2["retry_count"] == 2

        # Third attempt should succeed
        result3 = await batch_processor._process_transcription(task)
        assert result3["status"] == "completed"

    @pytest.mark.asyncio
    async def test_task_permanent_failure(self, batch_processor, mock_services):
        """Test task permanent failure after max retries."""
        await batch_processor._initialize_services()

        task = BatchTask(
            id="test_task",
            task_type=TaskType.TRANSCRIBE,
            data={"file_path": "/test/file.mp3"},
            max_retries=1
        )

        # Mock service to always fail
        mock_services['transcription'].transcribe_file.side_effect = Exception("Permanent failure")

        # First attempt should retry
        result1 = await batch_processor._process_transcription(task)
        assert result1["status"] == "retrying"

        # Second attempt should fail permanently
        result2 = await batch_processor._process_transcription(task)
        assert result2["status"] == "failed"
        assert "Permanent failure" in result2["error"]

        # Task should be in failed tasks list
        assert len(batch_processor.failed_tasks) == 1
        assert batch_processor.failed_tasks[0].id == "test_task"

    @pytest.mark.asyncio
    async def test_pause_resume_functionality(self, batch_processor):
        """Test pause and resume functionality."""
        assert not batch_processor.paused

        # Pause when not running should do nothing
        await batch_processor.pause()
        assert not batch_processor.paused

        # Start the processor
        batch_processor.running = True

        # Pause
        await batch_processor.pause()
        assert batch_processor.paused

        # Resume
        await batch_processor.resume()
        assert not batch_processor.paused

    @pytest.mark.asyncio
    async def test_stop_functionality(self, batch_processor):
        """Test stop functionality."""
        assert not batch_processor.stopped

        await batch_processor.stop()
        assert batch_processor.stopped
        assert not batch_processor.running

    @pytest.mark.asyncio
    async def test_get_progress(self, batch_processor):
        """Test getting current progress."""
        progress = batch_processor.get_progress()

        assert isinstance(progress, BatchProgress)
        assert progress.total_tasks == 0
        assert progress.completed_tasks == 0
        assert progress.failed_tasks == 0

    @pytest.mark.asyncio
    async def test_simple_batch_processing(self, batch_processor, mock_services):
        """Test simple batch processing with one task."""
        await batch_processor._initialize_services()

        # Add a task
        await batch_processor.add_task(
            TaskType.TRANSCRIBE,
            {"file_path": "/test/file.mp3"}
        )

        # Mock successful transcription
        mock_result = MagicMock()
        mock_result.text_content = "Test transcript"
        mock_result.segments = []
        mock_result.accuracy = 95.0
        mock_result.processing_time = 10.0
        mock_result.quality_warnings = []

        mock_services['transcription'].transcribe_file.return_value = mock_result

        # Start processing
        result = await batch_processor.start()

        assert result.success_count == 1
        assert result.failure_count == 0
        assert result.total_count == 1
        assert result.success_rate == 100.0
        assert len(result.results) == 1
        assert len(result.failures) == 0


class TestCreateBatchProcessor:
    """Test batch processor factory function."""

    def test_create_batch_processor_defaults(self):
        """Test creating batch processor with default parameters."""
        processor = create_batch_processor()

        assert processor.max_workers == 8
        assert processor.queue_size == 1000
        assert processor.progress_interval == 5.0
        assert processor.memory_limit_mb == 2048.0
        assert processor.cpu_limit_percent == 90.0

    def test_create_batch_processor_custom(self):
        """Test creating batch processor with custom parameters."""
        processor = create_batch_processor(
            max_workers=4,
            queue_size=500,
            progress_interval=2.0,
            memory_limit_mb=1024.0,
            cpu_limit_percent=80.0
        )

        assert processor.max_workers == 4
        assert processor.queue_size == 500
        assert processor.progress_interval == 2.0
        assert processor.memory_limit_mb == 1024.0
        assert processor.cpu_limit_percent == 80.0


class TestBatchProcessorIntegration:
    """Integration tests for batch processor."""

    @pytest.mark.asyncio
    async def test_multiple_task_types(self):
        """Test processing multiple different task types."""
        processor = BatchProcessor(max_workers=2, progress_interval=0.1)

        # Mock services
        with patch('src.services.batch_processor.create_transcription_service') as mock_trans, \
             patch('src.services.batch_processor.create_enhancement_service') as mock_enhance, \
             patch('src.services.batch_processor.create_media_service') as mock_media, \
             patch('src.services.batch_processor.create_media_repository') as mock_repo:

            mock_trans.return_value = AsyncMock()
            mock_enhance.return_value = AsyncMock()
            mock_media.return_value = AsyncMock()
            mock_repo.return_value = AsyncMock()

            # Mock results
            mock_trans.return_value.transcribe_file.return_value = MagicMock(
                text_content="Transcript", segments=[], accuracy=95.0, processing_time=10.0, quality_warnings=[]
            )
            mock_enhance.return_value.enhance_transcript.return_value = MagicMock(
                enhanced_content="Enhanced", accuracy_improvement=2.0, processing_time=5.0
            )
            mock_media.return_value.download_media.return_value = MagicMock(
                file_path=Path("/test/file.mp3"), file_size=1024, duration=60.0
            )

            # Add different types of tasks
            await processor.add_task(TaskType.TRANSCRIBE, {"file_path": "/test1.mp3"})
            await processor.add_task(TaskType.ENHANCE, {"transcript_id": "123"})
            await processor.add_task(TaskType.DOWNLOAD, {"url": "https://test.com"})

            # Process all tasks
            result = await processor.start()

            assert result.success_count == 3
            assert result.failure_count == 0
            assert result.total_count == 3
            assert result.success_rate == 100.0

    @pytest.mark.asyncio
    async def test_progress_callback(self):
        """Test progress callback functionality."""
        processor = BatchProcessor(max_workers=1, progress_interval=0.1)

        progress_updates = []

        def progress_callback(progress: BatchProgress):
            progress_updates.append(progress)

        # Mock services
        with patch('src.services.batch_processor.create_transcription_service') as mock_trans, \
             patch('src.services.batch_processor.create_enhancement_service') as mock_enhance, \
             patch('src.services.batch_processor.create_media_service') as mock_media, \
             patch('src.services.batch_processor.create_media_repository') as mock_repo:

            mock_trans.return_value = AsyncMock()
            mock_enhance.return_value = AsyncMock()
            mock_media.return_value = AsyncMock()
            mock_repo.return_value = AsyncMock()

            mock_trans.return_value.transcribe_file.return_value = MagicMock(
                text_content="Test", segments=[], accuracy=95.0, processing_time=10.0, quality_warnings=[]
            )

            # Add a task
            await processor.add_task(TaskType.TRANSCRIBE, {"file_path": "/test.mp3"})

            # Process with callback
            result = await processor.start(progress_callback=progress_callback)

            # Should have received progress updates
            assert len(progress_updates) > 0

            # Check final progress
            final_progress = progress_updates[-1]
            assert final_progress.total_tasks == 1
            assert final_progress.completed_tasks == 1
            assert final_progress.failed_tasks == 0
            assert final_progress.success_rate == 100.0