trax/tests/test_parallel_processing.py

#!/usr/bin/env python3
"""
Test Parallel Chunk Processing for M3 Transcription Optimization.

Following TDD principles - tests written BEFORE implementation.
These tests define the expected behavior of the parallel processing system.
"""

import pytest
import asyncio
import time
import numpy as np
from pathlib import Path
from typing import List, Dict
from unittest.mock import MagicMock, AsyncMock, patch

# Import the classes we will implement
from src.services.parallel_transcription import (
    ParallelTranscriber,
    TranscriptionResult,
    ChunkResult
)


class TestParallelProcessing:
    """Test suite for parallel chunk processing - 2-4x speed improvement."""

    @pytest.fixture
    def sample_audio_30s(self):
        """Real 30-second audio file for testing."""
        return Path("tests/fixtures/audio/sample_30s.wav")

    @pytest.fixture
    def sample_audio_2m(self):
        """Real 2-minute audio file for testing."""
        return Path("tests/fixtures/audio/sample_2m.wav")

    @pytest.fixture
    def sample_audio_5m(self):
        """Real 5-minute audio file for testing."""
        return Path("tests/fixtures/audio/sample_5m.wav")

    @pytest.fixture
    def mock_whisper_model(self):
        """Mock Whisper model for testing without actual ML inference."""
        model = MagicMock()
        model.transcribe = MagicMock(return_value={"text": "Test transcription"})
        return model

    @pytest.mark.asyncio
    async def test_parallel_faster_than_sequential(self, sample_audio_2m):
        """Test that parallel processing is 2-4x faster than sequential."""
        transcriber = ParallelTranscriber(max_workers=4, chunk_size_seconds=30)

        # Measure sequential processing time
        start = time.time()
        seq_result = await transcriber.transcribe_sequential(sample_audio_2m)
        sequential_time = time.time() - start

        # Measure parallel processing time
        start = time.time()
        par_result = await transcriber.transcribe_parallel(sample_audio_2m)
        parallel_time = time.time() - start

        # Assertions
        assert seq_result.text == par_result.text  # Same output
        assert parallel_time < sequential_time * 0.5  # At least 2x faster
        assert len(par_result.chunks) >= 4  # Used multiple chunks
        assert par_result.speedup_factor >= 2.0  # Documented speedup

    @pytest.mark.asyncio
    async def test_chunk_splitting_logic(self):
        """Test audio is correctly split into overlapping chunks."""
        transcriber = ParallelTranscriber(
            max_workers=4,
            chunk_size_seconds=30,
            overlap_seconds=2
        )

        # Create synthetic 2-minute audio (120 seconds)
        sample_rate = 16000
        duration = 120
        audio_array = np.random.randn(sample_rate * duration).astype(np.float32)

        chunks = await transcriber._split_audio(audio_array, sample_rate)

        # Verify chunk properties
        assert len(chunks) > 1  # Multiple chunks created

        for i, chunk in enumerate(chunks):
            assert "audio" in chunk
            assert "start_time" in chunk
            assert "end_time" in chunk
            assert "chunk_id" in chunk

            # Check chunk duration (except last chunk)
            if i < len(chunks) - 1:
                duration = chunk["end_time"] - chunk["start_time"]
                assert 28 <= duration <= 30  # Approximately chunk_size_seconds

            # Check overlap with next chunk
            if i < len(chunks) - 1:
                next_chunk = chunks[i + 1]
                overlap = chunk["end_time"] - next_chunk["start_time"]
                assert 1.5 <= overlap <= 2.5  # Approximately overlap_seconds

    @pytest.mark.asyncio
    async def test_chunk_merging_handles_overlaps(self):
        """Test that overlapping transcriptions are merged correctly."""
        transcriber = ParallelTranscriber()

        # Create overlapping chunk results
        chunks = [
            ChunkResult(
                text="This is the first chunk of text.",
                start_time=0.0,
                end_time=10.0,
                chunk_id=0
            ),
            ChunkResult(
                text="chunk of text. This is the second",
                start_time=8.0,
                end_time=18.0,
                chunk_id=1
            ),
            ChunkResult(
                text="the second chunk with more content.",
                start_time=16.0,
                end_time=26.0,
                chunk_id=2
            )
        ]

        merged_text = await transcriber._merge_transcriptions(chunks)

        # Should intelligently merge overlapping text
        expected = "This is the first chunk of text. This is the second chunk with more content."
        assert merged_text == expected

    @pytest.mark.asyncio
    async def test_semaphore_limits_concurrent_workers(self):
        """Test that semaphore properly limits concurrent processing."""
        max_workers = 2
        transcriber = ParallelTranscriber(max_workers=max_workers)

        # Track concurrent executions
        concurrent_count = 0
        max_concurrent = 0
        lock = asyncio.Lock()

        async def mock_process_chunk(chunk):
            nonlocal concurrent_count, max_concurrent
            async with lock:
                concurrent_count += 1
                max_concurrent = max(max_concurrent, concurrent_count)

            await asyncio.sleep(0.1)  # Simulate processing

            async with lock:
                concurrent_count -= 1

            return ChunkResult(
                text=f"Chunk {chunk['chunk_id']}",
                start_time=chunk["start_time"],
                end_time=chunk["end_time"],
                chunk_id=chunk["chunk_id"]
            )

        # Replace process method with mock
        transcriber._process_chunk = mock_process_chunk

        # Create multiple chunks
        chunks = [{"chunk_id": i, "start_time": i*10, "end_time": (i+1)*10}
                  for i in range(6)]

        # Process chunks
        await asyncio.gather(*[transcriber._process_chunk(c) for c in chunks])

        # Verify max concurrent never exceeded limit
        assert max_concurrent <= max_workers

    @pytest.mark.asyncio
    async def test_memory_usage_under_2gb(self, sample_audio_5m):
        """Test that memory usage stays under 2GB target."""
        import psutil
        import gc

        gc.collect()
        process = psutil.Process()
        baseline_memory = process.memory_info().rss / (1024 * 1024)  # MB

        transcriber = ParallelTranscriber(max_workers=4)
        result = await transcriber.transcribe_parallel(sample_audio_5m)

        peak_memory = process.memory_info().rss / (1024 * 1024)  # MB
        memory_used = peak_memory - baseline_memory

        # Should stay well under 2GB (2048 MB)
        assert memory_used < 2048
        assert result.memory_usage_mb < 2048

    @pytest.mark.asyncio
    async def test_handles_chunk_failures_gracefully(self):
        """Test error handling when a chunk fails to process."""
        transcriber = ParallelTranscriber(max_workers=2)

        # Mock process to fail on specific chunks
        async def mock_process(chunk):
            if chunk["chunk_id"] == 2:
                raise Exception("Processing failed for chunk 2")
            return ChunkResult(
                text=f"Chunk {chunk['chunk_id']}",
                start_time=chunk["start_time"],
                end_time=chunk["end_time"],
                chunk_id=chunk["chunk_id"]
            )

        transcriber._process_chunk = mock_process

        chunks = [{"chunk_id": i, "start_time": i*10, "end_time": (i+1)*10}
                  for i in range(4)]

        # Should handle failure and continue with other chunks
        results = await transcriber._process_chunks_parallel(chunks)

        assert len(results) == 3  # One chunk failed
        assert all(r.chunk_id != 2 for r in results)  # Chunk 2 missing

    @pytest.mark.asyncio
    async def test_adaptive_chunk_sizing(self, sample_audio_2m):
        """Test that chunk size adapts based on audio characteristics."""
        # Short audio should use smaller chunks
        short_transcriber = ParallelTranscriber(adaptive_chunking=True)
        short_chunks = await short_transcriber._determine_chunk_size(
            duration_seconds=30
        )
        assert short_chunks <= 15  # Smaller chunks for short audio

        # Long audio should use larger chunks
        long_chunks = await short_transcriber._determine_chunk_size(
            duration_seconds=600  # 10 minutes
        )
        assert long_chunks >= 30  # Larger chunks for long audio

    @pytest.mark.asyncio
    async def test_performance_metrics_accurate(self, sample_audio_30s):
        """Test that performance metrics are accurately reported."""
        transcriber = ParallelTranscriber(max_workers=2)

        start = time.time()
        result = await transcriber.transcribe_parallel(sample_audio_30s)
        actual_time = time.time() - start

        # Verify metrics
        assert result.processing_time > 0
        assert abs(result.processing_time - actual_time) < 0.1  # Within 100ms
        assert result.chunks_processed >= 1
        assert result.speedup_factor >= 1.0
        assert result.worker_utilization > 0

    @pytest.mark.asyncio
    async def test_maintains_transcription_quality(self, sample_audio_30s):
        """Test that parallel processing maintains transcription accuracy."""
        transcriber = ParallelTranscriber(max_workers=4)

        # Get sequential result as baseline
        seq_result = await transcriber.transcribe_sequential(sample_audio_30s)

        # Get parallel result
        par_result = await transcriber.transcribe_parallel(sample_audio_30s)

        # Calculate similarity (should be very high)
        from difflib import SequenceMatcher
        similarity = SequenceMatcher(None, seq_result.text, par_result.text).ratio()

        assert similarity > 0.95  # At least 95% similar

    @pytest.mark.asyncio
    async def test_cli_integration(self, sample_audio_2m):
        """Test that parallel processing integrates with CLI properly."""
        from src.cli.main import transcribe_command

        # Mock the CLI context
        with patch("src.cli.main.get_transcriber") as mock_get:
            transcriber = ParallelTranscriber(max_workers=4)
            mock_get.return_value = transcriber

            # Run CLI command with parallel flag
            result = await transcribe_command(
                audio_path=str(sample_audio_2m),
                parallel=True,
                chunks=4,
                show_progress=True
            )

            assert result.success
            assert "Speedup" in result.message
            assert result.speedup_factor >= 2.0


class TestPerformanceBenchmarks:
    """Performance benchmarks to validate 2-4x speed improvement."""

    @pytest.mark.benchmark
    @pytest.mark.asyncio
    async def test_benchmark_30s_audio(self, benchmark, sample_audio_30s):
        """Benchmark 30-second audio processing."""
        transcriber = ParallelTranscriber(max_workers=4)

        result = await benchmark(
            transcriber.transcribe_parallel,
            sample_audio_30s
        )

        assert result.processing_time < 15  # Should process in <15s

    @pytest.mark.benchmark
    @pytest.mark.asyncio
    async def test_benchmark_5m_audio(self, benchmark, sample_audio_5m):
        """Benchmark 5-minute audio - should meet <30s target."""
        transcriber = ParallelTranscriber(max_workers=4)

        result = await benchmark(
            transcriber.transcribe_parallel,
            sample_audio_5m
        )

        # Must meet v1 target: 5-minute audio in <30 seconds
        assert result.processing_time < 30
        assert result.speedup_factor >= 2.0