# trax/tests/test_performance_profiling.py
"""
Unit tests for performance profiling infrastructure.
Tests the comprehensive performance profiling system that measures
memory usage, processing speed, and resource utilization across
all pipeline components.
"""

import json
import pytest
import time
from unittest.mock import MagicMock, patch, AsyncMock
from datetime import datetime, timezone

from src.services.performance import PerformanceMetrics


class TestPerformanceProfilingInfrastructure:
"""Test the comprehensive performance profiling infrastructure."""
@pytest.fixture
def mock_model_manager(self):
"""Create a mock model manager for testing."""
manager = MagicMock()
manager.transcribe_batch = AsyncMock()
manager.transcribe_batch.return_value = ["transcript1", "transcript2"]
        return manager

@pytest.fixture
def mock_diarization_manager(self):
"""Create a mock diarization manager for testing."""
manager = MagicMock()
manager.process_audio = AsyncMock()
manager.process_audio.return_value = {"speakers": 2, "segments": 10}
        return manager

@pytest.fixture
def mock_domain_adapter(self):
"""Create a mock domain adapter for testing."""
adapter = MagicMock()
adapter.adapt_transcript = AsyncMock()
adapter.adapt_transcript.return_value = "adapted_transcript"
        return adapter

@pytest.fixture
def sample_audio_files(self):
"""Create sample audio file paths for testing."""
return [
"tests/fixtures/sample_5s.wav",
"tests/fixtures/sample_30s.mp3",
"tests/fixtures/sample_2m.mp4"
        ]

def test_performance_benchmark_initialization(self, mock_model_manager,
mock_diarization_manager,
mock_domain_adapter):
"""Test performance benchmark initialization."""
from src.services.performance_profiling import PerformanceBenchmark
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
assert benchmark.model_manager == mock_model_manager
assert benchmark.diarization_manager == mock_diarization_manager
assert benchmark.domain_adapter == mock_domain_adapter
        assert benchmark.results == {}

@pytest.mark.asyncio
async def test_benchmark_transcription_basic(self, mock_model_manager,
mock_diarization_manager,
mock_domain_adapter,
sample_audio_files):
"""Test basic transcription benchmarking."""
from src.services.performance_profiling import PerformanceBenchmark
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
# Mock CUDA functions
with patch('torch.cuda.reset_peak_memory_stats'), \
patch('torch.cuda.empty_cache'), \
patch('torch.cuda.max_memory_allocated') as mock_max_memory:
mock_max_memory.return_value = 1024 * 1024 * 1024 # 1GB
results = await benchmark.benchmark_transcription(
audio_files=sample_audio_files,
batch_sizes=[1, 2],
device='cuda'
)
assert len(results) == 2 # Two batch sizes
assert 'batch_size' in results.columns
assert 'total_time' in results.columns
assert 'throughput' in results.columns
assert 'peak_memory_gb' in results.columns
# Verify model manager was called
assert mock_model_manager.transcribe_batch.called
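
    # Hedged sketch: one way to structure the per-batch timing loop a benchmark
    # like the one above needs, using time.perf_counter(). This is illustrative
    # only and independent of PerformanceBenchmark's actual code; run_batch and
    # its inputs are made up for the example.
    def test_batch_timing_reference_sketch(self):
        def run_batch(items):
            return [item.upper() for item in items]

        start = time.perf_counter()
        processed = run_batch(["a", "b", "c"])
        elapsed = time.perf_counter() - start
        # Throughput is items processed per second of wall-clock time.
        throughput = len(processed) / elapsed if elapsed > 0 else float("inf")
        assert throughput > 0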
@pytest.mark.asyncio
async def test_benchmark_diarization(self, mock_model_manager,
mock_diarization_manager,
mock_domain_adapter,
sample_audio_files):
"""Test diarization benchmarking."""
from src.services.performance_profiling import PerformanceBenchmark
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
# Mock psutil
with patch('psutil.Process') as mock_process:
mock_process_instance = MagicMock()
mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 100 # 100MB
mock_process.return_value = mock_process_instance
results = await benchmark.benchmark_diarization(sample_audio_files)
assert 'total_time' in results
assert 'per_file_avg' in results
assert 'peak_memory_mb' in results
assert results['total_time'] > 0
assert results['per_file_avg'] > 0
# Verify diarization manager was called
            assert mock_diarization_manager.process_audio.called

def test_memory_tracking_accuracy(self):
"""Test memory tracking accuracy."""
from src.services.performance_profiling import MemoryTracker
tracker = MemoryTracker()
# Test memory tracking
with patch('psutil.Process') as mock_process:
mock_process_instance = MagicMock()
mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 50 # 50MB
mock_process.return_value = mock_process_instance
# Mock the process instance
tracker.process = mock_process_instance
memory_usage = tracker.get_current_memory_mb()
assert memory_usage == 50.0
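
    # Hedged sketch (not the project's MemoryTracker): a minimal local tracker
    # showing the rss-bytes -> MB conversion the assertion above assumes.
    # _SketchTracker is a hypothetical name used only for illustration.
    def test_memory_tracker_reference_sketch(self):
        class _SketchTracker:
            def __init__(self, process):
                self.process = process

            def get_current_memory_mb(self):
                # psutil reports resident set size (rss) in bytes.
                return self.process.memory_info().rss / (1024 * 1024)

        fake_process = MagicMock()
        fake_process.memory_info.return_value.rss = 1024 * 1024 * 50  # 50MB
        assert _SketchTracker(fake_process).get_current_memory_mb() == 50.0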
def test_timing_decorator(self):
"""Test timing decorator functionality."""
from src.services.performance_profiling import timing_decorator
@timing_decorator
def test_function():
time.sleep(0.1) # Simulate work
return "result"
result = test_function()
assert result == "result"
        # Verifying the recorded duration would couple this test to the
        # decorator's internal storage mechanism, which is an implementation
        # detail; here we only assert that the wrapped function's return value
        # passes through unchanged. The reference sketch below shows the
        # timing contract itself.
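
    # Hedged sketch (not the project's timing_decorator): a minimal decorator
    # that records elapsed wall-clock time while passing the return value
    # through unchanged, illustrating the contract exercised above.
    def test_timing_decorator_reference_sketch(self):
        import functools

        recorded = {}

        def sketch_timing(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                start = time.perf_counter()
                try:
                    return func(*args, **kwargs)
                finally:
                    recorded[func.__name__] = time.perf_counter() - start
            return wrapper

        @sketch_timing
        def work():
            time.sleep(0.05)  # Simulate work
            return "result"

        assert work() == "result"
        assert recorded["work"] >= 0.05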
def test_benchmark_data_serialization(self):
"""Test benchmark data serialization and deserialization."""
from src.services.performance_profiling import BenchmarkData
# Create sample benchmark data
data = BenchmarkData(
operation_name="test_transcription",
batch_size=4,
duration_seconds=10.5,
peak_memory_mb=2048.0,
throughput_items_per_second=2.5,
timestamp=datetime.now(timezone.utc)
)
# Test serialization
serialized = data.to_dict()
assert 'operation_name' in serialized
assert 'batch_size' in serialized
assert 'duration_seconds' in serialized
assert serialized['duration_seconds'] == 10.5
# Test deserialization
deserialized = BenchmarkData.from_dict(serialized)
assert deserialized.operation_name == "test_transcription"
assert deserialized.batch_size == 4
assert deserialized.duration_seconds == 10.5
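
    # Hedged sketch (not the project's BenchmarkData): a dataclass round-trip
    # showing one lossless to_dict/from_dict scheme, with the timestamp
    # serialized as an ISO-8601 string. _SketchRecord is illustrative only.
    def test_serialization_reference_sketch(self):
        from dataclasses import dataclass

        @dataclass
        class _SketchRecord:
            operation_name: str
            duration_seconds: float
            timestamp: datetime

            def to_dict(self):
                return {
                    "operation_name": self.operation_name,
                    "duration_seconds": self.duration_seconds,
                    "timestamp": self.timestamp.isoformat(),
                }

            @classmethod
            def from_dict(cls, payload):
                return cls(
                    operation_name=payload["operation_name"],
                    duration_seconds=payload["duration_seconds"],
                    timestamp=datetime.fromisoformat(payload["timestamp"]),
                )

        original = _SketchRecord("test_transcription", 10.5,
                                 datetime.now(timezone.utc))
        assert _SketchRecord.from_dict(original.to_dict()) == original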
def test_system_information_collection(self):
"""Test system information collection."""
from src.services.performance_profiling import SystemInfoCollector
collector = SystemInfoCollector()
with patch('platform.processor') as mock_processor, \
patch('platform.machine') as mock_machine, \
patch('psutil.cpu_count') as mock_cpu_count, \
patch('psutil.virtual_memory') as mock_vm:
mock_processor.return_value = "Apple M3"
mock_machine.return_value = "arm64"
mock_cpu_count.return_value = 8
mock_vm.return_value.total = 16 * 1024 * 1024 * 1024 # 16GB
system_info = collector.collect_system_info()
assert 'cpu_model' in system_info
assert 'architecture' in system_info
assert 'cpu_cores' in system_info
assert 'total_memory_gb' in system_info
assert system_info['cpu_model'] == "Apple M3"
assert system_info['cpu_cores'] == 8
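
    # Hedged sketch: gathering the same fields with the real stdlib/psutil
    # calls the mocks above stand in for. Only types and ranges are asserted,
    # since the values vary per machine.
    def test_system_info_reference_sketch(self):
        import platform
        import psutil

        info = {
            "cpu_model": platform.processor(),
            "architecture": platform.machine(),
            "cpu_cores": psutil.cpu_count(),
            "total_memory_gb": psutil.virtual_memory().total / (1024 ** 3),
        }
        assert isinstance(info["architecture"], str)
        assert info["total_memory_gb"] > 0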
@pytest.mark.asyncio
async def test_end_to_end_benchmarking(self, mock_model_manager,
mock_diarization_manager,
mock_domain_adapter,
sample_audio_files):
"""Test end-to-end benchmarking."""
from src.services.performance_profiling import PerformanceBenchmark
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
# Mock all necessary components
with patch('torch.cuda.reset_peak_memory_stats'), \
patch('torch.cuda.empty_cache'), \
patch('torch.cuda.max_memory_allocated') as mock_max_memory, \
patch('psutil.Process') as mock_process:
mock_max_memory.return_value = 1024 * 1024 * 1024 # 1GB
mock_process_instance = MagicMock()
mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 100 # 100MB
mock_process.return_value = mock_process_instance
# Run end-to-end benchmark
results = await benchmark.benchmark_end_to_end(sample_audio_files)
assert 'total_processing_time' in results
assert 'transcription_time' in results
assert 'diarization_time' in results
assert 'adaptation_time' in results
assert 'peak_memory_usage' in results
            assert 'throughput' in results

def test_benchmark_data_storage(self, tmp_path):
"""Test benchmark data storage and retrieval."""
from src.services.performance_profiling import BenchmarkDataStore, BenchmarkData
store = BenchmarkDataStore(storage_path=tmp_path)
# Create sample data
data = BenchmarkData(
operation_name="test_operation",
batch_size=2,
duration_seconds=5.0,
peak_memory_mb=1024.0,
throughput_items_per_second=1.0,
timestamp=datetime.now(timezone.utc)
)
# Store data
store.store_benchmark_data(data)
# Retrieve data
retrieved_data = store.get_benchmark_data("test_operation")
assert len(retrieved_data) == 1
assert retrieved_data[0].operation_name == "test_operation"
assert retrieved_data[0].duration_seconds == 5.0
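
    # Hedged sketch (not the project's BenchmarkDataStore): append-only
    # JSON-lines persistence filtered by operation name, showing the
    # store/retrieve round-trip the test above relies on.
    def test_data_store_reference_sketch(self, tmp_path):
        path = tmp_path / "benchmarks.jsonl"

        def store(record):
            with path.open("a") as fh:
                fh.write(json.dumps(record) + "\n")

        def load(operation_name):
            with path.open() as fh:
                rows = [json.loads(line) for line in fh]
            return [r for r in rows if r["operation_name"] == operation_name]

        store({"operation_name": "test_operation", "duration_seconds": 5.0})
        store({"operation_name": "other_operation", "duration_seconds": 1.0})
        rows = load("test_operation")
        assert len(rows) == 1
        assert rows[0]["duration_seconds"] == 5.0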
def test_performance_metrics_aggregation(self):
"""Test performance metrics aggregation."""
from src.services.performance_profiling import MetricsAggregator
aggregator = MetricsAggregator()
# Create sample metrics
metrics = [
PerformanceMetrics(
operation="transcription",
duration_seconds=10.0,
memory_peak_mb=1024.0,
cpu_peak_percent=50.0,
throughput_items_per_second=2.0,
error_count=0,
success_count=10,
total_count=10
),
PerformanceMetrics(
operation="transcription",
duration_seconds=12.0,
memory_peak_mb=1536.0,
cpu_peak_percent=60.0,
throughput_items_per_second=1.8,
error_count=1,
success_count=9,
total_count=10
)
]
# Aggregate metrics
aggregated = aggregator.aggregate_metrics(metrics)
assert aggregated['avg_duration_seconds'] == 11.0
assert aggregated['avg_memory_peak_mb'] == 1280.0
assert aggregated['avg_cpu_peak_percent'] == 55.0
assert aggregated['total_operations'] == 2
assert aggregated['success_rate'] == 95.0 # 19/20 * 100
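
    # Hedged sketch: the aggregation arithmetic the assertions above assume
    # (field-wise means, success_rate = successes / total * 100), checked with
    # plain Python rather than MetricsAggregator. Multiplying before dividing
    # keeps the success-rate computation exact in floating point.
    def test_aggregation_reference_math(self):
        durations = [10.0, 12.0]
        memories = [1024.0, 1536.0]
        cpus = [50.0, 60.0]
        successes, totals = 10 + 9, 10 + 10
        assert sum(durations) / len(durations) == 11.0
        assert sum(memories) / len(memories) == 1280.0
        assert sum(cpus) / len(cpus) == 55.0
        assert successes * 100 / totals == 95.0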
def test_performance_threshold_monitoring(self):
"""Test performance threshold monitoring."""
from src.services.performance_profiling import PerformanceThresholdMonitor
monitor = PerformanceThresholdMonitor(
max_duration_seconds=30.0,
max_memory_mb=2048.0,
max_cpu_percent=90.0
)
# Test within thresholds
metrics = PerformanceMetrics(
operation="test",
duration_seconds=15.0,
memory_peak_mb=1024.0,
cpu_peak_percent=50.0,
throughput_items_per_second=1.0,
error_count=0,
success_count=10,
total_count=10
)
violations = monitor.check_thresholds(metrics)
assert len(violations) == 0
# Test threshold violations
metrics.duration_seconds = 35.0
metrics.memory_peak_mb = 3072.0
metrics.cpu_peak_percent = 95.0
violations = monitor.check_thresholds(metrics)
assert len(violations) == 3
assert any('Duration exceeded' in v for v in violations)
assert any('Memory exceeded' in v for v in violations)
assert any('CPU exceeded' in v for v in violations)
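
    # Hedged sketch (not the project's PerformanceThresholdMonitor): the
    # comparison logic the violation messages above imply, emitting one
    # message per exceeded limit.
    def test_threshold_check_reference_sketch(self):
        limits = {"Duration": 30.0, "Memory": 2048.0, "CPU": 90.0}
        observed = {"Duration": 35.0, "Memory": 3072.0, "CPU": 95.0}
        violations = [
            f"{name} exceeded: {observed[name]} > {limit}"
            for name, limit in limits.items()
            if observed[name] > limit
        ]
        assert len(violations) == 3
        assert any("Duration exceeded" in v for v in violations)

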
class TestPerformanceProfilingIntegration:
"""Integration tests for performance profiling."""
@pytest.mark.asyncio
async def test_full_profiling_workflow(self, tmp_path):
"""Test the complete profiling workflow."""
from src.services.performance_profiling import (
PerformanceBenchmark,
BenchmarkDataStore,
MetricsAggregator,
PerformanceThresholdMonitor,
BenchmarkData
)
# Create mock managers
mock_model_manager = MagicMock()
mock_model_manager.transcribe_batch = AsyncMock(return_value=["transcript"])
mock_diarization_manager = MagicMock()
mock_diarization_manager.process_audio = AsyncMock(return_value={"speakers": 2})
mock_domain_adapter = MagicMock()
mock_domain_adapter.adapt_transcript = AsyncMock(return_value="adapted")
# Create profiling components
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
store = BenchmarkDataStore(storage_path=tmp_path)
aggregator = MetricsAggregator()
monitor = PerformanceThresholdMonitor()
# Run benchmark
audio_files = ["tests/fixtures/sample_5s.wav"]
with patch('torch.cuda.reset_peak_memory_stats'), \
patch('torch.cuda.empty_cache'), \
patch('torch.cuda.max_memory_allocated') as mock_max_memory, \
patch('psutil.Process') as mock_process:
mock_max_memory.return_value = 1024 * 1024 * 512 # 512MB
mock_process_instance = MagicMock()
mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 50 # 50MB
mock_process.return_value = mock_process_instance
# Run transcription benchmark
results = await benchmark.benchmark_transcription(
audio_files=audio_files,
batch_sizes=[1],
device='cuda'
)
# Store results
for _, row in results.iterrows():
data = BenchmarkData(
operation_name="transcription",
batch_size=row['batch_size'],
duration_seconds=row['total_time'],
peak_memory_mb=row['peak_memory_gb'] * 1024,
throughput_items_per_second=row['throughput'],
timestamp=datetime.now(timezone.utc)
)
store.store_benchmark_data(data)
# Retrieve and aggregate
stored_data = store.get_benchmark_data("transcription")
# Convert BenchmarkData to PerformanceMetrics for aggregation
metrics_data = []
for data in stored_data:
metrics = PerformanceMetrics(
operation="transcription",
duration_seconds=data.duration_seconds,
memory_peak_mb=data.peak_memory_mb,
cpu_peak_percent=50.0, # Default value for test
throughput_items_per_second=data.throughput_items_per_second,
error_count=0, # Default value for test
success_count=1, # Default value for test
total_count=1 # Default value for test
)
metrics_data.append(metrics)
aggregated = aggregator.aggregate_metrics(metrics_data)
# Check thresholds
metrics = PerformanceMetrics(
operation="transcription",
duration_seconds=aggregated['avg_duration_seconds'],
memory_peak_mb=aggregated['avg_memory_peak_mb'],
cpu_peak_percent=aggregated['avg_cpu_peak_percent'],
throughput_items_per_second=aggregated['avg_throughput_items_per_second'],
error_count=aggregated['total_errors'],
success_count=aggregated['total_successes'],
total_count=aggregated['total_operations']
)
violations = monitor.check_thresholds(metrics)
# Assertions
assert len(results) > 0
assert len(stored_data) > 0
assert 'avg_duration_seconds' in aggregated
assert isinstance(violations, list)
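

if __name__ == "__main__":
    # Convenience entry point so the file can be run directly; pytest.main
    # accepts the same arguments as the pytest CLI.
    pytest.main([__file__, "-v"])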