""" Unit tests for performance profiling infrastructure. Tests the comprehensive performance profiling system that measures memory usage, processing speed, and resource utilization across all pipeline components. """ import asyncio import json import pytest import time from unittest.mock import MagicMock, patch, AsyncMock from pathlib import Path from typing import Dict, List, Any from datetime import datetime, timezone from src.services.performance import ( ResourceMonitor, SystemResources, PerformanceMetrics, M3OptimizationConfig ) from src.services.performance_benchmarker import PerformanceBenchmarker class TestPerformanceProfilingInfrastructure: """Test the comprehensive performance profiling infrastructure.""" @pytest.fixture def mock_model_manager(self): """Create a mock model manager for testing.""" manager = MagicMock() manager.transcribe_batch = AsyncMock() manager.transcribe_batch.return_value = ["transcript1", "transcript2"] return manager @pytest.fixture def mock_diarization_manager(self): """Create a mock diarization manager for testing.""" manager = MagicMock() manager.process_audio = AsyncMock() manager.process_audio.return_value = {"speakers": 2, "segments": 10} return manager @pytest.fixture def mock_domain_adapter(self): """Create a mock domain adapter for testing.""" adapter = MagicMock() adapter.adapt_transcript = AsyncMock() adapter.adapt_transcript.return_value = "adapted_transcript" return adapter @pytest.fixture def sample_audio_files(self): """Create sample audio file paths for testing.""" return [ "tests/fixtures/sample_5s.wav", "tests/fixtures/sample_30s.mp3", "tests/fixtures/sample_2m.mp4" ] def test_performance_benchmark_initialization(self, mock_model_manager, mock_diarization_manager, mock_domain_adapter): """Test performance benchmark initialization.""" from src.services.performance_profiling import PerformanceBenchmark benchmark = PerformanceBenchmark( model_manager=mock_model_manager, diarization_manager=mock_diarization_manager, domain_adapter=mock_domain_adapter ) assert benchmark.model_manager == mock_model_manager assert benchmark.diarization_manager == mock_diarization_manager assert benchmark.domain_adapter == mock_domain_adapter assert benchmark.results == {} @pytest.mark.asyncio async def test_benchmark_transcription_basic(self, mock_model_manager, mock_diarization_manager, mock_domain_adapter, sample_audio_files): """Test basic transcription benchmarking.""" from src.services.performance_profiling import PerformanceBenchmark benchmark = PerformanceBenchmark( model_manager=mock_model_manager, diarization_manager=mock_diarization_manager, domain_adapter=mock_domain_adapter ) # Mock CUDA functions with patch('torch.cuda.reset_peak_memory_stats'), \ patch('torch.cuda.empty_cache'), \ patch('torch.cuda.max_memory_allocated') as mock_max_memory: mock_max_memory.return_value = 1024 * 1024 * 1024 # 1GB results = await benchmark.benchmark_transcription( audio_files=sample_audio_files, batch_sizes=[1, 2], device='cuda' ) assert len(results) == 2 # Two batch sizes assert 'batch_size' in results.columns assert 'total_time' in results.columns assert 'throughput' in results.columns assert 'peak_memory_gb' in results.columns # Verify model manager was called assert mock_model_manager.transcribe_batch.called @pytest.mark.asyncio async def test_benchmark_diarization(self, mock_model_manager, mock_diarization_manager, mock_domain_adapter, sample_audio_files): """Test diarization benchmarking.""" from src.services.performance_profiling import PerformanceBenchmark 

        benchmark = PerformanceBenchmark(
            model_manager=mock_model_manager,
            diarization_manager=mock_diarization_manager,
            domain_adapter=mock_domain_adapter
        )

        # Mock psutil
        with patch('psutil.Process') as mock_process:
            mock_process_instance = MagicMock()
            mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 100  # 100MB
            mock_process.return_value = mock_process_instance

            results = await benchmark.benchmark_diarization(sample_audio_files)

            assert 'total_time' in results
            assert 'per_file_avg' in results
            assert 'peak_memory_mb' in results
            assert results['total_time'] > 0
            assert results['per_file_avg'] > 0

            # Verify diarization manager was called
            assert mock_diarization_manager.process_audio.called

    def test_memory_tracking_accuracy(self):
        """Test memory tracking accuracy."""
        from src.services.performance_profiling import MemoryTracker

        tracker = MemoryTracker()

        # Test memory tracking
        with patch('psutil.Process') as mock_process:
            mock_process_instance = MagicMock()
            mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 50  # 50MB
            mock_process.return_value = mock_process_instance

            # Mock the process instance
            tracker.process = mock_process_instance

            memory_usage = tracker.get_current_memory_mb()
            assert memory_usage == 50.0

    def test_timing_decorator(self):
        """Test timing decorator functionality."""
        from src.services.performance_profiling import timing_decorator

        @timing_decorator
        def test_function():
            time.sleep(0.1)  # Simulate work
            return "result"

        result = test_function()
        assert result == "result"

        # Check that timing was recorded.
        # This would require access to the timing storage mechanism;
        # the implementation will depend on how timing data is stored.

    def test_benchmark_data_serialization(self):
        """Test benchmark data serialization and deserialization."""
        from src.services.performance_profiling import BenchmarkData

        # Create sample benchmark data
        data = BenchmarkData(
            operation_name="test_transcription",
            batch_size=4,
            duration_seconds=10.5,
            peak_memory_mb=2048.0,
            throughput_items_per_second=2.5,
            timestamp=datetime.now(timezone.utc)
        )

        # Test serialization
        serialized = data.to_dict()
        assert 'operation_name' in serialized
        assert 'batch_size' in serialized
        assert 'duration_seconds' in serialized
        assert serialized['duration_seconds'] == 10.5

        # Test deserialization
        deserialized = BenchmarkData.from_dict(serialized)
        assert deserialized.operation_name == "test_transcription"
        assert deserialized.batch_size == 4
        assert deserialized.duration_seconds == 10.5

    def test_system_information_collection(self):
        """Test system information collection."""
        from src.services.performance_profiling import SystemInfoCollector

        collector = SystemInfoCollector()

        with patch('platform.processor') as mock_processor, \
             patch('platform.machine') as mock_machine, \
             patch('psutil.cpu_count') as mock_cpu_count, \
             patch('psutil.virtual_memory') as mock_vm:
            mock_processor.return_value = "Apple M3"
            mock_machine.return_value = "arm64"
            mock_cpu_count.return_value = 8
            mock_vm.return_value.total = 16 * 1024 * 1024 * 1024  # 16GB

            system_info = collector.collect_system_info()

            assert 'cpu_model' in system_info
            assert 'architecture' in system_info
            assert 'cpu_cores' in system_info
            assert 'total_memory_gb' in system_info
            assert system_info['cpu_model'] == "Apple M3"
            assert system_info['cpu_cores'] == 8

    @pytest.mark.asyncio
    async def test_end_to_end_benchmarking(self, mock_model_manager,
                                           mock_diarization_manager,
                                           mock_domain_adapter,
                                           sample_audio_files):
        """Test end-to-end benchmarking."""
        from src.services.performance_profiling import PerformanceBenchmark

        benchmark = PerformanceBenchmark(
            model_manager=mock_model_manager,
            diarization_manager=mock_diarization_manager,
            domain_adapter=mock_domain_adapter
        )

        # Mock all necessary components
        with patch('torch.cuda.reset_peak_memory_stats'), \
             patch('torch.cuda.empty_cache'), \
             patch('torch.cuda.max_memory_allocated') as mock_max_memory, \
             patch('psutil.Process') as mock_process:
            mock_max_memory.return_value = 1024 * 1024 * 1024  # 1GB

            mock_process_instance = MagicMock()
            mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 100  # 100MB
            mock_process.return_value = mock_process_instance

            # Run end-to-end benchmark
            results = await benchmark.benchmark_end_to_end(sample_audio_files)

            assert 'total_processing_time' in results
            assert 'transcription_time' in results
            assert 'diarization_time' in results
            assert 'adaptation_time' in results
            assert 'peak_memory_usage' in results
            assert 'throughput' in results

    def test_benchmark_data_storage(self, tmp_path):
        """Test benchmark data storage and retrieval."""
        from src.services.performance_profiling import BenchmarkDataStore, BenchmarkData

        store = BenchmarkDataStore(storage_path=tmp_path)

        # Create sample data
        data = BenchmarkData(
            operation_name="test_operation",
            batch_size=2,
            duration_seconds=5.0,
            peak_memory_mb=1024.0,
            throughput_items_per_second=1.0,
            timestamp=datetime.now(timezone.utc)
        )

        # Store data
        store.store_benchmark_data(data)

        # Retrieve data
        retrieved_data = store.get_benchmark_data("test_operation")

        assert len(retrieved_data) == 1
        assert retrieved_data[0].operation_name == "test_operation"
        assert retrieved_data[0].duration_seconds == 5.0

    def test_performance_metrics_aggregation(self):
        """Test performance metrics aggregation."""
        from src.services.performance_profiling import MetricsAggregator

        aggregator = MetricsAggregator()

        # Create sample metrics
        metrics = [
            PerformanceMetrics(
                operation="transcription",
                duration_seconds=10.0,
                memory_peak_mb=1024.0,
                cpu_peak_percent=50.0,
                throughput_items_per_second=2.0,
                error_count=0,
                success_count=10,
                total_count=10
            ),
            PerformanceMetrics(
                operation="transcription",
                duration_seconds=12.0,
                memory_peak_mb=1536.0,
                cpu_peak_percent=60.0,
                throughput_items_per_second=1.8,
                error_count=1,
                success_count=9,
                total_count=10
            )
        ]

        # Aggregate metrics
        aggregated = aggregator.aggregate_metrics(metrics)

        assert aggregated['avg_duration_seconds'] == 11.0
        assert aggregated['avg_memory_peak_mb'] == 1280.0
        assert aggregated['avg_cpu_peak_percent'] == 55.0
        assert aggregated['total_operations'] == 2
        assert aggregated['success_rate'] == 95.0  # 19/20 * 100

    def test_performance_threshold_monitoring(self):
        """Test performance threshold monitoring."""
        from src.services.performance_profiling import PerformanceThresholdMonitor

        monitor = PerformanceThresholdMonitor(
            max_duration_seconds=30.0,
            max_memory_mb=2048.0,
            max_cpu_percent=90.0
        )

        # Test within thresholds
        metrics = PerformanceMetrics(
            operation="test",
            duration_seconds=15.0,
            memory_peak_mb=1024.0,
            cpu_peak_percent=50.0,
            throughput_items_per_second=1.0,
            error_count=0,
            success_count=10,
            total_count=10
        )

        violations = monitor.check_thresholds(metrics)
        assert len(violations) == 0

        # Test threshold violations
        metrics.duration_seconds = 35.0
        metrics.memory_peak_mb = 3072.0
        metrics.cpu_peak_percent = 95.0

        violations = monitor.check_thresholds(metrics)
        assert len(violations) == 3
        assert any('Duration exceeded' in v for v in violations)
        assert any('Memory exceeded' in v for v in violations)
        assert any('CPU exceeded' in v for v in violations)
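

# ---------------------------------------------------------------------------
# Illustrative sketch (not imported by the tests above): the aggregation
# arithmetic that test_performance_metrics_aggregation and the integration
# workflow below expect from MetricsAggregator.aggregate_metrics. The real
# implementation lives in src.services.performance_profiling; this helper and
# its name are assumptions used only to document the expected math, e.g.
# success_rate == (10 + 9) / (10 + 10) * 100 == 95.0.
# ---------------------------------------------------------------------------
def _sketch_aggregate_metrics(metrics):
    """Average per-operation metrics and compute overall totals (illustration only)."""
    from statistics import mean

    return {
        'avg_duration_seconds': mean(m.duration_seconds for m in metrics),
        'avg_memory_peak_mb': mean(m.memory_peak_mb for m in metrics),
        'avg_cpu_peak_percent': mean(m.cpu_peak_percent for m in metrics),
        'avg_throughput_items_per_second': mean(m.throughput_items_per_second for m in metrics),
        'total_operations': len(metrics),
        'total_errors': sum(m.error_count for m in metrics),
        'total_successes': sum(m.success_count for m in metrics),
        'success_rate': sum(m.success_count for m in metrics)
        / sum(m.total_count for m in metrics) * 100,
    }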


class TestPerformanceProfilingIntegration:
    """Integration tests for performance profiling."""

    @pytest.mark.asyncio
    async def test_full_profiling_workflow(self, tmp_path):
        """Test the complete profiling workflow."""
        from src.services.performance_profiling import (
            PerformanceBenchmark,
            BenchmarkDataStore,
            MetricsAggregator,
            PerformanceThresholdMonitor,
            BenchmarkData
        )

        # Create mock managers
        mock_model_manager = MagicMock()
        mock_model_manager.transcribe_batch = AsyncMock(return_value=["transcript"])

        mock_diarization_manager = MagicMock()
        mock_diarization_manager.process_audio = AsyncMock(return_value={"speakers": 2})

        mock_domain_adapter = MagicMock()
        mock_domain_adapter.adapt_transcript = AsyncMock(return_value="adapted")

        # Create profiling components
        benchmark = PerformanceBenchmark(
            model_manager=mock_model_manager,
            diarization_manager=mock_diarization_manager,
            domain_adapter=mock_domain_adapter
        )
        store = BenchmarkDataStore(storage_path=tmp_path)
        aggregator = MetricsAggregator()
        monitor = PerformanceThresholdMonitor()

        # Run benchmark
        audio_files = ["tests/fixtures/sample_5s.wav"]

        with patch('torch.cuda.reset_peak_memory_stats'), \
             patch('torch.cuda.empty_cache'), \
             patch('torch.cuda.max_memory_allocated') as mock_max_memory, \
             patch('psutil.Process') as mock_process:
            mock_max_memory.return_value = 1024 * 1024 * 512  # 512MB

            mock_process_instance = MagicMock()
            mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 50  # 50MB
            mock_process.return_value = mock_process_instance

            # Run transcription benchmark
            results = await benchmark.benchmark_transcription(
                audio_files=audio_files,
                batch_sizes=[1],
                device='cuda'
            )

            # Store results
            for _, row in results.iterrows():
                data = BenchmarkData(
                    operation_name="transcription",
                    batch_size=row['batch_size'],
                    duration_seconds=row['total_time'],
                    peak_memory_mb=row['peak_memory_gb'] * 1024,
                    throughput_items_per_second=row['throughput'],
                    timestamp=datetime.now(timezone.utc)
                )
                store.store_benchmark_data(data)

            # Retrieve and aggregate
            stored_data = store.get_benchmark_data("transcription")

            # Convert BenchmarkData to PerformanceMetrics for aggregation
            metrics_data = []
            for data in stored_data:
                metrics = PerformanceMetrics(
                    operation="transcription",
                    duration_seconds=data.duration_seconds,
                    memory_peak_mb=data.peak_memory_mb,
                    cpu_peak_percent=50.0,  # Default value for test
                    throughput_items_per_second=data.throughput_items_per_second,
                    error_count=0,  # Default value for test
                    success_count=1,  # Default value for test
                    total_count=1  # Default value for test
                )
                metrics_data.append(metrics)

            aggregated = aggregator.aggregate_metrics(metrics_data)

            # Check thresholds
            metrics = PerformanceMetrics(
                operation="transcription",
                duration_seconds=aggregated['avg_duration_seconds'],
                memory_peak_mb=aggregated['avg_memory_peak_mb'],
                cpu_peak_percent=aggregated['avg_cpu_peak_percent'],
                throughput_items_per_second=aggregated['avg_throughput_items_per_second'],
                error_count=aggregated['total_errors'],
                success_count=aggregated['total_successes'],
                total_count=aggregated['total_operations']
            )

            violations = monitor.check_thresholds(metrics)

            # Assertions
            assert len(results) > 0
            assert len(stored_data) > 0
            assert 'avg_duration_seconds' in aggregated
            assert isinstance(violations, list)
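

# ---------------------------------------------------------------------------
# Illustrative sketch (not used by the tests above): one plausible shape for
# the timing_decorator exercised in test_timing_decorator, which only checks
# that the wrapped function's return value is preserved because the storage
# mechanism is unspecified. The real decorator lives in
# src.services.performance_profiling; the _TIMING_LOG dict and the helper name
# below are assumptions for illustration only.
# ---------------------------------------------------------------------------
import functools
from collections import defaultdict

_TIMING_LOG = defaultdict(list)


def _timing_decorator_sketch(func):
    """Record the wall-clock duration of each call under the function's name."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            _TIMING_LOG[func.__name__].append(time.perf_counter() - start)
    return wrapper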