"""Unit tests for processing speed optimizations."""

import asyncio
import os
import tempfile
import time
from typing import Any, Dict, List
from unittest.mock import MagicMock, Mock, patch

import pytest
import torch

from src.services.speed_optimization import (
    AdaptiveComputeAllocator,
    AudioChunker,
    CacheManager,
    JITCompiler,
    ModelFusion,
    ParallelProcessor,
    PipelineParallelizer,
    SpeedOptimizer,
)


def _make_mock_managers():
    """Build the two mocked managers passed to ``optimize_pipeline_speed``.

    Returns:
        A ``(model_manager, diarization_manager)`` tuple of ``Mock`` objects,
        each carrying an explicitly assigned ``Mock`` as its ``model``
        attribute.
    """
    model_manager = Mock()
    diarization_manager = Mock()

    # Mock model components
    model_manager.model = Mock()
    diarization_manager.model = Mock()
    return model_manager, diarization_manager


class TestSpeedOptimizer:
    """Test the main SpeedOptimizer class."""

    def test_speed_optimizer_initialization(self):
        """Test SpeedOptimizer initialization with default settings."""
        optimizer = SpeedOptimizer()

        assert optimizer.max_workers == 8
        assert optimizer.chunk_size_seconds == 10
        assert optimizer.cache_enabled is True
        assert optimizer.pipeline_parallelism is True
        assert optimizer.jit_compilation is True

    def test_speed_optimizer_custom_initialization(self):
        """Test SpeedOptimizer initialization with custom settings."""
        optimizer = SpeedOptimizer(
            max_workers=16,
            chunk_size_seconds=5,
            cache_enabled=False,
            pipeline_parallelism=False,
            jit_compilation=False,
        )

        assert optimizer.max_workers == 16
        assert optimizer.chunk_size_seconds == 5
        assert optimizer.cache_enabled is False
        assert optimizer.pipeline_parallelism is False
        assert optimizer.jit_compilation is False

    def test_optimize_pipeline_speed(self):
        """Test complete pipeline speed optimization."""
        optimizer = SpeedOptimizer()
        model_manager, diarization_manager = _make_mock_managers()

        result = optimizer.optimize_pipeline_speed(
            model_manager, diarization_manager, batch_size=4
        )

        assert 'processing_time_seconds' in result
        assert 'throughput_files_per_minute' in result
        assert 'optimization_applied' in result
        assert 'recommended_workers' in result
        assert result['throughput_files_per_minute'] > 0

    def test_measure_processing_speed(self):
        """Test processing speed measurement."""
        optimizer = SpeedOptimizer()

        # Mock processing function
        def mock_process(data):
            time.sleep(0.1)  # Simulate processing time
            return {'result': 'processed'}

        result = optimizer.measure_processing_speed(mock_process, [1, 2, 3, 4])

        assert 'total_time_seconds' in result
        assert 'average_time_per_item' in result
        assert 'throughput_items_per_second' in result
        assert result['total_time_seconds'] > 0


class TestParallelProcessor:
    """Test parallel processing functionality."""

    def test_parallel_processor_initialization(self):
        """Test ParallelProcessor initialization."""
        processor = ParallelProcessor()

        assert processor.max_workers == 8
        assert processor.chunk_size == 100
        assert processor.timeout_seconds == 300

    def test_parallel_processor_custom_initialization(self):
        """Test ParallelProcessor initialization with custom settings."""
        processor = ParallelProcessor(
            max_workers=16,
            chunk_size=200,
            timeout_seconds=600,
        )

        assert processor.max_workers == 16
        assert processor.chunk_size == 200
        assert processor.timeout_seconds == 600

    def test_process_in_parallel(self):
        """Test parallel processing of items."""
        processor = ParallelProcessor(max_workers=4)

        # Mock processing function
        def mock_process(item):
            time.sleep(0.01)  # Simulate work
            return f"processed_{item}"

        items = list(range(10))
        results = processor.process_in_parallel(mock_process, items)

        assert len(results) == 10
        assert all(result.startswith('processed_') for result in results)

    def test_optimize_worker_count(self):
        """Test worker count optimization."""
        processor = ParallelProcessor()

        # Mock performance data
        performance_data = [
            {'workers': 1, 'throughput': 10, 'latency': 1.0},
            {'workers': 2, 'throughput': 18, 'latency': 0.9},
            {'workers': 4, 'throughput': 32, 'latency': 0.8},
            {'workers': 8, 'throughput': 45, 'latency': 0.7},
        ]

        result = processor.optimize_worker_count(performance_data)

        assert 'optimal_workers' in result
        assert 'expected_throughput' in result
        assert 'reasoning' in result
        assert result['optimal_workers'] > 0

    def test_measure_parallel_efficiency(self):
        """Test parallel efficiency measurement."""
        processor = ParallelProcessor()

        # Mock sequential and parallel processing times
        sequential_time = 10.0
        parallel_time = 2.5
        num_workers = 4

        result = processor.measure_parallel_efficiency(
            sequential_time, parallel_time, num_workers
        )

        assert 'speedup' in result
        assert 'efficiency' in result
        assert 'scalability' in result
        assert result['speedup'] > 1.0
        assert result['efficiency'] <= 1.0


class TestPipelineParallelizer:
    """Test pipeline parallelism functionality."""

    def test_pipeline_parallelizer_initialization(self):
        """Test PipelineParallelizer initialization."""
        parallelizer = PipelineParallelizer()

        assert parallelizer.num_stages == 3
        assert parallelizer.buffer_size == 10
        assert parallelizer.overlap_enabled is True

    def test_create_pipeline_stages(self):
        """Test pipeline stage creation."""
        parallelizer = PipelineParallelizer()

        # Mock stage functions
        def stage1(data):
            return f"stage1_{data}"

        def stage2(data):
            return f"stage2_{data}"

        def stage3(data):
            return f"stage3_{data}"

        stages = [stage1, stage2, stage3]
        pipeline = parallelizer.create_pipeline_stages(stages)

        assert pipeline is not None
        assert hasattr(pipeline, 'process')

    def test_measure_pipeline_throughput(self):
        """Test pipeline throughput measurement."""
        parallelizer = PipelineParallelizer()

        # Mock pipeline
        pipeline = Mock()
        pipeline.process.return_value = "processed"

        # Mock timing
        # NOTE(review): the two-element side_effect presumably matches an
        # implementation that reads time.time() exactly twice (start/end);
        # a third read would raise StopIteration -- confirm against
        # measure_pipeline_throughput.
        with patch('time.time') as mock_time:
            mock_time.side_effect = [0, 5]  # 5 seconds for 10 items

            result = parallelizer.measure_pipeline_throughput(
                pipeline, list(range(10))
            )

        assert 'total_time_seconds' in result
        assert 'throughput_items_per_second' in result
        assert 'latency_seconds' in result
        # 10 items / 5 seconds; approx() keeps the check independent of how
        # the implementation arranges its floating-point arithmetic.
        assert result['throughput_items_per_second'] == pytest.approx(2.0)

    def test_optimize_pipeline_configuration(self):
        """Test pipeline configuration optimization."""
        parallelizer = PipelineParallelizer()

        # Mock performance data
        performance_data = [
            {'stages': 2, 'throughput': 15, 'latency': 0.8},
            {'stages': 3, 'throughput': 25, 'latency': 0.6},
            {'stages': 4, 'throughput': 30, 'latency': 0.5},
        ]

        result = parallelizer.optimize_pipeline_configuration(performance_data)

        assert 'optimal_stages' in result
        assert 'expected_throughput' in result
        assert 'expected_latency' in result
        assert result['optimal_stages'] > 0


class TestCacheManager:
    """Test caching functionality."""

    def test_cache_manager_initialization(self):
        """Test CacheManager initialization."""
        cache_manager = CacheManager()

        assert cache_manager.max_size == 1000
        assert cache_manager.ttl_seconds == 3600
        assert cache_manager.eviction_policy == 'lru'

    def test_cache_manager_custom_initialization(self):
        """Test CacheManager initialization with custom settings."""
        cache_manager = CacheManager(
            max_size=500,
            ttl_seconds=1800,
            eviction_policy='fifo',
        )

        assert cache_manager.max_size == 500
        assert cache_manager.ttl_seconds == 1800
        assert cache_manager.eviction_policy == 'fifo'

    def test_cache_operations(self):
        """Test basic cache operations."""
        cache_manager = CacheManager()

        # Test set and get
        cache_manager.set('key1', 'value1')
        value = cache_manager.get('key1')
        assert value == 'value1'

        # Test cache miss
        value = cache_manager.get('nonexistent')
        assert value is None

    def test_cache_eviction(self):
        """Test cache eviction policies."""
        cache_manager = CacheManager(max_size=3)

        # Fill cache
        cache_manager.set('key1', 'value1')
        cache_manager.set('key2', 'value2')
        cache_manager.set('key3', 'value3')

        # Add one more to trigger eviction
        cache_manager.set('key4', 'value4')

        # Check that oldest item was evicted (LRU)
        assert cache_manager.get('key1') is None
        assert cache_manager.get('key4') == 'value4'

    def test_cache_performance(self):
        """Test cache performance measurement."""
        cache_manager = CacheManager()

        # Populate cache
        for i in range(100):
            cache_manager.set(f'key{i}', f'value{i}')

        # Generate 50 lookups of stored keys and 50 lookups of keys that were
        # never stored, so the cache has traffic to report on. The lookups
        # are asserted rather than tallied into an unused local.
        stored_lookups = [cache_manager.get(f'key{i}') for i in range(50)]
        missing_lookups = [
            cache_manager.get(f'key{i}') for i in range(100, 150)
        ]
        assert all(value is not None for value in stored_lookups)
        assert all(value is None for value in missing_lookups)

        result = cache_manager.measure_performance()

        assert 'hit_rate' in result
        assert 'size' in result
        assert 'memory_usage_mb' in result


class TestAudioChunker:
    """Test audio chunking functionality."""

    def test_audio_chunker_initialization(self):
        """Test AudioChunker initialization."""
        chunker = AudioChunker()

        assert chunker.chunk_size_seconds == 10
        assert chunker.overlap_seconds == 2
        assert chunker.min_chunk_size_seconds == 1

    def test_chunk_audio_file(self):
        """Test audio file chunking."""
        chunker = AudioChunker(chunk_size_seconds=5, overlap_seconds=1)

        # Mock audio file with 20 seconds duration
        audio_file = Mock()
        audio_file.duration = 20.0
        audio_file.sample_rate = 16000

        chunks = chunker.chunk_audio_file(audio_file)

        # 20 seconds with 5-second chunks and 1-second overlap
        assert len(chunks) == 5
        assert all(chunk['start_time'] >= 0 for chunk in chunks)
        assert all(chunk['end_time'] <= 20.0 for chunk in chunks)

    def test_optimize_chunk_size(self):
        """Test chunk size optimization."""
        chunker = AudioChunker()

        # Mock performance data
        performance_data = [
            {'chunk_size': 5, 'processing_time': 2.0, 'memory_usage': 1.0},
            {'chunk_size': 10, 'processing_time': 3.5, 'memory_usage': 1.8},
            {'chunk_size': 15, 'processing_time': 5.0, 'memory_usage': 2.5},
        ]

        result = chunker.optimize_chunk_size(performance_data)

        assert 'optimal_chunk_size' in result
        assert 'expected_processing_time' in result
        assert 'expected_memory_usage' in result
        assert result['optimal_chunk_size'] > 0

    def test_adaptive_chunking(self):
        """Test adaptive chunking based on file characteristics."""
        chunker = AudioChunker()

        # Mock audio files with different characteristics
        short_file = Mock(duration=5.0, sample_rate=16000)
        long_file = Mock(duration=60.0, sample_rate=16000)
        noisy_file = Mock(duration=30.0, sample_rate=16000)

        # Test adaptive chunking
        short_chunks = chunker.adaptive_chunk(short_file)
        long_chunks = chunker.adaptive_chunk(long_file)
        # Smoke check only: the call must not raise. No count is asserted
        # because adaptive chunking adjusts based on duration, not noise
        # characteristics.
        chunker.adaptive_chunk(noisy_file)

        # Longer files get more chunks
        assert len(short_chunks) <= len(long_chunks)


class TestModelFusion:
    """Test model fusion functionality."""

    def test_model_fusion_initialization(self):
        """Test ModelFusion initialization."""
        fusion = ModelFusion()

        assert fusion.fusion_enabled is True
        assert fusion.fusion_type == 'sequential'
        assert fusion.optimization_level == 'balanced'

    def test_fuse_models(self):
        """Test model fusion."""
        fusion = ModelFusion()

        # Mock models
        model1 = Mock()
        model2 = Mock()
        model3 = Mock()
        models = [model1, model2, model3]

        result = fusion.fuse_models(models)

        assert result['fused'] is True
        assert result['num_models'] == 3
        assert result['fusion_type'] == 'sequential'

    def test_measure_fusion_impact(self):
        """Test fusion impact measurement."""
        fusion = ModelFusion()

        # Mock before and after measurements
        before_metrics = {
            'total_parameters': 1000000,
            'inference_time': 2.0,
            'memory_usage': 4.0,
        }
        after_metrics = {
            'total_parameters': 800000,
            'inference_time': 1.5,
            'memory_usage': 3.0,
        }

        result = fusion.measure_fusion_impact(before_metrics, after_metrics)

        assert 'parameter_reduction_percent' in result
        assert 'speedup_factor' in result
        assert 'memory_savings_percent' in result
        # Percentages are computed floats: e.g. 0.2 * 100 evaluates to
        # 20.000000000000004, so compare with a tolerance instead of ==.
        assert result['parameter_reduction_percent'] == pytest.approx(20.0)
        assert result['speedup_factor'] == pytest.approx(1.33, rel=0.1)
        assert result['memory_savings_percent'] == pytest.approx(25.0)

    def test_optimize_fusion_strategy(self):
        """Test fusion strategy optimization."""
        fusion = ModelFusion()

        # Mock performance data for different fusion strategies
        performance_data = [
            {'strategy': 'sequential', 'speedup': 1.2, 'memory_savings': 15},
            {'strategy': 'parallel', 'speedup': 1.8, 'memory_savings': 10},
            {'strategy': 'hybrid', 'speedup': 1.5, 'memory_savings': 20},
        ]

        result = fusion.optimize_fusion_strategy(performance_data)

        assert 'optimal_strategy' in result
        assert 'expected_speedup' in result
        assert 'expected_memory_savings' in result
        assert result['optimal_strategy'] in ['sequential', 'parallel', 'hybrid']


class TestJITCompiler:
    """Test JIT compilation functionality."""

    def test_jit_compiler_initialization(self):
        """Test JITCompiler initialization."""
        compiler = JITCompiler()

        assert compiler.compilation_enabled is True
        assert compiler.optimization_level == 2
        assert compiler.target_device == 'cpu'

    def test_compile_function(self):
        """Test function compilation."""
        compiler = JITCompiler()

        # Mock function to compile
        def mock_function(x, y):
            return x + y

        result = compiler.compile_function(mock_function)

        assert result['compiled'] is True
        assert result['optimization_level'] == 2
        assert result['target_device'] == 'cpu'

    def test_measure_compilation_impact(self):
        """Test compilation impact measurement."""
        compiler = JITCompiler()

        # Mock function
        def mock_function(x, y):
            return x * y + x + y

        # Measure before compilation. perf_counter() is used because the
        # loop is very short: a coarse wall clock can report 0.0 elapsed,
        # which would make after_time 0.0 as well and leave the speedup
        # ratio undefined.
        start_time = time.perf_counter()
        for i in range(1000):
            mock_function(i, i + 1)
        before_time = time.perf_counter() - start_time

        # Compile (result not inspected here; the call must simply succeed)
        compiler.compile_function(mock_function)

        # Mock compiled function performance
        after_time = before_time * 0.7  # 30% improvement

        result = compiler.measure_compilation_impact(
            mock_function, before_time, after_time
        )

        assert 'speedup_factor' in result
        assert 'compilation_time' in result
        assert 'memory_overhead' in result
        assert result['speedup_factor'] == pytest.approx(1.43, rel=0.1)

    def test_optimize_compilation_settings(self):
        """Test compilation settings optimization."""
        compiler = JITCompiler()

        # Mock performance data for different settings
        performance_data = [
            {'optimization_level': 0, 'speedup': 1.0, 'compilation_time': 0.1},
            {'optimization_level': 1, 'speedup': 1.2, 'compilation_time': 0.5},
            {'optimization_level': 2, 'speedup': 1.4, 'compilation_time': 1.0},
        ]

        result = compiler.optimize_compilation_settings(performance_data)

        assert 'optimal_optimization_level' in result
        assert 'expected_speedup' in result
        assert 'expected_compilation_time' in result
        assert result['optimal_optimization_level'] >= 0


class TestAdaptiveComputeAllocator:
    """Test adaptive compute allocation functionality."""

    def test_adaptive_compute_allocator_initialization(self):
        """Test AdaptiveComputeAllocator initialization."""
        allocator = AdaptiveComputeAllocator()

        assert allocator.max_resources == 1.0
        assert allocator.min_resources == 0.1
        assert allocator.adaptation_rate == 0.1

    def test_allocate_resources(self):
        """Test resource allocation."""
        allocator = AdaptiveComputeAllocator()

        # Mock file complexity
        simple_file = Mock(duration=30.0, sample_rate=16000)
        complex_file = Mock(duration=300.0, sample_rate=48000)

        simple_allocation = allocator.allocate_resources(simple_file)
        complex_allocation = allocator.allocate_resources(complex_file)

        assert simple_allocation['cpu_cores'] <= complex_allocation['cpu_cores']
        assert simple_allocation['memory_gb'] <= complex_allocation['memory_gb']
        assert (
            simple_allocation['gpu_memory_gb']
            <= complex_allocation['gpu_memory_gb']
        )

    def test_adapt_allocation(self):
        """Test allocation adaptation based on performance."""
        allocator = AdaptiveComputeAllocator()

        # Mock performance feedback
        current_allocation = {
            'cpu_cores': 4,
            'memory_gb': 8,
            'gpu_memory_gb': 4,
        }
        performance_feedback = {
            'processing_time': 10.0,
            'target_time': 5.0,
            'resource_utilization': 0.8,
        }

        new_allocation = allocator.adapt_allocation(
            current_allocation, performance_feedback
        )

        # Note: Adaptation can increase or decrease resources based on
        # performance, so only the shape of the result is checked.
        assert 'cpu_cores' in new_allocation
        assert 'memory_gb' in new_allocation
        assert 'gpu_memory_gb' in new_allocation

    def test_optimize_resource_distribution(self):
        """Test resource distribution optimization."""
        allocator = AdaptiveComputeAllocator()

        # Mock workload
        workload = [
            Mock(duration=30.0, sample_rate=16000),
            Mock(duration=60.0, sample_rate=16000),
            Mock(duration=120.0, sample_rate=48000),
        ]

        result = allocator.optimize_resource_distribution(workload)

        assert 'total_cpu_cores' in result
        assert 'total_memory_gb' in result
        assert 'total_gpu_memory_gb' in result
        assert 'efficiency_score' in result
        assert result['efficiency_score'] > 0

    def test_measure_allocation_efficiency(self):
        """Test allocation efficiency measurement."""
        allocator = AdaptiveComputeAllocator()

        # Mock allocation and performance data
        allocation = {
            'cpu_cores': 8,
            'memory_gb': 16,
            'gpu_memory_gb': 8,
        }
        performance = {
            'processing_time': 5.0,
            'target_time': 5.0,
            'resource_utilization': 0.75,
        }

        result = allocator.measure_allocation_efficiency(allocation, performance)

        assert 'efficiency_score' in result
        assert 'resource_utilization' in result
        assert 'time_efficiency' in result
        assert result['efficiency_score'] > 0
        assert result['efficiency_score'] <= 1.0


class TestSpeedOptimizationIntegration:
    """Integration tests for speed optimization components."""

    def test_end_to_end_speed_optimization(self):
        """Test complete speed optimization workflow."""
        optimizer = SpeedOptimizer()
        model_manager, diarization_manager = _make_mock_managers()

        result = optimizer.optimize_pipeline_speed(
            model_manager, diarization_manager, batch_size=4
        )

        assert 'processing_time_seconds' in result
        assert 'throughput_files_per_minute' in result
        assert 'optimization_applied' in result
        assert 'recommended_workers' in result
        assert result['throughput_files_per_minute'] > 0

    def test_speed_optimization_with_parallel_processing(self):
        """Test speed optimization with parallel processing enabled."""
        optimizer = SpeedOptimizer()
        model_manager, diarization_manager = _make_mock_managers()

        result = optimizer.optimize_pipeline_speed(
            model_manager, diarization_manager, batch_size=8
        )

        assert result['parallel_processing_applied'] is True
        assert result['throughput_files_per_minute'] > 0

    def test_speed_optimization_with_caching(self):
        """Test speed optimization with caching enabled."""
        optimizer = SpeedOptimizer(cache_enabled=True)
        model_manager, diarization_manager = _make_mock_managers()

        result = optimizer.optimize_pipeline_speed(
            model_manager, diarization_manager, batch_size=4
        )

        assert result['caching_applied'] is True
        assert result['throughput_files_per_minute'] > 0

    def test_speed_optimization_with_jit_compilation(self):
        """Test speed optimization with JIT compilation enabled."""
        optimizer = SpeedOptimizer(jit_compilation=True)
        model_manager, diarization_manager = _make_mock_managers()

        result = optimizer.optimize_pipeline_speed(
            model_manager, diarization_manager, batch_size=4
        )

        assert result['jit_compilation_applied'] is True
        assert result['throughput_files_per_minute'] > 0


if __name__ == '__main__':
    # Propagate pytest's exit status so running this file as a script
    # fails (non-zero) when any test fails.
    raise SystemExit(pytest.main([__file__]))