# trax/tests/test_performance_profiling.py
"""
Unit tests for performance profiling infrastructure.
Tests the comprehensive performance profiling system that measures
memory usage, processing speed, and resource utilization across
all pipeline components.
"""

import json
import pytest
import time
from unittest.mock import MagicMock, patch, AsyncMock
from datetime import datetime, timezone

from src.services.performance import PerformanceMetrics


class TestPerformanceProfilingInfrastructure:
"""Test the comprehensive performance profiling infrastructure."""
@pytest.fixture
def mock_model_manager(self):
"""Create a mock model manager for testing."""
manager = MagicMock()
manager.transcribe_batch = AsyncMock()
manager.transcribe_batch.return_value = ["transcript1", "transcript2"]
        return manager

@pytest.fixture
def mock_diarization_manager(self):
"""Create a mock diarization manager for testing."""
manager = MagicMock()
manager.process_audio = AsyncMock()
manager.process_audio.return_value = {"speakers": 2, "segments": 10}
        return manager

@pytest.fixture
def mock_domain_adapter(self):
"""Create a mock domain adapter for testing."""
adapter = MagicMock()
adapter.adapt_transcript = AsyncMock()
adapter.adapt_transcript.return_value = "adapted_transcript"
        return adapter

@pytest.fixture
def sample_audio_files(self):
"""Create sample audio file paths for testing."""
return [
"tests/fixtures/sample_5s.wav",
"tests/fixtures/sample_30s.mp3",
"tests/fixtures/sample_2m.mp4"
        ]

def test_performance_benchmark_initialization(self, mock_model_manager,
mock_diarization_manager,
mock_domain_adapter):
"""Test performance benchmark initialization."""
from src.services.performance_profiling import PerformanceBenchmark
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
assert benchmark.model_manager == mock_model_manager
assert benchmark.diarization_manager == mock_diarization_manager
assert benchmark.domain_adapter == mock_domain_adapter
        assert benchmark.results == {}

@pytest.mark.asyncio
async def test_benchmark_transcription_basic(self, mock_model_manager,
mock_diarization_manager,
mock_domain_adapter,
sample_audio_files):
"""Test basic transcription benchmarking."""
from src.services.performance_profiling import PerformanceBenchmark
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
# Mock CUDA functions
with patch('torch.cuda.reset_peak_memory_stats'), \
patch('torch.cuda.empty_cache'), \
patch('torch.cuda.max_memory_allocated') as mock_max_memory:
mock_max_memory.return_value = 1024 * 1024 * 1024 # 1GB
results = await benchmark.benchmark_transcription(
audio_files=sample_audio_files,
batch_sizes=[1, 2],
device='cuda'
)
assert len(results) == 2 # Two batch sizes
assert 'batch_size' in results.columns
assert 'total_time' in results.columns
assert 'throughput' in results.columns
assert 'peak_memory_gb' in results.columns
# Verify model manager was called
assert mock_model_manager.transcribe_batch.called
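
    # Hedged sketch: one way to structure the per-batch timing loop a benchmark
    # like the one above needs, using time.perf_counter(). This is illustrative
    # only and independent of PerformanceBenchmark's actual code; run_batch and
    # its inputs are made up for the example.
    def test_batch_timing_reference_sketch(self):
        def run_batch(items):
            return [item.upper() for item in items]

        start = time.perf_counter()
        processed = run_batch(["a", "b", "c"])
        elapsed = time.perf_counter() - start
        # Throughput is items processed per second of wall-clock time.
        throughput = len(processed) / elapsed if elapsed > 0 else float("inf")
        assert throughput > 0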
@pytest.mark.asyncio
async def test_benchmark_diarization(self, mock_model_manager,
mock_diarization_manager,
mock_domain_adapter,
sample_audio_files):
"""Test diarization benchmarking."""
from src.services.performance_profiling import PerformanceBenchmark
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
# Mock psutil
with patch('psutil.Process') as mock_process:
mock_process_instance = MagicMock()
mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 100 # 100MB
mock_process.return_value = mock_process_instance
results = await benchmark.benchmark_diarization(sample_audio_files)
assert 'total_time' in results
assert 'per_file_avg' in results
assert 'peak_memory_mb' in results
assert results['total_time'] > 0
assert results['per_file_avg'] > 0
# Verify diarization manager was called
            assert mock_diarization_manager.process_audio.called

def test_memory_tracking_accuracy(self):
"""Test memory tracking accuracy."""
from src.services.performance_profiling import MemoryTracker
tracker = MemoryTracker()
# Test memory tracking
with patch('psutil.Process') as mock_process:
mock_process_instance = MagicMock()
mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 50 # 50MB
mock_process.return_value = mock_process_instance
# Mock the process instance
tracker.process = mock_process_instance
memory_usage = tracker.get_current_memory_mb()
assert memory_usage == 50.0
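
    # Hedged sketch (not the project's MemoryTracker): a minimal local tracker
    # showing the rss-bytes -> MB conversion the assertion above assumes.
    # _SketchTracker is a hypothetical name used only for illustration.
    def test_memory_tracker_reference_sketch(self):
        class _SketchTracker:
            def __init__(self, process):
                self.process = process

            def get_current_memory_mb(self):
                # psutil reports resident set size (rss) in bytes.
                return self.process.memory_info().rss / (1024 * 1024)

        fake_process = MagicMock()
        fake_process.memory_info.return_value.rss = 1024 * 1024 * 50  # 50MB
        assert _SketchTracker(fake_process).get_current_memory_mb() == 50.0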
def test_timing_decorator(self):
"""Test timing decorator functionality."""
from src.services.performance_profiling import timing_decorator
@timing_decorator
def test_function():
time.sleep(0.1) # Simulate work
return "result"
result = test_function()
assert result == "result"
        # Verifying the recorded duration would couple this test to the
        # decorator's internal storage mechanism, which is an implementation
        # detail; here we only assert that the wrapped function's return value
        # passes through unchanged. The reference sketch below shows the
        # timing contract itself.
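
    # Hedged sketch (not the project's timing_decorator): a minimal decorator
    # that records elapsed wall-clock time while passing the return value
    # through unchanged, illustrating the contract exercised above.
    def test_timing_decorator_reference_sketch(self):
        import functools

        recorded = {}

        def sketch_timing(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                start = time.perf_counter()
                try:
                    return func(*args, **kwargs)
                finally:
                    recorded[func.__name__] = time.perf_counter() - start
            return wrapper

        @sketch_timing
        def work():
            time.sleep(0.05)  # Simulate work
            return "result"

        assert work() == "result"
        assert recorded["work"] >= 0.05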
def test_benchmark_data_serialization(self):
"""Test benchmark data serialization and deserialization."""
from src.services.performance_profiling import BenchmarkData
# Create sample benchmark data
data = BenchmarkData(
operation_name="test_transcription",
batch_size=4,
duration_seconds=10.5,
peak_memory_mb=2048.0,
throughput_items_per_second=2.5,
timestamp=datetime.now(timezone.utc)
)
# Test serialization
serialized = data.to_dict()
assert 'operation_name' in serialized
assert 'batch_size' in serialized
assert 'duration_seconds' in serialized
assert serialized['duration_seconds'] == 10.5
# Test deserialization
deserialized = BenchmarkData.from_dict(serialized)
assert deserialized.operation_name == "test_transcription"
assert deserialized.batch_size == 4
assert deserialized.duration_seconds == 10.5
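
    # Hedged sketch (not the project's BenchmarkData): a dataclass round-trip
    # showing one lossless to_dict/from_dict scheme, with the timestamp
    # serialized as an ISO-8601 string. _SketchRecord is illustrative only.
    def test_serialization_reference_sketch(self):
        from dataclasses import dataclass

        @dataclass
        class _SketchRecord:
            operation_name: str
            duration_seconds: float
            timestamp: datetime

            def to_dict(self):
                return {
                    "operation_name": self.operation_name,
                    "duration_seconds": self.duration_seconds,
                    "timestamp": self.timestamp.isoformat(),
                }

            @classmethod
            def from_dict(cls, payload):
                return cls(
                    operation_name=payload["operation_name"],
                    duration_seconds=payload["duration_seconds"],
                    timestamp=datetime.fromisoformat(payload["timestamp"]),
                )

        original = _SketchRecord("test_transcription", 10.5,
                                 datetime.now(timezone.utc))
        assert _SketchRecord.from_dict(original.to_dict()) == original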
def test_system_information_collection(self):
"""Test system information collection."""
from src.services.performance_profiling import SystemInfoCollector
collector = SystemInfoCollector()
with patch('platform.processor') as mock_processor, \
patch('platform.machine') as mock_machine, \
patch('psutil.cpu_count') as mock_cpu_count, \
patch('psutil.virtual_memory') as mock_vm:
mock_processor.return_value = "Apple M3"
mock_machine.return_value = "arm64"
mock_cpu_count.return_value = 8
mock_vm.return_value.total = 16 * 1024 * 1024 * 1024 # 16GB
system_info = collector.collect_system_info()
assert 'cpu_model' in system_info
assert 'architecture' in system_info
assert 'cpu_cores' in system_info
assert 'total_memory_gb' in system_info
assert system_info['cpu_model'] == "Apple M3"
assert system_info['cpu_cores'] == 8
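
    # Hedged sketch: gathering the same fields with the real stdlib/psutil
    # calls the mocks above stand in for. Only types and ranges are asserted,
    # since the values vary per machine.
    def test_system_info_reference_sketch(self):
        import platform
        import psutil

        info = {
            "cpu_model": platform.processor(),
            "architecture": platform.machine(),
            "cpu_cores": psutil.cpu_count(),
            "total_memory_gb": psutil.virtual_memory().total / (1024 ** 3),
        }
        assert isinstance(info["architecture"], str)
        assert info["total_memory_gb"] > 0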
@pytest.mark.asyncio
async def test_end_to_end_benchmarking(self, mock_model_manager,
mock_diarization_manager,
mock_domain_adapter,
sample_audio_files):
"""Test end-to-end benchmarking."""
from src.services.performance_profiling import PerformanceBenchmark
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
# Mock all necessary components
with patch('torch.cuda.reset_peak_memory_stats'), \
patch('torch.cuda.empty_cache'), \
patch('torch.cuda.max_memory_allocated') as mock_max_memory, \
patch('psutil.Process') as mock_process:
mock_max_memory.return_value = 1024 * 1024 * 1024 # 1GB
mock_process_instance = MagicMock()
mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 100 # 100MB
mock_process.return_value = mock_process_instance
# Run end-to-end benchmark
results = await benchmark.benchmark_end_to_end(sample_audio_files)
assert 'total_processing_time' in results
assert 'transcription_time' in results
assert 'diarization_time' in results
assert 'adaptation_time' in results
assert 'peak_memory_usage' in results
            assert 'throughput' in results

def test_benchmark_data_storage(self, tmp_path):
"""Test benchmark data storage and retrieval."""
from src.services.performance_profiling import BenchmarkDataStore, BenchmarkData
store = BenchmarkDataStore(storage_path=tmp_path)
# Create sample data
data = BenchmarkData(
operation_name="test_operation",
batch_size=2,
duration_seconds=5.0,
peak_memory_mb=1024.0,
throughput_items_per_second=1.0,
timestamp=datetime.now(timezone.utc)
)
# Store data
store.store_benchmark_data(data)
# Retrieve data
retrieved_data = store.get_benchmark_data("test_operation")
assert len(retrieved_data) == 1
assert retrieved_data[0].operation_name == "test_operation"
assert retrieved_data[0].duration_seconds == 5.0
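
    # Hedged sketch (not the project's BenchmarkDataStore): append-only
    # JSON-lines persistence filtered by operation name, showing the
    # store/retrieve round-trip the test above relies on.
    def test_data_store_reference_sketch(self, tmp_path):
        path = tmp_path / "benchmarks.jsonl"

        def store(record):
            with path.open("a") as fh:
                fh.write(json.dumps(record) + "\n")

        def load(operation_name):
            with path.open() as fh:
                rows = [json.loads(line) for line in fh]
            return [r for r in rows if r["operation_name"] == operation_name]

        store({"operation_name": "test_operation", "duration_seconds": 5.0})
        store({"operation_name": "other_operation", "duration_seconds": 1.0})
        rows = load("test_operation")
        assert len(rows) == 1
        assert rows[0]["duration_seconds"] == 5.0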
def test_performance_metrics_aggregation(self):
"""Test performance metrics aggregation."""
from src.services.performance_profiling import MetricsAggregator
aggregator = MetricsAggregator()
# Create sample metrics
metrics = [
PerformanceMetrics(
operation="transcription",
duration_seconds=10.0,
memory_peak_mb=1024.0,
cpu_peak_percent=50.0,
throughput_items_per_second=2.0,
error_count=0,
success_count=10,
total_count=10
),
PerformanceMetrics(
operation="transcription",
duration_seconds=12.0,
memory_peak_mb=1536.0,
cpu_peak_percent=60.0,
throughput_items_per_second=1.8,
error_count=1,
success_count=9,
total_count=10
)
]
# Aggregate metrics
aggregated = aggregator.aggregate_metrics(metrics)
assert aggregated['avg_duration_seconds'] == 11.0
assert aggregated['avg_memory_peak_mb'] == 1280.0
assert aggregated['avg_cpu_peak_percent'] == 55.0
assert aggregated['total_operations'] == 2
assert aggregated['success_rate'] == 95.0 # 19/20 * 100
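
    # Hedged sketch: the aggregation arithmetic the assertions above assume
    # (field-wise means, success_rate = successes / total * 100), checked with
    # plain Python rather than MetricsAggregator. Multiplying before dividing
    # keeps the success-rate computation exact in floating point.
    def test_aggregation_reference_math(self):
        durations = [10.0, 12.0]
        memories = [1024.0, 1536.0]
        cpus = [50.0, 60.0]
        successes, totals = 10 + 9, 10 + 10
        assert sum(durations) / len(durations) == 11.0
        assert sum(memories) / len(memories) == 1280.0
        assert sum(cpus) / len(cpus) == 55.0
        assert successes * 100 / totals == 95.0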
def test_performance_threshold_monitoring(self):
"""Test performance threshold monitoring."""
from src.services.performance_profiling import PerformanceThresholdMonitor
monitor = PerformanceThresholdMonitor(
max_duration_seconds=30.0,
max_memory_mb=2048.0,
max_cpu_percent=90.0
)
# Test within thresholds
metrics = PerformanceMetrics(
operation="test",
duration_seconds=15.0,
memory_peak_mb=1024.0,
cpu_peak_percent=50.0,
throughput_items_per_second=1.0,
error_count=0,
success_count=10,
total_count=10
)
violations = monitor.check_thresholds(metrics)
assert len(violations) == 0
# Test threshold violations
metrics.duration_seconds = 35.0
metrics.memory_peak_mb = 3072.0
metrics.cpu_peak_percent = 95.0
violations = monitor.check_thresholds(metrics)
assert len(violations) == 3
assert any('Duration exceeded' in v for v in violations)
assert any('Memory exceeded' in v for v in violations)
assert any('CPU exceeded' in v for v in violations)
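
    # Hedged sketch (not the project's PerformanceThresholdMonitor): the
    # comparison logic the violation messages above imply, emitting one
    # message per exceeded limit.
    def test_threshold_check_reference_sketch(self):
        limits = {"Duration": 30.0, "Memory": 2048.0, "CPU": 90.0}
        observed = {"Duration": 35.0, "Memory": 3072.0, "CPU": 95.0}
        violations = [
            f"{name} exceeded: {observed[name]} > {limit}"
            for name, limit in limits.items()
            if observed[name] > limit
        ]
        assert len(violations) == 3
        assert any("Duration exceeded" in v for v in violations)

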
class TestPerformanceProfilingIntegration:
"""Integration tests for performance profiling."""
@pytest.mark.asyncio
async def test_full_profiling_workflow(self, tmp_path):
"""Test the complete profiling workflow."""
from src.services.performance_profiling import (
PerformanceBenchmark,
BenchmarkDataStore,
MetricsAggregator,
PerformanceThresholdMonitor,
BenchmarkData
)
# Create mock managers
mock_model_manager = MagicMock()
mock_model_manager.transcribe_batch = AsyncMock(return_value=["transcript"])
mock_diarization_manager = MagicMock()
mock_diarization_manager.process_audio = AsyncMock(return_value={"speakers": 2})
mock_domain_adapter = MagicMock()
mock_domain_adapter.adapt_transcript = AsyncMock(return_value="adapted")
# Create profiling components
benchmark = PerformanceBenchmark(
model_manager=mock_model_manager,
diarization_manager=mock_diarization_manager,
domain_adapter=mock_domain_adapter
)
store = BenchmarkDataStore(storage_path=tmp_path)
aggregator = MetricsAggregator()
monitor = PerformanceThresholdMonitor()
# Run benchmark
audio_files = ["tests/fixtures/sample_5s.wav"]
with patch('torch.cuda.reset_peak_memory_stats'), \
patch('torch.cuda.empty_cache'), \
patch('torch.cuda.max_memory_allocated') as mock_max_memory, \
patch('psutil.Process') as mock_process:
mock_max_memory.return_value = 1024 * 1024 * 512 # 512MB
mock_process_instance = MagicMock()
mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 50 # 50MB
mock_process.return_value = mock_process_instance
# Run transcription benchmark
results = await benchmark.benchmark_transcription(
audio_files=audio_files,
batch_sizes=[1],
device='cuda'
)
# Store results
for _, row in results.iterrows():
data = BenchmarkData(
operation_name="transcription",
batch_size=row['batch_size'],
duration_seconds=row['total_time'],
peak_memory_mb=row['peak_memory_gb'] * 1024,
throughput_items_per_second=row['throughput'],
timestamp=datetime.now(timezone.utc)
)
store.store_benchmark_data(data)
# Retrieve and aggregate
stored_data = store.get_benchmark_data("transcription")
# Convert BenchmarkData to PerformanceMetrics for aggregation
metrics_data = []
for data in stored_data:
metrics = PerformanceMetrics(
operation="transcription",
duration_seconds=data.duration_seconds,
memory_peak_mb=data.peak_memory_mb,
cpu_peak_percent=50.0, # Default value for test
throughput_items_per_second=data.throughput_items_per_second,
error_count=0, # Default value for test
success_count=1, # Default value for test
total_count=1 # Default value for test
)
metrics_data.append(metrics)
aggregated = aggregator.aggregate_metrics(metrics_data)
# Check thresholds
metrics = PerformanceMetrics(
operation="transcription",
duration_seconds=aggregated['avg_duration_seconds'],
memory_peak_mb=aggregated['avg_memory_peak_mb'],
cpu_peak_percent=aggregated['avg_cpu_peak_percent'],
throughput_items_per_second=aggregated['avg_throughput_items_per_second'],
error_count=aggregated['total_errors'],
success_count=aggregated['total_successes'],
total_count=aggregated['total_operations']
)
violations = monitor.check_thresholds(metrics)
# Assertions
assert len(results) > 0
assert len(stored_data) > 0
assert 'avg_duration_seconds' in aggregated
assert isinstance(violations, list)
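

if __name__ == "__main__":
    # Convenience entry point so the file can be run directly; pytest.main
    # accepts the same arguments as the pytest CLI.
    pytest.main([__file__, "-v"])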