"""
|
|
Unit tests for performance profiling infrastructure.
|
|
|
|
Tests the comprehensive performance profiling system that measures
|
|
memory usage, processing speed, and resource utilization across
|
|
all pipeline components.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import pytest
|
|
import time
|
|
from unittest.mock import MagicMock, patch, AsyncMock
|
|
from pathlib import Path
|
|
from typing import Dict, List, Any
|
|
from datetime import datetime, timezone
|
|
|
|
from src.services.performance import (
|
|
ResourceMonitor,
|
|
SystemResources,
|
|
PerformanceMetrics,
|
|
M3OptimizationConfig
|
|
)
|
|
from src.services.performance_benchmarker import PerformanceBenchmarker
|
|
|
|
|
|
class TestPerformanceProfilingInfrastructure:
|
|
"""Test the comprehensive performance profiling infrastructure."""
|
|
|
|
@pytest.fixture
|
|
def mock_model_manager(self):
|
|
"""Create a mock model manager for testing."""
|
|
manager = MagicMock()
|
|
manager.transcribe_batch = AsyncMock()
|
|
manager.transcribe_batch.return_value = ["transcript1", "transcript2"]
|
|
return manager
|
|
|
|
@pytest.fixture
|
|
def mock_diarization_manager(self):
|
|
"""Create a mock diarization manager for testing."""
|
|
manager = MagicMock()
|
|
manager.process_audio = AsyncMock()
|
|
manager.process_audio.return_value = {"speakers": 2, "segments": 10}
|
|
return manager
|
|
|
|
@pytest.fixture
|
|
def mock_domain_adapter(self):
|
|
"""Create a mock domain adapter for testing."""
|
|
adapter = MagicMock()
|
|
adapter.adapt_transcript = AsyncMock()
|
|
adapter.adapt_transcript.return_value = "adapted_transcript"
|
|
return adapter
|
|
|
|
@pytest.fixture
|
|
def sample_audio_files(self):
|
|
"""Create sample audio file paths for testing."""
|
|
return [
|
|
"tests/fixtures/sample_5s.wav",
|
|
"tests/fixtures/sample_30s.mp3",
|
|
"tests/fixtures/sample_2m.mp4"
|
|
]

    def test_performance_benchmark_initialization(self, mock_model_manager,
                                                  mock_diarization_manager,
                                                  mock_domain_adapter):
        """Test performance benchmark initialization."""
        from src.services.performance_profiling import PerformanceBenchmark

        benchmark = PerformanceBenchmark(
            model_manager=mock_model_manager,
            diarization_manager=mock_diarization_manager,
            domain_adapter=mock_domain_adapter
        )

        assert benchmark.model_manager == mock_model_manager
        assert benchmark.diarization_manager == mock_diarization_manager
        assert benchmark.domain_adapter == mock_domain_adapter
        assert benchmark.results == {}

    @pytest.mark.asyncio
    async def test_benchmark_transcription_basic(self, mock_model_manager,
                                                 mock_diarization_manager,
                                                 mock_domain_adapter,
                                                 sample_audio_files):
        """Test basic transcription benchmarking."""
        from src.services.performance_profiling import PerformanceBenchmark

        benchmark = PerformanceBenchmark(
            model_manager=mock_model_manager,
            diarization_manager=mock_diarization_manager,
            domain_adapter=mock_domain_adapter
        )

        # Mock CUDA functions
        with patch('torch.cuda.reset_peak_memory_stats'), \
             patch('torch.cuda.empty_cache'), \
             patch('torch.cuda.max_memory_allocated') as mock_max_memory:

            mock_max_memory.return_value = 1024 * 1024 * 1024  # 1GB

            results = await benchmark.benchmark_transcription(
                audio_files=sample_audio_files,
                batch_sizes=[1, 2],
                device='cuda'
            )
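
            # Note (assumption): the assertions below treat `results` as a
            # DataFrame-like table with one row per batch size and a
            # `.columns` attribute (the integration test further down also
            # calls `.iterrows()` on it); that shape is implied by these
            # checks rather than verified against the real return type.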

            assert len(results) == 2  # Two batch sizes
            assert 'batch_size' in results.columns
            assert 'total_time' in results.columns
            assert 'throughput' in results.columns
            assert 'peak_memory_gb' in results.columns

            # Verify model manager was called
            assert mock_model_manager.transcribe_batch.called

    @pytest.mark.asyncio
    async def test_benchmark_diarization(self, mock_model_manager,
                                         mock_diarization_manager,
                                         mock_domain_adapter,
                                         sample_audio_files):
        """Test diarization benchmarking."""
        from src.services.performance_profiling import PerformanceBenchmark

        benchmark = PerformanceBenchmark(
            model_manager=mock_model_manager,
            diarization_manager=mock_diarization_manager,
            domain_adapter=mock_domain_adapter
        )

        # Mock psutil
        with patch('psutil.Process') as mock_process:
            mock_process_instance = MagicMock()
            mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 100  # 100MB
            mock_process.return_value = mock_process_instance

            results = await benchmark.benchmark_diarization(sample_audio_files)

            assert 'total_time' in results
            assert 'per_file_avg' in results
            assert 'peak_memory_mb' in results
            assert results['total_time'] > 0
            assert results['per_file_avg'] > 0

            # Verify diarization manager was called
            assert mock_diarization_manager.process_audio.called

    def test_memory_tracking_accuracy(self):
        """Test memory tracking accuracy."""
        from src.services.performance_profiling import MemoryTracker

        tracker = MemoryTracker()

        # Test memory tracking
        with patch('psutil.Process') as mock_process:
            mock_process_instance = MagicMock()
            mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 50  # 50MB
            mock_process.return_value = mock_process_instance

            # Mock the process instance
            tracker.process = mock_process_instance

            memory_usage = tracker.get_current_memory_mb()
            assert memory_usage == 50.0

    def test_timing_decorator(self):
        """Test timing decorator functionality."""
        from src.services.performance_profiling import timing_decorator

        @timing_decorator
        def test_function():
            time.sleep(0.1)  # Simulate work
            return "result"

        result = test_function()
        assert result == "result"

        # Checking that the timing was actually recorded would require access
        # to the decorator's timing storage mechanism; that assertion depends
        # on how timing data is stored and is left to the implementation.

    def test_benchmark_data_serialization(self):
        """Test benchmark data serialization and deserialization."""
        from src.services.performance_profiling import BenchmarkData

        # Create sample benchmark data
        data = BenchmarkData(
            operation_name="test_transcription",
            batch_size=4,
            duration_seconds=10.5,
            peak_memory_mb=2048.0,
            throughput_items_per_second=2.5,
            timestamp=datetime.now(timezone.utc)
        )
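
        # Illustrative only (assumption): if to_dict() maps the constructor
        # fields one-to-one, the serialized form would look roughly like
        # {"operation_name": "test_transcription", "batch_size": 4,
        #  "duration_seconds": 10.5, "peak_memory_mb": 2048.0,
        #  "throughput_items_per_second": 2.5, "timestamp": <serialized UTC time>}.
        # The assertions below rely only on the key names and the duration value.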

        # Test serialization
        serialized = data.to_dict()
        assert 'operation_name' in serialized
        assert 'batch_size' in serialized
        assert 'duration_seconds' in serialized
        assert serialized['duration_seconds'] == 10.5

        # Test deserialization
        deserialized = BenchmarkData.from_dict(serialized)
        assert deserialized.operation_name == "test_transcription"
        assert deserialized.batch_size == 4
        assert deserialized.duration_seconds == 10.5

    def test_system_information_collection(self):
        """Test system information collection."""
        from src.services.performance_profiling import SystemInfoCollector

        collector = SystemInfoCollector()

        with patch('platform.processor') as mock_processor, \
             patch('platform.machine') as mock_machine, \
             patch('psutil.cpu_count') as mock_cpu_count, \
             patch('psutil.virtual_memory') as mock_vm:

            mock_processor.return_value = "Apple M3"
            mock_machine.return_value = "arm64"
            mock_cpu_count.return_value = 8
            mock_vm.return_value.total = 16 * 1024 * 1024 * 1024  # 16GB

            system_info = collector.collect_system_info()

            assert 'cpu_model' in system_info
            assert 'architecture' in system_info
            assert 'cpu_cores' in system_info
            assert 'total_memory_gb' in system_info
            assert system_info['cpu_model'] == "Apple M3"
            assert system_info['cpu_cores'] == 8

    @pytest.mark.asyncio
    async def test_end_to_end_benchmarking(self, mock_model_manager,
                                           mock_diarization_manager,
                                           mock_domain_adapter,
                                           sample_audio_files):
        """Test end-to-end benchmarking."""
        from src.services.performance_profiling import PerformanceBenchmark

        benchmark = PerformanceBenchmark(
            model_manager=mock_model_manager,
            diarization_manager=mock_diarization_manager,
            domain_adapter=mock_domain_adapter
        )

        # Mock all necessary components
        with patch('torch.cuda.reset_peak_memory_stats'), \
             patch('torch.cuda.empty_cache'), \
             patch('torch.cuda.max_memory_allocated') as mock_max_memory, \
             patch('psutil.Process') as mock_process:

            mock_max_memory.return_value = 1024 * 1024 * 1024  # 1GB
            mock_process_instance = MagicMock()
            mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 100  # 100MB
            mock_process.return_value = mock_process_instance

            # Run end-to-end benchmark
            results = await benchmark.benchmark_end_to_end(sample_audio_files)

            assert 'total_processing_time' in results
            assert 'transcription_time' in results
            assert 'diarization_time' in results
            assert 'adaptation_time' in results
            assert 'peak_memory_usage' in results
            assert 'throughput' in results

    def test_benchmark_data_storage(self, tmp_path):
        """Test benchmark data storage and retrieval."""
        from src.services.performance_profiling import BenchmarkDataStore, BenchmarkData

        store = BenchmarkDataStore(storage_path=tmp_path)

        # Create sample data
        data = BenchmarkData(
            operation_name="test_operation",
            batch_size=2,
            duration_seconds=5.0,
            peak_memory_mb=1024.0,
            throughput_items_per_second=1.0,
            timestamp=datetime.now(timezone.utc)
        )

        # Store data
        store.store_benchmark_data(data)

        # Retrieve data
        retrieved_data = store.get_benchmark_data("test_operation")
        assert len(retrieved_data) == 1
        assert retrieved_data[0].operation_name == "test_operation"
        assert retrieved_data[0].duration_seconds == 5.0

    def test_performance_metrics_aggregation(self):
        """Test performance metrics aggregation."""
        from src.services.performance_profiling import MetricsAggregator

        aggregator = MetricsAggregator()

        # Create sample metrics
        metrics = [
            PerformanceMetrics(
                operation="transcription",
                duration_seconds=10.0,
                memory_peak_mb=1024.0,
                cpu_peak_percent=50.0,
                throughput_items_per_second=2.0,
                error_count=0,
                success_count=10,
                total_count=10
            ),
            PerformanceMetrics(
                operation="transcription",
                duration_seconds=12.0,
                memory_peak_mb=1536.0,
                cpu_peak_percent=60.0,
                throughput_items_per_second=1.8,
                error_count=1,
                success_count=9,
                total_count=10
            )
        ]

        # Aggregate metrics
        aggregated = aggregator.aggregate_metrics(metrics)
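
        # Expected values from the two samples above:
        #   avg duration  = (10.0 + 12.0) / 2      = 11.0 s
        #   avg memory    = (1024.0 + 1536.0) / 2  = 1280.0 MB
        #   avg CPU       = (50.0 + 60.0) / 2      = 55.0 %
        #   success rate  = (10 + 9) / (10 + 10)   = 19/20 = 95.0 %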

        assert aggregated['avg_duration_seconds'] == 11.0
        assert aggregated['avg_memory_peak_mb'] == 1280.0
        assert aggregated['avg_cpu_peak_percent'] == 55.0
        assert aggregated['total_operations'] == 2
        assert aggregated['success_rate'] == 95.0  # 19/20 * 100

    def test_performance_threshold_monitoring(self):
        """Test performance threshold monitoring."""
        from src.services.performance_profiling import PerformanceThresholdMonitor

        monitor = PerformanceThresholdMonitor(
            max_duration_seconds=30.0,
            max_memory_mb=2048.0,
            max_cpu_percent=90.0
        )
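
        # The first metrics below stay under every limit (15.0 s < 30.0 s,
        # 1024.0 MB < 2048.0 MB, 50.0 % < 90.0 %), so no violations are
        # expected; the mutated values (35.0 s, 3072.0 MB, 95.0 %) each
        # exceed one limit, which should yield exactly three violations.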

        # Test within thresholds
        metrics = PerformanceMetrics(
            operation="test",
            duration_seconds=15.0,
            memory_peak_mb=1024.0,
            cpu_peak_percent=50.0,
            throughput_items_per_second=1.0,
            error_count=0,
            success_count=10,
            total_count=10
        )

        violations = monitor.check_thresholds(metrics)
        assert len(violations) == 0

        # Test threshold violations
        metrics.duration_seconds = 35.0
        metrics.memory_peak_mb = 3072.0
        metrics.cpu_peak_percent = 95.0

        violations = monitor.check_thresholds(metrics)
        assert len(violations) == 3
        assert any('Duration exceeded' in v for v in violations)
        assert any('Memory exceeded' in v for v in violations)
        assert any('CPU exceeded' in v for v in violations)


class TestPerformanceProfilingIntegration:
    """Integration tests for performance profiling."""

    @pytest.mark.asyncio
    async def test_full_profiling_workflow(self, tmp_path):
        """Test the complete profiling workflow."""
        from src.services.performance_profiling import (
            PerformanceBenchmark,
            BenchmarkDataStore,
            MetricsAggregator,
            PerformanceThresholdMonitor,
            BenchmarkData
        )

        # Create mock managers
        mock_model_manager = MagicMock()
        mock_model_manager.transcribe_batch = AsyncMock(return_value=["transcript"])

        mock_diarization_manager = MagicMock()
        mock_diarization_manager.process_audio = AsyncMock(return_value={"speakers": 2})

        mock_domain_adapter = MagicMock()
        mock_domain_adapter.adapt_transcript = AsyncMock(return_value="adapted")

        # Create profiling components
        benchmark = PerformanceBenchmark(
            model_manager=mock_model_manager,
            diarization_manager=mock_diarization_manager,
            domain_adapter=mock_domain_adapter
        )

        store = BenchmarkDataStore(storage_path=tmp_path)
        aggregator = MetricsAggregator()
        monitor = PerformanceThresholdMonitor()

        # Run benchmark
        audio_files = ["tests/fixtures/sample_5s.wav"]

        with patch('torch.cuda.reset_peak_memory_stats'), \
             patch('torch.cuda.empty_cache'), \
             patch('torch.cuda.max_memory_allocated') as mock_max_memory, \
             patch('psutil.Process') as mock_process:

            mock_max_memory.return_value = 1024 * 1024 * 512  # 512MB
            mock_process_instance = MagicMock()
            mock_process_instance.memory_info.return_value.rss = 1024 * 1024 * 50  # 50MB
            mock_process.return_value = mock_process_instance

            # Run transcription benchmark
            results = await benchmark.benchmark_transcription(
                audio_files=audio_files,
                batch_sizes=[1],
                device='cuda'
            )

            # Store results
            for _, row in results.iterrows():
                data = BenchmarkData(
                    operation_name="transcription",
                    batch_size=row['batch_size'],
                    duration_seconds=row['total_time'],
                    peak_memory_mb=row['peak_memory_gb'] * 1024,
                    throughput_items_per_second=row['throughput'],
                    timestamp=datetime.now(timezone.utc)
                )
                store.store_benchmark_data(data)

            # Retrieve and aggregate
            stored_data = store.get_benchmark_data("transcription")

            # Convert BenchmarkData to PerformanceMetrics for aggregation
            metrics_data = []
            for data in stored_data:
                metrics = PerformanceMetrics(
                    operation="transcription",
                    duration_seconds=data.duration_seconds,
                    memory_peak_mb=data.peak_memory_mb,
                    cpu_peak_percent=50.0,  # Default value for test
                    throughput_items_per_second=data.throughput_items_per_second,
                    error_count=0,  # Default value for test
                    success_count=1,  # Default value for test
                    total_count=1  # Default value for test
                )
                metrics_data.append(metrics)

            aggregated = aggregator.aggregate_metrics(metrics_data)

            # Check thresholds
            metrics = PerformanceMetrics(
                operation="transcription",
                duration_seconds=aggregated['avg_duration_seconds'],
                memory_peak_mb=aggregated['avg_memory_peak_mb'],
                cpu_peak_percent=aggregated['avg_cpu_peak_percent'],
                throughput_items_per_second=aggregated['avg_throughput_items_per_second'],
                error_count=aggregated['total_errors'],
                success_count=aggregated['total_successes'],
                total_count=aggregated['total_operations']
            )

            violations = monitor.check_thresholds(metrics)

            # Assertions
            assert len(results) > 0
            assert len(stored_data) > 0
            assert 'avg_duration_seconds' in aggregated
            assert isinstance(violations, list)