274 lines
9.7 KiB
Python
274 lines
9.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Example: Batch processing with circuit breaker pattern.
|
|
|
|
This example demonstrates:
|
|
- Using BatchProcessor for parallel file processing
|
|
- Circuit breaker pattern for fault tolerance
|
|
- Progress tracking and statistics
|
|
- Error recovery strategies
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, Any, List
|
|
import random
|
|
|
|
# Add parent directory to path for imports
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from src.base.processors import BatchProcessor, AudioProcessor, CircuitBreaker, CircuitBreakerState
|
|
|
|
# Root logging setup for this example: emit INFO and above, prefixing every
# record with its timestamp, logger name and severity.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger used by the code in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class SimulatedAudioProcessor(AudioProcessor):
    """Stand-in audio processor that fakes work and injects random failures.

    It sleeps for a random interval instead of doing real audio work, then
    creates an empty ``<stem>_processed.wav`` file next to the input.
    """

    def __init__(self, failure_rate: float = 0.1, **kwargs):
        """Create the processor.

        Args:
            failure_rate: Threshold compared against ``random.random()`` on
                every call; a roll below it is a candidate for failure.
            **kwargs: Passed through unchanged to ``AudioProcessor.__init__``.
        """
        super().__init__(**kwargs)
        self.processed_count = 0  # number of _process_audio calls started
        self.failure_rate = failure_rate

    async def _process_audio(self, input_path: Path) -> Path:
        """Pretend to process ``input_path`` and return the output path.

        Args:
            input_path: File to "process"; only its name and parent
                directory are used.

        Returns:
            Path of the empty ``<stem>_processed.wav`` file that was created
            in the same directory as ``input_path``.

        Raises:
            Exception: When the random roll is below ``failure_rate`` while
                fewer than five calls have been counted on this instance.
        """
        # Counted up front, so failed attempts are included in the total.
        self.processed_count += 1

        # Stand-in for real work: wait between 0.5 and 2 seconds.
        simulated_duration = random.uniform(0.5, 2.0)
        await asyncio.sleep(simulated_duration)

        # Failures are only injected early on (count below 5) so that a
        # circuit breaker has something to trip on at the start of a run.
        roll = random.random()
        if roll < self.failure_rate and self.processed_count < 5:
            raise Exception(f"Processing failed for {input_path.name}")

        # Create an empty placeholder as the "processed" result.
        output_name = f"{input_path.stem}_processed.wav"
        output_path = input_path.parent / output_name
        output_path.touch()

        logger.info(f"✓ Processed {input_path.name} -> {output_path.name}")
        return output_path
|
|
|
|
|
|
async def process_with_circuit_breaker(files: List[Path]) -> Dict[str, Any]:
    """Run ``files`` one at a time behind a circuit breaker.

    A file is skipped, and recorded as a rejection, whenever the breaker
    reports ``CircuitBreakerState.OPEN``. Otherwise it is processed inside
    ``async with breaker``. Any exception is logged and recorded, and the
    loop moves on to the next file.

    Args:
        files: Input files to process, in order.

    Returns:
        A dict with the keys ``"successful"`` (list of input/output path
        pairs), ``"failed"`` (list of file/error/breaker-state records),
        ``"circuit_breaker_rejections"`` (list of skipped paths) and
        ``"circuit_breaker_stats"`` (final breaker state and counters).
    """
    # Breaker settings: trip after 3 failures, 10 second recovery timeout.
    breaker = CircuitBreaker(
        failure_threshold=3,
        recovery_timeout=10,
        expected_exception=Exception,
    )

    # A higher failure rate than the default makes the breaker more likely
    # to trip during the demonstration.
    processor = SimulatedAudioProcessor(failure_rate=0.3)

    succeeded: List[Dict[str, str]] = []
    failed: List[Dict[str, Any]] = []
    rejected: List[str] = []

    for media_file in files:
        try:
            # Skip the file entirely while the breaker is open.
            if breaker.state == CircuitBreakerState.OPEN:
                logger.warning(f"⚡ Circuit breaker OPEN - skipping {media_file.name}")
                rejected.append(str(media_file))
                continue

            # The breaker observes the outcome of the guarded call.
            async with breaker:
                output = await processor.process(media_file)
                succeeded.append({
                    "input": str(media_file),
                    "output": str(output)
                })

        except Exception as exc:
            logger.error(f"✗ Failed to process {media_file.name}: {exc}")
            failure_record = {
                "file": str(media_file),
                "error": str(exc),
                "breaker_state": breaker.state.value
            }
            failed.append(failure_record)

    summary: Dict[str, Any] = {
        "successful": succeeded,
        "failed": failed,
        "circuit_breaker_rejections": rejected,
    }

    # Snapshot of the breaker once every file has been handled.
    summary["circuit_breaker_stats"] = {
        "final_state": breaker.state.value,
        "failure_count": breaker.failure_count,
        "success_count": breaker.success_count,
        "last_failure_time": breaker.last_failure_time.isoformat() if breaker.last_failure_time else None,
    }

    return summary
|
|
|
|
|
|
def _print_section(title: str) -> None:
    """Print a 60-character banner that introduces one example."""
    rule = "=" * 60
    print("\n" + rule)
    print(title)
    print(rule)


async def _run_parallel_batch(files: List[Path], audio_processor: Any) -> Dict[str, Any]:
    """Example 1: push ``files`` through a BatchProcessor and print a summary.

    Returns the result dict from ``process_batch`` so that a later example
    can retry the files listed under its ``"errors"`` key.
    """
    max_parallel = 4
    batch_processor = BatchProcessor(config={
        "max_parallel": max_parallel,
        "batch_size": 5,
        "max_retries": 2,
    })

    print(f"Processing {len(files)} files with max {max_parallel} parallel...")
    results = await batch_processor.process_batch(files, audio_processor)

    print("\nBatch Processing Results:")
    print(f" Total: {results['total']}")
    print(f" Successful: {results['successful']}")
    print(f" Failed: {results['failed']}")
    print(f" Time: {results['elapsed_seconds']:.2f}s")
    print(f" Speed: {results['files_per_second']:.2f} files/sec")

    errors = results['errors']
    if errors:
        print("\nErrors encountered:")
        for error in errors[:3]:  # Show first 3 errors
            print(f" - {Path(error['file']).name}: {error['error']}")

    return results


async def _run_circuit_breaker_demo(files: List[Path]) -> None:
    """Example 2: process ``files`` behind a circuit breaker and print counts."""
    print("Processing with circuit breaker (simulating failures)...")
    breaker_results = await process_with_circuit_breaker(files)

    print("\nCircuit Breaker Results:")
    print(f" Successful: {len(breaker_results['successful'])}")
    print(f" Failed: {len(breaker_results['failed'])}")
    print(f" Rejected by breaker: {len(breaker_results['circuit_breaker_rejections'])}")
    print(f" Final breaker state: {breaker_results['circuit_breaker_stats']['final_state']}")


async def _run_progress_tracking(files: List[Path], audio_processor: Any) -> None:
    """Example 3: poll ``get_stats()`` while a batch runs, then print totals."""
    tracker = BatchProcessor(config={
        "max_parallel": 2,
        "batch_size": 3,
    })

    print("Processing with progress tracking...")

    # Run the batch in the background so this coroutine can poll for stats.
    task = asyncio.create_task(tracker.process_batch(files, audio_processor))

    while not task.done():
        await asyncio.sleep(0.5)
        stats = tracker.get_stats()
        current = stats.get("current_batch")
        if current:
            progress = current["progress"]
            success_rate = current["success_rate"]
            # "\r" rewrites the same console line on every poll.
            print(f" Progress: {progress} | Success rate: {success_rate:.1%}", end="\r")

    # Awaiting the finished task re-raises any exception from the batch.
    await task

    final_stats = tracker.get_stats()
    print("\n\nFinal Statistics:")
    print(f" Total processed: {final_stats['total_processed']}")
    print(f" Total successful: {final_stats['total_successful']}")
    print(f" Total failed: {final_stats['total_failed']}")
    print(f" Overall success rate: {final_stats['overall_success_rate']:.1%}")


async def _retry_failed_files(previous_results: Dict[str, Any]) -> None:
    """Example 4: retry files listed under ``"errors"`` in ``previous_results``."""
    # Only retry inputs that still exist on disk.
    candidates = (Path(error['file']) for error in previous_results.get('errors', []))
    failed_files = [path for path in candidates if path.exists()]

    if not failed_files:
        print("No failed files to retry")
        return

    print(f"Retrying {len(failed_files)} failed files with reduced parallelism...")

    # Retry with more conservative settings
    recovery_processor = BatchProcessor(config={
        "max_parallel": 1,  # Process sequentially
        "batch_size": 2,
        "max_retries": 5,  # More retry attempts
    })

    # Use processor with lower failure rate
    reliable_processor = SimulatedAudioProcessor(failure_rate=0.05)

    recovery_results = await recovery_processor.process_batch(
        failed_files,
        reliable_processor,
    )

    print("Recovery Results:")
    print(f" Recovered: {recovery_results['successful']}/{len(failed_files)}")
    print(f" Still failing: {recovery_results['failed']}")


def _cleanup_test_media(test_dir: Path, test_files: List[Path]) -> None:
    """Delete the example's input/output files and, if possible, ``test_dir``.

    Cleanup is best-effort: it runs from a ``finally`` block, so a failure
    here must not replace the exception that is already propagating.
    """
    log = logging.getLogger(__name__)

    for file_path in test_files:
        processed = file_path.parent / f"{file_path.stem}_processed.wav"
        for leftover in (file_path, processed):
            try:
                leftover.unlink()
            except FileNotFoundError:
                pass  # never created, or already removed
            except OSError as exc:
                log.warning("Could not remove %s: %s", leftover, exc)

    try:
        test_dir.rmdir()
    except FileNotFoundError:
        pass  # directory was never created
    except OSError as exc:
        # Typically "directory not empty": the directory existed before the
        # run and holds files that do not belong to this example.
        log.warning("Leaving %s in place: %s", test_dir, exc)

    print("\n✓ Cleanup complete")


async def main() -> None:
    """Run batch processing examples.

    Creates 20 empty ``audio_NNN.mp3`` files under ``test_media``, runs the
    four examples in order (parallel batch, circuit breaker, progress
    tracking, error recovery) and always removes the generated files
    afterwards, even when an example raises.
    """
    test_dir = Path("test_media")
    test_files: List[Path] = []

    try:
        # Fixtures are created inside the ``try`` so that a failure part way
        # through still cleans up whatever was already written.
        test_dir.mkdir(exist_ok=True)
        for i in range(20):
            file_path = test_dir / f"audio_{i:03d}.mp3"
            test_files.append(file_path)
            file_path.touch()

        # Example 1: Basic batch processing with parallel execution
        _print_section("Example 1: Parallel Batch Processing")
        audio_processor = SimulatedAudioProcessor(failure_rate=0.1)
        results = await _run_parallel_batch(test_files[:10], audio_processor)

        # Example 2: Circuit breaker pattern for fault tolerance
        _print_section("Example 2: Circuit Breaker Pattern")
        await _run_circuit_breaker_demo(test_files[10:20])

        # Example 3: Progress tracking and statistics
        _print_section("Example 3: Progress Tracking and Statistics")
        await _run_progress_tracking(test_files[:6], audio_processor)

        # Example 4: Error recovery strategies
        _print_section("Example 4: Error Recovery Strategies")
        await _retry_failed_files(results)

    finally:
        _cleanup_test_media(test_dir, test_files)
|
|
|
|
|
|
if __name__ == "__main__":
    # Short banner shown before any example output.
    for banner_line in (
        "Trax Batch Processing Example",
        "Using AI Assistant Library for resilient batch operations",
        "-" * 60,
    ):
        print(banner_line)

    # Drive the async entry point to completion on a fresh event loop.
    asyncio.run(main())