#!/usr/bin/env python3 """Example: Batch processing with circuit breaker pattern. This example demonstrates: - Using BatchProcessor for parallel file processing - Circuit breaker pattern for fault tolerance - Progress tracking and statistics - Error recovery strategies """ import asyncio import logging from pathlib import Path from typing import Dict, Any, List import random # Add parent directory to path for imports import sys sys.path.insert(0, str(Path(__file__).parent.parent)) from src.base.processors import BatchProcessor, AudioProcessor, CircuitBreaker, CircuitBreakerState # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) class SimulatedAudioProcessor(AudioProcessor): """Simulated audio processor for demonstration.""" def __init__(self, failure_rate: float = 0.1, **kwargs): """Initialize with configurable failure rate for testing.""" super().__init__(**kwargs) self.failure_rate = failure_rate self.processed_count = 0 async def _process_audio(self, input_path: Path) -> Path: """Simulate audio processing with occasional failures.""" self.processed_count += 1 # Simulate processing time await asyncio.sleep(random.uniform(0.5, 2.0)) # Simulate failures for circuit breaker demonstration if random.random() < self.failure_rate: if self.processed_count < 5: # Early failures to trigger circuit breaker raise Exception(f"Processing failed for {input_path.name}") # Return processed file path output_path = input_path.parent / f"{input_path.stem}_processed.wav" output_path.touch() # Create dummy file logger.info(f"āœ“ Processed {input_path.name} -> {output_path.name}") return output_path async def process_with_circuit_breaker(files: List[Path]) -> Dict[str, Any]: """Process files with circuit breaker protection. Demonstrates how circuit breaker prevents cascading failures and provides graceful degradation. """ # Configure circuit breaker breaker = CircuitBreaker( failure_threshold=3, # Open after 3 failures recovery_timeout=10, # Try again after 10 seconds expected_exception=Exception, ) # Initialize processor with higher failure rate processor = SimulatedAudioProcessor(failure_rate=0.3) results = { "successful": [], "failed": [], "circuit_breaker_rejections": [], } for file_path in files: try: # Check circuit breaker state if breaker.state == CircuitBreakerState.OPEN: logger.warning(f"⚔ Circuit breaker OPEN - skipping {file_path.name}") results["circuit_breaker_rejections"].append(str(file_path)) continue # Process with circuit breaker protection async with breaker: result = await processor.process(file_path) results["successful"].append({ "input": str(file_path), "output": str(result) }) except Exception as e: logger.error(f"āœ— Failed to process {file_path.name}: {e}") results["failed"].append({ "file": str(file_path), "error": str(e), "breaker_state": breaker.state.value }) # Add circuit breaker statistics results["circuit_breaker_stats"] = { "final_state": breaker.state.value, "failure_count": breaker.failure_count, "success_count": breaker.success_count, "last_failure_time": breaker.last_failure_time.isoformat() if breaker.last_failure_time else None, } return results async def main(): """Run batch processing examples.""" # Create test files test_dir = Path("test_media") test_dir.mkdir(exist_ok=True) test_files = [] for i in range(20): file_path = test_dir / f"audio_{i:03d}.mp3" file_path.touch() test_files.append(file_path) try: # Example 1: Basic batch processing with parallel execution print("\n" + "="*60) print("Example 1: Parallel Batch Processing") print("="*60) # Initialize processors audio_processor = SimulatedAudioProcessor(failure_rate=0.1) batch_processor = BatchProcessor(config={ "max_parallel": 4, "batch_size": 5, "max_retries": 2, }) # Process batch print(f"Processing {len(test_files[:10])} files with max 4 parallel...") results = await batch_processor.process_batch(test_files[:10], audio_processor) print(f"\nBatch Processing Results:") print(f" Total: {results['total']}") print(f" Successful: {results['successful']}") print(f" Failed: {results['failed']}") print(f" Time: {results['elapsed_seconds']:.2f}s") print(f" Speed: {results['files_per_second']:.2f} files/sec") if results['errors']: print(f"\nErrors encountered:") for error in results['errors'][:3]: # Show first 3 errors print(f" - {Path(error['file']).name}: {error['error']}") # Example 2: Circuit breaker pattern for fault tolerance print("\n" + "="*60) print("Example 2: Circuit Breaker Pattern") print("="*60) print("Processing with circuit breaker (simulating failures)...") breaker_results = await process_with_circuit_breaker(test_files[10:20]) print(f"\nCircuit Breaker Results:") print(f" Successful: {len(breaker_results['successful'])}") print(f" Failed: {len(breaker_results['failed'])}") print(f" Rejected by breaker: {len(breaker_results['circuit_breaker_rejections'])}") print(f" Final breaker state: {breaker_results['circuit_breaker_stats']['final_state']}") # Example 3: Progress tracking and statistics print("\n" + "="*60) print("Example 3: Progress Tracking and Statistics") print("="*60) # Process with progress updates batch_processor_with_progress = BatchProcessor(config={ "max_parallel": 2, "batch_size": 3, }) print("Processing with progress tracking...") # Simulate progress updates async def process_with_progress(): task = asyncio.create_task( batch_processor_with_progress.process_batch(test_files[:6], audio_processor) ) # Check progress periodically while not task.done(): await asyncio.sleep(0.5) stats = batch_processor_with_progress.get_stats() if stats.get("current_batch"): progress = stats["current_batch"]["progress"] success_rate = stats["current_batch"]["success_rate"] print(f" Progress: {progress} | Success rate: {success_rate:.1%}", end="\r") return await task final_results = await process_with_progress() # Get final statistics final_stats = batch_processor_with_progress.get_stats() print(f"\n\nFinal Statistics:") print(f" Total processed: {final_stats['total_processed']}") print(f" Total successful: {final_stats['total_successful']}") print(f" Total failed: {final_stats['total_failed']}") print(f" Overall success rate: {final_stats['overall_success_rate']:.1%}") # Example 4: Error recovery strategies print("\n" + "="*60) print("Example 4: Error Recovery Strategies") print("="*60) # Identify failed files from previous batch failed_files = [ Path(error['file']) for error in results.get('errors', []) if Path(error['file']).exists() ] if failed_files: print(f"Retrying {len(failed_files)} failed files with reduced parallelism...") # Retry with more conservative settings recovery_processor = BatchProcessor(config={ "max_parallel": 1, # Process sequentially "batch_size": 2, "max_retries": 5, # More retry attempts }) # Use processor with lower failure rate reliable_processor = SimulatedAudioProcessor(failure_rate=0.05) recovery_results = await recovery_processor.process_batch( failed_files, reliable_processor ) print(f"Recovery Results:") print(f" Recovered: {recovery_results['successful']}/{len(failed_files)}") print(f" Still failing: {recovery_results['failed']}") else: print("No failed files to retry") finally: # Cleanup test files for file_path in test_files: if file_path.exists(): file_path.unlink() # Also remove processed files processed = file_path.parent / f"{file_path.stem}_processed.wav" if processed.exists(): processed.unlink() if test_dir.exists(): test_dir.rmdir() print("\nāœ“ Cleanup complete") if __name__ == "__main__": print("Trax Batch Processing Example") print("Using AI Assistant Library for resilient batch operations") print("-" * 60) # Run the async main function asyncio.run(main())