clean-tracks/src/core/audio_processor.py

316 lines
11 KiB
Python

"""
Audio processor pipeline for Clean-Tracks.
Orchestrates the complete audio processing workflow from loading to censorship to saving.
"""
import logging
import os
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple

# from .audio_utils import AudioUtils
from .audio_utils_simple import AudioUtils
logger = logging.getLogger(__name__)
class CensorshipMethod(Enum):
    """Ways a flagged audio segment can be masked.

    Each member's value is the lowercase string name of the method; the
    processor passes that string on to the audio utilities when applying
    censorship.
    """

    # Member values are plain strings so they can be looked up by value,
    # e.g. CensorshipMethod("beep").
    SILENCE = "silence"
    BEEP = "beep"
    WHITE_NOISE = "white_noise"
    FADE = "fade"
@dataclass
class ProcessingOptions:
    """Tunable settings for one audio processing run.

    Every field has a default, so ``ProcessingOptions()`` yields a usable
    configuration (silence-based censorship with output normalization).
    """

    # --- Censorship -------------------------------------------------------
    # Which masking method to apply to the flagged segments.
    censorship_method: CensorshipMethod = CensorshipMethod.SILENCE
    # Beep tone frequency, in Hz.
    beep_frequency: int = 1000
    # Beep loudness, in dBFS.
    beep_volume: float = -20
    # White-noise loudness, in dBFS.
    noise_volume: float = -30
    # Fade length, in milliseconds.
    fade_duration: int = 10

    # --- Output -----------------------------------------------------------
    # Whether to normalize the processed audio, and to which level (dBFS).
    normalize_output: bool = True
    target_dBFS: float = -20.0
    # Whether the output format should be reconciled with the input format.
    preserve_format: bool = True

    # --- Long files -------------------------------------------------------
    # Chunk length for long files; 1800 corresponds to 30 minutes.
    chunk_duration: float = 1800
@dataclass
class ProcessingResult:
    """Outcome of a single audio processing run.

    Only ``success`` is required; every other field starts out empty and is
    filled in by the processor as the run progresses.
    """

    # True only when the whole pipeline finished and the output was saved.
    success: bool
    # Path the processed audio was written to (set on success).
    output_path: Optional[str] = None
    # Duration of the input audio as reported by validation.
    duration: Optional[float] = None
    # Number of segments that were censored.
    segments_censored: int = 0
    # Wall-clock seconds spent processing (set on success).
    processing_time: Optional[float] = None
    # Human-readable failure reason when ``success`` is False.
    error: Optional[str] = None
    # Non-fatal issues collected along the way. A fresh list is created per
    # instance so results never share (or mutate) a common default.
    warnings: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Normalize an explicitly passed ``warnings=None`` to an empty list.

        Kept for backward compatibility with callers that pass ``None``.
        """
        if self.warnings is None:
            self.warnings = []
class AudioProcessor:
    """Main audio processor that handles the complete censorship pipeline.

    The heavy lifting (validation, loading, censoring, normalizing, saving)
    is delegated to an ``AudioUtils`` instance; this class sequences those
    calls, reports progress and packages the outcome.
    """

    def __init__(self, audio_utils: Optional[AudioUtils] = None):
        """
        Initialize the audio processor.

        Args:
            audio_utils: Optional AudioUtils instance (creates new if None)
        """
        self.audio_utils = audio_utils or AudioUtils()

    def process_audio(self, input_path: str, output_path: str,
                      segments: List[Tuple[float, float]],
                      options: Optional[ProcessingOptions] = None,
                      progress_callback: Optional[Callable[[str, float], Any]] = None) -> ProcessingResult:
        """
        Process an audio file with censorship.

        Steps: validate the input, load it, censor the given segments,
        optionally normalize, then save. Exceptions raised along the way are
        caught and reported through ``ProcessingResult.error``.

        Args:
            input_path: Path to input audio file
            output_path: Path for output audio file
            segments: List of (start_time, end_time) tuples to censor
            options: Processing options (defaults to ``ProcessingOptions()``)
            progress_callback: Optional callable invoked as
                ``progress_callback(message, percent)`` at each stage

        Returns:
            ProcessingResult with details of the operation
        """
        import time

        if options is None:
            options = ProcessingOptions()

        def report(message, percent):
            # Progress reporting is optional; do nothing without a callback.
            if progress_callback:
                progress_callback(message, percent)

        result = ProcessingResult(success=False)
        try:
            start_time = time.time()

            # Validate input file
            report("Validating audio file...", 0)
            validation = self.audio_utils.validate_audio_file(input_path)
            if not validation["valid"]:
                result.error = f"Invalid audio file: {', '.join(validation['errors'])}"
                return result
            result.warnings.extend(validation.get("warnings", []))
            result.duration = validation["duration"]

            # Load audio
            report("Loading audio file...", 10)
            logger.info(f"Loading audio from {input_path}")
            audio = self.audio_utils.load_audio(input_path)

            # Apply censorship if segments provided
            if segments:
                report(f"Applying censorship to {len(segments)} segments...", 30)
                logger.info(f"Applying {options.censorship_method.value} censorship to {len(segments)} segments")

                # Only the parameter relevant to the chosen method is forwarded.
                # NOTE(review): options.beep_volume is not forwarded for BEEP —
                # confirm whether apply_censorship expects it.
                kwargs = {}
                if options.censorship_method == CensorshipMethod.BEEP:
                    kwargs["frequency"] = options.beep_frequency
                elif options.censorship_method == CensorshipMethod.WHITE_NOISE:
                    kwargs["volume"] = options.noise_volume
                elif options.censorship_method == CensorshipMethod.FADE:
                    kwargs["fade_duration"] = options.fade_duration

                audio = self.audio_utils.apply_censorship(
                    audio,
                    segments,
                    options.censorship_method.value,
                    **kwargs
                )
                result.segments_censored = len(segments)

            # Normalize if requested
            if options.normalize_output:
                report("Normalizing audio...", 70)
                logger.info(f"Normalizing audio to {options.target_dBFS} dBFS")
                audio = self.audio_utils.normalize_audio(audio, options.target_dBFS)

            # Determine output format. Extensions are compared
            # case-insensitively so "song.MP3" -> "song.mp3" is not treated
            # as a format change.
            output_format = None
            if options.preserve_format:
                input_ext = Path(input_path).suffix[1:].lower()
                output_ext = Path(output_path).suffix[1:].lower()
                if input_ext != output_ext:
                    logger.warning(f"Output extension differs from input, using output extension: {output_ext}")
                    output_format = output_ext

            # Save processed audio
            report("Saving processed audio...", 90)
            logger.info(f"Saving processed audio to {output_path}")
            save_success = self.audio_utils.save_audio(audio, output_path, format=output_format)
            if not save_success:
                result.error = "Failed to save processed audio"
                return result

            # Calculate processing time
            result.processing_time = time.time() - start_time

            # Success!
            report("Processing complete!", 100)
            result.success = True
            result.output_path = output_path
            logger.info(f"Successfully processed audio in {result.processing_time:.2f} seconds")
        except Exception as e:
            # Top-level boundary: log with traceback and report via the result
            # instead of propagating.
            logger.exception(f"Error processing audio: {e}")
            result.error = str(e)
        return result

    def process_batch(self, file_mappings: List[Tuple[str, str, List[Tuple[float, float]]]],
                      options: Optional[ProcessingOptions] = None,
                      progress_callback: Optional[Callable[[str, float], Any]] = None) -> List[ProcessingResult]:
        """
        Process multiple audio files in batch.

        Files are processed one after another; a failure on one file is
        recorded in its result and does not stop the batch.

        Args:
            file_mappings: List of (input_path, output_path, segments) tuples
            options: Processing options (applied to all files)
            progress_callback: Optional callable invoked as
                ``progress_callback(message, percent)`` where ``percent`` is
                the overall batch progress

        Returns:
            List of ProcessingResult for each file, in input order
        """
        results = []
        total_files = len(file_mappings)
        for index, (input_path, output_path, segments) in enumerate(file_mappings):
            file_progress = None
            if progress_callback:
                file_progress = self._scoped_progress(progress_callback, index, total_files)
            results.append(
                self.process_audio(input_path, output_path, segments, options, file_progress)
            )
        return results

    @staticmethod
    def _scoped_progress(progress_callback, index, total_files):
        """Wrap a batch callback so per-file progress maps onto overall progress.

        ``index`` is bound when the wrapper is created, so the wrapper keeps
        reporting the right file even if it is invoked after the batch loop
        has moved on.
        """
        def file_progress(msg, pct):
            return progress_callback(
                f"File {index + 1}/{total_files}: {msg}",
                (index * 100 + pct) / total_files
            )
        return file_progress

    def validate_segments(self, segments: List[Tuple[float, float]],
                          duration: float) -> Tuple[List[Tuple[float, float]], List[str]]:
        """
        Validate and clean censorship segments.

        Segments are examined in input order. Empty or inverted segments and
        segments lying entirely outside ``[0, duration]`` are dropped; segments
        sticking out of that range are clipped to it; a segment that overlaps
        one already accepted is dropped. Every drop or clip adds a warning.

        Args:
            segments: List of (start_time, end_time) tuples
            duration: Total audio duration in seconds

        Returns:
            Tuple of (cleaned_segments, warnings), with cleaned_segments
            sorted by start time
        """
        cleaned = []
        warnings = []
        for start, end in segments:
            # Check segment validity
            if start >= end:
                warnings.append(f"Invalid segment: start ({start}) >= end ({end})")
                continue

            # Clip to the start of the audio
            if end <= 0:
                warnings.append(f"Segment end ({end}) before start of audio")
                continue
            if start < 0:
                warnings.append(f"Segment start ({start}) clipped to 0")
                start = 0.0

            # Clip to audio duration
            if start >= duration:
                warnings.append(f"Segment start ({start}) beyond audio duration ({duration})")
                continue
            if end > duration:
                warnings.append(f"Segment end ({end}) clipped to audio duration ({duration})")
                end = duration

            # Two half-open intervals overlap iff each starts before the other
            # ends; this also catches a segment that fully contains an earlier
            # one. Touching segments (end == next start) are not overlaps.
            # NOTE(review): an overlapping segment is dropped rather than
            # merged, so any part of it outside the earlier segment stays
            # uncensored — confirm this is intended.
            clash = next(
                ((prev_start, prev_end) for prev_start, prev_end in cleaned
                 if start < prev_end and end > prev_start),
                None,
            )
            if clash is not None:
                warnings.append(f"Overlapping segments: ({start}, {end}) with ({clash[0]}, {clash[1]})")
                continue

            cleaned.append((start, end))

        # Sort by start time
        cleaned.sort(key=lambda x: x[0])
        return cleaned, warnings

    def estimate_processing_time(self, file_path: str, num_segments: int) -> float:
        """
        Estimate processing time for a file.

        The estimate is a fixed overhead plus a term proportional to the
        audio duration and a term proportional to the number of segments.
        If the duration cannot be determined, a default estimate is returned.

        Args:
            file_path: Path to audio file
            num_segments: Number of segments to censor

        Returns:
            Estimated time in seconds
        """
        try:
            duration = self.audio_utils.get_duration(file_path)
            # Base time: 0.1 seconds per minute of audio
            base_time = duration / 60 * 0.1
            # Add time for segments: 0.05 seconds per segment
            segment_time = num_segments * 0.05
            # Add overhead: 2 seconds
            overhead = 2.0
            return base_time + segment_time + overhead
        except Exception as e:
            # Best effort: an estimate must never break the caller.
            logger.warning(f"Could not estimate processing time: {e}")
            return 10.0  # Default estimate

    def get_supported_formats(self) -> set:
        """
        Get the set of supported audio formats.

        Returns:
            The ``SUPPORTED_FORMATS`` attribute of the underlying AudioUtils
        """
        return self.audio_utils.SUPPORTED_FORMATS

    def check_dependencies(self) -> Dict[str, bool]:
        """
        Check if required dependencies are available.

        Each dependency is probed individually, so a missing package is
        reported as ``False`` rather than raising or being assumed present.

        Returns:
            Dictionary mapping "ffmpeg", "pydub", "librosa" and "numpy" to
            whether each one was found
        """
        import importlib.util
        import shutil

        def module_available(name):
            # find_spec locates the module without importing it.
            try:
                return importlib.util.find_spec(name) is not None
            except (ImportError, ValueError):
                return False

        return {
            "ffmpeg": shutil.which("ffmpeg") is not None,
            "pydub": module_available("pydub"),
            "librosa": module_available("librosa"),
            "numpy": module_available("numpy"),
        }