"""
Audio processor pipeline for Clean-Tracks.

Orchestrates the complete audio processing workflow, from loading through
censorship to saving.
"""

import os
import logging
from typing import Callable, Dict, List, Tuple, Optional, Any
from pathlib import Path
from dataclasses import dataclass
from enum import Enum

# from .audio_utils import AudioUtils
from .audio_utils_simple import AudioUtils

logger = logging.getLogger(__name__)


class CensorshipMethod(Enum):
    """Enumeration of available censorship methods."""
    SILENCE = "silence"
    BEEP = "beep"
    WHITE_NOISE = "white_noise"
    FADE = "fade"


@dataclass
class ProcessingOptions:
    """Options for audio processing."""
    censorship_method: CensorshipMethod = CensorshipMethod.SILENCE
    beep_frequency: int = 1000  # Hz
    beep_volume: float = -20.0  # dBFS
    noise_volume: float = -30.0  # dBFS
    fade_duration: int = 10  # milliseconds
    normalize_output: bool = True
    target_dBFS: float = -20.0
    preserve_format: bool = True
    chunk_duration: float = 1800.0  # seconds (30 minutes), for chunking long files
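

# Illustrative configuration example (not executed; the values below are
# placeholders, not recommended defaults):
#
#   options = ProcessingOptions(
#       censorship_method=CensorshipMethod.BEEP,
#       beep_frequency=800,
#       normalize_output=False,
#   )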


@dataclass
class ProcessingResult:
    """Result of audio processing."""
    success: bool
    output_path: Optional[str] = None
    duration: Optional[float] = None
    segments_censored: int = 0
    processing_time: Optional[float] = None
    error: Optional[str] = None
    warnings: Optional[List[str]] = None

    def __post_init__(self):
        if self.warnings is None:
            self.warnings = []


class AudioProcessor:
    """Main audio processor that handles the complete censorship pipeline."""

    def __init__(self, audio_utils: Optional[AudioUtils] = None):
        """
        Initialize the audio processor.

        Args:
            audio_utils: Optional AudioUtils instance (creates new if None)
        """
        self.audio_utils = audio_utils or AudioUtils()

    def process_audio(self, input_path: str, output_path: str,
                      segments: List[Tuple[float, float]],
                      options: Optional[ProcessingOptions] = None,
                      progress_callback: Optional[Callable] = None) -> ProcessingResult:
        """
        Process an audio file with censorship.

        Args:
            input_path: Path to input audio file
            output_path: Path for output audio file
            segments: List of (start_time, end_time) tuples to censor
            options: Processing options
            progress_callback: Optional callback for progress updates

        Returns:
            ProcessingResult with details of the operation
        """
        if options is None:
            options = ProcessingOptions()

        result = ProcessingResult(success=False)

        try:
            import time
            start_time = time.time()

            # Validate input file
            if progress_callback:
                progress_callback("Validating audio file...", 0)

            validation = self.audio_utils.validate_audio_file(input_path)
            if not validation["valid"]:
                result.error = f"Invalid audio file: {', '.join(validation['errors'])}"
                return result

            result.warnings.extend(validation.get("warnings", []))
            result.duration = validation["duration"]

            # Load audio
            if progress_callback:
                progress_callback("Loading audio file...", 10)

            logger.info(f"Loading audio from {input_path}")
            audio = self.audio_utils.load_audio(input_path)

            # Apply censorship if segments provided
            if segments:
                if progress_callback:
                    progress_callback(f"Applying censorship to {len(segments)} segments...", 30)

                logger.info(f"Applying {options.censorship_method.value} censorship to {len(segments)} segments")

                # Prepare censorship parameters
                kwargs = {}
                if options.censorship_method == CensorshipMethod.BEEP:
                    kwargs["frequency"] = options.beep_frequency
                elif options.censorship_method == CensorshipMethod.WHITE_NOISE:
                    kwargs["volume"] = options.noise_volume
                elif options.censorship_method == CensorshipMethod.FADE:
                    kwargs["fade_duration"] = options.fade_duration

                # Apply censorship
                audio = self.audio_utils.apply_censorship(
                    audio,
                    segments,
                    options.censorship_method.value,
                    **kwargs
                )

                result.segments_censored = len(segments)

            # Normalize if requested
            if options.normalize_output:
                if progress_callback:
                    progress_callback("Normalizing audio...", 70)

                logger.info(f"Normalizing audio to {options.target_dBFS} dBFS")
                audio = self.audio_utils.normalize_audio(audio, options.target_dBFS)

            # Determine output format
            output_format = None
            if options.preserve_format:
                input_ext = Path(input_path).suffix[1:]
                output_ext = Path(output_path).suffix[1:]
                if input_ext != output_ext:
                    logger.warning(f"Output extension differs from input, using output extension: {output_ext}")
                output_format = output_ext

            # Save processed audio
            if progress_callback:
                progress_callback("Saving processed audio...", 90)

            logger.info(f"Saving processed audio to {output_path}")
            save_success = self.audio_utils.save_audio(audio, output_path, format=output_format)

            if not save_success:
                result.error = "Failed to save processed audio"
                return result

            # Calculate processing time
            result.processing_time = time.time() - start_time

            # Success!
            if progress_callback:
                progress_callback("Processing complete!", 100)

            result.success = True
            result.output_path = output_path

            logger.info(f"Successfully processed audio in {result.processing_time:.2f} seconds")

        except Exception as e:
            logger.error(f"Error processing audio: {e}")
            result.error = str(e)

        return result
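
    # Usage sketch (illustrative only): censor two segments with a 1 kHz beep.
    # The file paths and segment times below are placeholders, not values used
    # anywhere in Clean-Tracks.
    #
    #   processor = AudioProcessor()
    #   opts = ProcessingOptions(censorship_method=CensorshipMethod.BEEP)
    #   result = processor.process_audio(
    #       "song.mp3", "song_clean.mp3",
    #       segments=[(12.5, 13.1), (47.0, 47.6)],
    #       options=opts,
    #       progress_callback=lambda msg, pct: print(f"{pct:3.0f}% {msg}"),
    #   )
    #   if not result.success:
    #       print(result.error)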

    def process_batch(self, file_mappings: List[Tuple[str, str, List[Tuple[float, float]]]],
                      options: Optional[ProcessingOptions] = None,
                      progress_callback: Optional[Callable] = None) -> List[ProcessingResult]:
        """
        Process multiple audio files in batch.

        Args:
            file_mappings: List of (input_path, output_path, segments) tuples
            options: Processing options (applied to all files)
            progress_callback: Optional callback for progress updates

        Returns:
            List of ProcessingResult for each file
        """
        results = []
        total_files = len(file_mappings)

        for i, (input_path, output_path, segments) in enumerate(file_mappings):
            if progress_callback:
                # Bind the current index as a default argument so the wrapper
                # does not rely on late binding of the loop variable.
                file_progress = lambda msg, pct, i=i: progress_callback(
                    f"File {i+1}/{total_files}: {msg}",
                    (i * 100 + pct) / total_files
                )
            else:
                file_progress = None

            result = self.process_audio(input_path, output_path, segments, options, file_progress)
            results.append(result)

        return results
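
    # Example file_mappings structure (illustrative; paths and times are
    # placeholders):
    #
    #   mappings = [
    #       ("a.mp3", "a_clean.mp3", [(10.0, 10.8)]),
    #       ("b.wav", "b_clean.wav", []),  # no segments: just loaded, normalized, re-saved
    #   ]
    #   results = processor.process_batch(mappings)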

    def validate_segments(self, segments: List[Tuple[float, float]],
                          duration: float) -> Tuple[List[Tuple[float, float]], List[str]]:
        """
        Validate and clean censorship segments.

        Args:
            segments: List of (start_time, end_time) tuples
            duration: Total audio duration in seconds

        Returns:
            Tuple of (cleaned_segments, warnings)
        """
        cleaned = []
        warnings = []

        for start, end in segments:
            # Check segment validity
            if start >= end:
                warnings.append(f"Invalid segment: start ({start}) >= end ({end})")
                continue

            # Clip to audio duration
            if start >= duration:
                warnings.append(f"Segment start ({start}) beyond audio duration ({duration})")
                continue

            if end > duration:
                warnings.append(f"Segment end ({end}) clipped to audio duration ({duration})")
                end = duration

            # Check for overlaps with previously accepted segments. Two intervals
            # overlap when each starts before the other ends; this also covers the
            # case where one segment fully contains the other.
            overlap = False
            for prev_start, prev_end in cleaned:
                if start < prev_end and end > prev_start:
                    warnings.append(f"Overlapping segments: ({start}, {end}) with ({prev_start}, {prev_end})")
                    overlap = True
                    break

            if not overlap:
                cleaned.append((start, end))

        # Sort by start time
        cleaned.sort(key=lambda x: x[0])

        return cleaned, warnings
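
    # Illustrative call (values are placeholders): with duration=60.0 and
    # segments=[(5.0, 4.0), (10.0, 12.0), (11.0, 13.0), (58.0, 65.0)], the method
    # drops the inverted segment, flags and drops the overlapping (11.0, 13.0),
    # clips the last segment to (58.0, 60.0), and returns the cleaned, sorted
    # list [(10.0, 12.0), (58.0, 60.0)] plus the corresponding warnings.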

    def estimate_processing_time(self, file_path: str, num_segments: int) -> float:
        """
        Estimate processing time for a file.

        Args:
            file_path: Path to audio file
            num_segments: Number of segments to censor

        Returns:
            Estimated time in seconds
        """
        try:
            duration = self.audio_utils.get_duration(file_path)

            # Base time: 0.1 seconds per minute of audio
            base_time = duration / 60 * 0.1

            # Add time for segments: 0.05 seconds per segment
            segment_time = num_segments * 0.05

            # Add overhead: 2 seconds
            overhead = 2.0

            return base_time + segment_time + overhead

        except Exception as e:
            logger.warning(f"Could not estimate processing time: {e}")
            return 10.0  # Default estimate
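
    # Worked example of the heuristic above: a 3-minute file with 5 segments gives
    # 3 * 0.1 + 5 * 0.05 + 2.0 = 0.3 + 0.25 + 2.0 = 2.55 seconds. The constants are
    # rough tuning values, not measurements.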

    def get_supported_formats(self) -> set:
        """
        Get the set of supported audio formats.

        Returns:
            Set of supported file extensions
        """
        return self.audio_utils.SUPPORTED_FORMATS

    def check_dependencies(self) -> Dict[str, bool]:
        """
        Check if required dependencies are available.

        Returns:
            Dictionary of dependency status
        """
        from importlib.util import find_spec

        try:
            from pydub.utils import which
            pydub_available = True
            ffmpeg_available = which("ffmpeg") is not None
        except ImportError:
            pydub_available = False
            ffmpeg_available = False

        return {
            "ffmpeg": ffmpeg_available,
            "pydub": pydub_available,
            # This module does not import librosa or numpy itself, so check for
            # them explicitly instead of assuming they are installed.
            "librosa": find_spec("librosa") is not None,
            "numpy": find_spec("numpy") is not None,
        }
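

if __name__ == "__main__":
    # Minimal manual smoke test (illustrative only). The paths below are
    # placeholders; point them at real files and make sure ffmpeg is installed,
    # then run this module inside its package (e.g. python -m <package>.audio_processor),
    # since the relative AudioUtils import requires package context.
    logging.basicConfig(level=logging.INFO)

    processor = AudioProcessor()
    print("Dependencies:", processor.check_dependencies())

    demo_segments = [(1.0, 2.0), (5.5, 6.25)]
    demo_options = ProcessingOptions(censorship_method=CensorshipMethod.SILENCE)
    demo_result = processor.process_audio(
        "example_input.mp3",   # placeholder path
        "example_output.mp3",  # placeholder path
        segments=demo_segments,
        options=demo_options,
        progress_callback=lambda msg, pct: print(f"{pct:5.1f}% {msg}"),
    )
    print("Success:", demo_result.success, "| Error:", demo_result.error)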