# clean-tracks/src/cli/commands/process.py
#
# 213 lines
# 7.0 KiB
# Python

"""
Process command for single file audio processing.
"""
import time
from pathlib import Path
from typing import Optional
import click
from src.cli.utils.output import (
print_success, print_error, print_info, print_warning,
format_file_size, format_duration
)
from src.cli.utils.progress import create_progress_bar
from src.cli.utils.validation import (
validate_audio_file, validate_output_path,
validate_censor_method, validate_whisper_model
)
# Import core processing modules
from src.core import (
AudioProcessor, ProcessingOptions, ProcessingResult,
WhisperModel, CensorMethod
)
@click.command(name='process')
@click.argument('input_file', callback=lambda ctx, param, value: str(validate_audio_file(value)))
@click.option(
    '--output', '-o',
    required=True,
    help='Output file path for processed audio.'
)
@click.option(
    '--method', '-m',
    default='beep',
    callback=lambda ctx, param, value: validate_censor_method(value),
    help='Censorship method: silence, beep, noise, or fade (default: beep).'
)
@click.option(
    '--model',
    default='base',
    callback=lambda ctx, param, value: validate_whisper_model(value),
    help='Whisper model size: tiny, base, small, medium, or large (default: base).'
)
@click.option(
    '--word-list', '-w',
    type=click.Path(exists=True),
    help='Custom word list file (CSV or JSON).'
)
@click.option(
    '--threshold', '-t',
    type=click.FloatRange(0.0, 1.0),
    default=0.7,
    help='Confidence threshold for word detection (0.0-1.0, default: 0.7).'
)
@click.option(
    '--padding',
    type=click.IntRange(0, 1000),
    default=100,
    help='Padding in milliseconds around detected words (default: 100ms).'
)
@click.option(
    '--force', '-f',
    is_flag=True,
    help='Overwrite output file if it exists.'
)
@click.option(
    '--dry-run',
    is_flag=True,
    help='Perform detection only without creating output file.'
)
@click.option(
    '--json',
    is_flag=True,
    help='Output results in JSON format.'
)
@click.pass_obj
def process_command(obj, input_file: str, output: str, method: str, model: str,
                    word_list: Optional[str], threshold: float, padding: int,
                    force: bool, dry_run: bool, json: bool):
    """
    Process a single audio file to detect and censor explicit content.

    This command transcribes the audio using Whisper, detects explicit words,
    and applies the specified censorship method to create a clean version.

    Examples:

    Basic usage with default settings:
    $ clean-tracks process audio.mp3 --output clean.mp3

    Use a larger model for better accuracy:
    $ clean-tracks process audio.mp3 -o clean.mp3 --model large

    Apply silence instead of beep:
    $ clean-tracks process audio.mp3 -o clean.mp3 --method silence

    Use custom word list:
    $ clean-tracks process audio.mp3 -o clean.mp3 --word-list custom.csv

    Preview detection without creating output:
    $ clean-tracks process audio.mp3 -o clean.mp3 --dry-run
    """
    # The docstring above is rendered by Click as the command's --help text,
    # so developer-facing notes live in comments instead.
    #
    # Parameters (all supplied by Click from the decorators above):
    #   obj        -- the Click context object; only its `verbose` attribute
    #                 is read here.
    #   input_file -- input path, already passed through validate_audio_file
    #                 and converted to str by the argument callback.
    #   output     -- requested output path; checked by validate_output_path.
    #   method     -- censor method name as returned by validate_censor_method;
    #                 upper-cased to look up a CensorMethod member.
    #   model      -- model name as returned by validate_whisper_model;
    #                 upper-cased to look up a WhisperModel member.
    #   word_list  -- optional path to a custom word list, or None.
    #   threshold  -- confidence threshold, constrained to 0.0-1.0 by Click.
    #   padding    -- padding in milliseconds, constrained to 0-1000 by Click.
    #   force      -- forwarded to validate_output_path.
    #   dry_run    -- when set, analyze_file is called instead of process_file
    #                 and no output-file information is reported.
    #   json       -- when set, results are echoed as a JSON document.
    #
    # Errors: Click's own exceptions propagate untouched so Click can apply
    # its normal formatting and exit codes; any other exception is reported
    # through print_error(..., exit_code=1).

    # ctx.obj may be None when the parent group did not populate it;
    # fall back to non-verbose output rather than failing on attribute access.
    verbose = getattr(obj, 'verbose', False)

    try:
        # Validate output path
        output_path = validate_output_path(output, force=force)
        input_path = Path(input_file)

        # Display file information
        file_size = input_path.stat().st_size
        # NOTE(review): this line and the progress bar are emitted in --json
        # mode as well; confirm they do not share a stream with the JSON
        # document if the output is meant to be machine-parsed.
        print_info(f'Processing: {input_path.name} ({format_file_size(file_size)})')
        if verbose:
            print_info(f'Model: {model}')
            print_info(f'Method: {method}')
            print_info(f'Threshold: {threshold}')
            print_info(f'Padding: {padding}ms')

        # Create processing options
        options = ProcessingOptions(
            whisper_model=WhisperModel[model.upper()],
            censor_method=CensorMethod[method.upper()],
            confidence_threshold=threshold,
            padding_ms=padding,
            word_list_path=word_list
        )

        # Initialize processor
        processor = AudioProcessor(options)

        # Use a monotonic clock so the reported duration cannot be skewed
        # by system clock adjustments during a long transcription.
        start_time = time.perf_counter()

        # Create progress bar for processing stages
        progress = create_progress_bar(
            total=4,
            label='Processing audio'
        )

        # NOTE(review): stages 1-3 are ticked before any work has started;
        # all real work happens inside the single processor call below, so
        # the bar does not track actual progress.
        # Stage 1: Load audio
        progress.update(1, 'Loading audio file...')
        # Stage 2: Transcribe
        progress.update(1, 'Transcribing audio...')
        # Stage 3: Detect words
        progress.update(1, 'Detecting explicit content...')

        # Stage 4: Apply censorship (or analysis only for a dry run)
        result: ProcessingResult
        if not dry_run:
            progress.update(1, 'Applying censorship...')
            result = processor.process_file(
                input_path=str(input_path),
                output_path=str(output_path)
            )
        else:
            progress.update(1, 'Analyzing content...')
            result = processor.analyze_file(
                input_path=str(input_path)
            )
        progress.finish()

        # Calculate processing time
        processing_time = time.perf_counter() - start_time

        # Display results
        if json:
            # The `json` flag parameter shadows the stdlib module name,
            # hence the aliased local import.
            import json as json_lib
            output_data = {
                'input_file': str(input_path),
                'output_file': str(output_path) if not dry_run else None,
                'words_detected': result.words_detected,
                'words_censored': result.words_censored,
                'duration': result.audio_duration,
                'processing_time': processing_time,
                'model': model,
                'method': method
            }
            click.echo(json_lib.dumps(output_data, indent=2))
        else:
            # Display summary
            click.echo()
            print_success(f'Processing complete in {format_duration(processing_time)}')

            # Display detection results
            if result.words_detected > 0:
                print_warning(f'Detected {result.words_detected} explicit word(s)')
                if not dry_run:
                    print_success(f'Censored {result.words_censored} occurrence(s)')
                # Show detected words if verbose
                if verbose and result.detected_words:
                    click.echo('\nDetected words:')
                    for word in result.detected_words:
                        click.echo(f'{word.text} at {word.start_time:.2f}s '
                                   f'(confidence: {word.confidence:.2%})')
            else:
                print_info('No explicit content detected')

            # Display output information
            if not dry_run:
                output_size = output_path.stat().st_size
                click.echo()
                print_success(f'Output saved: {output_path}')
                print_info(f'File size: {format_file_size(output_size)}')
                print_info(f'Duration: {format_duration(result.audio_duration)}')
    except (click.ClickException, click.Abort, click.exceptions.Exit):
        # Click's own usage errors and control-flow exceptions must reach
        # Click's top-level handler, which formats them and picks the exit
        # code; they are not processing failures.
        raise
    except Exception as e:
        # Top-level boundary for the command: report any other failure and
        # request a non-zero exit via print_error.
        print_error(f'Processing failed: {e}', exit_code=1)