# clean-tracks/src/cli/commands/process.py

"""
Process command for single file audio processing.
"""

import time
from pathlib import Path
from typing import Optional

import click

from src.cli.utils.output import (
    print_success, print_error, print_info, print_warning,
    format_file_size, format_duration
)
from src.cli.utils.progress import create_progress_bar
from src.cli.utils.validation import (
    validate_audio_file, validate_output_path,
    validate_censor_method, validate_whisper_model
)

# Import core processing modules
from src.core import (
    AudioProcessor, ProcessingOptions, ProcessingResult,
    WhisperModel, CensorMethod
)


@click.command(name='process')
@click.argument('input_file', callback=lambda ctx, param, value: str(validate_audio_file(value)))
@click.option(
    '--output', '-o',
    required=True,
    help='Output file path for processed audio.'
)
@click.option(
    '--method', '-m',
    default='beep',
    callback=lambda ctx, param, value: validate_censor_method(value),
    help='Censorship method: silence, beep, noise, or fade (default: beep).'
)
@click.option(
    '--model',
    default='base',
    callback=lambda ctx, param, value: validate_whisper_model(value),
    help='Whisper model size: tiny, base, small, medium, or large (default: base).'
)
@click.option(
    '--word-list', '-w',
    type=click.Path(exists=True),
    help='Custom word list file (CSV or JSON).'
)
@click.option(
    '--threshold', '-t',
    type=click.FloatRange(0.0, 1.0),
    default=0.7,
    help='Confidence threshold for word detection (0.0-1.0, default: 0.7).'
)
@click.option(
    '--padding',
    type=click.IntRange(0, 1000),
    default=100,
    help='Padding in milliseconds around detected words (default: 100ms).'
)
@click.option(
    '--force', '-f',
    is_flag=True,
    help='Overwrite output file if it exists.'
)
@click.option(
    '--dry-run',
    is_flag=True,
    help='Perform detection only without creating output file.'
)
@click.option(
    '--json',
    is_flag=True,
    help='Output results in JSON format.'
)
@click.pass_obj
def process_command(obj, input_file: str, output: str, method: str, model: str,
                    word_list: Optional[str], threshold: float, padding: int,
                    force: bool, dry_run: bool, json: bool):
    """
    Process a single audio file to detect and censor explicit content.

    This command transcribes the audio using Whisper, detects explicit words,
    and applies the specified censorship method to create a clean version.

    Examples:

        Basic usage with default settings:
        $ clean-tracks process audio.mp3 --output clean.mp3

        Use a larger model for better accuracy:
        $ clean-tracks process audio.mp3 -o clean.mp3 --model large

        Apply silence instead of beep:
        $ clean-tracks process audio.mp3 -o clean.mp3 --method silence

        Use custom word list:
        $ clean-tracks process audio.mp3 -o clean.mp3 --word-list custom.csv

        Preview detection without creating output:
        $ clean-tracks process audio.mp3 -o clean.mp3 --dry-run
    """
    # NOTE: the docstring above is what click renders for `process --help`, so
    # developer-facing notes live in comments rather than in the docstring.
    #
    # obj      -- context object injected by @click.pass_obj; only its
    #             `verbose` attribute is read here.
    # json     -- the --json flag. It shadows the stdlib module name inside
    #             this function, hence the aliased local import further down.
    # All other parameters map one-to-one onto the options declared above.
    # Failures are reported through print_error(..., exit_code=1).
    try:
        input_path = Path(input_file)

        # A dry run writes nothing (the JSON report even emits
        # output_file=None), so the destination is only validated when a file
        # will actually be produced. Otherwise a preview would fail merely
        # because the target already exists and --force was not given.
        output_path = None if dry_run else validate_output_path(output, force=force)

        # Display file information
        # NOTE(review): if print_info writes to stdout, these lines precede the
        # JSON document in --json mode -- confirm against src.cli.utils.output.
        file_size = input_path.stat().st_size
        print_info(f'Processing: {input_path.name} ({format_file_size(file_size)})')

        if obj.verbose:
            print_info(f'Model: {model}')
            print_info(f'Method: {method}')
            print_info(f'Threshold: {threshold}')
            print_info(f'Padding: {padding}ms')

        # Translate the CLI strings into the core enums by member name.
        options = ProcessingOptions(
            whisper_model=WhisperModel[model.upper()],
            censor_method=CensorMethod[method.upper()],
            confidence_threshold=threshold,
            padding_ms=padding,
            word_list_path=word_list
        )
        processor = AudioProcessor(options)

        start_time = time.time()
        progress = create_progress_bar(
            total=4,
            label='Processing audio'
        )

        result: ProcessingResult
        try:
            # The first three ticks are cosmetic: all real work happens inside
            # the single process_file()/analyze_file() call below.
            progress.update(1, 'Loading audio file...')
            progress.update(1, 'Transcribing audio...')
            progress.update(1, 'Detecting explicit content...')

            if dry_run:
                progress.update(1, 'Analyzing content...')
                result = processor.analyze_file(
                    input_path=str(input_path)
                )
            else:
                progress.update(1, 'Applying censorship...')
                result = processor.process_file(
                    input_path=str(input_path),
                    output_path=str(output_path)
                )
        finally:
            # Always close the bar, even when processing raises, so the error
            # message is not printed into a half-drawn progress line.
            progress.finish()

        processing_time = time.time() - start_time

        if json:
            import json as json_lib
            output_data = {
                'input_file': str(input_path),
                'output_file': None if output_path is None else str(output_path),
                'words_detected': result.words_detected,
                'words_censored': result.words_censored,
                'duration': result.audio_duration,
                'processing_time': processing_time,
                'model': model,
                'method': method
            }
            click.echo(json_lib.dumps(output_data, indent=2))
        else:
            # Human-readable summary
            click.echo()
            print_success(f'Processing complete in {format_duration(processing_time)}')

            if result.words_detected > 0:
                print_warning(f'Detected {result.words_detected} explicit word(s)')

                if not dry_run:
                    print_success(f'Censored {result.words_censored} occurrence(s)')

                # Per-word breakdown only in verbose mode
                if obj.verbose and result.detected_words:
                    click.echo('\nDetected words:')
                    for word in result.detected_words:
                        click.echo(f'  • {word.text} at {word.start_time:.2f}s '
                                 f'(confidence: {word.confidence:.2%})')
            else:
                print_info('No explicit content detected')

            # Output details exist only when a file was actually written.
            if output_path is not None:
                output_size = output_path.stat().st_size
                click.echo()
                print_success(f'Output saved: {output_path}')
                print_info(f'File size: {format_file_size(output_size)}')
                print_info(f'Duration: {format_duration(result.audio_duration)}')

    except (click.Abort, click.exceptions.Exit):
        # These are click's control-flow signals (user abort / ctx.exit()),
        # not processing errors; let click handle them instead of reporting
        # them as "Processing failed".
        raise
    except Exception as e:
        print_error(f'Processing failed: {str(e)}', exit_code=1)