""" Audio format detection and utilities for Clean-Tracks. """ import mimetypes from pathlib import Path from typing import Optional, Dict, Any, List, Union import logging logger = logging.getLogger(__name__) # Supported audio formats with their properties SUPPORTED_FORMATS = { 'mp3': { 'mime_types': ['audio/mpeg', 'audio/mp3'], 'extensions': ['.mp3'], 'name': 'MPEG Audio Layer 3', 'lossy': True, 'max_bitrate': 320, 'common_bitrates': [128, 192, 256, 320] }, 'wav': { 'mime_types': ['audio/wav', 'audio/x-wav', 'audio/wave'], 'extensions': ['.wav', '.wave'], 'name': 'Waveform Audio File Format', 'lossy': False, 'max_bitrate': None, # Uncompressed 'common_bitrates': [] }, 'flac': { 'mime_types': ['audio/flac', 'audio/x-flac'], 'extensions': ['.flac'], 'name': 'Free Lossless Audio Codec', 'lossy': False, 'max_bitrate': None, # Lossless compression 'common_bitrates': [] }, 'm4a': { 'mime_types': ['audio/mp4', 'audio/x-m4a', 'audio/m4a'], 'extensions': ['.m4a', '.mp4'], 'name': 'MPEG-4 Audio', 'lossy': True, # Usually AAC, which is lossy 'max_bitrate': 512, 'common_bitrates': [128, 256, 320] }, 'ogg': { 'mime_types': ['audio/ogg', 'application/ogg', 'audio/vorbis'], 'extensions': ['.ogg', '.oga'], 'name': 'Ogg Vorbis', 'lossy': True, 'max_bitrate': 500, 'common_bitrates': [128, 192, 256] }, 'wma': { 'mime_types': ['audio/x-ms-wma'], 'extensions': ['.wma'], 'name': 'Windows Media Audio', 'lossy': True, 'max_bitrate': 768, 'common_bitrates': [128, 192, 256] }, 'aac': { 'mime_types': ['audio/aac', 'audio/x-aac'], 'extensions': ['.aac'], 'name': 'Advanced Audio Coding', 'lossy': True, 'max_bitrate': 512, 'common_bitrates': [128, 256, 320] } } def detect_format(file_path: Union[str, Path]) -> Optional[str]: """ Detect the audio format of a file. Args: file_path: Path to the audio file Returns: Format key (e.g., 'mp3', 'wav') or None if not detected """ file_path = Path(file_path) # First try to detect by file extension extension = file_path.suffix.lower() for format_key, format_info in SUPPORTED_FORMATS.items(): if extension in format_info['extensions']: logger.debug(f"Detected format '{format_key}' by extension: {extension}") return format_key # Try to detect by MIME type mime_type, _ = mimetypes.guess_type(str(file_path)) if mime_type: for format_key, format_info in SUPPORTED_FORMATS.items(): if mime_type in format_info['mime_types']: logger.debug(f"Detected format '{format_key}' by MIME type: {mime_type}") return format_key logger.warning(f"Could not detect format for file: {file_path}") return None def is_format_supported(format_key: str) -> bool: """ Check if a format is supported. Args: format_key: Format identifier (e.g., 'mp3', 'wav') Returns: True if the format is supported """ return format_key.lower() in SUPPORTED_FORMATS def get_format_info(format_key: str) -> Optional[Dict[str, Any]]: """ Get detailed information about a format. Args: format_key: Format identifier (e.g., 'mp3', 'wav') Returns: Dictionary with format information or None if not found """ return SUPPORTED_FORMATS.get(format_key.lower()) def get_supported_extensions() -> List[str]: """ Get a list of all supported file extensions. Returns: List of supported extensions (with dots) """ extensions = [] for format_info in SUPPORTED_FORMATS.values(): extensions.extend(format_info['extensions']) return sorted(list(set(extensions))) def get_format_by_extension(extension: str) -> Optional[str]: """ Get format key by file extension. Args: extension: File extension (with or without dot) Returns: Format key or None if not found """ if not extension.startswith('.'): extension = f'.{extension}' extension = extension.lower() for format_key, format_info in SUPPORTED_FORMATS.items(): if extension in format_info['extensions']: return format_key return None def get_format_by_mime_type(mime_type: str) -> Optional[str]: """ Get format key by MIME type. Args: mime_type: MIME type string Returns: Format key or None if not found """ for format_key, format_info in SUPPORTED_FORMATS.items(): if mime_type in format_info['mime_types']: return format_key return None def is_lossy_format(format_key: str) -> Optional[bool]: """ Check if a format uses lossy compression. Args: format_key: Format identifier Returns: True if lossy, False if lossless, None if format not found """ format_info = get_format_info(format_key) if format_info: return format_info['lossy'] return None def get_recommended_bitrate(format_key: str, quality: str = 'high') -> Optional[int]: """ Get recommended bitrate for a format based on quality level. Args: format_key: Format identifier quality: Quality level ('low', 'medium', 'high', 'maximum') Returns: Recommended bitrate in kbps or None """ format_info = get_format_info(format_key) if not format_info or not format_info['common_bitrates']: return None bitrates = format_info['common_bitrates'] quality_map = { 'low': 0, 'medium': len(bitrates) // 2, 'high': -2 if len(bitrates) > 1 else -1, 'maximum': -1 } index = quality_map.get(quality.lower(), -1) return bitrates[index] def validate_audio_file(file_path: Union[str, Path]) -> Dict[str, Any]: """ Validate an audio file and return information about it. Args: file_path: Path to the audio file Returns: Dictionary with validation results """ file_path = Path(file_path) result = { 'valid': False, 'exists': file_path.exists(), 'is_file': file_path.is_file() if file_path.exists() else False, 'format': None, 'format_info': None, 'size_bytes': None, 'errors': [] } if not result['exists']: result['errors'].append(f"File does not exist: {file_path}") return result if not result['is_file']: result['errors'].append(f"Path is not a file: {file_path}") return result # Check file size result['size_bytes'] = file_path.stat().st_size if result['size_bytes'] == 0: result['errors'].append("File is empty") return result # Detect format format_key = detect_format(file_path) if not format_key: result['errors'].append(f"Unsupported or unrecognized format: {file_path.suffix}") return result result['format'] = format_key result['format_info'] = get_format_info(format_key) result['valid'] = True return result