clean-tracks/src/core/formats.py

270 lines
7.3 KiB
Python

"""
Audio format detection and utilities for Clean-Tracks.
"""
import mimetypes
from pathlib import Path
from typing import Optional, Dict, Any, List, Union
import logging
logger = logging.getLogger(__name__)

# Registry of the audio formats this module knows about, keyed by a short
# lower-case format identifier. Every entry carries the same fields:
#   mime_types      - MIME type strings that map to the format
#   extensions      - file extensions (lower-case, with leading dot)
#   name            - human-readable format name
#   lossy           - True for lossy codecs, False otherwise
#   max_bitrate     - upper bitrate bound, or None where no bound is listed
#                     (presumably kbps, like common_bitrates - confirm)
#   common_bitrates - typical bitrates in kbps, ascending; empty when the
#                     format has no bitrate presets
# Entry order is significant: lookups that scan this mapping return the
# first key whose entry matches.
SUPPORTED_FORMATS: Dict[str, Dict[str, Any]] = {
    'mp3': {
        'mime_types': ['audio/mpeg', 'audio/mp3'],
        'extensions': ['.mp3'],
        'name': 'MPEG Audio Layer 3',
        'lossy': True,
        'max_bitrate': 320,
        'common_bitrates': [128, 192, 256, 320],
    },
    'wav': {
        'mime_types': ['audio/wav', 'audio/x-wav', 'audio/wave'],
        'extensions': ['.wav', '.wave'],
        'name': 'Waveform Audio File Format',
        'lossy': False,
        'max_bitrate': None,  # Uncompressed
        'common_bitrates': [],
    },
    'flac': {
        'mime_types': ['audio/flac', 'audio/x-flac'],
        'extensions': ['.flac'],
        'name': 'Free Lossless Audio Codec',
        'lossy': False,
        'max_bitrate': None,  # Lossless compression
        'common_bitrates': [],
    },
    'm4a': {
        'mime_types': ['audio/mp4', 'audio/x-m4a', 'audio/m4a'],
        'extensions': ['.m4a', '.mp4'],
        'name': 'MPEG-4 Audio',
        'lossy': True,  # Usually AAC, which is lossy
        'max_bitrate': 512,
        'common_bitrates': [128, 256, 320],
    },
    'ogg': {
        'mime_types': ['audio/ogg', 'application/ogg', 'audio/vorbis'],
        'extensions': ['.ogg', '.oga'],
        'name': 'Ogg Vorbis',
        'lossy': True,
        'max_bitrate': 500,
        'common_bitrates': [128, 192, 256],
    },
    'wma': {
        'mime_types': ['audio/x-ms-wma'],
        'extensions': ['.wma'],
        'name': 'Windows Media Audio',
        'lossy': True,
        'max_bitrate': 768,
        'common_bitrates': [128, 192, 256],
    },
    'aac': {
        'mime_types': ['audio/aac', 'audio/x-aac'],
        'extensions': ['.aac'],
        'name': 'Advanced Audio Coding',
        'lossy': True,
        'max_bitrate': 512,
        'common_bitrates': [128, 256, 320],
    },
}
def detect_format(file_path: Union[str, Path]) -> Optional[str]:
    """
    Work out which supported audio format a path refers to.

    The lower-cased file suffix is compared against each format's
    ``extensions`` first. If nothing matches, the MIME type guessed by
    ``mimetypes.guess_type`` for the path is compared against each format's
    ``mime_types``. Formats are tried in SUPPORTED_FORMATS order and the
    first match wins.

    Args:
        file_path: Path to the audio file

    Returns:
        Format key (e.g., 'mp3', 'wav'), or None when neither step matches
        (a warning is logged in that case)
    """
    path = Path(file_path)

    # Step 1: match on the file suffix.
    suffix = path.suffix.lower()
    by_extension = next(
        (
            key
            for key, info in SUPPORTED_FORMATS.items()
            if suffix in info['extensions']
        ),
        None,
    )
    if by_extension is not None:
        logger.debug(f"Detected format '{by_extension}' by extension: {suffix}")
        return by_extension

    # Step 2: fall back to the MIME type guessed for the path.
    guessed_mime = mimetypes.guess_type(str(path))[0]
    if guessed_mime:
        by_mime = next(
            (
                key
                for key, info in SUPPORTED_FORMATS.items()
                if guessed_mime in info['mime_types']
            ),
            None,
        )
        if by_mime is not None:
            logger.debug(f"Detected format '{by_mime}' by MIME type: {guessed_mime}")
            return by_mime

    logger.warning(f"Could not detect format for file: {path}")
    return None
def is_format_supported(format_key: str) -> bool:
    """
    Report whether a format key names one of the supported formats.

    Args:
        format_key: Format identifier (e.g., 'mp3', 'wav'); it is
            lower-cased before the lookup

    Returns:
        True if the lower-cased key is present in SUPPORTED_FORMATS
    """
    normalized_key = format_key.lower()
    known_keys = SUPPORTED_FORMATS.keys()
    return normalized_key in known_keys
def get_format_info(format_key: str) -> Optional[Dict[str, Any]]:
    """
    Look up the property dictionary registered for a format.

    Args:
        format_key: Format identifier (e.g., 'mp3', 'wav'); it is
            lower-cased before the lookup

    Returns:
        The SUPPORTED_FORMATS entry for the key (the registry's own dict,
        not a copy), or None if the key is not registered
    """
    normalized_key = format_key.lower()
    try:
        return SUPPORTED_FORMATS[normalized_key]
    except KeyError:
        return None
def get_supported_extensions() -> List[str]:
    """
    Collect every file extension listed in SUPPORTED_FORMATS.

    Returns:
        Sorted list of the distinct extensions (with dots)
    """
    unique_extensions = {
        ext
        for info in SUPPORTED_FORMATS.values()
        for ext in info['extensions']
    }
    return sorted(unique_extensions)
def get_format_by_extension(extension: str) -> Optional[str]:
    """
    Map a file extension to its format key.

    Args:
        extension: File extension, with or without the leading dot; it is
            compared case-insensitively

    Returns:
        Key of the first SUPPORTED_FORMATS entry listing the extension, or
        None if no entry lists it
    """
    dotted = extension if extension.startswith('.') else f'.{extension}'
    wanted = dotted.lower()
    matching_keys = (
        key
        for key, info in SUPPORTED_FORMATS.items()
        if wanted in info['extensions']
    )
    return next(matching_keys, None)
def get_format_by_mime_type(mime_type: str) -> Optional[str]:
    """
    Get format key by MIME type.

    The value is normalised before matching: anything from the first ';'
    onwards (parameters such as ``codecs=...``) is dropped, surrounding
    whitespace is stripped and the remainder is lower-cased. This makes
    ``'Audio/MPEG'`` and ``'audio/ogg; codecs=vorbis'`` resolve the same
    way as their bare lower-case forms.

    Args:
        mime_type: MIME type string, optionally with parameters

    Returns:
        Key of the first SUPPORTED_FORMATS entry listing the MIME type, or
        None if no entry lists it or the argument is not a string
    """
    # Non-string input (e.g. None from a failed guess) simply has no match.
    if not isinstance(mime_type, str):
        return None

    # MIME type/subtype names are case-insensitive and may be followed by
    # ';'-separated parameters; the registry stores bare lower-case names.
    essence = mime_type.split(';', 1)[0].strip().lower()

    for format_key, format_info in SUPPORTED_FORMATS.items():
        if essence in format_info['mime_types']:
            return format_key
    return None
def is_lossy_format(format_key: str) -> Optional[bool]:
    """
    Tell whether a format is registered as lossy.

    Args:
        format_key: Format identifier

    Returns:
        The format's 'lossy' flag (True if lossy, False if lossless), or
        None if get_format_info() finds no entry for the key
    """
    info = get_format_info(format_key)
    return info['lossy'] if info else None
def get_recommended_bitrate(format_key: str, quality: str = 'high') -> Optional[int]:
    """
    Get recommended bitrate for a format based on quality level.

    The bitrate is picked from the format's 'common_bitrates' list, which
    is treated as ordered from lowest to highest. The quality levels map to
    positions in that list so that the result never decreases as the
    requested quality increases:

        'low'     -> first entry
        'medium'  -> lower-middle entry
        'high'    -> second-to-last entry (the only entry if there is one)
        'maximum' -> last entry

    Args:
        format_key: Format identifier
        quality: Quality level ('low', 'medium', 'high', 'maximum'),
            matched case-insensitively; any other value is treated as
            'maximum'

    Returns:
        Recommended bitrate in kbps, or None if the format is unknown or
        has no common bitrates listed
    """
    format_info = get_format_info(format_key)
    if not format_info or not format_info['common_bitrates']:
        return None

    bitrates = format_info['common_bitrates']
    last = len(bitrates) - 1

    # Non-negative indices keep the mapping monotonic for every list length.
    # The previous 'medium' index (len // 2) landed on the same entry as
    # 'high' for four-entry lists and above 'high' for two-entry lists.
    quality_map = {
        'low': 0,
        'medium': last // 2,
        'high': max(last - 1, 0),
        'maximum': last,
    }

    # Unrecognised quality names fall back to the highest bitrate.
    index = quality_map.get(quality.lower(), last)
    return bitrates[index]
def validate_audio_file(file_path: Union[str, Path]) -> Dict[str, Any]:
    """
    Run basic checks on an audio file and report the outcome.

    Checks are applied in order and the first failure ends validation: the
    path must exist, must be a regular file, must not be empty, and must
    have a format that detect_format() recognises.

    Args:
        file_path: Path to the audio file

    Returns:
        Dictionary with the keys:
            'valid'       - True only if every check passed
            'exists'      - whether the path exists
            'is_file'     - whether the path is a file
            'format'      - detected format key, or None
            'format_info' - get_format_info() result for the format, or None
            'size_bytes'  - file size, or None if it was never read
            'errors'      - list holding the failure message, if any
    """
    path = Path(file_path)
    exists = path.exists()

    report: Dict[str, Any] = {
        'valid': False,
        'exists': exists,
        'is_file': exists and path.is_file(),
        'format': None,
        'format_info': None,
        'size_bytes': None,
        'errors': [],
    }

    def _reject(message: str) -> Dict[str, Any]:
        # Record why validation stopped and hand back the report as-is.
        report['errors'].append(message)
        return report

    if not exists:
        return _reject(f"File does not exist: {path}")

    if not report['is_file']:
        return _reject(f"Path is not a file: {path}")

    # Check file size
    size = path.stat().st_size
    report['size_bytes'] = size
    if size == 0:
        return _reject("File is empty")

    # Detect format
    detected = detect_format(path)
    if not detected:
        return _reject(f"Unsupported or unrecognized format: {path.suffix}")

    report['format'] = detected
    report['format_info'] = get_format_info(detected)
    report['valid'] = True
    return report