""" Video download configuration """ from pathlib import Path from typing import List, Optional, Dict, Any try: from pydantic_settings import BaseSettings from pydantic import Field except ImportError: # Fallback for older pydantic versions from pydantic import BaseSettings, Field from backend.models.video_download import VideoQuality, DownloadMethod class VideoDownloadConfig(BaseSettings): """Configuration for video download system""" # API Keys youtube_api_key: Optional[str] = Field(None, description="YouTube Data API v3 key") # Storage Configuration storage_path: Path = Field(Path("./video_storage"), description="Base storage directory") max_storage_gb: float = Field(10.0, description="Maximum storage size in GB") cleanup_older_than_days: int = Field(30, description="Clean up files older than X days") temp_dir: Path = Field(Path("./video_storage/temp"), description="Temporary files directory") # Download Preferences default_quality: VideoQuality = Field(VideoQuality.MEDIUM_720P, description="Default video quality") max_video_duration_minutes: int = Field(180, description="Skip videos longer than X minutes") prefer_audio_only: bool = Field(True, description="Prefer audio-only for transcription") extract_audio: bool = Field(True, description="Always extract audio") save_video: bool = Field(False, description="Save video files (storage optimization)") # Method Configuration enabled_methods: List[DownloadMethod] = Field( default=[ DownloadMethod.PYTUBEFIX, DownloadMethod.YT_DLP, DownloadMethod.PLAYWRIGHT, DownloadMethod.TRANSCRIPT_ONLY ], description="Enabled download methods in order of preference" ) method_timeout_seconds: int = Field(120, description="Timeout per download method") max_retries_per_method: int = Field(2, description="Max retries per method") # yt-dlp specific configuration ytdlp_use_cookies: bool = Field(True, description="Use cookies for yt-dlp") ytdlp_cookies_file: Optional[Path] = Field(None, description="Path to cookies.txt file") ytdlp_user_agents: List[str] = Field( default=[ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" ], description="User agents for yt-dlp rotation" ) # Playwright configuration playwright_headless: bool = Field(True, description="Run Playwright in headless mode") playwright_browser_session: Optional[Path] = Field(None, description="Saved browser session") playwright_timeout: int = Field(30000, description="Playwright timeout in milliseconds") # External tools configuration external_tools_enabled: bool = Field(True, description="Enable external tools") fourk_video_downloader_path: Optional[Path] = Field(None, description="Path to 4K Video Downloader CLI") # Web services configuration web_services_enabled: bool = Field(True, description="Enable web service APIs") web_service_timeout: int = Field(30, description="Web service timeout in seconds") web_service_user_agents: List[str] = Field( default=[ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" ], description="User agents for web services" ) # Performance Configuration max_concurrent_downloads: int = Field(3, description="Maximum concurrent downloads") cache_results: bool = Field(True, description="Cache download results") cache_ttl_hours: int = Field(24, description="Cache TTL in hours") # Monitoring and Health health_check_interval_minutes: int = Field(30, description="Health check interval") success_rate_threshold: float = Field(0.7, description="Switch methods if success rate drops below") enable_telemetry: bool = Field(True, description="Enable performance telemetry") # Error Handling max_total_retries: int = Field(5, description="Maximum total retries across all methods") backoff_factor: float = Field(1.5, description="Exponential backoff factor") # Audio Processing audio_format: str = Field("mp3", description="Audio output format") audio_quality: str = Field("192k", description="Audio quality") keep_audio_files: bool = Field(True, description="Keep audio files for future re-transcription") audio_cleanup_days: int = Field(30, description="Delete audio files older than X days (0 = never delete)") # Video Processing video_format: str = Field("mp4", description="Video output format") merge_audio_video: bool = Field(True, description="Merge audio and video streams") # Faster-Whisper Configuration (20-32x speed improvement) whisper_model: str = Field("large-v3-turbo", description="Faster-whisper model ('large-v3-turbo', 'large-v3', 'large-v2', 'medium', 'small', 'base', 'tiny')") whisper_device: str = Field("auto", description="Processing device ('auto', 'cpu', 'cuda')") whisper_compute_type: str = Field("auto", description="Compute type ('auto', 'int8', 'float16', 'float32')") whisper_beam_size: int = Field(5, description="Beam search size (1-10, higher = better quality)") whisper_vad_filter: bool = Field(True, description="Voice Activity Detection for efficiency") whisper_word_timestamps: bool = Field(True, description="Enable word-level timestamps") whisper_temperature: float = Field(0.0, description="Sampling temperature (0 = deterministic)") whisper_best_of: int = Field(5, description="Number of candidates when sampling") class Config: env_file = ".env" env_prefix = "VIDEO_DOWNLOAD_" case_sensitive = False extra = "ignore" # Allow extra environment variables def get_storage_dirs(self) -> Dict[str, Path]: """Get all storage directories""" base = Path(self.storage_path) return { "base": base, "videos": base / "videos", "audio": base / "audio", "transcripts": base / "transcripts", "summaries": base / "summaries", "temp": base / "temp", "cache": base / "cache", "logs": base / "logs" } def ensure_directories(self): """Create all required directories""" dirs = self.get_storage_dirs() for path in dirs.values(): path.mkdir(parents=True, exist_ok=True) def get_method_priority(self) -> List[DownloadMethod]: """Get download methods in priority order""" return self.enabled_methods.copy() def is_method_enabled(self, method: DownloadMethod) -> bool: """Check if a download method is enabled""" return method in self.enabled_methods # Default configuration instance default_config = VideoDownloadConfig() def get_video_download_config() -> VideoDownloadConfig: """Get video download configuration""" return VideoDownloadConfig() # Configuration validation def validate_config(config: VideoDownloadConfig) -> List[str]: """Validate configuration and return list of warnings/errors""" warnings = [] # Check storage space if config.max_storage_gb < 1.0: warnings.append("Storage limit is very low (< 1GB)") # Check if any download methods are enabled if not config.enabled_methods: warnings.append("No download methods enabled") # Check for required tools/dependencies if DownloadMethod.PLAYWRIGHT in config.enabled_methods: try: import playwright except ImportError: warnings.append("Playwright not installed but enabled in config") # Check external tool paths if config.fourk_video_downloader_path and not config.fourk_video_downloader_path.exists(): warnings.append(f"4K Video Downloader path does not exist: {config.fourk_video_downloader_path}") # Check cookies file if config.ytdlp_cookies_file and not config.ytdlp_cookies_file.exists(): warnings.append(f"yt-dlp cookies file does not exist: {config.ytdlp_cookies_file}") return warnings