"""
Video download configuration
"""
|
|
import importlib.util
from pathlib import Path
from typing import List, Optional, Dict, Any

try:
    from pydantic_settings import BaseSettings
    from pydantic import Field
except ImportError:
    # Fallback for older pydantic versions
    from pydantic import BaseSettings, Field

from backend.models.video_download import VideoQuality, DownloadMethod
|
|
|
|
|
|
class VideoDownloadConfig(BaseSettings):
    """Settings model for the video download system.

    Every option is declared as a typed field with a default. The nested
    ``Config`` class names a ``.env`` file and the ``VIDEO_DOWNLOAD_``
    prefix for environment-based overrides, with case-insensitive matching
    and unknown entries ignored.
    """

    # --- API keys ---------------------------------------------------------
    youtube_api_key: Optional[str] = Field(
        default=None,
        description="YouTube Data API v3 key",
    )

    # --- Storage ----------------------------------------------------------
    storage_path: Path = Field(
        default=Path("./video_storage"),
        description="Base storage directory",
    )
    max_storage_gb: float = Field(
        default=10.0,
        description="Maximum storage size in GB",
    )
    cleanup_older_than_days: int = Field(
        default=30,
        description="Clean up files older than X days",
    )
    temp_dir: Path = Field(
        default=Path("./video_storage/temp"),
        description="Temporary files directory",
    )

    # --- Download preferences --------------------------------------------
    default_quality: VideoQuality = Field(
        default=VideoQuality.MEDIUM_720P,
        description="Default video quality",
    )
    max_video_duration_minutes: int = Field(
        default=180,
        description="Skip videos longer than X minutes",
    )
    prefer_audio_only: bool = Field(
        default=True,
        description="Prefer audio-only for transcription",
    )
    extract_audio: bool = Field(
        default=True,
        description="Always extract audio",
    )
    save_video: bool = Field(
        default=False,
        description="Save video files (storage optimization)",
    )

    # --- Download methods -------------------------------------------------
    # List order is the preference order (see get_method_priority()).
    enabled_methods: List[DownloadMethod] = Field(
        default=[
            DownloadMethod.PYTUBEFIX,
            DownloadMethod.YT_DLP,
            DownloadMethod.PLAYWRIGHT,
            DownloadMethod.TRANSCRIPT_ONLY,
        ],
        description="Enabled download methods in order of preference",
    )
    method_timeout_seconds: int = Field(
        default=120,
        description="Timeout per download method",
    )
    max_retries_per_method: int = Field(
        default=2,
        description="Max retries per method",
    )

    # --- yt-dlp -----------------------------------------------------------
    ytdlp_use_cookies: bool = Field(
        default=True,
        description="Use cookies for yt-dlp",
    )
    ytdlp_cookies_file: Optional[Path] = Field(
        default=None,
        description="Path to cookies.txt file",
    )
    ytdlp_user_agents: List[str] = Field(
        default=[
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        ],
        description="User agents for yt-dlp rotation",
    )

    # --- Playwright -------------------------------------------------------
    playwright_headless: bool = Field(
        default=True,
        description="Run Playwright in headless mode",
    )
    playwright_browser_session: Optional[Path] = Field(
        default=None,
        description="Saved browser session",
    )
    playwright_timeout: int = Field(
        default=30000,
        description="Playwright timeout in milliseconds",
    )

    # --- External tools ---------------------------------------------------
    external_tools_enabled: bool = Field(
        default=True,
        description="Enable external tools",
    )
    fourk_video_downloader_path: Optional[Path] = Field(
        default=None,
        description="Path to 4K Video Downloader CLI",
    )

    # --- Web services -----------------------------------------------------
    web_services_enabled: bool = Field(
        default=True,
        description="Enable web service APIs",
    )
    web_service_timeout: int = Field(
        default=30,
        description="Web service timeout in seconds",
    )
    web_service_user_agents: List[str] = Field(
        default=[
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
        ],
        description="User agents for web services",
    )

    # --- Performance ------------------------------------------------------
    max_concurrent_downloads: int = Field(
        default=3,
        description="Maximum concurrent downloads",
    )
    cache_results: bool = Field(
        default=True,
        description="Cache download results",
    )
    cache_ttl_hours: int = Field(
        default=24,
        description="Cache TTL in hours",
    )

    # --- Monitoring and health -------------------------------------------
    health_check_interval_minutes: int = Field(
        default=30,
        description="Health check interval",
    )
    success_rate_threshold: float = Field(
        default=0.7,
        description="Switch methods if success rate drops below",
    )
    enable_telemetry: bool = Field(
        default=True,
        description="Enable performance telemetry",
    )

    # --- Error handling ---------------------------------------------------
    max_total_retries: int = Field(
        default=5,
        description="Maximum total retries across all methods",
    )
    backoff_factor: float = Field(
        default=1.5,
        description="Exponential backoff factor",
    )

    # --- Audio processing -------------------------------------------------
    audio_format: str = Field(
        default="mp3",
        description="Audio output format",
    )
    audio_quality: str = Field(
        default="192k",
        description="Audio quality",
    )
    keep_audio_files: bool = Field(
        default=True,
        description="Keep audio files for future re-transcription",
    )
    audio_cleanup_days: int = Field(
        default=30,
        description="Delete audio files older than X days (0 = never delete)",
    )

    # --- Video processing -------------------------------------------------
    video_format: str = Field(
        default="mp4",
        description="Video output format",
    )
    merge_audio_video: bool = Field(
        default=True,
        description="Merge audio and video streams",
    )

    # --- Faster-Whisper transcription ------------------------------------
    whisper_model: str = Field(
        default="large-v3-turbo",
        description="Faster-whisper model ('large-v3-turbo', 'large-v3', 'large-v2', 'medium', 'small', 'base', 'tiny')",
    )
    whisper_device: str = Field(
        default="auto",
        description="Processing device ('auto', 'cpu', 'cuda')",
    )
    whisper_compute_type: str = Field(
        default="auto",
        description="Compute type ('auto', 'int8', 'float16', 'float32')",
    )
    whisper_beam_size: int = Field(
        default=5,
        description="Beam search size (1-10, higher = better quality)",
    )
    whisper_vad_filter: bool = Field(
        default=True,
        description="Voice Activity Detection for efficiency",
    )
    whisper_word_timestamps: bool = Field(
        default=True,
        description="Enable word-level timestamps",
    )
    whisper_temperature: float = Field(
        default=0.0,
        description="Sampling temperature (0 = deterministic)",
    )
    whisper_best_of: int = Field(
        default=5,
        description="Number of candidates when sampling",
    )

    class Config:
        # Settings-source options: dotenv file, variable prefix, matching rules.
        env_file = ".env"
        env_prefix = "VIDEO_DOWNLOAD_"
        case_sensitive = False
        extra = "ignore"  # Unknown entries are tolerated rather than rejected

    def get_storage_dirs(self) -> Dict[str, Path]:
        """Return the storage layout as a mapping of name to directory.

        ``"base"`` is ``storage_path`` itself; every other entry is a
        same-named subdirectory of it.
        """
        root = Path(self.storage_path)
        layout: Dict[str, Path] = {"base": root}
        # NOTE(review): "temp" is derived from storage_path here; the separate
        # ``temp_dir`` field is not consulted by this method.
        for subdir in (
            "videos",
            "audio",
            "transcripts",
            "summaries",
            "temp",
            "cache",
            "logs",
        ):
            layout[subdir] = root / subdir
        return layout

    def ensure_directories(self) -> None:
        """Create every directory from ``get_storage_dirs()`` if it is missing."""
        for directory in self.get_storage_dirs().values():
            # parents/exist_ok make this safe to call repeatedly.
            directory.mkdir(parents=True, exist_ok=True)

    def get_method_priority(self) -> List[DownloadMethod]:
        """Return a new list of the enabled methods, in their configured order."""
        return list(self.enabled_methods)

    def is_method_enabled(self, method: DownloadMethod) -> bool:
        """Return True when ``method`` appears in ``enabled_methods``."""
        active_methods = self.enabled_methods
        return method in active_methods
|
|
|
|
|
|
# Default configuration instance
|
|
# Built once, when this module is imported; get_video_download_config()
# does not return this object but constructs a new instance on each call.
default_config: VideoDownloadConfig = VideoDownloadConfig()
|
|
|
|
|
|
def get_video_download_config() -> VideoDownloadConfig:
    """Construct and return a new ``VideoDownloadConfig``.

    A fresh instance is built on every call; the module-level
    ``default_config`` object is not reused here.
    """
    config = VideoDownloadConfig()
    return config
|
|
|
|
|
|
# Configuration validation
|
|
def validate_config(config: VideoDownloadConfig) -> List[str]:
    """Validate configuration and return a list of warning/error messages.

    The checks are advisory: nothing is raised and ``config`` is not
    modified.

    Args:
        config: The configuration to inspect.

    Returns:
        Human-readable messages, one per problem found, in the order the
        checks run. An empty list means no problems were detected.
    """
    # Named ``issues`` rather than ``warnings`` to avoid shadowing the
    # standard-library module of that name.
    issues: List[str] = []

    # Check storage space
    if config.max_storage_gb < 1.0:
        issues.append("Storage limit is very low (< 1GB)")

    # Check if any download methods are enabled
    if not config.enabled_methods:
        issues.append("No download methods enabled")

    # Check for required tools/dependencies. find_spec() only locates the
    # package; it does not execute it or bind an unused name the way a
    # probing ``import playwright`` would.
    if DownloadMethod.PLAYWRIGHT in config.enabled_methods:
        try:
            playwright_missing = importlib.util.find_spec("playwright") is None
        except (ImportError, ValueError):
            # A module with a broken/missing spec is treated as unavailable.
            playwright_missing = True
        if playwright_missing:
            issues.append("Playwright not installed but enabled in config")

    # Optional filesystem paths: only checked when set, and reported in the
    # same order as before (external tool first, then cookies file).
    optional_paths = (
        ("4K Video Downloader path", config.fourk_video_downloader_path),
        ("yt-dlp cookies file", config.ytdlp_cookies_file),
    )
    for label, path in optional_paths:
        if path and not path.exists():
            issues.append(f"{label} does not exist: {path}")

    return issues