""" Video download models and data structures """ import asyncio import time from datetime import datetime from enum import Enum from pathlib import Path from typing import Optional, List, Dict, Any, Union from pydantic import BaseModel, HttpUrl, Field class DownloadMethod(str, Enum): """Supported download methods""" PYTUBEFIX = "pytubefix" YT_DLP = "yt-dlp" PLAYWRIGHT = "playwright" EXTERNAL_TOOL = "external_tool" WEB_SERVICE = "web_service" TRANSCRIPT_ONLY = "transcript_only" FAILED = "failed" class VideoQuality(str, Enum): """Video quality options""" AUDIO_ONLY = "audio_only" LOW_480P = "480p" MEDIUM_720P = "720p" HIGH_1080P = "1080p" ULTRA_1440P = "1440p" MAX_2160P = "2160p" BEST = "best" class DownloadStatus(str, Enum): """Download operation status""" PENDING = "pending" IN_PROGRESS = "in_progress" COMPLETED = "completed" FAILED = "failed" PARTIAL = "partial" # Transcript only, no video CANCELLED = "cancelled" class DownloadPreferences(BaseModel): """User preferences for video downloading""" quality: VideoQuality = VideoQuality.MEDIUM_720P prefer_audio_only: bool = True # For transcription, audio is sufficient max_duration_minutes: int = 180 # Skip very long videos fallback_to_transcript: bool = True extract_audio: bool = True save_video: bool = False # For storage optimization output_format: str = "mp4" enable_subtitles: bool = True class VideoMetadata(BaseModel): """Video metadata from various sources""" video_id: str title: Optional[str] = None description: Optional[str] = None duration_seconds: Optional[int] = None view_count: Optional[int] = None upload_date: Optional[str] = None uploader: Optional[str] = None thumbnail_url: Optional[str] = None tags: List[str] = Field(default_factory=list) language: Optional[str] = "en" availability: Optional[str] = None # public, private, unlisted age_restricted: bool = False class TranscriptData(BaseModel): """Transcript information""" text: str language: str = "en" is_auto_generated: bool = False segments: Optional[List[Dict[str, Any]]] = None source: str = "youtube-transcript-api" # Source of transcript class VideoDownloadResult(BaseModel): """Result of a video download operation""" video_id: str video_url: str status: DownloadStatus method: DownloadMethod # File paths video_path: Optional[Path] = None audio_path: Optional[Path] = None # Content transcript: Optional[TranscriptData] = None metadata: Optional[VideoMetadata] = None # Performance metrics download_time_seconds: Optional[float] = None file_size_bytes: Optional[int] = None processing_time_seconds: Optional[float] = None # Error handling error_message: Optional[str] = None error_details: Optional[Dict[str, Any]] = None retry_count: int = 0 # Flags is_partial: bool = False # True if only transcript/metadata available from_cache: bool = False created_at: datetime = Field(default_factory=datetime.now) class Config: arbitrary_types_allowed = True class DownloadJobStatus(BaseModel): """Status of a download job""" job_id: str video_url: str status: DownloadStatus progress_percent: float = 0.0 current_method: Optional[DownloadMethod] = None error_message: Optional[str] = None estimated_completion: Optional[datetime] = None created_at: datetime = Field(default_factory=datetime.now) updated_at: datetime = Field(default_factory=datetime.now) class DownloadMetrics(BaseModel): """Download performance metrics""" total_attempts: int = 0 successful_downloads: int = 0 failed_downloads: int = 0 partial_downloads: int = 0 # Transcript-only results # Method-specific success rates method_success_rates: Dict[str, float] = Field(default_factory=dict) method_attempt_counts: Dict[str, int] = Field(default_factory=dict) # Performance metrics average_download_time: float = 0.0 average_file_size_mb: float = 0.0 # Error analysis common_errors: Dict[str, int] = Field(default_factory=dict) last_updated: datetime = Field(default_factory=datetime.now) def update_success_rate(self, method: DownloadMethod, success: bool): """Update success rate for a specific method""" method_str = method.value if method_str not in self.method_attempt_counts: self.method_attempt_counts[method_str] = 0 self.method_success_rates[method_str] = 0.0 current_attempts = self.method_attempt_counts[method_str] current_rate = self.method_success_rates[method_str] # Calculate new success rate if success: new_successes = (current_rate * current_attempts) + 1 else: new_successes = (current_rate * current_attempts) new_attempts = current_attempts + 1 new_rate = new_successes / new_attempts if new_attempts > 0 else 0.0 self.method_attempt_counts[method_str] = new_attempts self.method_success_rates[method_str] = new_rate self.last_updated = datetime.now() class HealthCheckResult(BaseModel): """Health check result for download system""" overall_status: str # healthy, degraded, unhealthy healthy_methods: int total_methods: int method_details: Dict[str, Dict[str, Any]] recommendations: List[str] = Field(default_factory=list) last_check: datetime = Field(default_factory=datetime.now) class DownloaderException(Exception): """Base exception for download operations""" pass class VideoNotAvailableError(DownloaderException): """Video is not available for download""" pass class UnsupportedFormatError(DownloaderException): """Requested format is not supported""" pass class DownloadTimeoutError(DownloaderException): """Download operation timed out""" pass class QuotaExceededError(DownloaderException): """API quota exceeded""" pass class NetworkError(DownloaderException): """Network-related error""" pass class AllMethodsFailedError(DownloaderException): """All download methods have failed""" pass