217 lines
6.3 KiB
Python
217 lines
6.3 KiB
Python
"""Video download models and data structures."""
|
|
import asyncio
|
|
import time
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from typing import Optional, List, Dict, Any, Union
|
|
from pydantic import BaseModel, HttpUrl, Field
|
|
|
|
|
|
class DownloadMethod(str, Enum):
    """Identifiers for the supported download methods.

    The class mixes in ``str``, so every member is also a plain string
    equal to its value (e.g. ``DownloadMethod.YT_DLP == "yt-dlp"``).
    """

    PYTUBEFIX = "pytubefix"
    YT_DLP = "yt-dlp"
    PLAYWRIGHT = "playwright"
    EXTERNAL_TOOL = "external_tool"
    WEB_SERVICE = "web_service"
    TRANSCRIPT_ONLY = "transcript_only"
    # NOTE(review): presumably a placeholder for "no method succeeded" --
    # confirm against the code that populates results.
    FAILED = "failed"
|
|
|
|
|
|
class VideoQuality(str, Enum):
    """Selectable video quality options.

    Members are ``str`` instances whose value is the option's raw label.
    """

    AUDIO_ONLY = "audio_only"

    # Fixed resolutions, listed from lowest to highest.
    LOW_480P = "480p"
    MEDIUM_720P = "720p"
    HIGH_1080P = "1080p"
    ULTRA_1440P = "1440p"
    MAX_2160P = "2160p"

    BEST = "best"
|
|
|
|
|
|
class DownloadStatus(str, Enum):
    """Status values for a download operation.

    Members are ``str`` instances equal to their lowercase value.
    """

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    # Transcript only, no video.
    PARTIAL = "partial"
    CANCELLED = "cancelled"
|
|
|
|
|
|
class DownloadPreferences(BaseModel):
    """User-adjustable preferences for video downloading.

    Every field has a default, so ``DownloadPreferences()`` is valid.
    """

    quality: VideoQuality = VideoQuality.MEDIUM_720P

    # For transcription, audio is sufficient.
    prefer_audio_only: bool = True

    # Skip very long videos.
    max_duration_minutes: int = 180

    fallback_to_transcript: bool = True
    extract_audio: bool = True

    # Off by default for storage optimization.
    save_video: bool = False

    output_format: str = "mp4"
    enable_subtitles: bool = True
|
|
|
|
|
|
class VideoMetadata(BaseModel):
    """Metadata describing a video, gathered from various sources.

    Only ``video_id`` is required; every other field carries a default.
    """

    video_id: str

    # Descriptive fields
    title: Optional[str] = None
    description: Optional[str] = None
    duration_seconds: Optional[int] = None
    view_count: Optional[int] = None
    upload_date: Optional[str] = None
    uploader: Optional[str] = None
    thumbnail_url: Optional[str] = None
    tags: List[str] = Field(default_factory=list)
    language: Optional[str] = "en"

    # Access information
    # One of: public, private, unlisted.
    availability: Optional[str] = None
    age_restricted: bool = False
|
|
|
|
|
|
class TranscriptData(BaseModel):
    """Transcript text plus information about where it came from.

    ``text`` is the only required field.
    """

    text: str
    language: str = "en"
    is_auto_generated: bool = False

    # Optional list of free-form segment dictionaries; their schema is
    # not defined in this model.
    segments: Optional[List[Dict[str, Any]]] = None

    # Source of the transcript.
    source: str = "youtube-transcript-api"
|
|
|
|
|
|
class VideoDownloadResult(BaseModel):
    """Outcome of a single video download operation.

    ``video_id``, ``video_url``, ``status`` and ``method`` are required;
    all remaining fields are optional or have defaults.
    """

    video_id: str
    video_url: str
    status: DownloadStatus
    method: DownloadMethod

    # --- File paths ---
    video_path: Optional[Path] = None
    audio_path: Optional[Path] = None

    # --- Content ---
    transcript: Optional[TranscriptData] = None
    metadata: Optional[VideoMetadata] = None

    # --- Performance metrics ---
    download_time_seconds: Optional[float] = None
    file_size_bytes: Optional[int] = None
    processing_time_seconds: Optional[float] = None

    # --- Error handling ---
    error_message: Optional[str] = None
    error_details: Optional[Dict[str, Any]] = None
    retry_count: int = 0

    # --- Flags ---
    # True if only transcript/metadata is available.
    is_partial: bool = False
    from_cache: bool = False

    # Defaults to the local, timezone-naive time at instantiation.
    created_at: datetime = Field(default_factory=datetime.now)

    class Config:
        arbitrary_types_allowed = True
|
|
|
|
|
|
class DownloadJobStatus(BaseModel):
    """Current state of a download job.

    ``job_id``, ``video_url`` and ``status`` are required.
    """

    job_id: str
    video_url: str
    status: DownloadStatus

    # Progress tracking
    progress_percent: float = 0.0
    current_method: Optional[DownloadMethod] = None
    error_message: Optional[str] = None
    estimated_completion: Optional[datetime] = None

    # Timestamps: each factory calls ``datetime.now`` independently, so the
    # two defaults are local, timezone-naive and may differ slightly.
    created_at: datetime = Field(default_factory=datetime.now)
    updated_at: datetime = Field(default_factory=datetime.now)
|
|
|
|
|
|
class DownloadMetrics(BaseModel):
    """Download performance metrics.

    Holds overall counters, per-method attempt counts and success rates
    (both keyed by the ``DownloadMethod`` value string), averages, and an
    error histogram. All fields have defaults, so ``DownloadMetrics()`` is
    valid.
    """

    total_attempts: int = 0
    successful_downloads: int = 0
    failed_downloads: int = 0
    partial_downloads: int = 0  # Transcript-only results

    # Method-specific success rates
    method_success_rates: Dict[str, float] = Field(default_factory=dict)
    method_attempt_counts: Dict[str, int] = Field(default_factory=dict)

    # Performance metrics
    average_download_time: float = 0.0
    average_file_size_mb: float = 0.0

    # Error analysis
    common_errors: Dict[str, int] = Field(default_factory=dict)

    last_updated: datetime = Field(default_factory=datetime.now)

    def update_success_rate(self, method: DownloadMethod, success: bool) -> None:
        """Record one attempt for ``method`` and refresh its success rate.

        Only ``method_attempt_counts``, ``method_success_rates`` and
        ``last_updated`` are modified; the overall counters
        (``total_attempts`` and friends) are left untouched.

        Args:
            method: The download method that was attempted.
            success: Whether the attempt succeeded.
        """
        method_str = method.value

        # Read both maps defensively: a method present in one map but not
        # the other is treated as having no history instead of raising
        # KeyError.
        previous_attempts = self.method_attempt_counts.get(method_str, 0)
        previous_rate = self.method_success_rates.get(method_str, 0.0)

        # The stored rate is successes / attempts, so the product is a whole
        # number up to float error; rounding recovers the exact count and
        # stops that error from accumulating across calls.
        previous_successes = round(previous_rate * previous_attempts)

        new_attempts = previous_attempts + 1  # always >= 1, safe divisor
        new_successes = previous_successes + (1 if success else 0)

        self.method_attempt_counts[method_str] = new_attempts
        self.method_success_rates[method_str] = new_successes / new_attempts
        self.last_updated = datetime.now()
|
|
|
|
|
|
class HealthCheckResult(BaseModel):
    """Result of a health check of the download system.

    ``overall_status``, ``healthy_methods``, ``total_methods`` and
    ``method_details`` are required.
    """

    # One of: healthy, degraded, unhealthy.
    overall_status: str

    healthy_methods: int
    total_methods: int

    # Free-form detail dictionaries keyed by string; the inner schema is
    # not defined in this model.
    method_details: Dict[str, Dict[str, Any]]

    recommendations: List[str] = Field(default_factory=list)

    # Defaults to the local, timezone-naive time at instantiation.
    last_check: datetime = Field(default_factory=datetime.now)
|
|
|
|
|
|
class DownloaderException(Exception):
    """Root of the exception hierarchy for download operations."""
|
|
|
|
|
|
class VideoNotAvailableError(DownloaderException):
    """Signals that the video is not available for download."""
|
|
|
|
|
|
class UnsupportedFormatError(DownloaderException):
    """Signals that the requested format is not supported."""
|
|
|
|
|
|
class DownloadTimeoutError(DownloaderException):
    """Signals that a download operation timed out."""
|
|
|
|
|
|
class QuotaExceededError(DownloaderException):
    """Signals that an API quota was exceeded."""
|
|
|
|
|
|
class NetworkError(DownloaderException):
    """Signals a network-related error."""
|
|
|
|
|
|
class AllMethodsFailedError(DownloaderException):
    """Signals that all download methods have failed."""