youtube-summarizer/backend/services/video_downloaders/base_downloader.py

205 lines
6.4 KiB
Python

"""
Base interface for video downloaders with progress tracking
"""
import asyncio
import logging
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any, Callable
from dataclasses import dataclass
from pathlib import Path
from backend.models.video_download import (
VideoDownloadResult,
DownloadPreferences,
DownloadMethod,
DownloadStatus,
VideoMetadata,
TranscriptData,
DownloaderException
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
@dataclass
class DownloadProgress:
    """Snapshot of the state of a download operation.

    Every field has a zero/empty default, so ``DownloadProgress()`` is a
    valid "nothing reported yet" value. Field order is part of the
    positional constructor and must not be changed.
    """

    # Completion percentage (range is not enforced by this class).
    download_percent: float = 0.0
    # Number of bytes received so far.
    bytes_downloaded: int = 0
    # Expected total size in bytes.
    total_bytes: int = 0
    # Transfer speed in bytes per second.
    speed_bps: float = 0.0
    # Estimated time remaining, in seconds.
    eta_seconds: float = 0.0
    # Label of the download method in use (free-form string).
    current_method: str = ""
    # Retry counter for the current operation.
    retry_attempt: int = 0
    # Human-readable status text.
    status_message: str = ""
class BaseVideoDownloader(ABC):
    """Abstract base class for all video downloaders.

    Subclasses must implement ``download_video`` and ``test_connection``.
    The remaining methods are shared helpers and conservative defaults
    (no metadata, no transcript, no audio-only mode, no quality selection,
    ``"mp4"`` output) that subclasses may override.
    """

    def __init__(self, method: DownloadMethod, config: Optional[Dict[str, Any]] = None):
        """Store the download method and configuration.

        Args:
            method: Download method implemented by this instance.
            config: Optional settings; a falsy value becomes an empty dict.
        """
        self.method = method
        self.config = config or {}
        # The logger is named after the concrete subclass.
        self.logger = logging.getLogger(self.__class__.__name__)

    @abstractmethod
    async def download_video(
        self,
        url: str,
        preferences: DownloadPreferences,
        progress_callback: Optional[Callable[[DownloadProgress], None]] = None
    ) -> VideoDownloadResult:
        """Download video with given preferences and progress tracking.

        Args:
            url: YouTube video URL
            preferences: Download preferences
            progress_callback: Optional callback for progress updates

        Returns:
            VideoDownloadResult with download status and file paths

        Raises:
            DownloaderException: If download fails
        """

    @abstractmethod
    async def test_connection(self) -> bool:
        """Test if this downloader is working.

        Returns:
            True if downloader is functional, False otherwise
        """

    async def extract_video_id(self, url: str) -> str:
        """Extract the 11-character YouTube video ID from a URL.

        Recognised shapes: ``youtube.com/watch?...v=ID`` (``v`` may be any
        query parameter, not only the first), ``youtu.be/ID``,
        ``youtube.com/embed/ID``, ``youtube.com/v/ID``,
        ``youtube.com/shorts/ID``, ``youtube.com/live/ID`` and
        ``youtube-nocookie.com/embed/ID``.

        Args:
            url: Text containing a YouTube video URL.

        Returns:
            The captured video ID.

        Raises:
            DownloaderException: If no pattern matches ``url``.
        """
        import re

        video_id = r'([a-zA-Z0-9_-]{11})'
        patterns = [
            # Lazy skip over earlier query parameters so that
            # "watch?feature=share&v=ID" resolves to the first "v=".
            r'(?:youtube\.com/watch\?(?:[^#\s]*?&)?v=|youtu\.be/)' + video_id,
            r'youtube\.com/embed/' + video_id,
            r'youtube\.com/v/' + video_id,
            r'youtube\.com/(?:shorts|live)/' + video_id,
            r'youtube-nocookie\.com/embed/' + video_id,
        ]
        # Patterns are tried in order; the first one that matches wins.
        for pattern in patterns:
            match = re.search(pattern, url)
            if match:
                return match.group(1)
        raise DownloaderException(f"Could not extract video ID from URL: {url}")

    def create_result(self,
                      video_id: str,
                      video_url: str,
                      status: DownloadStatus = DownloadStatus.FAILED,
                      error_message: Optional[str] = None) -> VideoDownloadResult:
        """Create a basic download result tagged with this downloader's method.

        Args:
            video_id: ID of the video the result refers to.
            video_url: URL of the video the result refers to.
            status: Result status; defaults to ``DownloadStatus.FAILED``.
            error_message: Optional error description.

        Returns:
            A ``VideoDownloadResult`` built from the arguments and
            ``self.method``.
        """
        return VideoDownloadResult(
            video_id=video_id,
            video_url=video_url,
            status=status,
            method=self.method,
            error_message=error_message
        )

    async def get_video_metadata(self, video_id: str) -> Optional[VideoMetadata]:
        """Get video metadata if supported by this downloader.

        The base implementation always returns ``None``.
        """
        return None

    async def get_transcript(self, video_id: str) -> Optional[TranscriptData]:
        """Get video transcript if supported by this downloader.

        The base implementation always returns ``None``.
        """
        return None

    def supports_audio_only(self) -> bool:
        """Check if this downloader supports audio-only downloads.

        The base implementation returns ``False``.
        """
        return False

    async def report_progress(
        self,
        callback: Optional[Callable[[DownloadProgress], None]],
        progress: DownloadProgress
    ):
        """Report progress to ``callback`` if one is provided.

        The callback may be a plain function, a coroutine function, or any
        callable that returns an awaitable; an awaitable result is awaited.
        Exceptions raised by the callback are logged as warnings and
        swallowed, so a faulty callback cannot abort a download.

        Args:
            callback: Progress callback; a falsy value makes this a no-op.
            progress: Progress snapshot handed to the callback.
        """
        if not callback:
            return

        import inspect

        try:
            outcome = callback(progress)
            # Inspect the returned value rather than the callable itself:
            # this also covers lambdas/wrappers around async functions.
            if inspect.isawaitable(outcome):
                await outcome
        except Exception as e:
            self.logger.warning(f"Error in progress callback: {e}")

    def supports_quality_selection(self) -> bool:
        """Check if this downloader supports quality selection.

        The base implementation returns ``False``.
        """
        return False

    def get_supported_formats(self) -> list[str]:
        """Get list of supported output formats.

        The base implementation returns ``["mp4"]``.
        """
        return ["mp4"]

    async def cleanup_temp_files(self, temp_dir: Path):
        """Remove ``temp_dir`` and everything inside it (best effort).

        A missing directory is a no-op. Removal failures are logged as
        warnings and not raised.

        Args:
            temp_dir: Directory to delete.
        """
        if not temp_dir.exists():
            return

        import shutil

        try:
            # rmtree is blocking filesystem work; run it in a worker thread
            # so the event loop stays responsive.
            await asyncio.to_thread(shutil.rmtree, temp_dir)
            self.logger.debug(f"Cleaned up temp directory: {temp_dir}")
        except Exception as e:
            self.logger.warning(f"Failed to clean up temp directory {temp_dir}: {e}")
class DownloaderFactory:
    """Registry-backed factory that builds downloaders by download method."""

    # Maps a download method to the downloader class registered for it.
    # Defined on the class, so the mapping is shared by all callers.
    _downloaders = {}

    @classmethod
    def register(cls, method: DownloadMethod, downloader_class):
        """Register ``downloader_class`` as the implementation for ``method``.

        Registering the same method again replaces the earlier entry.
        """
        registry = cls._downloaders
        registry[method] = downloader_class

    @classmethod
    def create(cls, method: DownloadMethod, config: Optional[Dict[str, Any]] = None) -> BaseVideoDownloader:
        """Instantiate the downloader registered for ``method``.

        Args:
            method: Download method to look up.
            config: Optional configuration forwarded to the constructor.

        Returns:
            The result of calling the registered class with
            ``(method, config)``.

        Raises:
            ValueError: If nothing is registered for ``method``.
        """
        registry = cls._downloaders
        if method in registry:
            implementation = registry[method]
            return implementation(method, config)
        raise ValueError(f"Unsupported download method: {method}")

    @classmethod
    def get_available_methods(cls) -> list[DownloadMethod]:
        """Return the registered download methods as a new list."""
        return [*cls._downloaders]
class DownloadTimeout:
    """Async context manager that runs a coroutine under a timeout.

    Use ``run`` inside the ``async with`` body. On exit, a task started by
    ``run`` that has not finished yet is cancelled and awaited.
    """

    def __init__(self, timeout_seconds: int):
        """Remember the timeout; no task exists until ``run`` is called.

        Args:
            timeout_seconds: Maximum time, in seconds, ``run`` waits.
        """
        self.timeout_seconds = timeout_seconds
        self.task = None

    async def __aenter__(self):
        """Return this instance so callers can invoke ``run`` on it."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Cancel the tracked task if it is still running.

        Returns ``None``, so an exception raised in the ``async with`` body
        is not suppressed.
        """
        if self.task is not None and not self.task.done():
            self.task.cancel()
            try:
                # Wait for the cancellation to be processed by the task.
                await self.task
            except asyncio.CancelledError:
                pass

    async def run(self, coro):
        """Run coroutine with timeout.

        Args:
            coro: Coroutine to schedule as a task and wait for.

        Returns:
            The coroutine's result.

        Raises:
            DownloaderException: If the coroutine does not finish within
                ``timeout_seconds``; the timeout error is attached as the
                cause.
        """
        # Keep a handle on the task so __aexit__ can cancel it if needed.
        self.task = asyncio.create_task(coro)
        try:
            return await asyncio.wait_for(self.task, timeout=self.timeout_seconds)
        except asyncio.TimeoutError as exc:
            raise DownloaderException(
                f"Operation timed out after {self.timeout_seconds} seconds"
            ) from exc