# youtube-summarizer/backend/services/video_downloaders/base_downloader.py
#
# 170 lines
# 5.3 KiB
# Python

"""
Base interface for video downloaders
"""
import asyncio
import logging
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any
from pathlib import Path
from backend.models.video_download import (
VideoDownloadResult,
DownloadPreferences,
DownloadMethod,
DownloadStatus,
VideoMetadata,
TranscriptData,
DownloaderException
)
logger = logging.getLogger(__name__)
class BaseVideoDownloader(ABC):
    """Abstract base class for all video downloaders.

    Subclasses must implement :meth:`download_video` and
    :meth:`test_connection`. Every other method has a conservative default
    (``None``, ``False`` or ``["mp4"]``) that a subclass may override.
    """

    def __init__(self, method: DownloadMethod, config: Optional[Dict[str, Any]] = None):
        """Store the download method and configuration.

        Args:
            method: Download method recorded on results built by
                :meth:`create_result`.
            config: Optional settings; a fresh empty dict is used when the
                argument is omitted or falsy.
        """
        self.method = method
        self.config = config or {}
        # The logger is named after the concrete subclass, not the module.
        self.logger = logging.getLogger(self.__class__.__name__)

    @abstractmethod
    async def download_video(self, url: str, preferences: DownloadPreferences) -> VideoDownloadResult:
        """
        Download video with given preferences

        Args:
            url: YouTube video URL
            preferences: Download preferences

        Returns:
            VideoDownloadResult with download status and file paths

        Raises:
            DownloaderException: If download fails
        """

    @abstractmethod
    async def test_connection(self) -> bool:
        """
        Test if this downloader is working

        Returns:
            True if downloader is functional, False otherwise
        """

    async def extract_video_id(self, url: str) -> str:
        """Extract the 11-character YouTube video ID from a URL.

        Recognised forms: ``youtube.com/watch?...v=<id>`` (``v`` may appear
        after other query parameters), ``youtu.be/<id>``,
        ``youtube.com/embed/<id>``, ``youtube-nocookie.com/embed/<id>``,
        ``youtube.com/v/<id>``, ``youtube.com/shorts/<id>`` and
        ``youtube.com/live/<id>``.

        Args:
            url: Video URL to inspect.

        Returns:
            The video ID captured from the first pattern that matches.

        Raises:
            DownloaderException: If ``url`` is not a string or no pattern
                matches it.
        """
        import re

        # Report a non-string input through the documented exception type
        # instead of letting re.search raise TypeError.
        if not isinstance(url, str):
            raise DownloaderException(f"Could not extract video ID from URL: {url}")

        video_id = r'([a-zA-Z0-9_-]{11})'
        patterns = [
            # The lazy parameter prefix lets "v" follow other query
            # parameters while still picking the first "v=" in the query.
            r'(?:youtube\.com/watch\?(?:[^#&]*&)*?v=|youtu\.be/)' + video_id,
            r'youtube(?:-nocookie)?\.com/embed/' + video_id,
            r'youtube\.com/v/' + video_id,
            r'youtube\.com/(?:shorts|live)/' + video_id,
        ]
        for pattern in patterns:
            match = re.search(pattern, url)
            if match:
                return match.group(1)
        raise DownloaderException(f"Could not extract video ID from URL: {url}")

    def create_result(self,
                      video_id: str,
                      video_url: str,
                      status: DownloadStatus = DownloadStatus.FAILED,
                      error_message: Optional[str] = None) -> VideoDownloadResult:
        """Create a basic download result tagged with this downloader's method.

        Args:
            video_id: ID of the video the result refers to.
            video_url: URL of the video the result refers to.
            status: Status to record; defaults to ``DownloadStatus.FAILED``.
            error_message: Optional error description.

        Returns:
            A ``VideoDownloadResult`` built from the arguments and
            ``self.method``.
        """
        return VideoDownloadResult(
            video_id=video_id,
            video_url=video_url,
            status=status,
            method=self.method,
            error_message=error_message
        )

    async def get_video_metadata(self, video_id: str) -> Optional[VideoMetadata]:
        """Get video metadata if supported by this downloader (default: ``None``)."""
        return None

    async def get_transcript(self, video_id: str) -> Optional[TranscriptData]:
        """Get video transcript if supported by this downloader (default: ``None``)."""
        return None

    def supports_audio_only(self) -> bool:
        """Check if this downloader supports audio-only downloads (default: ``False``)."""
        return False

    def supports_quality_selection(self) -> bool:
        """Check if this downloader supports quality selection (default: ``False``)."""
        return False

    def get_supported_formats(self) -> list[str]:
        """Get list of supported output formats (default: ``["mp4"]``)."""
        return ["mp4"]

    async def cleanup_temp_files(self, temp_dir: Path) -> None:
        """Remove a temporary directory tree on a best-effort basis.

        Does nothing when ``temp_dir`` does not exist. A failure during
        removal is logged as a warning and not re-raised.

        Args:
            temp_dir: Directory to delete recursively.
        """
        import shutil

        if not temp_dir.exists():
            return
        try:
            # rmtree blocks; run it in a worker thread so this coroutine
            # does not stall the event loop while files are deleted.
            await asyncio.to_thread(shutil.rmtree, temp_dir)
            self.logger.debug("Cleaned up temp directory: %s", temp_dir)
        except Exception as e:
            self.logger.warning("Failed to clean up temp directory %s: %s", temp_dir, e)
class DownloaderFactory:
    """Factory that builds video downloaders from a class-level registry."""

    # Maps each registered download method to the class that implements it.
    _downloaders = {}

    @classmethod
    def register(cls, method: DownloadMethod, downloader_class):
        """Associate ``downloader_class`` with ``method``.

        Registering the same method again replaces the earlier entry.
        """
        cls._downloaders.update({method: downloader_class})

    @classmethod
    def create(cls, method: DownloadMethod, config: Optional[Dict[str, Any]] = None) -> BaseVideoDownloader:
        """Instantiate the downloader registered for ``method``.

        Args:
            method: Download method to look up in the registry.
            config: Optional configuration passed to the downloader.

        Returns:
            The result of calling the registered class with
            ``(method, config)``.

        Raises:
            ValueError: If no class is registered for ``method``.
        """
        registry = cls._downloaders
        if method in registry:
            implementation = registry[method]
            return implementation(method, config)
        raise ValueError(f"Unsupported download method: {method}")

    @classmethod
    def get_available_methods(cls) -> list[DownloadMethod]:
        """Return the registered download methods in registration order."""
        return [*cls._downloaders]
class DownloadTimeout:
    """Async context manager that runs a coroutine under a timeout.

    Use :meth:`run` inside ``async with`` to execute a coroutine. If the
    task created by :meth:`run` is still pending when the ``async with``
    block exits, it is cancelled and awaited before control returns.
    """

    def __init__(self, timeout_seconds: int):
        """Remember the timeout.

        Args:
            timeout_seconds: Maximum time passed to ``asyncio.wait_for``.
        """
        self.timeout_seconds = timeout_seconds
        # Task created by the most recent run() call; None until then.
        self.task = None

    async def __aenter__(self):
        """Enter the context and return this manager."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Cancel the tracked task if it has not finished.

        Returns ``None``, so an exception raised inside the ``async with``
        body is not suppressed.
        """
        if self.task is not None and not self.task.done():
            self.task.cancel()
            try:
                # Wait for the cancellation to be processed by the task.
                await self.task
            except asyncio.CancelledError:
                pass

    async def run(self, coro):
        """Run coroutine with timeout.

        Args:
            coro: Coroutine to schedule as a task and await.

        Returns:
            Whatever ``coro`` returns.

        Raises:
            DownloaderException: If the coroutine does not finish within
                ``timeout_seconds``; the original timeout error is attached
                as ``__cause__``.
        """
        self.task = asyncio.create_task(coro)
        try:
            return await asyncio.wait_for(self.task, timeout=self.timeout_seconds)
        except asyncio.TimeoutError as exc:
            # Chain explicitly so the traceback shows the timeout as the
            # direct cause rather than an unrelated nested failure.
            raise DownloaderException(
                f"Operation timed out after {self.timeout_seconds} seconds"
            ) from exc