"""
Enhanced video service integrating the intelligent video downloader
"""

import asyncio
import logging
from typing import Optional, Dict, Any
from pathlib import Path

from backend.models.video_download import (
    VideoDownloadResult,
    DownloadPreferences,
    DownloadStatus,
    VideoQuality,
    DownloaderException
)
from backend.config.video_download_config import VideoDownloadConfig, get_video_download_config
from backend.services.intelligent_video_downloader import IntelligentVideoDownloader
from backend.services.video_service import VideoService  # Original service
from backend.core.exceptions import ValidationError, UnsupportedFormatError

logger = logging.getLogger(__name__)


class EnhancedVideoService(VideoService):
    """Enhanced video service with intelligent downloading capabilities.

    Extends ``VideoService`` and delegates download, transcript, job,
    health and metrics operations to an ``IntelligentVideoDownloader``
    built from the active ``VideoDownloadConfig``.
    """

    def __init__(self, config: Optional[VideoDownloadConfig] = None):
        """Create the service.

        Args:
            config: Download configuration. When omitted (or falsy), the
                value returned by ``get_video_download_config()`` is used.
        """
        super().__init__()  # Initialize parent class

        self.download_config = config or get_video_download_config()
        self.intelligent_downloader = IntelligentVideoDownloader(self.download_config)

        logger.info("Enhanced video service initialized with intelligent downloader")

    def _build_default_preferences(self) -> DownloadPreferences:
        """Return the default preferences, optimized for summarization."""
        return DownloadPreferences(
            quality=VideoQuality.MEDIUM_720P,
            prefer_audio_only=True,  # For transcription, audio is sufficient
            max_duration_minutes=self.download_config.max_video_duration_minutes,
            fallback_to_transcript=True,  # Always allow transcript fallback
            extract_audio=True,
            save_video=self.download_config.save_video,
            enable_subtitles=True
        )

    async def get_video_for_processing(
        self,
        url: str,
        preferences: Optional[DownloadPreferences] = None
    ) -> VideoDownloadResult:
        """
        Get video for processing - either download or extract transcript/metadata.

        This is the main entry point for the YouTube Summarizer pipeline.

        Args:
            url: Video URL; validated through ``self.extract_video_id``.
            preferences: Download preferences. When ``None``, defaults
                optimized for summarization are used.

        Returns:
            The result produced by the intelligent downloader.

        Raises:
            ValidationError: Propagated unchanged from URL validation.
            DownloaderException: When the downloader reports
                ``DownloadStatus.FAILED`` or any other error occurs. The
                original exception is attached as ``__cause__``.
        """
        try:
            # First validate the URL using parent class
            video_id = self.extract_video_id(url)

            # Set up default preferences optimized for summarization
            if preferences is None:
                preferences = self._build_default_preferences()

            # Use intelligent downloader
            result = await self.intelligent_downloader.download_video(url, preferences)

            # Validate result for pipeline requirements.
            # NOTE: this exception is caught by the generic handler below and
            # re-wrapped, so callers see the "Video processing failed: ..." prefix.
            if result.status == DownloadStatus.FAILED:
                raise DownloaderException(f"All download methods failed: {result.error_message}")

            # Log success
            if result.status == DownloadStatus.COMPLETED:
                logger.info(f"Successfully downloaded video {video_id} using {result.method.value}")
            elif result.status == DownloadStatus.PARTIAL:
                logger.info(f"Got transcript/metadata for video {video_id} using {result.method.value}")

            return result

        except ValidationError:
            # Re-raise validation errors from parent class
            raise
        except Exception as e:
            logger.error(f"Enhanced video service failed for {url}: {e}")
            # Chain the cause so the original traceback is preserved.
            raise DownloaderException(f"Video processing failed: {e}") from e

    async def get_video_metadata_only(self, url: str) -> Optional[Dict[str, Any]]:
        """Get only video metadata without downloading.

        Args:
            url: Video URL.

        Returns:
            A dict of metadata fields, or ``None`` when the
            ``'transcript_only'`` downloader is not registered, no metadata
            is returned, or any error occurs (errors are logged, not raised).
        """
        try:
            video_id = self.extract_video_id(url)

            # Use transcript-only downloader for metadata
            transcript_downloader = self.intelligent_downloader.downloaders.get('transcript_only')
            if transcript_downloader:
                metadata = await transcript_downloader.get_video_metadata(video_id)
                if metadata:
                    return {
                        'video_id': metadata.video_id,
                        'title': metadata.title,
                        'description': metadata.description,
                        'duration_seconds': metadata.duration_seconds,
                        'view_count': metadata.view_count,
                        'upload_date': metadata.upload_date,
                        'uploader': metadata.uploader,
                        'thumbnail_url': metadata.thumbnail_url,
                        'tags': metadata.tags,
                        'language': metadata.language
                    }

            return None

        except Exception as e:
            # Best-effort lookup: failures are reported as "no metadata".
            logger.error(f"Metadata extraction failed for {url}: {e}")
            return None

    async def get_transcript_only(self, url: str) -> Optional[Dict[str, Any]]:
        """Get only transcript without downloading video.

        Args:
            url: Video URL.

        Returns:
            A dict with the transcript text and its attributes, or ``None``
            when the ``'transcript_only'`` downloader is not registered, no
            transcript is returned, or any error occurs (errors are logged,
            not raised).
        """
        try:
            video_id = self.extract_video_id(url)

            # Use transcript-only downloader
            transcript_downloader = self.intelligent_downloader.downloaders.get('transcript_only')
            if transcript_downloader:
                transcript = await transcript_downloader.get_transcript(video_id)
                if transcript:
                    return {
                        'text': transcript.text,
                        'language': transcript.language,
                        'is_auto_generated': transcript.is_auto_generated,
                        'segments': transcript.segments,
                        'source': transcript.source
                    }

            return None

        except Exception as e:
            # Best-effort lookup: failures are reported as "no transcript".
            logger.error(f"Transcript extraction failed for {url}: {e}")
            return None

    async def get_download_job_status(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Get status of an active download job.

        Args:
            job_id: Identifier passed to the downloader's ``get_job_status``.

        Returns:
            A dict describing the job (timestamps as ISO-8601 strings), or
            ``None`` when the downloader returns nothing for ``job_id``.
        """
        job_status = await self.intelligent_downloader.get_job_status(job_id)

        if job_status:
            return {
                'job_id': job_status.job_id,
                'video_url': job_status.video_url,
                'status': job_status.status.value,
                'progress_percent': job_status.progress_percent,
                'current_method': job_status.current_method.value if job_status.current_method else None,
                'error_message': job_status.error_message,
                'created_at': job_status.created_at.isoformat(),
                'updated_at': job_status.updated_at.isoformat()
            }

        return None

    async def cancel_download(self, job_id: str) -> bool:
        """Cancel an active download job.

        Returns whatever the downloader's ``cancel_job`` returns for ``job_id``.
        """
        return await self.intelligent_downloader.cancel_job(job_id)

    async def get_health_status(self) -> Dict[str, Any]:
        """Get health status of all download methods.

        Returns:
            The downloader's health-check result flattened into a dict, with
            ``last_check`` as an ISO-8601 string.
        """
        health_result = await self.intelligent_downloader.health_check()

        return {
            'overall_status': health_result.overall_status,
            'healthy_methods': health_result.healthy_methods,
            'total_methods': health_result.total_methods,
            'method_details': health_result.method_details,
            'recommendations': health_result.recommendations,
            'last_check': health_result.last_check.isoformat()
        }

    async def get_download_metrics(self) -> Dict[str, Any]:
        """Get download performance metrics.

        Returns:
            The downloader's metrics flattened into a dict, plus a derived
            ``success_rate`` percentage and ``last_updated`` as an ISO-8601
            string.
        """
        metrics = self.intelligent_downloader.get_metrics()

        return {
            'total_attempts': metrics.total_attempts,
            'successful_downloads': metrics.successful_downloads,
            'failed_downloads': metrics.failed_downloads,
            'partial_downloads': metrics.partial_downloads,
            # max(..., 1) avoids division by zero before any attempt is recorded.
            'success_rate': (metrics.successful_downloads / max(metrics.total_attempts, 1)) * 100,
            'method_success_rates': metrics.method_success_rates,
            'method_attempt_counts': metrics.method_attempt_counts,
            'average_download_time': metrics.average_download_time,
            'average_file_size_mb': metrics.average_file_size_mb,
            'common_errors': metrics.common_errors,
            'last_updated': metrics.last_updated.isoformat()
        }

    async def cleanup_old_files(self, max_age_days: Optional[int] = None) -> Dict[str, Any]:
        """Clean up old downloaded files.

        Args:
            max_age_days: Forwarded unchanged (including ``None``) to the
                downloader's ``cleanup_old_files``.

        Returns:
            Whatever the downloader's ``cleanup_old_files`` returns.
        """
        return await self.intelligent_downloader.cleanup_old_files(max_age_days)

    def get_supported_methods(self) -> list[str]:
        """Get list of supported download methods.

        Keys of the downloader registry are reported by their ``.value`` when
        they have one (enum members) and as-is otherwise, so the method works
        whether the registry is keyed by enum members or by plain strings
        (other methods in this class look it up with ``'transcript_only'``).
        """
        return [
            getattr(method, 'value', method)
            for method in self.intelligent_downloader.downloaders
        ]

    @staticmethod
    def _scan_directory(path: Path) -> tuple[int, int]:
        """Return ``(total_size_bytes, file_count)`` for regular files under *path*.

        The tree is walked once. Entries that disappear or cannot be stat'ed
        while the scan is running are skipped instead of aborting the scan.
        """
        total_size = 0
        file_count = 0

        for entry in path.glob('**/*'):
            try:
                if not entry.is_file():
                    continue
                total_size += entry.stat().st_size
            except OSError:
                # File vanished or became unreadable between listing and stat.
                continue
            file_count += 1

        return total_size, file_count

    def get_storage_info(self) -> Dict[str, Any]:
        """Get storage directory information.

        Returns:
            A dict with one entry per directory returned by
            ``download_config.get_storage_dirs()`` (path, existence, size and
            file count) plus a ``'total'`` entry with the summed size and the
            usage percentage relative to ``download_config.max_storage_gb``.
        """
        storage_dirs = self.download_config.get_storage_dirs()

        info: Dict[str, Any] = {}

        for name, path in storage_dirs.items():
            if path.exists():
                # Calculate directory size and file count in a single pass
                total_size, file_count = self._scan_directory(path)

                info[name] = {
                    'path': str(path),
                    'exists': True,
                    'size_bytes': total_size,
                    'size_mb': round(total_size / (1024 * 1024), 2),
                    'file_count': file_count
                }
            else:
                info[name] = {
                    'path': str(path),
                    'exists': False,
                    'size_bytes': 0,
                    'size_mb': 0,
                    'file_count': 0
                }

        # Calculate total usage (computed before the 'total' entry is added)
        total_size = sum(info[name]['size_bytes'] for name in info)
        max_size_bytes = self.download_config.max_storage_gb * 1024 * 1024 * 1024

        info['total'] = {
            'size_bytes': total_size,
            'size_mb': round(total_size / (1024 * 1024), 2),
            'size_gb': round(total_size / (1024 * 1024 * 1024), 2),
            'max_size_gb': self.download_config.max_storage_gb,
            'usage_percent': round((total_size / max_size_bytes) * 100, 1) if max_size_bytes > 0 else 0
        }

        return info


# Dependency injection for FastAPI
def get_enhanced_video_service() -> EnhancedVideoService:
    """Get enhanced video service instance.

    A new ``EnhancedVideoService`` (and therefore a new
    ``IntelligentVideoDownloader``) is constructed on every call.
    """
    # NOTE(review): if job tracking or metrics are stored on the downloader
    # instance, they will not be shared between calls - confirm whether a
    # shared instance is intended here.
    return EnhancedVideoService()