# youtube-summarizer/backend/services/enhanced_video_service.py

"""
Enhanced video service integrating the intelligent video downloader
"""
import asyncio
import logging
from typing import Optional, Dict, Any
from pathlib import Path

from backend.models.video_download import (
    VideoDownloadResult,
    DownloadPreferences,
    DownloadStatus,
    VideoQuality,
    DownloaderException
)
from backend.config.video_download_config import VideoDownloadConfig, get_video_download_config
from backend.services.intelligent_video_downloader import IntelligentVideoDownloader
from backend.services.video_service import VideoService  # Original service
from backend.core.exceptions import ValidationError, UnsupportedFormatError

logger = logging.getLogger(__name__)


class EnhancedVideoService(VideoService):
    """Enhanced video service with intelligent downloading capabilities.

    Extends ``VideoService`` with an ``IntelligentVideoDownloader`` and
    exposes thin, dict-returning wrappers around it (job status, health,
    metrics, storage usage) in addition to the main download entry point.
    """

    def __init__(self, config: Optional[VideoDownloadConfig] = None):
        """Create the service.

        Args:
            config: Download configuration. When omitted (or falsy), the
                result of ``get_video_download_config()`` is used.
        """
        super().__init__()  # Initialize parent class

        self.download_config = config or get_video_download_config()
        self.intelligent_downloader = IntelligentVideoDownloader(self.download_config)

        logger.info("Enhanced video service initialized with intelligent downloader")

    async def get_video_for_processing(
        self,
        url: str,
        preferences: Optional[DownloadPreferences] = None,
    ) -> VideoDownloadResult:
        """Get video for processing - either download or extract transcript/metadata.

        This is the main entry point for the YouTube Summarizer pipeline.

        Args:
            url: Video URL; it is passed through ``extract_video_id`` first.
            preferences: Download preferences. When ``None``, audio-first
                defaults with transcript fallback are built from the
                download configuration.

        Returns:
            The downloader's result when its status is not ``FAILED``.

        Raises:
            ValidationError: Propagated unchanged from the parent class.
            DownloaderException: When the downloader reports ``FAILED`` or
                any other error occurs. Unexpected errors are wrapped and
                chained to their original cause.
        """
        try:
            # First validate the URL using parent class
            video_id = self.extract_video_id(url)

            # Set up default preferences optimized for summarization
            if preferences is None:
                preferences = DownloadPreferences(
                    quality=VideoQuality.MEDIUM_720P,
                    prefer_audio_only=True,  # For transcription, audio is sufficient
                    max_duration_minutes=self.download_config.max_video_duration_minutes,
                    fallback_to_transcript=True,  # Always allow transcript fallback
                    extract_audio=True,
                    save_video=self.download_config.save_video,
                    enable_subtitles=True,
                )

            # Use intelligent downloader
            result = await self.intelligent_downloader.download_video(url, preferences)

            # Validate result for pipeline requirements
            if result.status == DownloadStatus.FAILED:
                raise DownloaderException(f"All download methods failed: {result.error_message}")

            # Log success
            if result.status == DownloadStatus.COMPLETED:
                logger.info(f"Successfully downloaded video {video_id} using {result.method.value}")
            elif result.status == DownloadStatus.PARTIAL:
                logger.info(f"Got transcript/metadata for video {video_id} using {result.method.value}")

            return result

        except ValidationError:
            # Re-raise validation errors from parent class
            raise
        except DownloaderException as e:
            # Already the exception type callers expect: log it and let it
            # propagate as-is instead of wrapping it a second time.
            logger.error(f"Enhanced video service failed for {url}: {e}")
            raise
        except Exception as e:
            logger.error(f"Enhanced video service failed for {url}: {e}")
            # Chain the cause so the original traceback is preserved.
            raise DownloaderException(f"Video processing failed: {e}") from e

    async def get_video_metadata_only(self, url: str) -> Optional[Dict[str, Any]]:
        """Get only video metadata without downloading.

        Best-effort: any exception is logged and turned into ``None``.

        Args:
            url: Video URL.

        Returns:
            A dict of metadata fields, or ``None`` when the
            ``'transcript_only'`` downloader is not registered, yields no
            metadata, or an error occurs.
        """
        try:
            video_id = self.extract_video_id(url)

            # Use transcript-only downloader for metadata
            transcript_downloader = self.intelligent_downloader.downloaders.get('transcript_only')
            if not transcript_downloader:
                return None

            metadata = await transcript_downloader.get_video_metadata(video_id)
            if not metadata:
                return None

            return {
                'video_id': metadata.video_id,
                'title': metadata.title,
                'description': metadata.description,
                'duration_seconds': metadata.duration_seconds,
                'view_count': metadata.view_count,
                'upload_date': metadata.upload_date,
                'uploader': metadata.uploader,
                'thumbnail_url': metadata.thumbnail_url,
                'tags': metadata.tags,
                'language': metadata.language,
            }

        except Exception as e:
            logger.error(f"Metadata extraction failed for {url}: {e}")
            return None

    async def get_transcript_only(self, url: str) -> Optional[Dict[str, Any]]:
        """Get only transcript without downloading video.

        Best-effort: any exception is logged and turned into ``None``.

        Args:
            url: Video URL.

        Returns:
            A dict with the transcript text, language, auto-generated flag,
            segments and source, or ``None`` when the ``'transcript_only'``
            downloader is not registered, yields no transcript, or an error
            occurs.
        """
        try:
            video_id = self.extract_video_id(url)

            # Use transcript-only downloader
            transcript_downloader = self.intelligent_downloader.downloaders.get('transcript_only')
            if not transcript_downloader:
                return None

            transcript = await transcript_downloader.get_transcript(video_id)
            if not transcript:
                return None

            return {
                'text': transcript.text,
                'language': transcript.language,
                'is_auto_generated': transcript.is_auto_generated,
                'segments': transcript.segments,
                'source': transcript.source,
            }

        except Exception as e:
            logger.error(f"Transcript extraction failed for {url}: {e}")
            return None

    async def get_download_job_status(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Get status of an active download job.

        Args:
            job_id: Identifier passed to the downloader's ``get_job_status``.

        Returns:
            A JSON-friendly dict (enum values and ISO-formatted timestamps),
            or ``None`` when the downloader returns no status for the job.
        """
        job_status = await self.intelligent_downloader.get_job_status(job_id)
        if not job_status:
            return None

        current_method = job_status.current_method
        return {
            'job_id': job_status.job_id,
            'video_url': job_status.video_url,
            'status': job_status.status.value,
            'progress_percent': job_status.progress_percent,
            'current_method': current_method.value if current_method else None,
            'error_message': job_status.error_message,
            'created_at': job_status.created_at.isoformat(),
            'updated_at': job_status.updated_at.isoformat(),
        }

    async def cancel_download(self, job_id: str) -> bool:
        """Cancel an active download job.

        Returns:
            Whatever the downloader's ``cancel_job`` returns for ``job_id``.
        """
        return await self.intelligent_downloader.cancel_job(job_id)

    async def get_health_status(self) -> Dict[str, Any]:
        """Get health status of all download methods.

        Returns:
            The downloader's health-check result flattened into a dict, with
            ``last_check`` rendered as an ISO-formatted string.
        """
        health_result = await self.intelligent_downloader.health_check()

        return {
            'overall_status': health_result.overall_status,
            'healthy_methods': health_result.healthy_methods,
            'total_methods': health_result.total_methods,
            'method_details': health_result.method_details,
            'recommendations': health_result.recommendations,
            'last_check': health_result.last_check.isoformat(),
        }

    async def get_download_metrics(self) -> Dict[str, Any]:
        """Get download performance metrics.

        Returns:
            The downloader's metrics as a dict, plus a derived
            ``success_rate`` percentage and an ISO-formatted ``last_updated``.
        """
        metrics = self.intelligent_downloader.get_metrics()

        # max(..., 1) avoids division by zero before any attempt was recorded.
        attempts = max(metrics.total_attempts, 1)

        return {
            'total_attempts': metrics.total_attempts,
            'successful_downloads': metrics.successful_downloads,
            'failed_downloads': metrics.failed_downloads,
            'partial_downloads': metrics.partial_downloads,
            'success_rate': (metrics.successful_downloads / attempts) * 100,
            'method_success_rates': metrics.method_success_rates,
            'method_attempt_counts': metrics.method_attempt_counts,
            'average_download_time': metrics.average_download_time,
            'average_file_size_mb': metrics.average_file_size_mb,
            'common_errors': metrics.common_errors,
            'last_updated': metrics.last_updated.isoformat(),
        }

    async def cleanup_old_files(self, max_age_days: Optional[int] = None) -> Dict[str, Any]:
        """Clean up old downloaded files.

        Args:
            max_age_days: Forwarded unchanged to the downloader; ``None`` is
                presumably interpreted there as "use the configured
                default" - confirm against ``IntelligentVideoDownloader``.

        Returns:
            The downloader's cleanup report.
        """
        return await self.intelligent_downloader.cleanup_old_files(max_age_days)

    def get_supported_methods(self) -> list[str]:
        """Get list of supported download methods.

        The downloader registry is looked up by plain string elsewhere in
        this class, so keys are accepted both as enum members (their
        ``.value`` is returned) and as plain strings (returned unchanged).
        """
        return [
            getattr(method, 'value', method)
            for method in self.intelligent_downloader.downloaders
        ]

    @staticmethod
    def _measure_directory(path: Path) -> tuple[int, int]:
        """Return ``(total_size_bytes, file_count)`` for files under *path*.

        The tree is walked once. Entries that cannot be stat'ed (for
        example, removed between listing and ``stat``) are skipped.
        """
        total_size = 0
        file_count = 0
        for entry in path.glob('**/*'):
            try:
                if entry.is_file():
                    total_size += entry.stat().st_size
                    file_count += 1
            except OSError:
                # File disappeared or became unreadable mid-scan; ignore it.
                continue
        return total_size, file_count

    def get_storage_info(self) -> Dict[str, Any]:
        """Get storage directory information.

        Returns:
            One entry per storage directory reported by the download
            configuration (path, existence, size, file count), plus a
            ``'total'`` entry with aggregate usage measured against
            ``max_storage_gb``. A storage directory that is itself named
            ``'total'`` would be overwritten by the aggregate entry.
        """
        storage_dirs = self.download_config.get_storage_dirs()

        info: Dict[str, Any] = {}
        for name, path in storage_dirs.items():
            exists = path.exists()
            # Missing directories are reported with zeroed counters.
            size_bytes, file_count = self._measure_directory(path) if exists else (0, 0)

            info[name] = {
                'path': str(path),
                'exists': exists,
                'size_bytes': size_bytes,
                'size_mb': round(size_bytes / (1024 * 1024), 2) if exists else 0,
                'file_count': file_count,
            }

        # Calculate total usage
        total_size = sum(entry['size_bytes'] for entry in info.values())
        max_size_bytes = self.download_config.max_storage_gb * 1024 * 1024 * 1024

        info['total'] = {
            'size_bytes': total_size,
            'size_mb': round(total_size / (1024 * 1024), 2),
            'size_gb': round(total_size / (1024 * 1024 * 1024), 2),
            'max_size_gb': self.download_config.max_storage_gb,
            # Guard against a zero/negative quota to avoid division by zero.
            'usage_percent': round((total_size / max_size_bytes) * 100, 1) if max_size_bytes > 0 else 0,
        }

        return info


# Dependency injection for FastAPI
def get_enhanced_video_service() -> EnhancedVideoService:
    """Build an ``EnhancedVideoService`` for FastAPI dependency injection.

    No configuration is passed, so the service falls back to its default
    download configuration. A new instance is constructed on every call.
    """
    service = EnhancedVideoService()
    return service