# youtube-summarizer/backend/services/enhanced_video_service.py
#
# 238 lines
# 10 KiB
# Python

"""
Enhanced video service integrating the intelligent video downloader
"""
import asyncio
import logging
from typing import Optional, Dict, Any
from pathlib import Path
from backend.models.video_download import (
VideoDownloadResult,
DownloadPreferences,
DownloadStatus,
VideoQuality,
DownloaderException
)
from backend.config.video_download_config import VideoDownloadConfig, get_video_download_config
from backend.services.intelligent_video_downloader import IntelligentVideoDownloader
from backend.services.video_service import VideoService # Original service
from backend.core.exceptions import ValidationError, UnsupportedFormatError
logger = logging.getLogger(__name__)
class EnhancedVideoService(VideoService):
    """Enhanced video service with intelligent downloading capabilities.

    Extends ``VideoService`` and delegates downloading, job tracking, health
    checks, metrics and file cleanup to an ``IntelligentVideoDownloader``
    built from the active ``VideoDownloadConfig``.
    """

    def __init__(self, config: Optional[VideoDownloadConfig] = None):
        """Initialize the parent service and the intelligent downloader.

        Args:
            config: Download configuration. When omitted (or falsy) the
                result of ``get_video_download_config()`` is used instead.
        """
        super().__init__()  # Initialize parent class
        self.download_config = config or get_video_download_config()
        self.intelligent_downloader = IntelligentVideoDownloader(self.download_config)
        logger.info("Enhanced video service initialized with intelligent downloader")

    async def get_video_for_processing(
        self,
        url: str,
        preferences: Optional[DownloadPreferences] = None,
    ) -> VideoDownloadResult:
        """Get video for processing - either download or extract transcript/metadata.

        This is the main entry point for the YouTube Summarizer pipeline.

        Args:
            url: URL of the video to process.
            preferences: Download preferences. When ``None``, defaults tuned
                for summarization are used: 720p quality, audio-only
                preferred, audio extraction and subtitles enabled, transcript
                fallback allowed, and the duration limit / ``save_video``
                flag taken from the download configuration.

        Returns:
            The ``VideoDownloadResult`` produced by the intelligent
            downloader, for any status other than ``DownloadStatus.FAILED``.

        Raises:
            ValidationError: Re-raised unchanged when raised inside the
                processing steps (e.g. by URL validation).
            DownloaderException: For a ``FAILED`` result and for any other
                exception, which is logged and wrapped with the original
                exception attached as ``__cause__``.
        """
        try:
            # First validate the URL using the parent class
            video_id = self.extract_video_id(url)

            # Set up default preferences optimized for summarization
            if preferences is None:
                preferences = DownloadPreferences(
                    quality=VideoQuality.MEDIUM_720P,
                    prefer_audio_only=True,  # For transcription, audio is sufficient
                    max_duration_minutes=self.download_config.max_video_duration_minutes,
                    fallback_to_transcript=True,  # Always allow transcript fallback
                    extract_audio=True,
                    save_video=self.download_config.save_video,
                    enable_subtitles=True,
                )

            # Use intelligent downloader
            result = await self.intelligent_downloader.download_video(url, preferences)

            # Validate result for pipeline requirements
            if result.status == DownloadStatus.FAILED:
                raise DownloaderException(f"All download methods failed: {result.error_message}")

            # Log success
            if result.status == DownloadStatus.COMPLETED:
                logger.info(f"Successfully downloaded video {video_id} using {result.method.value}")
            elif result.status == DownloadStatus.PARTIAL:
                logger.info(f"Got transcript/metadata for video {video_id} using {result.method.value}")

            return result
        except ValidationError:
            # Re-raise validation errors from parent class
            raise
        except Exception as e:
            # This handler also receives the DownloaderException raised above
            # for FAILED results, so that message gets the prefix below too.
            logger.error(f"Enhanced video service failed for {url}: {e}")
            # Chain explicitly so the root cause stays attached to the wrapper.
            raise DownloaderException(f"Video processing failed: {e}") from e

    async def get_video_metadata_only(self, url: str) -> Optional[Dict[str, Any]]:
        """Get only video metadata without downloading.

        Best-effort: every exception is logged and converted to ``None``.

        Args:
            url: URL of the video.

        Returns:
            A dict with the keys ``video_id``, ``title``, ``description``,
            ``duration_seconds``, ``view_count``, ``upload_date``,
            ``uploader``, ``thumbnail_url``, ``tags`` and ``language``; or
            ``None`` when no ``'transcript_only'`` downloader is registered,
            no metadata is returned, or an error occurs.
        """
        try:
            video_id = self.extract_video_id(url)

            # Use transcript-only downloader for metadata
            transcript_downloader = self.intelligent_downloader.downloaders.get('transcript_only')
            if not transcript_downloader:
                return None

            metadata = await transcript_downloader.get_video_metadata(video_id)
            if not metadata:
                return None

            return {
                'video_id': metadata.video_id,
                'title': metadata.title,
                'description': metadata.description,
                'duration_seconds': metadata.duration_seconds,
                'view_count': metadata.view_count,
                'upload_date': metadata.upload_date,
                'uploader': metadata.uploader,
                'thumbnail_url': metadata.thumbnail_url,
                'tags': metadata.tags,
                'language': metadata.language,
            }
        except Exception as e:
            logger.error(f"Metadata extraction failed for {url}: {e}")
            return None

    async def get_transcript_only(self, url: str) -> Optional[Dict[str, Any]]:
        """Get only transcript without downloading video.

        Best-effort: every exception is logged and converted to ``None``.

        Args:
            url: URL of the video.

        Returns:
            A dict with the keys ``text``, ``language``,
            ``is_auto_generated``, ``segments`` and ``source``; or ``None``
            when no ``'transcript_only'`` downloader is registered, no
            transcript is returned, or an error occurs.
        """
        try:
            video_id = self.extract_video_id(url)

            # Use transcript-only downloader
            transcript_downloader = self.intelligent_downloader.downloaders.get('transcript_only')
            if not transcript_downloader:
                return None

            transcript = await transcript_downloader.get_transcript(video_id)
            if not transcript:
                return None

            return {
                'text': transcript.text,
                'language': transcript.language,
                'is_auto_generated': transcript.is_auto_generated,
                'segments': transcript.segments,
                'source': transcript.source,
            }
        except Exception as e:
            logger.error(f"Transcript extraction failed for {url}: {e}")
            return None

    async def get_download_job_status(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Get status of an active download job.

        Args:
            job_id: Identifier of the job to look up.

        Returns:
            A dict describing the job (enum fields reduced to ``.value`` and
            timestamps to ISO-8601 strings), or ``None`` when the downloader
            returns no status for ``job_id``.
        """
        job_status = await self.intelligent_downloader.get_job_status(job_id)
        if not job_status:
            return None

        current_method = job_status.current_method
        return {
            'job_id': job_status.job_id,
            'video_url': job_status.video_url,
            'status': job_status.status.value,
            'progress_percent': job_status.progress_percent,
            'current_method': current_method.value if current_method else None,
            'error_message': job_status.error_message,
            'created_at': job_status.created_at.isoformat(),
            'updated_at': job_status.updated_at.isoformat(),
        }

    async def cancel_download(self, job_id: str) -> bool:
        """Cancel an active download job.

        Args:
            job_id: Identifier of the job to cancel.

        Returns:
            Whatever the downloader's ``cancel_job`` reports for ``job_id``.
        """
        return await self.intelligent_downloader.cancel_job(job_id)

    async def get_health_status(self) -> Dict[str, Any]:
        """Get health status of all download methods.

        Returns:
            A dict with ``overall_status``, ``healthy_methods``,
            ``total_methods``, ``method_details``, ``recommendations`` and
            ``last_check`` (ISO-8601 string), copied from the downloader's
            health-check result.
        """
        health_result = await self.intelligent_downloader.health_check()
        return {
            'overall_status': health_result.overall_status,
            'healthy_methods': health_result.healthy_methods,
            'total_methods': health_result.total_methods,
            'method_details': health_result.method_details,
            'recommendations': health_result.recommendations,
            'last_check': health_result.last_check.isoformat(),
        }

    async def get_download_metrics(self) -> Dict[str, Any]:
        """Get download performance metrics.

        Returns:
            A dict of the downloader's metrics plus a derived
            ``success_rate`` percentage (successful downloads over total
            attempts) and ``last_updated`` as an ISO-8601 string.
        """
        metrics = self.intelligent_downloader.get_metrics()
        # Clamp the divisor to 1 so zero attempts yields 0% instead of raising.
        attempts = max(metrics.total_attempts, 1)
        return {
            'total_attempts': metrics.total_attempts,
            'successful_downloads': metrics.successful_downloads,
            'failed_downloads': metrics.failed_downloads,
            'partial_downloads': metrics.partial_downloads,
            'success_rate': (metrics.successful_downloads / attempts) * 100,
            'method_success_rates': metrics.method_success_rates,
            'method_attempt_counts': metrics.method_attempt_counts,
            'average_download_time': metrics.average_download_time,
            'average_file_size_mb': metrics.average_file_size_mb,
            'common_errors': metrics.common_errors,
            'last_updated': metrics.last_updated.isoformat(),
        }

    async def cleanup_old_files(self, max_age_days: Optional[int] = None) -> Dict[str, Any]:
        """Clean up old downloaded files.

        Args:
            max_age_days: Age threshold forwarded unchanged to the
                downloader; ``None`` is passed through as-is.

        Returns:
            The downloader's cleanup result.
        """
        return await self.intelligent_downloader.cleanup_old_files(max_age_days)

    def get_supported_methods(self) -> list[str]:
        """Get list of supported download methods.

        Returns:
            One entry per key of the downloader registry. Enum keys are
            reduced to their ``.value``; plain keys (the registry is also
            looked up by string elsewhere in this class) are returned as-is.
        """
        return [
            getattr(method, 'value', method)
            for method in self.intelligent_downloader.downloaders
        ]

    @staticmethod
    def _scan_directory(path: Path) -> tuple[int, int]:
        """Return ``(total_bytes, file_count)`` for regular files under *path*."""
        total_size = 0
        file_count = 0
        for entry in path.glob('**/*'):
            try:
                if entry.is_file():
                    total_size += entry.stat().st_size
                    file_count += 1
            except OSError:
                # The file vanished or became unreadable mid-scan; skip it
                # rather than failing the whole report.
                continue
        return total_size, file_count

    def get_storage_info(self) -> Dict[str, Any]:
        """Get storage directory information.

        Returns:
            A dict with one entry per configured storage directory holding
            ``path``, ``exists``, ``size_bytes``, ``size_mb`` and
            ``file_count``, plus a ``'total'`` entry with the summed size
            (bytes / MB / GB), the configured ``max_size_gb`` and
            ``usage_percent`` (0 when the configured maximum is not
            positive).
        """
        storage_dirs = self.download_config.get_storage_dirs()
        info: Dict[str, Any] = {}

        for name, path in storage_dirs.items():
            exists = path.exists()
            # Single pass over the tree yields both the size and the count.
            size_bytes, file_count = self._scan_directory(path) if exists else (0, 0)
            info[name] = {
                'path': str(path),
                'exists': exists,
                'size_bytes': size_bytes,
                'size_mb': round(size_bytes / (1024 * 1024), 2) if exists else 0,
                'file_count': file_count,
            }

        # Calculate total usage (summed before the 'total' entry is added).
        # NOTE(review): if get_storage_dirs() returns directories nested inside
        # one another, their files are counted once per directory here - confirm.
        total_size = sum(entry['size_bytes'] for entry in info.values())
        max_size_bytes = self.download_config.max_storage_gb * 1024 * 1024 * 1024
        info['total'] = {
            'size_bytes': total_size,
            'size_mb': round(total_size / (1024 * 1024), 2),
            'size_gb': round(total_size / (1024 * 1024 * 1024), 2),
            'max_size_gb': self.download_config.max_storage_gb,
            'usage_percent': round((total_size / max_size_bytes) * 100, 1) if max_size_bytes > 0 else 0,
        }
        return info
# Dependency injection for FastAPI
# Lazily created instance shared by every call to the provider below.
_enhanced_video_service_instance: Optional[EnhancedVideoService] = None


def get_enhanced_video_service() -> EnhancedVideoService:
    """Get the shared enhanced video service instance.

    The service is created on first use and reused afterwards, so each call
    no longer builds a new service and a new ``IntelligentVideoDownloader``.
    Job status, cancellation and metrics are all looked up through the
    service's downloader.
    NOTE(review): this presumes the downloader keeps that state per
    instance - confirm against ``IntelligentVideoDownloader``.

    Returns:
        The process-wide ``EnhancedVideoService`` instance.
    """
    global _enhanced_video_service_instance
    if _enhanced_video_service_instance is None:
        _enhanced_video_service_instance = EnhancedVideoService()
    return _enhanced_video_service_instance