# 238 lines, 10 KiB, Python
"""
|
|
Enhanced video service integrating the intelligent video downloader
|
|
"""
|
|
import asyncio
|
|
import logging
|
|
from typing import Optional, Dict, Any
|
|
from pathlib import Path
|
|
|
|
from backend.models.video_download import (
|
|
VideoDownloadResult,
|
|
DownloadPreferences,
|
|
DownloadStatus,
|
|
VideoQuality,
|
|
DownloaderException
|
|
)
|
|
from backend.config.video_download_config import VideoDownloadConfig, get_video_download_config
|
|
from backend.services.intelligent_video_downloader import IntelligentVideoDownloader
|
|
from backend.services.video_service import VideoService # Original service
|
|
from backend.core.exceptions import ValidationError, UnsupportedFormatError
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class EnhancedVideoService(VideoService):
    """Enhanced video service with intelligent downloading capabilities.

    Wraps an ``IntelligentVideoDownloader`` and exposes its download, job,
    health, metrics, cleanup and storage information as plain dictionaries.
    """

    # Key used to look up the transcript/metadata downloader in
    # ``intelligent_downloader.downloaders``.
    _TRANSCRIPT_METHOD = 'transcript_only'

    def __init__(self, config: Optional[VideoDownloadConfig] = None):
        """Create the service.

        Args:
            config: Download configuration. When ``None``, the result of
                ``get_video_download_config()`` is used.
        """
        super().__init__()  # Initialize parent class

        # Explicit None check so that a falsy-but-valid config object is not
        # silently replaced by the default one.
        self.download_config = config if config is not None else get_video_download_config()
        self.intelligent_downloader = IntelligentVideoDownloader(self.download_config)

        logger.info("Enhanced video service initialized with intelligent downloader")

    def _default_preferences(self) -> DownloadPreferences:
        """Build the default download preferences, optimized for summarization."""
        return DownloadPreferences(
            quality=VideoQuality.MEDIUM_720P,
            prefer_audio_only=True,  # For transcription, audio is sufficient
            max_duration_minutes=self.download_config.max_video_duration_minutes,
            fallback_to_transcript=True,  # Always allow transcript fallback
            extract_audio=True,
            save_video=self.download_config.save_video,
            enable_subtitles=True,
        )

    def _get_transcript_downloader(self):
        """Return the transcript-only downloader, or ``None`` if not registered."""
        return self.intelligent_downloader.downloaders.get(self._TRANSCRIPT_METHOD)

    async def get_video_for_processing(self, url: str, preferences: Optional[DownloadPreferences] = None) -> VideoDownloadResult:
        """
        Get video for processing - either download or extract transcript/metadata

        This is the main entry point for the YouTube Summarizer pipeline

        Args:
            url: Video URL; validated through ``extract_video_id``.
            preferences: Download preferences. When ``None``, defaults
                optimized for summarization are used.

        Returns:
            The result produced by the intelligent downloader.

        Raises:
            ValidationError: Re-raised unchanged when raised during processing.
            DownloaderException: When the downloader reports a FAILED status
                or any other exception occurs; the original exception is
                attached as ``__cause__``.
        """
        try:
            # First validate the URL using parent class
            video_id = self.extract_video_id(url)

            # Set up default preferences optimized for summarization
            if preferences is None:
                preferences = self._default_preferences()

            # Use intelligent downloader
            result = await self.intelligent_downloader.download_video(url, preferences)

            # Validate result for pipeline requirements
            if result.status == DownloadStatus.FAILED:
                raise DownloaderException(f"All download methods failed: {result.error_message}")

            # Log success
            if result.status == DownloadStatus.COMPLETED:
                logger.info(f"Successfully downloaded video {video_id} using {result.method.value}")
            elif result.status == DownloadStatus.PARTIAL:
                logger.info(f"Got transcript/metadata for video {video_id} using {result.method.value}")

            return result

        except ValidationError:
            # Re-raise validation errors from parent class
            raise
        except Exception as e:
            logger.error(f"Enhanced video service failed for {url}: {e}")
            # Chain the cause so the original traceback is preserved.
            raise DownloaderException(f"Video processing failed: {e}") from e

    async def get_video_metadata_only(self, url: str) -> Optional[Dict[str, Any]]:
        """Get only video metadata without downloading.

        Returns:
            A dictionary of metadata fields, or ``None`` when the transcript
            downloader is unavailable, yields no metadata, or any error
            occurs (errors are logged, not raised).
        """
        try:
            video_id = self.extract_video_id(url)

            # Use transcript-only downloader for metadata
            transcript_downloader = self._get_transcript_downloader()
            if not transcript_downloader:
                return None

            metadata = await transcript_downloader.get_video_metadata(video_id)
            if not metadata:
                return None

            return {
                'video_id': metadata.video_id,
                'title': metadata.title,
                'description': metadata.description,
                'duration_seconds': metadata.duration_seconds,
                'view_count': metadata.view_count,
                'upload_date': metadata.upload_date,
                'uploader': metadata.uploader,
                'thumbnail_url': metadata.thumbnail_url,
                'tags': metadata.tags,
                'language': metadata.language,
            }

        except Exception as e:
            # Deliberate best-effort: callers receive None instead of an error.
            logger.error(f"Metadata extraction failed for {url}: {e}")
            return None

    async def get_transcript_only(self, url: str) -> Optional[Dict[str, Any]]:
        """Get only transcript without downloading video.

        Returns:
            A dictionary describing the transcript, or ``None`` when the
            transcript downloader is unavailable, yields no transcript, or
            any error occurs (errors are logged, not raised).
        """
        try:
            video_id = self.extract_video_id(url)

            # Use transcript-only downloader
            transcript_downloader = self._get_transcript_downloader()
            if not transcript_downloader:
                return None

            transcript = await transcript_downloader.get_transcript(video_id)
            if not transcript:
                return None

            return {
                'text': transcript.text,
                'language': transcript.language,
                'is_auto_generated': transcript.is_auto_generated,
                'segments': transcript.segments,
                'source': transcript.source,
            }

        except Exception as e:
            # Deliberate best-effort: callers receive None instead of an error.
            logger.error(f"Transcript extraction failed for {url}: {e}")
            return None

    async def get_download_job_status(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Get status of an active download job.

        Returns:
            A dictionary describing the job, or ``None`` when the downloader
            returns no status for ``job_id``.
        """
        job_status = await self.intelligent_downloader.get_job_status(job_id)
        if not job_status:
            return None

        current_method = job_status.current_method
        return {
            'job_id': job_status.job_id,
            'video_url': job_status.video_url,
            'status': job_status.status.value,
            'progress_percent': job_status.progress_percent,
            'current_method': current_method.value if current_method else None,
            'error_message': job_status.error_message,
            'created_at': job_status.created_at.isoformat(),
            'updated_at': job_status.updated_at.isoformat(),
        }

    async def cancel_download(self, job_id: str) -> bool:
        """Cancel an active download job; returns the downloader's result."""
        return await self.intelligent_downloader.cancel_job(job_id)

    async def get_health_status(self) -> Dict[str, Any]:
        """Get health status of all download methods as a dictionary."""
        health_result = await self.intelligent_downloader.health_check()

        return {
            'overall_status': health_result.overall_status,
            'healthy_methods': health_result.healthy_methods,
            'total_methods': health_result.total_methods,
            'method_details': health_result.method_details,
            'recommendations': health_result.recommendations,
            'last_check': health_result.last_check.isoformat(),
        }

    async def get_download_metrics(self) -> Dict[str, Any]:
        """Get download performance metrics as a dictionary.

        ``success_rate`` is a percentage; the attempt count is clamped to at
        least 1 so that zero attempts yields 0 rather than a division error.
        """
        metrics = self.intelligent_downloader.get_metrics()

        return {
            'total_attempts': metrics.total_attempts,
            'successful_downloads': metrics.successful_downloads,
            'failed_downloads': metrics.failed_downloads,
            'partial_downloads': metrics.partial_downloads,
            'success_rate': (metrics.successful_downloads / max(metrics.total_attempts, 1)) * 100,
            'method_success_rates': metrics.method_success_rates,
            'method_attempt_counts': metrics.method_attempt_counts,
            'average_download_time': metrics.average_download_time,
            'average_file_size_mb': metrics.average_file_size_mb,
            'common_errors': metrics.common_errors,
            'last_updated': metrics.last_updated.isoformat(),
        }

    async def cleanup_old_files(self, max_age_days: Optional[int] = None) -> Dict[str, Any]:
        """Clean up old downloaded files.

        Args:
            max_age_days: Passed through to the downloader unchanged;
                ``None`` leaves the choice of age limit to the downloader.
        """
        return await self.intelligent_downloader.cleanup_old_files(max_age_days)

    def get_supported_methods(self) -> list[str]:
        """Get list of supported download methods.

        Keys of the downloader registry may be enum members or plain strings
        (other methods of this class look them up by string), so the enum
        ``value`` is used when present and the key itself otherwise.
        """
        return [
            getattr(method, 'value', method)
            for method in self.intelligent_downloader.downloaders
        ]

    @staticmethod
    def _measure_directory(path: Path) -> tuple[int, int]:
        """Return ``(total_size_bytes, file_count)`` for all files under ``path``.

        The tree is walked once. A file that cannot be stat'ed (for example
        because it was removed while walking) is skipped rather than raising.
        """
        total_size = 0
        file_count = 0
        for entry in path.glob('**/*'):
            if not entry.is_file():
                continue
            try:
                total_size += entry.stat().st_size
            except OSError:
                continue
            file_count += 1
        return total_size, file_count

    def get_storage_info(self) -> Dict[str, Any]:
        """Get storage directory information.

        Returns:
            One entry per storage directory reported by the download config
            (path, existence, size and file count) plus a ``'total'`` entry
            with the summed size and usage relative to ``max_storage_gb``.
        """
        storage_dirs = self.download_config.get_storage_dirs()

        info = {}
        for name, path in storage_dirs.items():
            present = path.exists()
            # Missing directories are reported with zero size and count.
            size_bytes, file_count = self._measure_directory(path) if present else (0, 0)

            info[name] = {
                'path': str(path),
                'exists': present,
                'size_bytes': size_bytes,
                'size_mb': round(size_bytes / (1024 * 1024), 2) if present else 0,
                'file_count': file_count,
            }

        # Calculate total usage
        total_size = sum(entry['size_bytes'] for entry in info.values())
        max_size_bytes = self.download_config.max_storage_gb * 1024 * 1024 * 1024

        info['total'] = {
            'size_bytes': total_size,
            'size_mb': round(total_size / (1024 * 1024), 2),
            'size_gb': round(total_size / (1024 * 1024 * 1024), 2),
            'max_size_gb': self.download_config.max_storage_gb,
            # Guard against a zero/negative limit to avoid division by zero.
            'usage_percent': round((total_size / max_size_bytes) * 100, 1) if max_size_bytes > 0 else 0,
        }

        return info
|
|
|
|
|
|
# Dependency injection for FastAPI
|
|
def get_enhanced_video_service() -> EnhancedVideoService:
    """Construct and return an ``EnhancedVideoService`` with default config.

    NOTE(review): a new service (and therefore a new downloader) is built on
    every call; confirm that job/metrics state does not need to be shared
    between calls.
    """
    service = EnhancedVideoService()
    return service