youtube-summarizer/backend/api/video_download.py

338 lines
11 KiB
Python

"""
API endpoints for video download functionality
"""
import logging
from typing import Optional, Dict, Any
from urllib.parse import quote

from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from pydantic import BaseModel, HttpUrl, Field

from backend.services.enhanced_video_service import EnhancedVideoService, get_enhanced_video_service
from backend.models.video_download import DownloadPreferences, VideoQuality, DownloadStatus
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router collecting every endpoint defined below under the /api/video prefix.
router = APIRouter(prefix="/api/video", tags=["video-download"])
class VideoProcessRequest(BaseModel):
    """Request model for video processing.

    Body accepted by the POST /process endpoint.
    """
    # URL of the video to process; the endpoint converts it to ``str`` before
    # handing it to the video service.
    url: HttpUrl
    # Optional download preferences; forwarded to the service unchanged,
    # including when it is None.
    preferences: Optional[DownloadPreferences] = None
class VideoDownloadResponse(BaseModel):
    """Response model for video download.

    Returned by the POST /process endpoint, which fills it from the result of
    ``video_service.get_video_for_processing``.
    """
    video_id: str
    video_url: str
    # ``.value`` of the result's status attribute.
    status: str
    # ``.value`` of the result's method attribute.
    method: str
    # Paths rendered with ``str()``; None when the result carries no path.
    video_path: Optional[str] = None
    audio_path: Optional[str] = None
    # Keys set by the endpoint: text, language, is_auto_generated, segments,
    # source. None when the result has no transcript.
    transcript: Optional[Dict[str, Any]] = None
    # Keys set by the endpoint: video_id, title, description,
    # duration_seconds, view_count, upload_date, uploader, thumbnail_url,
    # tags, language, availability, age_restricted. None when absent.
    metadata: Optional[Dict[str, Any]] = None
    processing_time_seconds: Optional[float] = None
    file_size_bytes: Optional[int] = None
    # Copied from the result's ``is_partial`` flag.
    is_partial: bool = False
    error_message: Optional[str] = None
class HealthStatusResponse(BaseModel):
    """Response model for health status.

    The GET /health endpoint builds it by unpacking the dict returned by
    ``video_service.get_health_status()``, so that dict must provide every
    field below.
    """
    overall_status: str
    healthy_methods: int
    total_methods: int
    # Mapping from a string key to a dict of details.
    # NOTE(review): presumably keyed by download method name - confirm.
    method_details: Dict[str, Dict[str, Any]]
    recommendations: list[str]
    # NOTE(review): presumably a timestamp string; format not visible here.
    last_check: str
class MetricsResponse(BaseModel):
    """Response model for download metrics.

    The GET /metrics endpoint builds it by unpacking the dict returned by
    ``video_service.get_download_metrics()``, so that dict must provide every
    field below.
    """
    total_attempts: int
    successful_downloads: int
    failed_downloads: int
    partial_downloads: int
    # NOTE(review): unit (fraction vs. percentage) is not visible here - confirm.
    success_rate: float
    # NOTE(review): both mappings are presumably keyed by method name - confirm.
    method_success_rates: Dict[str, float]
    method_attempt_counts: Dict[str, int]
    # NOTE(review): presumably error description -> occurrence count - confirm.
    common_errors: Dict[str, int]
    # NOTE(review): presumably a timestamp string; format not visible here.
    last_updated: str
def _transcript_to_dict(transcript) -> Optional[Dict[str, Any]]:
    """Flatten a transcript object into a plain dict, or None if it is falsy."""
    if not transcript:
        return None
    return {
        'text': transcript.text,
        'language': transcript.language,
        'is_auto_generated': transcript.is_auto_generated,
        'segments': transcript.segments,
        'source': transcript.source,
    }


def _metadata_to_dict(metadata) -> Optional[Dict[str, Any]]:
    """Flatten a metadata object into a plain dict, or None if it is falsy."""
    if not metadata:
        return None
    return {
        'video_id': metadata.video_id,
        'title': metadata.title,
        'description': metadata.description,
        'duration_seconds': metadata.duration_seconds,
        'view_count': metadata.view_count,
        'upload_date': metadata.upload_date,
        'uploader': metadata.uploader,
        'thumbnail_url': metadata.thumbnail_url,
        'tags': metadata.tags,
        'language': metadata.language,
        'availability': metadata.availability,
        'age_restricted': metadata.age_restricted,
    }


@router.post("/process", response_model=VideoDownloadResponse)
async def process_video(
    request: VideoProcessRequest,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """
    Process a YouTube video - download and extract content.

    This is the main endpoint for the YouTube Summarizer pipeline.

    Args:
        request: Body carrying the video URL and optional download preferences.
        video_service: Service instance injected by FastAPI.

    Returns:
        A VideoDownloadResponse built from the service result, with paths
        rendered as strings and transcript/metadata flattened to dicts.

    Raises:
        HTTPException: Re-raised unchanged when raised during processing;
            otherwise a 500 whose detail holds the error message and type.
    """
    try:
        result = await video_service.get_video_for_processing(
            str(request.url),
            request.preferences
        )
        return VideoDownloadResponse(
            video_id=result.video_id,
            video_url=result.video_url,
            status=result.status.value,
            method=result.method.value,
            # Paths are converted to strings for JSON serialization.
            video_path=str(result.video_path) if result.video_path else None,
            audio_path=str(result.audio_path) if result.audio_path else None,
            transcript=_transcript_to_dict(result.transcript),
            metadata=_metadata_to_dict(result.metadata),
            processing_time_seconds=result.processing_time_seconds,
            file_size_bytes=result.file_size_bytes,
            is_partial=result.is_partial,
            error_message=result.error_message
        )
    except HTTPException:
        # Keep deliberate HTTP errors intact instead of masking them as 500,
        # matching the other endpoints in this module.
        raise
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"Video processing failed: {e}")
        raise HTTPException(
            status_code=500,
            detail={
                "error": "Video processing failed",
                "message": str(e),
                "type": type(e).__name__
            }
        ) from e
@router.get("/metadata/{video_id}")
async def get_video_metadata(
    video_id: str,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Get video metadata without downloading.

    Args:
        video_id: Video identifier taken from the request path.
        video_service: Service instance injected by FastAPI.

    Returns:
        Whatever ``video_service.get_video_metadata_only`` returns.

    Raises:
        HTTPException: 404 when the service returns a falsy value, 500 when
            the lookup fails with any other error.
    """
    try:
        # Percent-encode the ID so characters such as "&" or "#" cannot add
        # extra query parameters or a fragment to the URL given to the
        # service. IDs made of letters, digits, "-" and "_" are unchanged.
        safe_id = quote(video_id, safe="")
        url = f"https://youtube.com/watch?v={safe_id}"
        metadata = await video_service.get_video_metadata_only(url)
        if not metadata:
            raise HTTPException(status_code=404, detail="Video metadata not found")
        return metadata
    except HTTPException:
        raise
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"Metadata extraction failed: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Metadata extraction failed: {e}"
        ) from e
@router.get("/transcript/{video_id}")
async def get_video_transcript(
    video_id: str,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Get video transcript without downloading.

    Args:
        video_id: Video identifier taken from the request path.
        video_service: Service instance injected by FastAPI.

    Returns:
        Whatever ``video_service.get_transcript_only`` returns.

    Raises:
        HTTPException: 404 when the service returns a falsy value, 500 when
            the lookup fails with any other error.
    """
    try:
        # Percent-encode the ID so characters such as "&" or "#" cannot add
        # extra query parameters or a fragment to the URL given to the
        # service. IDs made of letters, digits, "-" and "_" are unchanged.
        safe_id = quote(video_id, safe="")
        url = f"https://youtube.com/watch?v={safe_id}"
        transcript = await video_service.get_transcript_only(url)
        if not transcript:
            raise HTTPException(status_code=404, detail="Video transcript not found")
        return transcript
    except HTTPException:
        raise
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"Transcript extraction failed: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Transcript extraction failed: {e}"
        ) from e
@router.get("/job/{job_id}")
async def get_download_job_status(
    job_id: str,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Look up a download job and return what the service reports for it.

    Responds with 404 when the service returns a falsy status and with 500
    when the lookup raises any other error.
    """
    try:
        job_status = await video_service.get_download_job_status(job_id)
        if job_status:
            return job_status
        raise HTTPException(status_code=404, detail="Job not found")
    except HTTPException:
        # Deliberate HTTP errors (such as the 404 above) pass through as-is.
        raise
    except Exception as exc:
        failure = f"Job status query failed: {exc}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=failure)
@router.delete("/job/{job_id}")
async def cancel_download_job(
    job_id: str,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Ask the service to cancel a download job.

    Responds with a confirmation message when the service reports success,
    with 404 when it reports a falsy result, and with 500 when the call
    raises any other error.
    """
    try:
        cancelled = await video_service.cancel_download(job_id)
        if cancelled:
            return {"message": "Job cancelled successfully"}
        raise HTTPException(status_code=404, detail="Job not found or already completed")
    except HTTPException:
        # Deliberate HTTP errors (such as the 404 above) pass through as-is.
        raise
    except Exception as exc:
        failure = f"Job cancellation failed: {exc}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=failure)
@router.get("/health", response_model=HealthStatusResponse)
async def get_health_status(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Report the health of the download methods.

    The dict returned by the service is unpacked into a HealthStatusResponse;
    any failure along the way is logged and answered with a 500.
    """
    try:
        report = await video_service.get_health_status()
        response = HealthStatusResponse(**report)
    except Exception as exc:
        failure = f"Health check failed: {exc}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=failure)
    return response
@router.get("/metrics", response_model=MetricsResponse)
async def get_download_metrics(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Report download performance metrics.

    The dict returned by the service is unpacked into a MetricsResponse; any
    failure along the way is logged and answered with a 500.
    """
    try:
        raw_metrics = await video_service.get_download_metrics()
        response = MetricsResponse(**raw_metrics)
    except Exception as exc:
        failure = f"Metrics query failed: {exc}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=failure)
    return response
@router.get("/storage")
async def get_storage_info(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Return the storage usage information reported by the service.

    The service call is synchronous; any error it raises is logged and
    answered with a 500.
    """
    try:
        storage_info = video_service.get_storage_info()
    except Exception as exc:
        failure = f"Storage info query failed: {exc}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=failure)
    return storage_info
@router.post("/cleanup")
async def cleanup_old_files(
    max_age_days: Optional[int] = None,
    background_tasks: BackgroundTasks = BackgroundTasks(),
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Schedule removal of old downloaded files as a background task.

    The endpoint only queues ``video_service.cleanup_old_files(max_age_days)``
    and returns immediately; it does not wait for the cleanup to finish.

    NOTE(review): the ``BackgroundTasks()`` default is one instance created
    when the module is imported. FastAPI is expected to inject a per-request
    instance based on the annotation - confirm, since a direct call relying
    on the default would queue onto that shared object.
    """
    try:
        # Queue the cleanup; it runs outside this request handler.
        background_tasks.add_task(video_service.cleanup_old_files, max_age_days)
    except Exception as exc:
        failure = f"Cleanup task failed: {exc}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=failure)
    return {"message": "Cleanup task started"}
@router.get("/methods")
async def get_supported_methods(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Return the download methods the service supports.

    The service's answer is wrapped under the "methods" key; any error it
    raises is logged and answered with a 500.
    """
    try:
        supported = video_service.get_supported_methods()
    except Exception as exc:
        failure = f"Methods query failed: {exc}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=failure)
    return {"methods": supported}
# Test endpoint for development
@router.post("/test")
async def test_download_system(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Run the download pipeline against a fixed video and summarise the outcome.

    Never raises: a failure is logged and reported in the returned dict with
    "status": "failed" instead of an HTTP error.
    """
    sample_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
    try:
        # Audio-only with transcript fallback, capped at a short duration so
        # the check stays quick.
        prefs = DownloadPreferences(
            prefer_audio_only=True,
            fallback_to_transcript=True,
            max_duration_minutes=10
        )
        outcome = await video_service.get_video_for_processing(sample_url, prefs)
        summary = {
            "status": "success",
            "result_status": outcome.status.value,
            "method_used": outcome.method.value,
            "has_transcript": outcome.transcript is not None,
            "has_metadata": outcome.metadata is not None,
            "processing_time": outcome.processing_time_seconds
        }
    except Exception as exc:
        logger.error(f"Download system test failed: {exc}")
        summary = {
            "status": "failed",
            "error": str(exc),
            "error_type": type(exc).__name__
        }
    return summary