youtube-summarizer/backend/api/videos.py

457 lines
14 KiB
Python

"""
Video download API endpoints.
Handles video downloading, storage management, and progress tracking.
"""
from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends, Query
from fastapi.responses import JSONResponse
from typing import Optional, List, Dict, Any
from pathlib import Path
import logging
import asyncio
import uuid
from backend.models.video import (
VideoDownloadRequest,
VideoResponse,
StorageStats,
CleanupRequest,
CleanupResponse,
CachedVideo,
BatchDownloadRequest,
BatchDownloadResponse,
VideoArchiveRequest,
VideoRestoreRequest,
DownloadProgress,
DownloadStatus
)
from backend.services.video_download_service import VideoDownloadService, VideoDownloadError
from backend.services.storage_manager import StorageManager
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Create router; routes registered on it share the /api/videos prefix
# and the "videos" tag.
router = APIRouter(prefix="/api/videos", tags=["videos"])
# Service instances (in production, use dependency injection).
# Both start as None and are populated on first use by
# get_video_service() / get_storage_manager().
video_service = None
storage_manager = None
# Track background download jobs: job ID -> dict describing the job
# (always carries a 'status' key). Kept in process memory only; nothing
# in the visible code ever removes an entry.
download_jobs = {}
def get_video_service() -> VideoDownloadService:
    """Return the module-wide VideoDownloadService, building it on first call."""
    global video_service
    if video_service is not None:
        return video_service
    # First call: create the instance and keep it for later callers.
    video_service = VideoDownloadService()
    return video_service
def get_storage_manager() -> StorageManager:
    """Return the module-wide StorageManager, building it on first call."""
    global storage_manager
    if storage_manager is not None:
        return storage_manager
    # First call: create the instance and keep it for later callers.
    storage_manager = StorageManager()
    return storage_manager
async def download_video_task(
    job_id: str,
    url: str,
    quality: str,
    extract_audio: bool,
    force: bool
):
    """
    Background task for video download.

    Tracks the job in ``download_jobs[job_id]``: the status becomes
    DOWNLOADING when the task starts, then COMPLETED (with the resulting
    paths, title and size) or FAILED (with the error text). Status updates
    are merged into the existing entry rather than replacing it, so fields
    stored when the job was queued (such as ``video_id`` and ``title``)
    stay available while the download runs and after a failure.

    Args:
        job_id: Key of the job entry in ``download_jobs``.
        url: URL of the video to download.
        quality: Value assigned to the service's ``video_quality``.
        extract_audio: Forwarded to ``service.download_video``.
        force: Forwarded to ``service.download_video``.

    Exceptions derived from ``Exception`` are logged and recorded on the
    job instead of propagating.
    """
    def _update_job(**fields):
        # Merge rather than overwrite so earlier metadata is not dropped.
        download_jobs[job_id] = {**download_jobs.get(job_id, {}), **fields}

    try:
        _update_job(status=DownloadStatus.DOWNLOADING, url=url)
        service = get_video_service()
        # NOTE: get_video_service() returns a module-wide instance, so this
        # assignment is visible to every other user of that instance.
        service.video_quality = quality
        video_path, audio_path = await service.download_video(
            url=url,
            extract_audio=extract_audio,
            force=force
        )
        # Get video info from cache
        info = await service.get_video_info(url)
        video_id = info['id']
        video_hash = service._get_video_hash(video_id)
        cached_info = service.cache.get(video_hash, {})
        _update_job(
            status=DownloadStatus.COMPLETED,
            video_id=video_id,
            video_path=str(video_path) if video_path else None,
            audio_path=str(audio_path) if audio_path else None,
            title=cached_info.get('title', 'Unknown'),
            size_mb=cached_info.get('size_bytes', 0) / (1024 * 1024)
        )
    except Exception as e:
        logger.error(f"Background download failed for job {job_id}: {e}")
        _update_job(status=DownloadStatus.FAILED, error=str(e))
@router.post("/download", response_model=VideoResponse)
async def download_video(
    request: VideoDownloadRequest,
    background_tasks: BackgroundTasks,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """
    Download a YouTube video in the request itself, optionally extracting audio.

    The response is produced only after the download call has returned.
    To queue the download instead, use the /download/background endpoint.

    Raises:
        HTTPException: 400 when the service raises VideoDownloadError,
            500 for any other exception.
    """
    try:
        # Apply the requested quality to the service before downloading.
        chosen_quality = request.quality.value
        video_service.video_quality = chosen_quality
        source_url = str(request.url)

        # The cache check has to happen before the download call so the
        # response can report whether the video was already there.
        metadata = await video_service.get_video_info(source_url)
        video_id = metadata['id']
        already_downloaded = video_service.is_video_downloaded(video_id)
        was_cached = already_downloaded and not request.force_download

        video_file, audio_file = await video_service.download_video(
            url=source_url,
            extract_audio=request.extract_audio,
            force=request.force_download
        )

        # Read the cache entry again; fall back to the fetched metadata
        # for fields the entry does not carry.
        cache_key = video_service._get_video_hash(video_id)
        entry = video_service.cache.get(cache_key, {})
        return VideoResponse(
            video_id=video_id,
            title=entry.get('title', metadata.get('title', 'Unknown')),
            video_path=str(video_file) if video_file else "",
            audio_path=str(audio_file) if audio_file else None,
            download_date=entry.get('download_date', ''),
            size_mb=entry.get('size_bytes', 0) / (1024 * 1024),
            duration=entry.get('duration', metadata.get('duration', 0)),
            quality=chosen_quality,
            cached=was_cached
        )
    except VideoDownloadError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        logger.error(f"Download failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Download failed: {str(exc)}")
@router.post("/download/background")
async def download_video_background(
    request: VideoDownloadRequest,
    background_tasks: BackgroundTasks,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """
    Queue a video download as a background task.

    The response carries a job ID; pass it to the download status
    endpoint to follow the job.

    Raises:
        HTTPException: 400 when anything fails while queueing, including
            the video info lookup.
    """
    try:
        job_id = str(uuid.uuid4())
        target_url = str(request.url)

        # Fetching the video info doubles as URL validation.
        metadata = await video_service.get_video_info(target_url)
        video_id = metadata['id']
        video_title = metadata.get('title', 'Unknown')

        background_tasks.add_task(
            download_video_task,
            job_id=job_id,
            url=target_url,
            quality=request.quality.value,
            extract_audio=request.extract_audio,
            force=request.force_download
        )

        # Register the job so its status can be queried right away.
        download_jobs[job_id] = {
            'status': DownloadStatus.PENDING,
            'video_id': video_id,
            'title': video_title
        }

        return {
            "job_id": job_id,
            "status": "queued",
            "message": f"Video {video_id} queued for download",
            "video_id": video_id,
            "title": video_title
        }
    except Exception as exc:
        logger.error(f"Failed to queue download: {exc}")
        raise HTTPException(status_code=400, detail=str(exc))
@router.get("/download/status/{job_id}")
async def get_download_status(job_id: str):
    """Return the stored record of a background download job.

    Raises:
        HTTPException: 404 when no job with this ID is tracked.
    """
    if job_id in download_jobs:
        return download_jobs[job_id]
    raise HTTPException(status_code=404, detail="Job not found")
@router.get("/download/progress/{video_id}")
async def get_download_progress(
    video_id: str,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """Return the download progress the service reports for a video.

    Raises:
        HTTPException: 404 when the service has no progress for the video.
    """
    current = video_service.get_download_progress(video_id)
    if current is not None:
        return current
    raise HTTPException(
        status_code=404,
        detail=f"No download progress found for video {video_id}"
    )
@router.post("/download/batch", response_model=BatchDownloadResponse)
async def download_batch(
    request: BatchDownloadRequest,
    background_tasks: BackgroundTasks,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """
    Download multiple videos in the background.

    For each URL the video info is fetched first. Videos the service
    reports as already downloaded are counted as skipped; the others are
    queued as one background task each and registered in ``download_jobs``
    with PENDING status, so the returned ``job_id`` can be queried on the
    status endpoint immediately. A URL that raises is recorded as failed;
    processing of the remaining URLs stops there unless
    ``request.continue_on_error`` is set.

    Returns:
        BatchDownloadResponse with per-URL results. ``successful`` counts
        queued downloads, not finished ones.
    """
    results = []
    successful = 0
    failed = 0
    skipped = 0
    for url in request.urls:
        try:
            # Check if already cached
            info = await video_service.get_video_info(str(url))
            video_id = info['id']
            title = info.get('title', 'Unknown')
            if video_service.is_video_downloaded(video_id):
                skipped += 1
                results.append({
                    "video_id": video_id,
                    "status": "cached",
                    "title": title
                })
                continue
            # Queue for download
            job_id = str(uuid.uuid4())
            background_tasks.add_task(
                download_video_task,
                job_id=job_id,
                url=str(url),
                quality=request.quality.value,
                extract_audio=request.extract_audio,
                force=False
            )
            # Register the job up front; otherwise the status endpoint
            # would answer 404 for this job_id until the task starts.
            download_jobs[job_id] = {
                'status': DownloadStatus.PENDING,
                'video_id': video_id,
                'title': title
            }
            successful += 1
            results.append({
                "video_id": video_id,
                "status": "queued",
                "job_id": job_id,
                "title": title
            })
        except Exception as e:
            failed += 1
            results.append({
                "url": str(url),
                "status": "failed",
                "error": str(e)
            })
            if not request.continue_on_error:
                break
    return BatchDownloadResponse(
        total=len(request.urls),
        successful=successful,
        failed=failed,
        skipped=skipped,
        results=results
    )
@router.get("/stats", response_model=StorageStats)
async def get_storage_stats(
    video_service: VideoDownloadService = Depends(get_video_service),
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """Return the service's storage statistics plus a per-category breakdown."""
    bytes_per_mb = 1024 * 1024
    stats = video_service.get_storage_stats()

    # The storage manager's per-category figures are divided down to MB
    # before being attached to the stats.
    usage_by_category = storage_manager.get_storage_usage()
    category_mb = {}
    for category, size in usage_by_category.items():
        category_mb[category] = size / bytes_per_mb
    stats['by_category'] = category_mb

    return StorageStats(**stats)
@router.post("/cleanup", response_model=CleanupResponse)
async def cleanup_storage(
    request: CleanupRequest,
    video_service: VideoDownloadService = Depends(get_video_service),
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """
    Clean up storage to free space.

    Steps, in order:
      1. Temporary files, when ``request.cleanup_temp`` is set.
      2. Orphaned files, when ``request.cleanup_orphaned`` is set.
      3. Either old cached videos, when ``request.bytes_to_free`` is set
         and not yet reached by the earlier steps, or otherwise files
         older than ``request.days_threshold``, when
         ``request.cleanup_old_files`` is set (at most 10 per call).

    Only ``old_files_removed`` is an exact count. The temp count is 0 or 1
    (whether anything was freed), and the orphaned/video counts are
    estimated from the bytes freed using assumed average file sizes.

    A file that cannot be removed during the age-based step is skipped and
    logged rather than aborting the request, so the response always
    reports what was actually freed.
    """
    bytes_per_mb = 1024 * 1024
    assumed_orphan_size = 10 * bytes_per_mb   # Assume 10MB average
    assumed_video_size = 100 * bytes_per_mb   # Assume 100MB average
    max_old_files_per_call = 10

    bytes_freed = 0
    files_removed = 0
    old_files_removed = 0
    orphaned_files_removed = 0
    temp_files_removed = 0
    # Clean temporary files
    if request.cleanup_temp:
        temp_freed = storage_manager.cleanup_temp_files()
        bytes_freed += temp_freed
        if temp_freed > 0:
            temp_files_removed += 1
    # Clean orphaned files
    if request.cleanup_orphaned:
        orphaned_freed = storage_manager.cleanup_orphaned_files(video_service.cache)
        bytes_freed += orphaned_freed
        # Rough estimate of files removed
        orphaned_files_removed = int(orphaned_freed / assumed_orphan_size)
    # Clean old files if specified bytes to free
    if request.bytes_to_free and bytes_freed < request.bytes_to_free:
        remaining = request.bytes_to_free - bytes_freed
        video_freed = video_service.cleanup_old_videos(remaining)
        bytes_freed += video_freed
        # Rough estimate of videos removed
        files_removed = int(video_freed / assumed_video_size)
    # Clean old files by age
    elif request.cleanup_old_files:
        old_files = storage_manager.find_old_files(request.days_threshold)
        for path in old_files[:max_old_files_per_call]:  # Limit files per call
            try:
                size = path.stat().st_size
                path.unlink()
            except FileNotFoundError:
                # Already gone; nothing to count.
                continue
            except OSError as e:
                logger.warning(f"Could not remove old file {path}: {e}")
                continue
            bytes_freed += size
            old_files_removed += 1
    total_files = files_removed + old_files_removed + orphaned_files_removed + temp_files_removed
    return CleanupResponse(
        bytes_freed=bytes_freed,
        mb_freed=bytes_freed / bytes_per_mb,
        gb_freed=bytes_freed / (1024 * 1024 * 1024),
        files_removed=total_files,
        old_files_removed=old_files_removed,
        orphaned_files_removed=orphaned_files_removed,
        temp_files_removed=temp_files_removed
    )
@router.get("/cached", response_model=List[CachedVideo])
async def get_cached_videos(
    video_service: VideoDownloadService = Depends(get_video_service),
    limit: int = Query(default=100, ge=0, description="Maximum number of videos to return"),
    offset: int = Query(default=0, ge=0, description="Number of videos to skip")
):
    """
    Get a page of cached videos with their information.

    Args:
        limit: Maximum number of videos to return; must not be negative.
        offset: Number of videos to skip; must not be negative.

    Negative values are rejected by request validation: in a Python slice
    they would count from the end of the list and return a meaningless
    page.
    """
    all_videos = video_service.get_cached_videos()
    # Apply pagination
    paginated = all_videos[offset:offset + limit]
    return [CachedVideo(**video) for video in paginated]
@router.delete("/cached/{video_id}")
async def delete_cached_video(
    video_id: str,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """Remove one cached video through the service's cleanup routine.

    Raises:
        HTTPException: 404 when the video's hash is not in the service cache.
    """
    cache_key = video_service._get_video_hash(video_id)
    if cache_key in video_service.cache:
        # Deletion is delegated to the service's cleanup helper.
        video_service._cleanup_failed_download(video_id)
        return {"message": f"Video {video_id} deleted successfully"}
    raise HTTPException(status_code=404, detail="Video not found in cache")
@router.post("/archive")
async def archive_video(
    request: VideoArchiveRequest,
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """Ask the storage manager to archive a video into the requested directory.

    Raises:
        HTTPException: 500 when the storage manager reports failure.
    """
    archived = storage_manager.archive_video(request.video_id, request.archive_dir)
    if archived:
        return {
            "message": f"Video {request.video_id} archived successfully",
            "archive_dir": request.archive_dir
        }
    raise HTTPException(status_code=500, detail="Failed to archive video")
@router.post("/restore")
async def restore_video(
    request: VideoRestoreRequest,
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """Ask the storage manager to restore a video from the given archive directory.

    Raises:
        HTTPException: 404 when the storage manager reports failure.
    """
    restored = storage_manager.restore_from_archive(request.video_id, request.archive_dir)
    if restored:
        return {
            "message": f"Video {request.video_id} restored successfully",
            "archive_dir": request.archive_dir
        }
    raise HTTPException(
        status_code=404,
        detail=f"Video {request.video_id} not found in archive"
    )
@router.get("/disk-usage")
async def get_disk_usage(
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """Return the disk usage figures reported by the storage manager."""
    usage = storage_manager.get_disk_usage()
    return usage