# youtube-summarizer/backend/api/videos.py

"""
Video download API endpoints.
Handles video downloading, storage management, and progress tracking.
"""

from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends, Query
from fastapi.responses import JSONResponse
from typing import Optional, List, Dict, Any
from pathlib import Path
import logging
import asyncio
import uuid

from backend.models.video import (
    VideoDownloadRequest,
    VideoResponse,
    StorageStats,
    CleanupRequest,
    CleanupResponse,
    CachedVideo,
    BatchDownloadRequest,
    BatchDownloadResponse,
    VideoArchiveRequest,
    VideoRestoreRequest,
    DownloadProgress,
    DownloadStatus
)
from backend.services.video_download_service import VideoDownloadService, VideoDownloadError
from backend.services.storage_manager import StorageManager

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Create router: every endpoint below is registered under the /api/videos prefix.
router = APIRouter(prefix="/api/videos", tags=["videos"])

# Service instances (in production, use dependency injection).
# Both start as None and are created on first use by get_video_service()
# and get_storage_manager().
video_service: Optional[VideoDownloadService] = None
storage_manager: Optional[StorageManager] = None

# Track background download jobs: maps a job ID to the status record (a dict)
# written by the download endpoints and the background download task.
download_jobs: Dict[str, Dict[str, Any]] = {}


def get_video_service() -> VideoDownloadService:
    """Return the shared video download service, building it on first call."""
    global video_service
    if video_service is not None:
        # Already built by an earlier call; reuse it.
        return video_service
    video_service = VideoDownloadService()
    return video_service


def get_storage_manager() -> StorageManager:
    """Return the shared storage manager, building it on first call."""
    global storage_manager
    if storage_manager is not None:
        # Already built by an earlier call; reuse it.
        return storage_manager
    storage_manager = StorageManager()
    return storage_manager


async def download_video_task(
    job_id: str,
    url: str,
    quality: str,
    extract_audio: bool,
    force: bool
):
    """
    Background task that downloads one video and records the outcome.

    Progress is published in the module-level ``download_jobs`` registry under
    ``job_id``. Every status change is merged into the existing record instead
    of replacing it, so fields stored when the job was queued (for example
    ``video_id`` and ``title``) and the source ``url`` remain visible to status
    polls while the download runs and after it fails.

    Args:
        job_id: Key of the job record in ``download_jobs``.
        url: Video URL handed to the download service.
        quality: Value assigned to the service's ``video_quality`` before
            downloading.
        extract_audio: Forwarded to the service's ``download_video``.
        force: Forwarded to the service's ``download_video``.

    Any ``Exception`` raised along the way is logged with its traceback and
    stored in the job record as a ``FAILED`` status plus the error message;
    it is not re-raised.
    """
    try:
        # setdefault also covers jobs scheduled without an initial record.
        download_jobs.setdefault(job_id, {}).update({
            'status': DownloadStatus.DOWNLOADING,
            'url': url
        })

        service = get_video_service()
        # NOTE(review): the service comes from a module-level singleton, so
        # this assignment is visible to every other user of it - confirm that
        # concurrent downloads with different qualities are not a concern.
        service.video_quality = quality

        video_path, audio_path = await service.download_video(
            url=url,
            extract_audio=extract_audio,
            force=force
        )

        # Get video info from cache
        info = await service.get_video_info(url)
        video_id = info['id']
        video_hash = service._get_video_hash(video_id)
        cached_info = service.cache.get(video_hash, {})

        download_jobs.setdefault(job_id, {}).update({
            'status': DownloadStatus.COMPLETED,
            'video_id': video_id,
            'video_path': str(video_path) if video_path else None,
            'audio_path': str(audio_path) if audio_path else None,
            'title': cached_info.get('title', 'Unknown'),
            'size_mb': cached_info.get('size_bytes', 0) / (1024 * 1024)
        })

    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"Background download failed for job {job_id}: {e}")
        download_jobs.setdefault(job_id, {}).update({
            'status': DownloadStatus.FAILED,
            'error': str(e)
        })


@router.post("/download", response_model=VideoResponse)
async def download_video(
    request: VideoDownloadRequest,
    background_tasks: BackgroundTasks,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """
    Download a YouTube video within the request and return its details.

    The handler awaits the download itself, so the response is only sent once
    the service call has finished. ``background_tasks`` is accepted but not
    used here; the /download/background endpoint is the queued variant.

    Raises:
        HTTPException: 400 when the service raises ``VideoDownloadError``,
            500 for any other exception.
    """
    try:
        quality_value = request.quality.value
        source_url = str(request.url)

        # The requested quality is applied to the service before any lookup.
        video_service.video_quality = quality_value

        info = await video_service.get_video_info(source_url)
        video_id = info['id']

        # "cached" means the file was already present and no re-download was forced.
        already_downloaded = video_service.is_video_downloaded(video_id)
        cached = already_downloaded and not request.force_download

        video_path, audio_path = await video_service.download_video(
            url=source_url,
            extract_audio=request.extract_audio,
            force=request.force_download
        )

        # Prefer what the service cache recorded; fall back to the info lookup.
        cache_key = video_service._get_video_hash(video_id)
        cached_info = video_service.cache.get(cache_key, {})

        response_fields = {
            'video_id': video_id,
            'title': cached_info.get('title', info.get('title', 'Unknown')),
            'video_path': str(video_path) if video_path else "",
            'audio_path': str(audio_path) if audio_path else None,
            'download_date': cached_info.get('download_date', ''),
            'size_mb': cached_info.get('size_bytes', 0) / (1024 * 1024),
            'duration': cached_info.get('duration', info.get('duration', 0)),
            'quality': quality_value,
            'cached': cached,
        }
        return VideoResponse(**response_fields)

    except VideoDownloadError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Download failed: {e}")
        raise HTTPException(status_code=500, detail=f"Download failed: {str(e)}")


@router.post("/download/background")
async def download_video_background(
    request: VideoDownloadRequest,
    background_tasks: BackgroundTasks,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """
    Schedule a video download as a background task.

    The video info is looked up first, so a URL the service cannot resolve
    fails the request before anything is scheduled. On success the response
    carries a job ID that keys the job's record in ``download_jobs``.

    Raises:
        HTTPException: 400 for any exception raised while queueing.
    """
    try:
        source_url = str(request.url)
        job_id = str(uuid.uuid4())

        # Looking the video up doubles as URL validation.
        info = await video_service.get_video_info(source_url)
        video_id = info['id']
        title = info.get('title', 'Unknown')

        background_tasks.add_task(
            download_video_task,
            job_id=job_id,
            url=source_url,
            quality=request.quality.value,
            extract_audio=request.extract_audio,
            force=request.force_download
        )

        # Record the job so it can be looked up by ID right away.
        download_jobs[job_id] = dict(
            status=DownloadStatus.PENDING,
            video_id=video_id,
            title=title,
        )

        return dict(
            job_id=job_id,
            status="queued",
            message=f"Video {video_id} queued for download",
            video_id=video_id,
            title=title,
        )

    except Exception as e:
        logger.error(f"Failed to queue download: {e}")
        raise HTTPException(status_code=400, detail=str(e))


@router.get("/download/status/{job_id}")
async def get_download_status(job_id: str):
    """Return the stored record of a background download job, or 404 if unknown."""
    if job_id in download_jobs:
        return download_jobs[job_id]

    raise HTTPException(status_code=404, detail="Job not found")


@router.get("/download/progress/{video_id}")
async def get_download_progress(
    video_id: str,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """Return the service's progress entry for a video, or 404 when it has none."""
    current = video_service.get_download_progress(video_id)

    if current is not None:
        return current

    raise HTTPException(
        status_code=404,
        detail=f"No download progress found for video {video_id}"
    )


@router.post("/download/batch", response_model=BatchDownloadResponse)
async def download_batch(
    request: BatchDownloadRequest,
    background_tasks: BackgroundTasks,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """
    Download multiple videos in the background.

    Each URL is looked up first. Videos the service reports as already
    downloaded are counted as skipped; the rest are scheduled as background
    tasks and counted as successful (meaning "queued", not "finished").
    Every queued job is registered in ``download_jobs`` with a PENDING
    status before it is scheduled, so the job ID returned to the client can
    be polled through the status endpoint immediately instead of answering
    404 until the task starts running.

    A URL whose lookup or scheduling raises is reported as failed; when
    ``request.continue_on_error`` is false the loop stops at the first
    failure and the remaining URLs are not processed.

    Returns:
        BatchDownloadResponse with per-URL results and the counters. ``total``
        is the number of URLs requested, not the number processed.
    """
    results = []
    successful = 0
    failed = 0
    skipped = 0

    for url in request.urls:
        try:
            # Check if already cached
            info = await video_service.get_video_info(str(url))
            video_id = info['id']
            title = info.get('title', 'Unknown')

            if video_service.is_video_downloaded(video_id):
                skipped += 1
                results.append({
                    "video_id": video_id,
                    "status": "cached",
                    "title": title
                })
                continue

            # Queue for download
            job_id = str(uuid.uuid4())

            # Register the job first so its ID is known to the status
            # endpoint as soon as it is handed back to the client.
            download_jobs[job_id] = {
                'status': DownloadStatus.PENDING,
                'video_id': video_id,
                'title': title
            }

            background_tasks.add_task(
                download_video_task,
                job_id=job_id,
                url=str(url),
                quality=request.quality.value,
                extract_audio=request.extract_audio,
                force=False
            )

            successful += 1
            results.append({
                "video_id": video_id,
                "status": "queued",
                "job_id": job_id,
                "title": title
            })

        except Exception as e:
            failed += 1
            results.append({
                "url": str(url),
                "status": "failed",
                "error": str(e)
            })

            if not request.continue_on_error:
                break

    return BatchDownloadResponse(
        total=len(request.urls),
        successful=successful,
        failed=failed,
        skipped=skipped,
        results=results
    )


@router.get("/stats", response_model=StorageStats)
async def get_storage_stats(
    video_service: VideoDownloadService = Depends(get_video_service),
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """Combine the service's storage stats with a per-category usage breakdown."""
    divisor = 1024 * 1024  # storage manager values are divided down to MB

    stats = video_service.get_storage_stats()
    usage_by_category = storage_manager.get_storage_usage()

    per_category_mb = {}
    for category, amount in usage_by_category.items():
        per_category_mb[category] = amount / divisor

    # The breakdown is attached to the dict returned by the service.
    stats['by_category'] = per_category_mb

    return StorageStats(**stats)


@router.post("/cleanup", response_model=CleanupResponse)
async def cleanup_storage(
    request: CleanupRequest,
    video_service: VideoDownloadService = Depends(get_video_service),
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """
    Clean up storage to free space.

    Steps run in this order, each only when the request asks for it:

    1. Temporary files (``cleanup_temp``).
    2. Orphaned files (``cleanup_orphaned``).
    3. Either old videos until ``bytes_to_free`` is reached, or - when that
       branch does not run - up to 10 old files selected by
       ``days_threshold`` (``cleanup_old_files``).

    In the age-based step a file that has already disappeared, or that cannot
    be removed, is skipped (the latter with a warning) instead of failing the
    whole request after earlier files were already deleted.

    Returns:
        CleanupResponse with the bytes freed and file counters. Only
        ``old_files_removed`` is an exact count; the orphaned and video
        counters are estimates derived from bytes freed, and
        ``temp_files_removed`` is 1 when any temp bytes were freed.
    """
    bytes_freed = 0
    files_removed = 0
    old_files_removed = 0
    orphaned_files_removed = 0
    temp_files_removed = 0

    # Clean temporary files
    if request.cleanup_temp:
        temp_freed = storage_manager.cleanup_temp_files()
        bytes_freed += temp_freed
        if temp_freed > 0:
            # Only the byte total is known here, so this acts as a flag.
            temp_files_removed += 1

    # Clean orphaned files
    if request.cleanup_orphaned:
        orphaned_freed = storage_manager.cleanup_orphaned_files(video_service.cache)
        bytes_freed += orphaned_freed
        # Rough estimate of files removed
        orphaned_files_removed = int(orphaned_freed / (10 * 1024 * 1024))  # Assume 10MB average

    # Clean old files if specified bytes to free
    if request.bytes_to_free and bytes_freed < request.bytes_to_free:
        remaining = request.bytes_to_free - bytes_freed
        video_freed = video_service.cleanup_old_videos(remaining)
        bytes_freed += video_freed
        # Rough estimate of videos removed
        files_removed = int(video_freed / (100 * 1024 * 1024))  # Assume 100MB average

    # Clean old files by age
    elif request.cleanup_old_files:
        old_files = storage_manager.find_old_files(request.days_threshold)
        for file in old_files[:10]:  # Limit to 10 files at a time
            try:
                size = file.stat().st_size
                file.unlink()
            except FileNotFoundError:
                # Gone between listing and deletion; nothing left to free.
                continue
            except OSError as e:
                # One undeletable file must not abort the whole cleanup.
                logger.warning(f"Could not remove old file {file}: {e}")
                continue
            bytes_freed += size
            old_files_removed += 1

    total_files = files_removed + old_files_removed + orphaned_files_removed + temp_files_removed

    return CleanupResponse(
        bytes_freed=bytes_freed,
        mb_freed=bytes_freed / (1024 * 1024),
        gb_freed=bytes_freed / (1024 * 1024 * 1024),
        files_removed=total_files,
        old_files_removed=old_files_removed,
        orphaned_files_removed=orphaned_files_removed,
        temp_files_removed=temp_files_removed
    )


@router.get("/cached", response_model=List[CachedVideo])
async def get_cached_videos(
    video_service: VideoDownloadService = Depends(get_video_service),
    limit: int = Query(default=100, ge=0, description="Maximum number of videos to return"),
    offset: int = Query(default=0, ge=0, description="Number of videos to skip")
):
    """
    Get a page of cached videos with their information.

    Args:
        limit: Maximum number of videos to return; must not be negative.
        offset: Number of videos to skip from the start; must not be negative.

    Both bounds are enforced by request validation. Without them a negative
    value would be applied as a Python slice index counted from the end of
    the list and silently return the wrong page.

    Returns:
        The slice ``[offset, offset + limit)`` of the service's cached videos,
        each converted to ``CachedVideo``.
    """
    all_videos = video_service.get_cached_videos()

    # Apply pagination
    paginated = all_videos[offset:offset + limit]

    return [CachedVideo(**video) for video in paginated]


@router.delete("/cached/{video_id}")
async def delete_cached_video(
    video_id: str,
    video_service: VideoDownloadService = Depends(get_video_service)
):
    """Remove one cached video through the service, or 404 if it is not cached."""
    cache_key = video_service._get_video_hash(video_id)

    if cache_key in video_service.cache:
        # Removal is delegated to the service's cleanup routine.
        video_service._cleanup_failed_download(video_id)
        return {"message": f"Video {video_id} deleted successfully"}

    raise HTTPException(status_code=404, detail="Video not found in cache")


@router.post("/archive")
async def archive_video(
    request: VideoArchiveRequest,
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """Ask the storage manager to archive a video; respond 500 if it reports failure."""
    archived = storage_manager.archive_video(request.video_id, request.archive_dir)

    if archived:
        return dict(
            message=f"Video {request.video_id} archived successfully",
            archive_dir=request.archive_dir,
        )

    raise HTTPException(status_code=500, detail="Failed to archive video")


@router.post("/restore")
async def restore_video(
    request: VideoRestoreRequest,
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """Ask the storage manager to restore a video; respond 404 if it reports failure."""
    restored = storage_manager.restore_from_archive(request.video_id, request.archive_dir)

    if restored:
        return dict(
            message=f"Video {request.video_id} restored successfully",
            archive_dir=request.archive_dir,
        )

    raise HTTPException(
        status_code=404,
        detail=f"Video {request.video_id} not found in archive"
    )


@router.get("/disk-usage")
async def get_disk_usage(
    storage_manager: StorageManager = Depends(get_storage_manager)
):
    """Return whatever the storage manager reports as its disk usage."""
    usage = storage_manager.get_disk_usage()
    return usage