# youtube-summarizer/backend/api/video_download.py

"""
API endpoints for video download functionality
"""
import logging
from typing import Optional, Dict, Any
from urllib.parse import quote

from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from pydantic import BaseModel, HttpUrl, Field

from backend.services.enhanced_video_service import EnhancedVideoService, get_enhanced_video_service
from backend.models.video_download import DownloadPreferences, VideoQuality, DownloadStatus

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Router that groups every endpoint declared in this file under the
# "/api/video" prefix and the "video-download" tag.
router = APIRouter(prefix="/api/video", tags=["video-download"])


class VideoProcessRequest(BaseModel):
    """Request body accepted by the ``/process`` endpoint.

    The handler converts ``url`` to a plain string and forwards it, together
    with ``preferences``, to the video service.
    """
    # URL of the video to process; validated by pydantic's HttpUrl type.
    url: HttpUrl
    # Optional download preferences; forwarded to the service as-is and left
    # as None when the client does not supply any.
    preferences: Optional[DownloadPreferences] = None


class VideoDownloadResponse(BaseModel):
    """Response body returned by the ``/process`` endpoint.

    Every field is copied from the service result by ``process_video``;
    enum and path values are converted to plain strings first so that the
    payload is JSON-serializable.
    """
    # Identifier and URL of the processed video, as reported by the service.
    video_id: str
    video_url: str
    # String values (``.value``) of the result's status and method enums.
    status: str
    method: str
    # Stringified file paths; None when the result carries no such path.
    video_path: Optional[str] = None
    audio_path: Optional[str] = None
    # Flattened transcript: text, language, is_auto_generated, segments, source.
    transcript: Optional[Dict[str, Any]] = None
    # Flattened metadata: video_id, title, description, duration_seconds, etc.
    metadata: Optional[Dict[str, Any]] = None
    processing_time_seconds: Optional[float] = None
    file_size_bytes: Optional[int] = None
    # Copied from the result's ``is_partial`` flag; defaults to False.
    is_partial: bool = False
    # Copied from the result's ``error_message``; None when absent.
    error_message: Optional[str] = None


class HealthStatusResponse(BaseModel):
    """Response body returned by the ``/health`` endpoint.

    The endpoint builds this model by unpacking the dict returned by the
    service's ``get_health_status()``, so that dict's keys must match the
    field names below.
    """
    overall_status: str
    healthy_methods: int
    total_methods: int
    # Per-method detail dicts keyed by a string (presumably the method
    # name -- confirm against the service).
    method_details: Dict[str, Dict[str, Any]]
    recommendations: list[str]
    # NOTE(review): presumably a timestamp rendered as a string; the exact
    # format is not visible in this file.
    last_check: str


class MetricsResponse(BaseModel):
    """Response body returned by the ``/metrics`` endpoint.

    The endpoint builds this model by unpacking the dict returned by the
    service's ``get_download_metrics()``, so that dict's keys must match the
    field names below.
    """
    total_attempts: int
    successful_downloads: int
    failed_downloads: int
    partial_downloads: int
    # NOTE(review): scale (0-1 fraction vs. 0-100 percent) is not visible in
    # this file -- confirm against the service.
    success_rate: float
    # Per-key breakdowns; keys are strings (presumably method names for the
    # first two and error identifiers for the third -- confirm).
    method_success_rates: Dict[str, float]
    method_attempt_counts: Dict[str, int]
    common_errors: Dict[str, int]
    # NOTE(review): presumably a timestamp rendered as a string; the exact
    # format is not visible in this file.
    last_updated: str


def _transcript_to_dict(transcript: Any) -> Optional[Dict[str, Any]]:
    """Flatten a transcript object into a plain dict, or None if it is falsy."""
    if not transcript:
        return None
    return {
        'text': transcript.text,
        'language': transcript.language,
        'is_auto_generated': transcript.is_auto_generated,
        'segments': transcript.segments,
        'source': transcript.source,
    }


def _metadata_to_dict(metadata: Any) -> Optional[Dict[str, Any]]:
    """Flatten a metadata object into a plain dict, or None if it is falsy."""
    if not metadata:
        return None
    return {
        'video_id': metadata.video_id,
        'title': metadata.title,
        'description': metadata.description,
        'duration_seconds': metadata.duration_seconds,
        'view_count': metadata.view_count,
        'upload_date': metadata.upload_date,
        'uploader': metadata.uploader,
        'thumbnail_url': metadata.thumbnail_url,
        'tags': metadata.tags,
        'language': metadata.language,
        'availability': metadata.availability,
        'age_restricted': metadata.age_restricted,
    }


@router.post("/process", response_model=VideoDownloadResponse)
async def process_video(
    request: VideoProcessRequest,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """
    Process a YouTube video - download and extract content

    This is the main endpoint for the YouTube Summarizer pipeline

    Args:
        request: Request body holding the video URL and optional download
            preferences.
        video_service: Service performing the work; injected by FastAPI.

    Returns:
        A VideoDownloadResponse populated from the service result, with
        paths and enum values converted to strings.

    Raises:
        HTTPException: Re-raised unchanged if the service raises one;
            otherwise a 500 whose detail carries the error message and the
            exception type name.
    """
    try:
        result = await video_service.get_video_for_processing(
            str(request.url),
            request.preferences
        )

        return VideoDownloadResponse(
            video_id=result.video_id,
            video_url=result.video_url,
            status=result.status.value,
            method=result.method.value,
            # Paths are stringified so the response is JSON-serializable.
            video_path=str(result.video_path) if result.video_path else None,
            audio_path=str(result.audio_path) if result.audio_path else None,
            transcript=_transcript_to_dict(result.transcript),
            metadata=_metadata_to_dict(result.metadata),
            processing_time_seconds=result.processing_time_seconds,
            file_size_bytes=result.file_size_bytes,
            is_partial=result.is_partial,
            error_message=result.error_message
        )

    except HTTPException:
        # Keep deliberate HTTP errors (status code and detail) intact instead
        # of masking them as a generic 500, as the other endpoints here do.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Video processing failed: %s", e)
        raise HTTPException(
            status_code=500,
            detail={
                "error": "Video processing failed",
                "message": str(e),
                "type": type(e).__name__
            }
        ) from e


@router.get("/metadata/{video_id}")
async def get_video_metadata(
    video_id: str,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Get video metadata without downloading

    Args:
        video_id: Video identifier taken from the request path.
        video_service: Service performing the lookup; injected by FastAPI.

    Returns:
        Whatever the service's ``get_video_metadata_only`` returns.

    Raises:
        HTTPException: 404 when the service returns a falsy result, 500 when
            the lookup raises any other exception.
    """
    try:
        # The ID comes straight from the request path, so percent-encode it
        # before embedding it in the query string; otherwise characters such
        # as '&', '#' or '=' could inject extra parameters or a fragment.
        # IDs made only of letters, digits, '-' and '_' are left unchanged.
        encoded_id = quote(video_id, safe="")
        url = f"https://youtube.com/watch?v={encoded_id}"
        metadata = await video_service.get_video_metadata_only(url)

        if not metadata:
            raise HTTPException(status_code=404, detail="Video metadata not found")

        return metadata

    except HTTPException:
        # Let the 404 above (and any HTTP error from the service) through.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Metadata extraction failed: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Metadata extraction failed: {e}"
        ) from e


@router.get("/transcript/{video_id}")
async def get_video_transcript(
    video_id: str,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Get video transcript without downloading

    Args:
        video_id: Video identifier taken from the request path.
        video_service: Service performing the lookup; injected by FastAPI.

    Returns:
        Whatever the service's ``get_transcript_only`` returns.

    Raises:
        HTTPException: 404 when the service returns a falsy result, 500 when
            the lookup raises any other exception.
    """
    try:
        # The ID comes straight from the request path, so percent-encode it
        # before embedding it in the query string; otherwise characters such
        # as '&', '#' or '=' could inject extra parameters or a fragment.
        # IDs made only of letters, digits, '-' and '_' are left unchanged.
        encoded_id = quote(video_id, safe="")
        url = f"https://youtube.com/watch?v={encoded_id}"
        transcript = await video_service.get_transcript_only(url)

        if not transcript:
            raise HTTPException(status_code=404, detail="Video transcript not found")

        return transcript

    except HTTPException:
        # Let the 404 above (and any HTTP error from the service) through.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Transcript extraction failed: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Transcript extraction failed: {e}"
        ) from e


@router.get("/job/{job_id}")
async def get_download_job_status(
    job_id: str,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Look up a download job and return the status reported by the service.

    Responds with 404 when the service yields a falsy status, and with 500
    when the lookup raises any non-HTTP exception.
    """
    try:
        job_status = await video_service.get_download_job_status(job_id)
        if job_status:
            return job_status
        # A falsy result means the service has nothing for this job ID.
        raise HTTPException(status_code=404, detail="Job not found")
    except HTTPException:
        # HTTP errors already carry the right status code; pass them through.
        raise
    except Exception as exc:
        logger.error(f"Job status query failed: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Job status query failed: {exc}",
        )


@router.delete("/job/{job_id}")
async def cancel_download_job(
    job_id: str,
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Ask the service to cancel a download job.

    Responds with a confirmation message when the service reports success,
    with 404 when it reports a falsy outcome, and with 500 when the
    cancellation raises any non-HTTP exception.
    """
    try:
        cancelled = await video_service.cancel_download(job_id)
        if cancelled:
            return {"message": "Job cancelled successfully"}
        # Falsy outcome: nothing was cancelled for this job ID.
        raise HTTPException(
            status_code=404,
            detail="Job not found or already completed",
        )
    except HTTPException:
        # HTTP errors already carry the right status code; pass them through.
        raise
    except Exception as exc:
        logger.error(f"Job cancellation failed: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Job cancellation failed: {exc}",
        )


@router.get("/health", response_model=HealthStatusResponse)
async def get_health_status(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Report the health of the download methods.

    The dict returned by the service is unpacked into a HealthStatusResponse;
    any exception along the way is logged and turned into a 500.
    """
    try:
        health_report = await video_service.get_health_status()
        response = HealthStatusResponse(**health_report)
        return response
    except Exception as exc:
        logger.error(f"Health check failed: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Health check failed: {exc}",
        )


@router.get("/metrics", response_model=MetricsResponse)
async def get_download_metrics(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Report download performance metrics.

    The dict returned by the service is unpacked into a MetricsResponse; any
    exception along the way is logged and turned into a 500.
    """
    try:
        raw_metrics = await video_service.get_download_metrics()
        response = MetricsResponse(**raw_metrics)
        return response
    except Exception as exc:
        logger.error(f"Metrics query failed: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Metrics query failed: {exc}",
        )


@router.get("/storage")
async def get_storage_info(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Return the storage usage information reported by the service.

    The service call is synchronous; any exception it raises is logged and
    turned into a 500.
    """
    try:
        storage_info = video_service.get_storage_info()
    except Exception as exc:
        logger.error(f"Storage info query failed: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Storage info query failed: {exc}",
        )
    else:
        return storage_info


@router.post("/cleanup")
async def cleanup_old_files(
    max_age_days: Optional[int] = None,
    background_tasks: BackgroundTasks = BackgroundTasks(),
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Schedule a cleanup of old downloaded files and return immediately.

    Only the scheduling happens inside this handler, so the 500 path below
    covers failures to enqueue the task, not failures of the cleanup itself.

    NOTE(review): the ``BackgroundTasks()`` default is created once, when the
    function is defined. FastAPI is expected to inject a per-request instance
    for this parameter, but a direct call that omits it would share that
    single object -- confirm no caller relies on the default.
    """
    try:
        # Hand the service method and its argument to the task queue.
        background_tasks.add_task(video_service.cleanup_old_files, max_age_days)
    except Exception as exc:
        logger.error(f"Cleanup task failed: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Cleanup task failed: {exc}",
        )
    else:
        return {"message": "Cleanup task started"}


@router.get("/methods")
async def get_supported_methods(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Return the service's supported download methods under a "methods" key.

    The service call is synchronous; any exception it raises is logged and
    turned into a 500.
    """
    try:
        return {"methods": video_service.get_supported_methods()}
    except Exception as exc:
        logger.error(f"Methods query failed: {exc}")
        raise HTTPException(
            status_code=500,
            detail=f"Methods query failed: {exc}",
        )


# Development-only smoke-test endpoint
@router.post("/test")
async def test_download_system(
    video_service: EnhancedVideoService = Depends(get_enhanced_video_service)
):
    """Run the download pipeline against a fixed, hard-coded YouTube URL.

    Unlike the other endpoints, failures are not raised as HTTP errors: the
    handler returns a summary dict whose "status" is either "success" or
    "failed".
    """
    sample_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

    try:
        # Audio-only with transcript fallback, capped at 10 minutes so the
        # test stays short.
        smoke_preferences = DownloadPreferences(
            prefer_audio_only=True,
            fallback_to_transcript=True,
            max_duration_minutes=10,
        )
        outcome = await video_service.get_video_for_processing(
            sample_url, smoke_preferences
        )
        return dict(
            status="success",
            result_status=outcome.status.value,
            method_used=outcome.method.value,
            has_transcript=outcome.transcript is not None,
            has_metadata=outcome.metadata is not None,
            processing_time=outcome.processing_time_seconds,
        )
    except Exception as exc:
        logger.error(f"Download system test failed: {exc}")
        return dict(
            status="failed",
            error=str(exc),
            error_type=type(exc).__name__,
        )