# youtube-summarizer/backend/api/export.py

"""
Export API endpoints for YouTube Summarizer
Handles single and bulk export requests for summaries
"""

import os
from datetime import datetime
from typing import List, Optional, Dict, Any
from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel, Field
from enum import Enum

from ..services.export_service import (
    ExportService,
    ExportFormat,
    ExportRequest,
    BulkExportRequest,
    ExportStatus
)
from ..models.video import VideoSummary
from ..services.storage_manager import StorageManager
from ..services.enhanced_cache_manager import EnhancedCacheManager
from ..core.exceptions import YouTubeError


# Create router
# Every route registered on this router is served under the /api/export prefix
# and is grouped under the "export" tag in the generated OpenAPI documentation.
router = APIRouter(prefix="/api/export", tags=["export"])


class SingleExportRequestModel(BaseModel):
    """Request body for exporting one summary."""

    # Required fields (no default): the summary to export and the target format.
    summary_id: str = Field(
        default=...,
        description="ID of summary to export",
    )
    format: ExportFormat = Field(
        default=...,
        description="Export format",
    )

    # Optional rendering tweaks; these are forwarded unchanged to the export
    # service by the single-export endpoint.
    template: Optional[str] = Field(
        default=None,
        description="Custom template name",
    )
    include_metadata: bool = Field(
        default=True,
        description="Include processing metadata",
    )
    custom_branding: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Custom branding options",
    )


class BulkExportRequestModel(BaseModel):
    """Request body for exporting several summaries in one job."""

    # Required fields (no default): what to export and into which formats.
    summary_ids: List[str] = Field(
        default=...,
        description="List of summary IDs to export",
    )
    formats: List[ExportFormat] = Field(
        default=...,
        description="Export formats",
    )

    # Optional settings, forwarded unchanged to the export service.
    template: Optional[str] = Field(
        default=None,
        description="Custom template name",
    )
    # Free-form string: the model itself does not restrict the value to the
    # three methods named in the description.
    organize_by: str = Field(
        default="format",
        description="Organization method: format, date, video",
    )
    include_metadata: bool = Field(
        default=True,
        description="Include processing metadata",
    )
    custom_branding: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Custom branding options",
    )


class ExportResponseModel(BaseModel):
    """Response model for export operations

    Describes one export job. Only ``export_id`` and ``status`` are required;
    every other field defaults to ``None`` when the handler does not supply it.
    """
    # Identifier of the export job; the status and download routes take it as a path parameter.
    export_id: str
    # Job state as a plain string: handlers pass either an ExportStatus value
    # or the literal "processing" (background bulk exports).
    status: str
    # Value of the ExportFormat used; handlers leave it unset for bulk exports.
    format: Optional[str] = None
    # Copied from the export service's result object.
    download_url: Optional[str] = None
    file_size_bytes: Optional[int] = None
    error: Optional[str] = None
    # ISO-8601 strings produced with datetime.isoformat() by the handlers.
    created_at: Optional[str] = None
    completed_at: Optional[str] = None
    # Only set by the background bulk-export path (2 per summary, a rough
    # estimate); presumably seconds -- TODO confirm the unit.
    estimated_time_remaining: Optional[int] = None


class ExportListResponseModel(BaseModel):
    """Response model for listing exports

    One page of export jobs plus the paging information needed to request
    the next page.
    """
    # The export jobs on the requested page.
    exports: List[ExportResponseModel]
    # Number of jobs matching the filter across all pages (not just this page).
    total: int
    # Echo of the 1-based page number and page size the caller asked for.
    page: int
    page_size: int


# Initialize services
# Module-level singletons: constructed once, when this module is imported, and
# shared by every handler defined below.
export_service = ExportService()
# NOTE(review): storage_manager is not referenced anywhere else in the code
# shown in this module -- confirm whether it is still needed.
storage_manager = StorageManager()
cache_manager = EnhancedCacheManager()


async def get_summary_data(summary_id: str) -> Optional[Dict[str, Any]]:
    """
    Look up the data for one summary.

    The summary cache is consulted first. On a cache miss a hard-coded
    placeholder record is returned, because the real storage lookup has not
    been wired in yet.

    Args:
        summary_id: Identifier of the summary; also used as the video ID of
            the placeholder record.

    Returns:
        The cached entry when the cache returns a truthy value, otherwise the
        placeholder record. ``None`` only if building the placeholder raises.
    """
    hit = await cache_manager.get_from_cache(
        cache_type="summary",
        key=summary_id
    )
    if hit:
        return hit

    # Cache miss. This is where the real storage system would be queried;
    # until then, mock data is returned so the export flow can be tested.
    try:
        video_metadata = {
            "title": "Sample Video Title",
            "channel_name": "Sample Channel",
            "duration": 600,
            "published_at": "2025-01-25",
            "view_count": 10000,
            "like_count": 500,
            "thumbnail_url": "https://example.com/thumbnail.jpg"
        }
        key_points = [
            "First key point from the video",
            "Second important insight",
            "Third main takeaway"
        ]
        main_themes = [
            "Technology",
            "Innovation",
            "Future Trends"
        ]
        actionable_insights = [
            "Implement the discussed strategy in your workflow",
            "Consider the new approach for better results",
            "Apply the learned concepts to real-world scenarios"
        ]
        processing_metadata = {
            "model": "gpt-4",
            "processing_time_seconds": 15,
            "tokens_used": 2500,
            "timestamp": datetime.utcnow().isoformat()
        }

        return {
            "video_id": summary_id,
            "video_url": f"https://youtube.com/watch?v={summary_id}",
            "video_metadata": video_metadata,
            "summary": "This is a sample summary of the video content. It provides key insights and main points discussed in the video.",
            "key_points": key_points,
            "main_themes": main_themes,
            "actionable_insights": actionable_insights,
            "confidence_score": 0.92,
            "processing_metadata": processing_metadata,
            "created_at": datetime.utcnow().isoformat()
        }
    except Exception:
        # Any failure while assembling the record is reported as "not found".
        return None


async def process_bulk_export_async(
    summaries_data: List[Dict[str, Any]],
    request: BulkExportRequest,
    export_service: ExportService
):
    """
    Run a bulk export to completion; meant to be scheduled as a background task.

    Failures are logged (with traceback) instead of being raised, because no
    caller is left to receive an exception once the HTTP response has been
    sent.

    Args:
        summaries_data: Summary records to export.
        request: Bulk export options passed through to the service.
        export_service: Service whose ``bulk_export_summaries`` coroutine
            performs the export.

    Returns:
        None. The export result is not propagated from here.
    """
    # Function-scope import keeps this block self-contained without touching
    # the module's import section.
    import logging

    try:
        await export_service.bulk_export_summaries(summaries_data, request)
        # TODO: send a completion notification once a notification service exists.
    except Exception:
        # Broad catch is deliberate: this is the top-level boundary of the
        # background task. logging.exception records the traceback, which the
        # previous print() call dropped.
        logging.getLogger(__name__).exception("Bulk export error")
        # TODO: send an error notification once a notification service exists.


@router.post("/single", response_model=ExportResponseModel)
async def export_single_summary(
    request: SingleExportRequestModel,
    background_tasks: BackgroundTasks
):
    """
    Export one summary in the requested format.

    Supported formats: markdown, pdf, text, json, html.
    The response carries the export ID used for status checks and download.

    Raises:
        HTTPException: 404 when the summary cannot be found, 500 when the
            export itself fails.
    """
    # background_tasks is accepted but not used: the export runs inline.
    try:
        payload = await get_summary_data(request.summary_id)
        if not payload:
            raise HTTPException(status_code=404, detail="Summary not found")

        job = ExportRequest(
            summary_id=request.summary_id,
            format=request.format,
            template=request.template,
            include_metadata=request.include_metadata,
            custom_branding=request.custom_branding
        )
        outcome = await export_service.export_summary(payload, job)

        # Flatten the service result into JSON-friendly values: enum members
        # become their values, datetimes become ISO-8601 strings.
        fields = {
            "export_id": outcome.export_id,
            "status": outcome.status.value,
            "format": outcome.format.value if outcome.format else None,
            "download_url": outcome.download_url,
            "file_size_bytes": outcome.file_size_bytes,
            "error": outcome.error,
            "created_at": outcome.created_at.isoformat() if outcome.created_at else None,
            "completed_at": outcome.completed_at.isoformat() if outcome.completed_at else None,
        }
        return ExportResponseModel(**fields)

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 404 above) pass through untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Export failed: {str(e)}")


@router.post("/bulk", response_model=ExportResponseModel)
async def export_bulk_summaries(
    request: BulkExportRequestModel,
    background_tasks: BackgroundTasks
):
    """
    Export several summaries in a single job.

    Creates a ZIP archive with an organized folder structure.
    Up to 10 summaries are exported before the response is returned;
    larger jobs are handed to a background task.

    Raises:
        HTTPException: 400 when more than 100 IDs are requested, 404 when
            none of the IDs yields data, 500 on any other failure.
    """
    try:
        requested_ids = request.summary_ids
        if len(requested_ids) > 100:
            raise HTTPException(
                status_code=400,
                detail="Maximum 100 summaries per bulk export"
            )

        # Look the summaries up one after another; IDs that yield no data are
        # dropped silently instead of failing the whole request.
        lookups = [await get_summary_data(sid) for sid in requested_ids]
        found = [entry for entry in lookups if entry]
        if not found:
            raise HTTPException(status_code=404, detail="No valid summaries found")

        job = BulkExportRequest(
            summary_ids=request.summary_ids,
            formats=request.formats,
            template=request.template,
            organize_by=request.organize_by,
            include_metadata=request.include_metadata,
            custom_branding=request.custom_branding
        )

        if len(found) <= 10:
            # Small export - run it inline and report the final result.
            outcome = await export_service.bulk_export_summaries(found, job)
            fields = {
                "export_id": outcome.export_id,
                "status": outcome.status.value,
                "download_url": outcome.download_url,
                "file_size_bytes": outcome.file_size_bytes,
                "error": outcome.error,
                "created_at": outcome.created_at.isoformat() if outcome.created_at else None,
                "completed_at": outcome.completed_at.isoformat() if outcome.completed_at else None,
            }
            return ExportResponseModel(**fields)

        # Large export - schedule it and answer immediately.
        import uuid
        # NOTE(review): this ID is generated here and is not passed to the
        # background task or the export service, so the status/download
        # routes presumably cannot resolve it -- confirm against ExportService.
        tracking_id = str(uuid.uuid4())

        background_tasks.add_task(
            process_bulk_export_async,
            summaries_data=found,
            request=job,
            export_service=export_service
        )

        return ExportResponseModel(
            export_id=tracking_id,
            status="processing",
            created_at=datetime.utcnow().isoformat(),
            estimated_time_remaining=len(found) * 2  # Rough estimate
        )

    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Bulk export failed: {str(e)}")


@router.get("/status/{export_id}", response_model=ExportResponseModel)
async def get_export_status(export_id: str):
    """
    Report the state of an export job.

    Works for exports that are still running as well as finished ones and
    includes the download information once it is available.

    Raises:
        HTTPException: 404 when the export service has no record for the ID.
    """
    record = export_service.get_export_status(export_id)
    if not record:
        raise HTTPException(status_code=404, detail="Export not found")

    # Flatten the service record into JSON-friendly values: enum members
    # become their values, datetimes become ISO-8601 strings.
    fields = {
        "export_id": record.export_id,
        "status": record.status.value,
        "format": record.format.value if record.format else None,
        "download_url": record.download_url,
        "file_size_bytes": record.file_size_bytes,
        "error": record.error,
        "created_at": record.created_at.isoformat() if record.created_at else None,
        "completed_at": record.completed_at.isoformat() if record.completed_at else None,
    }
    return ExportResponseModel(**fields)


@router.get("/download/{export_id}")
async def download_export(export_id: str):
    """
    Download exported file

    Returns the exported file as an attachment. Files removed by the cleanup
    endpoint are no longer downloadable.

    Raises:
        HTTPException: 404 when the export is unknown, has no file recorded,
            or the recorded file is no longer present on disk.
    """

    result = export_service.get_export_status(export_id)

    if not result or not result.file_path:
        raise HTTPException(status_code=404, detail="Export file not found")

    # isfile (rather than exists) also rejects a path that points at a
    # directory, which FileResponse could not serve.
    if not os.path.isfile(result.file_path):
        raise HTTPException(status_code=404, detail="Export file no longer available")

    # Determine filename and media type together so they cannot disagree.
    if result.format:
        ext = "txt" if result.format.value == "text" else result.format.value
        filename = f"youtube_summary_export_{export_id}.{ext}"
        # A single-file export in a format missing from this table is served
        # as a generic binary download rather than mislabelled as a ZIP.
        media_type = {
            ExportFormat.MARKDOWN: "text/markdown",
            ExportFormat.PDF: "application/pdf",
            ExportFormat.PLAIN_TEXT: "text/plain",
            ExportFormat.JSON: "application/json",
            ExportFormat.HTML: "text/html"
        }.get(result.format, "application/octet-stream")
    else:
        # No single format recorded: this is a bulk export archive.
        filename = f"bulk_export_{export_id}.zip"
        media_type = "application/zip"

    # FileResponse derives a correctly quoted/encoded
    # "Content-Disposition: attachment" header from `filename`, so no
    # hand-built header is passed (an explicit one would override it).
    return FileResponse(
        path=result.file_path,
        filename=filename,
        media_type=media_type
    )


@router.get("/list", response_model=ExportListResponseModel)
async def list_exports(
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(10, ge=1, le=100, description="Items per page"),
    status: Optional[str] = Query(None, description="Filter by status")
):
    """
    List export jobs, newest first, one page at a time.

    An optional status filter narrows the list before it is paginated.

    Raises:
        HTTPException: 400 when the status filter is not a valid export status.
    """
    jobs = list(export_service.active_exports.values())

    if status:
        try:
            wanted = ExportStatus(status)
            jobs = [job for job in jobs if job.status == wanted]
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid status: {status}")

    # Newest first; jobs without a creation time sort as the oldest possible.
    ordered = sorted(
        jobs,
        key=lambda job: job.created_at or datetime.min,
        reverse=True
    )

    # `total` counts every match, not just the ones on the returned page.
    total = len(ordered)
    offset = (page - 1) * page_size
    window = ordered[offset:offset + page_size]

    # Flatten each job into JSON-friendly values: enum members become their
    # values, datetimes become ISO-8601 strings.
    items = [
        ExportResponseModel(
            export_id=job.export_id,
            status=job.status.value,
            format=job.format.value if job.format else None,
            download_url=job.download_url,
            file_size_bytes=job.file_size_bytes,
            error=job.error,
            created_at=job.created_at.isoformat() if job.created_at else None,
            completed_at=job.completed_at.isoformat() if job.completed_at else None
        )
        for job in window
    ]

    return ExportListResponseModel(
        exports=items,
        total=total,
        page=page,
        page_size=page_size
    )


@router.delete("/cleanup")
async def cleanup_old_exports(
    max_age_hours: int = Query(24, ge=1, le=168, description="Max age in hours")
):
    """
    Remove old export files.

    Asks the export service to clean up exports older than the given number
    of hours (default: 24, allowed range: 1-168).

    Raises:
        HTTPException: 500 when the cleanup fails.
    """
    try:
        await export_service.cleanup_old_exports(max_age_hours)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Cleanup failed: {str(e)}")
    else:
        return {"message": f"Cleaned up exports older than {max_age_hours} hours"}


@router.get("/formats")
async def get_available_formats():
    """
    List the export formats this API knows about.

    Each entry gives the format value, a display name, a description, and
    whether the export service currently has an exporter registered for it.
    """
    # Human-readable blurbs; a format missing from this table gets "".
    blurbs = {
        ExportFormat.MARKDOWN: "Clean, formatted Markdown for documentation",
        ExportFormat.PDF: "Professional PDF with formatting and branding",
        ExportFormat.PLAIN_TEXT: "Simple plain text format",
        ExportFormat.JSON: "Structured JSON with full metadata",
        ExportFormat.HTML: "Responsive HTML with embedded styles"
    }
    registered = export_service.exporters

    return {
        "formats": [
            {
                "format": fmt.value,
                "name": fmt.name.replace("_", " ").title(),
                "description": blurbs.get(fmt, ""),
                "available": fmt in registered,
                # Only PDF is flagged as needing an install when its exporter
                # is missing.
                "requires_install": fmt == ExportFormat.PDF and not (fmt in registered)
            }
            for fmt in ExportFormat
        ]
    }