# youtube-summarizer/backend/api/export.py
#
# 451 lines
# 15 KiB
# Python

"""
Export API endpoints for YouTube Summarizer
Handles single and bulk export requests for summaries
"""
import logging
import os
from datetime import datetime
from enum import Enum
from typing import List, Optional, Dict, Any

from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel, Field

from ..services.export_service import (
    ExportService,
    ExportFormat,
    ExportRequest,
    BulkExportRequest,
    ExportStatus
)
from ..models.video import VideoSummary
from ..services.storage_manager import StorageManager
from ..services.enhanced_cache_manager import EnhancedCacheManager
from ..core.exceptions import YouTubeError
# Create router
# Every endpoint declared in this module is registered on this router, so all
# of their paths are served under the /api/export prefix with the "export" tag.
router = APIRouter(prefix="/api/export", tags=["export"])
class SingleExportRequestModel(BaseModel):
    """Request model for single summary export"""
    # Request body of POST /api/export/single. export_single_summary copies
    # every field below, unchanged, into an ExportRequest for the export service.

    # Required (Field(...)): ID handed to get_summary_data to load the summary
    summary_id: str = Field(..., description="ID of summary to export")
    # Required: target format, validated against the ExportFormat enum
    format: ExportFormat = Field(..., description="Export format")
    # Optional template name; None when the client does not send one
    template: Optional[str] = Field(None, description="Custom template name")
    # Defaults to True when omitted
    include_metadata: bool = Field(True, description="Include processing metadata")
    # Free-form dictionary; its keys are not validated by this model
    custom_branding: Optional[Dict[str, Any]] = Field(None, description="Custom branding options")
class BulkExportRequestModel(BaseModel):
    """Request model for bulk export"""
    # Request body of POST /api/export/bulk. export_bulk_summaries copies
    # every field below, unchanged, into a BulkExportRequest.

    # Required. The model itself sets no size limit; export_bulk_summaries
    # rejects lists longer than 100 entries with HTTP 400.
    summary_ids: List[str] = Field(..., description="List of summary IDs to export")
    # Required: each entry is validated against the ExportFormat enum
    formats: List[ExportFormat] = Field(..., description="Export formats")
    # Optional template name; None when the client does not send one
    template: Optional[str] = Field(None, description="Custom template name")
    # Plain string defaulting to "format". The description lists
    # format/date/video, but this model does not enforce those values.
    organize_by: str = Field("format", description="Organization method: format, date, video")
    # Defaults to True when omitted
    include_metadata: bool = Field(True, description="Include processing metadata")
    # Free-form dictionary; its keys are not validated by this model
    custom_branding: Optional[Dict[str, Any]] = Field(None, description="Custom branding options")
class ExportResponseModel(BaseModel):
    """Response model for export operations"""
    # Returned by the single, bulk and status endpoints, and used as the item
    # type of ExportListResponseModel. Only export_id and status are required.

    # Identifier clients use with the /status and /download endpoints
    export_id: str
    # The endpoints fill this with an ExportStatus value, or the literal
    # "processing" for bulk exports handed to a background task
    status: str
    # ExportFormat value; the endpoints leave it None when the export result
    # carries no format (the bulk endpoint never sets it)
    format: Optional[str] = None
    download_url: Optional[str] = None
    file_size_bytes: Optional[int] = None
    # Error text copied from the export result, if any
    error: Optional[str] = None
    # Timestamps are sent as ISO-8601 strings (the endpoints call .isoformat())
    created_at: Optional[str] = None
    completed_at: Optional[str] = None
    # Only set for background bulk exports, as 2 * number of summaries;
    # presumably seconds — TODO confirm the unit
    estimated_time_remaining: Optional[int] = None
class ExportListResponseModel(BaseModel):
    """Response model for listing exports"""
    # Returned by GET /api/export/list.

    # The exports that fall on the requested page
    exports: List[ExportResponseModel]
    # Number of exports matching the filter across all pages (not just this one)
    total: int
    # Echo of the page and page_size query parameters of the request
    page: int
    page_size: int
# Initialize services
# Module-level instances created once at import time and shared by every
# request handler in this module.
export_service = ExportService()
# NOTE(review): storage_manager is not referenced by the code visible in this
# section — confirm whether it is still needed.
storage_manager = StorageManager()
# Consulted by get_summary_data before falling back to storage
cache_manager = EnhancedCacheManager()
async def get_summary_data(summary_id: str) -> Optional[Dict[str, Any]]:
    """
    Retrieve summary data by ID

    The cache is consulted first (cache type "summary"); any truthy cached
    value is returned as-is. Otherwise the function falls back to storage,
    which is still a placeholder that fabricates sample data for whatever
    ID it is given.

    Args:
        summary_id: ID of the summary to load. The placeholder also uses it
            as the video ID and to build the video URL.

    Returns:
        The summary dictionary, or None when the storage lookup fails.
    """
    # Try to get from cache first
    cached_data = await cache_manager.get_from_cache(
        cache_type="summary",
        key=summary_id
    )
    if cached_data:
        return cached_data

    # Get from storage
    try:
        # This would integrate with your actual storage system
        # For now, returning mock data for testing
        return {
            "video_id": summary_id,
            "video_url": f"https://youtube.com/watch?v={summary_id}",
            "video_metadata": {
                "title": "Sample Video Title",
                "channel_name": "Sample Channel",
                "duration": 600,
                "published_at": "2025-01-25",
                "view_count": 10000,
                "like_count": 500,
                "thumbnail_url": "https://example.com/thumbnail.jpg"
            },
            "summary": "This is a sample summary of the video content. It provides key insights and main points discussed in the video.",
            "key_points": [
                "First key point from the video",
                "Second important insight",
                "Third main takeaway"
            ],
            "main_themes": [
                "Technology",
                "Innovation",
                "Future Trends"
            ],
            "actionable_insights": [
                "Implement the discussed strategy in your workflow",
                "Consider the new approach for better results",
                "Apply the learned concepts to real-world scenarios"
            ],
            "confidence_score": 0.92,
            "processing_metadata": {
                "model": "gpt-4",
                "processing_time_seconds": 15,
                "tokens_used": 2500,
                "timestamp": datetime.utcnow().isoformat()
            },
            "created_at": datetime.utcnow().isoformat()
        }
    except Exception:
        # Callers treat None as "summary not found"; record the real cause
        # (with traceback) so a storage failure is not mistaken for a missing ID.
        logging.getLogger(__name__).exception(
            "Failed to load summary %s from storage", summary_id
        )
        return None
async def process_bulk_export_async(
    summaries_data: List[Dict[str, Any]],
    request: BulkExportRequest,
    export_service: ExportService
):
    """
    Process bulk export in background

    Scheduled by export_bulk_summaries as a background task. It runs after
    the HTTP response has been sent, so a failure cannot be reported to the
    client from here; it is logged and then suppressed.

    Args:
        summaries_data: Summary dictionaries to export.
        request: Bulk export options passed through to the service.
        export_service: Service instance that performs the export.
    """
    try:
        await export_service.bulk_export_summaries(summaries_data, request)
        # Could send notification when complete
        # await notification_service.send_export_complete(result)
    except Exception as exc:
        # Deliberately best-effort: nothing is re-raised. logging (instead of
        # print) keeps the traceback and goes through the configured handlers.
        logging.getLogger(__name__).exception("Bulk export error: %s", exc)
        # Could send error notification
        # await notification_service.send_export_error(str(e))
@router.post("/single", response_model=ExportResponseModel)
async def export_single_summary(
    request: SingleExportRequestModel,
    background_tasks: BackgroundTasks
):
    """
    Export one summary in the requested format

    Loads the summary, runs the export synchronously and returns the
    resulting export record (ID, status, download information).

    Raises:
        HTTPException: 404 when the summary cannot be loaded, 500 when the
            export itself fails for any other reason.
    """
    try:
        payload = await get_summary_data(request.summary_id)
        if not payload:
            raise HTTPException(status_code=404, detail="Summary not found")

        # Hand the client's options to the service unchanged
        outcome = await export_service.export_summary(
            payload,
            ExportRequest(
                summary_id=request.summary_id,
                format=request.format,
                template=request.template,
                include_metadata=request.include_metadata,
                custom_branding=request.custom_branding,
            ),
        )

        # Optional attributes are flattened to plain strings or None
        fmt = outcome.format
        started = outcome.created_at
        finished = outcome.completed_at
        return ExportResponseModel(
            export_id=outcome.export_id,
            status=outcome.status.value,
            format=fmt.value if fmt else None,
            download_url=outcome.download_url,
            file_size_bytes=outcome.file_size_bytes,
            error=outcome.error,
            created_at=started.isoformat() if started else None,
            completed_at=finished.isoformat() if finished else None,
        )
    except HTTPException:
        # Pass the 404 above through instead of turning it into a 500
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Export failed: {exc!s}")
@router.post("/bulk", response_model=ExportResponseModel)
async def export_bulk_summaries(
    request: BulkExportRequestModel,
    background_tasks: BackgroundTasks
):
    """
    Export several summaries in one request

    Up to 10 resolvable summaries are exported before the response is sent;
    larger batches are handed to a background task and the response reports
    status "processing" with a rough time estimate.

    Raises:
        HTTPException: 400 for more than 100 IDs, 404 when none of the IDs
            resolves to a summary, 500 for any other failure.
    """
    try:
        if len(request.summary_ids) > 100:
            raise HTTPException(
                status_code=400,
                detail="Maximum 100 summaries per bulk export"
            )

        # Look the IDs up one after another and silently drop the misses
        looked_up = [await get_summary_data(sid) for sid in request.summary_ids]
        summaries_data = [entry for entry in looked_up if entry]
        if not summaries_data:
            raise HTTPException(status_code=404, detail="No valid summaries found")

        bulk_request = BulkExportRequest(
            summary_ids=request.summary_ids,
            formats=request.formats,
            template=request.template,
            organize_by=request.organize_by,
            include_metadata=request.include_metadata,
            custom_branding=request.custom_branding,
        )

        if len(summaries_data) <= 10:
            # Small export - finish it before responding
            outcome = await export_service.bulk_export_summaries(
                summaries_data,
                bulk_request
            )
            started = outcome.created_at
            finished = outcome.completed_at
            return ExportResponseModel(
                export_id=outcome.export_id,
                status=outcome.status.value,
                download_url=outcome.download_url,
                file_size_bytes=outcome.file_size_bytes,
                error=outcome.error,
                created_at=started.isoformat() if started else None,
                completed_at=finished.isoformat() if finished else None,
            )

        # Large export - defer the work to a background task
        import uuid
        # NOTE(review): this ID is generated here and is not passed to the
        # background task — confirm that the status endpoint can resolve it.
        export_id = str(uuid.uuid4())
        background_tasks.add_task(
            process_bulk_export_async,
            summaries_data=summaries_data,
            request=bulk_request,
            export_service=export_service
        )
        return ExportResponseModel(
            export_id=export_id,
            status="processing",
            created_at=datetime.utcnow().isoformat(),
            estimated_time_remaining=len(summaries_data) * 2  # Rough estimate
        )
    except HTTPException:
        # Keep the 400/404 raised above from being rewritten as a 500
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Bulk export failed: {exc!s}")
@router.get("/status/{export_id}", response_model=ExportResponseModel)
async def get_export_status(export_id: str):
    """
    Report the current state of an export

    Looks the export up in the export service and returns its status and
    download information.

    Raises:
        HTTPException: 404 when the service has no export with this ID.
    """
    record = export_service.get_export_status(export_id)
    if not record:
        raise HTTPException(status_code=404, detail="Export not found")

    # Optional attributes are flattened to plain strings or None
    fields = {
        "export_id": record.export_id,
        "status": record.status.value,
        "format": record.format.value if record.format else None,
        "download_url": record.download_url,
        "file_size_bytes": record.file_size_bytes,
        "error": record.error,
        "created_at": record.created_at.isoformat() if record.created_at else None,
        "completed_at": record.completed_at.isoformat() if record.completed_at else None,
    }
    return ExportResponseModel(**fields)
@router.get("/download/{export_id}")
async def download_export(export_id: str):
    """
    Download exported file

    Returns the exported file as an attachment. Exports that carry a format
    are served with that format's extension and media type; exports without
    one are served as a ZIP archive.

    Raises:
        HTTPException: 404 when the export is unknown, has no file path, or
            its file is no longer present on disk.
    """
    result = export_service.get_export_status(export_id)
    if not result or not result.file_path:
        raise HTTPException(status_code=404, detail="Export file not found")
    # isfile rather than exists: a directory at that path cannot be served as
    # a download and should be reported as missing, not fail later.
    if not os.path.isfile(result.file_path):
        raise HTTPException(status_code=404, detail="Export file no longer available")

    # Determine filename and media type
    if result.format:
        ext = result.format.value
        if ext == "text":
            # The plain-text format value is not a usable file extension
            ext = "txt"
        filename = f"youtube_summary_export_{export_id}.{ext}"
    else:
        filename = f"bulk_export_{export_id}.zip"

    media_type = {
        ExportFormat.MARKDOWN: "text/markdown",
        ExportFormat.PDF: "application/pdf",
        ExportFormat.PLAIN_TEXT: "text/plain",
        ExportFormat.JSON: "application/json",
        ExportFormat.HTML: "text/html"
    }.get(result.format, "application/zip")

    # No explicit Content-Disposition header: FileResponse builds a quoted
    # (and, where needed, percent-encoded) attachment header from `filename`,
    # and a hand-written header would take precedence over it.
    return FileResponse(
        path=result.file_path,
        filename=filename,
        media_type=media_type
    )
@router.get("/list", response_model=ExportListResponseModel)
async def list_exports(
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(10, ge=1, le=100, description="Items per page"),
    status: Optional[str] = Query(None, description="Filter by status")
):
    """
    Return one page of export jobs, newest first

    Reads the exports tracked by the export service, optionally keeps only
    those with the given status, orders them by creation time and slices
    out the requested page.

    Raises:
        HTTPException: 400 when ``status`` is not a valid ExportStatus value.
    """
    records = list(export_service.active_exports.values())

    if status:
        try:
            wanted = ExportStatus(status)
            records = [rec for rec in records if rec.status == wanted]
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid status: {status}")

    # Newest first; records without a creation time sort last
    ordered = sorted(
        records,
        key=lambda rec: rec.created_at or datetime.min,
        reverse=True,
    )

    offset = (page - 1) * page_size
    window = ordered[offset:offset + page_size]

    def to_response(rec):
        # Optional attributes are flattened to plain strings or None
        return ExportResponseModel(
            export_id=rec.export_id,
            status=rec.status.value,
            format=rec.format.value if rec.format else None,
            download_url=rec.download_url,
            file_size_bytes=rec.file_size_bytes,
            error=rec.error,
            created_at=rec.created_at.isoformat() if rec.created_at else None,
            completed_at=rec.completed_at.isoformat() if rec.completed_at else None,
        )

    return ExportListResponseModel(
        exports=[to_response(rec) for rec in window],
        total=len(ordered),
        page=page,
        page_size=page_size,
    )
@router.delete("/cleanup")
async def cleanup_old_exports(
    max_age_hours: int = Query(24, ge=1, le=168, description="Max age in hours")
):
    """
    Remove export files past a given age

    Delegates to the export service with the requested age limit
    (1 to 168 hours, 24 by default) and confirms with a short message.

    Raises:
        HTTPException: 500 when the service cleanup fails.
    """
    try:
        await export_service.cleanup_old_exports(max_age_hours)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Cleanup failed: {exc!s}")
    return {"message": f"Cleaned up exports older than {max_age_hours} hours"}
@router.get("/formats")
async def get_available_formats():
    """
    Describe every export format

    Returns one entry per ExportFormat member with its value, a display
    name, a description, and whether the export service currently has an
    exporter registered for it.
    """
    blurbs = {
        ExportFormat.MARKDOWN: "Clean, formatted Markdown for documentation",
        ExportFormat.PDF: "Professional PDF with formatting and branding",
        ExportFormat.PLAIN_TEXT: "Simple plain text format",
        ExportFormat.JSON: "Structured JSON with full metadata",
        ExportFormat.HTML: "Responsive HTML with embedded styles"
    }
    registered = export_service.exporters

    def describe(fmt):
        usable = fmt in registered
        return {
            "format": fmt.value,
            "name": fmt.name.replace("_", " ").title(),
            "description": blurbs.get(fmt, ""),
            "available": usable,
            # Only PDF is reported as needing an extra install when missing
            "requires_install": fmt == ExportFormat.PDF and not usable
        }

    return {"formats": [describe(fmt) for fmt in ExportFormat]}