""" Export API endpoints for YouTube Summarizer Handles single and bulk export requests for summaries """ import os from datetime import datetime from typing import List, Optional, Dict, Any from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends, Query from fastapi.responses import FileResponse from pydantic import BaseModel, Field from enum import Enum from ..services.export_service import ( ExportService, ExportFormat, ExportRequest, BulkExportRequest, ExportStatus ) from ..models.video import VideoSummary from ..services.storage_manager import StorageManager from ..services.enhanced_cache_manager import EnhancedCacheManager from ..core.exceptions import YouTubeError # Create router router = APIRouter(prefix="/api/export", tags=["export"]) class SingleExportRequestModel(BaseModel): """Request model for single summary export""" summary_id: str = Field(..., description="ID of summary to export") format: ExportFormat = Field(..., description="Export format") template: Optional[str] = Field(None, description="Custom template name") include_metadata: bool = Field(True, description="Include processing metadata") custom_branding: Optional[Dict[str, Any]] = Field(None, description="Custom branding options") class BulkExportRequestModel(BaseModel): """Request model for bulk export""" summary_ids: List[str] = Field(..., description="List of summary IDs to export") formats: List[ExportFormat] = Field(..., description="Export formats") template: Optional[str] = Field(None, description="Custom template name") organize_by: str = Field("format", description="Organization method: format, date, video") include_metadata: bool = Field(True, description="Include processing metadata") custom_branding: Optional[Dict[str, Any]] = Field(None, description="Custom branding options") class ExportResponseModel(BaseModel): """Response model for export operations""" export_id: str status: str format: Optional[str] = None download_url: Optional[str] = None file_size_bytes: Optional[int] = None error: Optional[str] = None created_at: Optional[str] = None completed_at: Optional[str] = None estimated_time_remaining: Optional[int] = None class ExportListResponseModel(BaseModel): """Response model for listing exports""" exports: List[ExportResponseModel] total: int page: int page_size: int # Initialize services export_service = ExportService() storage_manager = StorageManager() cache_manager = EnhancedCacheManager() async def get_summary_data(summary_id: str) -> Optional[Dict[str, Any]]: """ Retrieve summary data by ID First checks cache, then storage """ # Try to get from cache first cached_data = await cache_manager.get_from_cache( cache_type="summary", key=summary_id ) if cached_data: return cached_data # Get from storage try: # This would integrate with your actual storage system # For now, returning mock data for testing return { "video_id": summary_id, "video_url": f"https://youtube.com/watch?v={summary_id}", "video_metadata": { "title": "Sample Video Title", "channel_name": "Sample Channel", "duration": 600, "published_at": "2025-01-25", "view_count": 10000, "like_count": 500, "thumbnail_url": "https://example.com/thumbnail.jpg" }, "summary": "This is a sample summary of the video content. It provides key insights and main points discussed in the video.", "key_points": [ "First key point from the video", "Second important insight", "Third main takeaway" ], "main_themes": [ "Technology", "Innovation", "Future Trends" ], "actionable_insights": [ "Implement the discussed strategy in your workflow", "Consider the new approach for better results", "Apply the learned concepts to real-world scenarios" ], "confidence_score": 0.92, "processing_metadata": { "model": "gpt-4", "processing_time_seconds": 15, "tokens_used": 2500, "timestamp": datetime.utcnow().isoformat() }, "created_at": datetime.utcnow().isoformat() } except Exception as e: return None async def process_bulk_export_async( summaries_data: List[Dict[str, Any]], request: BulkExportRequest, export_service: ExportService ): """Process bulk export in background""" try: result = await export_service.bulk_export_summaries(summaries_data, request) # Could send notification when complete # await notification_service.send_export_complete(result) except Exception as e: print(f"Bulk export error: {e}") # Could send error notification # await notification_service.send_export_error(str(e)) @router.post("/single", response_model=ExportResponseModel) async def export_single_summary( request: SingleExportRequestModel, background_tasks: BackgroundTasks ): """ Export a single summary to the specified format Supports formats: markdown, pdf, text, json, html Returns export ID for tracking and download """ try: # Get summary data summary_data = await get_summary_data(request.summary_id) if not summary_data: raise HTTPException(status_code=404, detail="Summary not found") # Create export request export_request = ExportRequest( summary_id=request.summary_id, format=request.format, template=request.template, include_metadata=request.include_metadata, custom_branding=request.custom_branding ) # Process export result = await export_service.export_summary(summary_data, export_request) # Return response return ExportResponseModel( export_id=result.export_id, status=result.status.value, format=result.format.value if result.format else None, download_url=result.download_url, file_size_bytes=result.file_size_bytes, error=result.error, created_at=result.created_at.isoformat() if result.created_at else None, completed_at=result.completed_at.isoformat() if result.completed_at else None ) except HTTPException: raise except Exception as e: raise HTTPException(status_code=500, detail=f"Export failed: {str(e)}") @router.post("/bulk", response_model=ExportResponseModel) async def export_bulk_summaries( request: BulkExportRequestModel, background_tasks: BackgroundTasks ): """ Export multiple summaries in bulk Creates a ZIP archive with organized folder structure Processes in background for large exports """ try: # Validate request if len(request.summary_ids) > 100: raise HTTPException( status_code=400, detail="Maximum 100 summaries per bulk export" ) # Get all summary data summaries_data = [] for summary_id in request.summary_ids: summary_data = await get_summary_data(summary_id) if summary_data: summaries_data.append(summary_data) if not summaries_data: raise HTTPException(status_code=404, detail="No valid summaries found") # Create bulk export request bulk_request = BulkExportRequest( summary_ids=request.summary_ids, formats=request.formats, template=request.template, organize_by=request.organize_by, include_metadata=request.include_metadata, custom_branding=request.custom_branding ) # Process in background for large exports if len(summaries_data) > 10: # Large export - process async import uuid export_id = str(uuid.uuid4()) background_tasks.add_task( process_bulk_export_async, summaries_data=summaries_data, request=bulk_request, export_service=export_service ) return ExportResponseModel( export_id=export_id, status="processing", created_at=datetime.utcnow().isoformat(), estimated_time_remaining=len(summaries_data) * 2 # Rough estimate ) else: # Small export - process immediately result = await export_service.bulk_export_summaries( summaries_data, bulk_request ) return ExportResponseModel( export_id=result.export_id, status=result.status.value, download_url=result.download_url, file_size_bytes=result.file_size_bytes, error=result.error, created_at=result.created_at.isoformat() if result.created_at else None, completed_at=result.completed_at.isoformat() if result.completed_at else None ) except HTTPException: raise except Exception as e: raise HTTPException(status_code=500, detail=f"Bulk export failed: {str(e)}") @router.get("/status/{export_id}", response_model=ExportResponseModel) async def get_export_status(export_id: str): """ Get export status and download information Check the status of an ongoing or completed export """ result = export_service.get_export_status(export_id) if not result: raise HTTPException(status_code=404, detail="Export not found") return ExportResponseModel( export_id=result.export_id, status=result.status.value, format=result.format.value if result.format else None, download_url=result.download_url, file_size_bytes=result.file_size_bytes, error=result.error, created_at=result.created_at.isoformat() if result.created_at else None, completed_at=result.completed_at.isoformat() if result.completed_at else None ) @router.get("/download/{export_id}") async def download_export(export_id: str): """ Download exported file Returns the exported file for download Files are automatically cleaned up after 24 hours """ result = export_service.get_export_status(export_id) if not result or not result.file_path: raise HTTPException(status_code=404, detail="Export file not found") if not os.path.exists(result.file_path): raise HTTPException(status_code=404, detail="Export file no longer available") # Determine filename and media type if result.format: ext = result.format.value if ext == "text": ext = "txt" filename = f"youtube_summary_export_{export_id}.{ext}" else: filename = f"bulk_export_{export_id}.zip" media_type = { ExportFormat.MARKDOWN: "text/markdown", ExportFormat.PDF: "application/pdf", ExportFormat.PLAIN_TEXT: "text/plain", ExportFormat.JSON: "application/json", ExportFormat.HTML: "text/html" }.get(result.format, "application/zip") return FileResponse( path=result.file_path, filename=filename, media_type=media_type, headers={ "Content-Disposition": f"attachment; filename={filename}" } ) @router.get("/list", response_model=ExportListResponseModel) async def list_exports( page: int = Query(1, ge=1, description="Page number"), page_size: int = Query(10, ge=1, le=100, description="Items per page"), status: Optional[str] = Query(None, description="Filter by status") ): """ List all exports with pagination Returns a paginated list of export jobs """ all_exports = list(export_service.active_exports.values()) # Filter by status if provided if status: try: status_enum = ExportStatus(status) all_exports = [e for e in all_exports if e.status == status_enum] except ValueError: raise HTTPException(status_code=400, detail=f"Invalid status: {status}") # Sort by creation date (newest first) all_exports.sort(key=lambda x: x.created_at or datetime.min, reverse=True) # Pagination total = len(all_exports) start = (page - 1) * page_size end = start + page_size exports_page = all_exports[start:end] # Convert to response models export_responses = [] for export in exports_page: export_responses.append(ExportResponseModel( export_id=export.export_id, status=export.status.value, format=export.format.value if export.format else None, download_url=export.download_url, file_size_bytes=export.file_size_bytes, error=export.error, created_at=export.created_at.isoformat() if export.created_at else None, completed_at=export.completed_at.isoformat() if export.completed_at else None )) return ExportListResponseModel( exports=export_responses, total=total, page=page, page_size=page_size ) @router.delete("/cleanup") async def cleanup_old_exports( max_age_hours: int = Query(24, ge=1, le=168, description="Max age in hours") ): """ Clean up old export files Removes export files older than specified hours (default: 24) """ try: await export_service.cleanup_old_exports(max_age_hours) return {"message": f"Cleaned up exports older than {max_age_hours} hours"} except Exception as e: raise HTTPException(status_code=500, detail=f"Cleanup failed: {str(e)}") @router.get("/formats") async def get_available_formats(): """ Get list of available export formats Returns all supported export formats with descriptions """ formats = [] for format_enum in ExportFormat: available = format_enum in export_service.exporters description = { ExportFormat.MARKDOWN: "Clean, formatted Markdown for documentation", ExportFormat.PDF: "Professional PDF with formatting and branding", ExportFormat.PLAIN_TEXT: "Simple plain text format", ExportFormat.JSON: "Structured JSON with full metadata", ExportFormat.HTML: "Responsive HTML with embedded styles" }.get(format_enum, "") formats.append({ "format": format_enum.value, "name": format_enum.name.replace("_", " ").title(), "description": description, "available": available, "requires_install": format_enum == ExportFormat.PDF and not available }) return {"formats": formats}