"""
|
|
Export API endpoints for YouTube Summarizer
|
|
Handles single and bulk export requests for summaries
|
|
"""
|
|
|
|
import logging
import os
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, BackgroundTasks, HTTPException, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel, Field

from ..services.export_service import (
    ExportService,
    ExportFormat,
    ExportRequest,
    BulkExportRequest,
    ExportStatus,
)
from ..services.storage_manager import StorageManager
from ..services.enhanced_cache_manager import EnhancedCacheManager

logger = logging.getLogger(__name__)

# Create router
router = APIRouter(prefix="/api/export", tags=["export"])


class SingleExportRequestModel(BaseModel):
    """Request model for single summary export."""

    summary_id: str = Field(..., description="ID of summary to export")
    format: ExportFormat = Field(..., description="Export format")
    template: Optional[str] = Field(None, description="Custom template name")
    include_metadata: bool = Field(True, description="Include processing metadata")
    custom_branding: Optional[Dict[str, Any]] = Field(None, description="Custom branding options")


class BulkExportRequestModel(BaseModel):
    """Request model for bulk export."""

    summary_ids: List[str] = Field(..., description="List of summary IDs to export")
    formats: List[ExportFormat] = Field(..., description="Export formats")
    template: Optional[str] = Field(None, description="Custom template name")
    organize_by: str = Field("format", description="Organization method: format, date, or video")
    include_metadata: bool = Field(True, description="Include processing metadata")
    custom_branding: Optional[Dict[str, Any]] = Field(None, description="Custom branding options")
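
# Illustrative JSON bodies for the two request models above (all values
# are made up; format strings must match ExportFormat values):
#
#   POST /api/export/single
#   {"summary_id": "dQw4w9WgXcQ", "format": "pdf", "include_metadata": true}
#
#   POST /api/export/bulk
#   {"summary_ids": ["abc123", "def456"], "formats": ["markdown", "json"],
#    "organize_by": "date"}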


class ExportResponseModel(BaseModel):
    """Response model for export operations."""

    export_id: str
    status: str
    format: Optional[str] = None
    download_url: Optional[str] = None
    file_size_bytes: Optional[int] = None
    error: Optional[str] = None
    created_at: Optional[str] = None
    completed_at: Optional[str] = None
    estimated_time_remaining: Optional[int] = None


class ExportListResponseModel(BaseModel):
    """Response model for listing exports."""

    exports: List[ExportResponseModel]
    total: int
    page: int
    page_size: int


# Initialize services
export_service = ExportService()
storage_manager = StorageManager()
cache_manager = EnhancedCacheManager()


async def get_summary_data(summary_id: str) -> Optional[Dict[str, Any]]:
    """
    Retrieve summary data by ID.

    Checks the cache first, then falls back to storage.
    """
    # Try to get from cache first
    cached_data = await cache_manager.get_from_cache(
        cache_type="summary",
        key=summary_id
    )

    if cached_data:
        return cached_data

    # Get from storage
    try:
        # This would integrate with the actual storage system;
        # for now it returns mock data for testing.
        return {
            "video_id": summary_id,
            "video_url": f"https://youtube.com/watch?v={summary_id}",
            "video_metadata": {
                "title": "Sample Video Title",
                "channel_name": "Sample Channel",
                "duration": 600,
                "published_at": "2025-01-25",
                "view_count": 10000,
                "like_count": 500,
                "thumbnail_url": "https://example.com/thumbnail.jpg"
            },
            "summary": "This is a sample summary of the video content. It provides key insights and main points discussed in the video.",
            "key_points": [
                "First key point from the video",
                "Second important insight",
                "Third main takeaway"
            ],
            "main_themes": [
                "Technology",
                "Innovation",
                "Future Trends"
            ],
            "actionable_insights": [
                "Implement the discussed strategy in your workflow",
                "Consider the new approach for better results",
                "Apply the learned concepts to real-world scenarios"
            ],
            "confidence_score": 0.92,
            "processing_metadata": {
                "model": "gpt-4",
                "processing_time_seconds": 15,
                "tokens_used": 2500,
                "timestamp": datetime.utcnow().isoformat()
            },
            "created_at": datetime.utcnow().isoformat()
        }
    except Exception:
        # Treat any storage failure as "summary not found" so callers
        # can surface a 404 rather than a 500.
        logger.exception("Failed to load summary %s from storage", summary_id)
        return None


async def process_bulk_export_async(
    summaries_data: List[Dict[str, Any]],
    request: BulkExportRequest,
    export_service: ExportService
):
    """Process a bulk export in the background."""
    try:
        result = await export_service.bulk_export_summaries(summaries_data, request)
        # Could send a notification when complete:
        # await notification_service.send_export_complete(result)
    except Exception as e:
        logger.error("Bulk export error: %s", e)
        # Could send an error notification:
        # await notification_service.send_export_error(str(e))
@router.post("/single", response_model=ExportResponseModel)
|
|
async def export_single_summary(
|
|
request: SingleExportRequestModel,
|
|
background_tasks: BackgroundTasks
|
|
):
|
|
"""
|
|
Export a single summary to the specified format
|
|
|
|
Supports formats: markdown, pdf, text, json, html
|
|
Returns export ID for tracking and download
|
|
"""
|
|
|
|
try:
|
|
# Get summary data
|
|
summary_data = await get_summary_data(request.summary_id)
|
|
|
|
if not summary_data:
|
|
raise HTTPException(status_code=404, detail="Summary not found")
|
|
|
|
# Create export request
|
|
export_request = ExportRequest(
|
|
summary_id=request.summary_id,
|
|
format=request.format,
|
|
template=request.template,
|
|
include_metadata=request.include_metadata,
|
|
custom_branding=request.custom_branding
|
|
)
|
|
|
|
# Process export
|
|
result = await export_service.export_summary(summary_data, export_request)
|
|
|
|
# Return response
|
|
return ExportResponseModel(
|
|
export_id=result.export_id,
|
|
status=result.status.value,
|
|
format=result.format.value if result.format else None,
|
|
download_url=result.download_url,
|
|
file_size_bytes=result.file_size_bytes,
|
|
error=result.error,
|
|
created_at=result.created_at.isoformat() if result.created_at else None,
|
|
completed_at=result.completed_at.isoformat() if result.completed_at else None
|
|
)
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=f"Export failed: {str(e)}")
|
|
|
|
|
|
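
# Illustrative single-export call from a client (uses the third-party
# `httpx` library; the host and video ID are made up):
#
#   import httpx
#
#   resp = httpx.post(
#       "http://localhost:8000/api/export/single",
#       json={"summary_id": "dQw4w9WgXcQ", "format": "markdown"},
#   )
#   export_id = resp.json()["export_id"]
#
# The export_id can then be polled via /api/export/status/{export_id} and
# fetched via /api/export/download/{export_id} once completed.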
@router.post("/bulk", response_model=ExportResponseModel)
|
|
async def export_bulk_summaries(
|
|
request: BulkExportRequestModel,
|
|
background_tasks: BackgroundTasks
|
|
):
|
|
"""
|
|
Export multiple summaries in bulk
|
|
|
|
Creates a ZIP archive with organized folder structure
|
|
Processes in background for large exports
|
|
"""
|
|
|
|
try:
|
|
# Validate request
|
|
if len(request.summary_ids) > 100:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail="Maximum 100 summaries per bulk export"
|
|
)
|
|
|
|
# Get all summary data
|
|
summaries_data = []
|
|
for summary_id in request.summary_ids:
|
|
summary_data = await get_summary_data(summary_id)
|
|
if summary_data:
|
|
summaries_data.append(summary_data)
|
|
|
|
if not summaries_data:
|
|
raise HTTPException(status_code=404, detail="No valid summaries found")
|
|
|
|
# Create bulk export request
|
|
bulk_request = BulkExportRequest(
|
|
summary_ids=request.summary_ids,
|
|
formats=request.formats,
|
|
template=request.template,
|
|
organize_by=request.organize_by,
|
|
include_metadata=request.include_metadata,
|
|
custom_branding=request.custom_branding
|
|
)
|
|
|
|
# Process in background for large exports
|
|
if len(summaries_data) > 10:
|
|
# Large export - process async
|
|
import uuid
|
|
export_id = str(uuid.uuid4())
|
|
|
|
background_tasks.add_task(
|
|
process_bulk_export_async,
|
|
summaries_data=summaries_data,
|
|
request=bulk_request,
|
|
export_service=export_service
|
|
)
|
|
|
|
return ExportResponseModel(
|
|
export_id=export_id,
|
|
status="processing",
|
|
created_at=datetime.utcnow().isoformat(),
|
|
estimated_time_remaining=len(summaries_data) * 2 # Rough estimate
|
|
)
|
|
else:
|
|
# Small export - process immediately
|
|
result = await export_service.bulk_export_summaries(
|
|
summaries_data,
|
|
bulk_request
|
|
)
|
|
|
|
return ExportResponseModel(
|
|
export_id=result.export_id,
|
|
status=result.status.value,
|
|
download_url=result.download_url,
|
|
file_size_bytes=result.file_size_bytes,
|
|
error=result.error,
|
|
created_at=result.created_at.isoformat() if result.created_at else None,
|
|
completed_at=result.completed_at.isoformat() if result.completed_at else None
|
|
)
|
|
|
|
except HTTPException:
|
|
raise
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=f"Bulk export failed: {str(e)}")
|
|
|
|
|
|
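
# Illustrative bulk-export call (IDs and host are made up):
#
#   resp = httpx.post(
#       "http://localhost:8000/api/export/bulk",
#       json={
#           "summary_ids": ["abc123", "def456"],
#           "formats": ["markdown", "pdf"],
#           "organize_by": "date",
#       },
#   )
#
# Batches of more than 10 resolvable summaries return immediately with
# status "processing"; smaller batches are exported synchronously.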
@router.get("/status/{export_id}", response_model=ExportResponseModel)
|
|
async def get_export_status(export_id: str):
|
|
"""
|
|
Get export status and download information
|
|
|
|
Check the status of an ongoing or completed export
|
|
"""
|
|
|
|
result = export_service.get_export_status(export_id)
|
|
|
|
if not result:
|
|
raise HTTPException(status_code=404, detail="Export not found")
|
|
|
|
return ExportResponseModel(
|
|
export_id=result.export_id,
|
|
status=result.status.value,
|
|
format=result.format.value if result.format else None,
|
|
download_url=result.download_url,
|
|
file_size_bytes=result.file_size_bytes,
|
|
error=result.error,
|
|
created_at=result.created_at.isoformat() if result.created_at else None,
|
|
completed_at=result.completed_at.isoformat() if result.completed_at else None
|
|
)
|
|
|
|
|
|
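
# Illustrative status-polling flow (httpx again; the host and the
# "completed" status string are assumptions about the deployment and
# the ExportStatus enum):
#
#   status = httpx.get(f"http://localhost:8000/api/export/status/{export_id}").json()
#   if status["status"] == "completed":
#       content = httpx.get(f"http://localhost:8000/api/export/download/{export_id}").content
#       open(f"export_{export_id}.md", "wb").write(content)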
@router.get("/download/{export_id}")
|
|
async def download_export(export_id: str):
|
|
"""
|
|
Download exported file
|
|
|
|
Returns the exported file for download
|
|
Files are automatically cleaned up after 24 hours
|
|
"""
|
|
|
|
result = export_service.get_export_status(export_id)
|
|
|
|
if not result or not result.file_path:
|
|
raise HTTPException(status_code=404, detail="Export file not found")
|
|
|
|
if not os.path.exists(result.file_path):
|
|
raise HTTPException(status_code=404, detail="Export file no longer available")
|
|
|
|
# Determine filename and media type
|
|
if result.format:
|
|
ext = result.format.value
|
|
if ext == "text":
|
|
ext = "txt"
|
|
filename = f"youtube_summary_export_{export_id}.{ext}"
|
|
else:
|
|
filename = f"bulk_export_{export_id}.zip"
|
|
|
|
media_type = {
|
|
ExportFormat.MARKDOWN: "text/markdown",
|
|
ExportFormat.PDF: "application/pdf",
|
|
ExportFormat.PLAIN_TEXT: "text/plain",
|
|
ExportFormat.JSON: "application/json",
|
|
ExportFormat.HTML: "text/html"
|
|
}.get(result.format, "application/zip")
|
|
|
|
return FileResponse(
|
|
path=result.file_path,
|
|
filename=filename,
|
|
media_type=media_type,
|
|
headers={
|
|
"Content-Disposition": f"attachment; filename={filename}"
|
|
}
|
|
)
|
|
|
|
|
|
@router.get("/list", response_model=ExportListResponseModel)
|
|
async def list_exports(
|
|
page: int = Query(1, ge=1, description="Page number"),
|
|
page_size: int = Query(10, ge=1, le=100, description="Items per page"),
|
|
status: Optional[str] = Query(None, description="Filter by status")
|
|
):
|
|
"""
|
|
List all exports with pagination
|
|
|
|
Returns a paginated list of export jobs
|
|
"""
|
|
|
|
all_exports = list(export_service.active_exports.values())
|
|
|
|
# Filter by status if provided
|
|
if status:
|
|
try:
|
|
status_enum = ExportStatus(status)
|
|
all_exports = [e for e in all_exports if e.status == status_enum]
|
|
except ValueError:
|
|
raise HTTPException(status_code=400, detail=f"Invalid status: {status}")
|
|
|
|
# Sort by creation date (newest first)
|
|
all_exports.sort(key=lambda x: x.created_at or datetime.min, reverse=True)
|
|
|
|
# Pagination
|
|
total = len(all_exports)
|
|
start = (page - 1) * page_size
|
|
end = start + page_size
|
|
exports_page = all_exports[start:end]
|
|
|
|
# Convert to response models
|
|
export_responses = []
|
|
for export in exports_page:
|
|
export_responses.append(ExportResponseModel(
|
|
export_id=export.export_id,
|
|
status=export.status.value,
|
|
format=export.format.value if export.format else None,
|
|
download_url=export.download_url,
|
|
file_size_bytes=export.file_size_bytes,
|
|
error=export.error,
|
|
created_at=export.created_at.isoformat() if export.created_at else None,
|
|
completed_at=export.completed_at.isoformat() if export.completed_at else None
|
|
))
|
|
|
|
return ExportListResponseModel(
|
|
exports=export_responses,
|
|
total=total,
|
|
page=page,
|
|
page_size=page_size
|
|
)
|
|
|
|
|
|
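
# Illustrative listing query:
#
#   GET /api/export/list?page=1&page_size=20&status=completed
#
# The status filter must parse as an ExportStatus value; "completed" here
# is an assumption about that enum's members.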
@router.delete("/cleanup")
|
|
async def cleanup_old_exports(
|
|
max_age_hours: int = Query(24, ge=1, le=168, description="Max age in hours")
|
|
):
|
|
"""
|
|
Clean up old export files
|
|
|
|
Removes export files older than specified hours (default: 24)
|
|
"""
|
|
|
|
try:
|
|
await export_service.cleanup_old_exports(max_age_hours)
|
|
return {"message": f"Cleaned up exports older than {max_age_hours} hours"}
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=f"Cleanup failed: {str(e)}")
|
|
|
|
|
|
@router.get("/formats")
|
|
async def get_available_formats():
|
|
"""
|
|
Get list of available export formats
|
|
|
|
Returns all supported export formats with descriptions
|
|
"""
|
|
|
|
formats = []
|
|
for format_enum in ExportFormat:
|
|
available = format_enum in export_service.exporters
|
|
|
|
description = {
|
|
ExportFormat.MARKDOWN: "Clean, formatted Markdown for documentation",
|
|
ExportFormat.PDF: "Professional PDF with formatting and branding",
|
|
ExportFormat.PLAIN_TEXT: "Simple plain text format",
|
|
ExportFormat.JSON: "Structured JSON with full metadata",
|
|
ExportFormat.HTML: "Responsive HTML with embedded styles"
|
|
}.get(format_enum, "")
|
|
|
|
formats.append({
|
|
"format": format_enum.value,
|
|
"name": format_enum.name.replace("_", " ").title(),
|
|
"description": description,
|
|
"available": available,
|
|
"requires_install": format_enum == ExportFormat.PDF and not available
|
|
})
|
|
|
|
return {"formats": formats} |