""" Enhanced API endpoints for YouTube Summarizer Developer Platform Extends existing API with advanced developer features, batch processing, and webhooks """ from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks, Query, Header from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials from fastapi.responses import StreamingResponse from pydantic import BaseModel, Field, HttpUrl from typing import List, Optional, Dict, Any, Literal, Union from datetime import datetime, timedelta from uuid import UUID, uuid4 import json import asyncio import logging from enum import Enum # Import existing services try: from ..services.dual_transcript_service import DualTranscriptService from ..services.batch_processing_service import BatchProcessingService from ..models.transcript import TranscriptSource, WhisperModelSize, DualTranscriptResult from ..models.batch import BatchJob, BatchJobStatus except ImportError: # Fallback for testing pass logger = logging.getLogger(__name__) # Authentication security = HTTPBearer(auto_error=False) # Create enhanced API router router = APIRouter(prefix="/api/v2", tags=["enhanced-api"]) # Enhanced Models class APIKeyInfo(BaseModel): id: str name: str rate_limit_per_hour: int created_at: datetime last_used_at: Optional[datetime] usage_count: int is_active: bool class ProcessingPriority(str, Enum): LOW = "low" NORMAL = "normal" HIGH = "high" URGENT = "urgent" class WebhookEvent(str, Enum): JOB_STARTED = "job.started" JOB_PROGRESS = "job.progress" JOB_COMPLETED = "job.completed" JOB_FAILED = "job.failed" BATCH_COMPLETED = "batch.completed" class EnhancedTranscriptRequest(BaseModel): video_url: HttpUrl = Field(..., description="YouTube video URL") transcript_source: TranscriptSource = Field(default=TranscriptSource.YOUTUBE, description="Transcript source") whisper_model_size: Optional[WhisperModelSize] = Field(default=WhisperModelSize.SMALL, description="Whisper model size") priority: ProcessingPriority = Field(default=ProcessingPriority.NORMAL, description="Processing priority") webhook_url: Optional[HttpUrl] = Field(None, description="Webhook URL for notifications") include_quality_analysis: bool = Field(default=True, description="Include transcript quality analysis") custom_prompt: Optional[str] = Field(None, description="Custom processing prompt") tags: List[str] = Field(default_factory=list, description="Custom tags for organization") class BatchProcessingRequest(BaseModel): video_urls: List[HttpUrl] = Field(..., min_items=1, max_items=1000, description="List of video URLs") transcript_source: TranscriptSource = Field(default=TranscriptSource.YOUTUBE, description="Transcript source for all videos") batch_name: str = Field(..., description="Batch job name") priority: ProcessingPriority = Field(default=ProcessingPriority.NORMAL, description="Processing priority") webhook_url: Optional[HttpUrl] = Field(None, description="Webhook URL for batch notifications") parallel_processing: bool = Field(default=False, description="Enable parallel processing") max_concurrent_jobs: int = Field(default=5, description="Maximum concurrent jobs") class EnhancedJobResponse(BaseModel): job_id: str status: str priority: ProcessingPriority created_at: datetime estimated_completion: Optional[datetime] progress_percentage: float current_stage: str webhook_url: Optional[str] metadata: Dict[str, Any] class APIUsageStats(BaseModel): total_requests: int requests_today: int requests_this_month: int average_response_time_ms: float success_rate: float rate_limit_remaining: int 

# Mock authentication and rate limiting (to be replaced with real implementation)
async def verify_api_key(credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)) -> Dict[str, Any]:
    """Verify API key and return user info"""
    if not credentials:
        raise HTTPException(status_code=401, detail="API key required")

    # Mock API key validation - replace with real implementation
    api_key = credentials.credentials
    if not api_key.startswith("ys_"):
        raise HTTPException(status_code=401, detail="Invalid API key format")

    # Mock user info - replace with database lookup
    return {
        "user_id": "user_" + api_key[-8:],
        "api_key_id": "key_" + api_key[-8:],
        "rate_limit": 1000,
        "tier": "pro" if "pro" in api_key else "free"
    }


async def check_rate_limit(user_info: Dict = Depends(verify_api_key)) -> Dict[str, Any]:
    """Check and update rate limiting"""
    # Mock rate limiting - replace with Redis implementation
    remaining = 995  # Mock remaining requests
    reset_time = datetime.now() + timedelta(hours=1)

    if remaining <= 0:
        raise HTTPException(
            status_code=429,
            detail="Rate limit exceeded",
            headers={"Retry-After": "3600"}
        )

    return {
        **user_info,
        "rate_limit_remaining": remaining,
        "rate_limit_reset": reset_time
    }
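
# Sketch only: one way the Redis-backed limiter mentioned above could look, using a
# fixed one-hour window and redis-py's asyncio client. The key layout, window length,
# and connection handling are illustrative assumptions, not the service's actual scheme.
async def check_rate_limit_redis(api_key_id: str, limit_per_hour: int, redis_url: str = "redis://localhost:6379") -> int:
    """Return the remaining requests in the current window, raising 429 when exhausted."""
    import redis.asyncio as aioredis

    client = aioredis.from_url(redis_url)  # in practice this client would be shared, not created per call
    try:
        key = f"rate:{api_key_id}:{datetime.now().strftime('%Y%m%d%H')}"
        used = await client.incr(key)
        if used == 1:
            await client.expire(key, 3600)  # start the window on the first request
        if used > limit_per_hour:
            raise HTTPException(
                status_code=429,
                detail="Rate limit exceeded",
                headers={"Retry-After": "3600"}
            )
        return limit_per_hour - used
    finally:
        await client.close()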

# Enhanced API Endpoints
@router.get("/health", summary="Health check with detailed status")
async def enhanced_health_check():
    """Enhanced health check with service status"""
    try:
        # Check service availability
        services_status = {
            "dual_transcript_service": True,  # Check actual service
            "batch_processing_service": True,  # Check actual service
            "database": True,  # Check database connection
            "redis": True,  # Check Redis connection
            "webhook_service": True,  # Check webhook service
        }

        overall_healthy = all(services_status.values())

        return {
            "status": "healthy" if overall_healthy else "degraded",
            "timestamp": datetime.now().isoformat(),
            "version": "4.2.0",
            "services": services_status,
            "uptime_seconds": 3600,  # Mock uptime
            "requests_per_minute": 45,  # Mock metric
        }
    except Exception as e:
        raise HTTPException(status_code=503, detail=f"Service unavailable: {str(e)}")


@router.post("/transcript/extract", summary="Extract transcript with enhanced options", response_model=EnhancedJobResponse)
async def enhanced_transcript_extraction(
    request: EnhancedTranscriptRequest,
    background_tasks: BackgroundTasks,
    user_info: Dict = Depends(check_rate_limit)
):
    """Enhanced transcript extraction with priority, webhooks, and quality analysis"""
    job_id = str(uuid4())

    try:
        # Create job with enhanced metadata
        job_metadata = {
            "user_id": user_info["user_id"],
            "video_url": str(request.video_url),
            "transcript_source": request.transcript_source.value,
            "priority": request.priority.value,
            "tags": request.tags,
            "custom_prompt": request.custom_prompt,
            "include_quality_analysis": request.include_quality_analysis
        }

        # Start background processing
        background_tasks.add_task(
            process_enhanced_transcript,
            job_id=job_id,
            request=request,
            user_info=user_info
        )

        # Calculate estimated completion based on priority
        priority_multiplier = {
            ProcessingPriority.URGENT: 0.5,
            ProcessingPriority.HIGH: 0.7,
            ProcessingPriority.NORMAL: 1.0,
            ProcessingPriority.LOW: 1.5
        }
        base_time = 30 if request.transcript_source == TranscriptSource.YOUTUBE else 120
        estimated_seconds = base_time * priority_multiplier[request.priority]
        estimated_completion = datetime.now() + timedelta(seconds=estimated_seconds)

        return EnhancedJobResponse(
            job_id=job_id,
            status="queued",
            priority=request.priority,
            created_at=datetime.now(),
            estimated_completion=estimated_completion,
            progress_percentage=0.0,
            current_stage="queued",
            webhook_url=str(request.webhook_url) if request.webhook_url else None,
            metadata=job_metadata
        )

    except Exception as e:
        logger.error(f"Enhanced transcript extraction failed: {e}")
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")


@router.post("/batch/process", summary="Batch process multiple videos", response_model=Dict[str, Any])
async def enhanced_batch_processing(
    request: BatchProcessingRequest,
    background_tasks: BackgroundTasks,
    user_info: Dict = Depends(check_rate_limit)
):
    """Enhanced batch processing with parallel execution and progress tracking"""
    batch_id = str(uuid4())

    try:
        # Validate batch size limits based on user tier
        max_batch_size = 1000 if user_info["tier"] == "pro" else 100
        if len(request.video_urls) > max_batch_size:
            raise HTTPException(
                status_code=400,
                detail=f"Batch size exceeds limit. Max: {max_batch_size} for {user_info['tier']} tier"
            )

        # Create batch job
        batch_metadata = {
            "user_id": user_info["user_id"],
            "batch_name": request.batch_name,
            "video_count": len(request.video_urls),
            "transcript_source": request.transcript_source.value,
            "priority": request.priority.value,
            "parallel_processing": request.parallel_processing,
            "max_concurrent_jobs": request.max_concurrent_jobs
        }

        # Start background batch processing
        background_tasks.add_task(
            process_enhanced_batch,
            batch_id=batch_id,
            request=request,
            user_info=user_info
        )

        # Calculate estimated completion
        job_time = 30 if request.transcript_source == TranscriptSource.YOUTUBE else 120
        if request.parallel_processing:
            total_time = (len(request.video_urls) / request.max_concurrent_jobs) * job_time
        else:
            total_time = len(request.video_urls) * job_time
        estimated_completion = datetime.now() + timedelta(seconds=total_time)

        return {
            "batch_id": batch_id,
            "status": "queued",
            "video_count": len(request.video_urls),
            "priority": request.priority.value,
            "estimated_completion": estimated_completion.isoformat(),
            "parallel_processing": request.parallel_processing,
            "webhook_url": str(request.webhook_url) if request.webhook_url else None,
            "metadata": batch_metadata
        }

    except HTTPException:
        # Re-raise validation errors (e.g. the 400 above) instead of converting them to 500s
        raise
    except Exception as e:
        logger.error(f"Enhanced batch processing failed: {e}")
        raise HTTPException(status_code=500, detail=f"Batch processing failed: {str(e)}")


@router.get("/job/{job_id}", summary="Get enhanced job status", response_model=EnhancedJobResponse)
async def get_enhanced_job_status(
    job_id: str,
    user_info: Dict = Depends(verify_api_key)
):
    """Get detailed job status with progress and metadata"""
    try:
        # Mock job status - replace with actual job lookup
        mock_job = {
            "job_id": job_id,
            "status": "processing",
            "priority": ProcessingPriority.NORMAL,
            "created_at": datetime.now() - timedelta(minutes=2),
            "estimated_completion": datetime.now() + timedelta(minutes=3),
            "progress_percentage": 65.0,
            "current_stage": "generating_summary",
            "webhook_url": None,
            "metadata": {
                "user_id": user_info["user_id"],
                "processing_time_elapsed": 120,
                "estimated_time_remaining": 180
            }
        }

        return EnhancedJobResponse(**mock_job)

    except Exception as e:
        logger.error(f"Job status lookup failed: {e}")
        raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")


@router.get("/usage/stats", summary="Get API usage statistics", response_model=APIUsageStats)
async def get_usage_statistics(
    user_info: Dict = Depends(verify_api_key)
):
    """Get detailed API usage statistics for the authenticated user"""
    try:
        # Mock usage stats - replace with actual database queries
        return APIUsageStats(
            total_requests=1250,
            requests_today=45,
            requests_this_month=890,
            average_response_time_ms=245.5,
            success_rate=0.987,
            rate_limit_remaining=955,
            quota_reset_time=datetime.now() + timedelta(hours=1)
        )
    except Exception as e:
        logger.error(f"Usage statistics failed: {e}")
        raise HTTPException(status_code=500, detail=f"Statistics unavailable: {str(e)}")
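
# Illustrative client-side helper, not used by the router: poll the job-status endpoint
# above until the job leaves the queued/processing states. The base URL, API key value,
# and 5-second interval are placeholder assumptions.
async def poll_job_until_done(job_id: str, api_key: str, base_url: str = "http://localhost:8000") -> Dict[str, Any]:
    """Poll GET /api/v2/job/{job_id} until it reports a terminal status."""
    import httpx

    async with httpx.AsyncClient(base_url=base_url, headers={"Authorization": f"Bearer {api_key}"}) as client:
        while True:
            response = await client.get(f"/api/v2/job/{job_id}")
            response.raise_for_status()
            job = response.json()
            if job["status"] in ("completed", "failed"):
                return job
            await asyncio.sleep(5)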

@router.get("/jobs/stream", summary="Stream job updates via Server-Sent Events")
async def stream_job_updates(
    user_info: Dict = Depends(verify_api_key)
):
    """Stream real-time job updates using Server-Sent Events"""

    async def generate_events():
        """Generate SSE events for job updates"""
        try:
            while True:
                # Mock event - replace with actual job update logic
                event_data = {
                    "event": "job_update",
                    "job_id": "mock_job_123",
                    "status": "processing",
                    "progress": 75.0,
                    "timestamp": datetime.now().isoformat()
                }

                yield f"data: {json.dumps(event_data)}\n\n"
                await asyncio.sleep(2)  # Send updates every 2 seconds

        except asyncio.CancelledError:
            logger.info("SSE stream cancelled")
            yield f"data: {json.dumps({'event': 'stream_closed'})}\n\n"

    return StreamingResponse(
        generate_events(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Headers": "Cache-Control"
        }
    )


# Background processing functions
async def process_enhanced_transcript(job_id: str, request: EnhancedTranscriptRequest, user_info: Dict):
    """Background task for enhanced transcript processing"""
    try:
        logger.info(f"Starting enhanced transcript processing for job {job_id}")

        # Mock processing stages
        stages = ["downloading", "extracting", "analyzing", "generating", "completed"]

        for i, stage in enumerate(stages):
            # Mock processing delay
            await asyncio.sleep(2)
            progress = (i + 1) / len(stages) * 100

            logger.info(f"Job {job_id} - Stage: {stage}, Progress: {progress}%")

            # Send webhook notification if configured
            if request.webhook_url:
                await send_webhook_notification(
                    url=str(request.webhook_url),
                    event_type=WebhookEvent.JOB_PROGRESS,
                    data={
                        "job_id": job_id,
                        "stage": stage,
                        "progress": progress,
                        "timestamp": datetime.now().isoformat()
                    }
                )

        # Final completion webhook
        if request.webhook_url:
            await send_webhook_notification(
                url=str(request.webhook_url),
                event_type=WebhookEvent.JOB_COMPLETED,
                data={
                    "job_id": job_id,
                    "status": "completed",
                    "result_url": f"/api/v2/job/{job_id}/result",
                    "timestamp": datetime.now().isoformat()
                }
            )

        logger.info(f"Enhanced transcript processing completed for job {job_id}")

    except Exception as e:
        logger.error(f"Enhanced transcript processing failed for job {job_id}: {e}")

        # Send failure webhook
        if request.webhook_url:
            await send_webhook_notification(
                url=str(request.webhook_url),
                event_type=WebhookEvent.JOB_FAILED,
                data={
                    "job_id": job_id,
                    "error": str(e),
                    "timestamp": datetime.now().isoformat()
                }
            )
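
# Illustrative consumer for the /jobs/stream endpoint above, using httpx's streaming API.
# The base URL and API key are placeholder assumptions; this helper is not used by the router.
async def consume_job_stream(api_key: str, base_url: str = "http://localhost:8000") -> None:
    """Read SSE lines from GET /api/v2/jobs/stream and log each decoded event."""
    import httpx

    headers = {"Authorization": f"Bearer {api_key}"}
    async with httpx.AsyncClient(base_url=base_url, headers=headers, timeout=None) as client:
        async with client.stream("GET", "/api/v2/jobs/stream") as response:
            async for line in response.aiter_lines():
                if line.startswith("data: "):
                    event = json.loads(line[len("data: "):])
                    logger.info(f"Job update: {event}")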

async def process_enhanced_batch(batch_id: str, request: BatchProcessingRequest, user_info: Dict):
    """Background task for enhanced batch processing"""
    try:
        logger.info(f"Starting enhanced batch processing for batch {batch_id}")

        if request.parallel_processing:
            # Process in parallel batches
            semaphore = asyncio.Semaphore(request.max_concurrent_jobs)
            tasks = []

            for i, video_url in enumerate(request.video_urls):
                task = process_single_video_in_batch(
                    semaphore, batch_id, str(video_url), i, request
                )
                tasks.append(task)

            # Wait for all tasks to complete
            await asyncio.gather(*tasks, return_exceptions=True)
        else:
            # Process sequentially
            for i, video_url in enumerate(request.video_urls):
                await process_single_video_in_batch(
                    None, batch_id, str(video_url), i, request
                )

        # Send batch completion webhook
        if request.webhook_url:
            await send_webhook_notification(
                url=str(request.webhook_url),
                event_type=WebhookEvent.BATCH_COMPLETED,
                data={
                    "batch_id": batch_id,
                    "status": "completed",
                    "total_videos": len(request.video_urls),
                    "timestamp": datetime.now().isoformat()
                }
            )

        logger.info(f"Enhanced batch processing completed for batch {batch_id}")

    except Exception as e:
        logger.error(f"Enhanced batch processing failed for batch {batch_id}: {e}")


async def process_single_video_in_batch(semaphore: Optional[asyncio.Semaphore], batch_id: str, video_url: str, index: int, request: BatchProcessingRequest):
    """Process a single video within a batch"""
    if semaphore:
        async with semaphore:
            await _process_video(batch_id, video_url, index, request)
    else:
        await _process_video(batch_id, video_url, index, request)


async def _process_video(batch_id: str, video_url: str, index: int, request: BatchProcessingRequest):
    """Internal video processing logic"""
    try:
        logger.info(f"Processing video {index + 1}/{len(request.video_urls)} in batch {batch_id}")

        # Mock processing time
        processing_time = 5 if request.transcript_source == TranscriptSource.YOUTUBE else 15
        await asyncio.sleep(processing_time)

        logger.info(f"Completed video {index + 1} in batch {batch_id}")

    except Exception as e:
        logger.error(f"Failed to process video {index + 1} in batch {batch_id}: {e}")


async def send_webhook_notification(url: str, event_type: WebhookEvent, data: Dict[str, Any]):
    """Send webhook notification"""
    try:
        import httpx

        payload = {
            "event": event_type.value,
            "timestamp": datetime.now().isoformat(),
            "data": data
        }

        # Mock webhook sending - replace with actual HTTP client
        logger.info(f"Sending webhook to {url}: {event_type.value}")

        # In production, use actual HTTP client:
        # async with httpx.AsyncClient() as client:
        #     response = await client.post(url, json=payload, timeout=10)
        #     logger.info(f"Webhook sent successfully: {response.status_code}")

    except Exception as e:
        logger.error(f"Failed to send webhook to {url}: {e}")


# Export router
__all__ = ["router"]
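
# Sketch of what the production delivery path commented out in send_webhook_notification()
# could look like: a signed POST with a couple of retries. The "X-Webhook-Signature" header
# name, retry count, and backoff are assumptions rather than an agreed contract.
async def deliver_webhook(url: str, payload: Dict[str, Any], secret: Optional[str] = None, attempts: int = 3) -> bool:
    """POST a JSON payload to a webhook URL, optionally signing it, with simple retries."""
    import httpx

    body = json.dumps(payload).encode("utf-8")
    headers = {"Content-Type": "application/json"}
    if secret:
        import hashlib
        import hmac
        headers["X-Webhook-Signature"] = hmac.new(secret.encode("utf-8"), body, hashlib.sha256).hexdigest()

    async with httpx.AsyncClient(timeout=10) as client:
        for attempt in range(attempts):
            try:
                response = await client.post(url, content=body, headers=headers)
                if response.status_code < 500:
                    return response.is_success  # don't retry on 2xx/4xx responses
            except httpx.HTTPError as e:
                logger.warning(f"Webhook delivery attempt {attempt + 1} failed: {e}")
            await asyncio.sleep(2 ** attempt)  # simple exponential backoff
    return False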