# youtube-summarizer/backend/api/enhanced.py
"""
Enhanced API endpoints for YouTube Summarizer Developer Platform
Extends existing API with advanced developer features, batch processing, and webhooks
"""
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field, HttpUrl
from typing import List, Optional, Dict, Any
from datetime import datetime, timedelta
from uuid import uuid4
import json
import asyncio
import logging
from enum import Enum
# Import existing services
try:
from ..services.dual_transcript_service import DualTranscriptService
from ..services.batch_processing_service import BatchProcessingService
from ..models.transcript import TranscriptSource, WhisperModelSize, DualTranscriptResult
from ..models.batch import BatchJob, BatchJobStatus
except ImportError:
    # Fallback so the module can be imported in isolated tests; note that the
    # service and model names above will be undefined if this branch is taken.
    pass
logger = logging.getLogger(__name__)
# Authentication
security = HTTPBearer(auto_error=False)
# Create enhanced API router
router = APIRouter(prefix="/api/v2", tags=["enhanced-api"])
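# A minimal sketch of how this router might be wired into the main FastAPI app;
# the module path and variable names here are assumptions, not part of this file:
#
#     from fastapi import FastAPI
#     from backend.api.enhanced import router as enhanced_router
#
#     app = FastAPI(title="YouTube Summarizer")
#     app.include_router(enhanced_router)  # exposes the /api/v2/* endpoints defined below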
# Enhanced Models
class APIKeyInfo(BaseModel):
id: str
name: str
rate_limit_per_hour: int
created_at: datetime
last_used_at: Optional[datetime]
usage_count: int
is_active: bool
class ProcessingPriority(str, Enum):
LOW = "low"
NORMAL = "normal"
HIGH = "high"
URGENT = "urgent"
class WebhookEvent(str, Enum):
JOB_STARTED = "job.started"
JOB_PROGRESS = "job.progress"
JOB_COMPLETED = "job.completed"
JOB_FAILED = "job.failed"
BATCH_COMPLETED = "batch.completed"
class EnhancedTranscriptRequest(BaseModel):
video_url: HttpUrl = Field(..., description="YouTube video URL")
transcript_source: TranscriptSource = Field(default=TranscriptSource.YOUTUBE, description="Transcript source")
whisper_model_size: Optional[WhisperModelSize] = Field(default=WhisperModelSize.SMALL, description="Whisper model size")
priority: ProcessingPriority = Field(default=ProcessingPriority.NORMAL, description="Processing priority")
webhook_url: Optional[HttpUrl] = Field(None, description="Webhook URL for notifications")
include_quality_analysis: bool = Field(default=True, description="Include transcript quality analysis")
custom_prompt: Optional[str] = Field(None, description="Custom processing prompt")
tags: List[str] = Field(default_factory=list, description="Custom tags for organization")
class BatchProcessingRequest(BaseModel):
video_urls: List[HttpUrl] = Field(..., min_items=1, max_items=1000, description="List of video URLs")
transcript_source: TranscriptSource = Field(default=TranscriptSource.YOUTUBE, description="Transcript source for all videos")
batch_name: str = Field(..., description="Batch job name")
priority: ProcessingPriority = Field(default=ProcessingPriority.NORMAL, description="Processing priority")
webhook_url: Optional[HttpUrl] = Field(None, description="Webhook URL for batch notifications")
parallel_processing: bool = Field(default=False, description="Enable parallel processing")
max_concurrent_jobs: int = Field(default=5, ge=1, description="Maximum concurrent jobs")
class EnhancedJobResponse(BaseModel):
job_id: str
status: str
priority: ProcessingPriority
created_at: datetime
estimated_completion: Optional[datetime]
progress_percentage: float
current_stage: str
webhook_url: Optional[str]
metadata: Dict[str, Any]
class APIUsageStats(BaseModel):
total_requests: int
requests_today: int
requests_this_month: int
average_response_time_ms: float
success_rate: float
rate_limit_remaining: int
quota_reset_time: datetime
class WebhookConfiguration(BaseModel):
url: HttpUrl
events: List[WebhookEvent]
secret: Optional[str] = Field(None, description="Webhook secret for verification")
is_active: bool = Field(default=True)
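# A minimal sketch of how a webhook consumer could use the optional `secret` above
# to verify deliveries; the HMAC-SHA256 scheme and the "X-Webhook-Signature" header
# name are assumptions rather than something this service currently sends:
#
#     import hmac, hashlib
#
#     def verify_webhook_signature(secret: str, raw_body: bytes, signature: str) -> bool:
#         expected = hmac.new(secret.encode("utf-8"), raw_body, hashlib.sha256).hexdigest()
#         return hmac.compare_digest(expected, signature)  # constant-time comparison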
# Mock authentication and rate limiting (to be replaced with real implementation)
async def verify_api_key(credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)) -> Dict[str, Any]:
"""Verify API key and return user info"""
if not credentials:
raise HTTPException(status_code=401, detail="API key required")
# Mock API key validation - replace with real implementation
api_key = credentials.credentials
if not api_key.startswith("ys_"):
raise HTTPException(status_code=401, detail="Invalid API key format")
# Mock user info - replace with database lookup
return {
"user_id": "user_" + api_key[-8:],
"api_key_id": "key_" + api_key[-8:],
"rate_limit": 1000,
"tier": "pro" if "pro" in api_key else "free"
}
async def check_rate_limit(user_info: Dict = Depends(verify_api_key)) -> Dict[str, Any]:
"""Check and update rate limiting"""
# Mock rate limiting - replace with Redis implementation
remaining = 995 # Mock remaining requests
reset_time = datetime.now() + timedelta(hours=1)
if remaining <= 0:
raise HTTPException(
status_code=429,
detail="Rate limit exceeded",
headers={"Retry-After": "3600"}
)
return {
**user_info,
"rate_limit_remaining": remaining,
"rate_limit_reset": reset_time
}
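# A minimal sketch of what the real Redis-backed limiter could look like, assuming
# the redis-py asyncio client and a fixed one-hour window keyed by API key id; the
# key naming and connection URL are illustrative assumptions:
#
#     import redis.asyncio as redis
#
#     _redis = redis.from_url("redis://localhost:6379/0")
#
#     async def consume_rate_limit(api_key_id: str, limit_per_hour: int) -> int:
#         key = f"ratelimit:{api_key_id}:{datetime.now().strftime('%Y%m%d%H')}"
#         used = await _redis.incr(key)
#         if used == 1:
#             await _redis.expire(key, 3600)  # first hit in the window sets the TTL
#         remaining = limit_per_hour - used
#         if remaining < 0:
#             raise HTTPException(status_code=429, detail="Rate limit exceeded",
#                                 headers={"Retry-After": "3600"})
#         return remaining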
# Enhanced API Endpoints
@router.get("/health", summary="Health check with detailed status")
async def enhanced_health_check():
"""Enhanced health check with service status"""
try:
# Check service availability
services_status = {
"dual_transcript_service": True, # Check actual service
"batch_processing_service": True, # Check actual service
"database": True, # Check database connection
"redis": True, # Check Redis connection
"webhook_service": True, # Check webhook service
}
overall_healthy = all(services_status.values())
return {
"status": "healthy" if overall_healthy else "degraded",
"timestamp": datetime.now().isoformat(),
"version": "4.2.0",
"services": services_status,
"uptime_seconds": 3600, # Mock uptime
"requests_per_minute": 45, # Mock metric
}
except Exception as e:
raise HTTPException(status_code=503, detail=f"Service unavailable: {str(e)}")
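# A minimal sketch of replacing one of the hard-coded flags above with a real probe,
# reusing the commented `_redis` client from the rate-limit sketch earlier (still an
# assumption about the deployment):
#
#     try:
#         services_status["redis"] = bool(await _redis.ping())
#     except Exception:
#         services_status["redis"] = False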
@router.post("/transcript/extract",
summary="Extract transcript with enhanced options",
response_model=EnhancedJobResponse)
async def enhanced_transcript_extraction(
request: EnhancedTranscriptRequest,
background_tasks: BackgroundTasks,
user_info: Dict = Depends(check_rate_limit)
):
"""Enhanced transcript extraction with priority, webhooks, and quality analysis"""
job_id = str(uuid4())
try:
# Create job with enhanced metadata
job_metadata = {
"user_id": user_info["user_id"],
"video_url": str(request.video_url),
"transcript_source": request.transcript_source.value,
"priority": request.priority.value,
"tags": request.tags,
"custom_prompt": request.custom_prompt,
"include_quality_analysis": request.include_quality_analysis
}
# Start background processing
background_tasks.add_task(
process_enhanced_transcript,
job_id=job_id,
request=request,
user_info=user_info
)
# Calculate estimated completion based on priority
priority_multiplier = {
ProcessingPriority.URGENT: 0.5,
ProcessingPriority.HIGH: 0.7,
ProcessingPriority.NORMAL: 1.0,
ProcessingPriority.LOW: 1.5
}
base_time = 30 if request.transcript_source == TranscriptSource.YOUTUBE else 120
estimated_seconds = base_time * priority_multiplier[request.priority]
estimated_completion = datetime.now() + timedelta(seconds=estimated_seconds)
return EnhancedJobResponse(
job_id=job_id,
status="queued",
priority=request.priority,
created_at=datetime.now(),
estimated_completion=estimated_completion,
progress_percentage=0.0,
current_stage="queued",
webhook_url=str(request.webhook_url) if request.webhook_url else None,
metadata=job_metadata
)
except Exception as e:
logger.error(f"Enhanced transcript extraction failed: {e}")
raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")
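# A minimal sketch of calling this endpoint from Python, assuming a locally running
# instance on port 8000 and "ys_example_pro_key" as a valid key (both assumptions):
#
#     import httpx
#
#     resp = httpx.post(
#         "http://localhost:8000/api/v2/transcript/extract",
#         headers={"Authorization": "Bearer ys_example_pro_key"},
#         json={
#             "video_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
#             "priority": "high",
#             "tags": ["demo"],
#         },
#     )
#     job = resp.json()  # EnhancedJobResponse fields, including "job_id"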
@router.post("/batch/process",
summary="Batch process multiple videos",
response_model=Dict[str, Any])
async def enhanced_batch_processing(
request: BatchProcessingRequest,
background_tasks: BackgroundTasks,
user_info: Dict = Depends(check_rate_limit)
):
"""Enhanced batch processing with parallel execution and progress tracking"""
batch_id = str(uuid4())
try:
# Validate batch size limits based on user tier
max_batch_size = 1000 if user_info["tier"] == "pro" else 100
if len(request.video_urls) > max_batch_size:
raise HTTPException(
status_code=400,
detail=f"Batch size exceeds limit. Max: {max_batch_size} for {user_info['tier']} tier"
)
# Create batch job
batch_metadata = {
"user_id": user_info["user_id"],
"batch_name": request.batch_name,
"video_count": len(request.video_urls),
"transcript_source": request.transcript_source.value,
"priority": request.priority.value,
"parallel_processing": request.parallel_processing,
"max_concurrent_jobs": request.max_concurrent_jobs
}
# Start background batch processing
background_tasks.add_task(
process_enhanced_batch,
batch_id=batch_id,
request=request,
user_info=user_info
)
# Calculate estimated completion
job_time = 30 if request.transcript_source == TranscriptSource.YOUTUBE else 120
if request.parallel_processing:
total_time = (len(request.video_urls) / request.max_concurrent_jobs) * job_time
else:
total_time = len(request.video_urls) * job_time
estimated_completion = datetime.now() + timedelta(seconds=total_time)
return {
"batch_id": batch_id,
"status": "queued",
"video_count": len(request.video_urls),
"priority": request.priority.value,
"estimated_completion": estimated_completion.isoformat(),
"parallel_processing": request.parallel_processing,
"webhook_url": str(request.webhook_url) if request.webhook_url else None,
"metadata": batch_metadata
}
    except HTTPException:
        # Re-raise intentional client errors (e.g. the batch-size limit above)
        # instead of converting them into a 500 below.
        raise
    except Exception as e:
        logger.error(f"Enhanced batch processing failed: {e}")
        raise HTTPException(status_code=500, detail=f"Batch processing failed: {str(e)}")
@router.get("/job/{job_id}",
summary="Get enhanced job status",
response_model=EnhancedJobResponse)
async def get_enhanced_job_status(
job_id: str,
user_info: Dict = Depends(verify_api_key)
):
"""Get detailed job status with progress and metadata"""
try:
# Mock job status - replace with actual job lookup
mock_job = {
"job_id": job_id,
"status": "processing",
"priority": ProcessingPriority.NORMAL,
"created_at": datetime.now() - timedelta(minutes=2),
"estimated_completion": datetime.now() + timedelta(minutes=3),
"progress_percentage": 65.0,
"current_stage": "generating_summary",
"webhook_url": None,
"metadata": {
"user_id": user_info["user_id"],
"processing_time_elapsed": 120,
"estimated_time_remaining": 180
}
}
return EnhancedJobResponse(**mock_job)
except Exception as e:
logger.error(f"Job status lookup failed: {e}")
raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
@router.get("/usage/stats",
summary="Get API usage statistics",
response_model=APIUsageStats)
async def get_usage_statistics(
user_info: Dict = Depends(verify_api_key)
):
"""Get detailed API usage statistics for the authenticated user"""
try:
# Mock usage stats - replace with actual database queries
return APIUsageStats(
total_requests=1250,
requests_today=45,
requests_this_month=890,
average_response_time_ms=245.5,
success_rate=0.987,
rate_limit_remaining=955,
quota_reset_time=datetime.now() + timedelta(hours=1)
)
except Exception as e:
logger.error(f"Usage statistics failed: {e}")
raise HTTPException(status_code=500, detail=f"Statistics unavailable: {str(e)}")
@router.get("/jobs/stream",
summary="Stream job updates via Server-Sent Events")
async def stream_job_updates(
user_info: Dict = Depends(verify_api_key)
):
"""Stream real-time job updates using Server-Sent Events"""
async def generate_events():
"""Generate SSE events for job updates"""
try:
while True:
# Mock event - replace with actual job update logic
event_data = {
"event": "job_update",
"job_id": "mock_job_123",
"status": "processing",
"progress": 75.0,
"timestamp": datetime.now().isoformat()
}
yield f"data: {json.dumps(event_data)}\n\n"
await asyncio.sleep(2) # Send updates every 2 seconds
except asyncio.CancelledError:
logger.info("SSE stream cancelled")
yield f"data: {json.dumps({'event': 'stream_closed'})}\n\n"
return StreamingResponse(
generate_events(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Headers": "Cache-Control"
}
)
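# A minimal sketch of consuming this SSE stream from a Python client, assuming
# httpx and a locally running instance (URL and key are assumptions):
#
#     import httpx, json
#
#     async def follow_job_updates(api_key: str):
#         headers = {"Authorization": f"Bearer {api_key}"}
#         async with httpx.AsyncClient(timeout=None) as client:
#             async with client.stream("GET", "http://localhost:8000/api/v2/jobs/stream",
#                                      headers=headers) as response:
#                 async for line in response.aiter_lines():
#                     if line.startswith("data: "):
#                         print(json.loads(line[len("data: "):]))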
# Background processing functions
async def process_enhanced_transcript(job_id: str, request: EnhancedTranscriptRequest, user_info: Dict):
"""Background task for enhanced transcript processing"""
try:
logger.info(f"Starting enhanced transcript processing for job {job_id}")
# Mock processing stages
stages = ["downloading", "extracting", "analyzing", "generating", "completed"]
for i, stage in enumerate(stages):
# Mock processing delay
await asyncio.sleep(2)
progress = (i + 1) / len(stages) * 100
logger.info(f"Job {job_id} - Stage: {stage}, Progress: {progress}%")
# Send webhook notification if configured
if request.webhook_url:
await send_webhook_notification(
url=str(request.webhook_url),
event_type=WebhookEvent.JOB_PROGRESS,
data={
"job_id": job_id,
"stage": stage,
"progress": progress,
"timestamp": datetime.now().isoformat()
}
)
# Final completion webhook
if request.webhook_url:
await send_webhook_notification(
url=str(request.webhook_url),
event_type=WebhookEvent.JOB_COMPLETED,
data={
"job_id": job_id,
"status": "completed",
"result_url": f"/api/v2/job/{job_id}/result",
"timestamp": datetime.now().isoformat()
}
)
logger.info(f"Enhanced transcript processing completed for job {job_id}")
except Exception as e:
logger.error(f"Enhanced transcript processing failed for job {job_id}: {e}")
# Send failure webhook
if request.webhook_url:
await send_webhook_notification(
url=str(request.webhook_url),
event_type=WebhookEvent.JOB_FAILED,
data={
"job_id": job_id,
"error": str(e),
"timestamp": datetime.now().isoformat()
}
)
async def process_enhanced_batch(batch_id: str, request: BatchProcessingRequest, user_info: Dict):
"""Background task for enhanced batch processing"""
try:
logger.info(f"Starting enhanced batch processing for batch {batch_id}")
if request.parallel_processing:
# Process in parallel batches
semaphore = asyncio.Semaphore(request.max_concurrent_jobs)
tasks = []
for i, video_url in enumerate(request.video_urls):
task = process_single_video_in_batch(
semaphore, batch_id, str(video_url), i, request
)
tasks.append(task)
# Wait for all tasks to complete
await asyncio.gather(*tasks, return_exceptions=True)
else:
# Process sequentially
for i, video_url in enumerate(request.video_urls):
await process_single_video_in_batch(
None, batch_id, str(video_url), i, request
)
# Send batch completion webhook
if request.webhook_url:
await send_webhook_notification(
url=str(request.webhook_url),
event_type=WebhookEvent.BATCH_COMPLETED,
data={
"batch_id": batch_id,
"status": "completed",
"total_videos": len(request.video_urls),
"timestamp": datetime.now().isoformat()
}
)
logger.info(f"Enhanced batch processing completed for batch {batch_id}")
except Exception as e:
logger.error(f"Enhanced batch processing failed for batch {batch_id}: {e}")
async def process_single_video_in_batch(semaphore: Optional[asyncio.Semaphore],
batch_id: str, video_url: str, index: int,
request: BatchProcessingRequest):
"""Process a single video within a batch"""
if semaphore:
async with semaphore:
await _process_video(batch_id, video_url, index, request)
else:
await _process_video(batch_id, video_url, index, request)
async def _process_video(batch_id: str, video_url: str, index: int, request: BatchProcessingRequest):
"""Internal video processing logic"""
try:
logger.info(f"Processing video {index + 1}/{len(request.video_urls)} in batch {batch_id}")
# Mock processing time
processing_time = 5 if request.transcript_source == TranscriptSource.YOUTUBE else 15
await asyncio.sleep(processing_time)
logger.info(f"Completed video {index + 1} in batch {batch_id}")
except Exception as e:
logger.error(f"Failed to process video {index + 1} in batch {batch_id}: {e}")
async def send_webhook_notification(url: str, event_type: WebhookEvent, data: Dict[str, Any]):
    """Send webhook notification (currently a mock that only logs the event)"""
    try:
        payload = {
            "event": event_type.value,
            "timestamp": datetime.now().isoformat(),
            "data": data
        }
        # Mock webhook sending - replace with an actual HTTP client
        logger.info(f"Sending webhook to {url}: {event_type.value}")
        # In production, post `payload` with an async HTTP client such as httpx:
        #     import httpx
        #     async with httpx.AsyncClient() as client:
        #         response = await client.post(url, json=payload, timeout=10)
        #         logger.info(f"Webhook sent successfully: {response.status_code}")
    except Exception as e:
        logger.error(f"Failed to send webhook to {url}: {e}")
# Export router
__all__ = ["router"]