"""
|
|
Enhanced API endpoints for YouTube Summarizer Developer Platform
|
|
Extends existing API with advanced developer features, batch processing, and webhooks
|
|
"""
|
|
|
|
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks, Query, Header
|
|
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
|
from fastapi.responses import StreamingResponse
|
|
from pydantic import BaseModel, Field, HttpUrl
|
|
from typing import List, Optional, Dict, Any, Literal, Union
|
|
from datetime import datetime, timedelta
|
|
from uuid import UUID, uuid4
|
|
import json
|
|
import asyncio
|
|
import logging
|
|
from enum import Enum
|
|
|
|
# Import existing services
try:
    from ..services.dual_transcript_service import DualTranscriptService
    from ..services.batch_processing_service import BatchProcessingService
    from ..models.transcript import TranscriptSource, WhisperModelSize, DualTranscriptResult
    from ..models.batch import BatchJob, BatchJobStatus
except ImportError:
    # Fallback for testing: the real models may not be importable outside the app.
    # Define minimal stand-ins so module-level references such as
    # TranscriptSource.YOUTUBE and WhisperModelSize.SMALL do not raise NameError.
    # NOTE: the member values below are assumptions; align them with the real enums.
    class TranscriptSource(str, Enum):
        YOUTUBE = "youtube"
        WHISPER = "whisper"

    class WhisperModelSize(str, Enum):
        SMALL = "small"

logger = logging.getLogger(__name__)
# Authentication
security = HTTPBearer(auto_error=False)

# Create enhanced API router
router = APIRouter(prefix="/api/v2", tags=["enhanced-api"])
# Enhanced Models
class APIKeyInfo(BaseModel):
    id: str
    name: str
    rate_limit_per_hour: int
    created_at: datetime
    last_used_at: Optional[datetime]
    usage_count: int
    is_active: bool


class ProcessingPriority(str, Enum):
    LOW = "low"
    NORMAL = "normal"
    HIGH = "high"
    URGENT = "urgent"


class WebhookEvent(str, Enum):
    JOB_STARTED = "job.started"
    JOB_PROGRESS = "job.progress"
    JOB_COMPLETED = "job.completed"
    JOB_FAILED = "job.failed"
    BATCH_COMPLETED = "batch.completed"


class EnhancedTranscriptRequest(BaseModel):
    video_url: HttpUrl = Field(..., description="YouTube video URL")
    transcript_source: TranscriptSource = Field(default=TranscriptSource.YOUTUBE, description="Transcript source")
    whisper_model_size: Optional[WhisperModelSize] = Field(default=WhisperModelSize.SMALL, description="Whisper model size")
    priority: ProcessingPriority = Field(default=ProcessingPriority.NORMAL, description="Processing priority")
    webhook_url: Optional[HttpUrl] = Field(None, description="Webhook URL for notifications")
    include_quality_analysis: bool = Field(default=True, description="Include transcript quality analysis")
    custom_prompt: Optional[str] = Field(None, description="Custom processing prompt")
    tags: List[str] = Field(default_factory=list, description="Custom tags for organization")


class BatchProcessingRequest(BaseModel):
    video_urls: List[HttpUrl] = Field(..., min_items=1, max_items=1000, description="List of video URLs")
    transcript_source: TranscriptSource = Field(default=TranscriptSource.YOUTUBE, description="Transcript source for all videos")
    batch_name: str = Field(..., description="Batch job name")
    priority: ProcessingPriority = Field(default=ProcessingPriority.NORMAL, description="Processing priority")
    webhook_url: Optional[HttpUrl] = Field(None, description="Webhook URL for batch notifications")
    parallel_processing: bool = Field(default=False, description="Enable parallel processing")
    max_concurrent_jobs: int = Field(default=5, description="Maximum concurrent jobs")


class EnhancedJobResponse(BaseModel):
    job_id: str
    status: str
    priority: ProcessingPriority
    created_at: datetime
    estimated_completion: Optional[datetime]
    progress_percentage: float
    current_stage: str
    webhook_url: Optional[str]
    metadata: Dict[str, Any]


class APIUsageStats(BaseModel):
    total_requests: int
    requests_today: int
    requests_this_month: int
    average_response_time_ms: float
    success_rate: float
    rate_limit_remaining: int
    quota_reset_time: datetime


class WebhookConfiguration(BaseModel):
    url: HttpUrl
    events: List[WebhookEvent]
    secret: Optional[str] = Field(None, description="Webhook secret for verification")
    is_active: bool = Field(default=True)
# Mock authentication and rate limiting (to be replaced with real implementation)
async def verify_api_key(credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)) -> Dict[str, Any]:
    """Verify API key and return user info."""
    if not credentials:
        raise HTTPException(status_code=401, detail="API key required")

    # Mock API key validation - replace with real implementation
    api_key = credentials.credentials
    if not api_key.startswith("ys_"):
        raise HTTPException(status_code=401, detail="Invalid API key format")

    # Mock user info - replace with database lookup
    return {
        "user_id": "user_" + api_key[-8:],
        "api_key_id": "key_" + api_key[-8:],
        "rate_limit": 1000,
        "tier": "pro" if "pro" in api_key else "free",
    }
async def check_rate_limit(user_info: Dict = Depends(verify_api_key)) -> Dict[str, Any]:
    """Check and update rate limiting."""
    # Mock rate limiting - replace with Redis implementation
    remaining = 995  # Mock remaining requests
    reset_time = datetime.now() + timedelta(hours=1)

    if remaining <= 0:
        raise HTTPException(
            status_code=429,
            detail="Rate limit exceeded",
            headers={"Retry-After": "3600"},
        )

    return {
        **user_info,
        "rate_limit_remaining": remaining,
        "rate_limit_reset": reset_time,
    }
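# A minimal sketch of the Redis-backed replacement hinted at above, kept as a
# comment so this module stays importable without a Redis dependency. It assumes
# a fixed one-hour window keyed by the API key id; the redis.asyncio client and
# the "rate:{api_key_id}" key scheme are illustrative assumptions, not part of
# the existing codebase.
#
#     import redis.asyncio as aioredis
#
#     redis_client = aioredis.Redis(host="localhost", port=6379, decode_responses=True)
#
#     async def check_rate_limit(user_info: Dict = Depends(verify_api_key)) -> Dict[str, Any]:
#         key = f"rate:{user_info['api_key_id']}"
#         count = await redis_client.incr(key)
#         if count == 1:
#             # First request in the window: start the one-hour TTL.
#             await redis_client.expire(key, 3600)
#         ttl = await redis_client.ttl(key)
#         if count > user_info["rate_limit"]:
#             raise HTTPException(
#                 status_code=429,
#                 detail="Rate limit exceeded",
#                 headers={"Retry-After": str(max(ttl, 0))},
#             )
#         return {
#             **user_info,
#             "rate_limit_remaining": user_info["rate_limit"] - count,
#             "rate_limit_reset": datetime.now() + timedelta(seconds=max(ttl, 0)),
#         }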

# Enhanced API Endpoints

@router.get("/health", summary="Health check with detailed status")
async def enhanced_health_check():
    """Enhanced health check with service status."""
    try:
        # Check service availability
        services_status = {
            "dual_transcript_service": True,  # Check actual service
            "batch_processing_service": True,  # Check actual service
            "database": True,  # Check database connection
            "redis": True,  # Check Redis connection
            "webhook_service": True,  # Check webhook service
        }

        overall_healthy = all(services_status.values())

        return {
            "status": "healthy" if overall_healthy else "degraded",
            "timestamp": datetime.now().isoformat(),
            "version": "4.2.0",
            "services": services_status,
            "uptime_seconds": 3600,  # Mock uptime
            "requests_per_minute": 45,  # Mock metric
        }
    except Exception as e:
        raise HTTPException(status_code=503, detail=f"Service unavailable: {str(e)}")
@router.post("/transcript/extract",
|
|
summary="Extract transcript with enhanced options",
|
|
response_model=EnhancedJobResponse)
|
|
async def enhanced_transcript_extraction(
|
|
request: EnhancedTranscriptRequest,
|
|
background_tasks: BackgroundTasks,
|
|
user_info: Dict = Depends(check_rate_limit)
|
|
):
|
|
"""Enhanced transcript extraction with priority, webhooks, and quality analysis"""
|
|
|
|
job_id = str(uuid4())
|
|
|
|
try:
|
|
# Create job with enhanced metadata
|
|
job_metadata = {
|
|
"user_id": user_info["user_id"],
|
|
"video_url": str(request.video_url),
|
|
"transcript_source": request.transcript_source.value,
|
|
"priority": request.priority.value,
|
|
"tags": request.tags,
|
|
"custom_prompt": request.custom_prompt,
|
|
"include_quality_analysis": request.include_quality_analysis
|
|
}
|
|
|
|
# Start background processing
|
|
background_tasks.add_task(
|
|
process_enhanced_transcript,
|
|
job_id=job_id,
|
|
request=request,
|
|
user_info=user_info
|
|
)
|
|
|
|
# Calculate estimated completion based on priority
|
|
priority_multiplier = {
|
|
ProcessingPriority.URGENT: 0.5,
|
|
ProcessingPriority.HIGH: 0.7,
|
|
ProcessingPriority.NORMAL: 1.0,
|
|
ProcessingPriority.LOW: 1.5
|
|
}
|
|
|
|
base_time = 30 if request.transcript_source == TranscriptSource.YOUTUBE else 120
|
|
estimated_seconds = base_time * priority_multiplier[request.priority]
|
|
estimated_completion = datetime.now() + timedelta(seconds=estimated_seconds)
|
|
|
|
return EnhancedJobResponse(
|
|
job_id=job_id,
|
|
status="queued",
|
|
priority=request.priority,
|
|
created_at=datetime.now(),
|
|
estimated_completion=estimated_completion,
|
|
progress_percentage=0.0,
|
|
current_stage="queued",
|
|
webhook_url=str(request.webhook_url) if request.webhook_url else None,
|
|
metadata=job_metadata
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Enhanced transcript extraction failed: {e}")
|
|
raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")

@router.post("/batch/process",
             summary="Batch process multiple videos",
             response_model=Dict[str, Any])
async def enhanced_batch_processing(
    request: BatchProcessingRequest,
    background_tasks: BackgroundTasks,
    user_info: Dict = Depends(check_rate_limit)
):
    """Enhanced batch processing with parallel execution and progress tracking."""

    batch_id = str(uuid4())

    try:
        # Validate batch size limits based on user tier
        max_batch_size = 1000 if user_info["tier"] == "pro" else 100
        if len(request.video_urls) > max_batch_size:
            raise HTTPException(
                status_code=400,
                detail=f"Batch size exceeds limit. Max: {max_batch_size} for {user_info['tier']} tier"
            )

        # Create batch job metadata
        batch_metadata = {
            "user_id": user_info["user_id"],
            "batch_name": request.batch_name,
            "video_count": len(request.video_urls),
            "transcript_source": request.transcript_source.value,
            "priority": request.priority.value,
            "parallel_processing": request.parallel_processing,
            "max_concurrent_jobs": request.max_concurrent_jobs,
        }

        # Start background batch processing
        background_tasks.add_task(
            process_enhanced_batch,
            batch_id=batch_id,
            request=request,
            user_info=user_info,
        )

        # Calculate estimated completion
        job_time = 30 if request.transcript_source == TranscriptSource.YOUTUBE else 120
        if request.parallel_processing:
            total_time = (len(request.video_urls) / request.max_concurrent_jobs) * job_time
        else:
            total_time = len(request.video_urls) * job_time

        estimated_completion = datetime.now() + timedelta(seconds=total_time)

        return {
            "batch_id": batch_id,
            "status": "queued",
            "video_count": len(request.video_urls),
            "priority": request.priority.value,
            "estimated_completion": estimated_completion.isoformat(),
            "parallel_processing": request.parallel_processing,
            "webhook_url": str(request.webhook_url) if request.webhook_url else None,
            "metadata": batch_metadata,
        }

    except HTTPException:
        # Re-raise intentional HTTP errors (e.g. the 400 batch-size check above)
        # instead of converting them into a generic 500 below.
        raise
    except Exception as e:
        logger.error(f"Enhanced batch processing failed: {e}")
        raise HTTPException(status_code=500, detail=f"Batch processing failed: {str(e)}")
@router.get("/job/{job_id}",
|
|
summary="Get enhanced job status",
|
|
response_model=EnhancedJobResponse)
|
|
async def get_enhanced_job_status(
|
|
job_id: str,
|
|
user_info: Dict = Depends(verify_api_key)
|
|
):
|
|
"""Get detailed job status with progress and metadata"""
|
|
|
|
try:
|
|
# Mock job status - replace with actual job lookup
|
|
mock_job = {
|
|
"job_id": job_id,
|
|
"status": "processing",
|
|
"priority": ProcessingPriority.NORMAL,
|
|
"created_at": datetime.now() - timedelta(minutes=2),
|
|
"estimated_completion": datetime.now() + timedelta(minutes=3),
|
|
"progress_percentage": 65.0,
|
|
"current_stage": "generating_summary",
|
|
"webhook_url": None,
|
|
"metadata": {
|
|
"user_id": user_info["user_id"],
|
|
"processing_time_elapsed": 120,
|
|
"estimated_time_remaining": 180
|
|
}
|
|
}
|
|
|
|
return EnhancedJobResponse(**mock_job)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Job status lookup failed: {e}")
|
|
raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
|
|
|
|
@router.get("/usage/stats",
|
|
summary="Get API usage statistics",
|
|
response_model=APIUsageStats)
|
|
async def get_usage_statistics(
|
|
user_info: Dict = Depends(verify_api_key)
|
|
):
|
|
"""Get detailed API usage statistics for the authenticated user"""
|
|
|
|
try:
|
|
# Mock usage stats - replace with actual database queries
|
|
return APIUsageStats(
|
|
total_requests=1250,
|
|
requests_today=45,
|
|
requests_this_month=890,
|
|
average_response_time_ms=245.5,
|
|
success_rate=0.987,
|
|
rate_limit_remaining=955,
|
|
quota_reset_time=datetime.now() + timedelta(hours=1)
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Usage statistics failed: {e}")
|
|
raise HTTPException(status_code=500, detail=f"Statistics unavailable: {str(e)}")
|
|
|
|
@router.get("/jobs/stream",
|
|
summary="Stream job updates via Server-Sent Events")
|
|
async def stream_job_updates(
|
|
user_info: Dict = Depends(verify_api_key)
|
|
):
|
|
"""Stream real-time job updates using Server-Sent Events"""
|
|
|
|
async def generate_events():
|
|
"""Generate SSE events for job updates"""
|
|
try:
|
|
while True:
|
|
# Mock event - replace with actual job update logic
|
|
event_data = {
|
|
"event": "job_update",
|
|
"job_id": "mock_job_123",
|
|
"status": "processing",
|
|
"progress": 75.0,
|
|
"timestamp": datetime.now().isoformat()
|
|
}
|
|
|
|
yield f"data: {json.dumps(event_data)}\n\n"
|
|
await asyncio.sleep(2) # Send updates every 2 seconds
|
|
|
|
except asyncio.CancelledError:
|
|
logger.info("SSE stream cancelled")
|
|
yield f"data: {json.dumps({'event': 'stream_closed'})}\n\n"
|
|
|
|
return StreamingResponse(
|
|
generate_events(),
|
|
media_type="text/event-stream",
|
|
headers={
|
|
"Cache-Control": "no-cache",
|
|
"Connection": "keep-alive",
|
|
"Access-Control-Allow-Origin": "*",
|
|
"Access-Control-Allow-Headers": "Cache-Control"
|
|
}
|
|
)
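# Example consumer for the SSE endpoint above, kept as a comment for reference.
# The base URL and API key are placeholders; httpx's streaming API is used here
# only because httpx is already referenced elsewhere in this module.
#
#     import httpx
#
#     async def follow_job_updates():
#         async with httpx.AsyncClient(base_url="https://<your-host>", timeout=None) as client:
#             async with client.stream(
#                 "GET",
#                 "/api/v2/jobs/stream",
#                 headers={"Authorization": "Bearer ys_your_api_key"},
#             ) as response:
#                 async for line in response.aiter_lines():
#                     if line.startswith("data: "):
#                         update = json.loads(line[len("data: "):])
#                         print(update)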

# Background processing functions

async def process_enhanced_transcript(job_id: str, request: EnhancedTranscriptRequest, user_info: Dict):
    """Background task for enhanced transcript processing."""
    try:
        logger.info(f"Starting enhanced transcript processing for job {job_id}")

        # Mock processing stages
        stages = ["downloading", "extracting", "analyzing", "generating", "completed"]

        for i, stage in enumerate(stages):
            # Mock processing delay
            await asyncio.sleep(2)

            progress = (i + 1) / len(stages) * 100
            logger.info(f"Job {job_id} - Stage: {stage}, Progress: {progress}%")

            # Send webhook notification if configured
            if request.webhook_url:
                await send_webhook_notification(
                    url=str(request.webhook_url),
                    event_type=WebhookEvent.JOB_PROGRESS,
                    data={
                        "job_id": job_id,
                        "stage": stage,
                        "progress": progress,
                        "timestamp": datetime.now().isoformat(),
                    },
                )

        # Final completion webhook
        if request.webhook_url:
            await send_webhook_notification(
                url=str(request.webhook_url),
                event_type=WebhookEvent.JOB_COMPLETED,
                data={
                    "job_id": job_id,
                    "status": "completed",
                    "result_url": f"/api/v2/job/{job_id}/result",
                    "timestamp": datetime.now().isoformat(),
                },
            )

        logger.info(f"Enhanced transcript processing completed for job {job_id}")

    except Exception as e:
        logger.error(f"Enhanced transcript processing failed for job {job_id}: {e}")

        # Send failure webhook
        if request.webhook_url:
            await send_webhook_notification(
                url=str(request.webhook_url),
                event_type=WebhookEvent.JOB_FAILED,
                data={
                    "job_id": job_id,
                    "error": str(e),
                    "timestamp": datetime.now().isoformat(),
                },
            )

async def process_enhanced_batch(batch_id: str, request: BatchProcessingRequest, user_info: Dict):
    """Background task for enhanced batch processing."""
    try:
        logger.info(f"Starting enhanced batch processing for batch {batch_id}")

        if request.parallel_processing:
            # Process in parallel, bounded by a semaphore
            semaphore = asyncio.Semaphore(request.max_concurrent_jobs)
            tasks = []

            for i, video_url in enumerate(request.video_urls):
                task = process_single_video_in_batch(
                    semaphore, batch_id, str(video_url), i, request
                )
                tasks.append(task)

            # Wait for all tasks to complete
            await asyncio.gather(*tasks, return_exceptions=True)
        else:
            # Process sequentially
            for i, video_url in enumerate(request.video_urls):
                await process_single_video_in_batch(
                    None, batch_id, str(video_url), i, request
                )

        # Send batch completion webhook
        if request.webhook_url:
            await send_webhook_notification(
                url=str(request.webhook_url),
                event_type=WebhookEvent.BATCH_COMPLETED,
                data={
                    "batch_id": batch_id,
                    "status": "completed",
                    "total_videos": len(request.video_urls),
                    "timestamp": datetime.now().isoformat(),
                },
            )

        logger.info(f"Enhanced batch processing completed for batch {batch_id}")

    except Exception as e:
        logger.error(f"Enhanced batch processing failed for batch {batch_id}: {e}")

async def process_single_video_in_batch(semaphore: Optional[asyncio.Semaphore],
                                         batch_id: str, video_url: str, index: int,
                                         request: BatchProcessingRequest):
    """Process a single video within a batch."""
    if semaphore:
        async with semaphore:
            await _process_video(batch_id, video_url, index, request)
    else:
        await _process_video(batch_id, video_url, index, request)


async def _process_video(batch_id: str, video_url: str, index: int, request: BatchProcessingRequest):
    """Internal video processing logic."""
    try:
        logger.info(f"Processing video {index + 1}/{len(request.video_urls)} in batch {batch_id}")

        # Mock processing time
        processing_time = 5 if request.transcript_source == TranscriptSource.YOUTUBE else 15
        await asyncio.sleep(processing_time)

        logger.info(f"Completed video {index + 1} in batch {batch_id}")

    except Exception as e:
        logger.error(f"Failed to process video {index + 1} in batch {batch_id}: {e}")

async def send_webhook_notification(url: str, event_type: WebhookEvent, data: Dict[str, Any]):
    """Send webhook notification."""
    try:
        import httpx

        payload = {
            "event": event_type.value,
            "timestamp": datetime.now().isoformat(),
            "data": data,
        }

        # Mock webhook sending - replace with actual HTTP client
        logger.info(f"Sending webhook to {url}: {event_type.value}")

        # In production, use actual HTTP client:
        # async with httpx.AsyncClient() as client:
        #     response = await client.post(url, json=payload, timeout=10)
        #     logger.info(f"Webhook sent successfully: {response.status_code}")

    except Exception as e:
        logger.error(f"Failed to send webhook to {url}: {e}")

# Export router
__all__ = ["router"]