# youtube-summarizer/backend/api/enhanced.py
"""
Enhanced API endpoints for YouTube Summarizer Developer Platform
Extends existing API with advanced developer features, batch processing, and webhooks
"""
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field, HttpUrl
from typing import List, Optional, Dict, Any
from datetime import datetime, timedelta
from uuid import uuid4
import json
import asyncio
import logging
from enum import Enum
# Import existing services
try:
from ..services.dual_transcript_service import DualTranscriptService
from ..services.batch_processing_service import BatchProcessingService
from ..models.transcript import TranscriptSource, WhisperModelSize, DualTranscriptResult
from ..models.batch import BatchJob, BatchJobStatus
except ImportError:
    # Fallback so the module can be imported in isolated tests; note that the
    # service and model names above will be undefined if this branch is taken.
    pass
logger = logging.getLogger(__name__)
# Authentication
security = HTTPBearer(auto_error=False)
# Create enhanced API router
router = APIRouter(prefix="/api/v2", tags=["enhanced-api"])
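# A minimal sketch of how this router might be wired into the main FastAPI app;
# the module path and variable names here are assumptions, not part of this file:
#
#     from fastapi import FastAPI
#     from backend.api.enhanced import router as enhanced_router
#
#     app = FastAPI(title="YouTube Summarizer")
#     app.include_router(enhanced_router)  # exposes the /api/v2/* endpoints defined below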
# Enhanced Models
class APIKeyInfo(BaseModel):
id: str
name: str
rate_limit_per_hour: int
created_at: datetime
last_used_at: Optional[datetime]
usage_count: int
is_active: bool
class ProcessingPriority(str, Enum):
LOW = "low"
NORMAL = "normal"
HIGH = "high"
URGENT = "urgent"
class WebhookEvent(str, Enum):
JOB_STARTED = "job.started"
JOB_PROGRESS = "job.progress"
JOB_COMPLETED = "job.completed"
JOB_FAILED = "job.failed"
BATCH_COMPLETED = "batch.completed"
class EnhancedTranscriptRequest(BaseModel):
video_url: HttpUrl = Field(..., description="YouTube video URL")
transcript_source: TranscriptSource = Field(default=TranscriptSource.YOUTUBE, description="Transcript source")
whisper_model_size: Optional[WhisperModelSize] = Field(default=WhisperModelSize.SMALL, description="Whisper model size")
priority: ProcessingPriority = Field(default=ProcessingPriority.NORMAL, description="Processing priority")
webhook_url: Optional[HttpUrl] = Field(None, description="Webhook URL for notifications")
include_quality_analysis: bool = Field(default=True, description="Include transcript quality analysis")
custom_prompt: Optional[str] = Field(None, description="Custom processing prompt")
tags: List[str] = Field(default_factory=list, description="Custom tags for organization")
class BatchProcessingRequest(BaseModel):
video_urls: List[HttpUrl] = Field(..., min_items=1, max_items=1000, description="List of video URLs")
transcript_source: TranscriptSource = Field(default=TranscriptSource.YOUTUBE, description="Transcript source for all videos")
batch_name: str = Field(..., description="Batch job name")
priority: ProcessingPriority = Field(default=ProcessingPriority.NORMAL, description="Processing priority")
webhook_url: Optional[HttpUrl] = Field(None, description="Webhook URL for batch notifications")
parallel_processing: bool = Field(default=False, description="Enable parallel processing")
max_concurrent_jobs: int = Field(default=5, ge=1, description="Maximum concurrent jobs")
class EnhancedJobResponse(BaseModel):
job_id: str
status: str
priority: ProcessingPriority
created_at: datetime
estimated_completion: Optional[datetime]
progress_percentage: float
current_stage: str
webhook_url: Optional[str]
metadata: Dict[str, Any]
class APIUsageStats(BaseModel):
total_requests: int
requests_today: int
requests_this_month: int
average_response_time_ms: float
success_rate: float
rate_limit_remaining: int
quota_reset_time: datetime
class WebhookConfiguration(BaseModel):
url: HttpUrl
events: List[WebhookEvent]
secret: Optional[str] = Field(None, description="Webhook secret for verification")
is_active: bool = Field(default=True)
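# A minimal sketch of how a webhook consumer could use the optional `secret` above
# to verify deliveries; the HMAC-SHA256 scheme and the "X-Webhook-Signature" header
# name are assumptions rather than something this service currently sends:
#
#     import hmac, hashlib
#
#     def verify_webhook_signature(secret: str, raw_body: bytes, signature: str) -> bool:
#         expected = hmac.new(secret.encode("utf-8"), raw_body, hashlib.sha256).hexdigest()
#         return hmac.compare_digest(expected, signature)  # constant-time comparison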
# Mock authentication and rate limiting (to be replaced with real implementation)
async def verify_api_key(credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)) -> Dict[str, Any]:
"""Verify API key and return user info"""
if not credentials:
raise HTTPException(status_code=401, detail="API key required")
# Mock API key validation - replace with real implementation
api_key = credentials.credentials
if not api_key.startswith("ys_"):
raise HTTPException(status_code=401, detail="Invalid API key format")
# Mock user info - replace with database lookup
return {
"user_id": "user_" + api_key[-8:],
"api_key_id": "key_" + api_key[-8:],
"rate_limit": 1000,
"tier": "pro" if "pro" in api_key else "free"
}
async def check_rate_limit(user_info: Dict = Depends(verify_api_key)) -> Dict[str, Any]:
"""Check and update rate limiting"""
# Mock rate limiting - replace with Redis implementation
remaining = 995 # Mock remaining requests
reset_time = datetime.now() + timedelta(hours=1)
if remaining <= 0:
raise HTTPException(
status_code=429,
detail="Rate limit exceeded",
headers={"Retry-After": "3600"}
)
return {
**user_info,
"rate_limit_remaining": remaining,
"rate_limit_reset": reset_time
}
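# A minimal sketch of what the real Redis-backed limiter could look like, assuming
# the redis-py asyncio client and a fixed one-hour window keyed by API key id; the
# key naming and connection URL are illustrative assumptions:
#
#     import redis.asyncio as redis
#
#     _redis = redis.from_url("redis://localhost:6379/0")
#
#     async def consume_rate_limit(api_key_id: str, limit_per_hour: int) -> int:
#         key = f"ratelimit:{api_key_id}:{datetime.now().strftime('%Y%m%d%H')}"
#         used = await _redis.incr(key)
#         if used == 1:
#             await _redis.expire(key, 3600)  # first hit in the window sets the TTL
#         remaining = limit_per_hour - used
#         if remaining < 0:
#             raise HTTPException(status_code=429, detail="Rate limit exceeded",
#                                 headers={"Retry-After": "3600"})
#         return remaining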
# Enhanced API Endpoints
@router.get("/health", summary="Health check with detailed status")
async def enhanced_health_check():
"""Enhanced health check with service status"""
try:
# Check service availability
services_status = {
"dual_transcript_service": True, # Check actual service
"batch_processing_service": True, # Check actual service
"database": True, # Check database connection
"redis": True, # Check Redis connection
"webhook_service": True, # Check webhook service
}
overall_healthy = all(services_status.values())
return {
"status": "healthy" if overall_healthy else "degraded",
"timestamp": datetime.now().isoformat(),
"version": "4.2.0",
"services": services_status,
"uptime_seconds": 3600, # Mock uptime
"requests_per_minute": 45, # Mock metric
}
except Exception as e:
raise HTTPException(status_code=503, detail=f"Service unavailable: {str(e)}")
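# A minimal sketch of replacing one of the hard-coded flags above with a real probe,
# reusing the commented `_redis` client from the rate-limit sketch earlier (still an
# assumption about the deployment):
#
#     try:
#         services_status["redis"] = bool(await _redis.ping())
#     except Exception:
#         services_status["redis"] = False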
@router.post("/transcript/extract",
summary="Extract transcript with enhanced options",
response_model=EnhancedJobResponse)
async def enhanced_transcript_extraction(
request: EnhancedTranscriptRequest,
background_tasks: BackgroundTasks,
user_info: Dict = Depends(check_rate_limit)
):
"""Enhanced transcript extraction with priority, webhooks, and quality analysis"""
job_id = str(uuid4())
try:
# Create job with enhanced metadata
job_metadata = {
"user_id": user_info["user_id"],
"video_url": str(request.video_url),
"transcript_source": request.transcript_source.value,
"priority": request.priority.value,
"tags": request.tags,
"custom_prompt": request.custom_prompt,
"include_quality_analysis": request.include_quality_analysis
}
# Start background processing
background_tasks.add_task(
process_enhanced_transcript,
job_id=job_id,
request=request,
user_info=user_info
)
# Calculate estimated completion based on priority
priority_multiplier = {
ProcessingPriority.URGENT: 0.5,
ProcessingPriority.HIGH: 0.7,
ProcessingPriority.NORMAL: 1.0,
ProcessingPriority.LOW: 1.5
}
base_time = 30 if request.transcript_source == TranscriptSource.YOUTUBE else 120
estimated_seconds = base_time * priority_multiplier[request.priority]
estimated_completion = datetime.now() + timedelta(seconds=estimated_seconds)
return EnhancedJobResponse(
job_id=job_id,
status="queued",
priority=request.priority,
created_at=datetime.now(),
estimated_completion=estimated_completion,
progress_percentage=0.0,
current_stage="queued",
webhook_url=str(request.webhook_url) if request.webhook_url else None,
metadata=job_metadata
)
except Exception as e:
logger.error(f"Enhanced transcript extraction failed: {e}")
raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")
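# A minimal sketch of calling this endpoint from Python, assuming a locally running
# instance on port 8000 and "ys_example_pro_key" as a valid key (both assumptions):
#
#     import httpx
#
#     resp = httpx.post(
#         "http://localhost:8000/api/v2/transcript/extract",
#         headers={"Authorization": "Bearer ys_example_pro_key"},
#         json={
#             "video_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
#             "priority": "high",
#             "tags": ["demo"],
#         },
#     )
#     job = resp.json()  # EnhancedJobResponse fields, including "job_id"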
@router.post("/batch/process",
summary="Batch process multiple videos",
response_model=Dict[str, Any])
async def enhanced_batch_processing(
request: BatchProcessingRequest,
background_tasks: BackgroundTasks,
user_info: Dict = Depends(check_rate_limit)
):
"""Enhanced batch processing with parallel execution and progress tracking"""
batch_id = str(uuid4())
try:
# Validate batch size limits based on user tier
max_batch_size = 1000 if user_info["tier"] == "pro" else 100
if len(request.video_urls) > max_batch_size:
raise HTTPException(
status_code=400,
detail=f"Batch size exceeds limit. Max: {max_batch_size} for {user_info['tier']} tier"
)
# Create batch job
batch_metadata = {
"user_id": user_info["user_id"],
"batch_name": request.batch_name,
"video_count": len(request.video_urls),
"transcript_source": request.transcript_source.value,
"priority": request.priority.value,
"parallel_processing": request.parallel_processing,
"max_concurrent_jobs": request.max_concurrent_jobs
}
# Start background batch processing
background_tasks.add_task(
process_enhanced_batch,
batch_id=batch_id,
request=request,
user_info=user_info
)
# Calculate estimated completion
job_time = 30 if request.transcript_source == TranscriptSource.YOUTUBE else 120
if request.parallel_processing:
total_time = (len(request.video_urls) / request.max_concurrent_jobs) * job_time
else:
total_time = len(request.video_urls) * job_time
estimated_completion = datetime.now() + timedelta(seconds=total_time)
return {
"batch_id": batch_id,
"status": "queued",
"video_count": len(request.video_urls),
"priority": request.priority.value,
"estimated_completion": estimated_completion.isoformat(),
"parallel_processing": request.parallel_processing,
"webhook_url": str(request.webhook_url) if request.webhook_url else None,
"metadata": batch_metadata
}
    except HTTPException:
        # Re-raise intentional client errors (e.g. the batch-size limit above)
        # instead of converting them into a 500 below.
        raise
    except Exception as e:
        logger.error(f"Enhanced batch processing failed: {e}")
        raise HTTPException(status_code=500, detail=f"Batch processing failed: {str(e)}")
@router.get("/job/{job_id}",
summary="Get enhanced job status",
response_model=EnhancedJobResponse)
async def get_enhanced_job_status(
job_id: str,
user_info: Dict = Depends(verify_api_key)
):
"""Get detailed job status with progress and metadata"""
try:
# Mock job status - replace with actual job lookup
mock_job = {
"job_id": job_id,
"status": "processing",
"priority": ProcessingPriority.NORMAL,
"created_at": datetime.now() - timedelta(minutes=2),
"estimated_completion": datetime.now() + timedelta(minutes=3),
"progress_percentage": 65.0,
"current_stage": "generating_summary",
"webhook_url": None,
"metadata": {
"user_id": user_info["user_id"],
"processing_time_elapsed": 120,
"estimated_time_remaining": 180
}
}
return EnhancedJobResponse(**mock_job)
except Exception as e:
logger.error(f"Job status lookup failed: {e}")
raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
@router.get("/usage/stats",
summary="Get API usage statistics",
response_model=APIUsageStats)
async def get_usage_statistics(
user_info: Dict = Depends(verify_api_key)
):
"""Get detailed API usage statistics for the authenticated user"""
try:
# Mock usage stats - replace with actual database queries
return APIUsageStats(
total_requests=1250,
requests_today=45,
requests_this_month=890,
average_response_time_ms=245.5,
success_rate=0.987,
rate_limit_remaining=955,
quota_reset_time=datetime.now() + timedelta(hours=1)
)
except Exception as e:
logger.error(f"Usage statistics failed: {e}")
raise HTTPException(status_code=500, detail=f"Statistics unavailable: {str(e)}")
@router.get("/jobs/stream",
summary="Stream job updates via Server-Sent Events")
async def stream_job_updates(
user_info: Dict = Depends(verify_api_key)
):
"""Stream real-time job updates using Server-Sent Events"""
async def generate_events():
"""Generate SSE events for job updates"""
try:
while True:
# Mock event - replace with actual job update logic
event_data = {
"event": "job_update",
"job_id": "mock_job_123",
"status": "processing",
"progress": 75.0,
"timestamp": datetime.now().isoformat()
}
yield f"data: {json.dumps(event_data)}\n\n"
await asyncio.sleep(2) # Send updates every 2 seconds
except asyncio.CancelledError:
logger.info("SSE stream cancelled")
yield f"data: {json.dumps({'event': 'stream_closed'})}\n\n"
return StreamingResponse(
generate_events(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Headers": "Cache-Control"
}
)
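# A minimal sketch of consuming this SSE stream from a Python client, assuming
# httpx and a locally running instance (URL and key are assumptions):
#
#     import httpx, json
#
#     async def follow_job_updates(api_key: str):
#         headers = {"Authorization": f"Bearer {api_key}"}
#         async with httpx.AsyncClient(timeout=None) as client:
#             async with client.stream("GET", "http://localhost:8000/api/v2/jobs/stream",
#                                      headers=headers) as response:
#                 async for line in response.aiter_lines():
#                     if line.startswith("data: "):
#                         print(json.loads(line[len("data: "):]))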
# Background processing functions
async def process_enhanced_transcript(job_id: str, request: EnhancedTranscriptRequest, user_info: Dict):
"""Background task for enhanced transcript processing"""
try:
logger.info(f"Starting enhanced transcript processing for job {job_id}")
# Mock processing stages
stages = ["downloading", "extracting", "analyzing", "generating", "completed"]
for i, stage in enumerate(stages):
# Mock processing delay
await asyncio.sleep(2)
progress = (i + 1) / len(stages) * 100
logger.info(f"Job {job_id} - Stage: {stage}, Progress: {progress}%")
# Send webhook notification if configured
if request.webhook_url:
await send_webhook_notification(
url=str(request.webhook_url),
event_type=WebhookEvent.JOB_PROGRESS,
data={
"job_id": job_id,
"stage": stage,
"progress": progress,
"timestamp": datetime.now().isoformat()
}
)
# Final completion webhook
if request.webhook_url:
await send_webhook_notification(
url=str(request.webhook_url),
event_type=WebhookEvent.JOB_COMPLETED,
data={
"job_id": job_id,
"status": "completed",
"result_url": f"/api/v2/job/{job_id}/result",
"timestamp": datetime.now().isoformat()
}
)
logger.info(f"Enhanced transcript processing completed for job {job_id}")
except Exception as e:
logger.error(f"Enhanced transcript processing failed for job {job_id}: {e}")
# Send failure webhook
if request.webhook_url:
await send_webhook_notification(
url=str(request.webhook_url),
event_type=WebhookEvent.JOB_FAILED,
data={
"job_id": job_id,
"error": str(e),
"timestamp": datetime.now().isoformat()
}
)
async def process_enhanced_batch(batch_id: str, request: BatchProcessingRequest, user_info: Dict):
"""Background task for enhanced batch processing"""
try:
logger.info(f"Starting enhanced batch processing for batch {batch_id}")
if request.parallel_processing:
# Process in parallel batches
semaphore = asyncio.Semaphore(request.max_concurrent_jobs)
tasks = []
for i, video_url in enumerate(request.video_urls):
task = process_single_video_in_batch(
semaphore, batch_id, str(video_url), i, request
)
tasks.append(task)
# Wait for all tasks to complete
await asyncio.gather(*tasks, return_exceptions=True)
else:
# Process sequentially
for i, video_url in enumerate(request.video_urls):
await process_single_video_in_batch(
None, batch_id, str(video_url), i, request
)
# Send batch completion webhook
if request.webhook_url:
await send_webhook_notification(
url=str(request.webhook_url),
event_type=WebhookEvent.BATCH_COMPLETED,
data={
"batch_id": batch_id,
"status": "completed",
"total_videos": len(request.video_urls),
"timestamp": datetime.now().isoformat()
}
)
logger.info(f"Enhanced batch processing completed for batch {batch_id}")
except Exception as e:
logger.error(f"Enhanced batch processing failed for batch {batch_id}: {e}")
async def process_single_video_in_batch(semaphore: Optional[asyncio.Semaphore],
batch_id: str, video_url: str, index: int,
request: BatchProcessingRequest):
"""Process a single video within a batch"""
if semaphore:
async with semaphore:
await _process_video(batch_id, video_url, index, request)
else:
await _process_video(batch_id, video_url, index, request)
async def _process_video(batch_id: str, video_url: str, index: int, request: BatchProcessingRequest):
"""Internal video processing logic"""
try:
logger.info(f"Processing video {index + 1}/{len(request.video_urls)} in batch {batch_id}")
# Mock processing time
processing_time = 5 if request.transcript_source == TranscriptSource.YOUTUBE else 15
await asyncio.sleep(processing_time)
logger.info(f"Completed video {index + 1} in batch {batch_id}")
except Exception as e:
logger.error(f"Failed to process video {index + 1} in batch {batch_id}: {e}")
async def send_webhook_notification(url: str, event_type: WebhookEvent, data: Dict[str, Any]):
    """Send webhook notification (currently a mock that only logs the event)"""
    try:
        payload = {
            "event": event_type.value,
            "timestamp": datetime.now().isoformat(),
            "data": data
        }
        # Mock webhook sending - replace with an actual HTTP client
        logger.info(f"Sending webhook to {url}: {event_type.value}")
        # In production, post `payload` with an async HTTP client such as httpx:
        #     import httpx
        #     async with httpx.AsyncClient() as client:
        #         response = await client.post(url, json=payload, timeout=10)
        #         logger.info(f"Webhook sent successfully: {response.status_code}")
    except Exception as e:
        logger.error(f"Failed to send webhook to {url}: {e}")
# Export router
__all__ = ["router"]