"""Pipeline API endpoints for complete YouTube summarization workflow."""
from fastapi import APIRouter, HTTPException, BackgroundTasks, Depends
from pydantic import BaseModel, Field, HttpUrl
from typing import Optional, List, Dict, Any
from datetime import datetime, timezone

from ..services.summary_pipeline import SummaryPipeline
from ..services.video_service import VideoService
from ..services.transcript_service import TranscriptService
from ..services.anthropic_summarizer import AnthropicSummarizer
from ..services.cache_manager import CacheManager
from ..services.notification_service import NotificationService
from ..models.pipeline import (
    PipelineStage,
    PipelineConfig,
    ProcessVideoRequest,
    ProcessVideoResponse,
    PipelineStatusResponse
)
from ..core.websocket_manager import websocket_manager
import os

router = APIRouter(prefix="/api", tags=["pipeline"])

# Approximate cumulative progress reported for each pipeline stage.
# FAILED/CANCELLED map to 0 because no completed work can be claimed.
# Module-level so the table is built once, not on every status request.
_STAGE_PERCENTAGES = {
    PipelineStage.INITIALIZED: 0,
    PipelineStage.VALIDATING_URL: 5,
    PipelineStage.EXTRACTING_METADATA: 15,
    PipelineStage.EXTRACTING_TRANSCRIPT: 35,
    PipelineStage.ANALYZING_CONTENT: 50,
    PipelineStage.GENERATING_SUMMARY: 75,
    PipelineStage.VALIDATING_QUALITY: 90,
    PipelineStage.COMPLETED: 100,
    PipelineStage.FAILED: 0,
    PipelineStage.CANCELLED: 0,
}


def _utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string.

    Uses timezone-aware ``datetime.now(timezone.utc)`` instead of the
    deprecated, naive ``datetime.utcnow()``.
    """
    return datetime.now(timezone.utc).isoformat()


# Dependency providers
def get_video_service() -> VideoService:
    """Get VideoService instance."""
    return VideoService()


def get_transcript_service() -> TranscriptService:
    """Get TranscriptService instance."""
    return TranscriptService()


def get_ai_service() -> AnthropicSummarizer:
    """Get AnthropicSummarizer instance.

    Raises:
        HTTPException: 500 if ANTHROPIC_API_KEY is not set in the environment.
    """
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        raise HTTPException(
            status_code=500,
            detail="Anthropic API key not configured"
        )
    return AnthropicSummarizer(api_key=api_key)


def get_cache_manager() -> CacheManager:
    """Get CacheManager instance."""
    return CacheManager()


def get_notification_service() -> NotificationService:
    """Get NotificationService instance."""
    return NotificationService()


def get_summary_pipeline(
    video_service: VideoService = Depends(get_video_service),
    transcript_service: TranscriptService = Depends(get_transcript_service),
    ai_service: AnthropicSummarizer = Depends(get_ai_service),
    cache_manager: CacheManager = Depends(get_cache_manager),
    notification_service: NotificationService = Depends(get_notification_service)
) -> SummaryPipeline:
    """Get SummaryPipeline instance with all dependencies."""
    return SummaryPipeline(
        video_service=video_service,
        transcript_service=transcript_service,
        ai_service=ai_service,
        cache_manager=cache_manager,
        notification_service=notification_service
    )


@router.post("/process", response_model=ProcessVideoResponse)
async def process_video(
    request: ProcessVideoRequest,
    pipeline: SummaryPipeline = Depends(get_summary_pipeline)
):
    """Process YouTube video through complete pipeline.

    Args:
        request: Video processing request with URL and configuration
        pipeline: SummaryPipeline service instance

    Returns:
        ProcessVideoResponse with job ID and status

    Raises:
        HTTPException: 500 if the pipeline fails to start.
    """
    try:
        config = PipelineConfig(
            summary_length=request.summary_length,
            focus_areas=request.focus_areas or [],
            include_timestamps=request.include_timestamps,
            quality_threshold=request.quality_threshold,
            enable_notifications=request.enable_notifications,
            max_retries=2  # Default retry limit
        )

        # Relay per-stage progress to any WebSocket subscribers for this job.
        async def progress_callback(job_id: str, progress):
            await websocket_manager.send_progress_update(job_id, {
                "stage": progress.stage.value,
                "percentage": progress.percentage,
                "message": progress.message,
                "details": progress.current_step_details
            })

        # Start pipeline processing
        job_id = await pipeline.process_video(
            video_url=str(request.video_url),
            config=config,
            progress_callback=progress_callback
        )

        return ProcessVideoResponse(
            job_id=job_id,
            status="processing",
            message="Video processing started",
            estimated_completion_time=120.0  # 2 minutes estimate
        )
    except HTTPException:
        # Don't re-wrap deliberate HTTP errors as opaque 500s.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to start processing: {str(e)}"
        ) from e


@router.get("/process/{job_id}", response_model=PipelineStatusResponse)
async def get_pipeline_status(
    job_id: str,
    pipeline: SummaryPipeline = Depends(get_summary_pipeline)
):
    """Get pipeline processing status and results.

    Args:
        job_id: Pipeline job identifier
        pipeline: SummaryPipeline service instance

    Returns:
        PipelineStatusResponse with current status and results

    Raises:
        HTTPException: 404 if no job with ``job_id`` exists.
    """
    result = await pipeline.get_pipeline_result(job_id)

    if not result:
        raise HTTPException(
            status_code=404,
            detail="Pipeline job not found"
        )

    response_data = {
        "job_id": job_id,
        "status": result.status.value,
        # Map the current stage to an approximate progress percentage;
        # unknown stages conservatively report 0.
        "progress_percentage": _STAGE_PERCENTAGES.get(result.status, 0),
        "current_message": f"Status: {result.status.value.replace('_', ' ').title()}",
        "video_metadata": result.video_metadata,
        "processing_time_seconds": result.processing_time_seconds
    }

    # Include results if completed
    if result.status == PipelineStage.COMPLETED:
        response_data["result"] = {
            "summary": result.summary,
            "key_points": result.key_points,
            "main_themes": result.main_themes,
            "actionable_insights": result.actionable_insights,
            "confidence_score": result.confidence_score,
            "quality_score": result.quality_score,
            "cost_data": result.cost_data
        }

    # Include error if failed
    if result.status == PipelineStage.FAILED and result.error:
        response_data["error"] = result.error

    return PipelineStatusResponse(**response_data)


@router.delete("/process/{job_id}")
async def cancel_pipeline(
    job_id: str,
    pipeline: SummaryPipeline = Depends(get_summary_pipeline)
):
    """Cancel running pipeline.

    Args:
        job_id: Pipeline job identifier
        pipeline: SummaryPipeline service instance

    Returns:
        Success message if cancelled

    Raises:
        HTTPException: 404 if the job is unknown or already finished.
    """
    success = await pipeline.cancel_job(job_id)

    if not success:
        raise HTTPException(
            status_code=404,
            detail="Pipeline job not found or already completed"
        )

    return {"message": "Pipeline cancelled successfully"}


@router.get("/process/{job_id}/history")
async def get_pipeline_history(
    job_id: str,
    pipeline: SummaryPipeline = Depends(get_summary_pipeline)
):
    """Get pipeline processing history and logs.

    Args:
        job_id: Pipeline job identifier
        pipeline: SummaryPipeline service instance

    Returns:
        Pipeline processing history

    Raises:
        HTTPException: 404 if no job with ``job_id`` exists.
    """
    result = await pipeline.get_pipeline_result(job_id)

    if not result:
        raise HTTPException(
            status_code=404,
            detail="Pipeline job not found"
        )

    return {
        "job_id": job_id,
        "created_at": result.started_at.isoformat() if result.started_at else None,
        "completed_at": result.completed_at.isoformat() if result.completed_at else None,
        "processing_time_seconds": result.processing_time_seconds,
        "retry_count": result.retry_count,
        "final_status": result.status.value,
        "video_url": result.video_url,
        "video_id": result.video_id,
        # Only the most recent error is retained on the result object.
        "error_history": [result.error] if result.error else []
    }


@router.get("/stats")
async def get_pipeline_stats(
    pipeline: SummaryPipeline = Depends(get_summary_pipeline),
    cache_manager: CacheManager = Depends(get_cache_manager),
    notification_service: NotificationService = Depends(get_notification_service)
):
    """Get pipeline processing statistics.

    Args:
        pipeline: SummaryPipeline service instance
        cache_manager: CacheManager service instance
        notification_service: NotificationService instance

    Returns:
        Pipeline processing statistics

    Raises:
        HTTPException: 500 if any statistics source fails.
    """
    try:
        # Get active jobs
        active_jobs = pipeline.get_active_jobs()

        # Get cache statistics
        cache_stats = await cache_manager.get_cache_stats()

        # Get notification statistics
        notification_stats = notification_service.get_notification_stats()

        # Get WebSocket connection stats
        websocket_stats = websocket_manager.get_stats()

        return {
            "active_jobs": {
                "count": len(active_jobs),
                "job_ids": active_jobs
            },
            "cache": cache_stats,
            "notifications": notification_stats,
            "websockets": websocket_stats,
            "timestamp": _utc_now_iso()
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to retrieve statistics: {str(e)}"
        ) from e


@router.post("/cleanup")
async def cleanup_old_jobs(
    max_age_hours: int = 24,
    pipeline: SummaryPipeline = Depends(get_summary_pipeline),
    cache_manager: CacheManager = Depends(get_cache_manager),
    notification_service: NotificationService = Depends(get_notification_service)
):
    """Clean up old completed jobs and cache entries.

    Args:
        max_age_hours: Maximum age in hours for cleanup
        pipeline: SummaryPipeline service instance
        cache_manager: CacheManager service instance
        notification_service: NotificationService instance

    Returns:
        Cleanup results

    Raises:
        HTTPException: 500 if cleanup fails.
    """
    try:
        # Cleanup pipeline jobs
        await pipeline.cleanup_completed_jobs(max_age_hours)

        # Cleanup notification history
        notification_service.clear_history()

        # Note: Cache cleanup happens automatically during normal operations

        return {
            "message": "Cleanup completed successfully",
            "max_age_hours": max_age_hours,
            "timestamp": _utc_now_iso()
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Cleanup failed: {str(e)}"
        ) from e


# Health check endpoint
@router.get("/health")
async def pipeline_health_check(
    pipeline: SummaryPipeline = Depends(get_summary_pipeline)
):
    """Check pipeline service health.

    Args:
        pipeline: SummaryPipeline service instance

    Returns:
        Health status information

    Raises:
        HTTPException: 503 if the health check itself fails.
    """
    try:
        # Basic health checks
        active_jobs_count = len(pipeline.get_active_jobs())

        # Check API key availability
        anthropic_key_available = bool(os.getenv("ANTHROPIC_API_KEY"))

        health_status = {
            "status": "healthy",
            "active_jobs": active_jobs_count,
            "anthropic_api_available": anthropic_key_available,
            "timestamp": _utc_now_iso()
        }

        if not anthropic_key_available:
            health_status["status"] = "degraded"
            health_status["warning"] = "Anthropic API key not configured"

        return health_status
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(
            status_code=503,
            detail=f"Health check failed: {str(e)}"
        ) from e