"""Playlist processing service for multi-video analysis.""" import asyncio import logging import re from typing import Dict, List, Optional, Any from datetime import datetime import uuid from googleapiclient.discovery import build from googleapiclient.errors import HttpError from backend.core.config import settings from backend.core.exceptions import ServiceError from backend.services.video_service import VideoService from backend.services.transcript_service import TranscriptService from backend.services.multi_agent_service import MultiAgentSummarizerService, AgentPerspective logger = logging.getLogger(__name__) class PlaylistVideo: """Represents a video in a playlist.""" def __init__(self, video_id: str, title: str, position: int, duration: Optional[str] = None, upload_date: Optional[str] = None): self.video_id = video_id self.title = title self.position = position self.duration = duration self.upload_date = upload_date self.analysis_result: Optional[Dict[str, Any]] = None self.error: Optional[str] = None class PlaylistMetadata: """Metadata for a YouTube playlist.""" def __init__(self, playlist_id: str, title: str, channel_name: str, video_count: int, total_duration: Optional[int] = None): self.playlist_id = playlist_id self.title = title self.channel_name = channel_name self.video_count = video_count self.total_duration = total_duration class PlaylistProcessingResult: """Result of playlist processing with multi-agent analysis.""" def __init__(self, job_id: str, playlist_url: str): self.job_id = job_id self.playlist_url = playlist_url self.playlist_metadata: Optional[PlaylistMetadata] = None self.videos: List[PlaylistVideo] = [] self.processed_videos: int = 0 self.failed_videos: int = 0 self.progress_percentage: float = 0.0 self.current_video: Optional[str] = None self.status: str = "initializing" # initializing, processing, completed, failed, cancelled self.error: Optional[str] = None self.cross_video_analysis: Optional[Dict[str, Any]] = None self.started_at: datetime = datetime.now() self.completed_at: Optional[datetime] = None class PlaylistService: """Service for processing YouTube playlists with multi-agent analysis.""" def __init__(self, youtube_api_key: Optional[str] = None): self.youtube_api_key = youtube_api_key or settings.YOUTUBE_API_KEY self.youtube = None if self.youtube_api_key: self.youtube = build('youtube', 'v3', developerKey=self.youtube_api_key) self.video_service = VideoService() self.transcript_service = TranscriptService() self.multi_agent_service = MultiAgentSummarizerService() # Active job tracking self.active_jobs: Dict[str, PlaylistProcessingResult] = {} def extract_playlist_id(self, playlist_url: str) -> Optional[str]: """Extract playlist ID from YouTube playlist URL.""" patterns = [ r'list=([a-zA-Z0-9_-]+)', # Standard playlist parameter r'playlist\?list=([a-zA-Z0-9_-]+)', # Direct playlist URL r'youtube\.com/.*[?&]list=([a-zA-Z0-9_-]+)', # Any YouTube URL with list param ] for pattern in patterns: match = re.search(pattern, playlist_url) if match: return match.group(1) return None async def get_playlist_metadata(self, playlist_id: str) -> Optional[PlaylistMetadata]: """Get playlist metadata from YouTube Data API.""" if not self.youtube: logger.warning("YouTube Data API not configured, using mock data") return PlaylistMetadata( playlist_id=playlist_id, title=f"Mock Playlist {playlist_id}", channel_name="Mock Channel", video_count=5 ) try: # Get playlist details playlist_response = self.youtube.playlists().list( part='snippet,contentDetails', id=playlist_id, maxResults=1 ).execute() if not playlist_response.get('items'): return None playlist_item = playlist_response['items'][0] snippet = playlist_item['snippet'] content_details = playlist_item['contentDetails'] return PlaylistMetadata( playlist_id=playlist_id, title=snippet.get('title', ''), channel_name=snippet.get('channelTitle', ''), video_count=content_details.get('itemCount', 0) ) except HttpError as e: logger.error(f"Error fetching playlist metadata: {e}") return None except Exception as e: logger.error(f"Unexpected error in get_playlist_metadata: {e}") return None async def discover_playlist_videos(self, playlist_id: str, max_videos: Optional[int] = None) -> List[PlaylistVideo]: """Discover all videos in a playlist.""" if not self.youtube: logger.warning("YouTube Data API not configured, using mock data") return [ PlaylistVideo(f"mock_video_{i}", f"Mock Video {i+1}", i) for i in range(min(max_videos or 5, 5)) ] videos = [] next_page_token = None position = 0 try: while True: # Get playlist items playlist_items_response = self.youtube.playlistItems().list( part='snippet,contentDetails', playlistId=playlist_id, maxResults=50, # Maximum allowed by YouTube API pageToken=next_page_token ).execute() items = playlist_items_response.get('items', []) if not items: break # Process each video for item in items: if max_videos and len(videos) >= max_videos: break snippet = item['snippet'] content_details = item['contentDetails'] video_id = content_details.get('videoId') if not video_id: continue # Skip deleted or private videos title = snippet.get('title', f'Video {position + 1}') upload_date = snippet.get('publishedAt') videos.append(PlaylistVideo( video_id=video_id, title=title, position=position, upload_date=upload_date )) position += 1 # Check if we need to fetch more pages next_page_token = playlist_items_response.get('nextPageToken') if not next_page_token or (max_videos and len(videos) >= max_videos): break logger.info(f"Discovered {len(videos)} videos in playlist {playlist_id}") return videos except HttpError as e: logger.error(f"Error fetching playlist videos: {e}") raise ServiceError(f"Failed to fetch playlist videos: {str(e)}") except Exception as e: logger.error(f"Unexpected error in discover_playlist_videos: {e}") raise ServiceError(f"Unexpected error discovering videos: {str(e)}") async def start_playlist_processing(self, playlist_url: str, max_videos: Optional[int] = None, agent_types: Optional[List[str]] = None) -> str: """Start processing a playlist with multi-agent analysis.""" job_id = str(uuid.uuid4()) # Initialize job result result = PlaylistProcessingResult(job_id=job_id, playlist_url=playlist_url) self.active_jobs[job_id] = result # Start background processing asyncio.create_task(self._process_playlist_background( job_id, playlist_url, max_videos, agent_types or ["technical", "business", "user"] )) return job_id async def _process_playlist_background(self, job_id: str, playlist_url: str, max_videos: Optional[int], agent_types: List[str]): """Background task to process playlist.""" result = self.active_jobs[job_id] try: result.status = "processing" # Extract playlist ID playlist_id = self.extract_playlist_id(playlist_url) if not playlist_id: raise ServiceError("Invalid playlist URL") logger.info(f"Starting playlist processing for job {job_id}, playlist {playlist_id}") # Get playlist metadata result.playlist_metadata = await self.get_playlist_metadata(playlist_id) if not result.playlist_metadata: raise ServiceError("Could not fetch playlist metadata") # Discover videos result.videos = await self.discover_playlist_videos(playlist_id, max_videos) if not result.videos: raise ServiceError("No videos found in playlist") # Convert agent type strings to enums perspectives = [] for agent_type in agent_types: if agent_type == "technical": perspectives.append(AgentPerspective.TECHNICAL) elif agent_type == "business": perspectives.append(AgentPerspective.BUSINESS) elif agent_type == "user": perspectives.append(AgentPerspective.USER_EXPERIENCE) # Process each video for i, video in enumerate(result.videos): if result.status == "cancelled": break result.current_video = video.title result.progress_percentage = (i / len(result.videos)) * 90 # Reserve 10% for cross-video analysis try: logger.info(f"Processing video {i+1}/{len(result.videos)}: {video.video_id}") # Get transcript transcript_result = await self.transcript_service.extract_transcript(video.video_id) if not transcript_result or not transcript_result.get('transcript'): video.error = "Could not extract transcript" result.failed_videos += 1 continue transcript = transcript_result['transcript'] # Perform multi-agent analysis analysis_result = await self.multi_agent_service.analyze_with_multiple_perspectives( transcript=transcript, video_id=video.video_id, video_title=video.title, perspectives=perspectives ) video.analysis_result = analysis_result.dict() result.processed_videos += 1 logger.info(f"Completed analysis for video {video.video_id}") except Exception as e: logger.error(f"Error processing video {video.video_id}: {e}") video.error = str(e) result.failed_videos += 1 # Small delay to prevent overwhelming APIs await asyncio.sleep(0.5) # Perform cross-video analysis result.current_video = "Cross-video analysis" result.progress_percentage = 95.0 result.cross_video_analysis = await self._perform_cross_video_analysis(result.videos) # Mark as completed result.status = "completed" result.progress_percentage = 100.0 result.completed_at = datetime.now() result.current_video = None logger.info(f"Playlist processing completed for job {job_id}") except Exception as e: logger.error(f"Error in playlist processing for job {job_id}: {e}") result.status = "failed" result.error = str(e) result.current_video = None async def _perform_cross_video_analysis(self, videos: List[PlaylistVideo]) -> Dict[str, Any]: """Perform cross-video analysis to identify themes and patterns.""" # For now, implement a simple analysis # In production, this could use AI to identify themes across videos successful_videos = [v for v in videos if v.analysis_result and not v.error] if not successful_videos: return {"error": "No successful video analyses to compare"} # Extract common themes from titles and summaries all_titles = [v.title for v in successful_videos] # Simple theme extraction (could be enhanced with AI) themes = [] if len(all_titles) > 1: themes = ["Multi-part series", "Educational content", "Topic progression"] analysis = { "total_videos": len(videos), "successfully_analyzed": len(successful_videos), "failed_analyses": len(videos) - len(successful_videos), "identified_themes": themes, "content_progression": "Sequential learning path detected" if len(successful_videos) > 2 else "Standalone content", "key_insights": [ f"Analyzed {len(successful_videos)} videos successfully", f"Common themes: {', '.join(themes) if themes else 'None identified'}", "Multi-agent perspectives provide comprehensive analysis" ], "agent_perspectives": { "technical": "Technical concepts build upon each other", "business": "Business value increases with series completion", "user": "User journey spans multiple videos for complete understanding" } } return analysis def get_playlist_status(self, job_id: str) -> Optional[PlaylistProcessingResult]: """Get the current status of a playlist processing job.""" return self.active_jobs.get(job_id) def cancel_playlist_processing(self, job_id: str) -> bool: """Cancel a running playlist processing job.""" if job_id in self.active_jobs: job = self.active_jobs[job_id] if job.status in ["initializing", "processing"]: job.status = "cancelled" job.error = "Job cancelled by user" job.current_video = None logger.info(f"Cancelled playlist processing job {job_id}") return True return False def cleanup_completed_jobs(self, max_age_hours: int = 24): """Clean up old completed jobs to prevent memory leaks.""" cutoff_time = datetime.now().timestamp() - (max_age_hours * 3600) jobs_to_remove = [] for job_id, job in self.active_jobs.items(): if job.status in ["completed", "failed", "cancelled"]: if job.completed_at and job.completed_at.timestamp() < cutoff_time: jobs_to_remove.append(job_id) for job_id in jobs_to_remove: del self.active_jobs[job_id] logger.info(f"Cleaned up old job {job_id}")