# youtube-summarizer/backend/services/multi_agent_orchestrator.py

"""Multi-agent orchestration service for YouTube video analysis."""

import asyncio
import logging
import uuid
from typing import Dict, List, Optional, Any
from datetime import datetime
from sqlalchemy.orm import Session

from ..core.exceptions import ServiceError
from .deepseek_service import DeepSeekService
from .perspective_agents import (
    TechnicalAnalysisAgent,
    BusinessAnalysisAgent,
    UserExperienceAgent,
    SynthesisAgent
)
from backend.models.agent_models import AgentSummary

logger = logging.getLogger(__name__)


class MultiAgentVideoOrchestrator:
    """Orchestrator for multi-agent YouTube video analysis."""

    def __init__(self, ai_service: Optional[DeepSeekService] = None):
        """Initialize the multi-agent orchestrator.

        Args:
            ai_service: DeepSeek AI service instance
        """
        self.ai_service = ai_service or DeepSeekService()

        # Initialize perspective agents
        self.technical_agent = TechnicalAnalysisAgent(self.ai_service)
        self.business_agent = BusinessAnalysisAgent(self.ai_service)
        self.ux_agent = UserExperienceAgent(self.ai_service)
        self.synthesis_agent = SynthesisAgent(self.ai_service)

        self._is_initialized = False

    async def initialize(self) -> None:
        """Initialize the orchestrator and agents."""
        if self._is_initialized:
            logger.warning("Multi-agent orchestrator already initialized")
            return

        logger.info("Initializing multi-agent video orchestrator")

        try:
            # Basic initialization - agents are already created
            self._is_initialized = True
            logger.info("Multi-agent video orchestrator initialized with 4 perspective agents")

        except Exception as e:
            logger.error(f"Failed to initialize multi-agent orchestrator: {e}")
            raise ServiceError(f"Orchestrator initialization failed: {str(e)}")

    async def shutdown(self) -> None:
        """Shutdown the orchestrator gracefully."""
        logger.info("Shutting down multi-agent video orchestrator")
        self._is_initialized = False
        logger.info("Multi-agent video orchestrator shutdown complete")

    async def analyze_video_with_multiple_perspectives(
        self,
        transcript: str,
        video_id: str,
        video_title: str = "",
        perspectives: Optional[List[str]] = None,
        thread_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Analyze video content using multiple agent perspectives.

        Args:
            transcript: Video transcript text
            video_id: YouTube video ID
            video_title: Video title for context
            perspectives: List of perspectives to analyze (defaults to all)
            thread_id: Thread ID for continuity (unused in simplified version)

        Returns:
            Complete multi-agent analysis result
        """
        if not self._is_initialized:
            await self.initialize()

        if not transcript or len(transcript.strip()) < 50:
            raise ServiceError("Transcript too short for multi-agent analysis")

        # Default to all perspectives
        if perspectives is None:
            perspectives = ["technical", "business", "user_experience"]

        logger.info(f"Starting multi-agent analysis for video {video_id} with perspectives: {perspectives}")

        try:
            # Create analysis state
            state = {
                "transcript": transcript,
                "video_id": video_id,
                "video_title": video_title,
                "metadata": {
                    "video_analysis": True,
                    "perspectives": perspectives,
                }
            }

            # Execute perspective analyses in parallel
            analysis_tasks = []

            for perspective in perspectives:
                if perspective == "technical":
                    task = self._execute_perspective_analysis(
                        agent=self.technical_agent,
                        state=state
                    )
                elif perspective == "business":
                    task = self._execute_perspective_analysis(
                        agent=self.business_agent,
                        state=state
                    )
                elif perspective == "user_experience":
                    task = self._execute_perspective_analysis(
                        agent=self.ux_agent,
                        state=state
                    )

                if task:
                    analysis_tasks.append(task)

            # Wait for all perspective analyses to complete
            perspective_results = await asyncio.gather(*analysis_tasks, return_exceptions=True)

            # Process results and handle exceptions
            successful_analyses = {}
            total_processing_time = 0.0

            for i, result in enumerate(perspective_results):
                perspective = perspectives[i]

                if isinstance(result, Exception):
                    logger.error(f"Error in {perspective} analysis: {result}")
                    continue

                if result and result.get("status") != "error":
                    analysis_data = result.get("analysis_results", {})
                    for analysis_key, analysis_content in analysis_data.items():
                        successful_analyses[analysis_key] = analysis_content
                        total_processing_time += analysis_content.get("processing_time_seconds", 0)

            if not successful_analyses:
                raise ServiceError("All perspective analyses failed")

            # Run synthesis if we have multiple perspectives
            if len(successful_analyses) > 1:
                synthesis_state = state.copy()
                synthesis_state["analysis_results"] = successful_analyses

                synthesis_result = await self._execute_synthesis(
                    agent=self.synthesis_agent,
                    state=synthesis_state
                )

                if synthesis_result and synthesis_result.get("status") != "error":
                    synthesis_data = synthesis_result.get("analysis_results", {}).get("synthesis")
                    if synthesis_data:
                        successful_analyses["synthesis"] = synthesis_data
                        total_processing_time += synthesis_data.get("processing_time_seconds", 0)

            # Calculate overall quality score
            quality_score = self._calculate_quality_score(successful_analyses)

            # Extract unified insights
            unified_insights = self._extract_unified_insights(successful_analyses)

            # Build final result
            result = {
                "video_id": video_id,
                "video_title": video_title,
                "perspectives": successful_analyses,
                "unified_insights": unified_insights,
                "processing_time_seconds": total_processing_time,
                "quality_score": quality_score,
                "created_at": datetime.now().isoformat(),
                "orchestrator_stats": {
                    "agent_count": len(successful_analyses),
                    "perspectives_analyzed": list(successful_analyses.keys()),
                    "total_processing_time": total_processing_time
                }
            }

            logger.info(f"Multi-agent analysis completed for video {video_id} in {total_processing_time:.2f}s")
            return result

        except Exception as e:
            logger.error(f"Error in multi-agent video analysis for {video_id}: {e}")
            raise ServiceError(f"Multi-agent analysis failed: {str(e)}")

    async def save_analysis_to_database(
        self,
        summary_id: str,
        analysis_result: Dict[str, Any],
        db: Session
    ) -> List[AgentSummary]:
        """Save multi-agent analysis results to database.

        Args:
            summary_id: ID of the summary this analysis belongs to
            analysis_result: Complete analysis result from analyze_video_with_multiple_perspectives
            db: Database session

        Returns:
            List of AgentSummary objects that were saved
        """
        agent_summaries = []

        try:
            perspectives = analysis_result.get('perspectives', {})

            for perspective_type, analysis_data in perspectives.items():
                agent_summary = AgentSummary(
                    summary_id=summary_id,
                    agent_type=perspective_type,
                    agent_summary=analysis_data.get('summary'),
                    key_insights=analysis_data.get('key_insights', []),
                    focus_areas=analysis_data.get('focus_areas', []),
                    recommendations=analysis_data.get('recommendations', []),
                    confidence_score=analysis_data.get('confidence_score'),
                    processing_time_seconds=analysis_data.get('processing_time_seconds')
                )
                db.add(agent_summary)
                agent_summaries.append(agent_summary)

            db.commit()
            logger.info(f"Saved {len(agent_summaries)} agent analyses to database for summary {summary_id}")
            return agent_summaries

        except Exception as e:
            db.rollback()
            logger.error(f"Failed to save agent analyses to database: {e}")
            raise ServiceError(f"Database save failed: {str(e)}")

    async def _execute_perspective_analysis(
        self,
        agent,
        state: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Execute analysis for a specific perspective agent.

        Args:
            agent: The perspective agent to execute
            state: Analysis state with transcript and metadata

        Returns:
            Analysis result from the agent
        """
        try:
            # Execute the agent directly
            result_state = await agent.execute(state)
            return result_state

        except Exception as e:
            logger.error(f"Error executing {agent.agent_id}: {e}")
            return {
                "status": "error",
                "error": str(e),
                "agent_id": agent.agent_id
            }

    async def _execute_synthesis(
        self,
        agent,
        state: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Execute synthesis of multiple perspective analyses.

        Args:
            agent: The synthesis agent
            state: State with analysis results

        Returns:
            Synthesis result
        """
        try:
            # Execute synthesis agent
            result_state = await agent.execute(state)
            return result_state

        except Exception as e:
            logger.error(f"Error in synthesis execution: {e}")
            return {
                "status": "error",
                "error": str(e),
                "agent_id": agent.agent_id
            }

    def _calculate_quality_score(self, analyses: Dict[str, Any]) -> float:
        """Calculate overall quality score from perspective analyses.

        Args:
            analyses: Dictionary of perspective analyses

        Returns:
            Quality score between 0.0 and 1.0
        """
        if not analyses:
            return 0.0

        # Average confidence scores
        confidence_scores = []
        completeness_scores = []

        for analysis in analyses.values():
            if analysis.get("agent_type") == "synthesis":
                # Synthesis has different structure
                confidence_scores.append(analysis.get("confidence_score", 0.7))
                # Synthesis completeness based on unified insights and recommendations
                insight_score = min(len(analysis.get("unified_insights", [])) / 8.0, 1.0)
                rec_score = min(len(analysis.get("recommendations", [])) / 5.0, 1.0)
                completeness_scores.append((insight_score + rec_score) / 2.0)
            else:
                # Regular perspective analysis
                confidence_scores.append(analysis.get("confidence_score", 0.7))
                # Completeness based on insights and recommendations
                insight_score = min(len(analysis.get("key_insights", [])) / 5.0, 1.0)
                rec_score = min(len(analysis.get("recommendations", [])) / 3.0, 1.0)
                completeness_scores.append((insight_score + rec_score) / 2.0)

        # Calculate averages
        avg_confidence = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0.0
        avg_completeness = sum(completeness_scores) / len(completeness_scores) if completeness_scores else 0.0

        # Weighted final score (confidence weighted more heavily)
        quality_score = (avg_confidence * 0.7) + (avg_completeness * 0.3)
        return round(quality_score, 2)

    def _extract_unified_insights(self, analyses: Dict[str, Any]) -> List[str]:
        """Extract unified insights from all analyses.

        Args:
            analyses: Dictionary of perspective analyses

        Returns:
            List of unified insights
        """
        unified_insights = []

        # Check if synthesis exists and use its unified insights
        if "synthesis" in analyses:
            synthesis_insights = analyses["synthesis"].get("unified_insights", [])
            unified_insights.extend(synthesis_insights[:8])  # Top 8 from synthesis

        # Add top insights from each perspective (if no synthesis or to supplement)
        for perspective_type, analysis in analyses.items():
            if perspective_type == "synthesis":
                continue

            perspective_insights = analysis.get("key_insights", [])
            for insight in perspective_insights[:2]:  # Top 2 from each perspective
                if insight and len(unified_insights) < 12:
                    formatted_insight = f"[{perspective_type.title()}] {insight}"
                    if formatted_insight not in unified_insights:
                        unified_insights.append(formatted_insight)

        return unified_insights[:12]  # Limit to 12 total insights

    async def get_orchestrator_health(self) -> Dict[str, Any]:
        """Get health status of the multi-agent orchestrator.

        Returns:
            Health information for the orchestrator and all agents
        """
        health_info = {
            "service": "multi_agent_video_orchestrator",
            "initialized": self._is_initialized,
            "timestamp": datetime.now().isoformat(),
            "ai_service_available": self.ai_service is not None
        }

        if self._is_initialized:
            # Get agent information
            agents = [
                {"agent_id": self.technical_agent.agent_id, "name": self.technical_agent.name},
                {"agent_id": self.business_agent.agent_id, "name": self.business_agent.name},
                {"agent_id": self.ux_agent.agent_id, "name": self.ux_agent.name},
                {"agent_id": self.synthesis_agent.agent_id, "name": self.synthesis_agent.name}
            ]

            health_info["agents"] = agents
            health_info["agent_count"] = len(agents)
            health_info["status"] = "healthy"
        else:
            health_info["status"] = "not_initialized"

        # Test AI service connectivity
        if self.ai_service:
            try:
                await self.ai_service.generate_response("test", max_tokens=10)
                health_info["ai_service_status"] = "connected"
            except Exception:
                health_info["ai_service_status"] = "connection_error"
                if health_info["status"] == "healthy":
                    health_info["status"] = "degraded"
        else:
            health_info["ai_service_status"] = "not_configured"
            health_info["status"] = "error"

        return health_info

    def get_supported_perspectives(self) -> List[str]:
        """Get list of supported analysis perspectives.

        Returns:
            List of perspective names
        """
        return ["technical", "business", "user_experience"]

    def get_agent_capabilities(self) -> Dict[str, List[str]]:
        """Get capabilities of each registered agent.

        Returns:
            Dictionary mapping agent IDs to their capabilities
        """
        return {
            "technical_analyst": self.technical_agent.get_capabilities(),
            "business_analyst": self.business_agent.get_capabilities(),
            "ux_analyst": self.ux_agent.get_capabilities(),
            "synthesis_agent": self.synthesis_agent.get_capabilities()
        }