# youtube-summarizer/backend/services/unified_analysis_agent.py
"""Unified Analysis Agent - Template-driven multi-perspective analysis agent."""
import logging
import asyncio
from typing import Dict, List, Optional, Any, Union
from datetime import datetime
from pydantic import BaseModel, Field
# Import BaseAgent pattern from local implementation
from ..core.base_agent import (
BaseAgent, AgentMetadata, AgentConfig, AgentState, AgentContext, TaskResult
)
from ..models.analysis_templates import AnalysisTemplate, TemplateRegistry
from ..services.deepseek_service import DeepSeekService
from ..services.template_driven_agent import TemplateAnalysisRequest, TemplateAnalysisResult
logger = logging.getLogger(__name__)


class UnifiedAgentConfig(AgentConfig):
    """Extended configuration for unified analysis agents."""

    template_id: str = Field(..., description="Template ID for this agent instance")
    ai_service_config: Dict[str, Any] = Field(default_factory=dict, description="AI service configuration")
    cost_limit: Optional[float] = Field(None, description="Cost limit for AI operations")
    quality_threshold: float = Field(default=0.7, description="Minimum quality threshold for results")
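
# A minimal configuration sketch (illustrative values only; temperature and
# memory_enabled are presumably inherited from the base AgentConfig):
#
#     config = UnifiedAgentConfig(
#         template_id="technical_v1",  # hypothetical template ID
#         cost_limit=0.50,             # reject runs estimated above $0.50
#         quality_threshold=0.8,
#     )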


class UnifiedAnalysisAgent(BaseAgent):
    """
    Unified analysis agent that uses templates to determine behavior dynamically.

    This agent can function as:
    - Educational perspective (Beginner/Expert/Scholarly)
    - Domain perspective (Technical/Business/UX)
    - Any custom perspective defined via templates

    Key features:
    - Template-driven behavior switching
    - Automatic capability registration
    - LangGraph state management compatibility
    - Performance metrics and health monitoring
    """

    def __init__(
        self,
        template: AnalysisTemplate,
        ai_service: Optional[DeepSeekService] = None,
        template_registry: Optional[TemplateRegistry] = None,
        config: Optional[UnifiedAgentConfig] = None
    ):
        """Initialize the unified analysis agent.

        Args:
            template: Analysis template defining agent behavior
            ai_service: AI service for content processing
            template_registry: Registry for template lookups
            config: Agent configuration
        """
        # Create agent metadata from template
        metadata = AgentMetadata(
            agent_id=f"unified_{template.id}",
            name=template.name,
            description=template.description,
            category=template.template_type.value,
            capabilities=self._generate_capabilities_from_template(template)
        )

        # Use provided config or create from template
        if config is None:
            config = UnifiedAgentConfig(
                template_id=template.id,
                temperature=0.7,  # Default for analysis tasks
                memory_enabled=True
            )

        super().__init__(metadata, config)

        self.template = template
        self.ai_service = ai_service or DeepSeekService()
        self.template_registry = template_registry

        # Performance tracking
        self._execution_count = 0
        self._total_processing_time = 0.0
        self._average_confidence = 0.0
        self._last_execution: Optional[datetime] = None

        logger.info(f"Initialized UnifiedAnalysisAgent: {self.agent_id} ({template.name})")

    @classmethod
    def from_template_id(
        cls,
        template_id: str,
        template_registry: TemplateRegistry,
        ai_service: Optional[DeepSeekService] = None,
        config: Optional[UnifiedAgentConfig] = None
    ) -> "UnifiedAnalysisAgent":
        """Create agent from template ID.

        Args:
            template_id: ID of template to use
            template_registry: Registry containing the template
            ai_service: AI service instance
            config: Agent configuration

        Returns:
            Configured UnifiedAnalysisAgent

        Raises:
            ValueError: If template not found or inactive
        """
        template = template_registry.get_template(template_id)
        if not template:
            raise ValueError(f"Template not found: {template_id}")
        if not template.is_active:
            raise ValueError(f"Template is inactive: {template_id}")

        return cls(template, ai_service, template_registry, config)
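
    # A minimal usage sketch (hedged): names below are illustrative and assume
    # a registry that already contains an active template with this ID.
    #
    #     registry = TemplateRegistry()
    #     agent = UnifiedAnalysisAgent.from_template_id(
    #         "beginner_educational",  # hypothetical template ID
    #         template_registry=registry,
    #     )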

    def _generate_capabilities_from_template(self, template: AnalysisTemplate) -> List[str]:
        """Generate agent capabilities based on template configuration."""
        capabilities = [
            "content_analysis",
            "text_processing",
            f"{template.template_type.value}_perspective",
            "ai_summarization"
        ]

        # Add complexity-specific capabilities
        if template.complexity_level:
            capabilities.append(f"{template.complexity_level.value}_analysis")

        # Add focus-area capabilities
        for focus in template.analysis_focus:
            # Convert focus to capability format
            capability = focus.lower().replace(" ", "_").replace("-", "_")
            capabilities.append(f"analysis_{capability}")

        # Add template-specific capabilities
        if template.include_examples:
            capabilities.append("example_generation")
        if template.include_recommendations:
            capabilities.append("recommendation_generation")

        # Remove duplicates while preserving insertion order
        return list(dict.fromkeys(capabilities))
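
    # Illustrative output (hypothetical template): a "technical" template with
    # complexity level "expert" and focus areas ["Architecture", "Code Quality"]
    # would yield roughly:
    #     ["content_analysis", "text_processing", "technical_perspective",
    #      "ai_summarization", "expert_analysis", "analysis_architecture",
    #      "analysis_code_quality"]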

    async def execute(self, state: AgentState, context: AgentContext) -> AgentState:
        """Execute analysis using the agent's template configuration.

        Args:
            state: Current LangGraph state
            context: Execution context

        Returns:
            Updated state with analysis results
        """
        try:
            start_time = datetime.utcnow()

            # Extract content to analyze from state
            content = state.get("content") or state.get("transcript", "")
            if not content:
                raise ValueError("No content provided for analysis")

            # Get additional context from state
            video_id = state.get("video_id")
            analysis_context = state.get("context", {})

            # Create template analysis request
            request = TemplateAnalysisRequest(
                content=content,
                template_id=self.template.id,
                context=analysis_context,
                video_id=video_id
            )

            # Perform template-driven analysis
            result = await self._execute_template_analysis(request)

            # Update performance metrics
            processing_time = (datetime.utcnow() - start_time).total_seconds()
            self._update_performance_metrics(result, processing_time)

            # Update agent state with results
            agent_key = f"agent_{self.template.id}"
            state[agent_key] = {
                "agent_id": self.agent_id,
                "template_id": self.template.id,
                "template_name": self.template.name,
                "result": result.dict(),
                "processing_time": processing_time,
                "timestamp": start_time.isoformat()
            }

            # Update execution metadata
            state["execution_metadata"] = state.get("execution_metadata", {})
            state["execution_metadata"][self.agent_id] = {
                "status": "completed",
                "confidence": result.confidence_score,
                "insights_count": len(result.key_insights),
                "processing_time": processing_time
            }

            logger.info(f"Agent {self.agent_id} completed analysis in {processing_time:.2f}s")
            return state

        except Exception as e:
            logger.error(f"Error in agent {self.agent_id} execution: {e}")
            return await self.handle_error(e, state, context)
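
    # A minimal wiring sketch (hedged): using execute() as a LangGraph node.
    # The graph and node names are illustrative, not part of this module.
    #
    #     async def analysis_node(state: AgentState) -> AgentState:
    #         return await agent.execute(state, context)
    #
    #     graph.add_node(f"agent_{agent.template.id}", analysis_node)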

    async def _execute_template_analysis(self, request: TemplateAnalysisRequest) -> TemplateAnalysisResult:
        """Execute template-driven analysis using the template-driven agent pattern."""
        start_time = datetime.utcnow()

        # Prepare context with template variables
        analysis_context = {
            **self.template.variables,
            **request.context,
            "content": request.content,
            "video_id": request.video_id or "unknown"
        }

        # Render system prompt with context
        system_prompt = self.template.render_prompt(analysis_context)

        # Create analysis prompt
        analysis_prompt = self._create_analysis_prompt(request.content, analysis_context)

        # Generate analysis using AI service
        ai_response = await self.ai_service.generate_summary({
            "prompt": analysis_prompt,
            "system_prompt": system_prompt,
            "max_tokens": 2000,
            "temperature": getattr(self.config, 'temperature', 0.7)
        })

        # Extract insights from response
        key_insights = self._extract_insights(ai_response)

        # Calculate confidence score
        confidence_score = self._calculate_confidence_score(ai_response)

        # Calculate processing time
        processing_time = (datetime.utcnow() - start_time).total_seconds()

        return TemplateAnalysisResult(
            template_id=self.template.id,
            template_name=self.template.name,
            analysis=ai_response,
            key_insights=key_insights,
            confidence_score=confidence_score,
            processing_time_seconds=processing_time,
            context_used=analysis_context,
            template_variables=self.template.variables
        )

    def _create_analysis_prompt(self, content: str, context: Dict[str, Any]) -> str:
        """Create the analysis prompt for the AI service."""
        return f"""
Please analyze the following content using the specified approach:

{content}

Analysis Instructions:
- Follow the output format specified in the template
- Generate between {self.template.min_insights} and {self.template.max_insights} key insights
- Target audience: {self.template.target_audience}
- Tone: {self.template.tone}
- Depth: {self.template.depth}
- Focus areas: {', '.join(self.template.analysis_focus)}
{'Include relevant examples and analogies.' if self.template.include_examples else ''}
{'Provide actionable recommendations.' if self.template.include_recommendations else ''}

Expected Output Format:
{self.template.output_format}
"""

    def _extract_insights(self, response: str) -> List[str]:
        """Extract key insights from the AI response."""
        insights = []

        # Parse structured insights from response
        lines = response.split('\n')
        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Look for insight markers (bulleted lists)
            if line.startswith('-') or line.startswith('•') or line.startswith('*'):
                insight = line[1:].strip()
                if len(insight) > 10:  # Filter out very short items
                    insights.append(insight)
            # Handle numbered lists ("1. ...", "12. ...", etc.)
            elif re.match(r'^\d+\.\s', line):
                insight = re.sub(r'^\d+\.\s*', '', line).strip()
                if len(insight) > 10:
                    insights.append(insight)

        # Ensure we have the right number of insights
        if len(insights) < self.template.min_insights:
            # Extract additional insights from content
            sentences = response.split('.')
            for sentence in sentences:
                sentence = sentence.strip()
                if len(sentence) > 20 and any(keyword in sentence.lower() for keyword in
                        ['important', 'key', 'significant', 'notable', 'crucial', 'essential']):
                    if sentence not in insights and len(insights) < self.template.max_insights:
                        insights.append(sentence)

        # Trim to max insights if needed
        if len(insights) > self.template.max_insights:
            insights = insights[:self.template.max_insights]

        return insights
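
    # Illustrative behaviour (assuming min_insights <= 2 <= max_insights):
    #     _extract_insights("- Caching cuts latency by half\n"
    #                       "2. Retries mask transient network failures")
    #     -> ["Caching cuts latency by half",
    #         "Retries mask transient network failures"]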

    def _calculate_confidence_score(self, response: str) -> float:
        """Calculate confidence score based on response quality."""
        score = 0.0

        # Length score (20%)
        if len(response) > 200:
            score += 0.2
        elif len(response) > 100:
            score += 0.1

        # Structure score (30%)
        if "##" in response or "**" in response:  # Has formatting
            score += 0.15
        if any(marker in response for marker in ['-', '•', '*', '1.']):  # Has lists
            score += 0.15

        # Content quality score (30%)
        focus_matches = sum(1 for focus in self.template.analysis_focus
                            if any(word.lower() in response.lower()
                                   for word in focus.split()))
        score += min(0.3, focus_matches * 0.1)

        # Completeness score (20%)
        expected_sections = self.template.output_format.count('##')
        actual_sections = response.count('##')
        if expected_sections > 0:
            completeness = min(1.0, actual_sections / expected_sections)
            score += completeness * 0.2
        else:
            score += 0.2  # Default if no specific structure expected

        return min(1.0, score)
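
    # Worked example (hypothetical response): a 500-character reply with "##"
    # headings and "-" bullets, matching 2 of the template's focus areas and
    # containing 3 of 4 expected "##" sections, scores:
    #     0.2 (length) + 0.3 (structure) + 0.2 (focus) + 0.15 (completeness)
    #     = 0.85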

    def _update_performance_metrics(self, result: TemplateAnalysisResult, processing_time: float) -> None:
        """Update agent performance metrics."""
        self._execution_count += 1
        self._total_processing_time += processing_time

        # Update average confidence (exponential moving average)
        alpha = 0.2
        if self._execution_count == 1:
            self._average_confidence = result.confidence_score
        else:
            self._average_confidence = (
                alpha * result.confidence_score +
                (1 - alpha) * self._average_confidence
            )

        self._last_execution = datetime.utcnow()
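
    # EMA worked example: with alpha = 0.2, a running average of 0.75 and a
    # new confidence of 0.90 update to 0.2 * 0.90 + 0.8 * 0.75 = 0.78, so
    # recent runs shift the average gradually rather than dominating it.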

    def get_performance_metrics(self) -> Dict[str, Any]:
        """Get comprehensive performance metrics for this agent."""
        avg_processing_time = (
            self._total_processing_time / max(self._execution_count, 1)
        )

        return {
            "agent_id": self.agent_id,
            "template_id": self.template.id,
            "template_name": self.template.name,
            "execution_count": self._execution_count,
            "total_processing_time": self._total_processing_time,
            "average_processing_time": avg_processing_time,
            "average_confidence": self._average_confidence,
            "last_execution": self._last_execution.isoformat() if self._last_execution else None,
            # _start_time is assumed to be set by BaseAgent during initialization
            "uptime_seconds": (
                (datetime.utcnow() - self._start_time).total_seconds()
                if self._start_time else 0
            )
        }

    async def validate_input(self, state: AgentState, context: AgentContext) -> bool:
        """Validate input before execution."""
        # Check for required content
        content = state.get("content") or state.get("transcript", "")
        if not content or len(content.strip()) < 50:
            logger.warning(f"Agent {self.agent_id}: Insufficient content for analysis")
            return False

        # Check template is still active
        if not self.template.is_active:
            logger.warning(f"Agent {self.agent_id}: Template {self.template.id} is inactive")
            return False

        # Check cost limits if configured
        if hasattr(self.config, 'cost_limit') and self.config.cost_limit:
            estimated_cost = len(content) * 0.00001  # Rough estimate
            if estimated_cost > self.config.cost_limit:
                logger.warning(f"Agent {self.agent_id}: Estimated cost exceeds limit")
                return False

        return True
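
    # Illustrative cost check (using the rough per-character estimate above):
    # a 120,000-character transcript estimates to 120000 * 0.00001 = 1.2, so
    # a config with cost_limit=1.0 would reject the run before any AI call.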

    async def handle_error(self, error: Exception, state: AgentState, context: AgentContext) -> AgentState:
        """Handle errors during execution with template-specific context."""
        logger.error(f"Error in agent {self.agent_id} (template: {self.template.id}): {str(error)}")

        state["error"] = {
            "agent_id": self.agent_id,
            "template_id": self.template.id,
            "error_type": type(error).__name__,
            "error_message": str(error),
            "timestamp": datetime.utcnow().isoformat(),
            "template_name": self.template.name
        }
        state["status"] = "error"

        # Update execution metadata
        state["execution_metadata"] = state.get("execution_metadata", {})
        state["execution_metadata"][self.agent_id] = {
            "status": "error",
            "error": str(error),
            "timestamp": datetime.utcnow().isoformat()
        }

        return state

    def __repr__(self) -> str:
        """String representation of the unified agent."""
        return f"<UnifiedAnalysisAgent(id={self.agent_id}, template={self.template.id}, name={self.template.name})>"