# youtube-summarizer/backend/services/unified_analysis_agent.py
"""Unified Analysis Agent - Template-driven multi-perspective analysis agent."""
import logging
import asyncio
from typing import Dict, List, Optional, Any, Union
from datetime import datetime
from pydantic import BaseModel, Field
# Import BaseAgent pattern from local implementation
from ..core.base_agent import (
BaseAgent, AgentMetadata, AgentConfig, AgentState, AgentContext, TaskResult
)
from ..models.analysis_templates import AnalysisTemplate, TemplateRegistry
from ..services.deepseek_service import DeepSeekService
from ..services.template_driven_agent import TemplateAnalysisRequest, TemplateAnalysisResult
logger = logging.getLogger(__name__)


class UnifiedAgentConfig(AgentConfig):
    """Extended configuration for unified analysis agents."""

    template_id: str = Field(..., description="Template ID for this agent instance")
    ai_service_config: Dict[str, Any] = Field(default_factory=dict, description="AI service configuration")
    cost_limit: Optional[float] = Field(None, description="Cost limit for AI operations")
    quality_threshold: float = Field(default=0.7, description="Minimum quality threshold for results")
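
# A minimal configuration sketch (illustrative values only; temperature and
# memory_enabled are presumably inherited from the base AgentConfig):
#
#     config = UnifiedAgentConfig(
#         template_id="technical_v1",  # hypothetical template ID
#         cost_limit=0.50,             # reject runs estimated above $0.50
#         quality_threshold=0.8,
#     )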


class UnifiedAnalysisAgent(BaseAgent):
    """
    Unified analysis agent that uses templates to determine behavior dynamically.

    This agent can function as:
    - Educational perspective (Beginner/Expert/Scholarly)
    - Domain perspective (Technical/Business/UX)
    - Any custom perspective defined via templates

    Key features:
    - Template-driven behavior switching
    - Automatic capability registration
    - LangGraph state management compatibility
    - Performance metrics and health monitoring
    """

    def __init__(
        self,
        template: AnalysisTemplate,
        ai_service: Optional[DeepSeekService] = None,
        template_registry: Optional[TemplateRegistry] = None,
        config: Optional[UnifiedAgentConfig] = None
    ):
        """Initialize the unified analysis agent.

        Args:
            template: Analysis template defining agent behavior
            ai_service: AI service for content processing
            template_registry: Registry for template lookups
            config: Agent configuration
        """
        # Create agent metadata from template
        metadata = AgentMetadata(
            agent_id=f"unified_{template.id}",
            name=template.name,
            description=template.description,
            category=template.template_type.value,
            capabilities=self._generate_capabilities_from_template(template)
        )

        # Use provided config or create from template
        if config is None:
            config = UnifiedAgentConfig(
                template_id=template.id,
                temperature=0.7,  # Default for analysis tasks
                memory_enabled=True
            )

        super().__init__(metadata, config)

        self.template = template
        self.ai_service = ai_service or DeepSeekService()
        self.template_registry = template_registry

        # Performance tracking
        self._execution_count = 0
        self._total_processing_time = 0.0
        self._average_confidence = 0.0
        self._last_execution: Optional[datetime] = None

        logger.info(f"Initialized UnifiedAnalysisAgent: {self.agent_id} ({template.name})")

    @classmethod
    def from_template_id(
        cls,
        template_id: str,
        template_registry: TemplateRegistry,
        ai_service: Optional[DeepSeekService] = None,
        config: Optional[UnifiedAgentConfig] = None
    ) -> "UnifiedAnalysisAgent":
        """Create agent from template ID.

        Args:
            template_id: ID of template to use
            template_registry: Registry containing the template
            ai_service: AI service instance
            config: Agent configuration

        Returns:
            Configured UnifiedAnalysisAgent

        Raises:
            ValueError: If template not found or inactive
        """
        template = template_registry.get_template(template_id)
        if not template:
            raise ValueError(f"Template not found: {template_id}")
        if not template.is_active:
            raise ValueError(f"Template is inactive: {template_id}")

        return cls(template, ai_service, template_registry, config)
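
    # A minimal usage sketch (hedged): names below are illustrative and assume
    # a registry that already contains an active template with this ID.
    #
    #     registry = TemplateRegistry()
    #     agent = UnifiedAnalysisAgent.from_template_id(
    #         "beginner_educational",  # hypothetical template ID
    #         template_registry=registry,
    #     )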

    def _generate_capabilities_from_template(self, template: AnalysisTemplate) -> List[str]:
        """Generate agent capabilities based on template configuration."""
        capabilities = [
            "content_analysis",
            "text_processing",
            f"{template.template_type.value}_perspective",
            "ai_summarization"
        ]

        # Add complexity-specific capabilities
        if template.complexity_level:
            capabilities.append(f"{template.complexity_level.value}_analysis")

        # Add focus-area capabilities
        for focus in template.analysis_focus:
            # Convert focus to capability format
            capability = focus.lower().replace(" ", "_").replace("-", "_")
            capabilities.append(f"analysis_{capability}")

        # Add template-specific capabilities
        if template.include_examples:
            capabilities.append("example_generation")
        if template.include_recommendations:
            capabilities.append("recommendation_generation")

        # Remove duplicates while preserving insertion order
        return list(dict.fromkeys(capabilities))
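
    # Illustrative output (hypothetical template): a "technical" template with
    # complexity level "expert" and focus areas ["Architecture", "Code Quality"]
    # would yield roughly:
    #     ["content_analysis", "text_processing", "technical_perspective",
    #      "ai_summarization", "expert_analysis", "analysis_architecture",
    #      "analysis_code_quality"]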

    async def execute(self, state: AgentState, context: AgentContext) -> AgentState:
        """Execute analysis using the agent's template configuration.

        Args:
            state: Current LangGraph state
            context: Execution context

        Returns:
            Updated state with analysis results
        """
        try:
            start_time = datetime.utcnow()

            # Extract content to analyze from state
            content = state.get("content") or state.get("transcript", "")
            if not content:
                raise ValueError("No content provided for analysis")

            # Get additional context from state
            video_id = state.get("video_id")
            analysis_context = state.get("context", {})

            # Create template analysis request
            request = TemplateAnalysisRequest(
                content=content,
                template_id=self.template.id,
                context=analysis_context,
                video_id=video_id
            )

            # Perform template-driven analysis
            result = await self._execute_template_analysis(request)

            # Update performance metrics
            processing_time = (datetime.utcnow() - start_time).total_seconds()
            self._update_performance_metrics(result, processing_time)

            # Update agent state with results
            agent_key = f"agent_{self.template.id}"
            state[agent_key] = {
                "agent_id": self.agent_id,
                "template_id": self.template.id,
                "template_name": self.template.name,
                "result": result.dict(),
                "processing_time": processing_time,
                "timestamp": start_time.isoformat()
            }

            # Update execution metadata
            state["execution_metadata"] = state.get("execution_metadata", {})
            state["execution_metadata"][self.agent_id] = {
                "status": "completed",
                "confidence": result.confidence_score,
                "insights_count": len(result.key_insights),
                "processing_time": processing_time
            }

            logger.info(f"Agent {self.agent_id} completed analysis in {processing_time:.2f}s")
            return state

        except Exception as e:
            logger.error(f"Error in agent {self.agent_id} execution: {e}")
            return await self.handle_error(e, state, context)
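
    # A minimal wiring sketch (hedged): using execute() as a LangGraph node.
    # The graph and node names are illustrative, not part of this module.
    #
    #     async def analysis_node(state: AgentState) -> AgentState:
    #         return await agent.execute(state, context)
    #
    #     graph.add_node(f"agent_{agent.template.id}", analysis_node)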

    async def _execute_template_analysis(self, request: TemplateAnalysisRequest) -> TemplateAnalysisResult:
        """Execute template-driven analysis using the template-driven agent pattern."""
        start_time = datetime.utcnow()

        # Prepare context with template variables
        analysis_context = {
            **self.template.variables,
            **request.context,
            "content": request.content,
            "video_id": request.video_id or "unknown"
        }

        # Render system prompt with context
        system_prompt = self.template.render_prompt(analysis_context)

        # Create analysis prompt
        analysis_prompt = self._create_analysis_prompt(request.content, analysis_context)

        # Generate analysis using AI service
        ai_response = await self.ai_service.generate_summary({
            "prompt": analysis_prompt,
            "system_prompt": system_prompt,
            "max_tokens": 2000,
            "temperature": getattr(self.config, 'temperature', 0.7)
        })

        # Extract insights from response
        key_insights = self._extract_insights(ai_response)

        # Calculate confidence score
        confidence_score = self._calculate_confidence_score(ai_response)

        # Calculate processing time
        processing_time = (datetime.utcnow() - start_time).total_seconds()

        return TemplateAnalysisResult(
            template_id=self.template.id,
            template_name=self.template.name,
            analysis=ai_response,
            key_insights=key_insights,
            confidence_score=confidence_score,
            processing_time_seconds=processing_time,
            context_used=analysis_context,
            template_variables=self.template.variables
        )

    def _create_analysis_prompt(self, content: str, context: Dict[str, Any]) -> str:
        """Create the analysis prompt for the AI service."""
        return f"""
Please analyze the following content using the specified approach:

{content}

Analysis Instructions:
- Follow the output format specified in the template
- Generate between {self.template.min_insights} and {self.template.max_insights} key insights
- Target audience: {self.template.target_audience}
- Tone: {self.template.tone}
- Depth: {self.template.depth}
- Focus areas: {', '.join(self.template.analysis_focus)}
{'Include relevant examples and analogies.' if self.template.include_examples else ''}
{'Provide actionable recommendations.' if self.template.include_recommendations else ''}

Expected Output Format:
{self.template.output_format}
"""

    def _extract_insights(self, response: str) -> List[str]:
        """Extract key insights from the AI response."""
        insights = []

        # Parse structured insights from response
        lines = response.split('\n')
        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Look for insight markers (bulleted lists)
            if line.startswith('-') or line.startswith('•') or line.startswith('*'):
                insight = line[1:].strip()
                if len(insight) > 10:  # Filter out very short items
                    insights.append(insight)
            # Handle numbered lists ("1. ...", "12. ...", etc.)
            elif re.match(r'^\d+\.\s', line):
                insight = re.sub(r'^\d+\.\s*', '', line).strip()
                if len(insight) > 10:
                    insights.append(insight)

        # Ensure we have the right number of insights
        if len(insights) < self.template.min_insights:
            # Extract additional insights from content
            sentences = response.split('.')
            for sentence in sentences:
                sentence = sentence.strip()
                if len(sentence) > 20 and any(keyword in sentence.lower() for keyword in
                        ['important', 'key', 'significant', 'notable', 'crucial', 'essential']):
                    if sentence not in insights and len(insights) < self.template.max_insights:
                        insights.append(sentence)

        # Trim to max insights if needed
        if len(insights) > self.template.max_insights:
            insights = insights[:self.template.max_insights]

        return insights
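
    # Illustrative behaviour (assuming min_insights <= 2 <= max_insights):
    #     _extract_insights("- Caching cuts latency by half\n"
    #                       "2. Retries mask transient network failures")
    #     -> ["Caching cuts latency by half",
    #         "Retries mask transient network failures"]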

    def _calculate_confidence_score(self, response: str) -> float:
        """Calculate confidence score based on response quality."""
        score = 0.0

        # Length score (20%)
        if len(response) > 200:
            score += 0.2
        elif len(response) > 100:
            score += 0.1

        # Structure score (30%)
        if "##" in response or "**" in response:  # Has formatting
            score += 0.15
        if any(marker in response for marker in ['-', '•', '*', '1.']):  # Has lists
            score += 0.15

        # Content quality score (30%)
        focus_matches = sum(1 for focus in self.template.analysis_focus
                            if any(word.lower() in response.lower()
                                   for word in focus.split()))
        score += min(0.3, focus_matches * 0.1)

        # Completeness score (20%)
        expected_sections = self.template.output_format.count('##')
        actual_sections = response.count('##')
        if expected_sections > 0:
            completeness = min(1.0, actual_sections / expected_sections)
            score += completeness * 0.2
        else:
            score += 0.2  # Default if no specific structure expected

        return min(1.0, score)
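
    # Worked example (hypothetical response): a 500-character reply with "##"
    # headings and "-" bullets, matching 2 of the template's focus areas and
    # containing 3 of 4 expected "##" sections, scores:
    #     0.2 (length) + 0.3 (structure) + 0.2 (focus) + 0.15 (completeness)
    #     = 0.85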

    def _update_performance_metrics(self, result: TemplateAnalysisResult, processing_time: float) -> None:
        """Update agent performance metrics."""
        self._execution_count += 1
        self._total_processing_time += processing_time

        # Update average confidence (exponential moving average)
        alpha = 0.2
        if self._execution_count == 1:
            self._average_confidence = result.confidence_score
        else:
            self._average_confidence = (
                alpha * result.confidence_score +
                (1 - alpha) * self._average_confidence
            )

        self._last_execution = datetime.utcnow()
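
    # EMA worked example: with alpha = 0.2, a running average of 0.75 and a
    # new confidence of 0.90 update to 0.2 * 0.90 + 0.8 * 0.75 = 0.78, so
    # recent runs shift the average gradually rather than dominating it.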

    def get_performance_metrics(self) -> Dict[str, Any]:
        """Get comprehensive performance metrics for this agent."""
        avg_processing_time = (
            self._total_processing_time / max(self._execution_count, 1)
        )

        return {
            "agent_id": self.agent_id,
            "template_id": self.template.id,
            "template_name": self.template.name,
            "execution_count": self._execution_count,
            "total_processing_time": self._total_processing_time,
            "average_processing_time": avg_processing_time,
            "average_confidence": self._average_confidence,
            "last_execution": self._last_execution.isoformat() if self._last_execution else None,
            # _start_time is assumed to be set by BaseAgent during initialization
            "uptime_seconds": (
                (datetime.utcnow() - self._start_time).total_seconds()
                if self._start_time else 0
            )
        }

    async def validate_input(self, state: AgentState, context: AgentContext) -> bool:
        """Validate input before execution."""
        # Check for required content
        content = state.get("content") or state.get("transcript", "")
        if not content or len(content.strip()) < 50:
            logger.warning(f"Agent {self.agent_id}: Insufficient content for analysis")
            return False

        # Check template is still active
        if not self.template.is_active:
            logger.warning(f"Agent {self.agent_id}: Template {self.template.id} is inactive")
            return False

        # Check cost limits if configured
        if hasattr(self.config, 'cost_limit') and self.config.cost_limit:
            estimated_cost = len(content) * 0.00001  # Rough estimate
            if estimated_cost > self.config.cost_limit:
                logger.warning(f"Agent {self.agent_id}: Estimated cost exceeds limit")
                return False

        return True
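
    # Illustrative cost check (using the rough per-character estimate above):
    # a 120,000-character transcript estimates to 120000 * 0.00001 = 1.2, so
    # a config with cost_limit=1.0 would reject the run before any AI call.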

    async def handle_error(self, error: Exception, state: AgentState, context: AgentContext) -> AgentState:
        """Handle errors during execution with template-specific context."""
        logger.error(f"Error in agent {self.agent_id} (template: {self.template.id}): {str(error)}")

        state["error"] = {
            "agent_id": self.agent_id,
            "template_id": self.template.id,
            "error_type": type(error).__name__,
            "error_message": str(error),
            "timestamp": datetime.utcnow().isoformat(),
            "template_name": self.template.name
        }
        state["status"] = "error"

        # Update execution metadata
        state["execution_metadata"] = state.get("execution_metadata", {})
        state["execution_metadata"][self.agent_id] = {
            "status": "error",
            "error": str(error),
            "timestamp": datetime.utcnow().isoformat()
        }

        return state

    def __repr__(self) -> str:
        """String representation of the unified agent."""
        return f"<UnifiedAnalysisAgent(id={self.agent_id}, template={self.template.id}, name={self.template.name})>"