"""Unified Analysis Agent - Template-driven multi-perspective analysis agent."""

import logging
import asyncio
from typing import Dict, List, Optional, Any, Union
from datetime import datetime

from pydantic import BaseModel, Field

# Import BaseAgent pattern from local implementation
from ..core.base_agent import (
    BaseAgent, AgentMetadata, AgentConfig, AgentState, AgentContext, TaskResult
)

from ..models.analysis_templates import AnalysisTemplate, TemplateRegistry
from ..services.deepseek_service import DeepSeekService
from ..services.template_driven_agent import TemplateAnalysisRequest, TemplateAnalysisResult

logger = logging.getLogger(__name__)


class UnifiedAgentConfig(AgentConfig):
    """Extended configuration for unified analysis agents."""

    template_id: str = Field(..., description="Template ID for this agent instance")
    ai_service_config: Dict[str, Any] = Field(default_factory=dict, description="AI service configuration")
    cost_limit: Optional[float] = Field(None, description="Cost limit for AI operations")
    quality_threshold: float = Field(default=0.7, description="Minimum quality threshold for results")


class UnifiedAnalysisAgent(BaseAgent):
    """
    Unified analysis agent that uses templates to determine behavior dynamically.

    This agent can function as:
    - Educational perspective (Beginner/Expert/Scholarly)
    - Domain perspective (Technical/Business/UX)
    - Any custom perspective defined via templates

    Key features:
    - Template-driven behavior switching
    - Automatic capability registration
    - LangGraph state management compatibility
    - Performance metrics and health monitoring
    """

    def __init__(
        self,
        template: AnalysisTemplate,
        ai_service: Optional[DeepSeekService] = None,
        template_registry: Optional[TemplateRegistry] = None,
        config: Optional[UnifiedAgentConfig] = None
    ):
        """Initialize the unified analysis agent.

        Args:
            template: Analysis template defining agent behavior
            ai_service: AI service for content processing
            template_registry: Registry for template lookups
            config: Agent configuration
        """
        # Create agent metadata from template
        metadata = AgentMetadata(
            agent_id=f"unified_{template.id}",
            name=template.name,
            description=template.description,
            category=template.template_type.value,
            capabilities=self._generate_capabilities_from_template(template)
        )

        # Use provided config or create from template
        if config is None:
            config = UnifiedAgentConfig(
                template_id=template.id,
                temperature=0.7,  # Default for analysis tasks
                memory_enabled=True
            )

        super().__init__(metadata, config)

        self.template = template
        self.ai_service = ai_service or DeepSeekService()
        self.template_registry = template_registry

        # Performance tracking
        self._execution_count = 0
        self._total_processing_time = 0.0
        self._average_confidence = 0.0
        self._last_execution: Optional[datetime] = None

        logger.info(f"Initialized UnifiedAnalysisAgent: {self.agent_id} ({template.name})")

    @classmethod
    def from_template_id(
        cls,
        template_id: str,
        template_registry: TemplateRegistry,
        ai_service: Optional[DeepSeekService] = None,
        config: Optional[UnifiedAgentConfig] = None
    ) -> "UnifiedAnalysisAgent":
        """Create agent from template ID.

        Args:
            template_id: ID of template to use
            template_registry: Registry containing the template
            ai_service: AI service instance
            config: Agent configuration

        Returns:
            Configured UnifiedAnalysisAgent

        Raises:
            ValueError: If template not found or inactive
        """
        template = template_registry.get_template(template_id)
        if not template:
            raise ValueError(f"Template not found: {template_id}")

        if not template.is_active:
            raise ValueError(f"Template is inactive: {template_id}")

        return cls(template, ai_service, template_registry, config)
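
    # Illustrative sketch (added note, not original source): a caller might build
    # an agent from a registered template along these lines, where the template id
    # "technical_deep_dive" is hypothetical:
    #
    #   registry = TemplateRegistry()
    #   agent = UnifiedAnalysisAgent.from_template_id("technical_deep_dive", registry)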

    def _generate_capabilities_from_template(self, template: AnalysisTemplate) -> List[str]:
        """Generate agent capabilities based on template configuration."""
        capabilities = [
            "content_analysis",
            "text_processing",
            f"{template.template_type.value}_perspective",
            "ai_summarization"
        ]

        # Add complexity-specific capabilities
        if template.complexity_level:
            capabilities.append(f"{template.complexity_level.value}_analysis")

        # Add focus-area capabilities
        for focus in template.analysis_focus:
            # Convert focus to capability format
            capability = focus.lower().replace(" ", "_").replace("-", "_")
            capabilities.append(f"analysis_{capability}")

        # Add template-specific capabilities
        if template.include_examples:
            capabilities.append("example_generation")

        if template.include_recommendations:
            capabilities.append("recommendation_generation")

        return list(set(capabilities))  # Remove duplicates

    async def execute(self, state: AgentState, context: AgentContext) -> AgentState:
        """Execute analysis using the agent's template configuration.

        Args:
            state: Current LangGraph state
            context: Execution context

        Returns:
            Updated state with analysis results
        """
        try:
            start_time = datetime.utcnow()

            # Extract content to analyze from state
            content = state.get("content") or state.get("transcript", "")
            if not content:
                raise ValueError("No content provided for analysis")

            # Get additional context from state
            video_id = state.get("video_id")
            analysis_context = state.get("context", {})

            # Create template analysis request
            request = TemplateAnalysisRequest(
                content=content,
                template_id=self.template.id,
                context=analysis_context,
                video_id=video_id
            )

            # Perform template-driven analysis
            result = await self._execute_template_analysis(request)

            # Update performance metrics
            processing_time = (datetime.utcnow() - start_time).total_seconds()
            self._update_performance_metrics(result, processing_time)

            # Update agent state with results
            agent_key = f"agent_{self.template.id}"
            state[agent_key] = {
                "agent_id": self.agent_id,
                "template_id": self.template.id,
                "template_name": self.template.name,
                "result": result.dict(),
                "processing_time": processing_time,
                "timestamp": start_time.isoformat()
            }

            # Update execution metadata
            state["execution_metadata"] = state.get("execution_metadata", {})
            state["execution_metadata"][self.agent_id] = {
                "status": "completed",
                "confidence": result.confidence_score,
                "insights_count": len(result.key_insights),
                "processing_time": processing_time
            }

            logger.info(f"Agent {self.agent_id} completed analysis in {processing_time:.2f}s")
            return state

        except Exception as e:
            logger.error(f"Error in agent {self.agent_id} execution: {e}")
            return await self.handle_error(e, state, context)
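
    # Descriptive note (added): after a successful run, execute() stores results
    # under state[f"agent_{self.template.id}"] and a per-agent summary under
    # state["execution_metadata"][self.agent_id]; on failure, handle_error()
    # records state["error"] and sets state["status"] = "error".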

    async def _execute_template_analysis(self, request: TemplateAnalysisRequest) -> TemplateAnalysisResult:
        """Execute template-driven analysis using the template-driven agent pattern."""
        start_time = datetime.utcnow()

        # Prepare context with template variables
        analysis_context = {
            **self.template.variables,
            **request.context,
            "content": request.content,
            "video_id": request.video_id or "unknown"
        }

        # Render system prompt with context
        system_prompt = self.template.render_prompt(analysis_context)

        # Create analysis prompt
        analysis_prompt = self._create_analysis_prompt(request.content, analysis_context)

        # Generate analysis using AI service
        ai_response = await self.ai_service.generate_summary({
            "prompt": analysis_prompt,
            "system_prompt": system_prompt,
            "max_tokens": 2000,
            "temperature": getattr(self.config, 'temperature', 0.7)
        })

        # Extract insights from response
        key_insights = self._extract_insights(ai_response)

        # Calculate confidence score
        confidence_score = self._calculate_confidence_score(ai_response)

        # Calculate processing time
        processing_time = (datetime.utcnow() - start_time).total_seconds()

        return TemplateAnalysisResult(
            template_id=self.template.id,
            template_name=self.template.name,
            analysis=ai_response,
            key_insights=key_insights,
            confidence_score=confidence_score,
            processing_time_seconds=processing_time,
            context_used=analysis_context,
            template_variables=self.template.variables
        )

    def _create_analysis_prompt(self, content: str, context: Dict[str, Any]) -> str:
        """Create the analysis prompt for the AI service."""
        return f"""
Please analyze the following content using the specified approach:

{content}

Analysis Instructions:
- Follow the output format specified in the template
- Generate between {self.template.min_insights} and {self.template.max_insights} key insights
- Target audience: {self.template.target_audience}
- Tone: {self.template.tone}
- Depth: {self.template.depth}
- Focus areas: {', '.join(self.template.analysis_focus)}

{'Include relevant examples and analogies.' if self.template.include_examples else ''}
{'Provide actionable recommendations.' if self.template.include_recommendations else ''}

Expected Output Format:
{self.template.output_format}
"""

    def _extract_insights(self, response: str) -> List[str]:
        """Extract key insights from the AI response."""
        insights = []

        # Parse structured insights from response
        lines = response.split('\n')

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Look for insight markers
            if line.startswith('-') or line.startswith('•') or line.startswith('*'):
                insight = line[1:].strip()
                if len(insight) > 10:  # Filter out very short items
                    insights.append(insight)
            elif any(numbered in line for numbered in ['1.', '2.', '3.', '4.', '5.']):
                # Handle numbered lists
                if '. ' in line:
                    insight = line.split('. ', 1)[1].strip()
                    if len(insight) > 10:
                        insights.append(insight)

        # Ensure we have the right number of insights
        if len(insights) < self.template.min_insights:
            # Extract additional insights from content
            sentences = response.split('.')
            for sentence in sentences:
                sentence = sentence.strip()
                if len(sentence) > 20 and any(keyword in sentence.lower() for keyword in
                                              ['important', 'key', 'significant', 'notable', 'crucial', 'essential']):
                    if sentence not in insights and len(insights) < self.template.max_insights:
                        insights.append(sentence)

        # Trim to max insights if needed
        if len(insights) > self.template.max_insights:
            insights = insights[:self.template.max_insights]

        return insights

    def _calculate_confidence_score(self, response: str) -> float:
        """Calculate confidence score based on response quality."""
        score = 0.0

        # Length score (20%)
        if len(response) > 200:
            score += 0.2
        elif len(response) > 100:
            score += 0.1

        # Structure score (30%)
        if "##" in response or "**" in response:  # Has formatting
            score += 0.15
        if any(marker in response for marker in ['-', '•', '*', '1.']):  # Has lists
            score += 0.15

        # Content quality score (30%)
        focus_matches = sum(1 for focus in self.template.analysis_focus
                            if any(word.lower() in response.lower()
                                   for word in focus.split()))
        score += min(0.3, focus_matches * 0.1)

        # Completeness score (20%)
        expected_sections = self.template.output_format.count('##')
        actual_sections = response.count('##')
        if expected_sections > 0:
            completeness = min(1.0, actual_sections / expected_sections)
            score += completeness * 0.2
        else:
            score += 0.2  # Default if no specific structure expected

        return min(1.0, score)
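
    # Worked example (added note): a 500-character response containing "##"
    # headings and bulleted lists, matching two focus areas, with the same number
    # of "##" sections as the template's output_format, scores
    # 0.2 + 0.15 + 0.15 + 0.2 + 0.2 = 0.9.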

    def _update_performance_metrics(self, result: TemplateAnalysisResult, processing_time: float) -> None:
        """Update agent performance metrics."""
        self._execution_count += 1
        self._total_processing_time += processing_time

        # Update average confidence (exponential moving average)
        alpha = 0.2
        if self._execution_count == 1:
            self._average_confidence = result.confidence_score
        else:
            self._average_confidence = (
                alpha * result.confidence_score +
                (1 - alpha) * self._average_confidence
            )

        self._last_execution = datetime.utcnow()

    def get_performance_metrics(self) -> Dict[str, Any]:
        """Get comprehensive performance metrics for this agent."""
        avg_processing_time = (
            self._total_processing_time / max(self._execution_count, 1)
        )

        return {
            "agent_id": self.agent_id,
            "template_id": self.template.id,
            "template_name": self.template.name,
            "execution_count": self._execution_count,
            "total_processing_time": self._total_processing_time,
            "average_processing_time": avg_processing_time,
            "average_confidence": self._average_confidence,
            "last_execution": self._last_execution.isoformat() if self._last_execution else None,
            "uptime_seconds": (
                (datetime.utcnow() - self._start_time).total_seconds()
                if self._start_time else 0
            )
        }

    async def validate_input(self, state: AgentState, context: AgentContext) -> bool:
        """Validate input before execution."""
        # Check for required content
        content = state.get("content") or state.get("transcript", "")
        if not content or len(content.strip()) < 50:
            logger.warning(f"Agent {self.agent_id}: Insufficient content for analysis")
            return False

        # Check template is still active
        if not self.template.is_active:
            logger.warning(f"Agent {self.agent_id}: Template {self.template.id} is inactive")
            return False

        # Check cost limits if configured
        if hasattr(self.config, 'cost_limit') and self.config.cost_limit:
            estimated_cost = len(content) * 0.00001  # Rough estimate
            if estimated_cost > self.config.cost_limit:
                logger.warning(f"Agent {self.agent_id}: Estimated cost exceeds limit")
                return False

        return True

    async def handle_error(self, error: Exception, state: AgentState, context: AgentContext) -> AgentState:
        """Handle errors during execution with template-specific context."""
        logger.error(f"Error in agent {self.agent_id} (template: {self.template.id}): {str(error)}")

        state["error"] = {
            "agent_id": self.agent_id,
            "template_id": self.template.id,
            "error_type": type(error).__name__,
            "error_message": str(error),
            "timestamp": datetime.utcnow().isoformat(),
            "template_name": self.template.name
        }
        state["status"] = "error"

        # Update execution metadata
        state["execution_metadata"] = state.get("execution_metadata", {})
        state["execution_metadata"][self.agent_id] = {
            "status": "error",
            "error": str(error),
            "timestamp": datetime.utcnow().isoformat()
        }

        return state

    def __repr__(self) -> str:
        """String representation of the unified agent."""
        return f"<UnifiedAnalysisAgent(id={self.agent_id}, template={self.template.id}, name={self.template.name})>"