youtube-summarizer/backend/services/analysis_agents.py

706 lines
28 KiB
Python

"""Specialized analysis agents for multi-video and multi-agent analysis."""
import asyncio
import logging
from typing import Dict, List, Optional, Any
from datetime import datetime
from dataclasses import dataclass
from pydantic import BaseModel
# Import from the AI ecosystem if available
try:
    from ...src.agents.ecosystem.core.base_agent import BaseAgent, AgentMetadata, AgentConfig
    from ...src.agents.ecosystem.core.agent_state import AgentState, AgentContext
except ImportError:
    # The ecosystem package could not be imported: provide minimal local
    # stand-ins exposing only the attributes this module relies on.
    ECOSYSTEM_AVAILABLE = False

    class BaseAgent:
        """Minimal agent base that keeps the metadata and config it is given."""

        def __init__(self, metadata, config=None):
            self.metadata = metadata
            # A falsy config is replaced by an empty dict.
            self.config = config if config else {}
            self.agent_id = metadata.agent_id
            self.name = metadata.name

    class AgentMetadata:
        """Plain record describing an agent."""

        def __init__(self, agent_id: str, name: str, description: str, category: str, capabilities: List[str]):
            self.agent_id = agent_id
            self.name = name
            self.description = description
            self.category = category
            self.capabilities = capabilities

    class AgentConfig:
        """Attribute bag: every keyword argument becomes an instance attribute."""

        def __init__(self, **kwargs):
            vars(self).update(kwargs)

    # In fallback mode the agent state is a plain dictionary.
    AgentState = Dict[str, Any]

    class AgentContext:
        """Attribute bag: every keyword argument becomes an instance attribute."""

        def __init__(self, **kwargs):
            vars(self).update(kwargs)
else:
    # Both ecosystem imports succeeded.
    ECOSYSTEM_AVAILABLE = True
from .deepseek_service import DeepSeekService
from ..core.exceptions import ServiceError
logger = logging.getLogger(__name__)
# Analysis Result Models
class AnalysisResult(BaseModel):
    """Result from a single analysis agent.

    The analysis agents in this module build one instance from the parsed
    model response and store it in the shared state as a dict.
    """

    # Perspective label set by the producing agent
    # ("technical", "business" or "user_experience").
    agent_type: str
    # Narrative overview taken from the model response.
    summary: str
    # Individual takeaways listed by the model.
    key_insights: List[str]
    # Topics covered; agents fall back to their configured focus areas
    # when the response does not supply any.
    focus_areas: List[str]
    # Actionable recommendations listed by the model.
    recommendations: List[str]
    # Confidence value taken from the response (the prompts show 0.85 as an
    # example value); agents default it to 0.7 when the response omits it.
    confidence_score: float
    # Elapsed time of the AI service call, in seconds.
    processing_time_seconds: float
    # Unparsed text returned by the AI service, when available.
    raw_response: Optional[str] = None
class MultiAgentAnalysisResult(BaseModel):
    """Combined result from multiple analysis agents."""

    # Identifier and title of the analyzed video, as passed by the caller.
    video_id: str
    video_title: str
    # One entry per analysis agent that completed successfully.
    agent_analyses: List[AnalysisResult]
    # Integrated summary produced by the synthesis step.
    synthesis_summary: str
    # Most significant insights across all perspectives.
    unified_insights: List[str]
    # Relationships identified between the different perspectives.
    cross_perspective_connections: List[str]
    # Elapsed time of the whole multi-agent run, in seconds.
    total_processing_time: float
    # Weighted blend of the agents' average confidence and the synthesis
    # confidence, as computed by the orchestrator.
    quality_score: float
# Specialized Analysis Agents
class TechnicalAnalysisAgent(BaseAgent):
    """Agent specialized in technical analysis of video content.

    Reads ``transcript`` and ``video_title`` from the state, asks the AI
    service for a JSON-formatted technical analysis and stores the outcome
    under ``state["technical_analysis"]``.
    """

    def __init__(self, ai_service: Optional[DeepSeekService] = None):
        """Initialize technical analysis agent.

        Args:
            ai_service: Service used to generate responses. A new
                ``DeepSeekService`` is created when omitted.
        """
        metadata = AgentMetadata(
            agent_id="technical_analyst",
            name="Technical Analysis Agent",
            description="Analyzes technical concepts, implementations, tools, and architectures",
            category="analysis",
            capabilities=[
                "technical_analysis", "code_review", "architecture_analysis",
                "tool_evaluation", "implementation_patterns", "technical_insights"
            ]
        )
        config = AgentConfig(
            temperature=0.3,  # Lower temperature for more consistent technical analysis
            max_tokens=1500,
            focus_areas=[
                "technical_concepts", "tools_and_technologies", "implementation_details",
                "architecture_patterns", "best_practices", "performance_optimization"
            ]
        )
        super().__init__(metadata, config)
        self.ai_service = ai_service or DeepSeekService()

    async def execute(self, state: AgentState, context: AgentContext) -> AgentState:
        """Execute technical analysis.

        Args:
            state: Shared state. ``transcript`` is required; ``video_title``
                is optional.
            context: Execution context (not read by this agent).

        Returns:
            The same state object. On success it gains ``technical_analysis``
            (an ``AnalysisResult`` as a dict) and ``status == "completed"``;
            on failure it gains ``error`` and ``status == "error"``.

        Raises:
            ServiceError: If the state contains no transcript.
        """
        transcript = state.get("transcript", "")
        video_title = state.get("video_title", "")
        if not transcript:
            raise ServiceError("No transcript provided for technical analysis")
        start_time = datetime.now()
        # Technical analysis prompt (only the first 8000 characters of the
        # transcript are sent).
        prompt = f"""
You are a Technical Analysis Agent specializing in analyzing technical concepts,
implementation details, tools, technologies, and architectural patterns.
Video Title: {video_title}
Focus areas:
- Technical concepts and methodologies explained
- Tools, frameworks, and technologies mentioned
- Implementation approaches and best practices
- Code examples and technical demonstrations
- System architecture and design patterns
- Performance considerations and optimizations
- Technical challenges and solutions presented
Analyze the following transcript from a technical perspective:
{transcript[:8000]}
Provide your analysis in JSON format:
{{
"summary": "Technical overview in 2-3 paragraphs focusing on implementation and architecture",
"key_insights": ["List of 5-8 specific technical insights and takeaways"],
"focus_areas": ["Primary technical topics covered"],
"recommendations": ["3-5 actionable technical recommendations"],
"confidence_score": 0.85
}}
"""
        try:
            response = await self.ai_service.generate_response(
                prompt=prompt,
                temperature=self.config.temperature,
                max_tokens=self.config.max_tokens
            )
            processing_time = (datetime.now() - start_time).total_seconds()
            # Parse response
            analysis_data = self._parse_response(response)
            # Create result
            result = AnalysisResult(
                agent_type="technical",
                summary=analysis_data.get("summary", ""),
                key_insights=analysis_data.get("key_insights", []),
                focus_areas=analysis_data.get("focus_areas", self.config.focus_areas),
                recommendations=analysis_data.get("recommendations", []),
                confidence_score=analysis_data.get("confidence_score", 0.7),
                processing_time_seconds=processing_time,
                raw_response=response
            )
            # Update state
            state["technical_analysis"] = result.dict()
            state["status"] = "completed"
            return state
        except Exception as e:
            # Agent boundary: report the failure through the state instead of
            # raising, and keep the traceback in the log.
            logger.exception("Technical analysis failed: %s", e)
            state["error"] = str(e)
            state["status"] = "error"
            return state

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Parse AI response into structured data.

        Tries, in order: the whole response when it starts with ``{``, the
        contents of a ```` ```json ```` fence, and finally the span between
        the first ``{`` and the last ``}``. Only JSON objects are accepted.

        Args:
            response: Raw text returned by the AI service.

        Returns:
            The decoded JSON object, or a low-confidence fallback dict when
            no JSON object can be extracted.
        """
        import json

        text = response.strip() if isinstance(response, str) else ""
        candidates = []
        if text.startswith("{"):
            candidates.append(text)
        if "```json" in text:
            start = text.find("```json") + len("```json")
            end = text.find("```", start)
            # An unterminated fence yields -1; read to the end instead of
            # silently dropping the final character.
            candidates.append(text[start:] if end == -1 else text[start:end])
        first, last = text.find("{"), text.rfind("}")
        if 0 <= first < last:
            # Last resort: a JSON object surrounded by explanatory prose.
            candidates.append(text[first:last + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                continue
            # Callers use ``.get``, so anything but an object is unusable.
            if isinstance(parsed, dict):
                return parsed

        logger.warning("Technical analysis response was not valid JSON; using fallback result")
        # Fallback text parsing
        return {
            "summary": response[:500] if isinstance(response, str) and response else "Technical analysis failed",
            "key_insights": ["Technical analysis could not be completed"],
            "focus_areas": self.config.focus_areas,
            "recommendations": ["Retry analysis with improved input"],
            "confidence_score": 0.3
        }
class BusinessAnalysisAgent(BaseAgent):
    """Agent specialized in business value and strategic analysis.

    Reads ``transcript`` and ``video_title`` from the state, asks the AI
    service for a JSON-formatted business analysis and stores the outcome
    under ``state["business_analysis"]``.
    """

    def __init__(self, ai_service: Optional[DeepSeekService] = None):
        """Initialize business analysis agent.

        Args:
            ai_service: Service used to generate responses. A new
                ``DeepSeekService`` is created when omitted.
        """
        metadata = AgentMetadata(
            agent_id="business_analyst",
            name="Business Analysis Agent",
            description="Analyzes business value, market implications, and strategic insights",
            category="analysis",
            capabilities=[
                "business_analysis", "roi_analysis", "market_research",
                "strategic_planning", "competitive_analysis", "value_assessment"
            ]
        )
        config = AgentConfig(
            temperature=0.4,
            max_tokens=1500,
            focus_areas=[
                "business_value", "market_implications", "roi_analysis",
                "strategic_insights", "competitive_advantages", "risk_assessment"
            ]
        )
        super().__init__(metadata, config)
        self.ai_service = ai_service or DeepSeekService()

    async def execute(self, state: AgentState, context: AgentContext) -> AgentState:
        """Execute business analysis.

        Args:
            state: Shared state. ``transcript`` is required; ``video_title``
                is optional.
            context: Execution context (not read by this agent).

        Returns:
            The same state object. On success it gains ``business_analysis``
            (an ``AnalysisResult`` as a dict) and ``status == "completed"``;
            on failure it gains ``error`` and ``status == "error"``.

        Raises:
            ServiceError: If the state contains no transcript.
        """
        transcript = state.get("transcript", "")
        video_title = state.get("video_title", "")
        if not transcript:
            raise ServiceError("No transcript provided for business analysis")
        start_time = datetime.now()
        # Business analysis prompt (only the first 8000 characters of the
        # transcript are sent).
        prompt = f"""
You are a Business Analysis Agent specializing in analyzing business value,
market implications, ROI considerations, and strategic insights.
Video Title: {video_title}
Focus areas:
- Business value propositions and ROI implications
- Market opportunities and competitive advantages
- Strategic decision-making insights
- Cost-benefit analysis and resource allocation
- Revenue generation potential and business models
- Risk assessment and mitigation strategies
- Stakeholder impact and organizational benefits
Analyze the following transcript from a business perspective:
{transcript[:8000]}
Provide your analysis in JSON format:
{{
"summary": "Business-focused overview in 2-3 paragraphs emphasizing value and strategy",
"key_insights": ["List of 5-8 specific business insights and opportunities"],
"focus_areas": ["Primary business topics and value propositions"],
"recommendations": ["3-5 actionable business recommendations"],
"confidence_score": 0.85
}}
"""
        try:
            response = await self.ai_service.generate_response(
                prompt=prompt,
                temperature=self.config.temperature,
                max_tokens=self.config.max_tokens
            )
            processing_time = (datetime.now() - start_time).total_seconds()
            # Parse response
            analysis_data = self._parse_response(response)
            # Create result
            result = AnalysisResult(
                agent_type="business",
                summary=analysis_data.get("summary", ""),
                key_insights=analysis_data.get("key_insights", []),
                focus_areas=analysis_data.get("focus_areas", self.config.focus_areas),
                recommendations=analysis_data.get("recommendations", []),
                confidence_score=analysis_data.get("confidence_score", 0.7),
                processing_time_seconds=processing_time,
                raw_response=response
            )
            # Update state
            state["business_analysis"] = result.dict()
            state["status"] = "completed"
            return state
        except Exception as e:
            # Agent boundary: report the failure through the state instead of
            # raising, and keep the traceback in the log.
            logger.exception("Business analysis failed: %s", e)
            state["error"] = str(e)
            state["status"] = "error"
            return state

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Parse AI response into structured data.

        Tries, in order: the whole response when it starts with ``{``, the
        contents of a ```` ```json ```` fence, and finally the span between
        the first ``{`` and the last ``}``. Only JSON objects are accepted.

        Args:
            response: Raw text returned by the AI service.

        Returns:
            The decoded JSON object, or a low-confidence fallback dict when
            no JSON object can be extracted.
        """
        import json

        text = response.strip() if isinstance(response, str) else ""
        candidates = []
        if text.startswith("{"):
            candidates.append(text)
        if "```json" in text:
            start = text.find("```json") + len("```json")
            end = text.find("```", start)
            # An unterminated fence yields -1; read to the end instead of
            # silently dropping the final character.
            candidates.append(text[start:] if end == -1 else text[start:end])
        first, last = text.find("{"), text.rfind("}")
        if 0 <= first < last:
            # Last resort: a JSON object surrounded by explanatory prose.
            candidates.append(text[first:last + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                continue
            # Callers use ``.get``, so anything but an object is unusable.
            if isinstance(parsed, dict):
                return parsed

        logger.warning("Business analysis response was not valid JSON; using fallback result")
        return {
            "summary": response[:500] if isinstance(response, str) and response else "Business analysis failed",
            "key_insights": ["Business analysis could not be completed"],
            "focus_areas": self.config.focus_areas,
            "recommendations": ["Retry analysis with improved input"],
            "confidence_score": 0.3
        }
class UserExperienceAnalysisAgent(BaseAgent):
    """Agent specialized in user experience and usability analysis.

    Reads ``transcript`` and ``video_title`` from the state, asks the AI
    service for a JSON-formatted UX analysis and stores the outcome under
    ``state["ux_analysis"]``.
    """

    def __init__(self, ai_service: Optional[DeepSeekService] = None):
        """Initialize UX analysis agent.

        Args:
            ai_service: Service used to generate responses. A new
                ``DeepSeekService`` is created when omitted.
        """
        metadata = AgentMetadata(
            agent_id="ux_analyst",
            name="User Experience Analysis Agent",
            description="Analyzes user journey, usability, and accessibility aspects",
            category="analysis",
            capabilities=[
                "ux_analysis", "usability_assessment", "accessibility_review",
                "user_journey_mapping", "interaction_design", "user_research"
            ]
        )
        config = AgentConfig(
            temperature=0.4,
            max_tokens=1500,
            focus_areas=[
                "user_journey", "usability_principles", "accessibility_features",
                "user_engagement", "pain_point_analysis", "experience_optimization"
            ]
        )
        super().__init__(metadata, config)
        self.ai_service = ai_service or DeepSeekService()

    async def execute(self, state: AgentState, context: AgentContext) -> AgentState:
        """Execute UX analysis.

        Args:
            state: Shared state. ``transcript`` is required; ``video_title``
                is optional.
            context: Execution context (not read by this agent).

        Returns:
            The same state object. On success it gains ``ux_analysis`` (an
            ``AnalysisResult`` as a dict) and ``status == "completed"``; on
            failure it gains ``error`` and ``status == "error"``.

        Raises:
            ServiceError: If the state contains no transcript.
        """
        transcript = state.get("transcript", "")
        video_title = state.get("video_title", "")
        if not transcript:
            raise ServiceError("No transcript provided for UX analysis")
        start_time = datetime.now()
        # UX analysis prompt (only the first 8000 characters of the
        # transcript are sent).
        prompt = f"""
You are a User Experience Analysis Agent specializing in analyzing user journey,
usability, accessibility, and overall user experience aspects.
Video Title: {video_title}
Focus areas:
- User journey and experience flow
- Usability principles and interface design
- Accessibility considerations and inclusive design
- User engagement patterns and behavior
- Pain points and friction areas identified
- User satisfaction and experience optimization
- Design principles and user-centered approaches
Analyze the following transcript from a UX perspective:
{transcript[:8000]}
Provide your analysis in JSON format:
{{
"summary": "UX-focused overview in 2-3 paragraphs emphasizing user experience and design",
"key_insights": ["List of 5-8 specific UX insights and user experience findings"],
"focus_areas": ["Primary UX topics and user experience areas"],
"recommendations": ["3-5 actionable UX improvements and recommendations"],
"confidence_score": 0.85
}}
"""
        try:
            response = await self.ai_service.generate_response(
                prompt=prompt,
                temperature=self.config.temperature,
                max_tokens=self.config.max_tokens
            )
            processing_time = (datetime.now() - start_time).total_seconds()
            # Parse response
            analysis_data = self._parse_response(response)
            # Create result
            result = AnalysisResult(
                agent_type="user_experience",
                summary=analysis_data.get("summary", ""),
                key_insights=analysis_data.get("key_insights", []),
                focus_areas=analysis_data.get("focus_areas", self.config.focus_areas),
                recommendations=analysis_data.get("recommendations", []),
                confidence_score=analysis_data.get("confidence_score", 0.7),
                processing_time_seconds=processing_time,
                raw_response=response
            )
            # Update state
            state["ux_analysis"] = result.dict()
            state["status"] = "completed"
            return state
        except Exception as e:
            # Agent boundary: report the failure through the state instead of
            # raising, and keep the traceback in the log.
            logger.exception("UX analysis failed: %s", e)
            state["error"] = str(e)
            state["status"] = "error"
            return state

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Parse AI response into structured data.

        Tries, in order: the whole response when it starts with ``{``, the
        contents of a ```` ```json ```` fence, and finally the span between
        the first ``{`` and the last ``}``. Only JSON objects are accepted.

        Args:
            response: Raw text returned by the AI service.

        Returns:
            The decoded JSON object, or a low-confidence fallback dict when
            no JSON object can be extracted.
        """
        import json

        text = response.strip() if isinstance(response, str) else ""
        candidates = []
        if text.startswith("{"):
            candidates.append(text)
        if "```json" in text:
            start = text.find("```json") + len("```json")
            end = text.find("```", start)
            # An unterminated fence yields -1; read to the end instead of
            # silently dropping the final character.
            candidates.append(text[start:] if end == -1 else text[start:end])
        first, last = text.find("{"), text.rfind("}")
        if 0 <= first < last:
            # Last resort: a JSON object surrounded by explanatory prose.
            candidates.append(text[first:last + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                continue
            # Callers use ``.get``, so anything but an object is unusable.
            if isinstance(parsed, dict):
                return parsed

        logger.warning("UX analysis response was not valid JSON; using fallback result")
        return {
            "summary": response[:500] if isinstance(response, str) and response else "UX analysis failed",
            "key_insights": ["UX analysis could not be completed"],
            "focus_areas": self.config.focus_areas,
            "recommendations": ["Retry analysis with improved input"],
            "confidence_score": 0.3
        }
class SynthesisAgent(BaseAgent):
    """Agent that synthesizes insights from multiple analysis perspectives.

    Reads whichever of ``technical_analysis``, ``business_analysis`` and
    ``ux_analysis`` are present in the state and asks the AI service to
    combine them into one summary.
    """

    def __init__(self, ai_service: Optional[DeepSeekService] = None):
        """Initialize synthesis agent.

        Args:
            ai_service: Service used to generate responses. A new
                ``DeepSeekService`` is created when omitted.
        """
        metadata = AgentMetadata(
            agent_id="synthesis_agent",
            name="Multi-Perspective Synthesis Agent",
            description="Combines insights from multiple analysis perspectives into unified understanding",
            category="synthesis",
            capabilities=[
                "multi_perspective_synthesis", "insight_integration", "conflict_resolution",
                "holistic_analysis", "cross_domain_connections", "unified_recommendations"
            ]
        )
        config = AgentConfig(
            temperature=0.5,  # Slightly higher for creative synthesis
            max_tokens=2000,
            focus_areas=[
                "cross_perspective_synthesis", "insight_integration", "conflict_resolution",
                "holistic_understanding", "unified_recommendations"
            ]
        )
        super().__init__(metadata, config)
        self.ai_service = ai_service or DeepSeekService()

    @staticmethod
    def _join_items(items: Any, limit: int) -> str:
        """Join up to ``limit`` items with ``", "``, tolerating ``None`` and non-string items."""
        return ', '.join(str(item) for item in (items or [])[:limit])

    async def execute(self, state: AgentState, context: AgentContext) -> AgentState:
        """Execute synthesis of multiple perspectives.

        Args:
            state: Shared state holding the per-perspective analysis dicts
                and, optionally, ``video_title``.
            context: Execution context (not read by this agent).

        Returns:
            The same state object. On success it gains ``synthesis_summary``,
            ``unified_insights``, ``cross_perspective_connections``,
            ``synthesis_recommendations``, ``synthesis_confidence``,
            ``synthesis_processing_time`` and ``status == "completed"``.
            When no analysis is available or the AI call fails it gains
            ``error`` and ``status == "error"``.
        """
        video_title = state.get("video_title", "")
        # Ensure we have analyses to synthesize: keep only the perspectives
        # that are actually present in the state.
        available = (
            ("Technical", state.get("technical_analysis")),
            ("Business", state.get("business_analysis")),
            ("User Experience", state.get("ux_analysis")),
        )
        analyses = [(name, analysis) for name, analysis in available if analysis]
        if not analyses:
            state["error"] = "No analysis results available for synthesis"
            state["status"] = "error"
            return state
        start_time = datetime.now()
        # Build synthesis prompt
        perspectives_text = []
        for perspective_name, analysis in analyses:
            # At most five insights and three recommendations per perspective.
            insights = self._join_items(analysis.get('key_insights'), 5)
            recommendations = self._join_items(analysis.get('recommendations'), 3)
            text = f"""
{perspective_name} Perspective:
Summary: {analysis.get('summary', '')}
Key Insights: {insights}
Recommendations: {recommendations}
"""
            perspectives_text.append(text)
        prompt = f"""
You are a Multi-Perspective Synthesis Agent responsible for combining insights from
Technical, Business, and User Experience analysis agents into a unified, comprehensive summary.
Video Title: {video_title}
Your role:
- Synthesize insights from all provided perspective analyses
- Identify connections and relationships between different viewpoints
- Resolve any conflicts or contradictions between perspectives
- Create a holistic understanding that incorporates all viewpoints
- Highlight the most significant insights across all perspectives
- Provide unified recommendations that consider technical, business, and UX factors
Perspective Analyses to Synthesize:
{''.join(perspectives_text)}
Provide your synthesis in JSON format:
{{
"summary": "Comprehensive synthesis in 3-4 paragraphs integrating all perspectives",
"unified_insights": ["List of 8-12 most significant insights across all perspectives"],
"cross_perspective_connections": ["Key relationships between technical, business, and UX aspects"],
"recommendations": ["5-7 unified recommendations considering all perspectives"],
"confidence_score": 0.90
}}
"""
        try:
            response = await self.ai_service.generate_response(
                prompt=prompt,
                temperature=self.config.temperature,
                max_tokens=self.config.max_tokens
            )
            processing_time = (datetime.now() - start_time).total_seconds()
            # Parse response
            synthesis_data = self._parse_response(response)
            # Update state with synthesis results
            state["synthesis_summary"] = synthesis_data.get("summary", "")
            state["unified_insights"] = synthesis_data.get("unified_insights", [])
            state["cross_perspective_connections"] = synthesis_data.get("cross_perspective_connections", [])
            state["synthesis_recommendations"] = synthesis_data.get("recommendations", [])
            state["synthesis_confidence"] = synthesis_data.get("confidence_score", 0.8)
            state["synthesis_processing_time"] = processing_time
            state["status"] = "completed"
            return state
        except Exception as e:
            # Agent boundary: report the failure through the state instead of
            # raising, and keep the traceback in the log.
            logger.exception("Synthesis failed: %s", e)
            state["error"] = str(e)
            state["status"] = "error"
            return state

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Parse AI response into structured data.

        Tries, in order: the whole response when it starts with ``{``, the
        contents of a ```` ```json ```` fence, and finally the span between
        the first ``{`` and the last ``}``. Only JSON objects are accepted.

        Args:
            response: Raw text returned by the AI service.

        Returns:
            The decoded JSON object, or a low-confidence fallback dict when
            no JSON object can be extracted.
        """
        import json

        text = response.strip() if isinstance(response, str) else ""
        candidates = []
        if text.startswith("{"):
            candidates.append(text)
        if "```json" in text:
            start = text.find("```json") + len("```json")
            end = text.find("```", start)
            # An unterminated fence yields -1; read to the end instead of
            # silently dropping the final character.
            candidates.append(text[start:] if end == -1 else text[start:end])
        first, last = text.find("{"), text.rfind("}")
        if 0 <= first < last:
            # Last resort: a JSON object surrounded by explanatory prose.
            candidates.append(text[first:last + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                continue
            # Callers use ``.get``, so anything but an object is unusable.
            if isinstance(parsed, dict):
                return parsed

        logger.warning("Synthesis response was not valid JSON; using fallback result")
        return {
            "summary": response[:800] if isinstance(response, str) and response else "Synthesis failed",
            "unified_insights": ["Synthesis could not be completed"],
            "cross_perspective_connections": ["Unable to identify connections"],
            "recommendations": ["Retry synthesis with improved analysis results"],
            "confidence_score": 0.3
        }
# Multi-Agent Analysis Orchestrator
class MultiAgentAnalysisOrchestrator:
    """Orchestrates multi-agent analysis of video content.

    Runs the requested perspective agents concurrently on copies of the same
    initial state, merges the successful results, runs the synthesis agent on
    the merged state and packages everything into a
    ``MultiAgentAnalysisResult``.
    """

    def __init__(self, ai_service: Optional[DeepSeekService] = None):
        """Initialize the orchestrator.

        Args:
            ai_service: Service shared by all agents. A new
                ``DeepSeekService`` is created when omitted.
        """
        self.ai_service = ai_service or DeepSeekService()
        # Initialize agents
        self.technical_agent = TechnicalAnalysisAgent(self.ai_service)
        self.business_agent = BusinessAnalysisAgent(self.ai_service)
        self.ux_agent = UserExperienceAnalysisAgent(self.ai_service)
        self.synthesis_agent = SynthesisAgent(self.ai_service)
        logger.info("Multi-agent analysis orchestrator initialized")

    async def analyze_video(
        self,
        transcript: str,
        video_id: str,
        video_title: str = "",
        agent_types: Optional[List[str]] = None
    ) -> MultiAgentAnalysisResult:
        """Analyze video with multiple agents.

        Args:
            transcript: Video transcript; must contain at least 50
                non-whitespace-trimmed characters.
            video_id: Identifier of the video, echoed in the result.
            video_title: Optional title passed to every agent.
            agent_types: Perspectives to run, chosen from ``"technical"``,
                ``"business"`` and ``"user_experience"``. Defaults to all
                three. Unrecognized entries are ignored.

        Returns:
            The combined analysis, including one ``AnalysisResult`` per agent
            that completed successfully.

        Raises:
            ServiceError: If the transcript is too short, if no recognized
                agent type was requested, or if every agent failed.
        """
        if not transcript or len(transcript.strip()) < 50:
            raise ServiceError("Transcript too short for multi-agent analysis")
        # Default to all agent types
        if agent_types is None:
            agent_types = ["technical", "business", "user_experience"]

        # (requested type, log label, agent, state key holding its result).
        # Built per call so the agent attributes are read at call time.
        registry = (
            ("technical", "technical", self.technical_agent, "technical_analysis"),
            ("business", "business", self.business_agent, "business_analysis"),
            ("user_experience", "ux", self.ux_agent, "ux_analysis"),
        )
        selected = [entry for entry in registry if entry[0] in agent_types]
        if not selected:
            # Without this guard zero agents would run and the caller would
            # get the misleading "All analysis agents failed" error.
            raise ServiceError(f"No supported agent types requested: {agent_types!r}")

        total_start_time = datetime.now()
        # Create initial state
        state: AgentState = {
            "transcript": transcript,
            "video_id": video_id,
            "video_title": video_title,
            "status": "initialized"
        }
        # Create context
        context = AgentContext()

        # Execute analyses in parallel; each agent works on its own copy of
        # the state so they cannot interfere with each other.
        logger.info(f"Starting {len(selected)} analysis agents for video {video_id}")
        analysis_results = await asyncio.gather(
            *(agent.execute(state.copy(), context) for _, _, agent, _ in selected),
            return_exceptions=True
        )

        # Process results
        completed_keys = []
        merged_state = state.copy()
        for (_, label, _, state_key), outcome in zip(selected, analysis_results):
            # BaseException rather than Exception: gather() can also hand
            # back asyncio.CancelledError, which is not an Exception subclass.
            if isinstance(outcome, BaseException):
                logger.error(f"Error in {label} analysis: {outcome}")
                continue
            if outcome.get("status") == "completed":
                completed_keys.append(state_key)
                # Merge results into state
                merged_state.update(outcome)
        if not completed_keys:
            raise ServiceError("All analysis agents failed")

        # Run synthesis agent
        logger.info("Running synthesis agent")
        synthesis_result = await self.synthesis_agent.execute(merged_state, context)
        if synthesis_result.get("status") == "error":
            # The per-agent analyses are still returned; only the synthesis
            # fields fall back to their defaults below.
            logger.warning(
                "Synthesis failed for video %s: %s",
                video_id,
                synthesis_result.get("error"),
            )

        # Calculate total processing time
        total_processing_time = (datetime.now() - total_start_time).total_seconds()

        # Extract individual analysis results
        agent_analyses = [
            AnalysisResult(**merged_state[state_key])
            for state_key in completed_keys
            if state_key in merged_state
        ]

        # Calculate quality score: 70% agents' average confidence, 30%
        # synthesis confidence.
        avg_confidence = sum(a.confidence_score for a in agent_analyses) / len(agent_analyses) if agent_analyses else 0.5
        synthesis_confidence = synthesis_result.get("synthesis_confidence", 0.5)
        quality_score = (avg_confidence * 0.7) + (synthesis_confidence * 0.3)

        # Create final result
        result = MultiAgentAnalysisResult(
            video_id=video_id,
            video_title=video_title,
            agent_analyses=agent_analyses,
            synthesis_summary=synthesis_result.get("synthesis_summary", ""),
            unified_insights=synthesis_result.get("unified_insights", []),
            cross_perspective_connections=synthesis_result.get("cross_perspective_connections", []),
            total_processing_time=total_processing_time,
            quality_score=quality_score
        )
        logger.info(f"Multi-agent analysis completed for video {video_id} in {total_processing_time:.2f}s")
        return result