youtube-summarizer/backend/services/multi_agent_service.py

609 lines
25 KiB
Python

"""Multi-agent summarization service for YouTube videos."""
import asyncio
import logging
from typing import Dict, List, Optional, Any
from datetime import datetime
from dataclasses import dataclass
from enum import Enum
from pydantic import BaseModel
from ..core.exceptions import ServiceError
from .deepseek_service import DeepSeekService
logger = logging.getLogger(__name__)
class AgentPerspective(str, Enum):
    """Different agent perspectives for analysis.

    The enum also subclasses ``str``, so every member compares equal to its
    string value (e.g. ``AgentPerspective.TECHNICAL == "technical"``).
    """
    # Specialist agents. These three form the default set used by
    # MultiAgentSummarizerService.analyze_with_multiple_perspectives when the
    # caller does not pass an explicit list.
    TECHNICAL = "technical"
    BUSINESS = "business"
    USER_EXPERIENCE = "user"
    # Combines the specialist outputs; the service runs it after the others.
    SYNTHESIS = "synthesis"
@dataclass
class PerspectivePrompt:
    """Prompt configuration for each perspective.

    Attributes:
        system_prompt: Role description and instructions embedded in the
            prompt sent to the AI service for this perspective.
        analysis_focus: Topic tags for the perspective; used as the
            ``focus_areas`` of an analysis when the AI response does not
            provide any.
        output_format: Text describing the JSON structure the AI is asked
            to answer with; appended at the end of the prompt.
    """
    system_prompt: str
    analysis_focus: List[str]
    output_format: str
class PerspectiveAnalysis(BaseModel):
    """Result from a single perspective agent.

    Attributes:
        agent_type: Perspective that produced this analysis.
        summary: Summary text taken from the agent's response.
        key_insights: Insights taken from the agent's response.
        confidence_score: Confidence reported in the response; the service
            falls back to 0.7 when the response has none and uses 0.1 for
            the placeholder returned after a failed AI call.
        focus_areas: Topics covered, or the perspective's configured
            ``analysis_focus`` when the response has none.
        recommendations: Recommendations taken from the agent's response.
        processing_time_seconds: Elapsed seconds for this agent's AI call.
    """
    agent_type: AgentPerspective
    summary: str
    key_insights: List[str]
    confidence_score: float
    focus_areas: List[str]
    recommendations: List[str]
    processing_time_seconds: float
class MultiAgentAnalysisResult(BaseModel):
    """Result from complete multi-agent analysis.

    Attributes:
        video_id: ID of the analyzed video, as passed by the caller.
        perspectives: Analyses from the perspective agents that completed.
        synthesis_summary: Text produced by the synthesis step (or its
            fallback) from the perspective analyses.
        unified_insights: Tagged insights collected across perspectives.
        processing_time_seconds: Elapsed seconds for the whole analysis.
        quality_score: Score combining agent confidence and completeness.
        created_at: Timestamp taken when the analysis started.
    """
    video_id: str
    perspectives: List[PerspectiveAnalysis]
    synthesis_summary: str
    unified_insights: List[str]
    processing_time_seconds: float
    quality_score: float
    created_at: datetime
class MultiAgentSummarizerService:
"""Service for multi-agent video summarization using different perspectives."""
def __init__(self, ai_service: Optional[DeepSeekService] = None):
    """Initialize the multi-agent service.

    Args:
        ai_service: DeepSeek AI service instance used for every agent call.
            When omitted (``None``), a default ``DeepSeekService`` is
            constructed.
    """
    # Compare against None explicitly: ``ai_service or DeepSeekService()``
    # would silently discard an injected service that happens to be falsy
    # (for example a test double defining ``__bool__`` or ``__len__``).
    if ai_service is None:
        ai_service = DeepSeekService()
    self.ai_service = ai_service
    self.perspective_prompts = self._initialize_perspective_prompts()
def _initialize_perspective_prompts(self) -> Dict[AgentPerspective, PerspectivePrompt]:
    """Build the prompt configuration for every agent perspective.

    Returns:
        Mapping from each ``AgentPerspective`` member to the
        ``PerspectivePrompt`` (system prompt, focus tags and requested JSON
        output format) used when that agent is invoked.
    """
    # NOTE: the prompt texts are runtime strings sent to the model; their
    # continuation lines intentionally start at column 0.
    technical_agent = PerspectivePrompt(
        system_prompt="""You are a Technical Analysis Agent specializing in analyzing technical concepts,
implementation details, tools, technologies, and architectural patterns mentioned in video content.
Focus on:
- Technical concepts and methodologies explained
- Tools, frameworks, and technologies mentioned
- Implementation approaches and best practices
- Code examples and technical demonstrations
- System architecture and design patterns
- Performance considerations and optimizations
- Technical challenges and solutions presented
Provide specific, actionable technical insights that would be valuable for developers and engineers.""",
        analysis_focus=[
            "technical_concepts", "tools_and_technologies", "implementation_details",
            "architecture_patterns", "best_practices", "performance_optimization",
            "code_examples", "technical_challenges",
        ],
        output_format="""Provide your analysis in this JSON structure:
{
"summary": "Technical overview in 2-3 paragraphs focusing on implementation and architecture",
"key_insights": ["List of 5-8 specific technical insights and takeaways"],
"focus_areas": ["Primary technical topics covered"],
"recommendations": ["3-5 actionable technical recommendations"],
"confidence_score": 0.85
}""",
    )

    business_agent = PerspectivePrompt(
        system_prompt="""You are a Business Analysis Agent specializing in analyzing business value,
market implications, ROI considerations, and strategic insights from video content.
Focus on:
- Business value propositions and ROI implications
- Market opportunities and competitive advantages
- Strategic decision-making insights
- Cost-benefit analysis and resource allocation
- Revenue generation potential and business models
- Risk assessment and mitigation strategies
- Stakeholder impact and organizational benefits
Provide actionable business insights suitable for executives and decision-makers.""",
        analysis_focus=[
            "business_value", "market_implications", "roi_analysis",
            "strategic_insights", "competitive_advantages", "risk_assessment",
            "revenue_potential", "stakeholder_impact",
        ],
        output_format="""Provide your analysis in this JSON structure:
{
"summary": "Business-focused overview in 2-3 paragraphs emphasizing value and strategy",
"key_insights": ["List of 5-8 specific business insights and opportunities"],
"focus_areas": ["Primary business topics and value propositions"],
"recommendations": ["3-5 actionable business recommendations"],
"confidence_score": 0.85
}""",
    )

    ux_agent = PerspectivePrompt(
        system_prompt="""You are a User Experience Analysis Agent specializing in analyzing user journey,
usability, accessibility, and overall user experience aspects from video content.
Focus on:
- User journey and experience flow
- Usability principles and interface design
- Accessibility considerations and inclusive design
- User engagement patterns and behavior
- Pain points and friction areas identified
- User satisfaction and experience optimization
- Design principles and user-centered approaches
Provide insights valuable for UX designers, product managers, and user advocates.""",
        analysis_focus=[
            "user_journey", "usability_principles", "accessibility_features",
            "user_engagement", "pain_point_analysis", "experience_optimization",
            "design_patterns", "user_satisfaction",
        ],
        output_format="""Provide your analysis in this JSON structure:
{
"summary": "UX-focused overview in 2-3 paragraphs emphasizing user experience and design",
"key_insights": ["List of 5-8 specific UX insights and user experience findings"],
"focus_areas": ["Primary UX topics and user experience areas"],
"recommendations": ["3-5 actionable UX improvements and recommendations"],
"confidence_score": 0.85
}""",
    )

    synthesis_agent = PerspectivePrompt(
        system_prompt="""You are a Synthesis Agent responsible for combining insights from Technical,
Business, and User Experience analysis agents into a unified, comprehensive summary.
Your role:
- Synthesize insights from all three perspective analyses
- Identify connections and relationships between different viewpoints
- Resolve any conflicts or contradictions between perspectives
- Create a holistic understanding that incorporates all viewpoints
- Highlight the most significant insights across all perspectives
- Provide unified recommendations that consider technical, business, and UX factors
Create a comprehensive synthesis that would be valuable for cross-functional teams.""",
        analysis_focus=[
            "cross_perspective_synthesis", "insight_integration", "conflict_resolution",
            "holistic_understanding", "unified_recommendations", "comprehensive_overview",
        ],
        output_format="""Provide your synthesis in this JSON structure:
{
"summary": "Comprehensive synthesis in 3-4 paragraphs integrating all perspectives",
"unified_insights": ["List of 8-12 most significant insights across all perspectives"],
"cross_perspective_connections": ["Key relationships between technical, business, and UX aspects"],
"recommendations": ["5-7 unified recommendations considering all perspectives"],
"confidence_score": 0.90
}""",
    )

    return {
        AgentPerspective.TECHNICAL: technical_agent,
        AgentPerspective.BUSINESS: business_agent,
        AgentPerspective.USER_EXPERIENCE: ux_agent,
        AgentPerspective.SYNTHESIS: synthesis_agent,
    }
async def analyze_with_multiple_perspectives(
    self,
    transcript: str,
    video_id: str,
    video_title: str = "",
    perspectives: Optional[List[AgentPerspective]] = None
) -> MultiAgentAnalysisResult:
    """Analyze video content using multiple agent perspectives.

    The perspective agents run concurrently; their successful results are
    then passed to the synthesis step and scored.

    Args:
        transcript: Video transcript text
        video_id: YouTube video ID
        video_title: Video title for context
        perspectives: List of perspectives to analyze (defaults to all
            except synthesis)

    Returns:
        Complete multi-agent analysis result

    Raises:
        ServiceError: If the transcript has fewer than 50 non-padding
            characters, if no perspective analysis succeeds, or if any
            other error occurs during the analysis (the original error is
            attached as ``__cause__``).
    """
    if not transcript or len(transcript.strip()) < 50:
        raise ServiceError("Transcript too short for multi-agent analysis")

    # Default to all perspectives except synthesis (synthesis runs after others)
    if perspectives is None:
        perspectives = [
            AgentPerspective.TECHNICAL,
            AgentPerspective.BUSINESS,
            AgentPerspective.USER_EXPERIENCE
        ]

    start_time = datetime.now()
    logger.info(
        "Starting multi-agent analysis for video %s with perspectives: %s",
        video_id, perspectives,
    )

    try:
        # Run perspective analyses in parallel and wait for all of them
        perspective_results = await asyncio.gather(
            *(
                self._analyze_perspective(transcript, video_id, video_title, perspective)
                for perspective in perspectives
            ),
            return_exceptions=True,
        )

        # Keep only real results. With return_exceptions=True a cancelled
        # child is returned as CancelledError, which derives from
        # BaseException (not Exception), so test against BaseException to
        # avoid filing it as a successful analysis.
        successful_analyses = []
        for perspective, outcome in zip(perspectives, perspective_results):
            if isinstance(outcome, BaseException):
                logger.error("Error in %s analysis: %s", perspective, outcome)
                continue
            successful_analyses.append(outcome)

        if not successful_analyses:
            raise ServiceError("All perspective analyses failed")

        # Run synthesis agent to combine all perspectives
        synthesis_summary = await self._synthesize_perspectives(
            successful_analyses, transcript, video_id, video_title
        )

        # Calculate total processing time
        processing_time = (datetime.now() - start_time).total_seconds()

        # Calculate overall quality score
        quality_score = self._calculate_quality_score(successful_analyses)

        # Extract unified insights from synthesis
        unified_insights = self._extract_unified_insights(successful_analyses, synthesis_summary)

        result = MultiAgentAnalysisResult(
            video_id=video_id,
            perspectives=successful_analyses,
            synthesis_summary=synthesis_summary,
            unified_insights=unified_insights,
            processing_time_seconds=processing_time,
            quality_score=quality_score,
            created_at=start_time
        )
        logger.info(
            "Multi-agent analysis completed for video %s in %.2fs",
            video_id, processing_time,
        )
        return result
    except Exception as e:
        logger.error("Error in multi-agent analysis for video %s: %s", video_id, e)
        # Chain the original exception so its traceback is not lost.
        raise ServiceError(f"Multi-agent analysis failed: {str(e)}") from e
async def _analyze_perspective(
    self,
    transcript: str,
    video_id: str,
    video_title: str,
    perspective: AgentPerspective
) -> PerspectiveAnalysis:
    """Analyze transcript from a specific perspective.

    Never raises for AI or parsing failures: any exception is logged and a
    low-confidence placeholder analysis is returned instead.

    Args:
        transcript: Video transcript
        video_id: Video ID for context
        video_title: Video title for context
        perspective: Analysis perspective to use

    Returns:
        Analysis result from the specified perspective
    """
    perspective_config = self.perspective_prompts[perspective]
    start_time = datetime.now()

    # Limit transcript length to avoid token limits. This note must live
    # outside the f-string below; inside it, the text would be sent to the
    # model as part of the prompt.
    transcript_excerpt = transcript[:8000]

    # Build context-aware prompt
    context_prompt = f"""
Video Title: {video_title}
Video ID: {video_id}
Please analyze the following video transcript from a {perspective.value} perspective.
{perspective_config.system_prompt}
Transcript:
{transcript_excerpt}
{perspective_config.output_format}
"""
    try:
        # Get AI analysis
        response = await self.ai_service.generate_response(
            prompt=context_prompt,
            temperature=0.3,  # Lower temperature for more consistent analysis
            max_tokens=1500
        )
        processing_time = (datetime.now() - start_time).total_seconds()

        # Parse AI response (attempt JSON parsing, fallback to text)
        analysis_data = self._parse_ai_response(response, perspective)

        # Create PerspectiveAnalysis object
        return PerspectiveAnalysis(
            agent_type=perspective,
            summary=analysis_data.get("summary", ""),
            key_insights=analysis_data.get("key_insights", []),
            confidence_score=analysis_data.get("confidence_score", 0.7),
            focus_areas=analysis_data.get("focus_areas", perspective_config.analysis_focus),
            recommendations=analysis_data.get("recommendations", []),
            processing_time_seconds=processing_time
        )
    except Exception as e:
        logger.error("Error in %s analysis: %s", perspective.value, e)
        # Return minimal analysis if the AI call (or building the result
        # from its response) fails
        processing_time = (datetime.now() - start_time).total_seconds()
        return PerspectiveAnalysis(
            agent_type=perspective,
            summary=f"Analysis from {perspective.value} perspective failed due to technical error.",
            key_insights=[f"Unable to complete {perspective.value} analysis"],
            confidence_score=0.1,
            focus_areas=perspective_config.analysis_focus,
            recommendations=["Retry analysis with improved transcript quality"],
            processing_time_seconds=processing_time
        )
async def _synthesize_perspectives(
    self,
    analyses: List[PerspectiveAnalysis],
    transcript: str,
    video_id: str,
    video_title: str
) -> str:
    """Synthesize insights from multiple perspective analyses.

    Args:
        analyses: List of perspective analyses to synthesize
        transcript: Original transcript; accepted for interface
            compatibility but not included in the synthesis prompt
        video_id: Video ID
        video_title: Video title

    Returns:
        Synthesized summary combining all perspectives. If the AI call
        fails, a locally built fallback synthesis is returned instead.
    """
    if not analyses:
        return "No perspective analyses available for synthesis."

    synthesis_config = self.perspective_prompts[AgentPerspective.SYNTHESIS]

    # Build synthesis input from perspective analyses. Only the top 5
    # insights and top 3 recommendations of each perspective are included.
    # These limits are documented here rather than inside the f-string,
    # where the remark would be sent to the model as prompt text.
    perspectives_summary = []
    for analysis in analyses:
        top_insights = ', '.join(analysis.key_insights[:5])
        top_recommendations = ', '.join(analysis.recommendations[:3])
        perspective_text = f"""
{analysis.agent_type.value.title()} Perspective:
Summary: {analysis.summary}
Key Insights: {top_insights}
Recommendations: {top_recommendations}
"""
        perspectives_summary.append(perspective_text)

    synthesis_prompt = f"""
Video Title: {video_title}
Video ID: {video_id}
{synthesis_config.system_prompt}
Please synthesize the following perspective analyses into a unified, comprehensive summary:
{''.join(perspectives_summary)}
{synthesis_config.output_format}
"""
    try:
        response = await self.ai_service.generate_response(
            prompt=synthesis_prompt,
            temperature=0.4,  # Slightly higher temperature for creative synthesis
            max_tokens=2000
        )
        # Parse synthesis response
        synthesis_data = self._parse_ai_response(response, AgentPerspective.SYNTHESIS)
        return synthesis_data.get("summary", response)
    except Exception as e:
        logger.error("Error in synthesis: %s", e)
        # Fallback: create basic synthesis
        return self._create_fallback_synthesis(analyses)
def _parse_ai_response(self, response: str, perspective: AgentPerspective) -> Dict[str, Any]:
"""Parse AI response, attempting JSON first, then fallback to text parsing.
Args:
response: Raw AI response
perspective: Perspective type for context
Returns:
Parsed data dictionary
"""
try:
import json
# Try to extract JSON from response
if response.strip().startswith('{'):
return json.loads(response)
elif '```json' in response:
# Extract JSON from markdown code block
start = response.find('```json') + 7
end = response.find('```', start)
json_str = response[start:end].strip()
return json.loads(json_str)
except (json.JSONDecodeError, ValueError):
pass
# Fallback: extract key information from text
return self._extract_from_text_response(response, perspective)
def _extract_from_text_response(self, response: str, perspective: AgentPerspective) -> Dict[str, Any]:
"""Extract structured data from text response when JSON parsing fails.
Args:
response: Text response from AI
perspective: Perspective type
Returns:
Extracted data dictionary
"""
lines = response.split('\n')
# Basic text extraction logic
data = {
"summary": "",
"key_insights": [],
"focus_areas": [],
"recommendations": [],
"confidence_score": 0.7
}
current_section = None
for line in lines:
line = line.strip()
if not line:
continue
# Identify sections
if any(keyword in line.lower() for keyword in ['summary', 'overview']):
current_section = 'summary'
continue
elif any(keyword in line.lower() for keyword in ['insights', 'key points']):
current_section = 'key_insights'
continue
elif any(keyword in line.lower() for keyword in ['recommendations', 'actions']):
current_section = 'recommendations'
continue
# Extract content based on current section
if current_section == 'summary' and not data["summary"]:
data["summary"] = line
elif current_section == 'key_insights' and line.startswith(('-', '', '*')):
data["key_insights"].append(line.lstrip('-•* '))
elif current_section == 'recommendations' and line.startswith(('-', '', '*')):
data["recommendations"].append(line.lstrip('-•* '))
# If no structured content found, use first paragraph as summary
if not data["summary"]:
paragraphs = response.split('\n\n')
data["summary"] = paragraphs[0] if paragraphs else response[:300]
return data
def _calculate_quality_score(self, analyses: List[PerspectiveAnalysis]) -> float:
"""Calculate overall quality score from perspective analyses.
Args:
analyses: List of perspective analyses
Returns:
Quality score between 0.0 and 1.0
"""
if not analyses:
return 0.0
# Average confidence scores
avg_confidence = sum(analysis.confidence_score for analysis in analyses) / len(analyses)
# Factor in completeness (number of insights and recommendations)
completeness_scores = []
for analysis in analyses:
insight_score = min(len(analysis.key_insights) / 5.0, 1.0) # Target 5 insights
rec_score = min(len(analysis.recommendations) / 3.0, 1.0) # Target 3 recommendations
completeness_scores.append((insight_score + rec_score) / 2.0)
avg_completeness = sum(completeness_scores) / len(completeness_scores)
# Weighted final score
quality_score = (avg_confidence * 0.7) + (avg_completeness * 0.3)
return round(quality_score, 2)
def _extract_unified_insights(
self,
analyses: List[PerspectiveAnalysis],
synthesis_summary: str
) -> List[str]:
"""Extract unified insights from all analyses.
Args:
analyses: List of perspective analyses
synthesis_summary: Synthesis summary text
Returns:
List of unified insights
"""
unified_insights = []
# Collect top insights from each perspective
for analysis in analyses:
for insight in analysis.key_insights[:3]: # Top 3 from each perspective
if insight and insight not in unified_insights:
unified_insights.append(f"[{analysis.agent_type.value.title()}] {insight}")
# Add synthesis-specific insights if available
try:
import json
if synthesis_summary.strip().startswith('{'):
synthesis_data = json.loads(synthesis_summary)
if "unified_insights" in synthesis_data:
for insight in synthesis_data["unified_insights"][:3]:
if insight and insight not in unified_insights:
unified_insights.append(f"[Synthesis] {insight}")
except:
pass
return unified_insights[:12] # Limit to 12 total insights
def _create_fallback_synthesis(self, analyses: List[PerspectiveAnalysis]) -> str:
"""Create basic synthesis when AI synthesis fails.
Args:
analyses: List of perspective analyses
Returns:
Fallback synthesis text
"""
perspectives = [analysis.agent_type.value for analysis in analyses]
synthesis = f"This video was analyzed from {len(analyses)} different perspectives: {', '.join(perspectives)}.\n\n"
for analysis in analyses:
synthesis += f"From a {analysis.agent_type.value} standpoint: {analysis.summary[:200]}...\n\n"
synthesis += "The combination of these perspectives provides a comprehensive understanding of the video content, "
synthesis += "addressing technical implementation, business value, and user experience considerations."
return synthesis
async def get_analysis_health(self) -> Dict[str, Any]:
    """Get health status of the multi-agent analysis service.

    When an AI service is configured, a minimal request is sent to it to
    check connectivity.

    Returns:
        Service health information. ``status`` is ``"healthy"`` when the
        probe succeeds, ``"degraded"`` when it raises, and ``"error"``
        when no AI service is configured.
    """
    # Use one definition of "configured" (not None) for both the reported
    # flag and the branch below; a truthiness test could disagree with the
    # flag for a service object that evaluates as falsy.
    ai_service_configured = self.ai_service is not None

    health_info = {
        "service": "multi_agent_summarizer",
        "status": "healthy",
        "perspectives_available": len(self.perspective_prompts),
        "ai_service_available": ai_service_configured,
        "timestamp": datetime.now().isoformat()
    }

    if not ai_service_configured:
        health_info["ai_service_status"] = "not_configured"
        health_info["status"] = "error"
        return health_info

    # Test AI service connectivity
    try:
        await self.ai_service.generate_response("test", max_tokens=10)
        health_info["ai_service_status"] = "connected"
    except Exception as e:
        # Report the failure in the result and leave a trace in the logs.
        logger.warning("AI service health probe failed: %s", e)
        health_info["ai_service_status"] = "connection_error"
        health_info["status"] = "degraded"

    return health_info