609 lines
25 KiB
Python
609 lines
25 KiB
Python
"""Multi-agent summarization service for YouTube videos."""
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import Dict, List, Optional, Any
|
|
from datetime import datetime
|
|
from dataclasses import dataclass
|
|
from enum import Enum
|
|
|
|
from pydantic import BaseModel
|
|
|
|
from ..core.exceptions import ServiceError
|
|
from .deepseek_service import DeepSeekService
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class AgentPerspective(str, Enum):
    """Different agent perspectives for analysis.

    Members also subclass ``str``. Each member's value is the short
    identifier that this module interpolates into prompts and log messages.
    """

    # Analysis driven by the technical prompt configuration.
    TECHNICAL = "technical"
    # Analysis driven by the business prompt configuration.
    BUSINESS = "business"
    # Note: the value is "user", not "user_experience".
    USER_EXPERIENCE = "user"
    # Combines the other perspectives; the service runs it after them and
    # leaves it out of the default perspective list.
    SYNTHESIS = "synthesis"
|
|
|
|
|
|
@dataclass
class PerspectivePrompt:
    """Prompt configuration for each perspective.

    One instance is built per ``AgentPerspective`` by the summarizer service
    and looked up when a prompt for that perspective is assembled.
    """

    # Role/instruction text embedded in the prompt sent to the AI service.
    system_prompt: str
    # Topic identifiers for the perspective; used as the default
    # ``focus_areas`` when the AI response does not supply any.
    analysis_focus: List[str]
    # Text describing the JSON structure the model is asked to return;
    # appended at the end of the prompt.
    output_format: str
|
|
|
|
|
|
class PerspectiveAnalysis(BaseModel):
    """Result from a single perspective agent."""

    # Perspective that produced this analysis.
    agent_type: AgentPerspective
    # Summary text taken from the parsed AI response (or a failure notice
    # when the AI call raised).
    summary: str
    # Insight strings taken from the parsed AI response.
    key_insights: List[str]
    # Confidence reported by the model; the service defaults it to 0.7 when
    # missing and uses 0.1 for a failed analysis.
    confidence_score: float
    # Topics covered; falls back to the perspective's configured
    # ``analysis_focus`` when the response has none.
    focus_areas: List[str]
    # Recommendation strings taken from the parsed AI response.
    recommendations: List[str]
    # Wall-clock seconds measured around the AI call for this perspective.
    processing_time_seconds: float
|
|
|
|
|
|
class MultiAgentAnalysisResult(BaseModel):
    """Result from complete multi-agent analysis."""

    # Identifier of the analysed video, as passed in by the caller.
    video_id: str
    # Perspective analyses that completed without raising.
    perspectives: List[PerspectiveAnalysis]
    # Text produced by the synthesis step (or its non-AI fallback).
    synthesis_summary: str
    # Insights collected across perspectives, each prefixed with its source
    # label such as "[Technical]"; capped at 12 entries by the service.
    unified_insights: List[str]
    # Wall-clock seconds for the whole run, including synthesis.
    processing_time_seconds: float
    # Score derived from perspective confidence and completeness.
    quality_score: float
    # Set by the service to the time the analysis started (naive
    # ``datetime.now()``).
    created_at: datetime
|
|
|
|
|
|
class MultiAgentSummarizerService:
    """Service for multi-agent video summarization using different perspectives.

    Each requested perspective is analysed by prompting the AI service with a
    perspective-specific prompt; the resulting analyses are then combined by a
    separate synthesis prompt into one summary.
    """

    def __init__(self, ai_service: Optional[DeepSeekService] = None):
        """Initialize the multi-agent service.

        Args:
            ai_service: DeepSeek AI service instance. A new ``DeepSeekService``
                is created when none (or a falsy value) is supplied.
        """
        self.ai_service = ai_service or DeepSeekService()
        self.perspective_prompts = self._initialize_perspective_prompts()

    def _initialize_perspective_prompts(self) -> Dict[AgentPerspective, PerspectivePrompt]:
        """Initialize prompt templates for each agent perspective.

        Returns:
            Mapping from every ``AgentPerspective`` member (including
            ``SYNTHESIS``) to its prompt configuration.
        """
        # The prompt literals are kept flush-left on purpose: any indentation
        # inside a triple-quoted string would become part of the prompt text.
        return {
            AgentPerspective.TECHNICAL: PerspectivePrompt(
                system_prompt="""You are a Technical Analysis Agent specializing in analyzing technical concepts,
implementation details, tools, technologies, and architectural patterns mentioned in video content.

Focus on:
- Technical concepts and methodologies explained
- Tools, frameworks, and technologies mentioned
- Implementation approaches and best practices
- Code examples and technical demonstrations
- System architecture and design patterns
- Performance considerations and optimizations
- Technical challenges and solutions presented

Provide specific, actionable technical insights that would be valuable for developers and engineers.""",
                analysis_focus=[
                    "technical_concepts", "tools_and_technologies", "implementation_details",
                    "architecture_patterns", "best_practices", "performance_optimization",
                    "code_examples", "technical_challenges",
                ],
                output_format="""Provide your analysis in this JSON structure:
{
"summary": "Technical overview in 2-3 paragraphs focusing on implementation and architecture",
"key_insights": ["List of 5-8 specific technical insights and takeaways"],
"focus_areas": ["Primary technical topics covered"],
"recommendations": ["3-5 actionable technical recommendations"],
"confidence_score": 0.85
}""",
            ),
            AgentPerspective.BUSINESS: PerspectivePrompt(
                system_prompt="""You are a Business Analysis Agent specializing in analyzing business value,
market implications, ROI considerations, and strategic insights from video content.

Focus on:
- Business value propositions and ROI implications
- Market opportunities and competitive advantages
- Strategic decision-making insights
- Cost-benefit analysis and resource allocation
- Revenue generation potential and business models
- Risk assessment and mitigation strategies
- Stakeholder impact and organizational benefits

Provide actionable business insights suitable for executives and decision-makers.""",
                analysis_focus=[
                    "business_value", "market_implications", "roi_analysis",
                    "strategic_insights", "competitive_advantages", "risk_assessment",
                    "revenue_potential", "stakeholder_impact",
                ],
                output_format="""Provide your analysis in this JSON structure:
{
"summary": "Business-focused overview in 2-3 paragraphs emphasizing value and strategy",
"key_insights": ["List of 5-8 specific business insights and opportunities"],
"focus_areas": ["Primary business topics and value propositions"],
"recommendations": ["3-5 actionable business recommendations"],
"confidence_score": 0.85
}""",
            ),
            AgentPerspective.USER_EXPERIENCE: PerspectivePrompt(
                system_prompt="""You are a User Experience Analysis Agent specializing in analyzing user journey,
usability, accessibility, and overall user experience aspects from video content.

Focus on:
- User journey and experience flow
- Usability principles and interface design
- Accessibility considerations and inclusive design
- User engagement patterns and behavior
- Pain points and friction areas identified
- User satisfaction and experience optimization
- Design principles and user-centered approaches

Provide insights valuable for UX designers, product managers, and user advocates.""",
                analysis_focus=[
                    "user_journey", "usability_principles", "accessibility_features",
                    "user_engagement", "pain_point_analysis", "experience_optimization",
                    "design_patterns", "user_satisfaction",
                ],
                output_format="""Provide your analysis in this JSON structure:
{
"summary": "UX-focused overview in 2-3 paragraphs emphasizing user experience and design",
"key_insights": ["List of 5-8 specific UX insights and user experience findings"],
"focus_areas": ["Primary UX topics and user experience areas"],
"recommendations": ["3-5 actionable UX improvements and recommendations"],
"confidence_score": 0.85
}""",
            ),
            AgentPerspective.SYNTHESIS: PerspectivePrompt(
                system_prompt="""You are a Synthesis Agent responsible for combining insights from Technical,
Business, and User Experience analysis agents into a unified, comprehensive summary.

Your role:
- Synthesize insights from all three perspective analyses
- Identify connections and relationships between different viewpoints
- Resolve any conflicts or contradictions between perspectives
- Create a holistic understanding that incorporates all viewpoints
- Highlight the most significant insights across all perspectives
- Provide unified recommendations that consider technical, business, and UX factors

Create a comprehensive synthesis that would be valuable for cross-functional teams.""",
                analysis_focus=[
                    "cross_perspective_synthesis", "insight_integration", "conflict_resolution",
                    "holistic_understanding", "unified_recommendations", "comprehensive_overview",
                ],
                output_format="""Provide your synthesis in this JSON structure:
{
"summary": "Comprehensive synthesis in 3-4 paragraphs integrating all perspectives",
"unified_insights": ["List of 8-12 most significant insights across all perspectives"],
"cross_perspective_connections": ["Key relationships between technical, business, and UX aspects"],
"recommendations": ["5-7 unified recommendations considering all perspectives"],
"confidence_score": 0.90
}""",
            ),
        }

    async def analyze_with_multiple_perspectives(
        self,
        transcript: str,
        video_id: str,
        video_title: str = "",
        perspectives: Optional[List[AgentPerspective]] = None
    ) -> MultiAgentAnalysisResult:
        """Analyze video content using multiple agent perspectives.

        Args:
            transcript: Video transcript text
            video_id: YouTube video ID
            video_title: Video title for context
            perspectives: List of perspectives to analyze (defaults to all except synthesis)

        Returns:
            Complete multi-agent analysis result

        Raises:
            ServiceError: If the transcript has fewer than 50 non-padding
                characters, if no perspective analysis succeeds, or if any
                later step fails (the original exception is chained as the
                cause).
        """
        if not transcript or len(transcript.strip()) < 50:
            raise ServiceError("Transcript too short for multi-agent analysis")

        # Default to all perspectives except synthesis (synthesis runs after others)
        if perspectives is None:
            perspectives = [
                AgentPerspective.TECHNICAL,
                AgentPerspective.BUSINESS,
                AgentPerspective.USER_EXPERIENCE,
            ]

        start_time = datetime.now()
        logger.info(
            "Starting multi-agent analysis for video %s with perspectives: %s",
            video_id, perspectives,
        )

        try:
            # Run perspective analyses concurrently; exceptions are returned
            # as results so one failure cannot cancel the others.
            perspective_results = await asyncio.gather(
                *(
                    self._analyze_perspective(transcript, video_id, video_title, perspective)
                    for perspective in perspectives
                ),
                return_exceptions=True,
            )

            successful_analyses = []
            for perspective, result in zip(perspectives, perspective_results):
                # BaseException also covers a CancelledError handed back by
                # gather, which must not be treated as an analysis object.
                if isinstance(result, BaseException):
                    logger.error("Error in %s analysis: %s", perspective, result)
                    continue
                successful_analyses.append(result)

            if not successful_analyses:
                raise ServiceError("All perspective analyses failed")

            # Run synthesis agent to combine all perspectives
            synthesis_summary = await self._synthesize_perspectives(
                successful_analyses, transcript, video_id, video_title
            )

            processing_time = (datetime.now() - start_time).total_seconds()
            quality_score = self._calculate_quality_score(successful_analyses)
            unified_insights = self._extract_unified_insights(successful_analyses, synthesis_summary)

            result = MultiAgentAnalysisResult(
                video_id=video_id,
                perspectives=successful_analyses,
                synthesis_summary=synthesis_summary,
                unified_insights=unified_insights,
                processing_time_seconds=processing_time,
                quality_score=quality_score,
                created_at=start_time,
            )

            logger.info(
                "Multi-agent analysis completed for video %s in %.2fs",
                video_id, processing_time,
            )
            return result

        except Exception as e:
            logger.error("Error in multi-agent analysis for video %s: %s", video_id, e)
            # Chain the cause so the original traceback is not lost.
            raise ServiceError(f"Multi-agent analysis failed: {str(e)}") from e

    async def _analyze_perspective(
        self,
        transcript: str,
        video_id: str,
        video_title: str,
        perspective: AgentPerspective
    ) -> PerspectiveAnalysis:
        """Analyze transcript from a specific perspective.

        Any exception raised while calling the AI service or building the
        result is logged and converted into a low-confidence placeholder
        analysis, so this coroutine does not propagate those errors.

        Args:
            transcript: Video transcript
            video_id: Video ID for context
            video_title: Video title for context
            perspective: Analysis perspective to use

        Returns:
            Analysis result from the specified perspective
        """
        perspective_config = self.perspective_prompts[perspective]
        start_time = datetime.now()

        # Limit transcript length to avoid token limits.
        max_transcript_chars = 8000

        # Build context-aware prompt. The explanatory notes live in code
        # comments, not inside the prompt text sent to the model.
        context_prompt = (
            f"\nVideo Title: {video_title}\n"
            f"Video ID: {video_id}\n\n"
            f"Please analyze the following video transcript from a {perspective.value} perspective.\n\n"
            f"{perspective_config.system_prompt}\n\n"
            "Transcript:\n"
            f"{transcript[:max_transcript_chars]}\n\n"
            f"{perspective_config.output_format}\n"
        )

        try:
            response = await self.ai_service.generate_response(
                prompt=context_prompt,
                temperature=0.3,  # Lower temperature for more consistent analysis
                max_tokens=1500,
            )

            processing_time = (datetime.now() - start_time).total_seconds()

            # Parse AI response (attempt JSON parsing, fallback to text)
            analysis_data = self._parse_ai_response(response, perspective)

            return PerspectiveAnalysis(
                agent_type=perspective,
                summary=analysis_data.get("summary", ""),
                key_insights=analysis_data.get("key_insights", []),
                confidence_score=analysis_data.get("confidence_score", 0.7),
                focus_areas=analysis_data.get("focus_areas", perspective_config.analysis_focus),
                recommendations=analysis_data.get("recommendations", []),
                processing_time_seconds=processing_time,
            )

        except Exception as e:
            logger.error("Error in %s analysis: %s", perspective.value, e)
            # Return minimal analysis if the AI call or result validation fails
            processing_time = (datetime.now() - start_time).total_seconds()
            return PerspectiveAnalysis(
                agent_type=perspective,
                summary=f"Analysis from {perspective.value} perspective failed due to technical error.",
                key_insights=[f"Unable to complete {perspective.value} analysis"],
                confidence_score=0.1,
                focus_areas=perspective_config.analysis_focus,
                recommendations=["Retry analysis with improved transcript quality"],
                processing_time_seconds=processing_time,
            )

    async def _synthesize_perspectives(
        self,
        analyses: List[PerspectiveAnalysis],
        transcript: str,
        video_id: str,
        video_title: str
    ) -> str:
        """Synthesize insights from multiple perspective analyses.

        Args:
            analyses: List of perspective analyses to synthesize
            transcript: Original transcript (accepted for interface
                compatibility; not included in the synthesis prompt)
            video_id: Video ID
            video_title: Video title

        Returns:
            Synthesized summary combining all perspectives. Falls back to the
            raw AI response when no usable "summary" string is parsed, and to
            a locally built synthesis when the AI call raises.
        """
        if not analyses:
            return "No perspective analyses available for synthesis."

        synthesis_config = self.perspective_prompts[AgentPerspective.SYNTHESIS]

        # Build synthesis input from perspective analyses, limited to the
        # top 5 insights and top 3 recommendations of each.
        perspectives_summary = [
            (
                f"\n{analysis.agent_type.value.title()} Perspective:\n"
                f"Summary: {analysis.summary}\n"
                f"Key Insights: {', '.join(analysis.key_insights[:5])}\n"
                f"Recommendations: {', '.join(analysis.recommendations[:3])}\n"
            )
            for analysis in analyses
        ]

        synthesis_prompt = (
            f"\nVideo Title: {video_title}\n"
            f"Video ID: {video_id}\n\n"
            f"{synthesis_config.system_prompt}\n\n"
            "Please synthesize the following perspective analyses into a unified, comprehensive summary:\n\n"
            f"{''.join(perspectives_summary)}\n\n"
            f"{synthesis_config.output_format}\n"
        )

        try:
            response = await self.ai_service.generate_response(
                prompt=synthesis_prompt,
                temperature=0.4,  # Slightly higher temperature for creative synthesis
                max_tokens=2000,
            )

            synthesis_data = self._parse_ai_response(response, AgentPerspective.SYNTHESIS)
            summary = synthesis_data.get("summary")
            # Only a non-empty string is a valid summary; anything else would
            # fail validation of the result model downstream.
            if isinstance(summary, str) and summary:
                return summary
            return response

        except Exception as e:
            logger.error("Error in synthesis: %s", e)
            # Fallback: create basic synthesis
            return self._create_fallback_synthesis(analyses)

    def _parse_ai_response(self, response: str, perspective: AgentPerspective) -> Dict[str, Any]:
        """Parse AI response, attempting JSON first, then fallback to text parsing.

        JSON candidates are tried in order: the whole response when it starts
        with ``{``, the contents of a ```` ```json ```` fence (a missing closing
        fence means "until the end of the response"), and finally the span
        from the first ``{`` to the last ``}``. Only a JSON object is accepted.

        Args:
            response: Raw AI response
            perspective: Perspective type for context

        Returns:
            Parsed data dictionary
        """
        import json

        text = (response or "").strip()
        candidates = []

        if text.startswith("{"):
            candidates.append(text)

        fence = "```json"
        fence_pos = text.find(fence)
        if fence_pos != -1:
            body_start = fence_pos + len(fence)
            body_end = text.find("```", body_start)
            if body_end == -1:
                # Unterminated fence: take the rest instead of letting -1
                # silently chop the final character off the payload.
                body_end = len(text)
            candidates.append(text[body_start:body_end].strip())

        first_brace = text.find("{")
        last_brace = text.rfind("}")
        if first_brace != -1 and last_brace > first_brace:
            candidates.append(text[first_brace:last_brace + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
            # Callers use .get(), so lists/scalars are not acceptable.
            if isinstance(parsed, dict):
                return parsed

        # Fallback: extract key information from text
        return self._extract_from_text_response(response or "", perspective)

    def _extract_from_text_response(self, response: str, perspective: AgentPerspective) -> Dict[str, Any]:
        """Extract structured data from text response when JSON parsing fails.

        A line containing a section keyword is treated as a header and
        switches the current section; the first content line of a summary
        section becomes the summary, and bullet lines (``-``, ``•``, ``*``)
        in the insights/recommendations sections are collected.

        Args:
            response: Text response from AI
            perspective: Perspective type (not used by the extraction)

        Returns:
            Extracted data dictionary
        """
        data = {
            "summary": "",
            "key_insights": [],
            "focus_areas": [],
            "recommendations": [],
            "confidence_score": 0.7,
        }

        # Checked in this order; the first matching section wins.
        section_keywords = (
            ("summary", ("summary", "overview")),
            ("key_insights", ("insights", "key points")),
            ("recommendations", ("recommendations", "actions")),
        )
        bullet_markers = ("-", "•", "*")

        current_section = None
        for raw_line in response.split("\n"):
            line = raw_line.strip()
            if not line:
                continue

            lowered = line.lower()
            header = next(
                (name for name, keywords in section_keywords
                 if any(keyword in lowered for keyword in keywords)),
                None,
            )
            if header is not None:
                current_section = header
                continue

            if current_section == "summary":
                if not data["summary"]:
                    data["summary"] = line
            elif current_section in ("key_insights", "recommendations"):
                if line.startswith(bullet_markers):
                    data[current_section].append(line.lstrip("-•* "))

        # If no structured content found, use the first non-blank paragraph
        # as summary (a response may start with empty lines).
        if not data["summary"]:
            paragraphs = [part.strip() for part in response.split("\n\n") if part.strip()]
            data["summary"] = paragraphs[0] if paragraphs else response.strip()[:300]

        return data

    def _calculate_quality_score(self, analyses: List[PerspectiveAnalysis]) -> float:
        """Calculate overall quality score from perspective analyses.

        The score is 70% average confidence and 30% average completeness,
        where completeness measures insights against a target of 5 and
        recommendations against a target of 3.

        Args:
            analyses: List of perspective analyses

        Returns:
            Quality score between 0.0 and 1.0
        """
        if not analyses:
            return 0.0

        # Confidence is model-reported, so clamp it to keep the documented
        # 0.0-1.0 range even if the model answers e.g. 85 instead of 0.85.
        confidences = [
            min(max(analysis.confidence_score, 0.0), 1.0) for analysis in analyses
        ]
        avg_confidence = sum(confidences) / len(confidences)

        completeness_scores = []
        for analysis in analyses:
            insight_score = min(len(analysis.key_insights) / 5.0, 1.0)  # Target 5 insights
            rec_score = min(len(analysis.recommendations) / 3.0, 1.0)  # Target 3 recommendations
            completeness_scores.append((insight_score + rec_score) / 2.0)
        avg_completeness = sum(completeness_scores) / len(completeness_scores)

        quality_score = (avg_confidence * 0.7) + (avg_completeness * 0.3)
        return round(quality_score, 2)

    def _extract_unified_insights(
        self,
        analyses: List[PerspectiveAnalysis],
        synthesis_summary: str
    ) -> List[str]:
        """Extract unified insights from all analyses.

        Takes up to three insights per perspective, then up to three from the
        synthesis text when that text is itself a JSON object with a
        ``unified_insights`` list. An insight text is included only once,
        under the first source that reported it.

        Args:
            analyses: List of perspective analyses
            synthesis_summary: Synthesis summary text

        Returns:
            List of unified insights (at most 12), each prefixed with its
            source label
        """
        import json

        unified_insights = []
        # Track raw insight texts: the stored strings carry a "[Label] "
        # prefix, so checking membership against them would never match.
        seen = set()

        for analysis in analyses:
            label = analysis.agent_type.value.title()
            for insight in analysis.key_insights[:3]:  # Top 3 from each perspective
                if insight and insight not in seen:
                    seen.add(insight)
                    unified_insights.append(f"[{label}] {insight}")

        # Add synthesis-specific insights if the synthesis text is JSON
        text = (synthesis_summary or "").strip()
        if text.startswith("{"):
            try:
                synthesis_data = json.loads(text)
            except ValueError:
                synthesis_data = None  # Not JSON after all; nothing to add
            if isinstance(synthesis_data, dict):
                extra = synthesis_data.get("unified_insights")
                if isinstance(extra, list):
                    for insight in extra[:3]:
                        if isinstance(insight, str) and insight and insight not in seen:
                            seen.add(insight)
                            unified_insights.append(f"[Synthesis] {insight}")

        return unified_insights[:12]  # Limit to 12 total insights

    def _create_fallback_synthesis(self, analyses: List[PerspectiveAnalysis]) -> str:
        """Create basic synthesis when AI synthesis fails.

        Args:
            analyses: List of perspective analyses

        Returns:
            Fallback synthesis text: an intro naming the perspectives, one
            paragraph per perspective with up to 200 characters of its
            summary, and a fixed closing sentence
        """
        excerpt_limit = 200
        perspective_names = ", ".join(analysis.agent_type.value for analysis in analyses)

        parts = [
            f"This video was analyzed from {len(analyses)} different perspectives: {perspective_names}.\n\n"
        ]
        for analysis in analyses:
            summary = analysis.summary or ""
            # Only mark the excerpt with an ellipsis when text was cut off.
            ellipsis = "..." if len(summary) > excerpt_limit else ""
            parts.append(
                f"From a {analysis.agent_type.value} standpoint: {summary[:excerpt_limit]}{ellipsis}\n\n"
            )
        parts.append(
            "The combination of these perspectives provides a comprehensive understanding of the video content, "
            "addressing technical implementation, business value, and user experience considerations."
        )
        return "".join(parts)

    async def get_analysis_health(self) -> Dict[str, Any]:
        """Get health status of the multi-agent analysis service.

        When an AI service is configured this issues a small real request
        ("test", ``max_tokens=10``) to check connectivity.

        Returns:
            Service health information. ``status`` is "healthy", "degraded"
            (AI request raised) or "error" (no AI service configured).
        """
        health_info = {
            "service": "multi_agent_summarizer",
            "status": "healthy",
            "perspectives_available": len(self.perspective_prompts),
            "ai_service_available": self.ai_service is not None,
            "timestamp": datetime.now().isoformat(),
        }

        if not self.ai_service:
            health_info["ai_service_status"] = "not_configured"
            health_info["status"] = "error"
            return health_info

        # Test AI service connectivity
        try:
            await self.ai_service.generate_response("test", max_tokens=10)
        except Exception as e:
            # Record the cause; the health payload itself only carries a status.
            logger.warning("AI service health check failed: %s", e)
            health_info["ai_service_status"] = "connection_error"
            health_info["status"] = "degraded"
        else:
            health_info["ai_service_status"] = "connected"

        return health_info