# youtube-summarizer/backend/services/anthropic_summarizer.py
"""Anthropic Claude summarization service."""
import asyncio
import json
import time
import sys
import os
from typing import Dict, List, Optional
import re
from anthropic import AsyncAnthropic
# Add library path to import BaseAIService
lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../lib'))
if lib_path not in sys.path:
sys.path.insert(0, lib_path)
try:
from ai_assistant_lib.services.ai.base_ai_service import BaseAIService, AIModelConfig, AIRequest, AIResponse
except ImportError:
# Fallback to old implementation if library not available
from .ai_service import AIService as BaseAIService
# Create dummy classes for compatibility
class AIModelConfig:
def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
class AIRequest:
def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
class AIResponse:
def __init__(self, **kwargs):
for k, v in kwargs.items():
setattr(self, k, v)
from .ai_service import SummaryRequest, SummaryResult, SummaryLength
from ..core.exceptions import AIServiceError, ErrorCode


class AnthropicSummarizer(BaseAIService):
    """Anthropic Claude-based summarization service."""

    def __init__(self, api_key: str, model: str = "claude-3-5-haiku-20241022"):
        """Initialize Anthropic summarizer.

        Args:
            api_key: Anthropic API key
            model: Model to use (default: claude-3-5-haiku for cost efficiency)
        """
        config = AIModelConfig(
            model_name=model,
            temperature=0.3,
            max_tokens=8192,
            timeout_seconds=120,
            max_retries=3,
            backoff_factor=2.0
        )

        # Initialize BaseAIService
        super().__init__(
            name="anthropic-summarizer",
            api_key=api_key,
            default_config=config
        )

        # Cost per 1K tokens - Claude 3.5 Haiku
        # ($0.80 per 1M input tokens, $4.00 per 1M output tokens
        # per Anthropic's published pricing)
        self.input_cost_per_1k = 0.0008
        self.output_cost_per_1k = 0.004

        # Token limits for Claude models
        self.max_tokens_input = 200000  # 200k context window
        self.max_tokens_output = 8192   # Max output tokens

    async def _create_client(self):
        """Create the Anthropic client."""
        return AsyncAnthropic(api_key=self.api_key)

    async def _make_prediction(self, request: AIRequest) -> AIResponse:
        """Make prediction using Anthropic Claude."""
        try:
            response = await self._client.messages.create(
                model=request.model_config.model_name,
                max_tokens=request.model_config.max_tokens or self.max_tokens_output,
                temperature=request.model_config.temperature,
                messages=[{"role": "user", "content": request.prompt}]
            )

            response_text = response.content[0].text

            return AIResponse(
                request_id=request.request_id,
                content=response_text,
                model_name=request.model_config.model_name,
                usage={
                    "input_tokens": response.usage.input_tokens,
                    "output_tokens": response.usage.output_tokens,
                    "total_tokens": response.usage.input_tokens + response.usage.output_tokens
                }
            )
        except Exception as e:
            from ai_assistant_lib.core.exceptions import AIServiceError as LibAIServiceError
            raise LibAIServiceError(
                service_name=self.name,
                operation="_make_prediction",
                details={
                    "error": str(e),
                    "model": request.model_config.model_name
                }
            ) from e

    async def generate_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate a structured summary using Anthropic Claude."""
        # Handle very long transcripts with chunking
        estimated_tokens = self.get_token_count(request.transcript)
        if estimated_tokens > 150000:  # Leave room for prompt and response
            return await self._generate_chunked_summary(request)

        prompt = self._build_summary_prompt(request)

        try:
            # Create a model config for this request
            model_config = AIModelConfig(
                model_name=self.default_config.model_name,
                temperature=0.3,
                max_tokens=self._get_max_tokens(request.length),
                timeout_seconds=self.default_config.timeout_seconds
            )

            # Use the BaseAIService predict method with retry, rate limiting, etc.
            response = await self.predict(
                prompt=prompt,
                model_config=model_config
            )

            # Extract JSON from the response
            result_data = self._extract_json_from_response(response.content)

            # Calculate costs
            input_tokens = response.usage.get("input_tokens", 0)
            output_tokens = response.usage.get("output_tokens", 0)
            input_cost = (input_tokens / 1000) * self.input_cost_per_1k
            output_cost = (output_tokens / 1000) * self.output_cost_per_1k
            total_cost = input_cost + output_cost

            return SummaryResult(
                summary=result_data.get("summary", ""),
                key_points=result_data.get("key_points", []),
                main_themes=result_data.get("main_themes", []),
                actionable_insights=result_data.get("actionable_insights", []),
                confidence_score=result_data.get("confidence_score", 0.85),
                processing_metadata={
                    "model": response.model_name,
                    "processing_time_seconds": response.processing_time_ms / 1000 if response.processing_time_ms else 0,
                    "input_tokens": input_tokens,
                    "output_tokens": output_tokens,
                    "total_tokens": input_tokens + output_tokens,
                    "chunks_processed": 1
                },
                cost_data={
                    "input_cost_usd": input_cost,
                    "output_cost_usd": output_cost,
                    "total_cost_usd": total_cost,
                    "cost_per_summary": total_cost
                }
            )
        except Exception as e:
            raise AIServiceError(
                message=f"Anthropic summarization failed: {str(e)}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                details={
                    "model": self.default_config.model_name,
                    "transcript_length": len(request.transcript),
                    "error_type": type(e).__name__
                }
            ) from e

    def _extract_json_from_response(self, response_text: str) -> dict:
        """Extract JSON from Claude's response, which may include additional text."""
        try:
            # First try direct JSON parsing
            return json.loads(response_text)
        except json.JSONDecodeError:
            # Look for a JSON block in the response
            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
            if json_match:
                try:
                    return json.loads(json_match.group())
                except json.JSONDecodeError:
                    pass

            # Fallback: create the structure from the response text
            return self._parse_structured_response(response_text)

    def _parse_structured_response(self, response_text: str) -> dict:
        """Parse a structured response when JSON parsing fails."""
        # This is a fallback parser for when Claude doesn't return pure JSON
        lines = response_text.split('\n')

        summary = ""
        key_points = []
        main_themes = []
        actionable_insights = []
        confidence_score = 0.85

        current_section = None
        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Detect sections
            if "summary" in line.lower() and ":" in line:
                current_section = "summary"
                summary = line.split(":", 1)[1].strip()
                continue
            elif "key points" in line.lower() or "key_points" in line.lower():
                current_section = "key_points"
                continue
            elif "main themes" in line.lower() or "main_themes" in line.lower():
                current_section = "main_themes"
                continue
            elif "actionable insights" in line.lower() or "actionable_insights" in line.lower():
                current_section = "actionable_insights"
                continue
            elif "confidence" in line.lower():
                # Extract the confidence score
                numbers = re.findall(r'0?\.\d+|\d+', line)
                if numbers:
                    confidence_score = float(numbers[0])
                continue

            # Add content to the appropriate section
            if current_section == "summary" and summary == "":
                summary = line
            elif current_section == "key_points" and line.startswith(('-', '•', '*')):
                key_points.append(line[1:].strip())
            elif current_section == "main_themes" and line.startswith(('-', '•', '*')):
                main_themes.append(line[1:].strip())
            elif current_section == "actionable_insights" and line.startswith(('-', '•', '*')):
                actionable_insights.append(line[1:].strip())

        return {
            "summary": summary,
            "key_points": key_points,
            "main_themes": main_themes,
            "actionable_insights": actionable_insights,
            "confidence_score": confidence_score
        }
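
    # Example of the kind of free-text response the fallback parser above can
    # recover (illustrative only -- the exact layout varies between responses):
    #
    #   Summary: The video walks through three budgeting strategies.
    #   Key Points:
    #   - Track every expense for a month
    #   - Automate savings transfers
    #   Confidence: 0.9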

    def _build_summary_prompt(self, request: SummaryRequest) -> str:
        """Build an optimized prompt for Claude summary generation."""
        length_instructions = {
            SummaryLength.BRIEF: "Generate a concise summary in 100-200 words",
            SummaryLength.STANDARD: "Generate a comprehensive summary in 300-500 words",
            SummaryLength.DETAILED: "Generate a detailed summary in 500-800 words"
        }

        focus_instruction = ""
        if request.focus_areas:
            focus_instruction = f"\nPay special attention to these areas: {', '.join(request.focus_areas)}"

        return f"""
Analyze this YouTube video transcript and provide a structured summary in JSON format.

{length_instructions[request.length]}.

Please respond with a valid JSON object in this exact format:
{{
    "summary": "Main summary text here",
    "key_points": ["Point 1", "Point 2", "Point 3"],
    "main_themes": ["Theme 1", "Theme 2", "Theme 3"],
    "actionable_insights": ["Insight 1", "Insight 2"],
    "confidence_score": 0.95
}}

Guidelines:
- Extract 3-7 key points that capture the most important information
- Identify 2-4 main themes or topics discussed
- Provide 2-5 actionable insights that viewers can apply
- Assign a confidence score (0.0-1.0) based on transcript quality and coherence
- Use clear, engaging language that's accessible to a general audience
- Focus on value and practical takeaways{focus_instruction}

Transcript:
{request.transcript}
"""

    async def _generate_chunked_summary(self, request: SummaryRequest) -> SummaryResult:
        """Handle very long transcripts using a map-reduce approach."""
        # Split the transcript into manageable chunks
        chunks = self._split_transcript_intelligently(request.transcript)

        # Generate a summary for each chunk
        chunk_summaries = []
        total_cost = 0.0
        total_tokens = 0

        for i, chunk in enumerate(chunks):
            chunk_request = SummaryRequest(
                transcript=chunk,
                length=SummaryLength.BRIEF,  # Brief summaries for chunks
                focus_areas=request.focus_areas,
                language=request.language
            )

            chunk_result = await self.generate_summary(chunk_request)
            chunk_summaries.append(chunk_result.summary)
            total_cost += chunk_result.cost_data["total_cost_usd"]
            total_tokens += chunk_result.processing_metadata["total_tokens"]

            # Add a delay to respect rate limits
            await asyncio.sleep(0.1)

        # Combine the chunk summaries into a final summary
        combined_transcript = "\n\n".join([
            f"Section {i+1} Summary: {summary}"
            for i, summary in enumerate(chunk_summaries)
        ])

        final_request = SummaryRequest(
            transcript=combined_transcript,
            length=request.length,
            focus_areas=request.focus_areas,
            language=request.language
        )
        final_result = await self.generate_summary(final_request)

        # Update the metadata to reflect chunked processing
        final_result.processing_metadata.update({
            "chunks_processed": len(chunks),
            "total_tokens": total_tokens + final_result.processing_metadata["total_tokens"],
            "chunking_strategy": "intelligent_content_boundaries"
        })
        final_result.cost_data["total_cost_usd"] = total_cost + final_result.cost_data["total_cost_usd"]

        return final_result
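
    # Map-reduce flow of the method above: each chunk is summarized at BRIEF
    # length (map), then the per-section summaries are joined and summarized
    # once more at the requested length (reduce). Costs and token counts from
    # both passes are accumulated into the final result's metadata.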

    def _split_transcript_intelligently(self, transcript: str, max_tokens: int = 120000) -> List[str]:
        """Split a transcript at natural boundaries while respecting token limits."""
        # Split by paragraphs first, then by sentences if needed
        paragraphs = transcript.split('\n\n')
        chunks = []
        current_chunk = []
        current_tokens = 0

        for paragraph in paragraphs:
            paragraph_tokens = self.get_token_count(paragraph)

            # If a single paragraph exceeds the limit, split it by sentences
            if paragraph_tokens > max_tokens:
                sentences = paragraph.split('. ')
                for sentence in sentences:
                    sentence_tokens = self.get_token_count(sentence)
                    if current_tokens + sentence_tokens > max_tokens and current_chunk:
                        chunks.append(' '.join(current_chunk))
                        current_chunk = [sentence]
                        current_tokens = sentence_tokens
                    else:
                        current_chunk.append(sentence)
                        current_tokens += sentence_tokens
            else:
                if current_tokens + paragraph_tokens > max_tokens and current_chunk:
                    chunks.append('\n\n'.join(current_chunk))
                    current_chunk = [paragraph]
                    current_tokens = paragraph_tokens
                else:
                    current_chunk.append(paragraph)
                    current_tokens += paragraph_tokens

        # Add the final chunk
        if current_chunk:
            chunks.append('\n\n'.join(current_chunk))

        return chunks

    def _get_max_tokens(self, length: SummaryLength) -> int:
        """Get max output tokens based on summary length."""
        return {
            SummaryLength.BRIEF: 400,
            SummaryLength.STANDARD: 800,
            SummaryLength.DETAILED: 1500
        }[length]

    def estimate_cost(self, transcript: str, length: SummaryLength) -> float:
        """Estimate the cost of summarizing a transcript."""
        input_tokens = self.get_token_count(transcript)
        output_tokens = self._get_max_tokens(length)

        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k

        return input_cost + output_cost
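
    # Worked example for estimate_cost (using the rates configured in
    # __init__): an 8,000-character transcript is ~2,000 tokens, so a
    # STANDARD summary (800 output tokens) costs roughly
    #   (2000 / 1000) * 0.0008 + (800 / 1000) * 0.004 ~= $0.0048.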

    def get_token_count(self, text: str) -> int:
        """Estimate the token count for an Anthropic model (roughly 4 chars per token)."""
        # Anthropic uses tokenization similar to OpenAI's: roughly 4 characters per token
        return len(text) // 4
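

# Minimal usage sketch (hedged): assumes SummaryRequest accepts the fields used
# elsewhere in this module (transcript, length, focus_areas, language) and that
# ANTHROPIC_API_KEY is set in the environment. Not part of the service itself.
if __name__ == "__main__":
    async def _demo():
        summarizer = AnthropicSummarizer(api_key=os.environ["ANTHROPIC_API_KEY"])
        request = SummaryRequest(
            transcript="(transcript text here)",
            length=SummaryLength.BRIEF,
            focus_areas=None,
            language="en"
        )
        # Preview the cost before spending tokens, then summarize
        print(f"Estimated cost: ${summarizer.estimate_cost(request.transcript, request.length):.4f}")
        result = await summarizer.generate_summary(request)
        print(result.summary)

    asyncio.run(_demo())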