"""Anthropic Claude summarization service.""" import asyncio import json import time import sys import os from typing import Dict, List, Optional import re from anthropic import AsyncAnthropic # Add library path to import BaseAIService lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../lib')) if lib_path not in sys.path: sys.path.insert(0, lib_path) try: from ai_assistant_lib.services.ai.base_ai_service import BaseAIService, AIModelConfig, AIRequest, AIResponse except ImportError: # Fallback to old implementation if library not available from .ai_service import AIService as BaseAIService # Create dummy classes for compatibility class AIModelConfig: def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) class AIRequest: def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) class AIResponse: def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) from .ai_service import SummaryRequest, SummaryResult, SummaryLength from ..core.exceptions import AIServiceError, ErrorCode class AnthropicSummarizer(BaseAIService): """Anthropic Claude-based summarization service.""" def __init__(self, api_key: str, model: str = "claude-3-5-haiku-20241022"): """Initialize Anthropic summarizer. Args: api_key: Anthropic API key model: Model to use (default: claude-3-5-haiku for cost efficiency) """ config = AIModelConfig( model_name=model, temperature=0.3, max_tokens=8192, timeout_seconds=120, max_retries=3, backoff_factor=2.0 ) # Initialize BaseAIService super().__init__( name="anthropic-summarizer", api_key=api_key, default_config=config ) # Cost per 1K tokens (as of 2025) - Claude 3.5 Haiku self.input_cost_per_1k = 0.00025 # $0.25 per 1M input tokens self.output_cost_per_1k = 0.00125 # $1.25 per 1M output tokens # Token limits for Claude models self.max_tokens_input = 200000 # 200k context window self.max_tokens_output = 8192 # Max output tokens async def _create_client(self): """Create the Anthropic client.""" return AsyncAnthropic(api_key=self.api_key) async def _make_prediction(self, request: AIRequest) -> AIResponse: """Make prediction using Anthropic Claude.""" try: response = await self._client.messages.create( model=request.model_config.model_name, max_tokens=request.model_config.max_tokens or self.max_tokens_output, temperature=request.model_config.temperature, messages=[{"role": "user", "content": request.prompt}] ) response_text = response.content[0].text return AIResponse( request_id=request.request_id, content=response_text, model_name=request.model_config.model_name, usage={ "input_tokens": response.usage.input_tokens, "output_tokens": response.usage.output_tokens, "total_tokens": response.usage.input_tokens + response.usage.output_tokens } ) except Exception as e: from ai_assistant_lib.core.exceptions import AIServiceError as LibAIServiceError raise LibAIServiceError( service_name=self.name, operation="_make_prediction", details={ "error": str(e), "model": request.model_config.model_name } ) from e async def generate_summary(self, request: SummaryRequest) -> SummaryResult: """Generate structured summary using Anthropic Claude.""" # Handle very long transcripts with chunking estimated_tokens = self.get_token_count(request.transcript) if estimated_tokens > 150000: # Leave room for prompt and response return await self._generate_chunked_summary(request) prompt = self._build_summary_prompt(request) try: # Create model config for this request model_config = AIModelConfig( 
    async def _create_client(self):
        """Create the Anthropic client."""
        return AsyncAnthropic(api_key=self.api_key)

    async def _make_prediction(self, request: AIRequest) -> AIResponse:
        """Make prediction using Anthropic Claude."""
        try:
            response = await self._client.messages.create(
                model=request.model_config.model_name,
                max_tokens=request.model_config.max_tokens or self.max_tokens_output,
                temperature=request.model_config.temperature,
                messages=[{"role": "user", "content": request.prompt}]
            )
            response_text = response.content[0].text

            return AIResponse(
                request_id=request.request_id,
                content=response_text,
                model_name=request.model_config.model_name,
                usage={
                    "input_tokens": response.usage.input_tokens,
                    "output_tokens": response.usage.output_tokens,
                    "total_tokens": response.usage.input_tokens + response.usage.output_tokens
                }
            )
        except Exception as e:
            # Prefer the library exception, but fall back to the local one when
            # ai_assistant_lib is not installed (the import would otherwise fail here)
            try:
                from ai_assistant_lib.core.exceptions import AIServiceError as LibAIServiceError
            except ImportError:
                raise AIServiceError(
                    message=f"Anthropic prediction failed: {str(e)}",
                    error_code=ErrorCode.AI_SERVICE_ERROR,
                    details={
                        "error": str(e),
                        "model": request.model_config.model_name
                    }
                ) from e
            raise LibAIServiceError(
                service_name=self.name,
                operation="_make_prediction",
                details={
                    "error": str(e),
                    "model": request.model_config.model_name
                }
            ) from e

    async def generate_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate structured summary using Anthropic Claude."""
        # Handle very long transcripts with chunking
        estimated_tokens = self.get_token_count(request.transcript)
        if estimated_tokens > 150000:  # Leave room for prompt and response
            return await self._generate_chunked_summary(request)

        prompt = self._build_summary_prompt(request)

        try:
            # Create model config for this request
            model_config = AIModelConfig(
                model_name=self.default_config.model_name,
                temperature=0.3,
                max_tokens=self._get_max_tokens(request.length),
                timeout_seconds=self.default_config.timeout_seconds
            )

            # Use BaseAIService predict method with retry, rate limiting, etc.
            response = await self.predict(
                prompt=prompt,
                model_config=model_config
            )

            # Extract JSON from response
            result_data = self._extract_json_from_response(response.content)

            # Calculate costs
            input_tokens = response.usage.get("input_tokens", 0)
            output_tokens = response.usage.get("output_tokens", 0)
            input_cost = (input_tokens / 1000) * self.input_cost_per_1k
            output_cost = (output_tokens / 1000) * self.output_cost_per_1k
            total_cost = input_cost + output_cost

            return SummaryResult(
                summary=result_data.get("summary", ""),
                key_points=result_data.get("key_points", []),
                main_themes=result_data.get("main_themes", []),
                actionable_insights=result_data.get("actionable_insights", []),
                confidence_score=result_data.get("confidence_score", 0.85),
                processing_metadata={
                    "model": response.model_name,
                    "processing_time_seconds": response.processing_time_ms / 1000 if response.processing_time_ms else 0,
                    "input_tokens": input_tokens,
                    "output_tokens": output_tokens,
                    "total_tokens": input_tokens + output_tokens,
                    "chunks_processed": 1
                },
                cost_data={
                    "input_cost_usd": input_cost,
                    "output_cost_usd": output_cost,
                    "total_cost_usd": total_cost,
                    "cost_per_summary": total_cost
                }
            )
        except Exception as e:
            raise AIServiceError(
                message=f"Anthropic summarization failed: {str(e)}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                details={
                    "model": self.default_config.model_name,
                    "transcript_length": len(request.transcript),
                    "error_type": type(e).__name__
                }
            ) from e

    def _extract_json_from_response(self, response_text: str) -> dict:
        """Extract JSON from Claude's response, which may include additional text."""
        try:
            # First try direct JSON parsing
            return json.loads(response_text)
        except json.JSONDecodeError:
            # Look for a JSON block in the response
            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
            if json_match:
                try:
                    return json.loads(json_match.group())
                except json.JSONDecodeError:
                    pass
            # Fallback: create structure from response text
            return self._parse_structured_response(response_text)

    def _parse_structured_response(self, response_text: str) -> dict:
        """Parse structured response when JSON parsing fails."""
        # This is a fallback parser for when Claude doesn't return pure JSON
        lines = response_text.split('\n')
        summary = ""
        key_points = []
        main_themes = []
        actionable_insights = []
        confidence_score = 0.85
        current_section = None

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Detect sections
            if "summary" in line.lower() and ":" in line:
                current_section = "summary"
                summary = line.split(":", 1)[1].strip()
                continue
            elif "key points" in line.lower() or "key_points" in line.lower():
                current_section = "key_points"
                continue
            elif "main themes" in line.lower() or "main_themes" in line.lower():
                current_section = "main_themes"
                continue
            elif "actionable insights" in line.lower() or "actionable_insights" in line.lower():
                current_section = "actionable_insights"
                continue
            elif "confidence" in line.lower():
                # Extract confidence score
                numbers = re.findall(r'0?\.\d+|\d+', line)
                if numbers:
                    confidence_score = float(numbers[0])
                    # Normalize values parsed as percentages (e.g. "85" -> 0.85)
                    if confidence_score > 1.0:
                        confidence_score = min(confidence_score / 100.0, 1.0)
                continue

            # Add content to appropriate section
            if current_section == "summary" and summary == "":
                summary = line
            elif current_section == "key_points" and line.startswith(('-', '•', '*')):
                key_points.append(line[1:].strip())
            elif current_section == "main_themes" and line.startswith(('-', '•', '*')):
                main_themes.append(line[1:].strip())
            elif current_section == "actionable_insights" and line.startswith(('-', '•', '*')):
                actionable_insights.append(line[1:].strip())

        return {
            "summary": summary,
            "key_points": key_points,
            "main_themes": main_themes,
            "actionable_insights": actionable_insights,
            "confidence_score": confidence_score
        }
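
    # Illustrative input for the fallback parser above (not a guaranteed model
    # output format). A plain-text response such as:
    #
    #   Summary: The video explains X.
    #   Key Points:
    #   - First point
    #   - Second point
    #   Confidence: 0.9
    #
    # yields {"summary": "The video explains X.",
    #         "key_points": ["First point", "Second point"],
    #         "main_themes": [], "actionable_insights": [],
    #         "confidence_score": 0.9}.
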
    def _build_summary_prompt(self, request: SummaryRequest) -> str:
        """Build optimized prompt for Claude summary generation."""
        length_instructions = {
            SummaryLength.BRIEF: "Generate a concise summary in 100-200 words",
            SummaryLength.STANDARD: "Generate a comprehensive summary in 300-500 words",
            SummaryLength.DETAILED: "Generate a detailed summary in 500-800 words"
        }

        focus_instruction = ""
        if request.focus_areas:
            focus_instruction = f"\nPay special attention to these areas: {', '.join(request.focus_areas)}"

        return f"""
Analyze this YouTube video transcript and provide a structured summary in JSON format.

{length_instructions[request.length]}.

Please respond with a valid JSON object in this exact format:
{{
    "summary": "Main summary text here",
    "key_points": ["Point 1", "Point 2", "Point 3"],
    "main_themes": ["Theme 1", "Theme 2", "Theme 3"],
    "actionable_insights": ["Insight 1", "Insight 2"],
    "confidence_score": 0.95
}}

Guidelines:
- Extract 3-7 key points that capture the most important information
- Identify 2-4 main themes or topics discussed
- Provide 2-5 actionable insights that viewers can apply
- Assign a confidence score (0.0-1.0) based on transcript quality and coherence
- Use clear, engaging language that's accessible to a general audience
- Focus on value and practical takeaways{focus_instruction}

Transcript:
{request.transcript}
"""

    async def _generate_chunked_summary(self, request: SummaryRequest) -> SummaryResult:
        """Handle very long transcripts using map-reduce approach."""
        # Split transcript into manageable chunks
        chunks = self._split_transcript_intelligently(request.transcript)

        # Generate summary for each chunk
        chunk_summaries = []
        total_cost = 0.0
        total_tokens = 0

        for i, chunk in enumerate(chunks):
            chunk_request = SummaryRequest(
                transcript=chunk,
                length=SummaryLength.BRIEF,  # Brief summaries for chunks
                focus_areas=request.focus_areas,
                language=request.language
            )
            chunk_result = await self.generate_summary(chunk_request)
            chunk_summaries.append(chunk_result.summary)
            total_cost += chunk_result.cost_data["total_cost_usd"]
            total_tokens += chunk_result.processing_metadata["total_tokens"]

            # Add delay to respect rate limits
            await asyncio.sleep(0.1)

        # Combine chunk summaries into final summary
        combined_transcript = "\n\n".join([
            f"Section {i+1} Summary: {summary}"
            for i, summary in enumerate(chunk_summaries)
        ])

        final_request = SummaryRequest(
            transcript=combined_transcript,
            length=request.length,
            focus_areas=request.focus_areas,
            language=request.language
        )
        final_result = await self.generate_summary(final_request)

        # Update metadata to reflect chunked processing
        final_result.processing_metadata.update({
            "chunks_processed": len(chunks),
            "total_tokens": total_tokens + final_result.processing_metadata["total_tokens"],
            "chunking_strategy": "intelligent_content_boundaries"
        })
        final_result.cost_data["total_cost_usd"] = total_cost + final_result.cost_data["total_cost_usd"]

        return final_result

    def _split_transcript_intelligently(self, transcript: str, max_tokens: int = 120000) -> List[str]:
        """Split transcript at natural boundaries while respecting token limits."""
        # Split by paragraphs first, then sentences if needed
        paragraphs = transcript.split('\n\n')
        chunks = []
        current_chunk = []
        current_tokens = 0

        for paragraph in paragraphs:
            paragraph_tokens = self.get_token_count(paragraph)

            # If a single paragraph exceeds the limit, split it by sentences
            if paragraph_tokens > max_tokens:
                sentences = paragraph.split('. ')
                for sentence in sentences:
                    sentence_tokens = self.get_token_count(sentence)
                    if current_tokens + sentence_tokens > max_tokens and current_chunk:
                        chunks.append(' '.join(current_chunk))
                        current_chunk = [sentence]
                        current_tokens = sentence_tokens
                    else:
                        current_chunk.append(sentence)
                        current_tokens += sentence_tokens
            else:
                if current_tokens + paragraph_tokens > max_tokens and current_chunk:
                    chunks.append('\n\n'.join(current_chunk))
                    current_chunk = [paragraph]
                    current_tokens = paragraph_tokens
                else:
                    current_chunk.append(paragraph)
                    current_tokens += paragraph_tokens

        # Add final chunk
        if current_chunk:
            chunks.append('\n\n'.join(current_chunk))

        return chunks
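
    # Sizing sketch: with get_token_count() at len(text) // 4, the 150,000-token
    # chunking threshold in generate_summary() corresponds to roughly 600,000
    # characters of transcript; at the default max_tokens of 120,000 per chunk,
    # that splits into ceil(150000 / 120000) = 2 chunks (boundaries permitting).
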
    def _get_max_tokens(self, length: SummaryLength) -> int:
        """Get max output tokens based on summary length."""
        return {
            SummaryLength.BRIEF: 400,
            SummaryLength.STANDARD: 800,
            SummaryLength.DETAILED: 1500
        }[length]

    def estimate_cost(self, transcript: str, length: SummaryLength) -> float:
        """Estimate cost for summarizing a transcript."""
        input_tokens = self.get_token_count(transcript)
        output_tokens = self._get_max_tokens(length)
        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k
        return input_cost + output_cost

    def get_token_count(self, text: str) -> int:
        """Estimate token count for Anthropic models (roughly 4 chars per token)."""
        # Anthropic's tokenization is similar to OpenAI's: roughly 4 characters per token
        return len(text) // 4
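

# Minimal usage sketch. Assumptions not confirmed by this module: the
# ANTHROPIC_API_KEY environment variable is set, and BaseAIService.predict()
# creates the client via _create_client() on first use (if the base class
# needs an explicit startup step, call it before generate_summary()). Because
# this module uses relative imports, run it with `python -m` from the package
# root rather than directly.
if __name__ == "__main__":
    async def _demo():
        summarizer = AnthropicSummarizer(api_key=os.environ["ANTHROPIC_API_KEY"])
        request = SummaryRequest(
            transcript="...full video transcript text...",
            length=SummaryLength.STANDARD,
            focus_areas=["machine learning"],  # hypothetical focus area
            language="en",
        )
        # Cheap pre-flight estimate before spending API credits
        print(f"Estimated cost: ${summarizer.estimate_cost(request.transcript, request.length):.4f}")
        result = await summarizer.generate_summary(request)
        print(result.summary)
        print(f"Actual cost: ${result.cost_data['total_cost_usd']:.4f}")

    asyncio.run(_demo())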