"""OpenAI GPT-4o-mini summarization service."""

import asyncio
import json
import time
from typing import Dict, Iterator, List, Optional, Tuple

import tiktoken
from openai import AsyncOpenAI

from .ai_service import AIService, SummaryRequest, SummaryResult, SummaryLength
from ..core.exceptions import AIServiceError, ErrorCode


class OpenAISummarizer(AIService):
    """OpenAI-based summarization service using GPT-4o-mini.

    Short transcripts are summarized with a single chat-completion call.
    Transcripts above ``_CHUNKING_THRESHOLD_TOKENS`` are split into chunks,
    each chunk is summarized briefly, and the chunk summaries are then
    summarized again into the final result (map-reduce).
    """

    # Transcripts longer than this are chunked; leaves room for the prompt.
    _CHUNKING_THRESHOLD_TOKENS = 15000

    def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
        """Initialize OpenAI summarizer.

        Args:
            api_key: OpenAI API key
            model: Model to use (default: gpt-4o-mini for cost efficiency)
        """
        self.client = AsyncOpenAI(api_key=api_key)
        self.model = model

        # Use cl100k_base encoding for GPT-4 models
        self.encoding = tiktoken.get_encoding("cl100k_base")

        # Cost per 1K tokens (as of 2025)
        self.input_cost_per_1k = 0.00015  # $0.15 per 1M input tokens
        self.output_cost_per_1k = 0.0006  # $0.60 per 1M output tokens

    async def generate_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate structured summary using OpenAI GPT-4o-mini.

        Args:
            request: Summary request; its transcript, length and focus areas
                are used to build the prompt.

        Returns:
            The structured summary with processing metadata and cost data.

        Raises:
            AIServiceError: If the API call fails or the response cannot be
                parsed as JSON.
        """
        # Handle long transcripts with chunking
        if self.get_token_count(request.transcript) > self._CHUNKING_THRESHOLD_TOKENS:
            return await self._generate_chunked_summary(request)
        return await self._summarize_single(request)

    async def _summarize_single(self, request: SummaryRequest) -> SummaryResult:
        """Summarize a transcript with exactly one API call (never chunks).

        Raises:
            AIServiceError: Wraps any exception raised while calling the API
                or parsing its response; the original exception is chained
                as ``__cause__``.
        """
        prompt = self._build_summary_prompt(request)

        try:
            start_time = time.perf_counter()

            response = await self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are an expert content summarizer specializing in YouTube video analysis."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3,  # Lower temperature for consistent summaries
                max_tokens=self._get_max_tokens(request.length),
                response_format={"type": "json_object"},  # Ensure structured JSON response
            )

            processing_time = time.perf_counter() - start_time
            usage = response.usage

            # Parse structured response
            result_data = json.loads(response.choices[0].message.content)

            # Calculate costs
            input_cost = (usage.prompt_tokens / 1000) * self.input_cost_per_1k
            output_cost = (usage.completion_tokens / 1000) * self.output_cost_per_1k
            total_cost = input_cost + output_cost

            return SummaryResult(
                summary=result_data.get("summary", ""),
                key_points=result_data.get("key_points", []),
                main_themes=result_data.get("main_themes", []),
                actionable_insights=result_data.get("actionable_insights", []),
                confidence_score=result_data.get("confidence_score", 0.85),
                processing_metadata={
                    "model": self.model,
                    "processing_time_seconds": processing_time,
                    "prompt_tokens": usage.prompt_tokens,
                    "completion_tokens": usage.completion_tokens,
                    "total_tokens": usage.total_tokens,
                    "chunks_processed": 1,
                },
                cost_data={
                    "input_cost_usd": input_cost,
                    "output_cost_usd": output_cost,
                    "total_cost_usd": total_cost,
                    "cost_per_summary": total_cost,
                },
            )

        except Exception as e:
            # Boundary conversion to the domain error; keep the cause chained
            # so the original traceback is not lost.
            raise AIServiceError(
                message=f"OpenAI summarization failed: {str(e)}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                details={
                    "model": self.model,
                    "transcript_length": len(request.transcript),
                    "error_type": type(e).__name__,
                },
            ) from e

    def _build_summary_prompt(self, request: SummaryRequest) -> str:
        """Build optimized prompt for summary generation.

        Raises:
            KeyError: If ``request.length`` is not a known ``SummaryLength``.
        """
        # NOTE(review): request.language is not referenced in the prompt —
        # confirm whether the output language should be requested here.
        length_instructions = {
            SummaryLength.BRIEF: "Generate a concise summary in 100-200 words",
            SummaryLength.STANDARD: "Generate a comprehensive summary in 300-500 words",
            SummaryLength.DETAILED: "Generate a detailed summary in 500-800 words",
        }

        focus_instruction = ""
        if request.focus_areas:
            focus_instruction = f"\nPay special attention to these areas: {', '.join(request.focus_areas)}"

        return f"""
Analyze this YouTube video transcript and provide a structured summary in JSON format.

{length_instructions[request.length]}.

Required JSON structure:
{{
    "summary": "Main summary text here",
    "key_points": ["Point 1", "Point 2", "Point 3", ...],
    "main_themes": ["Theme 1", "Theme 2", "Theme 3"],
    "actionable_insights": ["Insight 1", "Insight 2", ...],
    "confidence_score": 0.95
}}

Guidelines:
- Extract 3-7 key points that capture the most important information
- Identify 2-4 main themes or topics discussed
- Provide 2-5 actionable insights that viewers can apply
- Assign a confidence score (0.0-1.0) based on transcript quality and coherence
- Use clear, engaging language that's accessible to a general audience
- Focus on value and practical takeaways{focus_instruction}

Transcript:
{request.transcript}
"""

    async def _generate_chunked_summary(self, request: SummaryRequest) -> SummaryResult:
        """Handle long transcripts using map-reduce approach.

        Each chunk is summarized with a single, non-recursive API call, so a
        chunk can never send this method back into itself. Token counts,
        costs and API time of all chunk calls are added to the final
        result's metadata and cost data.

        Raises:
            AIServiceError: If any chunk call or the final call fails.
        """
        # Split transcript into manageable chunks
        chunks = self._split_transcript_intelligently(request.transcript)

        # Generate summary for each chunk (map step)
        chunk_summaries: List[str] = []
        prompt_tokens = 0
        completion_tokens = 0
        total_tokens = 0
        processing_time = 0.0
        input_cost = 0.0
        output_cost = 0.0
        total_cost = 0.0

        for chunk in chunks:
            chunk_request = SummaryRequest(
                transcript=chunk,
                length=SummaryLength.BRIEF,  # Brief summaries for chunks
                focus_areas=request.focus_areas,
                language=request.language,
            )

            chunk_result = await self._summarize_single(chunk_request)
            chunk_summaries.append(chunk_result.summary)

            chunk_meta = chunk_result.processing_metadata
            prompt_tokens += chunk_meta["prompt_tokens"]
            completion_tokens += chunk_meta["completion_tokens"]
            total_tokens += chunk_meta["total_tokens"]
            processing_time += chunk_meta["processing_time_seconds"]

            chunk_costs = chunk_result.cost_data
            input_cost += chunk_costs["input_cost_usd"]
            output_cost += chunk_costs["output_cost_usd"]
            total_cost += chunk_costs["total_cost_usd"]

            # Add delay to respect rate limits
            await asyncio.sleep(0.1)

        # Combine chunk summaries into final summary (reduce step)
        combined_transcript = "\n\n".join(
            f"Section {i+1} Summary: {summary}"
            for i, summary in enumerate(chunk_summaries)
        )

        final_request = SummaryRequest(
            transcript=combined_transcript,
            length=request.length,
            focus_areas=request.focus_areas,
            language=request.language,
        )

        # The combined summaries may themselves exceed the threshold, in
        # which case this goes through another (smaller) chunked round.
        final_result = await self.generate_summary(final_request)

        # Update metadata to reflect chunked processing
        final_meta = final_result.processing_metadata
        final_meta.update({
            "chunks_processed": len(chunks),
            "prompt_tokens": prompt_tokens + final_meta["prompt_tokens"],
            "completion_tokens": completion_tokens + final_meta["completion_tokens"],
            "total_tokens": total_tokens + final_meta["total_tokens"],
            "processing_time_seconds": processing_time + final_meta["processing_time_seconds"],
            "chunking_strategy": "intelligent_content_boundaries",
        })

        # Keep every cost field consistent with the aggregated total.
        final_costs = final_result.cost_data
        final_costs["input_cost_usd"] = input_cost + final_costs["input_cost_usd"]
        final_costs["output_cost_usd"] = output_cost + final_costs["output_cost_usd"]
        final_costs["total_cost_usd"] = total_cost + final_costs["total_cost_usd"]
        final_costs["cost_per_summary"] = final_costs["total_cost_usd"]

        return final_result

    def _split_transcript_intelligently(self, transcript: str, max_tokens: int = 12000) -> List[str]:
        """Split transcript at natural boundaries while respecting token limits.

        Paragraphs (separated by a blank line) are packed greedily into
        chunks. A paragraph that alone exceeds ``max_tokens`` is split into
        sentences, and a sentence that still exceeds the limit is cut into
        windows of ``max_tokens`` tokens.

        Args:
            transcript: Full transcript text.
            max_tokens: Token budget per chunk; must be at least 1.

        Returns:
            The chunks in transcript order. Separator tokens between pieces
            are not counted, so a chunk may exceed the budget marginally.

        Raises:
            ValueError: If ``max_tokens`` is less than 1.
        """
        if max_tokens < 1:
            raise ValueError("max_tokens must be at least 1")

        chunks: List[str] = []
        parts: List[str] = []
        tokens_in_chunk = 0

        for separator, text, token_count in self._iter_transcript_pieces(transcript, max_tokens):
            # Close the current chunk when the next piece would overflow it.
            if parts and tokens_in_chunk + token_count > max_tokens:
                chunks.append("".join(parts))
                parts = []
                tokens_in_chunk = 0

            if parts:
                parts.append(separator)
            parts.append(text)
            tokens_in_chunk += token_count

        # Add final chunk
        if parts:
            chunks.append("".join(parts))

        return chunks

    def _iter_transcript_pieces(self, transcript: str, max_tokens: int) -> Iterator[Tuple[str, str, int]]:
        """Yield ``(separator, text, token_count)`` pieces of at most ``max_tokens`` tokens.

        ``separator`` is the string that joins the piece to the one before
        it when both land in the same chunk: a blank line between
        paragraphs, a space between sentences, and nothing between windows
        cut from one oversized sentence.
        """
        for paragraph in transcript.split("\n\n"):
            paragraph_tokens = self.get_token_count(paragraph)
            if paragraph_tokens <= max_tokens:
                yield "\n\n", paragraph, paragraph_tokens
                continue

            # Paragraph alone is over budget: fall back to sentences.
            sentences = paragraph.split(". ")
            last_index = len(sentences) - 1
            separator = "\n\n"
            for index, sentence in enumerate(sentences):
                if index < last_index:
                    # split(". ") consumed the period; put it back.
                    sentence += "."

                token_ids = self.encoding.encode(sentence, disallowed_special=())
                fits = len(token_ids) <= max_tokens
                for start in range(0, len(token_ids), max_tokens):
                    window = token_ids[start:start + max_tokens]
                    # Only decode when the sentence really had to be cut, so
                    # ordinary sentences are passed through unchanged.
                    text = sentence if fits else self.encoding.decode(window)
                    yield separator, text, len(window)
                    separator = ""
                separator = " "

    def _get_max_tokens(self, length: SummaryLength) -> int:
        """Get max output tokens based on summary length.

        Raises:
            KeyError: If ``length`` is not a known ``SummaryLength``.
        """
        # NOTE(review): the JSON response carries key points, themes and
        # insights besides the summary text — confirm these budgets are
        # large enough to avoid truncated (unparseable) JSON.
        return {
            SummaryLength.BRIEF: 300,
            SummaryLength.STANDARD: 700,
            SummaryLength.DETAILED: 1200,
        }[length]

    def estimate_cost(self, transcript: str, length: SummaryLength) -> float:
        """Estimate cost for summarizing transcript.

        The estimate models a single call: the transcript's tokens as input
        and the maximum output budget for ``length`` as output. Prompt
        scaffolding and the extra calls made for chunked transcripts are
        not included.

        Args:
            transcript: Transcript text to be summarized.
            length: Requested summary length.

        Returns:
            Estimated cost in USD.
        """
        input_tokens = self.get_token_count(transcript)
        output_tokens = self._get_max_tokens(length)

        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k

        return input_cost + output_cost

    def get_token_count(self, text: str) -> int:
        """Get accurate token count for OpenAI model.

        Text that happens to look like a special token (for example
        ``<|endoftext|>`` inside a transcript) is counted as ordinary text
        instead of raising.
        """
        return len(self.encoding.encode(text, disallowed_special=()))