# youtube-summarizer/backend/services/openai_summarizer.py
"""OpenAI GPT-4o-mini summarization service."""
import asyncio
import json
import time
from typing import List

import tiktoken
from openai import AsyncOpenAI

from .ai_service import AIService, SummaryRequest, SummaryResult, SummaryLength
from ..core.exceptions import AIServiceError, ErrorCode


class OpenAISummarizer(AIService):
    """OpenAI-based summarization service using GPT-4o-mini."""

    def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
        """Initialize OpenAI summarizer.

        Args:
            api_key: OpenAI API key
            model: Model to use (default: gpt-4o-mini for cost efficiency)
        """
        self.client = AsyncOpenAI(api_key=api_key)
        self.model = model
        # Match the tokenizer to the model: gpt-4o-family models use the
        # o200k_base encoding; cl100k_base applies to older GPT-4 models.
        try:
            self.encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            self.encoding = tiktoken.get_encoding("o200k_base")
        # gpt-4o-mini pricing (as of 2025): $0.15 per 1M input tokens and
        # $0.60 per 1M output tokens, expressed here per 1K tokens.
        self.input_cost_per_1k = 0.00015
        self.output_cost_per_1k = 0.0006

    async def generate_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate a structured summary using OpenAI GPT-4o-mini."""
        # Hand long transcripts off to chunked processing (the threshold
        # leaves room for the prompt itself).
        if self.get_token_count(request.transcript) > 15000:
            return await self._generate_chunked_summary(request)

        prompt = self._build_summary_prompt(request)
        try:
            start_time = time.time()
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are an expert content summarizer specializing in YouTube video analysis."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,  # Lower temperature for consistent summaries
                max_tokens=self._get_max_tokens(request.length),
                response_format={"type": "json_object"}  # Ensure structured JSON output
            )
            processing_time = time.time() - start_time
            usage = response.usage

            # Parse the structured JSON response
            result_data = json.loads(response.choices[0].message.content)

            # Calculate costs
            input_cost = (usage.prompt_tokens / 1000) * self.input_cost_per_1k
            output_cost = (usage.completion_tokens / 1000) * self.output_cost_per_1k
            total_cost = input_cost + output_cost

            return SummaryResult(
                summary=result_data.get("summary", ""),
                key_points=result_data.get("key_points", []),
                main_themes=result_data.get("main_themes", []),
                actionable_insights=result_data.get("actionable_insights", []),
                confidence_score=result_data.get("confidence_score", 0.85),
                processing_metadata={
                    "model": self.model,
                    "processing_time_seconds": processing_time,
                    "prompt_tokens": usage.prompt_tokens,
                    "completion_tokens": usage.completion_tokens,
                    "total_tokens": usage.total_tokens,
                    "chunks_processed": 1
                },
                cost_data={
                    "input_cost_usd": input_cost,
                    "output_cost_usd": output_cost,
                    "total_cost_usd": total_cost,
                    "cost_per_summary": total_cost
                }
            )
        except Exception as e:
            raise AIServiceError(
                message=f"OpenAI summarization failed: {e}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                details={
                    "model": self.model,
                    "transcript_length": len(request.transcript),
                    "error_type": type(e).__name__
                }
            ) from e

    def _build_summary_prompt(self, request: SummaryRequest) -> str:
        """Build an optimized prompt for summary generation."""
        length_instructions = {
            SummaryLength.BRIEF: "Generate a concise summary in 100-200 words",
            SummaryLength.STANDARD: "Generate a comprehensive summary in 300-500 words",
            SummaryLength.DETAILED: "Generate a detailed summary in 500-800 words"
        }

        focus_instruction = ""
        if request.focus_areas:
            focus_instruction = f"\nPay special attention to these areas: {', '.join(request.focus_areas)}"

        return f"""
Analyze this YouTube video transcript and provide a structured summary in JSON format.
{length_instructions[request.length]}.

Required JSON structure:
{{
    "summary": "Main summary text here",
    "key_points": ["Point 1", "Point 2", "Point 3", ...],
    "main_themes": ["Theme 1", "Theme 2", "Theme 3"],
    "actionable_insights": ["Insight 1", "Insight 2", ...],
    "confidence_score": 0.95
}}

Guidelines:
- Extract 3-7 key points that capture the most important information
- Identify 2-4 main themes or topics discussed
- Provide 2-5 actionable insights that viewers can apply
- Assign a confidence score (0.0-1.0) based on transcript quality and coherence
- Use clear, engaging language that's accessible to a general audience
- Focus on value and practical takeaways{focus_instruction}

Transcript:
{request.transcript}
"""

    async def _generate_chunked_summary(self, request: SummaryRequest) -> SummaryResult:
        """Handle long transcripts using a map-reduce approach."""
        # Split the transcript into manageable chunks
        chunks = self._split_transcript_intelligently(request.transcript)

        # Map: generate a brief summary for each chunk
        chunk_summaries = []
        total_cost = 0.0
        total_tokens = 0
        for chunk in chunks:
            chunk_request = SummaryRequest(
                transcript=chunk,
                length=SummaryLength.BRIEF,  # Brief summaries for chunks
                focus_areas=request.focus_areas,
                language=request.language
            )
            chunk_result = await self.generate_summary(chunk_request)
            chunk_summaries.append(chunk_result.summary)
            total_cost += chunk_result.cost_data["total_cost_usd"]
            total_tokens += chunk_result.processing_metadata["total_tokens"]
            # Small delay to respect rate limits
            await asyncio.sleep(0.1)

        # Reduce: combine the chunk summaries into a final summary
        combined_transcript = "\n\n".join(
            f"Section {i + 1} Summary: {summary}"
            for i, summary in enumerate(chunk_summaries)
        )
        final_request = SummaryRequest(
            transcript=combined_transcript,
            length=request.length,
            focus_areas=request.focus_areas,
            language=request.language
        )
        final_result = await self.generate_summary(final_request)

        # Update metadata to reflect chunked processing
        final_result.processing_metadata.update({
            "chunks_processed": len(chunks),
            "total_tokens": total_tokens + final_result.processing_metadata["total_tokens"],
            "chunking_strategy": "intelligent_content_boundaries"
        })
        final_result.cost_data["total_cost_usd"] += total_cost
        return final_result
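
    # Example of the splitter below (assumed sizes, for illustration): three
    # ~5,000-token paragraphs produce chunks [p1 + p2] and [p3] under the
    # 12k-token default, since adding p3 to the first chunk would cross the limit.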
    def _split_transcript_intelligently(self, transcript: str, max_tokens: int = 12000) -> List[str]:
        """Split transcript at natural boundaries while respecting token limits."""
        # Split by paragraphs first, falling back to sentences where needed
        paragraphs = transcript.split('\n\n')
        chunks = []
        current_chunk = []
        current_tokens = 0

        for paragraph in paragraphs:
            paragraph_tokens = self.get_token_count(paragraph)

            if paragraph_tokens > max_tokens:
                # A single paragraph exceeds the limit: flush the pending
                # paragraph chunk, then split this paragraph by sentences so
                # chunk boundaries and join delimiters stay consistent.
                if current_chunk:
                    chunks.append('\n\n'.join(current_chunk))
                    current_chunk = []
                    current_tokens = 0
                sentence_chunk = []
                sentence_chunk_tokens = 0
                for sentence in paragraph.split('. '):
                    sentence_tokens = self.get_token_count(sentence)
                    if sentence_chunk_tokens + sentence_tokens > max_tokens and sentence_chunk:
                        chunks.append('. '.join(sentence_chunk))
                        sentence_chunk = [sentence]
                        sentence_chunk_tokens = sentence_tokens
                    else:
                        sentence_chunk.append(sentence)
                        sentence_chunk_tokens += sentence_tokens
                if sentence_chunk:
                    chunks.append('. '.join(sentence_chunk))
            elif current_tokens + paragraph_tokens > max_tokens and current_chunk:
                chunks.append('\n\n'.join(current_chunk))
                current_chunk = [paragraph]
                current_tokens = paragraph_tokens
            else:
                current_chunk.append(paragraph)
                current_tokens += paragraph_tokens

        # Add the final chunk
        if current_chunk:
            chunks.append('\n\n'.join(current_chunk))
        return chunks

    def _get_max_tokens(self, length: SummaryLength) -> int:
        """Get the max output tokens for a given summary length."""
        return {
            SummaryLength.BRIEF: 300,
            SummaryLength.STANDARD: 700,
            SummaryLength.DETAILED: 1200
        }[length]
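
    # Worked example for the estimate below (illustrative, using the assumed
    # 2025 rates set in __init__): a 10,000-token transcript at STANDARD length
    # (700-token output cap) comes to
    # (10000 / 1000) * 0.00015 + (700 / 1000) * 0.0006 ≈ $0.0019.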
    def estimate_cost(self, transcript: str, length: SummaryLength) -> float:
        """Estimate the cost in USD of summarizing a transcript."""
        input_tokens = self.get_token_count(transcript)
        output_tokens = self._get_max_tokens(length)
        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k
        return input_cost + output_cost

    def get_token_count(self, text: str) -> int:
        """Get an accurate token count for the configured model."""
        return len(self.encoding.encode(text))
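

# --- Usage sketch (illustrative only, not part of the service) ---
# A minimal sketch of driving the summarizer end to end. It assumes an
# OPENAI_API_KEY environment variable and a placeholder transcript; the
# focus_areas and language values are arbitrary examples. Run it as a module
# (python -m backend.services.openai_summarizer) so the relative imports resolve.
if __name__ == "__main__":
    import os

    async def _demo() -> None:
        summarizer = OpenAISummarizer(api_key=os.environ["OPENAI_API_KEY"])
        request = SummaryRequest(
            transcript="(paste a transcript here)",
            length=SummaryLength.STANDARD,
            focus_areas=["key takeaways"],
            language="en",
        )
        # Check the projected cost before spending tokens, then summarize.
        print(f"Estimated cost: ${summarizer.estimate_cost(request.transcript, request.length):.4f}")
        result = await summarizer.generate_summary(request)
        print(result.summary)

    asyncio.run(_demo())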