# youtube-summarizer/backend/services/deepseek_summarizer.py
#
# 337 lines
# 12 KiB
# Python

"""DeepSeek V2 summarization service."""
import asyncio
import json
import time
from typing import Dict, List, Optional
import httpx
from .ai_service import AIService, SummaryRequest, SummaryResult, SummaryLength, ModelUsage
from ..core.exceptions import AIServiceError, ErrorCode
class DeepSeekSummarizer(AIService):
    """DeepSeek-based summarization service.

    Posts a transcript to the DeepSeek ``/chat/completions`` endpoint and
    converts the JSON reply into a ``SummaryResult``. Transcripts whose
    estimated token count exceeds ``MAX_SINGLE_REQUEST_TOKENS`` are split
    into chunks, each chunk is summarized briefly, and the joined chunk
    summaries are summarized once more.

    The instance owns an ``httpx.AsyncClient``. Use the object as an async
    context manager so the client is closed on exit.
    """

    # Heuristic used by get_token_count: one token per 4 characters.
    CHARS_PER_TOKEN = 4
    # Transcripts estimated above this size are chunked (DeepSeek context limit).
    MAX_SINGLE_REQUEST_TOKENS = 30000
    # Per-chunk budget; smaller than the limit to leave room for the prompt.
    CHUNK_TOKENS = 28000
    # Line prefixes recognised as list bullets by the plain-text fallback
    # parser. "\u2022" is the bullet glyph.
    _BULLET_PREFIXES = ("- ", "\u2022 ", "* ")

    def __init__(self, api_key: str, model: str = "deepseek-chat"):
        """Initialize DeepSeek summarizer.

        Args:
            api_key: DeepSeek API key
            model: Model to use (default: deepseek-chat)
        """
        self.api_key = api_key
        self.model = model
        self.base_url = "https://api.deepseek.com/v1"

        # Cost per 1K tokens (DeepSeek pricing)
        self.input_cost_per_1k = 0.00014  # $0.14 per 1M input tokens
        self.output_cost_per_1k = 0.00028  # $0.28 per 1M output tokens

        # HTTP client for API calls
        self.client = httpx.AsyncClient(
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            timeout=60.0,
        )

    async def generate_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate structured summary using DeepSeek.

        Args:
            request: Transcript plus the desired length and optional focus
                areas.

        Returns:
            A ``SummaryResult`` populated from the model's JSON reply, with
            token usage and a cost estimate attached.

        Raises:
            AIServiceError: ``RATE_LIMIT_ERROR`` on HTTP 429,
                ``AUTHENTICATION_ERROR`` on HTTP 401, and
                ``AI_SERVICE_ERROR`` for any other HTTP error status or any
                other failure while calling the API or handling its reply.
        """
        # Handle long transcripts with chunking
        if self.get_token_count(request.transcript) > self.MAX_SINGLE_REQUEST_TOKENS:
            return await self._generate_chunked_summary(request)

        prompt = self._build_summary_prompt(request)

        try:
            start_time = time.perf_counter()

            # Make API request
            response = await self.client.post(
                f"{self.base_url}/chat/completions",
                json={
                    "model": self.model,
                    "messages": [
                        {
                            "role": "system",
                            "content": "You are an expert content summarizer specializing in video analysis. Provide clear, structured summaries.",
                        },
                        {
                            "role": "user",
                            "content": prompt,
                        },
                    ],
                    "max_tokens": self._get_max_tokens(request.length),
                    "temperature": 0.3,  # Lower temperature for consistency
                    "response_format": {"type": "json_object"},
                },
            )
            response.raise_for_status()
            result = response.json()

            # Extract response
            content = result["choices"][0]["message"]["content"]
            # "usage" may be absent or null; treat both as "no usage data".
            usage = result.get("usage") or {}

            # Parse JSON response
            try:
                summary_data = json.loads(content)
            except json.JSONDecodeError:
                # Fallback to text parsing
                summary_data = self._parse_text_response(content)
            else:
                # Valid JSON that is not an object (list, string, number)
                # cannot be read with .get(); treat it as plain text too.
                if not isinstance(summary_data, dict):
                    summary_data = self._parse_text_response(content)

            # Calculate processing time and cost
            processing_time = time.perf_counter() - start_time
            input_tokens = usage.get("prompt_tokens", 0)
            output_tokens = usage.get("completion_tokens", 0)
            cost_estimate = self._calculate_cost(input_tokens, output_tokens)

            return SummaryResult(
                summary=summary_data.get("summary", content),
                key_points=summary_data.get("key_points", []),
                main_themes=summary_data.get("main_themes", []),
                actionable_insights=summary_data.get("actionable_insights", []),
                confidence_score=summary_data.get("confidence_score", 0.85),
                processing_metadata={
                    "model": self.model,
                    "processing_time": processing_time,
                    "chunk_count": 1,
                    "fallback_used": False,
                },
                usage=ModelUsage(
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    total_tokens=input_tokens + output_tokens,
                    model=self.model,
                ),
                cost_data={
                    "input_cost": cost_estimate["input_cost"],
                    "output_cost": cost_estimate["output_cost"],
                    "total_cost": cost_estimate["total_cost"],
                    "cost_savings": 0.0,
                },
            )
        except httpx.HTTPStatusError as e:
            status = e.response.status_code
            if status == 429:
                raise AIServiceError(
                    message="DeepSeek API rate limit exceeded",
                    error_code=ErrorCode.RATE_LIMIT_ERROR,
                    recoverable=True,
                ) from e
            if status == 401:
                raise AIServiceError(
                    message="Invalid DeepSeek API key",
                    error_code=ErrorCode.AUTHENTICATION_ERROR,
                    recoverable=False,
                ) from e
            raise AIServiceError(
                message=f"DeepSeek API error: {e.response.text}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                recoverable=True,
            ) from e
        except Exception as e:
            # Service boundary: wrap every other failure (network, malformed
            # payload, ...) in the domain error, keeping the original cause.
            raise AIServiceError(
                message=f"Failed to generate summary: {str(e)}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                recoverable=True,
            ) from e

    def get_token_count(self, text: str) -> int:
        """Estimate token count for text.

        DeepSeek uses a similar tokenization to GPT models.
        We'll use a rough estimate of 1 token per 4 characters.
        """
        return len(text) // self.CHARS_PER_TOKEN

    def _get_max_tokens(self, length: SummaryLength) -> int:
        """Get maximum tokens based on summary length.

        Returns 500 for BRIEF, 2000 for DETAILED and 1000 for anything else
        (STANDARD).
        """
        if length == SummaryLength.BRIEF:
            return 500
        if length == SummaryLength.DETAILED:
            return 2000
        return 1000  # STANDARD

    def _build_summary_prompt(self, request: SummaryRequest) -> str:
        """Build the summary prompt.

        The prompt contains the transcript, an optional "focus on" line, a
        length instruction (falling back to the STANDARD wording for an
        unrecognised length) and the JSON structure the model should return.
        """
        length_instructions = {
            SummaryLength.BRIEF: "Provide a concise summary in 2-3 paragraphs",
            SummaryLength.STANDARD: "Provide a comprehensive summary in 4-5 paragraphs",
            SummaryLength.DETAILED: "Provide an extensive, detailed summary with thorough analysis",
        }
        length_instruction = length_instructions.get(
            request.length, length_instructions[SummaryLength.STANDARD]
        )

        focus_context = ""
        if request.focus_areas:
            focus_context = f"\nFocus particularly on: {', '.join(request.focus_areas)}"

        # Assembled line by line so the emitted text does not depend on the
        # indentation of this method.
        return (
            "Analyze this video transcript and provide a structured summary.\n"
            "Transcript:\n"
            f"{request.transcript}\n"
            f"{focus_context}\n"
            f"{length_instruction}\n"
            "Provide your response as a JSON object with this structure:\n"
            "{\n"
            '"summary": "Main summary text",\n'
            '"key_points": ["key point 1", "key point 2", ...],\n'
            '"main_themes": ["theme 1", "theme 2", ...],\n'
            '"actionable_insights": ["insight 1", "insight 2", ...],\n'
            '"confidence_score": 0.0-1.0\n'
            "}"
        )

    def _parse_text_response(self, text: str) -> Dict:
        """Parse text response as fallback.

        Used when the model's reply is not a JSON object. Lines are read in
        order: bullet lines are added to the current list section, lines
        mentioning a section keyword switch the current section, and any
        other line seen before the first section header is appended to the
        summary.

        Returns:
            Dict with ``summary`` (the whole text when no summary lines were
            found), at most 5 ``key_points``, 4 ``main_themes`` and
            3 ``actionable_insights``, and a fixed ``confidence_score`` of 0.7.
        """
        summary_parts: List[str] = []
        sections: Dict[str, List[str]] = {
            "key_points": [],
            "main_themes": [],
            "actionable_insights": [],
        }
        current_section = "summary"

        for raw_line in text.strip().split("\n"):
            line = raw_line.strip()
            if not line:
                continue

            # Bullets are tested before header keywords so that an item such
            # as "- Take action early" is stored as an item instead of being
            # mistaken for an "action" header. The original second prefix was
            # an empty string (presumably a bullet glyph lost in encoding),
            # which matched every line and chopped two characters off it.
            if line.startswith(self._BULLET_PREFIXES):
                target = sections.get(current_section)
                if target is not None:
                    target.append(line[2:].strip())
                continue

            lowered = line.lower()
            # Check for section headers
            if "key point" in lowered or "main point" in lowered:
                current_section = "key_points"
            elif "theme" in lowered or "topic" in lowered:
                current_section = "main_themes"
            elif "insight" in lowered or "action" in lowered:
                current_section = "actionable_insights"
            elif current_section == "summary":
                summary_parts.append(line)

        return {
            "summary": " ".join(summary_parts) or text,
            "key_points": sections["key_points"][:5],
            "main_themes": sections["main_themes"][:4],
            "actionable_insights": sections["actionable_insights"][:3],
            "confidence_score": 0.7,
        }

    def _calculate_cost(self, input_tokens: int, output_tokens: int) -> Dict[str, float]:
        """Calculate cost for the request.

        Returns:
            Dict with ``input_cost``, ``output_cost`` and ``total_cost``,
            computed from the per-1K-token rates set in ``__init__``.
        """
        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k
        return {
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": input_cost + output_cost,
        }

    async def _generate_chunked_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate summary for long transcripts using chunking.

        Each chunk is summarized with BRIEF length, the chunk summaries are
        joined with blank lines, and that text is summarized at the
        requested length. Token usage and cost on the returned result cover
        all requests made; ``chunk_count`` is set to the number of chunks.
        """
        # Split transcript into chunks
        chunks = self._split_transcript(request.transcript, self.CHUNK_TOKENS)

        # Summarize each chunk
        chunk_summaries = []
        total_input_tokens = 0
        total_output_tokens = 0
        last_index = len(chunks) - 1

        for index, chunk in enumerate(chunks):
            chunk_request = SummaryRequest(
                transcript=chunk,
                length=SummaryLength.BRIEF,  # Brief for chunks
                focus_areas=request.focus_areas,
            )
            result = await self.generate_summary(chunk_request)
            chunk_summaries.append(result.summary)
            total_input_tokens += result.usage.input_tokens
            total_output_tokens += result.usage.output_tokens

            # Rate limiting: pause between chunk requests, not after the last
            if index < last_index:
                await asyncio.sleep(1)

        # Generate final summary from combined chunk summaries
        final_request = SummaryRequest(
            transcript="\n\n".join(chunk_summaries),
            length=request.length,
            focus_areas=request.focus_areas,
        )
        final_result = await self.generate_summary(final_request)

        # Update token counts
        final_result.usage.input_tokens += total_input_tokens
        final_result.usage.output_tokens += total_output_tokens
        final_result.usage.total_tokens = (
            final_result.usage.input_tokens + final_result.usage.output_tokens
        )

        # Update metadata
        final_result.processing_metadata["chunk_count"] = len(chunks)

        # Recalculate cost over the accumulated usage
        final_result.cost_data.update(
            self._calculate_cost(
                final_result.usage.input_tokens,
                final_result.usage.output_tokens,
            )
        )
        return final_result

    def _split_transcript(self, transcript: str, max_tokens: int) -> List[str]:
        """Split transcript into chunks.

        Words are packed greedily into chunks whose joined length never
        exceeds ``max_tokens * CHARS_PER_TOKEN`` characters, so
        ``get_token_count(chunk) <= max_tokens`` holds for every chunk.
        Sizing by characters (including the joining spaces) keeps the split
        consistent with ``get_token_count``; estimating word by word rounds
        short words down to zero tokens and can yield a chunk that is still
        over the limit, which sends ``generate_summary`` back into chunking
        indefinitely.

        Args:
            transcript: Full transcript text.
            max_tokens: Estimated-token budget per chunk.

        Returns:
            Chunks in original order; empty when the transcript has no words.
        """
        max_chars = max(max_tokens, 1) * self.CHARS_PER_TOKEN
        chunks: List[str] = []
        current: List[str] = []
        current_len = 0  # length of " ".join(current)

        for word in transcript.split():
            # A single "word" longer than the budget is cut into pieces so
            # that no chunk can exceed the limit.
            for start in range(0, len(word), max_chars):
                piece = word[start:start + max_chars]
                added = len(piece) + (1 if current else 0)  # +1 for the space
                if current and current_len + added > max_chars:
                    chunks.append(" ".join(current))
                    current = [piece]
                    current_len = len(piece)
                else:
                    current.append(piece)
                    current_len += added

        if current:
            chunks.append(" ".join(current))
        return chunks

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - cleanup resources."""
        await self.client.aclose()