"""DeepSeek V2 summarization service."""
import asyncio
import json
import sys
import os
from typing import Dict, List

import httpx

# Add library path to import BaseAIService
lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../lib'))
if lib_path not in sys.path:
    sys.path.insert(0, lib_path)

try:
    from ai_assistant_lib.services.ai.base_ai_service import (
        BaseAIService,
        AIModelConfig,
        AIRequest,
        AIResponse,
    )
    USING_BASE_AI_SERVICE = True
except ImportError:
    # Fallback to old implementation if library not available
    from .ai_service import AIService as BaseAIService
    USING_BASE_AI_SERVICE = False

    # Create dummy classes for compatibility
    class AIModelConfig:
        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)

    class AIRequest:
        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)

    class AIResponse:
        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)

from .ai_service import SummaryRequest, SummaryResult, SummaryLength, ModelUsage
from ..core.exceptions import AIServiceError, ErrorCode

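# Everything below runs in one of two modes, selected at import time via
# USING_BASE_AI_SERVICE: the shared library's BaseAIService (with built-in
# retry and rate limiting) or the minimal fallback classes defined above.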
class DeepSeekSummarizer(BaseAIService):
    """DeepSeek-based summarization service."""

    def __init__(self, api_key: str, model: str = "deepseek-chat"):
        """Initialize DeepSeek summarizer.

        Args:
            api_key: DeepSeek API key
            model: Model to use (default: deepseek-chat)
        """
        config = AIModelConfig(
            model_name=model,
            temperature=0.3,
            max_tokens=2000,
            timeout_seconds=60,
            max_retries=3,
            backoff_factor=2.0
        )

        # Store configuration for both inheritance patterns
        self.api_key = api_key
        self.default_config = config

        # Initialize based on which BaseAIService we're using
        if USING_BASE_AI_SERVICE:
            # Initialize library BaseAIService with full parameters
            super().__init__(
                name="deepseek-summarizer",
                api_key=api_key,
                default_config=config
            )
        else:
            # Initialize abstract AIService (no parameters) and add missing attributes
            super().__init__()
            self.name = "deepseek-summarizer"
            self.is_initialized = False
            self._client = None

        self.base_url = "https://api.deepseek.com/v1"

        # Cost per 1K tokens (DeepSeek pricing)
        self.input_cost_per_1k = 0.00014  # $0.14 per 1M input tokens
        self.output_cost_per_1k = 0.00028  # $0.28 per 1M output tokens

    async def initialize(self):
        """Initialize the service; the fallback path creates the HTTP client directly."""
        if not USING_BASE_AI_SERVICE:
            self._client = await self._create_client()
            self.is_initialized = True
        else:
            await super().initialize()

    @property
    def client(self):
        """Get the HTTP client (None until initialize() has run)."""
        return getattr(self, "_client", None)

    async def predict(self, prompt: str, model_config: AIModelConfig) -> AIResponse:
        """Run a prediction, delegating to the library implementation when available."""
        if USING_BASE_AI_SERVICE:
            # Use library implementation (retry, rate limiting, etc.)
            return await super().predict(prompt, model_config)
        else:
            # Fallback implementation: wrap the prompt in a minimal AIRequest
            import uuid
            request = AIRequest(
                request_id=str(uuid.uuid4()),
                prompt=prompt,
                model_config=model_config
            )
            return await self._make_prediction(request)

    async def _create_client(self):
        """Create the HTTP client for DeepSeek API."""
        return httpx.AsyncClient(
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            timeout=self.default_config.timeout_seconds
        )

    async def _make_prediction(self, request: AIRequest) -> AIResponse:
        """Make prediction using DeepSeek API."""
        try:
            response = await self._client.post(
                f"{self.base_url}/chat/completions",
                json={
                    "model": request.model_config.model_name,
                    "messages": [
                        {
                            "role": "system",
                            "content": "You are an expert content summarizer specializing in video analysis. Provide clear, structured summaries."
                        },
                        {
                            "role": "user",
                            "content": request.prompt
                        }
                    ],
                    "max_tokens": request.model_config.max_tokens,
                    "temperature": request.model_config.temperature,
                    "response_format": {"type": "json_object"}
                }
            )

            response.raise_for_status()
            result = response.json()

            content = result["choices"][0]["message"]["content"]
            usage = result.get("usage", {})

            return AIResponse(
                request_id=request.request_id,
                content=content,
                model_name=request.model_config.model_name,
                usage={
                    "input_tokens": usage.get("prompt_tokens", 0),
                    "output_tokens": usage.get("completion_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0)
                }
            )

        except Exception as e:
            if USING_BASE_AI_SERVICE:
                from ai_assistant_lib.core.exceptions import AIServiceError as LibAIServiceError
                raise LibAIServiceError(
                    service_name=self.name,
                    operation="_make_prediction",
                    details={
                        "error": str(e),
                        "model": request.model_config.model_name
                    }
                ) from e
            else:
                # For fallback, just re-raise the original error
                raise

    async def generate_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate structured summary using DeepSeek."""

        # Handle long transcripts with chunking
        if self.get_token_count(request.transcript) > 30000:  # conservative DeepSeek context budget
            return await self._generate_chunked_summary(request)

        prompt = self._build_summary_prompt(request)

        try:
            # Create model config for this request
            model_config = AIModelConfig(
                model_name=self.default_config.model_name,
                temperature=0.3,
                max_tokens=self._get_max_tokens(request.length),
                timeout_seconds=self.default_config.timeout_seconds
            )

            # Use BaseAIService predict method with retry, rate limiting, etc.
            response = await self.predict(
                prompt=prompt,
                model_config=model_config
            )

            # Parse JSON response
            try:
                summary_data = json.loads(response.content)
            except json.JSONDecodeError:
                # Fallback to text parsing
                summary_data = self._parse_text_response(response.content)

            # Calculate cost
            input_tokens = response.usage.get("input_tokens", 0)
            output_tokens = response.usage.get("output_tokens", 0)
            cost_estimate = self._calculate_cost(input_tokens, output_tokens)

            return SummaryResult(
                summary=summary_data.get("summary", response.content),
                key_points=summary_data.get("key_points", []),
                main_themes=summary_data.get("main_themes", []),
                actionable_insights=summary_data.get("actionable_insights", []),
                confidence_score=summary_data.get("confidence_score", 0.85),
                processing_metadata={
                    "model": response.model_name,
                    "processing_time_seconds": (getattr(response, "processing_time_ms", 0) or 0) / 1000,
                    "input_tokens": input_tokens,
                    "output_tokens": output_tokens,
                    "total_tokens": input_tokens + output_tokens,
                    "chunks_processed": 1
                },
                cost_data={
                    "input_cost_usd": cost_estimate["input_cost"],
                    "output_cost_usd": cost_estimate["output_cost"],
                    "total_cost_usd": cost_estimate["total_cost"],
                    "cost_per_summary": cost_estimate["total_cost"]
                }
            )

        except Exception as e:
            raise AIServiceError(
                message=f"DeepSeek summarization failed: {str(e)}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                details={
                    "model": self.default_config.model_name,
                    "transcript_length": len(request.transcript),
                    "error_type": type(e).__name__
                }
            ) from e

    def get_token_count(self, text: str) -> int:
        """Estimate token count for text.

        DeepSeek uses tokenization similar to GPT models; we use a rough
        estimate of 1 token per 4 characters.
        """
        return len(text) // 4

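    # Worked example: a 60,000-character transcript ≈ 60000 // 4 = 15,000 input
    # tokens; a STANDARD summary allows 1,000 output tokens, so estimate_cost
    # gives (15000 / 1000) * 0.00014 + (1000 / 1000) * 0.00028 ≈ $0.0024.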
    def estimate_cost(self, transcript: str, length: SummaryLength) -> float:
        """Estimate cost (USD) for summarizing a transcript."""
        input_tokens = self.get_token_count(transcript)
        # Assume the full output budget is used, so this is an upper bound
        output_tokens = self._get_max_tokens(length)

        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k

        return input_cost + output_cost

    def _get_max_tokens(self, length: SummaryLength) -> int:
        """Get maximum tokens based on summary length."""
        if length == SummaryLength.BRIEF:
            return 500
        elif length == SummaryLength.DETAILED:
            return 2000
        else:  # STANDARD
            return 1000

    def _build_summary_prompt(self, request: SummaryRequest) -> str:
        """Build the summary prompt."""
        length_instructions = {
            SummaryLength.BRIEF: "Provide a concise summary in 2-3 paragraphs",
            SummaryLength.STANDARD: "Provide a comprehensive summary in 4-5 paragraphs",
            SummaryLength.DETAILED: "Provide an extensive, detailed summary with thorough analysis"
        }

        focus_context = ""
        if request.focus_areas:
            focus_context = f"\nFocus particularly on: {', '.join(request.focus_areas)}"

        prompt = f"""Analyze this video transcript and provide a structured summary.

Transcript:
{request.transcript}

{focus_context}

{length_instructions.get(request.length, length_instructions[SummaryLength.STANDARD])}

Provide your response as a JSON object with this structure:
{{
    "summary": "Main summary text",
    "key_points": ["key point 1", "key point 2", ...],
    "main_themes": ["theme 1", "theme 2", ...],
    "actionable_insights": ["insight 1", "insight 2", ...],
    "confidence_score": 0.0-1.0
}}"""

        return prompt

    def _parse_text_response(self, text: str) -> Dict:
        """Parse text response as fallback."""
        lines = text.strip().split('\n')

        # Try to extract sections
        summary = ""
        key_points = []
        main_themes = []
        actionable_insights = []

        current_section = "summary"

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Check for section headers
            if "key point" in line.lower() or "main point" in line.lower():
                current_section = "key_points"
            elif "theme" in line.lower() or "topic" in line.lower():
                current_section = "main_themes"
            elif "insight" in line.lower() or "action" in line.lower():
                current_section = "actionable_insights"
            elif line.startswith("- ") or line.startswith("• "):
                # Bullet point
                content = line[2:].strip()
                if current_section == "key_points":
                    key_points.append(content)
                elif current_section == "main_themes":
                    main_themes.append(content)
                elif current_section == "actionable_insights":
                    actionable_insights.append(content)
            else:
                if current_section == "summary":
                    summary += line + " "

        return {
            "summary": summary.strip() or text,
            "key_points": key_points[:5],
            "main_themes": main_themes[:4],
            "actionable_insights": actionable_insights[:3],
            "confidence_score": 0.7
        }

    def _calculate_cost(self, input_tokens: int, output_tokens: int) -> Dict[str, float]:
        """Calculate cost for the request."""
        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k

        return {
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": input_cost + output_cost
        }

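    # Long transcripts are handled map-reduce style: each chunk is summarized
    # at BRIEF length, the chunk summaries are concatenated, and one final pass
    # produces a summary at the originally requested length.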
    async def _generate_chunked_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate summary for long transcripts using chunking."""
        # Split transcript into chunks
        max_chunk_size = 28000  # Leave room for prompt
        chunks = self._split_transcript(request.transcript, max_chunk_size)

        # Summarize each chunk
        chunk_summaries = []
        total_input_tokens = 0
        total_output_tokens = 0

        for i, chunk in enumerate(chunks):
            chunk_request = SummaryRequest(
                transcript=chunk,
                length=SummaryLength.BRIEF,  # Brief for chunks
                focus_areas=request.focus_areas
            )

            result = await self.generate_summary(chunk_request)
            chunk_summaries.append(result.summary)
            total_input_tokens += result.processing_metadata["input_tokens"]
            total_output_tokens += result.processing_metadata["output_tokens"]

            # Rate limiting
            if i < len(chunks) - 1:
                await asyncio.sleep(1)

        # Combine chunk summaries
        combined = "\n\n".join(chunk_summaries)

        # Generate final summary from combined chunks
        final_request = SummaryRequest(
            transcript=combined,
            length=request.length,
            focus_areas=request.focus_areas
        )

        final_result = await self.generate_summary(final_request)

        # Fold the per-chunk token counts into the final result's metadata
        final_result.processing_metadata["input_tokens"] += total_input_tokens
        final_result.processing_metadata["output_tokens"] += total_output_tokens
        final_result.processing_metadata["total_tokens"] = (
            final_result.processing_metadata["input_tokens"]
            + final_result.processing_metadata["output_tokens"]
        )
        final_result.processing_metadata["chunks_processed"] = len(chunks)

        # Recalculate cost across all chunk passes plus the final pass
        cost = self._calculate_cost(
            final_result.processing_metadata["input_tokens"],
            final_result.processing_metadata["output_tokens"]
        )
        final_result.cost_data.update({
            "input_cost_usd": cost["input_cost"],
            "output_cost_usd": cost["output_cost"],
            "total_cost_usd": cost["total_cost"],
            "cost_per_summary": cost["total_cost"]
        })

        return final_result

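    # Example: with max_tokens=28000 and the ~4-characters-per-token heuristic,
    # each chunk holds roughly 112,000 characters of transcript.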
    def _split_transcript(self, transcript: str, max_tokens: int) -> List[str]:
        """Split transcript into chunks that fit the per-request token budget."""
        words = transcript.split()
        chunks = []
        current_chunk = []
        current_size = 0

        for word in words:
            # Count the joining space too, and never let a word count as zero
            # tokens (len(word) // 4 alone rounds short words down to 0)
            word_tokens = max(1, self.get_token_count(word + " "))
            if current_size + word_tokens > max_tokens and current_chunk:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_size = word_tokens
            else:
                current_chunk.append(word)
                current_size += word_tokens

        if current_chunk:
            chunks.append(" ".join(current_chunk))

        return chunks

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - clean up resources."""
        if self.client:
            await self.client.aclose()