"""DeepSeek V2 summarization service.""" import asyncio import json import time import sys import os from typing import Dict, List, Optional import httpx # Add library path to import BaseAIService lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../lib')) if lib_path not in sys.path: sys.path.insert(0, lib_path) try: from ai_assistant_lib.services.ai.base_ai_service import BaseAIService, AIModelConfig, AIRequest, AIResponse USING_BASE_AI_SERVICE = True except ImportError: # Fallback to old implementation if library not available from .ai_service import AIService as BaseAIService USING_BASE_AI_SERVICE = False # Create dummy classes for compatibility class AIModelConfig: def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) class AIRequest: def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) class AIResponse: def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) from .ai_service import SummaryRequest, SummaryResult, SummaryLength, ModelUsage from ..core.exceptions import AIServiceError, ErrorCode class DeepSeekSummarizer(BaseAIService): """DeepSeek-based summarization service.""" def __init__(self, api_key: str, model: str = "deepseek-chat"): """Initialize DeepSeek summarizer. Args: api_key: DeepSeek API key model: Model to use (default: deepseek-chat) """ config = AIModelConfig( model_name=model, temperature=0.3, max_tokens=2000, timeout_seconds=60, max_retries=3, backoff_factor=2.0 ) # Store configuration for both inheritance patterns self.api_key = api_key self.default_config = config # Initialize based on which BaseAIService we're using if USING_BASE_AI_SERVICE: # Initialize library BaseAIService with full parameters super().__init__( name="deepseek-summarizer", api_key=api_key, default_config=config ) else: # Initialize abstract AIService (no parameters) and add missing attributes super().__init__() self.name = "deepseek-summarizer" self.is_initialized = False self._client = None self.base_url = "https://api.deepseek.com/v1" # Cost per 1K tokens (DeepSeek pricing) self.input_cost_per_1k = 0.00014 # $0.14 per 1M input tokens self.output_cost_per_1k = 0.00028 # $0.28 per 1M output tokens async def initialize(self): """Initialize the service (fallback implementation).""" if not USING_BASE_AI_SERVICE: self._client = await self._create_client() self.is_initialized = True else: await super().initialize() @property def client(self): """Get the HTTP client.""" return self._client async def predict(self, prompt: str, model_config: 'AIModelConfig') -> 'AIResponse': """Predict method fallback implementation.""" if USING_BASE_AI_SERVICE: # Use library implementation return await super().predict(prompt, model_config) else: # Fallback implementation import uuid request = AIRequest( request_id=str(uuid.uuid4()), prompt=prompt, model_config=model_config ) return await self._make_prediction(request) async def _create_client(self): """Create the HTTP client for DeepSeek API.""" return httpx.AsyncClient( headers={ "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" }, timeout=self.default_config.timeout_seconds ) async def _make_prediction(self, request: AIRequest) -> AIResponse: """Make prediction using DeepSeek API.""" try: response = await self._client.post( f"{self.base_url}/chat/completions", json={ "model": request.model_config.model_name, "messages": [ { "role": "system", "content": "You are an expert content summarizer specializing in video analysis. Provide clear, structured summaries." }, { "role": "user", "content": request.prompt } ], "max_tokens": request.model_config.max_tokens, "temperature": request.model_config.temperature, "response_format": {"type": "json_object"} } ) response.raise_for_status() result = response.json() content = result["choices"][0]["message"]["content"] usage = result.get("usage", {}) return AIResponse( request_id=request.request_id, content=content, model_name=request.model_config.model_name, usage={ "input_tokens": usage.get("prompt_tokens", 0), "output_tokens": usage.get("completion_tokens", 0), "total_tokens": usage.get("total_tokens", 0) } ) except Exception as e: if USING_BASE_AI_SERVICE: from ai_assistant_lib.core.exceptions import AIServiceError as LibAIServiceError raise LibAIServiceError( service_name=self.name, operation="_make_prediction", details={ "error": str(e), "model": request.model_config.model_name } ) from e else: # For fallback, just re-raise the original error raise async def generate_summary(self, request: SummaryRequest) -> SummaryResult: """Generate structured summary using DeepSeek.""" # Handle long transcripts with chunking if self.get_token_count(request.transcript) > 30000: # DeepSeek context limit return await self._generate_chunked_summary(request) prompt = self._build_summary_prompt(request) try: # Create model config for this request model_config = AIModelConfig( model_name=self.default_config.model_name, temperature=0.3, max_tokens=self._get_max_tokens(request.length), timeout_seconds=self.default_config.timeout_seconds ) # Use BaseAIService predict method with retry, rate limiting, etc. response = await self.predict( prompt=prompt, model_config=model_config ) # Parse JSON response try: summary_data = json.loads(response.content) except json.JSONDecodeError: # Fallback to text parsing summary_data = self._parse_text_response(response.content) # Calculate cost input_tokens = response.usage.get("input_tokens", 0) output_tokens = response.usage.get("output_tokens", 0) cost_estimate = self._calculate_cost(input_tokens, output_tokens) return SummaryResult( summary=summary_data.get("summary", response.content), key_points=summary_data.get("key_points", []), main_themes=summary_data.get("main_themes", []), actionable_insights=summary_data.get("actionable_insights", []), confidence_score=summary_data.get("confidence_score", 0.85), processing_metadata={ "model": response.model_name, "processing_time_seconds": getattr(response, 'processing_time_ms', 0) / 1000 if getattr(response, 'processing_time_ms', 0) else 0, "input_tokens": input_tokens, "output_tokens": output_tokens, "total_tokens": input_tokens + output_tokens, "chunks_processed": 1 }, cost_data={ "input_cost_usd": cost_estimate["input_cost"], "output_cost_usd": cost_estimate["output_cost"], "total_cost_usd": cost_estimate["total_cost"], "cost_per_summary": cost_estimate["total_cost"] } ) except Exception as e: raise AIServiceError( message=f"DeepSeek summarization failed: {str(e)}", error_code=ErrorCode.AI_SERVICE_ERROR, details={ "model": self.default_config.model_name, "transcript_length": len(request.transcript), "error_type": type(e).__name__ } ) def get_token_count(self, text: str) -> int: """Estimate token count for text. DeepSeek uses a similar tokenization to GPT models. We'll use a rough estimate of 1 token per 4 characters. """ return len(text) // 4 def estimate_cost(self, transcript: str, length: SummaryLength) -> float: """Estimate cost for summarizing transcript.""" input_tokens = self.get_token_count(transcript) output_tokens = self._get_max_tokens(length) input_cost = (input_tokens / 1000) * self.input_cost_per_1k output_cost = (output_tokens / 1000) * self.output_cost_per_1k return input_cost + output_cost def _get_max_tokens(self, length: SummaryLength) -> int: """Get maximum tokens based on summary length.""" if length == SummaryLength.BRIEF: return 500 elif length == SummaryLength.DETAILED: return 2000 else: # STANDARD return 1000 def _build_summary_prompt(self, request: SummaryRequest) -> str: """Build the summary prompt.""" length_instructions = { SummaryLength.BRIEF: "Provide a concise summary in 2-3 paragraphs", SummaryLength.STANDARD: "Provide a comprehensive summary in 4-5 paragraphs", SummaryLength.DETAILED: "Provide an extensive, detailed summary with thorough analysis" } focus_context = "" if request.focus_areas: focus_context = f"\nFocus particularly on: {', '.join(request.focus_areas)}" prompt = f"""Analyze this video transcript and provide a structured summary. Transcript: {request.transcript} {focus_context} {length_instructions.get(request.length, length_instructions[SummaryLength.STANDARD])} Provide your response as a JSON object with this structure: {{ "summary": "Main summary text", "key_points": ["key point 1", "key point 2", ...], "main_themes": ["theme 1", "theme 2", ...], "actionable_insights": ["insight 1", "insight 2", ...], "confidence_score": 0.0-1.0 }}""" return prompt def _parse_text_response(self, text: str) -> Dict: """Parse text response as fallback.""" lines = text.strip().split('\n') # Try to extract sections summary = "" key_points = [] main_themes = [] actionable_insights = [] current_section = "summary" for line in lines: line = line.strip() if not line: continue # Check for section headers if "key point" in line.lower() or "main point" in line.lower(): current_section = "key_points" elif "theme" in line.lower() or "topic" in line.lower(): current_section = "main_themes" elif "insight" in line.lower() or "action" in line.lower(): current_section = "actionable_insights" elif line.startswith("- ") or line.startswith("• "): # Bullet point content = line[2:].strip() if current_section == "key_points": key_points.append(content) elif current_section == "main_themes": main_themes.append(content) elif current_section == "actionable_insights": actionable_insights.append(content) else: if current_section == "summary": summary += line + " " return { "summary": summary.strip() or text, "key_points": key_points[:5], "main_themes": main_themes[:4], "actionable_insights": actionable_insights[:3], "confidence_score": 0.7 } def _calculate_cost(self, input_tokens: int, output_tokens: int) -> Dict[str, float]: """Calculate cost for the request.""" input_cost = (input_tokens / 1000) * self.input_cost_per_1k output_cost = (output_tokens / 1000) * self.output_cost_per_1k return { "input_cost": input_cost, "output_cost": output_cost, "total_cost": input_cost + output_cost } async def _generate_chunked_summary(self, request: SummaryRequest) -> SummaryResult: """Generate summary for long transcripts using chunking.""" # Split transcript into chunks max_chunk_size = 28000 # Leave room for prompt chunks = self._split_transcript(request.transcript, max_chunk_size) # Summarize each chunk chunk_summaries = [] total_input_tokens = 0 total_output_tokens = 0 for i, chunk in enumerate(chunks): chunk_request = SummaryRequest( transcript=chunk, length=SummaryLength.BRIEF, # Brief for chunks focus_areas=request.focus_areas ) result = await self.generate_summary(chunk_request) chunk_summaries.append(result.summary) total_input_tokens += result.usage.input_tokens total_output_tokens += result.usage.output_tokens # Rate limiting if i < len(chunks) - 1: await asyncio.sleep(1) # Combine chunk summaries combined = "\n\n".join(chunk_summaries) # Generate final summary from combined chunks final_request = SummaryRequest( transcript=combined, length=request.length, focus_areas=request.focus_areas ) final_result = await self.generate_summary(final_request) # Update token counts final_result.usage.input_tokens += total_input_tokens final_result.usage.output_tokens += total_output_tokens final_result.usage.total_tokens = ( final_result.usage.input_tokens + final_result.usage.output_tokens ) # Update metadata final_result.processing_metadata["chunk_count"] = len(chunks) # Recalculate cost cost = self._calculate_cost( final_result.usage.input_tokens, final_result.usage.output_tokens ) final_result.cost_data.update(cost) return final_result def _split_transcript(self, transcript: str, max_tokens: int) -> List[str]: """Split transcript into chunks.""" words = transcript.split() chunks = [] current_chunk = [] current_size = 0 for word in words: word_tokens = self.get_token_count(word) if current_size + word_tokens > max_tokens and current_chunk: chunks.append(" ".join(current_chunk)) current_chunk = [word] current_size = word_tokens else: current_chunk.append(word) current_size += word_tokens if current_chunk: chunks.append(" ".join(current_chunk)) return chunks async def __aenter__(self): """Async context manager entry.""" return self async def __aexit__(self, exc_type, exc_val, exc_tb): """Async context manager exit - cleanup resources.""" if self.client: await self.client.aclose()