# youtube-summarizer/backend/services/deepseek_summarizer.py

"""DeepSeek V2 summarization service."""
import asyncio
import json
import time
from typing import Dict, List, Optional
import httpx

from .ai_service import AIService, SummaryRequest, SummaryResult, SummaryLength, ModelUsage
from ..core.exceptions import AIServiceError, ErrorCode


class DeepSeekSummarizer(AIService):
    """DeepSeek-based summarization service."""

    def __init__(self, api_key: str, model: str = "deepseek-chat"):
        """Store credentials and pricing, and create the shared HTTP client.

        Args:
            api_key: DeepSeek API key; sent as a Bearer token by the client.
            model: Name of the model to request (default: deepseek-chat).
        """
        # Headers attached to every request issued through ``self.client``.
        default_headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        self.api_key = api_key
        self.model = model
        self.base_url = "https://api.deepseek.com/v1"

        # Pricing in USD per 1K tokens.
        self.input_cost_per_1k = 0.00014   # $0.14 per 1M input tokens
        self.output_cost_per_1k = 0.00028  # $0.28 per 1M output tokens

        # A single async client (60 second timeout) is reused for all calls.
        self.client = httpx.AsyncClient(headers=default_headers, timeout=60.0)

    async def generate_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate a structured summary of ``request.transcript`` using DeepSeek.

        Transcripts whose estimated size exceeds 30,000 tokens are delegated to
        ``_generate_chunked_summary``. Everything else is sent as one
        chat-completion request that asks the model for a JSON object.

        Args:
            request: Transcript plus the desired summary length and optional
                focus areas.

        Returns:
            A SummaryResult built from the model's JSON answer, or from
            ``_parse_text_response`` when the answer is not a JSON object,
            together with token usage, timing metadata and a cost estimate.

        Raises:
            AIServiceError: For HTTP 429 (RATE_LIMIT_ERROR, recoverable),
                HTTP 401 (AUTHENTICATION_ERROR, not recoverable), any other
                HTTP error status, an empty model answer, or any other failure
                while calling the API or interpreting its response
                (AI_SERVICE_ERROR, recoverable). Errors translated from
                another exception carry it as ``__cause__``.
        """

        # Handle long transcripts with chunking
        if self.get_token_count(request.transcript) > 30000:  # DeepSeek context limit
            return await self._generate_chunked_summary(request)

        prompt = self._build_summary_prompt(request)

        try:
            # perf_counter is monotonic, so the measured duration cannot be
            # distorted by wall-clock adjustments during the request.
            start_time = time.perf_counter()

            # Make API request
            response = await self.client.post(
                f"{self.base_url}/chat/completions",
                json={
                    "model": self.model,
                    "messages": [
                        {
                            "role": "system",
                            "content": "You are an expert content summarizer specializing in video analysis. Provide clear, structured summaries."
                        },
                        {
                            "role": "user",
                            "content": prompt
                        }
                    ],
                    "max_tokens": self._get_max_tokens(request.length),
                    "temperature": 0.3,  # Lower temperature for consistency
                    "response_format": {"type": "json_object"}
                }
            )

            response.raise_for_status()
            result = response.json()

            # Extract response; "usage" may be absent or null.
            content = result["choices"][0]["message"]["content"]
            usage = result.get("usage") or {}

            # A missing/blank answer cannot be summarised; report it clearly
            # instead of failing later with an unrelated TypeError or
            # returning an empty summary.
            if not isinstance(content, str) or not content.strip():
                raise AIServiceError(
                    message="DeepSeek returned an empty response",
                    error_code=ErrorCode.AI_SERVICE_ERROR,
                    recoverable=True
                )

            # Parse JSON response
            try:
                summary_data = json.loads(content)
            except json.JSONDecodeError:
                summary_data = None

            # Valid JSON that is not an object (list, string, number) has no
            # named fields either, so it takes the text fallback as well.
            if not isinstance(summary_data, dict):
                summary_data = self._parse_text_response(content)

            # Calculate processing time and cost
            processing_time = time.perf_counter() - start_time
            input_tokens = usage.get("prompt_tokens", 0)
            output_tokens = usage.get("completion_tokens", 0)

            cost_estimate = self._calculate_cost(input_tokens, output_tokens)

            return SummaryResult(
                summary=summary_data.get("summary", content),
                key_points=summary_data.get("key_points", []),
                main_themes=summary_data.get("main_themes", []),
                actionable_insights=summary_data.get("actionable_insights", []),
                confidence_score=summary_data.get("confidence_score", 0.85),
                processing_metadata={
                    "model": self.model,
                    "processing_time": processing_time,
                    "chunk_count": 1,
                    "fallback_used": False
                },
                usage=ModelUsage(
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    total_tokens=input_tokens + output_tokens,
                    model=self.model
                ),
                cost_data={
                    "input_cost": cost_estimate["input_cost"],
                    "output_cost": cost_estimate["output_cost"],
                    "total_cost": cost_estimate["total_cost"],
                    "cost_savings": 0.0
                }
            )

        except httpx.HTTPStatusError as e:
            status_code = e.response.status_code
            if status_code == 429:
                raise AIServiceError(
                    message="DeepSeek API rate limit exceeded",
                    error_code=ErrorCode.RATE_LIMIT_ERROR,
                    recoverable=True
                ) from e
            if status_code == 401:
                raise AIServiceError(
                    message="Invalid DeepSeek API key",
                    error_code=ErrorCode.AUTHENTICATION_ERROR,
                    recoverable=False
                ) from e
            raise AIServiceError(
                message=f"DeepSeek API error: {e.response.text}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                recoverable=True
            ) from e

        except AIServiceError:
            # Already in the service's error type; do not wrap it a second time.
            raise

        except Exception as e:
            # Boundary handler: callers only ever see AIServiceError, with the
            # underlying failure kept as the cause.
            raise AIServiceError(
                message=f"Failed to generate summary: {str(e)}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                recoverable=True
            ) from e

    def get_token_count(self, text: str) -> int:
        """Estimate token count for text.

        DeepSeek uses a similar tokenization to GPT models.
        We'll use a rough estimate of 1 token per 4 characters.
        """
        return len(text) // 4

    def _get_max_tokens(self, length: SummaryLength) -> int:
        """Return the completion-token budget for the requested summary length.

        BRIEF maps to 500 and DETAILED to 2000; any other value (including
        STANDARD) gets 1000.
        """
        budgets = (
            (SummaryLength.BRIEF, 500),
            (SummaryLength.DETAILED, 2000),
        )
        for candidate, budget in budgets:
            if length == candidate:
                return budget
        return 1000  # STANDARD

    def _build_summary_prompt(self, request: SummaryRequest) -> str:
        """Compose the user prompt sent to the model.

        The prompt embeds the transcript, an optional line listing the focus
        areas, a length instruction chosen from ``request.length`` (falling
        back to the STANDARD wording for unknown values) and the JSON shape
        the answer should follow.
        """
        wording_by_length = {
            SummaryLength.BRIEF: "Provide a concise summary in 2-3 paragraphs",
            SummaryLength.STANDARD: "Provide a comprehensive summary in 4-5 paragraphs",
            SummaryLength.DETAILED: "Provide an extensive, detailed summary with thorough analysis"
        }
        length_line = wording_by_length.get(
            request.length, wording_by_length[SummaryLength.STANDARD]
        )

        # Empty when no focus areas were supplied.
        focus_line = (
            f"\nFocus particularly on: {', '.join(request.focus_areas)}"
            if request.focus_areas
            else ""
        )

        return f"""Analyze this video transcript and provide a structured summary.

Transcript:
{request.transcript}

{focus_line}

{length_line}

Provide your response as a JSON object with this structure:
{{
    "summary": "Main summary text",
    "key_points": ["key point 1", "key point 2", ...],
    "main_themes": ["theme 1", "theme 2", ...],
    "actionable_insights": ["insight 1", "insight 2", ...],
    "confidence_score": 0.0-1.0
}}"""

    def _parse_text_response(self, text: str) -> Dict:
        """Parse text response as fallback."""
        lines = text.strip().split('\n')

        # Try to extract sections
        summary = ""
        key_points = []
        main_themes = []
        actionable_insights = []

        current_section = "summary"

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Check for section headers
            if "key point" in line.lower() or "main point" in line.lower():
                current_section = "key_points"
            elif "theme" in line.lower() or "topic" in line.lower():
                current_section = "main_themes"
            elif "insight" in line.lower() or "action" in line.lower():
                current_section = "actionable_insights"
            elif line.startswith("- ") or line.startswith("• "):
                # Bullet point
                content = line[2:].strip()
                if current_section == "key_points":
                    key_points.append(content)
                elif current_section == "main_themes":
                    main_themes.append(content)
                elif current_section == "actionable_insights":
                    actionable_insights.append(content)
            else:
                if current_section == "summary":
                    summary += line + " "

        return {
            "summary": summary.strip() or text,
            "key_points": key_points[:5],
            "main_themes": main_themes[:4],
            "actionable_insights": actionable_insights[:3],
            "confidence_score": 0.7
        }

    def _calculate_cost(self, input_tokens: int, output_tokens: int) -> Dict[str, float]:
        """Calculate cost for the request."""
        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k

        return {
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": input_cost + output_cost
        }

    async def _generate_chunked_summary(self, request: SummaryRequest) -> SummaryResult:
        """Summarize a transcript that is too long for a single request.

        The transcript is split into chunks, each chunk is summarized briefly
        via ``generate_summary`` (pausing one second between chunk requests),
        and the joined chunk summaries are summarized once more at the
        requested length.

        Args:
            request: The original request with the over-long transcript.

        Returns:
            The final SummaryResult. Its usage and cost figures cover every
            request made, ``chunk_count`` is the number of chunks processed
            and ``processing_time`` spans the whole chunked run.

        Raises:
            AIServiceError: Propagated from any ``generate_summary`` call.
        """
        run_started = time.perf_counter()
        max_chunk_size = 28000  # Leave room for prompt

        def fit_to_limit(text: str) -> List[str]:
            """Bisect ``text`` until each piece is within ``max_chunk_size``."""
            if self.get_token_count(text) <= max_chunk_size or len(text) < 2:
                return [text]
            middle = len(text) // 2
            # Prefer cutting at a space so words stay intact.
            cut = text.rfind(" ", 0, middle)
            if cut <= 0:
                return fit_to_limit(text[:middle]) + fit_to_limit(text[middle:])
            return fit_to_limit(text[:cut]) + fit_to_limit(text[cut + 1:])

        # Every chunk goes back through generate_summary, which re-enters this
        # method for oversized input. A chunk that is still too large would
        # therefore recurse forever, so enforce the limit here regardless of
        # how the splitter measured its chunks.
        chunks = [
            piece
            for chunk in self._split_transcript(request.transcript, max_chunk_size)
            for piece in fit_to_limit(chunk)
        ]

        # Summarize each chunk
        chunk_summaries = []
        total_input_tokens = 0
        total_output_tokens = 0
        last_index = len(chunks) - 1

        for i, chunk in enumerate(chunks):
            chunk_request = SummaryRequest(
                transcript=chunk,
                length=SummaryLength.BRIEF,  # Brief for chunks
                focus_areas=request.focus_areas
            )

            result = await self.generate_summary(chunk_request)
            chunk_summaries.append(result.summary)
            total_input_tokens += result.usage.input_tokens
            total_output_tokens += result.usage.output_tokens

            # Rate limiting: pause between chunk requests, not after the last.
            if i < last_index:
                await asyncio.sleep(1)

        # Generate final summary from the combined chunk summaries
        final_request = SummaryRequest(
            transcript="\n\n".join(chunk_summaries),
            length=request.length,
            focus_areas=request.focus_areas
        )

        final_result = await self.generate_summary(final_request)

        # Fold the per-chunk token usage into the final result
        final_result.usage.input_tokens += total_input_tokens
        final_result.usage.output_tokens += total_output_tokens
        final_result.usage.total_tokens = (
            final_result.usage.input_tokens + final_result.usage.output_tokens
        )

        # The final call only timed itself; report the whole chunked run.
        final_result.processing_metadata["chunk_count"] = len(chunks)
        final_result.processing_metadata["processing_time"] = (
            time.perf_counter() - run_started
        )

        # Recalculate cost from the aggregated token counts
        cost = self._calculate_cost(
            final_result.usage.input_tokens,
            final_result.usage.output_tokens
        )
        final_result.cost_data.update(cost)

        return final_result

    def _split_transcript(self, transcript: str, max_tokens: int) -> List[str]:
        """Split transcript into chunks."""
        words = transcript.split()
        chunks = []
        current_chunk = []
        current_size = 0

        for word in words:
            word_tokens = self.get_token_count(word)
            if current_size + word_tokens > max_tokens and current_chunk:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_size = word_tokens
            else:
                current_chunk.append(word)
                current_size += word_tokens

        if current_chunk:
            chunks.append(" ".join(current_chunk))

        return chunks

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Leave the ``async with`` block and close the HTTP client.

        The exception arguments are ignored and nothing is returned, so an
        exception raised inside the block keeps propagating.
        """
        http_client = self.client
        await http_client.aclose()