"""DeepSeek V2 summarization service."""
import asyncio
import json
import sys
import os
from typing import Dict, List

import httpx

# Add library path to import BaseAIService
lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../lib'))
if lib_path not in sys.path:
    sys.path.insert(0, lib_path)

try:
    from ai_assistant_lib.services.ai.base_ai_service import (
        BaseAIService,
        AIModelConfig,
        AIRequest,
        AIResponse,
    )
    USING_BASE_AI_SERVICE = True
except ImportError:
    # Fallback to old implementation if library not available
    from .ai_service import AIService as BaseAIService
    USING_BASE_AI_SERVICE = False

    # Create dummy classes for compatibility
    class AIModelConfig:
        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)

    class AIRequest:
        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)

    class AIResponse:
        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)

from .ai_service import SummaryRequest, SummaryResult, SummaryLength, ModelUsage
from ..core.exceptions import AIServiceError, ErrorCode

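# Everything below runs in one of two modes, selected at import time via
# USING_BASE_AI_SERVICE: the shared library's BaseAIService (with built-in
# retry and rate limiting) or the minimal fallback classes defined above.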
class DeepSeekSummarizer(BaseAIService):
    """DeepSeek-based summarization service."""

    def __init__(self, api_key: str, model: str = "deepseek-chat"):
        """Initialize DeepSeek summarizer.

        Args:
            api_key: DeepSeek API key
            model: Model to use (default: deepseek-chat)
        """
        config = AIModelConfig(
            model_name=model,
            temperature=0.3,
            max_tokens=2000,
            timeout_seconds=60,
            max_retries=3,
            backoff_factor=2.0
        )

        # Store configuration for both inheritance patterns
        self.api_key = api_key
        self.default_config = config

        # Initialize based on which BaseAIService we're using
        if USING_BASE_AI_SERVICE:
            # Initialize library BaseAIService with full parameters
            super().__init__(
                name="deepseek-summarizer",
                api_key=api_key,
                default_config=config
            )
        else:
            # Initialize abstract AIService (no parameters) and add missing attributes
            super().__init__()
            self.name = "deepseek-summarizer"
            self.is_initialized = False
            self._client = None

        self.base_url = "https://api.deepseek.com/v1"

        # Cost per 1K tokens (DeepSeek pricing)
        self.input_cost_per_1k = 0.00014  # $0.14 per 1M input tokens
        self.output_cost_per_1k = 0.00028  # $0.28 per 1M output tokens

    async def initialize(self):
        """Initialize the service; the fallback path creates the HTTP client directly."""
        if not USING_BASE_AI_SERVICE:
            self._client = await self._create_client()
            self.is_initialized = True
        else:
            await super().initialize()

    @property
    def client(self):
        """Get the HTTP client (None until initialize() has run)."""
        return getattr(self, "_client", None)

    async def predict(self, prompt: str, model_config: AIModelConfig) -> AIResponse:
        """Run a prediction, delegating to the library implementation when available."""
        if USING_BASE_AI_SERVICE:
            # Use library implementation (retry, rate limiting, etc.)
            return await super().predict(prompt, model_config)
        else:
            # Fallback implementation: wrap the prompt in a minimal AIRequest
            import uuid
            request = AIRequest(
                request_id=str(uuid.uuid4()),
                prompt=prompt,
                model_config=model_config
            )
            return await self._make_prediction(request)

    async def _create_client(self):
        """Create the HTTP client for DeepSeek API."""
        return httpx.AsyncClient(
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            timeout=self.default_config.timeout_seconds
        )

    async def _make_prediction(self, request: AIRequest) -> AIResponse:
        """Make prediction using DeepSeek API."""
        try:
            response = await self._client.post(
                f"{self.base_url}/chat/completions",
                json={
                    "model": request.model_config.model_name,
                    "messages": [
                        {
                            "role": "system",
                            "content": "You are an expert content summarizer specializing in video analysis. Provide clear, structured summaries."
                        },
                        {
                            "role": "user",
                            "content": request.prompt
                        }
                    ],
                    "max_tokens": request.model_config.max_tokens,
                    "temperature": request.model_config.temperature,
                    "response_format": {"type": "json_object"}
                }
            )

            response.raise_for_status()
            result = response.json()

            content = result["choices"][0]["message"]["content"]
            usage = result.get("usage", {})

            return AIResponse(
                request_id=request.request_id,
                content=content,
                model_name=request.model_config.model_name,
                usage={
                    "input_tokens": usage.get("prompt_tokens", 0),
                    "output_tokens": usage.get("completion_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0)
                }
            )

        except Exception as e:
            if USING_BASE_AI_SERVICE:
                from ai_assistant_lib.core.exceptions import AIServiceError as LibAIServiceError
                raise LibAIServiceError(
                    service_name=self.name,
                    operation="_make_prediction",
                    details={
                        "error": str(e),
                        "model": request.model_config.model_name
                    }
                ) from e
            else:
                # For fallback, just re-raise the original error
                raise

    async def generate_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate structured summary using DeepSeek."""

        # Handle long transcripts with chunking
        if self.get_token_count(request.transcript) > 30000:  # conservative DeepSeek context budget
            return await self._generate_chunked_summary(request)

        prompt = self._build_summary_prompt(request)

        try:
            # Create model config for this request
            model_config = AIModelConfig(
                model_name=self.default_config.model_name,
                temperature=0.3,
                max_tokens=self._get_max_tokens(request.length),
                timeout_seconds=self.default_config.timeout_seconds
            )

            # Use BaseAIService predict method with retry, rate limiting, etc.
            response = await self.predict(
                prompt=prompt,
                model_config=model_config
            )

            # Parse JSON response
            try:
                summary_data = json.loads(response.content)
            except json.JSONDecodeError:
                # Fallback to text parsing
                summary_data = self._parse_text_response(response.content)

            # Calculate cost
            input_tokens = response.usage.get("input_tokens", 0)
            output_tokens = response.usage.get("output_tokens", 0)
            cost_estimate = self._calculate_cost(input_tokens, output_tokens)

            return SummaryResult(
                summary=summary_data.get("summary", response.content),
                key_points=summary_data.get("key_points", []),
                main_themes=summary_data.get("main_themes", []),
                actionable_insights=summary_data.get("actionable_insights", []),
                confidence_score=summary_data.get("confidence_score", 0.85),
                processing_metadata={
                    "model": response.model_name,
                    "processing_time_seconds": (getattr(response, "processing_time_ms", 0) or 0) / 1000,
                    "input_tokens": input_tokens,
                    "output_tokens": output_tokens,
                    "total_tokens": input_tokens + output_tokens,
                    "chunks_processed": 1
                },
                cost_data={
                    "input_cost_usd": cost_estimate["input_cost"],
                    "output_cost_usd": cost_estimate["output_cost"],
                    "total_cost_usd": cost_estimate["total_cost"],
                    "cost_per_summary": cost_estimate["total_cost"]
                }
            )

        except Exception as e:
            raise AIServiceError(
                message=f"DeepSeek summarization failed: {str(e)}",
                error_code=ErrorCode.AI_SERVICE_ERROR,
                details={
                    "model": self.default_config.model_name,
                    "transcript_length": len(request.transcript),
                    "error_type": type(e).__name__
                }
            ) from e

    def get_token_count(self, text: str) -> int:
        """Estimate token count for text.

        DeepSeek uses tokenization similar to GPT models; we use a rough
        estimate of 1 token per 4 characters.
        """
        return len(text) // 4

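    # Worked example: a 60,000-character transcript ≈ 60000 // 4 = 15,000 input
    # tokens; a STANDARD summary allows 1,000 output tokens, so estimate_cost
    # gives (15000 / 1000) * 0.00014 + (1000 / 1000) * 0.00028 ≈ $0.0024.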
    def estimate_cost(self, transcript: str, length: SummaryLength) -> float:
        """Estimate cost (USD) for summarizing a transcript."""
        input_tokens = self.get_token_count(transcript)
        # Assume the full output budget is used, so this is an upper bound
        output_tokens = self._get_max_tokens(length)

        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k

        return input_cost + output_cost

    def _get_max_tokens(self, length: SummaryLength) -> int:
        """Get maximum tokens based on summary length."""
        if length == SummaryLength.BRIEF:
            return 500
        elif length == SummaryLength.DETAILED:
            return 2000
        else:  # STANDARD
            return 1000

    def _build_summary_prompt(self, request: SummaryRequest) -> str:
        """Build the summary prompt."""
        length_instructions = {
            SummaryLength.BRIEF: "Provide a concise summary in 2-3 paragraphs",
            SummaryLength.STANDARD: "Provide a comprehensive summary in 4-5 paragraphs",
            SummaryLength.DETAILED: "Provide an extensive, detailed summary with thorough analysis"
        }

        focus_context = ""
        if request.focus_areas:
            focus_context = f"\nFocus particularly on: {', '.join(request.focus_areas)}"

        prompt = f"""Analyze this video transcript and provide a structured summary.

Transcript:
{request.transcript}

{focus_context}

{length_instructions.get(request.length, length_instructions[SummaryLength.STANDARD])}

Provide your response as a JSON object with this structure:
{{
    "summary": "Main summary text",
    "key_points": ["key point 1", "key point 2", ...],
    "main_themes": ["theme 1", "theme 2", ...],
    "actionable_insights": ["insight 1", "insight 2", ...],
    "confidence_score": 0.0-1.0
}}"""

        return prompt

    def _parse_text_response(self, text: str) -> Dict:
        """Parse text response as fallback."""
        lines = text.strip().split('\n')

        # Try to extract sections
        summary = ""
        key_points = []
        main_themes = []
        actionable_insights = []

        current_section = "summary"

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Check for section headers
            if "key point" in line.lower() or "main point" in line.lower():
                current_section = "key_points"
            elif "theme" in line.lower() or "topic" in line.lower():
                current_section = "main_themes"
            elif "insight" in line.lower() or "action" in line.lower():
                current_section = "actionable_insights"
            elif line.startswith("- ") or line.startswith("• "):
                # Bullet point
                content = line[2:].strip()
                if current_section == "key_points":
                    key_points.append(content)
                elif current_section == "main_themes":
                    main_themes.append(content)
                elif current_section == "actionable_insights":
                    actionable_insights.append(content)
            else:
                if current_section == "summary":
                    summary += line + " "

        return {
            "summary": summary.strip() or text,
            "key_points": key_points[:5],
            "main_themes": main_themes[:4],
            "actionable_insights": actionable_insights[:3],
            "confidence_score": 0.7
        }

    def _calculate_cost(self, input_tokens: int, output_tokens: int) -> Dict[str, float]:
        """Calculate cost for the request."""
        input_cost = (input_tokens / 1000) * self.input_cost_per_1k
        output_cost = (output_tokens / 1000) * self.output_cost_per_1k

        return {
            "input_cost": input_cost,
            "output_cost": output_cost,
            "total_cost": input_cost + output_cost
        }

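    # Long transcripts are handled map-reduce style: each chunk is summarized
    # at BRIEF length, the chunk summaries are concatenated, and one final pass
    # produces a summary at the originally requested length.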
    async def _generate_chunked_summary(self, request: SummaryRequest) -> SummaryResult:
        """Generate summary for long transcripts using chunking."""
        # Split transcript into chunks
        max_chunk_size = 28000  # Leave room for prompt
        chunks = self._split_transcript(request.transcript, max_chunk_size)

        # Summarize each chunk
        chunk_summaries = []
        total_input_tokens = 0
        total_output_tokens = 0

        for i, chunk in enumerate(chunks):
            chunk_request = SummaryRequest(
                transcript=chunk,
                length=SummaryLength.BRIEF,  # Brief for chunks
                focus_areas=request.focus_areas
            )

            result = await self.generate_summary(chunk_request)
            chunk_summaries.append(result.summary)
            total_input_tokens += result.processing_metadata["input_tokens"]
            total_output_tokens += result.processing_metadata["output_tokens"]

            # Rate limiting
            if i < len(chunks) - 1:
                await asyncio.sleep(1)

        # Combine chunk summaries
        combined = "\n\n".join(chunk_summaries)

        # Generate final summary from combined chunks
        final_request = SummaryRequest(
            transcript=combined,
            length=request.length,
            focus_areas=request.focus_areas
        )

        final_result = await self.generate_summary(final_request)

        # Fold the per-chunk token counts into the final result's metadata
        final_result.processing_metadata["input_tokens"] += total_input_tokens
        final_result.processing_metadata["output_tokens"] += total_output_tokens
        final_result.processing_metadata["total_tokens"] = (
            final_result.processing_metadata["input_tokens"]
            + final_result.processing_metadata["output_tokens"]
        )
        final_result.processing_metadata["chunks_processed"] = len(chunks)

        # Recalculate cost across all chunk passes plus the final pass
        cost = self._calculate_cost(
            final_result.processing_metadata["input_tokens"],
            final_result.processing_metadata["output_tokens"]
        )
        final_result.cost_data.update({
            "input_cost_usd": cost["input_cost"],
            "output_cost_usd": cost["output_cost"],
            "total_cost_usd": cost["total_cost"],
            "cost_per_summary": cost["total_cost"]
        })

        return final_result

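    # Example: with max_tokens=28000 and the ~4-characters-per-token heuristic,
    # each chunk holds roughly 112,000 characters of transcript.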
    def _split_transcript(self, transcript: str, max_tokens: int) -> List[str]:
        """Split transcript into chunks that fit the per-request token budget."""
        words = transcript.split()
        chunks = []
        current_chunk = []
        current_size = 0

        for word in words:
            # Count the joining space too, and never let a word count as zero
            # tokens (len(word) // 4 alone rounds short words down to 0)
            word_tokens = max(1, self.get_token_count(word + " "))
            if current_size + word_tokens > max_tokens and current_chunk:
                chunks.append(" ".join(current_chunk))
                current_chunk = [word]
                current_size = word_tokens
            else:
                current_chunk.append(word)
                current_size += word_tokens

        if current_chunk:
            chunks.append(" ".join(current_chunk))

        return chunks

    async def __aenter__(self):
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - clean up resources."""
        if self.client:
            await self.client.aclose()