# youtube-summarizer/backend/services/rag_chat_service.py
"""RAG-powered chat service for interactive Q&A with video content."""
import asyncio
import logging
import uuid
from typing import Dict, List, Optional, Any, Tuple
from datetime import datetime
from dataclasses import dataclass
from enum import Enum
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer
from pydantic import BaseModel
from ..core.exceptions import ServiceError
from .deepseek_service import DeepSeekService
logger = logging.getLogger(__name__)
class MessageType(str, Enum):
"""Chat message types."""
USER = "user"
ASSISTANT = "assistant"
SYSTEM = "system"
class SourceReference(BaseModel):
"""Reference to source content with timestamp."""
chunk_id: str
timestamp: int # seconds
timestamp_formatted: str # [HH:MM:SS]
youtube_link: str
chunk_text: str
relevance_score: float
class ChatMessage(BaseModel):
"""Individual chat message."""
id: str
message_type: MessageType
content: str
sources: List[SourceReference]
processing_time_seconds: float
created_at: datetime
class ChatSession(BaseModel):
"""Chat session for a video."""
id: str
user_id: str
video_id: str
summary_id: str
session_name: str
messages: List[ChatMessage]
total_messages: int
is_active: bool
created_at: datetime
updated_at: datetime
class ChatRequest(BaseModel):
"""Request to ask a question."""
video_id: str
question: str
session_id: Optional[str] = None
include_context: bool = True
max_sources: int = 5
class ChatResponse(BaseModel):
"""Response from chat service."""
session_id: str
message: ChatMessage
follow_up_suggestions: List[str]
context_retrieved: bool
total_chunks_searched: int
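# Example request (illustrative values):
#   ChatRequest(video_id="abc123xyz00", question="How does the speaker define RAG?",
#               max_sources=3)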
@dataclass
class TranscriptChunk:
"""Chunk of transcript with metadata."""
chunk_id: str
video_id: str
chunk_text: str
start_timestamp: int
end_timestamp: int
chunk_index: int
word_count: int
class RAGChatService:
"""Service for RAG-powered chat with video content."""
def __init__(
self,
ai_service: Optional[DeepSeekService] = None,
chromadb_path: str = "./data/chromadb_rag"
):
"""Initialize RAG chat service.
Args:
ai_service: DeepSeek AI service for response generation
chromadb_path: Path to ChromaDB persistent storage
"""
self.ai_service = ai_service or DeepSeekService()
self.chromadb_path = chromadb_path
# Initialize embedding model (local, no API required)
logger.info("Loading sentence transformer model...")
self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Initialize ChromaDB client
self.chroma_client = chromadb.PersistentClient(
path=chromadb_path,
settings=Settings(
anonymized_telemetry=False,
allow_reset=True
)
)
# Chat session storage (in-memory for now, could be database)
self.chat_sessions: Dict[str, ChatSession] = {}
logger.info(f"RAG Chat Service initialized with ChromaDB at {chromadb_path}")
async def process_video_for_rag(
self,
video_id: str,
transcript: str,
video_title: str = ""
) -> bool:
"""Process video transcript for RAG by creating embeddings.
Args:
video_id: YouTube video ID
transcript: Video transcript text
video_title: Video title for context
Returns:
True if processing successful
"""
if not transcript or len(transcript.strip()) < 50:
raise ServiceError("Transcript too short for RAG processing")
logger.info(f"Processing video {video_id} for RAG with {len(transcript)} characters")
try:
# 1. Chunk the transcript
chunks = self._chunk_transcript(transcript, video_id)
logger.info(f"Created {len(chunks)} chunks for video {video_id}")
# 2. Generate embeddings for chunks
chunk_texts = [chunk.chunk_text for chunk in chunks]
logger.info("Generating embeddings...")
            # encode() is CPU-bound; run it off the event loop
            embeddings = await asyncio.to_thread(
                self.embedding_model.encode, chunk_texts, convert_to_tensor=False
            )
# 3. Store in ChromaDB
collection_name = f"video_{video_id}"
            # Recreate the collection so re-processing starts from a clean slate.
            # Version-dependent: chromadb raises ValueError (older) or NotFoundError
            # (newer) when the collection doesn't exist, so catch broadly.
            try:
                self.chroma_client.delete_collection(collection_name)
                logger.info(f"Cleared existing data for video {video_id}")
            except Exception:
                pass
            collection = self.chroma_client.create_collection(
                name=collection_name,
                metadata={
                    "video_id": video_id,
                    "video_title": video_title,
                    # Cosine distance makes 1 - distance a true similarity at query time
                    "hnsw:space": "cosine"
                }
            )
# Prepare data for ChromaDB
chunk_ids = [chunk.chunk_id for chunk in chunks]
metadatas = [
{
"video_id": chunk.video_id,
"start_timestamp": chunk.start_timestamp,
"end_timestamp": chunk.end_timestamp,
"chunk_index": chunk.chunk_index,
"word_count": chunk.word_count
}
for chunk in chunks
]
# Add to collection
collection.add(
embeddings=embeddings.tolist(),
documents=chunk_texts,
metadatas=metadatas,
ids=chunk_ids
)
logger.info(f"Successfully stored {len(chunks)} chunks in ChromaDB for video {video_id}")
return True
except Exception as e:
logger.error(f"Error processing video {video_id} for RAG: {e}")
raise ServiceError(f"RAG processing failed: {str(e)}")
def _chunk_transcript(self, transcript: str, video_id: str) -> List[TranscriptChunk]:
"""Chunk transcript into semantically meaningful segments.
Args:
transcript: Full transcript text
video_id: Video ID for chunk IDs
Returns:
List of transcript chunks
"""
        # Simple chunking strategy: accumulate paragraphs up to a target word
        # count, carrying a short sentence-level overlap between chunks
        paragraphs = [p.strip() for p in transcript.split('\n\n') if p.strip()]
        chunks = []
        chunk_size = 300  # Target words per chunk
        current_chunk = ""
        current_word_count = 0
        chunk_index = 0
        estimated_timestamp = 0
        words_per_minute = 150  # Average speaking rate
        last_index = len(paragraphs) - 1
        for i, paragraph in enumerate(paragraphs):
            paragraph_words = len(paragraph.split())
            # Add paragraph to current chunk
            current_chunk += paragraph + "\n\n"
            current_word_count += paragraph_words
            # Create chunk once we've reached the target size or hit the end
            # (an index check, so duplicate paragraph text can't end the loop early)
            if current_word_count >= chunk_size or i == last_index:
                if current_chunk.strip():
                    # Estimate timestamps from the average speaking rate
                    chunk_duration = (current_word_count / words_per_minute) * 60
                    start_timestamp = estimated_timestamp
                    end_timestamp = estimated_timestamp + int(chunk_duration)
                    chunk = TranscriptChunk(
                        chunk_id=f"{video_id}_chunk_{chunk_index}",
                        video_id=video_id,
                        chunk_text=current_chunk.strip(),
                        start_timestamp=start_timestamp,
                        end_timestamp=end_timestamp,
                        chunk_index=chunk_index,
                        word_count=current_word_count
                    )
                    chunks.append(chunk)
                if i == last_index:
                    break
                # Keep the last two sentences as overlap for continuity
                sentences = current_chunk.strip().split('.')
                if len(sentences) > 2:
                    overlap_text = '. '.join(sentences[-2:]).strip()
                    overlap_words = len(overlap_text.split())
                    current_chunk = overlap_text + ".\n\n"
                    current_word_count = overlap_words
                    estimated_timestamp = end_timestamp - int(overlap_words / words_per_minute * 60)
                else:
                    current_chunk = ""
                    current_word_count = 0
                    estimated_timestamp = end_timestamp
                chunk_index += 1
        return chunks
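    # Worked example of the chunking arithmetic above: a ~900-word transcript with
    # chunk_size=300 yields ~3 chunks, each estimated to span (300 / 150) * 60
    # = 120 seconds, so chunk start timestamps land near 0:00, 2:00, and 4:00.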
async def ask_question(
self,
request: ChatRequest,
user_id: str = "anonymous"
) -> ChatResponse:
"""Ask a question about video content using RAG.
Args:
request: Chat request with question and video ID
user_id: User ID for session management
Returns:
Chat response with answer and sources
"""
if not request.question or len(request.question.strip()) < 3:
raise ServiceError("Question is too short")
start_time = datetime.now()
logger.info(f"Processing question for video {request.video_id}: {request.question[:100]}...")
try:
# 1. Get or create chat session
session = await self._get_or_create_session(
request.session_id, user_id, request.video_id
)
# 2. Retrieve relevant chunks
relevant_chunks, total_searched = await self._retrieve_relevant_chunks(
request.video_id, request.question, request.max_sources
)
# 3. Generate response using RAG
response_content = await self._generate_rag_response(
request.question, relevant_chunks, session.messages[-5:] if session.messages else []
)
# 4. Create source references
source_refs = self._create_source_references(relevant_chunks, request.video_id)
# 5. Generate follow-up suggestions
follow_ups = await self._generate_follow_up_suggestions(
request.question, response_content, relevant_chunks
)
# 6. Create chat message
processing_time = (datetime.now() - start_time).total_seconds()
message = ChatMessage(
id=str(uuid.uuid4()),
message_type=MessageType.ASSISTANT,
content=response_content,
sources=source_refs,
processing_time_seconds=processing_time,
created_at=start_time
)
# 7. Add to session
user_message = ChatMessage(
id=str(uuid.uuid4()),
message_type=MessageType.USER,
content=request.question,
sources=[],
processing_time_seconds=0,
created_at=start_time
)
session.messages.extend([user_message, message])
session.total_messages += 2
session.updated_at = datetime.now()
# 8. Store session
self.chat_sessions[session.id] = session
response = ChatResponse(
session_id=session.id,
message=message,
follow_up_suggestions=follow_ups,
context_retrieved=len(relevant_chunks) > 0,
total_chunks_searched=total_searched
)
logger.info(f"Question answered in {processing_time:.2f}s with {len(source_refs)} sources")
return response
except Exception as e:
logger.error(f"Error answering question for video {request.video_id}: {e}")
raise ServiceError(f"Failed to answer question: {str(e)}")
async def _retrieve_relevant_chunks(
self,
video_id: str,
question: str,
max_results: int = 5
) -> Tuple[List[Dict[str, Any]], int]:
"""Retrieve relevant chunks using semantic search.
Args:
video_id: Video ID to search
question: User question
max_results: Maximum chunks to return
Returns:
Tuple of (relevant chunks, total searched)
"""
collection_name = f"video_{video_id}"
try:
collection = self.chroma_client.get_collection(collection_name)
            # Generate the question embedding off the event loop (encode() is CPU-bound)
            question_embedding = await asyncio.to_thread(
                self.embedding_model.encode, [question], convert_to_tensor=False
            )
# Search for relevant chunks
results = collection.query(
query_embeddings=question_embedding.tolist(),
n_results=max_results,
include=['documents', 'metadatas', 'distances']
)
# Process results
relevant_chunks = []
if results['documents'] and len(results['documents'][0]) > 0:
documents = results['documents'][0]
metadatas = results['metadatas'][0]
distances = results['distances'][0]
for i, (doc, metadata, distance) in enumerate(zip(documents, metadatas, distances)):
                    # With hnsw:space="cosine", distance is the cosine distance, so
                    # 1 - distance is the cosine similarity (clamped at 0)
                    relevance_score = max(0, 1 - distance)
chunk_data = {
'chunk_text': doc,
'metadata': metadata,
'relevance_score': relevance_score,
'rank': i + 1
}
relevant_chunks.append(chunk_data)
# Get total count from collection
total_count = collection.count()
logger.info(f"Retrieved {len(relevant_chunks)} relevant chunks from {total_count} total chunks")
return relevant_chunks, total_count
        except ValueError:
            # Older chromadb raises ValueError for a missing collection; newer
            # versions raise their own NotFoundError, caught by the handler below
            logger.warning(f"No collection found for video {video_id}")
return [], 0
except Exception as e:
logger.error(f"Error retrieving chunks for video {video_id}: {e}")
return [], 0
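    # Retrieval sketch (illustrative numbers): for a 40-chunk video, query()
    # returns the max_results nearest chunks; a cosine distance of 0.35 maps to
    # relevance_score = 0.65 via the conversion above.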
async def _generate_rag_response(
self,
question: str,
relevant_chunks: List[Dict[str, Any]],
chat_history: List[ChatMessage]
) -> str:
"""Generate response using retrieved chunks and chat history.
Args:
question: User question
relevant_chunks: Retrieved relevant chunks
chat_history: Recent chat history
Returns:
Generated response
"""
if not relevant_chunks:
return "I couldn't find relevant information in the video to answer your question. Could you please rephrase or ask about something else covered in the content?"
# Build context from chunks
context_parts = []
for i, chunk in enumerate(relevant_chunks[:5], 1):
timestamp = self._format_timestamp(chunk['metadata']['start_timestamp'])
context_parts.append(f"[Context {i} - {timestamp}]: {chunk['chunk_text'][:400]}")
context = "\n\n".join(context_parts)
# Build chat history context
history_context = ""
if chat_history:
recent_messages = []
for msg in chat_history[-4:]: # Last 4 messages
if msg.message_type == MessageType.USER:
recent_messages.append(f"User: {msg.content}")
elif msg.message_type == MessageType.ASSISTANT:
recent_messages.append(f"Assistant: {msg.content[:200]}...")
if recent_messages:
history_context = f"\n\nRecent conversation:\n{chr(10).join(recent_messages)}"
system_prompt = """You are a helpful AI assistant that answers questions about video content.
You have access to relevant sections of the video transcript with timestamps.
Instructions:
- Answer the user's question based on the provided context
- Include timestamp references like [05:23] when referencing specific parts
- If the context doesn't contain enough information, say so clearly
- Keep responses conversational but informative
- Don't make up information not in the context
- If multiple contexts are relevant, synthesize information from them
"""
prompt = f"""Based on the video content below, please answer this question: "{question}"
Video Content:
{context}
{history_context}
Please provide a helpful response that references specific timestamps when possible."""
try:
response = await self.ai_service.generate_response(
prompt=prompt,
system_prompt=system_prompt,
temperature=0.4, # Slightly creative but grounded
max_tokens=800
)
# Add timestamp formatting to response if not present
response = self._enhance_response_with_timestamps(response, relevant_chunks)
return response
except Exception as e:
logger.error(f"Error generating RAG response: {e}")
return "I encountered an error generating a response. Please try asking your question again."
def _enhance_response_with_timestamps(
self,
response: str,
relevant_chunks: List[Dict[str, Any]]
) -> str:
"""Enhance response with timestamp references.
Args:
response: Generated response
relevant_chunks: Source chunks with timestamps
Returns:
Enhanced response with timestamps
"""
        # If the model omitted timestamps, prepend one from the top-ranked chunk
        if response and '[' not in response and relevant_chunks:
            most_relevant = relevant_chunks[0]
            timestamp = self._format_timestamp(most_relevant['metadata']['start_timestamp'])
            # Add timestamp reference to the beginning
            response = f"According to the video at [{timestamp}], {response[0].lower()}{response[1:]}"
return response
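    # Example: with a top-ranked chunk starting at 323s, the response
    # "The speaker argues X." becomes
    # "According to the video at [05:23], the speaker argues X."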
def _create_source_references(
self,
relevant_chunks: List[Dict[str, Any]],
video_id: str
) -> List[SourceReference]:
"""Create source references from relevant chunks.
Args:
relevant_chunks: Retrieved chunks
video_id: Video ID for YouTube links
Returns:
List of source references
"""
source_refs = []
for chunk in relevant_chunks:
metadata = chunk['metadata']
start_timestamp = metadata['start_timestamp']
source_ref = SourceReference(
chunk_id=f"{video_id}_chunk_{metadata['chunk_index']}",
timestamp=start_timestamp,
timestamp_formatted=f"[{self._format_timestamp(start_timestamp)}]",
youtube_link=f"https://youtube.com/watch?v={video_id}&t={start_timestamp}s",
                chunk_text=(chunk['chunk_text'][:200] + "..." if len(chunk['chunk_text']) > 200 else chunk['chunk_text']),
relevance_score=round(chunk['relevance_score'], 3)
)
source_refs.append(source_ref)
return source_refs
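    # Example: a chunk starting at 323s produces timestamp_formatted "[05:23]" and
    # youtube_link "https://youtube.com/watch?v=<video_id>&t=323s", which opens
    # the video at that offset.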
async def _generate_follow_up_suggestions(
self,
question: str,
response: str,
relevant_chunks: List[Dict[str, Any]]
) -> List[str]:
"""Generate follow-up question suggestions.
Args:
question: Original question
response: Generated response
relevant_chunks: Source chunks
Returns:
List of follow-up suggestions
"""
if not relevant_chunks:
return []
try:
# Extract topics from chunks for follow-up suggestions
chunk_topics = []
for chunk in relevant_chunks[:3]:
text = chunk['chunk_text'][:300]
chunk_topics.append(text)
context = " ".join(chunk_topics)
system_prompt = """Generate 3 relevant follow-up questions based on the video content.
Questions should be natural, specific, and encourage deeper exploration of the topic.
Return only the questions, one per line, without numbering."""
prompt = f"""Based on this video content and the user's interest in "{question}", suggest follow-up questions:
{context[:1000]}
Generate 3 specific follow-up questions that would help the user learn more about this topic."""
suggestions_response = await self.ai_service.generate_response(
prompt=prompt,
system_prompt=system_prompt,
temperature=0.6, # More creative for suggestions
max_tokens=200
)
            # Parse suggestions: strip any bullet/number prefix the model added
            # despite instructions, then keep lines that look like questions
            suggestions = []
            for line in suggestions_response.split('\n'):
                line = line.strip().lstrip('1234567890.-* ')
                if len(line) > 10 and '?' in line:
                    suggestions.append(line)
            return suggestions[:3]  # Limit to 3 suggestions
except Exception as e:
logger.error(f"Error generating follow-up suggestions: {e}")
return []
async def _get_or_create_session(
self,
session_id: Optional[str],
user_id: str,
video_id: str
) -> ChatSession:
"""Get existing session or create new one.
Args:
session_id: Optional existing session ID
user_id: User ID
video_id: Video ID
Returns:
Chat session
"""
if session_id and session_id in self.chat_sessions:
session = self.chat_sessions[session_id]
if session.video_id == video_id:
return session
# Create new session
new_session = ChatSession(
id=str(uuid.uuid4()),
user_id=user_id,
video_id=video_id,
summary_id="", # Will be set when linked to summary
session_name=f"Chat - {datetime.now().strftime('%Y-%m-%d %H:%M')}",
messages=[],
total_messages=0,
is_active=True,
created_at=datetime.now(),
updated_at=datetime.now()
)
self.chat_sessions[new_session.id] = new_session
return new_session
def _format_timestamp(self, seconds: int) -> str:
"""Format seconds as MM:SS or HH:MM:SS.
Args:
seconds: Time in seconds
Returns:
Formatted timestamp
"""
hours = seconds // 3600
minutes = (seconds % 3600) // 60
secs = seconds % 60
if hours > 0:
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
else:
return f"{minutes:02d}:{secs:02d}"
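    # e.g. _format_timestamp(75) -> "01:15"; _format_timestamp(3671) -> "01:01:11"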
async def get_chat_session(self, session_id: str) -> Optional[ChatSession]:
"""Get chat session by ID.
Args:
session_id: Session ID
Returns:
Chat session or None if not found
"""
return self.chat_sessions.get(session_id)
async def list_user_sessions(self, user_id: str, video_id: Optional[str] = None) -> List[ChatSession]:
"""List chat sessions for a user.
Args:
user_id: User ID
video_id: Optional video ID filter
Returns:
List of user's chat sessions
"""
sessions = []
for session in self.chat_sessions.values():
if session.user_id == user_id:
if video_id is None or session.video_id == video_id:
sessions.append(session)
# Sort by most recent
sessions.sort(key=lambda s: s.updated_at, reverse=True)
return sessions
async def delete_session(self, session_id: str, user_id: str) -> bool:
"""Delete a chat session.
Args:
session_id: Session ID to delete
user_id: User ID for authorization
Returns:
True if deleted successfully
"""
if session_id in self.chat_sessions:
session = self.chat_sessions[session_id]
if session.user_id == user_id:
del self.chat_sessions[session_id]
return True
return False
async def export_session(self, session_id: str, user_id: str) -> Optional[str]:
"""Export chat session as markdown.
Args:
session_id: Session ID
user_id: User ID for authorization
Returns:
Markdown export or None if not found
"""
session = self.chat_sessions.get(session_id)
if not session or session.user_id != user_id:
return None
lines = [
f"# Chat Session: {session.session_name}",
"",
f"**Video ID:** {session.video_id}",
f"**Created:** {session.created_at.strftime('%Y-%m-%d %H:%M:%S')}",
f"**Total Messages:** {session.total_messages}",
"",
"---",
""
]
for message in session.messages:
if message.message_type == MessageType.USER:
lines.extend([
                    "## 👤 User",
"",
message.content,
""
])
elif message.message_type == MessageType.ASSISTANT:
lines.extend([
                    "## 🤖 Assistant",
"",
message.content,
""
])
if message.sources:
lines.extend([
"**Sources:**",
""
])
for source in message.sources:
lines.append(f"- {source.timestamp_formatted} [Jump to video]({source.youtube_link})")
lines.append("")
lines.extend(["---", ""])
return "\n".join(lines)
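    # The export renders roughly as (values are illustrative):
    #   # Chat Session: Chat - 2024-01-01 12:00
    #   **Video ID:** abc123xyz00
    #   ## 👤 User
    #   <question>
    #   ## 🤖 Assistant
    #   <answer with [MM:SS] references>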
async def get_service_health(self) -> Dict[str, Any]:
"""Get RAG chat service health status.
Returns:
Service health information
"""
health = {
"service": "rag_chat",
"status": "healthy",
"timestamp": datetime.now().isoformat()
}
try:
# Test ChromaDB
collections = self.chroma_client.list_collections()
health["chromadb_status"] = "connected"
health["collections_count"] = len(collections)
# Test embedding model
test_embedding = self.embedding_model.encode(["test"], convert_to_tensor=False)
health["embedding_model_status"] = "loaded"
health["embedding_dimension"] = len(test_embedding[0])
# Active sessions
health["active_sessions"] = len(self.chat_sessions)
# Test AI service
if self.ai_service:
ai_health = await self.ai_service.test_connection()
health["ai_service_status"] = ai_health["status"]
else:
health["ai_service_status"] = "not_configured"
health["status"] = "degraded"
except Exception as e:
health["status"] = "error"
health["error"] = str(e)
return health