"""RAG (Retrieval-Augmented Generation) service for video chat."""
import asyncio
import logging
from typing import List, Dict, Any, Optional, Tuple
from datetime import datetime
import json
import uuid
from backend.core.exceptions import ServiceError
from backend.models.chat import ChatSession, ChatMessage, VideoChunk
from backend.models.summary import Summary
from backend.services.semantic_search_service import SemanticSearchService
from backend.services.chroma_service import ChromaService
from backend.services.transcript_chunker import TranscriptChunker
from backend.services.deepseek_service import DeepSeekService
from backend.core.database_registry import registry
logger = logging.getLogger(__name__)
class RAGError(ServiceError):
"""RAG service specific errors."""
pass


class RAGService:
    """Service for RAG-powered video chat and question answering."""

    def __init__(
        self,
        search_service: Optional[SemanticSearchService] = None,
        chroma_service: Optional[ChromaService] = None,
        chunker_service: Optional[TranscriptChunker] = None,
        ai_service: Optional[DeepSeekService] = None
    ):
        """Initialize RAG service.

        Args:
            search_service: Semantic search service
            chroma_service: ChromaDB service
            chunker_service: Transcript chunking service
            ai_service: AI service for response generation
        """
        self.search_service = search_service or SemanticSearchService()
        self.chroma_service = chroma_service or ChromaService()
        self.chunker_service = chunker_service or TranscriptChunker()
        self.ai_service = ai_service or DeepSeekService()

        # RAG configuration
        self.config = {
            'max_context_chunks': 5,
            'max_context_length': 4000,
            'min_similarity_threshold': 0.3,
            'max_response_tokens': 800,
            'temperature': 0.7,
            'include_source_timestamps': True
        }

        # Performance metrics
        self.metrics = {
            'total_queries': 0,
            'successful_responses': 0,
            'failed_responses': 0,
            'avg_response_time': 0.0,
            'avg_context_chunks': 0.0,
            'total_tokens_used': 0
        }

    async def initialize(self) -> None:
        """Initialize all service components."""
        try:
            await self.search_service.initialize()
            logger.info("RAG service initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize RAG service: {e}")
            raise RAGError(f"RAG service initialization failed: {e}")

    async def index_video_content(
        self,
        video_id: str,
        transcript: str,
        summary_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Index video content for RAG search.

        Args:
            video_id: YouTube video ID
            transcript: Video transcript text
            summary_id: Optional summary ID

        Returns:
            Indexing results and statistics
        """
        try:
            logger.info(f"Indexing video content for {video_id}")
            start_time = datetime.now()

            # Chunk the transcript
            chunks = self.chunker_service.chunk_transcript(
                transcript=transcript,
                video_id=video_id
            )
            if not chunks:
                logger.warning(f"No chunks created for video {video_id}")
                return {
                    'video_id': video_id,
                    'chunks_created': 0,
                    'indexed': False,
                    'error': 'No chunks created from transcript'
                }

            # Store chunks in ChromaDB
            chroma_ids = await self.chroma_service.add_document_chunks(
                video_id=video_id,
                chunks=chunks
            )

            # Store chunk metadata in database
            indexed_chunks = []
            with registry.get_session() as session:
                for chunk, chroma_id in zip(chunks, chroma_ids):
                    video_chunk = VideoChunk(
                        video_id=video_id,
                        summary_id=summary_id,
                        chunk_index=chunk['chunk_index'],
                        chunk_type=chunk['chunk_type'],
                        start_timestamp=chunk.get('start_timestamp'),
                        end_timestamp=chunk.get('end_timestamp'),
                        content=chunk['content'],
                        content_length=chunk['content_length'],
                        content_hash=chunk['content_hash'],
                        chromadb_id=chroma_id,
                        embedding_model='sentence-transformers/all-MiniLM-L6-v2',
                        embedding_created_at=datetime.now()
                    )
                    session.add(video_chunk)
                    indexed_chunks.append({
                        'chunk_index': chunk['chunk_index'],
                        'content_length': chunk['content_length'],
                        'start_timestamp': chunk.get('start_timestamp'),
                        'end_timestamp': chunk.get('end_timestamp')
                    })
                session.commit()

            processing_time = (datetime.now() - start_time).total_seconds()
            result = {
                'video_id': video_id,
                'chunks_created': len(chunks),
                'chunks_indexed': len(chroma_ids),
                'processing_time_seconds': processing_time,
                'indexed': True,
                'chunks': indexed_chunks,
                'chunking_stats': self.chunker_service.get_chunking_stats(chunks)
            }
            logger.info(
                f"Successfully indexed {len(chunks)} chunks for video {video_id} "
                f"in {processing_time:.3f}s"
            )
            return result
        except Exception as e:
            logger.error(f"Failed to index video content: {e}")
            raise RAGError(f"Content indexing failed: {e}")

    async def chat_query(
        self,
        session_id: str,
        query: str,
        user_id: Optional[str] = None,
        search_mode: str = "hybrid",
        max_context_chunks: Optional[int] = None
    ) -> Dict[str, Any]:
        """Process a chat query using RAG.

        Args:
            session_id: Chat session ID
            query: User's question/query
            user_id: Optional user ID
            search_mode: Search strategy to use
            max_context_chunks: Override for max context chunks

        Returns:
            Chat response with sources and metadata
        """
        start_time = datetime.now()
        self.metrics['total_queries'] += 1
        try:
            logger.info(f"Processing chat query for session {session_id}: '{query[:50]}...'")

            # Get chat session and video context
            with registry.get_session() as session:
                chat_session = session.query(ChatSession).filter(
                    ChatSession.id == session_id
                ).first()
                if not chat_session:
                    raise RAGError(f"Chat session {session_id} not found")
                video_id = chat_session.video_id

            # Perform semantic search to get relevant context
            search_results = await self.search_service.search(
                query=query,
                video_id=video_id,
                search_mode=search_mode,
                max_results=max_context_chunks or self.config['max_context_chunks'],
                similarity_threshold=self.config['min_similarity_threshold'],
                user_id=user_id
            )
            context_chunks = search_results.get('results', [])
            if not context_chunks:
                logger.warning(f"No relevant context found for query: {query}")
                return await self._generate_no_context_response(query, session_id)

            # Generate AI response with context
            response = await self._generate_rag_response(
                query=query,
                context_chunks=context_chunks,
                session_id=session_id
            )

            # Store chat message in database
            await self._store_chat_message(
                session_id=session_id,
                query=query,
                response=response,
                context_chunks=context_chunks,
                search_results=search_results
            )

            # Update metrics
            self._update_metrics(start_time, len(context_chunks), response.get('total_tokens', 0))
            self.metrics['successful_responses'] += 1
            return response
        except Exception as e:
            logger.error(f"Chat query failed: {e}")
            self.metrics['failed_responses'] += 1
            raise RAGError(f"Chat query failed: {e}")

    async def _generate_rag_response(
        self,
        query: str,
        context_chunks: List[Dict[str, Any]],
        session_id: str
    ) -> Dict[str, Any]:
        """Generate AI response using RAG context.

        Args:
            query: User query
            context_chunks: Relevant context chunks
            session_id: Chat session ID

        Returns:
            Generated response with metadata
        """
        try:
            # Prepare context for AI model
            context_text = self._prepare_context_text(context_chunks)

            # Build RAG prompt
            rag_prompt = self._build_rag_prompt(query, context_text)

            # Generate response using AI service
            ai_response = await self.ai_service.generate_response(
                prompt=rag_prompt,
                max_tokens=self.config['max_response_tokens'],
                temperature=self.config['temperature']
            )

            # Format response with sources
            formatted_response = self._format_response_with_sources(
                ai_response=ai_response,
                context_chunks=context_chunks,
                query=query
            )
            return formatted_response
        except Exception as e:
            logger.error(f"Failed to generate RAG response: {e}")
            raise RAGError(f"Response generation failed: {e}")

    def _prepare_context_text(self, context_chunks: List[Dict[str, Any]]) -> str:
        """Prepare context text from chunks for AI prompt.

        Args:
            context_chunks: List of relevant chunks

        Returns:
            Formatted context text
        """
        context_parts = []
        total_length = 0
        for chunk in context_chunks:
            content = chunk.get('content', '')
            timestamp = chunk.get('timestamp_formatted', '')

            # Format context with timestamp
            if timestamp and self.config['include_source_timestamps']:
                context_part = f"{timestamp} {content}"
            else:
                context_part = content

            # Stop at the first chunk that would exceed max context length
            if total_length + len(context_part) > self.config['max_context_length']:
                break
            context_parts.append(context_part)
            total_length += len(context_part)
        return "\n\n".join(context_parts)

    def _build_rag_prompt(self, query: str, context: str) -> str:
        """Build RAG prompt for AI model.

        Args:
            query: User query
            context: Relevant context from video

        Returns:
            Complete RAG prompt
        """
        prompt = f"""You are a helpful assistant that answers questions about YouTube video content. Use the provided context from the video to answer the user's question accurately and comprehensively.

CONTEXT FROM VIDEO:
{context}

USER QUESTION: {query}

INSTRUCTIONS:
- Answer based primarily on the provided context
- If the context contains timestamp information (like [HH:MM:SS]), reference specific timestamps in your response
- If the question cannot be fully answered from the context, acknowledge this limitation
- Be concise but thorough in your explanation
- Include specific details and examples from the video when relevant
- If you mention specific points, try to reference the timestamp where that information appears

RESPONSE:"""
        return prompt

    def _format_response_with_sources(
        self,
        ai_response: Dict[str, Any],
        context_chunks: List[Dict[str, Any]],
        query: str
    ) -> Dict[str, Any]:
        """Format AI response with source attribution.

        Args:
            ai_response: Raw AI response
            context_chunks: Source chunks
            query: Original query

        Returns:
            Formatted response with sources
        """
        response_text = ai_response.get('content', '')
        usage = ai_response.get('usage', {})

        # Prepare source information
        sources = []
        for chunk in context_chunks:
            content = chunk.get('content', '')
            source = {
                'chunk_id': chunk.get('chunk_id'),
                'content_preview': content[:200] + "..." if len(content) > 200 else content,
                'timestamp': chunk.get('start_timestamp'),
                'timestamp_formatted': chunk.get('timestamp_formatted'),
                'youtube_link': chunk.get('youtube_link'),
                'similarity_score': chunk.get('similarity_score', chunk.get('relevance_score', 0.0)),
                'search_method': chunk.get('search_method', 'unknown')
            }
            sources.append(source)

        return {
            'response': response_text,
            'sources': sources,
            'total_sources': len(sources),
            'query': query,
            'context_chunks_used': len(context_chunks),
            'model_used': ai_response.get('model', 'deepseek-chat'),
            'prompt_tokens': usage.get('prompt_tokens', 0),
            'completion_tokens': usage.get('completion_tokens', 0),
            'total_tokens': usage.get('total_tokens', 0),
            'processing_time_seconds': ai_response.get('processing_time', 0.0),
            'timestamp': datetime.now().isoformat()
        }
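
    # For reference, a payload from the method above looks roughly like the
    # sketch below (values are illustrative, not from the original source):
    #
    #   {
    #       'response': 'The speaker covers X at [00:03:12]...',
    #       'sources': [{'chunk_id': '...', 'timestamp': 192.0, ...}],
    #       'total_sources': 3,
    #       'query': 'What is said about X?',
    #       'context_chunks_used': 3,
    #       'model_used': 'deepseek-chat',
    #       'prompt_tokens': 1450,
    #       'completion_tokens': 220,
    #       'total_tokens': 1670,
    #       'processing_time_seconds': 2.4,
    #       'timestamp': '2025-01-01T12:00:00',
    #   }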

    async def _generate_no_context_response(
        self,
        query: str,
        session_id: str
    ) -> Dict[str, Any]:
        """Generate response when no relevant context is found.

        Args:
            query: User query
            session_id: Chat session ID

        Returns:
            No-context response
        """
        response_text = """I couldn't find relevant information in the video transcript to answer your question. This might be because:

1. The topic you're asking about isn't covered in this video
2. The question is too specific or uses different terminology
3. The video content hasn't been properly indexed yet

Could you try rephrasing your question or asking about a different topic that might be covered in the video?"""
        return {
            'response': response_text,
            'sources': [],
            'total_sources': 0,
            'query': query,
            'context_chunks_used': 0,
            'no_context_found': True,
            'timestamp': datetime.now().isoformat()
        }

    async def _store_chat_message(
        self,
        session_id: str,
        query: str,
        response: Dict[str, Any],
        context_chunks: List[Dict[str, Any]],
        search_results: Dict[str, Any]
    ) -> None:
        """Store chat message in database.

        Persistence failures are logged but not re-raised, so a storage
        problem never blocks the chat response itself.

        Args:
            session_id: Chat session ID
            query: User query
            response: Generated response
            context_chunks: Context chunks used
            search_results: Raw search results
        """
        try:
            with registry.get_session() as session:
                # Store user message
                user_message = ChatMessage(
                    session_id=session_id,
                    message_type="user",
                    content=query,
                    created_at=datetime.now()
                )
                session.add(user_message)

                # Store assistant response
                assistant_message = ChatMessage(
                    session_id=session_id,
                    message_type="assistant",
                    content=response['response'],
                    original_query=query,
                    context_chunks=json.dumps([chunk.get('chunk_id') for chunk in context_chunks]),
                    sources=json.dumps(response.get('sources', [])),
                    total_sources=response.get('total_sources', 0),
                    model_used=response.get('model_used'),
                    prompt_tokens=response.get('prompt_tokens'),
                    completion_tokens=response.get('completion_tokens'),
                    total_tokens=response.get('total_tokens'),
                    processing_time_seconds=response.get('processing_time_seconds'),
                    created_at=datetime.now()
                )
                session.add(assistant_message)

                # Update session statistics (each exchange adds two messages)
                chat_session = session.query(ChatSession).filter(
                    ChatSession.id == session_id
                ).first()
                if chat_session:
                    chat_session.message_count = (chat_session.message_count or 0) + 2
                    chat_session.last_message_at = datetime.now()
                    if response.get('processing_time_seconds'):
                        total_time = (chat_session.total_processing_time or 0.0) + response['processing_time_seconds']
                        chat_session.total_processing_time = total_time
                        chat_session.avg_response_time = total_time / (chat_session.message_count // 2)
                session.commit()
            logger.info(f"Stored chat messages for session {session_id}")
        except Exception as e:
            logger.error(f"Failed to store chat message: {e}")

    async def create_chat_session(
        self,
        video_id: str,
        user_id: Optional[str] = None,
        title: Optional[str] = None
    ) -> Dict[str, Any]:
        """Create a new chat session for a video.

        Args:
            video_id: YouTube video ID
            user_id: Optional user ID
            title: Optional session title

        Returns:
            Created session information
        """
        try:
            session_id = str(uuid.uuid4())

            # Get video information
            with registry.get_session() as session:
                summary = session.query(Summary).filter(
                    Summary.video_id == video_id
                ).first()

                # Generate title if not provided
                if not title and summary:
                    title = f"Chat about: {summary.video_title[:50]}..."
                elif not title:
                    title = f"Chat about video {video_id}"

                # Create chat session
                chat_session = ChatSession(
                    id=session_id,
                    user_id=user_id,
                    video_id=video_id,
                    summary_id=str(summary.id) if summary else None,
                    title=title,
                    session_config=json.dumps(self.config),
                    is_active=True,
                    created_at=datetime.now()
                )
                session.add(chat_session)
                session.commit()

            logger.info(f"Created chat session {session_id} for video {video_id}")
            return {
                'session_id': session_id,
                'video_id': video_id,
                'title': title,
                'user_id': user_id,
                'created_at': datetime.now().isoformat(),
                'config': self.config
            }
        except Exception as e:
            logger.error(f"Failed to create chat session: {e}")
            raise RAGError(f"Session creation failed: {e}")

    async def get_chat_history(
        self,
        session_id: str,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """Get chat history for a session.

        Args:
            session_id: Chat session ID
            limit: Maximum number of messages

        Returns:
            List of chat messages
        """
        try:
            with registry.get_session() as session:
                messages = session.query(ChatMessage).filter(
                    ChatMessage.session_id == session_id
                ).order_by(ChatMessage.created_at.asc()).limit(limit).all()

                formatted_messages = []
                for msg in messages:
                    message_dict = {
                        'id': msg.id,
                        'message_type': msg.message_type,
                        'content': msg.content,
                        'created_at': msg.created_at.isoformat() if msg.created_at else None,
                    }
                    # Add sources for assistant messages
                    if msg.message_type == "assistant" and msg.sources:
                        try:
                            message_dict['sources'] = json.loads(msg.sources)
                            message_dict['total_sources'] = msg.total_sources
                        except (json.JSONDecodeError, TypeError):
                            # Skip malformed source payloads rather than
                            # failing the whole history
                            pass
                    formatted_messages.append(message_dict)
                return formatted_messages
        except Exception as e:
            logger.error(f"Failed to get chat history: {e}")
            return []

    def _update_metrics(
        self,
        start_time: datetime,
        context_chunks_count: int,
        tokens_used: int
    ) -> None:
        """Update service metrics.

        Args:
            start_time: Query start time
            context_chunks_count: Number of context chunks used
            tokens_used: Number of tokens used
        """
        response_time = (datetime.now() - start_time).total_seconds()

        # chat_query increments total_queries before this runs, so it is >= 1
        total_queries = self.metrics['total_queries']

        # Running average of response time
        total_time = self.metrics['avg_response_time'] * (total_queries - 1)
        self.metrics['avg_response_time'] = (total_time + response_time) / total_queries

        # Running average of context chunks
        total_chunks = self.metrics['avg_context_chunks'] * (total_queries - 1)
        self.metrics['avg_context_chunks'] = (total_chunks + context_chunks_count) / total_queries

        # Total tokens
        self.metrics['total_tokens_used'] += tokens_used
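
    # Worked example of the running average (illustrative numbers): if
    # avg_response_time is 2.0s over 3 earlier queries and a 4th query takes
    # 4.0s, the update computes (2.0 * 3 + 4.0) / 4 = 2.5s as the new average.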

    async def get_service_stats(self) -> Dict[str, Any]:
        """Get RAG service statistics.

        Returns:
            Service statistics
        """
        try:
            # Get ChromaDB stats
            chroma_stats = await self.chroma_service.get_collection_stats()

            # Get search service metrics
            search_metrics = self.search_service._get_current_metrics()

            return {
                'rag_metrics': dict(self.metrics),
                'chroma_stats': chroma_stats,
                'search_metrics': search_metrics,
                'config': dict(self.config),
                'timestamp': datetime.now().isoformat()
            }
        except Exception as e:
            logger.error(f"Failed to get service stats: {e}")
            return {'error': str(e)}

    async def health_check(self) -> Dict[str, Any]:
        """Perform health check on RAG service.

        Returns:
            Health check results
        """
        try:
            # Check component health
            search_health = await self.search_service.health_check()

            # Smoke-test basic chunking functionality
            test_successful = True
            try:
                test_chunks = self.chunker_service.chunk_transcript(
                    "This is a test transcript for health check.",
                    "test_video_id"
                )
                if not test_chunks:
                    test_successful = False
            except Exception:
                test_successful = False

            overall_healthy = search_health.get('status') == 'healthy' and test_successful
            return {
                'status': 'healthy' if overall_healthy else 'degraded',
                'search_service_status': search_health.get('status'),
                'chunking_test': 'passed' if test_successful else 'failed',
                'metrics': dict(self.metrics)
            }
        except Exception as e:
            logger.error(f"RAG service health check failed: {e}")
            return {
                'status': 'unhealthy',
                'error': str(e)
            }
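

# A minimal end-to-end sketch of how this service wires together (an
# assumption for illustration, not part of the original module). It presumes
# a reachable ChromaDB backend, DeepSeek API credentials, and an
# already-fetched transcript; the video ID and transcript below are
# placeholders.
async def _demo() -> None:
    service = RAGService()
    await service.initialize()

    # Index a (placeholder) transcript so it becomes searchable
    await service.index_video_content(
        video_id="dQw4w9WgXcQ",  # hypothetical video ID
        transcript="...full transcript text..."
    )

    # Open a session and ask a question against the indexed content
    session_info = await service.create_chat_session(video_id="dQw4w9WgXcQ")
    answer = await service.chat_query(
        session_id=session_info['session_id'],
        query="What is the main topic of the video?"
    )
    print(answer['response'])


if __name__ == "__main__":
    asyncio.run(_demo())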