"""RAG (Retrieval-Augmented Generation) service for video chat.""" import asyncio import logging from typing import List, Dict, Any, Optional, Tuple from datetime import datetime import json import uuid from backend.core.exceptions import ServiceError from backend.models.chat import ChatSession, ChatMessage, VideoChunk from backend.models.summary import Summary from backend.services.semantic_search_service import SemanticSearchService from backend.services.chroma_service import ChromaService from backend.services.transcript_chunker import TranscriptChunker from backend.services.deepseek_service import DeepSeekService from backend.core.database_registry import registry logger = logging.getLogger(__name__) class RAGError(ServiceError): """RAG service specific errors.""" pass class RAGService: """Service for RAG-powered video chat and question answering.""" def __init__( self, search_service: Optional[SemanticSearchService] = None, chroma_service: Optional[ChromaService] = None, chunker_service: Optional[TranscriptChunker] = None, ai_service: Optional[DeepSeekService] = None ): """Initialize RAG service. Args: search_service: Semantic search service chroma_service: ChromaDB service chunker_service: Transcript chunking service ai_service: AI service for response generation """ self.search_service = search_service or SemanticSearchService() self.chroma_service = chroma_service or ChromaService() self.chunker_service = chunker_service or TranscriptChunker() self.ai_service = ai_service or DeepSeekService() # RAG configuration self.config = { 'max_context_chunks': 5, 'max_context_length': 4000, 'min_similarity_threshold': 0.3, 'max_response_tokens': 800, 'temperature': 0.7, 'include_source_timestamps': True } # Performance metrics self.metrics = { 'total_queries': 0, 'successful_responses': 0, 'failed_responses': 0, 'avg_response_time': 0.0, 'avg_context_chunks': 0.0, 'total_tokens_used': 0 } async def initialize(self) -> None: """Initialize all service components.""" try: await self.search_service.initialize() logger.info("RAG service initialized successfully") except Exception as e: logger.error(f"Failed to initialize RAG service: {e}") raise RAGError(f"RAG service initialization failed: {e}") async def index_video_content( self, video_id: str, transcript: str, summary_id: Optional[str] = None ) -> Dict[str, Any]: """Index video content for RAG search. 

    async def index_video_content(
        self,
        video_id: str,
        transcript: str,
        summary_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Index video content for RAG search.

        Args:
            video_id: YouTube video ID
            transcript: Video transcript text
            summary_id: Optional summary ID

        Returns:
            Indexing results and statistics
        """
        try:
            logger.info(f"Indexing video content for {video_id}")
            start_time = datetime.now()

            # Chunk the transcript
            chunks = self.chunker_service.chunk_transcript(
                transcript=transcript,
                video_id=video_id
            )

            if not chunks:
                logger.warning(f"No chunks created for video {video_id}")
                return {
                    'video_id': video_id,
                    'chunks_created': 0,
                    'indexed': False,
                    'error': 'No chunks created from transcript'
                }

            # Store chunks in ChromaDB
            chroma_ids = await self.chroma_service.add_document_chunks(
                video_id=video_id,
                chunks=chunks
            )

            # Store chunk metadata in database
            indexed_chunks = []
            with registry.get_session() as session:
                for chunk, chroma_id in zip(chunks, chroma_ids):
                    video_chunk = VideoChunk(
                        video_id=video_id,
                        summary_id=summary_id,
                        chunk_index=chunk['chunk_index'],
                        chunk_type=chunk['chunk_type'],
                        start_timestamp=chunk.get('start_timestamp'),
                        end_timestamp=chunk.get('end_timestamp'),
                        content=chunk['content'],
                        content_length=chunk['content_length'],
                        content_hash=chunk['content_hash'],
                        chromadb_id=chroma_id,
                        embedding_model='sentence-transformers/all-MiniLM-L6-v2',
                        embedding_created_at=datetime.now()
                    )
                    session.add(video_chunk)

                    indexed_chunks.append({
                        'chunk_index': chunk['chunk_index'],
                        'content_length': chunk['content_length'],
                        'start_timestamp': chunk.get('start_timestamp'),
                        'end_timestamp': chunk.get('end_timestamp')
                    })

                session.commit()

            processing_time = (datetime.now() - start_time).total_seconds()

            result = {
                'video_id': video_id,
                'chunks_created': len(chunks),
                'chunks_indexed': len(chroma_ids),
                'processing_time_seconds': processing_time,
                'indexed': True,
                'chunks': indexed_chunks,
                'chunking_stats': self.chunker_service.get_chunking_stats(chunks)
            }

            logger.info(
                f"Successfully indexed {len(chunks)} chunks for video {video_id} "
                f"in {processing_time:.3f}s"
            )
            return result

        except Exception as e:
            logger.error(f"Failed to index video content: {e}")
            raise RAGError(f"Content indexing failed: {e}")
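
    # Example call (hypothetical IDs and transcript; a sketch, not a fixture):
    #
    #     result = await rag.index_video_content(
    #         video_id="example_video_id",
    #         transcript="...full transcript text...",
    #     )
    #     # result['chunks_created'] -> number of transcript chunks embedded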

    async def chat_query(
        self,
        session_id: str,
        query: str,
        user_id: Optional[str] = None,
        search_mode: str = "hybrid",
        max_context_chunks: Optional[int] = None
    ) -> Dict[str, Any]:
        """Process a chat query using RAG.

        Args:
            session_id: Chat session ID
            query: User's question/query
            user_id: Optional user ID
            search_mode: Search strategy to use
            max_context_chunks: Override for max context chunks

        Returns:
            Chat response with sources and metadata
        """
        start_time = datetime.now()
        self.metrics['total_queries'] += 1

        try:
            logger.info(f"Processing chat query for session {session_id}: '{query[:50]}...'")

            # Get chat session and video context; release the DB session
            # before the (slow) search and generation steps
            with registry.get_session() as session:
                chat_session = session.query(ChatSession).filter(
                    ChatSession.id == session_id
                ).first()

                if not chat_session:
                    raise RAGError(f"Chat session {session_id} not found")

                video_id = chat_session.video_id

            # Perform semantic search to get relevant context
            search_results = await self.search_service.search(
                query=query,
                video_id=video_id,
                search_mode=search_mode,
                max_results=max_context_chunks or self.config['max_context_chunks'],
                similarity_threshold=self.config['min_similarity_threshold'],
                user_id=user_id
            )

            context_chunks = search_results.get('results', [])

            if not context_chunks:
                logger.warning(f"No relevant context found for query: {query}")
                return await self._generate_no_context_response(query, session_id)

            # Generate AI response with context
            response = await self._generate_rag_response(
                query=query,
                context_chunks=context_chunks,
                session_id=session_id
            )

            # Store chat message in database
            await self._store_chat_message(
                session_id=session_id,
                query=query,
                response=response,
                context_chunks=context_chunks,
                search_results=search_results
            )

            # Update metrics
            self._update_metrics(start_time, len(context_chunks), response.get('total_tokens', 0))
            self.metrics['successful_responses'] += 1

            return response

        except Exception as e:
            logger.error(f"Chat query failed: {e}")
            self.metrics['failed_responses'] += 1
            raise RAGError(f"Chat query failed: {e}")
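
    # Example flow (a hedged sketch; the session ID comes from
    # create_chat_session below):
    #
    #     session = await rag.create_chat_session(video_id="example_video_id")
    #     answer = await rag.chat_query(
    #         session_id=session['session_id'],
    #         query="What is the main argument of the video?",
    #     )
    #     # answer['response'] -> generated text
    #     # answer['sources']  -> chunks (with timestamps) that grounded it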

    async def _generate_rag_response(
        self,
        query: str,
        context_chunks: List[Dict[str, Any]],
        session_id: str
    ) -> Dict[str, Any]:
        """Generate AI response using RAG context.

        Args:
            query: User query
            context_chunks: Relevant context chunks
            session_id: Chat session ID

        Returns:
            Generated response with metadata
        """
        try:
            # Prepare context for AI model
            context_text = self._prepare_context_text(context_chunks)

            # Build RAG prompt
            rag_prompt = self._build_rag_prompt(query, context_text)

            # Generate response using AI service
            ai_response = await self.ai_service.generate_response(
                prompt=rag_prompt,
                max_tokens=self.config['max_response_tokens'],
                temperature=self.config['temperature']
            )

            # Format response with sources
            return self._format_response_with_sources(
                ai_response=ai_response,
                context_chunks=context_chunks,
                query=query
            )

        except Exception as e:
            logger.error(f"Failed to generate RAG response: {e}")
            raise RAGError(f"Response generation failed: {e}")

    def _prepare_context_text(self, context_chunks: List[Dict[str, Any]]) -> str:
        """Prepare context text from chunks for AI prompt.

        Args:
            context_chunks: List of relevant chunks

        Returns:
            Formatted context text
        """
        context_parts = []
        total_length = 0

        for chunk in context_chunks:
            content = chunk.get('content', '')
            timestamp = chunk.get('timestamp_formatted', '')

            # Prefix each chunk with its timestamp so the model can cite it
            if timestamp and self.config['include_source_timestamps']:
                context_part = f"{timestamp} {content}"
            else:
                context_part = content

            # Stop once adding this chunk would exceed the max context length
            if total_length + len(context_part) > self.config['max_context_length']:
                break

            context_parts.append(context_part)
            total_length += len(context_part)

        return "\n\n".join(context_parts)

    def _build_rag_prompt(self, query: str, context: str) -> str:
        """Build RAG prompt for AI model.

        Args:
            query: User query
            context: Relevant context from video

        Returns:
            Complete RAG prompt
        """
        return f"""You are a helpful assistant that answers questions about YouTube video content.
Use the provided context from the video to answer the user's question accurately and comprehensively.

CONTEXT FROM VIDEO:
{context}

USER QUESTION: {query}

INSTRUCTIONS:
- Answer based primarily on the provided context
- If the context contains timestamp information (like [HH:MM:SS]), reference specific timestamps in your response
- If the question cannot be fully answered from the context, acknowledge this limitation
- Be concise but thorough in your explanation
- Include specific details and examples from the video when relevant
- If you mention specific points, try to reference the timestamp where that information appears

RESPONSE:"""

    def _format_response_with_sources(
        self,
        ai_response: Dict[str, Any],
        context_chunks: List[Dict[str, Any]],
        query: str
    ) -> Dict[str, Any]:
        """Format AI response with source attribution.

        Args:
            ai_response: Raw AI response
            context_chunks: Source chunks
            query: Original query

        Returns:
            Formatted response with sources
        """
        response_text = ai_response.get('content', '')

        # Prepare source information
        sources = []
        for chunk in context_chunks:
            content = chunk.get('content', '')
            sources.append({
                'chunk_id': chunk.get('chunk_id'),
                'content_preview': content[:200] + "..." if len(content) > 200 else content,
                'timestamp': chunk.get('start_timestamp'),
                'timestamp_formatted': chunk.get('timestamp_formatted'),
                'youtube_link': chunk.get('youtube_link'),
                'similarity_score': chunk.get('similarity_score', chunk.get('relevance_score', 0.0)),
                'search_method': chunk.get('search_method', 'unknown')
            })

        usage = ai_response.get('usage', {})
        return {
            'response': response_text,
            'sources': sources,
            'total_sources': len(sources),
            'query': query,
            'context_chunks_used': len(context_chunks),
            'model_used': ai_response.get('model', 'deepseek-chat'),
            'prompt_tokens': usage.get('prompt_tokens', 0),
            'completion_tokens': usage.get('completion_tokens', 0),
            'total_tokens': usage.get('total_tokens', 0),
            'processing_time_seconds': ai_response.get('processing_time', 0.0),
            'timestamp': datetime.now().isoformat()
        }
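
    # With include_source_timestamps enabled, each context chunk reaches the
    # prompt in roughly this shape (illustrative values):
    #
    #     [00:12:34] ...transcript text of the chunk...
    #
    #     [00:15:02] ...transcript text of the next chunk...
    #
    # which is what lets the model cite timestamps in its answer.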

    async def _generate_no_context_response(
        self,
        query: str,
        session_id: str
    ) -> Dict[str, Any]:
        """Generate response when no relevant context is found.

        Args:
            query: User query
            session_id: Chat session ID

        Returns:
            No-context response
        """
        response_text = """I couldn't find relevant information in the video transcript to answer your question. This might be because:

1. The topic you're asking about isn't covered in this video
2. The question is too specific or uses different terminology
3. The video content hasn't been properly indexed yet

Could you try rephrasing your question or asking about a different topic that might be covered in the video?"""

        return {
            'response': response_text,
            'sources': [],
            'total_sources': 0,
            'query': query,
            'context_chunks_used': 0,
            'no_context_found': True,
            'timestamp': datetime.now().isoformat()
        }

    async def _store_chat_message(
        self,
        session_id: str,
        query: str,
        response: Dict[str, Any],
        context_chunks: List[Dict[str, Any]],
        search_results: Dict[str, Any]
    ) -> None:
        """Store chat message in database.

        Args:
            session_id: Chat session ID
            query: User query
            response: Generated response
            context_chunks: Context chunks used
            search_results: Raw search results
        """
        try:
            with registry.get_session() as session:
                # Store user message
                user_message = ChatMessage(
                    session_id=session_id,
                    message_type="user",
                    content=query,
                    created_at=datetime.now()
                )
                session.add(user_message)

                # Store assistant response
                assistant_message = ChatMessage(
                    session_id=session_id,
                    message_type="assistant",
                    content=response['response'],
                    original_query=query,
                    context_chunks=json.dumps([chunk.get('chunk_id') for chunk in context_chunks]),
                    sources=json.dumps(response.get('sources', [])),
                    total_sources=response.get('total_sources', 0),
                    model_used=response.get('model_used'),
                    prompt_tokens=response.get('prompt_tokens'),
                    completion_tokens=response.get('completion_tokens'),
                    total_tokens=response.get('total_tokens'),
                    processing_time_seconds=response.get('processing_time_seconds'),
                    created_at=datetime.now()
                )
                session.add(assistant_message)

                # Update session statistics
                chat_session = session.query(ChatSession).filter(
                    ChatSession.id == session_id
                ).first()

                if chat_session:
                    # Each turn stores two messages (user + assistant)
                    chat_session.message_count = (chat_session.message_count or 0) + 2
                    chat_session.last_message_at = datetime.now()

                    if response.get('processing_time_seconds'):
                        total_time = (
                            (chat_session.total_processing_time or 0.0)
                            + response['processing_time_seconds']
                        )
                        chat_session.total_processing_time = total_time
                        # message_count // 2 == number of completed turns
                        chat_session.avg_response_time = total_time / (chat_session.message_count // 2)

                session.commit()
                logger.info(f"Stored chat messages for session {session_id}")

        except Exception as e:
            # Persistence failures are logged but deliberately do not fail the
            # chat response that has already been generated
            logger.error(f"Failed to store chat message: {e}")
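
    # Design note: each turn is persisted as a user/assistant row pair, with
    # the assistant row carrying provenance (chunk IDs, sources, token usage)
    # as JSON strings, so history can be rebuilt without re-running retrieval.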

    async def create_chat_session(
        self,
        video_id: str,
        user_id: Optional[str] = None,
        title: Optional[str] = None
    ) -> Dict[str, Any]:
        """Create a new chat session for a video.

        Args:
            video_id: YouTube video ID
            user_id: Optional user ID
            title: Optional session title

        Returns:
            Created session information
        """
        try:
            session_id = str(uuid.uuid4())

            # Get video information
            with registry.get_session() as session:
                summary = session.query(Summary).filter(
                    Summary.video_id == video_id
                ).first()

                # Generate title if not provided (guard against a missing
                # video_title on the summary)
                if not title and summary and summary.video_title:
                    title = f"Chat about: {summary.video_title[:50]}..."
                elif not title:
                    title = f"Chat about video {video_id}"

                # Create chat session
                chat_session = ChatSession(
                    id=session_id,
                    user_id=user_id,
                    video_id=video_id,
                    summary_id=str(summary.id) if summary else None,
                    title=title,
                    session_config=json.dumps(self.config),
                    is_active=True,
                    created_at=datetime.now()
                )
                session.add(chat_session)
                session.commit()

            logger.info(f"Created chat session {session_id} for video {video_id}")

            return {
                'session_id': session_id,
                'video_id': video_id,
                'title': title,
                'user_id': user_id,
                'created_at': datetime.now().isoformat(),
                'config': self.config
            }

        except Exception as e:
            logger.error(f"Failed to create chat session: {e}")
            raise RAGError(f"Session creation failed: {e}")

    async def get_chat_history(
        self,
        session_id: str,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """Get chat history for a session.

        Args:
            session_id: Chat session ID
            limit: Maximum number of messages

        Returns:
            List of chat messages
        """
        try:
            with registry.get_session() as session:
                messages = session.query(ChatMessage).filter(
                    ChatMessage.session_id == session_id
                ).order_by(ChatMessage.created_at.asc()).limit(limit).all()

                formatted_messages = []
                for msg in messages:
                    message_dict = {
                        'id': msg.id,
                        'message_type': msg.message_type,
                        'content': msg.content,
                        'created_at': msg.created_at.isoformat() if msg.created_at else None,
                    }

                    # Add sources for assistant messages
                    if msg.message_type == "assistant" and msg.sources:
                        try:
                            message_dict['sources'] = json.loads(msg.sources)
                            message_dict['total_sources'] = msg.total_sources
                        except (json.JSONDecodeError, TypeError):
                            # Skip sources if the stored JSON is malformed
                            pass

                    formatted_messages.append(message_dict)

                return formatted_messages

        except Exception as e:
            logger.error(f"Failed to get chat history: {e}")
            return []

    def _update_metrics(
        self,
        start_time: datetime,
        context_chunks_count: int,
        tokens_used: int
    ) -> None:
        """Update service metrics.

        Args:
            start_time: Query start time
            context_chunks_count: Number of context chunks used
            tokens_used: Number of tokens used
        """
        response_time = (datetime.now() - start_time).total_seconds()

        # Update running averages incrementally:
        #   new_avg = (old_avg * (n - 1) + new_value) / n
        total_queries = self.metrics['total_queries']

        # Average response time
        total_time = self.metrics['avg_response_time'] * (total_queries - 1)
        self.metrics['avg_response_time'] = (total_time + response_time) / total_queries

        # Average context chunks
        total_chunks = self.metrics['avg_context_chunks'] * (total_queries - 1)
        self.metrics['avg_context_chunks'] = (total_chunks + context_chunks_count) / total_queries

        # Total tokens
        self.metrics['total_tokens_used'] += tokens_used

    async def get_service_stats(self) -> Dict[str, Any]:
        """Get RAG service statistics.

        Returns:
            Service statistics
        """
        try:
            # Get ChromaDB stats
            chroma_stats = await self.chroma_service.get_collection_stats()

            # Get search service metrics
            search_metrics = self.search_service._get_current_metrics()

            return {
                'rag_metrics': dict(self.metrics),
                'chroma_stats': chroma_stats,
                'search_metrics': search_metrics,
                'config': dict(self.config),
                'timestamp': datetime.now().isoformat()
            }

        except Exception as e:
            logger.error(f"Failed to get service stats: {e}")
            return {'error': str(e)}
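
    # Worked example of the incremental mean in _update_metrics (illustrative
    # numbers): with avg_response_time = 2.0s over 3 queries, a 4th query
    # taking 4.0s gives (2.0 * 3 + 4.0) / 4 = 2.5s, so no per-query history
    # needs to be stored.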

    async def health_check(self) -> Dict[str, Any]:
        """Perform health check on RAG service.

        Returns:
            Health check results
        """
        try:
            # Check all component health
            search_health = await self.search_service.health_check()

            # Test basic functionality
            test_successful = True
            try:
                # Test chunking
                test_chunks = self.chunker_service.chunk_transcript(
                    "This is a test transcript for health check.",
                    "test_video_id"
                )
                if not test_chunks:
                    test_successful = False
            except Exception:
                test_successful = False

            search_healthy = search_health.get('status') == 'healthy'
            return {
                'status': 'healthy' if search_healthy and test_successful else 'degraded',
                'search_service_status': search_health.get('status'),
                'chunking_test': 'passed' if test_successful else 'failed',
                'metrics': dict(self.metrics)
            }

        except Exception as e:
            logger.error(f"RAG service health check failed: {e}")
            return {
                'status': 'unhealthy',
                'error': str(e)
            }
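

# Minimal end-to-end sketch, assuming the backend services and database
# registry imported above are configured for your environment; the video ID
# and transcript below are placeholders, not real data.
if __name__ == "__main__":
    async def _demo() -> None:
        rag = RAGService()
        await rag.initialize()

        # Index a (placeholder) transcript, then open a session and chat
        await rag.index_video_content(
            video_id="example_video_id",
            transcript="Example transcript text...",
        )
        session = await rag.create_chat_session(video_id="example_video_id")
        answer = await rag.chat_query(
            session_id=session['session_id'],
            query="What is this video about?",
        )
        print(answer['response'])
        print(await rag.health_check())

    asyncio.run(_demo())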