"""RAG (Retrieval-Augmented Generation) service for video chat."""
import asyncio
import logging
from typing import List, Dict, Any, Optional, Tuple
from datetime import datetime
import json
import uuid
from backend.core.exceptions import ServiceError
from backend.models.chat import ChatSession, ChatMessage, VideoChunk
from backend.models.summary import Summary
from backend.services.semantic_search_service import SemanticSearchService
from backend.services.chroma_service import ChromaService
from backend.services.transcript_chunker import TranscriptChunker
from backend.services.deepseek_service import DeepSeekService
from backend.core.database_registry import registry
logger = logging.getLogger(__name__)
class RAGError(ServiceError):
"""RAG service specific errors."""
pass


class RAGService:
    """Service for RAG-powered video chat and question answering."""

    def __init__(
        self,
        search_service: Optional[SemanticSearchService] = None,
        chroma_service: Optional[ChromaService] = None,
        chunker_service: Optional[TranscriptChunker] = None,
        ai_service: Optional[DeepSeekService] = None
    ):
        """Initialize RAG service.

        Args:
            search_service: Semantic search service
            chroma_service: ChromaDB service
            chunker_service: Transcript chunking service
            ai_service: AI service for response generation
        """
        self.search_service = search_service or SemanticSearchService()
        self.chroma_service = chroma_service or ChromaService()
        self.chunker_service = chunker_service or TranscriptChunker()
        self.ai_service = ai_service or DeepSeekService()

        # RAG configuration
        self.config = {
            'max_context_chunks': 5,
            'max_context_length': 4000,
            'min_similarity_threshold': 0.3,
            'max_response_tokens': 800,
            'temperature': 0.7,
            'include_source_timestamps': True
        }

        # Performance metrics
        self.metrics = {
            'total_queries': 0,
            'successful_responses': 0,
            'failed_responses': 0,
            'avg_response_time': 0.0,
            'avg_context_chunks': 0.0,
            'total_tokens_used': 0
        }

    async def initialize(self) -> None:
        """Initialize all service components."""
        try:
            await self.search_service.initialize()
            logger.info("RAG service initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize RAG service: {e}")
            raise RAGError(f"RAG service initialization failed: {e}")

    async def index_video_content(
        self,
        video_id: str,
        transcript: str,
        summary_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Index video content for RAG search.

        Args:
            video_id: YouTube video ID
            transcript: Video transcript text
            summary_id: Optional summary ID

        Returns:
            Indexing results and statistics
        """
        try:
            logger.info(f"Indexing video content for {video_id}")
            start_time = datetime.now()

            # Chunk the transcript
            chunks = self.chunker_service.chunk_transcript(
                transcript=transcript,
                video_id=video_id
            )
            if not chunks:
                logger.warning(f"No chunks created for video {video_id}")
                return {
                    'video_id': video_id,
                    'chunks_created': 0,
                    'indexed': False,
                    'error': 'No chunks created from transcript'
                }

            # Store chunks in ChromaDB
            chroma_ids = await self.chroma_service.add_document_chunks(
                video_id=video_id,
                chunks=chunks
            )

            # Store chunk metadata in database
            indexed_chunks = []
            with registry.get_session() as session:
                for chunk, chroma_id in zip(chunks, chroma_ids):
                    video_chunk = VideoChunk(
                        video_id=video_id,
                        summary_id=summary_id,
                        chunk_index=chunk['chunk_index'],
                        chunk_type=chunk['chunk_type'],
                        start_timestamp=chunk.get('start_timestamp'),
                        end_timestamp=chunk.get('end_timestamp'),
                        content=chunk['content'],
                        content_length=chunk['content_length'],
                        content_hash=chunk['content_hash'],
                        chromadb_id=chroma_id,
                        embedding_model='sentence-transformers/all-MiniLM-L6-v2',
                        embedding_created_at=datetime.now()
                    )
                    session.add(video_chunk)
                    indexed_chunks.append({
                        'chunk_index': chunk['chunk_index'],
                        'content_length': chunk['content_length'],
                        'start_timestamp': chunk.get('start_timestamp'),
                        'end_timestamp': chunk.get('end_timestamp')
                    })
                session.commit()

            processing_time = (datetime.now() - start_time).total_seconds()
            result = {
                'video_id': video_id,
                'chunks_created': len(chunks),
                'chunks_indexed': len(chroma_ids),
                'processing_time_seconds': processing_time,
                'indexed': True,
                'chunks': indexed_chunks,
                'chunking_stats': self.chunker_service.get_chunking_stats(chunks)
            }
            logger.info(
                f"Successfully indexed {len(chunks)} chunks for video {video_id} "
                f"in {processing_time:.3f}s"
            )
            return result
        except Exception as e:
            logger.error(f"Failed to index video content: {e}")
            raise RAGError(f"Content indexing failed: {e}")

    async def chat_query(
        self,
        session_id: str,
        query: str,
        user_id: Optional[str] = None,
        search_mode: str = "hybrid",
        max_context_chunks: Optional[int] = None
    ) -> Dict[str, Any]:
        """Process a chat query using RAG.

        Args:
            session_id: Chat session ID
            query: User's question/query
            user_id: Optional user ID
            search_mode: Search strategy to use
            max_context_chunks: Override for max context chunks

        Returns:
            Chat response with sources and metadata
        """
        start_time = datetime.now()
        self.metrics['total_queries'] += 1
        try:
            logger.info(f"Processing chat query for session {session_id}: '{query[:50]}...'")

            # Get chat session and video context
            with registry.get_session() as session:
                chat_session = session.query(ChatSession).filter(
                    ChatSession.id == session_id
                ).first()
                if not chat_session:
                    raise RAGError(f"Chat session {session_id} not found")
                video_id = chat_session.video_id

            # Perform semantic search to get relevant context
            search_results = await self.search_service.search(
                query=query,
                video_id=video_id,
                search_mode=search_mode,
                max_results=max_context_chunks or self.config['max_context_chunks'],
                similarity_threshold=self.config['min_similarity_threshold'],
                user_id=user_id
            )
            context_chunks = search_results.get('results', [])
            if not context_chunks:
                logger.warning(f"No relevant context found for query: {query}")
                return await self._generate_no_context_response(query, session_id)

            # Generate AI response with context
            response = await self._generate_rag_response(
                query=query,
                context_chunks=context_chunks,
                session_id=session_id
            )

            # Store chat message in database
            await self._store_chat_message(
                session_id=session_id,
                query=query,
                response=response,
                context_chunks=context_chunks,
                search_results=search_results
            )

            # Update metrics
            self._update_metrics(start_time, len(context_chunks), response.get('total_tokens', 0))
            self.metrics['successful_responses'] += 1
            return response
        except Exception as e:
            logger.error(f"Chat query failed: {e}")
            self.metrics['failed_responses'] += 1
            raise RAGError(f"Chat query failed: {e}")

    async def _generate_rag_response(
        self,
        query: str,
        context_chunks: List[Dict[str, Any]],
        session_id: str
    ) -> Dict[str, Any]:
        """Generate AI response using RAG context.

        Args:
            query: User query
            context_chunks: Relevant context chunks
            session_id: Chat session ID

        Returns:
            Generated response with metadata
        """
        try:
            # Prepare context for AI model
            context_text = self._prepare_context_text(context_chunks)

            # Build RAG prompt
            rag_prompt = self._build_rag_prompt(query, context_text)

            # Generate response using AI service
            ai_response = await self.ai_service.generate_response(
                prompt=rag_prompt,
                max_tokens=self.config['max_response_tokens'],
                temperature=self.config['temperature']
            )

            # Format response with sources
            formatted_response = self._format_response_with_sources(
                ai_response=ai_response,
                context_chunks=context_chunks,
                query=query
            )
            return formatted_response
        except Exception as e:
            logger.error(f"Failed to generate RAG response: {e}")
            raise RAGError(f"Response generation failed: {e}")

    def _prepare_context_text(self, context_chunks: List[Dict[str, Any]]) -> str:
        """Prepare context text from chunks for AI prompt.

        Args:
            context_chunks: List of relevant chunks

        Returns:
            Formatted context text
        """
        context_parts = []
        total_length = 0
        for chunk in context_chunks:
            content = chunk.get('content', '')
            timestamp = chunk.get('timestamp_formatted', '')

            # Format context with timestamp
            if timestamp and self.config['include_source_timestamps']:
                context_part = f"{timestamp} {content}"
            else:
                context_part = content

            # Stop at the first chunk that would exceed max context length
            if total_length + len(context_part) > self.config['max_context_length']:
                break
            context_parts.append(context_part)
            total_length += len(context_part)
        return "\n\n".join(context_parts)

    def _build_rag_prompt(self, query: str, context: str) -> str:
        """Build RAG prompt for AI model.

        Args:
            query: User query
            context: Relevant context from video

        Returns:
            Complete RAG prompt
        """
        prompt = f"""You are a helpful assistant that answers questions about YouTube video content. Use the provided context from the video to answer the user's question accurately and comprehensively.

CONTEXT FROM VIDEO:
{context}

USER QUESTION: {query}

INSTRUCTIONS:
- Answer based primarily on the provided context
- If the context contains timestamp information (like [HH:MM:SS]), reference specific timestamps in your response
- If the question cannot be fully answered from the context, acknowledge this limitation
- Be concise but thorough in your explanation
- Include specific details and examples from the video when relevant
- If you mention specific points, try to reference the timestamp where that information appears

RESPONSE:"""
        return prompt

    def _format_response_with_sources(
        self,
        ai_response: Dict[str, Any],
        context_chunks: List[Dict[str, Any]],
        query: str
    ) -> Dict[str, Any]:
        """Format AI response with source attribution.

        Args:
            ai_response: Raw AI response
            context_chunks: Source chunks
            query: Original query

        Returns:
            Formatted response with sources
        """
        response_text = ai_response.get('content', '')
        usage = ai_response.get('usage', {})

        # Prepare source information
        sources = []
        for chunk in context_chunks:
            content = chunk.get('content', '')
            source = {
                'chunk_id': chunk.get('chunk_id'),
                'content_preview': content[:200] + "..." if len(content) > 200 else content,
                'timestamp': chunk.get('start_timestamp'),
                'timestamp_formatted': chunk.get('timestamp_formatted'),
                'youtube_link': chunk.get('youtube_link'),
                'similarity_score': chunk.get('similarity_score', chunk.get('relevance_score', 0.0)),
                'search_method': chunk.get('search_method', 'unknown')
            }
            sources.append(source)

        return {
            'response': response_text,
            'sources': sources,
            'total_sources': len(sources),
            'query': query,
            'context_chunks_used': len(context_chunks),
            'model_used': ai_response.get('model', 'deepseek-chat'),
            'prompt_tokens': usage.get('prompt_tokens', 0),
            'completion_tokens': usage.get('completion_tokens', 0),
            'total_tokens': usage.get('total_tokens', 0),
            'processing_time_seconds': ai_response.get('processing_time', 0.0),
            'timestamp': datetime.now().isoformat()
        }
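
    # For reference, a payload from the method above looks roughly like the
    # sketch below (values are illustrative, not from the original source):
    #
    #   {
    #       'response': 'The speaker covers X at [00:03:12]...',
    #       'sources': [{'chunk_id': '...', 'timestamp': 192.0, ...}],
    #       'total_sources': 3,
    #       'query': 'What is said about X?',
    #       'context_chunks_used': 3,
    #       'model_used': 'deepseek-chat',
    #       'prompt_tokens': 1450,
    #       'completion_tokens': 220,
    #       'total_tokens': 1670,
    #       'processing_time_seconds': 2.4,
    #       'timestamp': '2025-01-01T12:00:00',
    #   }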

    async def _generate_no_context_response(
        self,
        query: str,
        session_id: str
    ) -> Dict[str, Any]:
        """Generate response when no relevant context is found.

        Args:
            query: User query
            session_id: Chat session ID

        Returns:
            No-context response
        """
        response_text = """I couldn't find relevant information in the video transcript to answer your question. This might be because:

1. The topic you're asking about isn't covered in this video
2. The question is too specific or uses different terminology
3. The video content hasn't been properly indexed yet

Could you try rephrasing your question or asking about a different topic that might be covered in the video?"""
        return {
            'response': response_text,
            'sources': [],
            'total_sources': 0,
            'query': query,
            'context_chunks_used': 0,
            'no_context_found': True,
            'timestamp': datetime.now().isoformat()
        }

    async def _store_chat_message(
        self,
        session_id: str,
        query: str,
        response: Dict[str, Any],
        context_chunks: List[Dict[str, Any]],
        search_results: Dict[str, Any]
    ) -> None:
        """Store chat message in database.

        Persistence failures are logged but not re-raised, so a storage
        problem never blocks the chat response itself.

        Args:
            session_id: Chat session ID
            query: User query
            response: Generated response
            context_chunks: Context chunks used
            search_results: Raw search results
        """
        try:
            with registry.get_session() as session:
                # Store user message
                user_message = ChatMessage(
                    session_id=session_id,
                    message_type="user",
                    content=query,
                    created_at=datetime.now()
                )
                session.add(user_message)

                # Store assistant response
                assistant_message = ChatMessage(
                    session_id=session_id,
                    message_type="assistant",
                    content=response['response'],
                    original_query=query,
                    context_chunks=json.dumps([chunk.get('chunk_id') for chunk in context_chunks]),
                    sources=json.dumps(response.get('sources', [])),
                    total_sources=response.get('total_sources', 0),
                    model_used=response.get('model_used'),
                    prompt_tokens=response.get('prompt_tokens'),
                    completion_tokens=response.get('completion_tokens'),
                    total_tokens=response.get('total_tokens'),
                    processing_time_seconds=response.get('processing_time_seconds'),
                    created_at=datetime.now()
                )
                session.add(assistant_message)

                # Update session statistics (each exchange adds two messages)
                chat_session = session.query(ChatSession).filter(
                    ChatSession.id == session_id
                ).first()
                if chat_session:
                    chat_session.message_count = (chat_session.message_count or 0) + 2
                    chat_session.last_message_at = datetime.now()
                    if response.get('processing_time_seconds'):
                        total_time = (chat_session.total_processing_time or 0.0) + response['processing_time_seconds']
                        chat_session.total_processing_time = total_time
                        chat_session.avg_response_time = total_time / (chat_session.message_count // 2)
                session.commit()
            logger.info(f"Stored chat messages for session {session_id}")
        except Exception as e:
            logger.error(f"Failed to store chat message: {e}")

    async def create_chat_session(
        self,
        video_id: str,
        user_id: Optional[str] = None,
        title: Optional[str] = None
    ) -> Dict[str, Any]:
        """Create a new chat session for a video.

        Args:
            video_id: YouTube video ID
            user_id: Optional user ID
            title: Optional session title

        Returns:
            Created session information
        """
        try:
            session_id = str(uuid.uuid4())

            # Get video information
            with registry.get_session() as session:
                summary = session.query(Summary).filter(
                    Summary.video_id == video_id
                ).first()

                # Generate title if not provided
                if not title and summary:
                    title = f"Chat about: {summary.video_title[:50]}..."
                elif not title:
                    title = f"Chat about video {video_id}"

                # Create chat session
                chat_session = ChatSession(
                    id=session_id,
                    user_id=user_id,
                    video_id=video_id,
                    summary_id=str(summary.id) if summary else None,
                    title=title,
                    session_config=json.dumps(self.config),
                    is_active=True,
                    created_at=datetime.now()
                )
                session.add(chat_session)
                session.commit()

            logger.info(f"Created chat session {session_id} for video {video_id}")
            return {
                'session_id': session_id,
                'video_id': video_id,
                'title': title,
                'user_id': user_id,
                'created_at': datetime.now().isoformat(),
                'config': self.config
            }
        except Exception as e:
            logger.error(f"Failed to create chat session: {e}")
            raise RAGError(f"Session creation failed: {e}")

    async def get_chat_history(
        self,
        session_id: str,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """Get chat history for a session.

        Args:
            session_id: Chat session ID
            limit: Maximum number of messages

        Returns:
            List of chat messages
        """
        try:
            with registry.get_session() as session:
                messages = session.query(ChatMessage).filter(
                    ChatMessage.session_id == session_id
                ).order_by(ChatMessage.created_at.asc()).limit(limit).all()

                formatted_messages = []
                for msg in messages:
                    message_dict = {
                        'id': msg.id,
                        'message_type': msg.message_type,
                        'content': msg.content,
                        'created_at': msg.created_at.isoformat() if msg.created_at else None,
                    }
                    # Add sources for assistant messages
                    if msg.message_type == "assistant" and msg.sources:
                        try:
                            message_dict['sources'] = json.loads(msg.sources)
                            message_dict['total_sources'] = msg.total_sources
                        except (json.JSONDecodeError, TypeError):
                            # Skip malformed source payloads rather than
                            # failing the whole history
                            pass
                    formatted_messages.append(message_dict)
                return formatted_messages
        except Exception as e:
            logger.error(f"Failed to get chat history: {e}")
            return []

    def _update_metrics(
        self,
        start_time: datetime,
        context_chunks_count: int,
        tokens_used: int
    ) -> None:
        """Update service metrics.

        Args:
            start_time: Query start time
            context_chunks_count: Number of context chunks used
            tokens_used: Number of tokens used
        """
        response_time = (datetime.now() - start_time).total_seconds()

        # chat_query increments total_queries before this runs, so it is >= 1
        total_queries = self.metrics['total_queries']

        # Running average of response time
        total_time = self.metrics['avg_response_time'] * (total_queries - 1)
        self.metrics['avg_response_time'] = (total_time + response_time) / total_queries

        # Running average of context chunks
        total_chunks = self.metrics['avg_context_chunks'] * (total_queries - 1)
        self.metrics['avg_context_chunks'] = (total_chunks + context_chunks_count) / total_queries

        # Total tokens
        self.metrics['total_tokens_used'] += tokens_used
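
    # Worked example of the running average (illustrative numbers): if
    # avg_response_time is 2.0s over 3 earlier queries and a 4th query takes
    # 4.0s, the update computes (2.0 * 3 + 4.0) / 4 = 2.5s as the new average.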

    async def get_service_stats(self) -> Dict[str, Any]:
        """Get RAG service statistics.

        Returns:
            Service statistics
        """
        try:
            # Get ChromaDB stats
            chroma_stats = await self.chroma_service.get_collection_stats()

            # Get search service metrics
            search_metrics = self.search_service._get_current_metrics()

            return {
                'rag_metrics': dict(self.metrics),
                'chroma_stats': chroma_stats,
                'search_metrics': search_metrics,
                'config': dict(self.config),
                'timestamp': datetime.now().isoformat()
            }
        except Exception as e:
            logger.error(f"Failed to get service stats: {e}")
            return {'error': str(e)}

    async def health_check(self) -> Dict[str, Any]:
        """Perform health check on RAG service.

        Returns:
            Health check results
        """
        try:
            # Check component health
            search_health = await self.search_service.health_check()

            # Smoke-test basic chunking functionality
            test_successful = True
            try:
                test_chunks = self.chunker_service.chunk_transcript(
                    "This is a test transcript for health check.",
                    "test_video_id"
                )
                if not test_chunks:
                    test_successful = False
            except Exception:
                test_successful = False

            overall_healthy = search_health.get('status') == 'healthy' and test_successful
            return {
                'status': 'healthy' if overall_healthy else 'degraded',
                'search_service_status': search_health.get('status'),
                'chunking_test': 'passed' if test_successful else 'failed',
                'metrics': dict(self.metrics)
            }
        except Exception as e:
            logger.error(f"RAG service health check failed: {e}")
            return {
                'status': 'unhealthy',
                'error': str(e)
            }
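

# A minimal end-to-end sketch of how this service wires together (an
# assumption for illustration, not part of the original module). It presumes
# a reachable ChromaDB backend, DeepSeek API credentials, and an
# already-fetched transcript; the video ID and transcript below are
# placeholders.
async def _demo() -> None:
    service = RAGService()
    await service.initialize()

    # Index a (placeholder) transcript so it becomes searchable
    await service.index_video_content(
        video_id="dQw4w9WgXcQ",  # hypothetical video ID
        transcript="...full transcript text..."
    )

    # Open a session and ask a question against the indexed content
    session_info = await service.create_chat_session(video_id="dQw4w9WgXcQ")
    answer = await service.chat_query(
        session_id=session_info['session_id'],
        query="What is the main topic of the video?"
    )
    print(answer['response'])


if __name__ == "__main__":
    asyncio.run(_demo())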