"""RAG (Retrieval-Augmented Generation) service for video chat."""
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import List, Dict, Any, Optional, Tuple
|
|
from datetime import datetime
|
|
import json
|
|
import uuid
|
|
|
|
from backend.core.exceptions import ServiceError
|
|
from backend.models.chat import ChatSession, ChatMessage, VideoChunk
|
|
from backend.models.summary import Summary
|
|
from backend.services.semantic_search_service import SemanticSearchService
|
|
from backend.services.chroma_service import ChromaService
|
|
from backend.services.transcript_chunker import TranscriptChunker
|
|
from backend.services.deepseek_service import DeepSeekService
|
|
from backend.core.database_registry import registry
|
|
|
|
logger = logging.getLogger(__name__)


class RAGError(ServiceError):
    """RAG service specific errors."""


class RAGService:
    """Service for RAG-powered video chat and question answering."""

    def __init__(
        self,
        search_service: Optional[SemanticSearchService] = None,
        chroma_service: Optional[ChromaService] = None,
        chunker_service: Optional[TranscriptChunker] = None,
        ai_service: Optional[DeepSeekService] = None,
    ):
        """Initialize RAG service.

        Args:
            search_service: Semantic search service
            chroma_service: ChromaDB service
            chunker_service: Transcript chunking service
            ai_service: AI service for response generation
        """
        self.search_service = search_service or SemanticSearchService()
        self.chroma_service = chroma_service or ChromaService()
        self.chunker_service = chunker_service or TranscriptChunker()
        self.ai_service = ai_service or DeepSeekService()

        # RAG configuration
        self.config = {
            'max_context_chunks': 5,
            'max_context_length': 4000,
            'min_similarity_threshold': 0.3,
            'max_response_tokens': 800,
            'temperature': 0.7,
            'include_source_timestamps': True
        }

        # Performance metrics
        self.metrics = {
            'total_queries': 0,
            'successful_responses': 0,
            'failed_responses': 0,
            'avg_response_time': 0.0,
            'avg_context_chunks': 0.0,
            'total_tokens_used': 0
        }
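
    # Both dicts above are plain data, so callers can tune retrieval behaviour
    # per instance. Illustrative sketch (values are examples, not
    # recommendations):
    #
    #     rag = RAGService()
    #     rag.config['max_context_chunks'] = 8   # retrieve more context
    #     rag.config['temperature'] = 0.2        # more deterministic answers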

    async def initialize(self) -> None:
        """Initialize all service components."""
        try:
            await self.search_service.initialize()
            logger.info("RAG service initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize RAG service: {e}")
            raise RAGError(f"RAG service initialization failed: {e}") from e

    async def index_video_content(
        self,
        video_id: str,
        transcript: str,
        summary_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Index video content for RAG search.

        Args:
            video_id: YouTube video ID
            transcript: Video transcript text
            summary_id: Optional summary ID

        Returns:
            Indexing results and statistics
        """
        try:
            logger.info(f"Indexing video content for {video_id}")
            start_time = datetime.now()

            # Chunk the transcript
            chunks = self.chunker_service.chunk_transcript(
                transcript=transcript,
                video_id=video_id
            )

            if not chunks:
                logger.warning(f"No chunks created for video {video_id}")
                return {
                    'video_id': video_id,
                    'chunks_created': 0,
                    'indexed': False,
                    'error': 'No chunks created from transcript'
                }

            # Store chunks in ChromaDB
            chroma_ids = await self.chroma_service.add_document_chunks(
                video_id=video_id,
                chunks=chunks
            )

            # Store chunk metadata in database
            indexed_chunks = []
            with registry.get_session() as session:
                for chunk, chroma_id in zip(chunks, chroma_ids):
                    video_chunk = VideoChunk(
                        video_id=video_id,
                        summary_id=summary_id,
                        chunk_index=chunk['chunk_index'],
                        chunk_type=chunk['chunk_type'],
                        start_timestamp=chunk.get('start_timestamp'),
                        end_timestamp=chunk.get('end_timestamp'),
                        content=chunk['content'],
                        content_length=chunk['content_length'],
                        content_hash=chunk['content_hash'],
                        chromadb_id=chroma_id,
                        embedding_model='sentence-transformers/all-MiniLM-L6-v2',
                        embedding_created_at=datetime.now()
                    )
                    session.add(video_chunk)
                    indexed_chunks.append({
                        'chunk_index': chunk['chunk_index'],
                        'content_length': chunk['content_length'],
                        'start_timestamp': chunk.get('start_timestamp'),
                        'end_timestamp': chunk.get('end_timestamp')
                    })

                session.commit()

            processing_time = (datetime.now() - start_time).total_seconds()

            result = {
                'video_id': video_id,
                'chunks_created': len(chunks),
                'chunks_indexed': len(chroma_ids),
                'processing_time_seconds': processing_time,
                'indexed': True,
                'chunks': indexed_chunks,
                'chunking_stats': self.chunker_service.get_chunking_stats(chunks)
            }

            logger.info(
                f"Successfully indexed {len(chunks)} chunks for video {video_id} "
                f"in {processing_time:.3f}s"
            )
            return result

        except Exception as e:
            logger.error(f"Failed to index video content: {e}")
            raise RAGError(f"Content indexing failed: {e}") from e

    async def chat_query(
        self,
        session_id: str,
        query: str,
        user_id: Optional[str] = None,
        search_mode: str = "hybrid",
        max_context_chunks: Optional[int] = None
    ) -> Dict[str, Any]:
        """Process a chat query using RAG.

        Args:
            session_id: Chat session ID
            query: User's question/query
            user_id: Optional user ID
            search_mode: Search strategy to use
            max_context_chunks: Override for max context chunks

        Returns:
            Chat response with sources and metadata
        """
        start_time = datetime.now()
        self.metrics['total_queries'] += 1

        try:
            logger.info(f"Processing chat query for session {session_id}: '{query[:50]}...'")

            # Get chat session and video context
            with registry.get_session() as session:
                chat_session = session.query(ChatSession).filter(
                    ChatSession.id == session_id
                ).first()

                if not chat_session:
                    raise RAGError(f"Chat session {session_id} not found")

                video_id = chat_session.video_id

            # Perform semantic search to get relevant context
            search_results = await self.search_service.search(
                query=query,
                video_id=video_id,
                search_mode=search_mode,
                max_results=max_context_chunks or self.config['max_context_chunks'],
                similarity_threshold=self.config['min_similarity_threshold'],
                user_id=user_id
            )

            context_chunks = search_results.get('results', [])

            if not context_chunks:
                logger.warning(f"No relevant context found for query: {query}")
                return await self._generate_no_context_response(query, session_id)

            # Generate AI response with context
            response = await self._generate_rag_response(
                query=query,
                context_chunks=context_chunks,
                session_id=session_id
            )

            # Store chat message in database
            await self._store_chat_message(
                session_id=session_id,
                query=query,
                response=response,
                context_chunks=context_chunks,
                search_results=search_results
            )

            # Update metrics
            self._update_metrics(start_time, len(context_chunks), response.get('total_tokens', 0))
            self.metrics['successful_responses'] += 1

            return response

        except RAGError:
            # Preserve specific errors (e.g. unknown session) instead of
            # double-wrapping them in a generic message.
            self.metrics['failed_responses'] += 1
            raise
        except Exception as e:
            logger.error(f"Chat query failed: {e}")
            self.metrics['failed_responses'] += 1
            raise RAGError(f"Chat query failed: {e}") from e
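
    # Illustrative override of the retrieval defaults (a session must already
    # exist; see create_chat_session). search_mode is passed straight through
    # to SemanticSearchService, with "hybrid" as the default here:
    #
    #     answer = await rag.chat_query(
    #         session_id=sid,
    #         query="What topics are covered?",
    #         max_context_chunks=3,
    #     )
    #     print(answer['response'])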

    async def _generate_rag_response(
        self,
        query: str,
        context_chunks: List[Dict[str, Any]],
        session_id: str
    ) -> Dict[str, Any]:
        """Generate AI response using RAG context.

        Args:
            query: User query
            context_chunks: Relevant context chunks
            session_id: Chat session ID

        Returns:
            Generated response with metadata
        """
        try:
            # Prepare context for AI model
            context_text = self._prepare_context_text(context_chunks)

            # Build RAG prompt
            rag_prompt = self._build_rag_prompt(query, context_text)

            # Generate response using AI service
            ai_response = await self.ai_service.generate_response(
                prompt=rag_prompt,
                max_tokens=self.config['max_response_tokens'],
                temperature=self.config['temperature']
            )

            # Format response with sources
            return self._format_response_with_sources(
                ai_response=ai_response,
                context_chunks=context_chunks,
                query=query
            )

        except Exception as e:
            logger.error(f"Failed to generate RAG response: {e}")
            raise RAGError(f"Response generation failed: {e}") from e

    def _prepare_context_text(self, context_chunks: List[Dict[str, Any]]) -> str:
        """Prepare context text from chunks for AI prompt.

        Args:
            context_chunks: List of relevant chunks

        Returns:
            Formatted context text
        """
        context_parts = []
        total_length = 0

        for chunk in context_chunks:
            content = chunk.get('content', '')
            timestamp = chunk.get('timestamp_formatted', '')

            # Format context with timestamp
            if timestamp and self.config['include_source_timestamps']:
                context_part = f"{timestamp} {content}"
            else:
                context_part = content

            # Check if adding this chunk would exceed max context length
            if total_length + len(context_part) > self.config['max_context_length']:
                break

            context_parts.append(context_part)
            total_length += len(context_part)

        return "\n\n".join(context_parts)
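
    # Worked example of the greedy packing above (illustrative): with
    # max_context_length = 4000 and chunks rendered as "[00:01:30] some text",
    # chunks are appended in search-result order, and the first chunk that
    # would push the running character total past 4000 ends the loop, even if
    # a later, shorter chunk would still fit. The "\n\n" separators are not
    # counted against the limit.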

    def _build_rag_prompt(self, query: str, context: str) -> str:
        """Build RAG prompt for AI model.

        Args:
            query: User query
            context: Relevant context from video

        Returns:
            Complete RAG prompt
        """
        prompt = f"""You are a helpful assistant that answers questions about YouTube video content. Use the provided context from the video to answer the user's question accurately and comprehensively.

CONTEXT FROM VIDEO:
{context}

USER QUESTION: {query}

INSTRUCTIONS:
- Answer based primarily on the provided context
- If the context contains timestamp information (like [HH:MM:SS]), reference specific timestamps in your response
- If the question cannot be fully answered from the context, acknowledge this limitation
- Be concise but thorough in your explanation
- Include specific details and examples from the video when relevant
- If you mention specific points, try to reference the timestamp where that information appears

RESPONSE:"""

        return prompt

    def _format_response_with_sources(
        self,
        ai_response: Dict[str, Any],
        context_chunks: List[Dict[str, Any]],
        query: str
    ) -> Dict[str, Any]:
        """Format AI response with source attribution.

        Args:
            ai_response: Raw AI response
            context_chunks: Source chunks
            query: Original query

        Returns:
            Formatted response with sources
        """
        response_text = ai_response.get('content', '')
        usage = ai_response.get('usage', {})

        # Prepare source information
        sources = []
        for chunk in context_chunks:
            content = chunk.get('content', '')
            sources.append({
                'chunk_id': chunk.get('chunk_id'),
                'content_preview': content[:200] + "..." if len(content) > 200 else content,
                'timestamp': chunk.get('start_timestamp'),
                'timestamp_formatted': chunk.get('timestamp_formatted'),
                'youtube_link': chunk.get('youtube_link'),
                'similarity_score': chunk.get('similarity_score', chunk.get('relevance_score', 0.0)),
                'search_method': chunk.get('search_method', 'unknown')
            })

        return {
            'response': response_text,
            'sources': sources,
            'total_sources': len(sources),
            'query': query,
            'context_chunks_used': len(context_chunks),
            'model_used': ai_response.get('model', 'deepseek-chat'),
            'prompt_tokens': usage.get('prompt_tokens', 0),
            'completion_tokens': usage.get('completion_tokens', 0),
            'total_tokens': usage.get('total_tokens', 0),
            'processing_time_seconds': ai_response.get('processing_time', 0.0),
            'timestamp': datetime.now().isoformat()
        }

    async def _generate_no_context_response(
        self,
        query: str,
        session_id: str
    ) -> Dict[str, Any]:
        """Generate response when no relevant context is found.

        Args:
            query: User query
            session_id: Chat session ID

        Returns:
            No-context response
        """
        response_text = """I couldn't find relevant information in the video transcript to answer your question. This might be because:

1. The topic you're asking about isn't covered in this video
2. The question is too specific or uses different terminology
3. The video content hasn't been properly indexed yet

Could you try rephrasing your question or asking about a different topic that might be covered in the video?"""

        return {
            'response': response_text,
            'sources': [],
            'total_sources': 0,
            'query': query,
            'context_chunks_used': 0,
            'no_context_found': True,
            'timestamp': datetime.now().isoformat()
        }

    async def _store_chat_message(
        self,
        session_id: str,
        query: str,
        response: Dict[str, Any],
        context_chunks: List[Dict[str, Any]],
        search_results: Dict[str, Any]
    ) -> None:
        """Store chat message in database.

        Args:
            session_id: Chat session ID
            query: User query
            response: Generated response
            context_chunks: Context chunks used
            search_results: Raw search results
        """
        try:
            with registry.get_session() as session:
                # Store user message
                user_message = ChatMessage(
                    session_id=session_id,
                    message_type="user",
                    content=query,
                    created_at=datetime.now()
                )
                session.add(user_message)

                # Store assistant response
                assistant_message = ChatMessage(
                    session_id=session_id,
                    message_type="assistant",
                    content=response['response'],
                    original_query=query,
                    context_chunks=json.dumps([chunk.get('chunk_id') for chunk in context_chunks]),
                    sources=json.dumps(response.get('sources', [])),
                    total_sources=response.get('total_sources', 0),
                    model_used=response.get('model_used'),
                    prompt_tokens=response.get('prompt_tokens'),
                    completion_tokens=response.get('completion_tokens'),
                    total_tokens=response.get('total_tokens'),
                    processing_time_seconds=response.get('processing_time_seconds'),
                    created_at=datetime.now()
                )
                session.add(assistant_message)

                # Update session statistics
                chat_session = session.query(ChatSession).filter(
                    ChatSession.id == session_id
                ).first()

                if chat_session:
                    # message_count counts both user and assistant messages, so
                    # each exchange adds 2 and message_count // 2 is the number
                    # of completed exchanges.
                    chat_session.message_count = (chat_session.message_count or 0) + 2
                    chat_session.last_message_at = datetime.now()
                    if response.get('processing_time_seconds'):
                        total_time = (chat_session.total_processing_time or 0.0) + response['processing_time_seconds']
                        chat_session.total_processing_time = total_time
                        chat_session.avg_response_time = total_time / (chat_session.message_count // 2)

                session.commit()
                logger.info(f"Stored chat messages for session {session_id}")

        except Exception as e:
            # Logged but not re-raised: a storage failure should not discard
            # the answer that was already generated for the caller.
            logger.error(f"Failed to store chat message: {e}")

    async def create_chat_session(
        self,
        video_id: str,
        user_id: Optional[str] = None,
        title: Optional[str] = None
    ) -> Dict[str, Any]:
        """Create a new chat session for a video.

        Args:
            video_id: YouTube video ID
            user_id: Optional user ID
            title: Optional session title

        Returns:
            Created session information
        """
        try:
            session_id = str(uuid.uuid4())

            # Get video information
            with registry.get_session() as session:
                summary = session.query(Summary).filter(
                    Summary.video_id == video_id
                ).first()

                # Generate title if not provided
                if not title and summary:
                    title = f"Chat about: {summary.video_title[:50]}..."
                elif not title:
                    title = f"Chat about video {video_id}"

                # Create chat session
                chat_session = ChatSession(
                    id=session_id,
                    user_id=user_id,
                    video_id=video_id,
                    summary_id=str(summary.id) if summary else None,
                    title=title,
                    session_config=json.dumps(self.config),
                    is_active=True,
                    created_at=datetime.now()
                )

                session.add(chat_session)
                session.commit()

            logger.info(f"Created chat session {session_id} for video {video_id}")

            return {
                'session_id': session_id,
                'video_id': video_id,
                'title': title,
                'user_id': user_id,
                'created_at': datetime.now().isoformat(),
                'config': self.config
            }

        except Exception as e:
            logger.error(f"Failed to create chat session: {e}")
            raise RAGError(f"Session creation failed: {e}") from e

    async def get_chat_history(
        self,
        session_id: str,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """Get chat history for a session.

        Args:
            session_id: Chat session ID
            limit: Maximum number of messages

        Returns:
            List of chat messages
        """
        try:
            with registry.get_session() as session:
                messages = session.query(ChatMessage).filter(
                    ChatMessage.session_id == session_id
                ).order_by(ChatMessage.created_at.asc()).limit(limit).all()

                formatted_messages = []
                for msg in messages:
                    message_dict = {
                        'id': msg.id,
                        'message_type': msg.message_type,
                        'content': msg.content,
                        'created_at': msg.created_at.isoformat() if msg.created_at else None,
                    }

                    # Add sources for assistant messages
                    if msg.message_type == "assistant" and msg.sources:
                        try:
                            message_dict['sources'] = json.loads(msg.sources)
                            message_dict['total_sources'] = msg.total_sources
                        except (json.JSONDecodeError, TypeError):
                            # Malformed source metadata; return the message without it
                            pass

                    formatted_messages.append(message_dict)

                return formatted_messages

        except Exception as e:
            logger.error(f"Failed to get chat history: {e}")
            return []

    def _update_metrics(
        self,
        start_time: datetime,
        context_chunks_count: int,
        tokens_used: int
    ) -> None:
        """Update service metrics.

        Args:
            start_time: Query start time
            context_chunks_count: Number of context chunks used
            tokens_used: Number of tokens used
        """
        response_time = (datetime.now() - start_time).total_seconds()

        # Incremental means: with n = total_queries (already incremented for
        # this query), new_avg = (old_avg * (n - 1) + x) / n.
        total_queries = self.metrics['total_queries']

        # Average response time
        total_time = self.metrics['avg_response_time'] * (total_queries - 1)
        self.metrics['avg_response_time'] = (total_time + response_time) / total_queries

        # Average context chunks
        total_chunks = self.metrics['avg_context_chunks'] * (total_queries - 1)
        self.metrics['avg_context_chunks'] = (total_chunks + context_chunks_count) / total_queries

        # Total tokens
        self.metrics['total_tokens_used'] += tokens_used

    async def get_service_stats(self) -> Dict[str, Any]:
        """Get RAG service statistics.

        Returns:
            Service statistics
        """
        try:
            # Get ChromaDB stats
            chroma_stats = await self.chroma_service.get_collection_stats()

            # Get search service metrics
            search_metrics = self.search_service._get_current_metrics()

            return {
                'rag_metrics': dict(self.metrics),
                'chroma_stats': chroma_stats,
                'search_metrics': search_metrics,
                'config': dict(self.config),
                'timestamp': datetime.now().isoformat()
            }

        except Exception as e:
            logger.error(f"Failed to get service stats: {e}")
            return {'error': str(e)}

    async def health_check(self) -> Dict[str, Any]:
        """Perform health check on RAG service.

        Returns:
            Health check results
        """
        try:
            # Check all component health
            search_health = await self.search_service.health_check()

            # Test basic functionality
            test_successful = True
            try:
                # Test chunking
                test_chunks = self.chunker_service.chunk_transcript(
                    "This is a test transcript for health check.",
                    "test_video_id"
                )
                if not test_chunks:
                    test_successful = False
            except Exception:
                test_successful = False

            return {
                'status': 'healthy' if search_health.get('status') == 'healthy' and test_successful else 'degraded',
                'search_service_status': search_health.get('status'),
                'chunking_test': 'passed' if test_successful else 'failed',
                'metrics': dict(self.metrics)
            }

        except Exception as e:
            logger.error(f"RAG service health check failed: {e}")
            return {
                'status': 'unhealthy',
                'error': str(e)
            }
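

if __name__ == "__main__":
    # Minimal end-to-end sketch (illustrative, not a test): assumes the backend
    # services (ChromaDB, DeepSeek, the database registry) are configured, and
    # uses a placeholder video ID and transcript.
    async def _demo() -> None:
        rag = RAGService()
        await rag.initialize()

        indexed = await rag.index_video_content(
            video_id="example_video_id",  # placeholder, not a real video
            transcript="This is a placeholder transcript for the demo.",
        )
        print(f"Indexed {indexed['chunks_created']} chunks")

        session_info = await rag.create_chat_session(video_id="example_video_id")
        answer = await rag.chat_query(
            session_id=session_info['session_id'],
            query="What is this video about?",
        )
        print(answer['response'])
        for src in answer['sources']:
            print(src['timestamp_formatted'], src['content_preview'])

    asyncio.run(_demo())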