"""Database models for RAG-powered chat functionality.""" from sqlalchemy import Column, String, Text, DateTime, Float, Boolean, ForeignKey, Index, JSON, Integer from sqlalchemy.orm import relationship from sqlalchemy.sql import func from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.types import TypeDecorator, CHAR import uuid from datetime import datetime from typing import Optional, List, Dict, Any from enum import Enum from backend.models.base import Model class GUID(TypeDecorator): """Platform-independent GUID type for SQLite and PostgreSQL compatibility.""" impl = CHAR cache_ok = True def load_dialect_impl(self, dialect): if dialect.name == 'postgresql': return dialect.type_descriptor(UUID()) else: return dialect.type_descriptor(CHAR(32)) def process_bind_param(self, value, dialect): if value is None: return value elif dialect.name == 'postgresql': return str(value) else: if not isinstance(value, uuid.UUID): return "%.32x" % uuid.UUID(value).int else: return "%.32x" % value.int def process_result_value(self, value, dialect): if value is None: return value else: if not isinstance(value, uuid.UUID): return uuid.UUID(value) return value class MessageType(str, Enum): """Chat message types.""" USER = "user" ASSISTANT = "assistant" SYSTEM = "system" class ChatSession(Model): """Chat session for RAG-powered video conversations.""" __tablename__ = "chat_sessions" id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) user_id = Column(String(36), ForeignKey("users.id"), nullable=True) video_id = Column(String(20), nullable=False) # YouTube video ID summary_id = Column(String(36), ForeignKey("summaries.id"), nullable=True) # Session metadata title = Column(String(200)) # Auto-generated or user-defined description = Column(Text) session_config = Column(JSON) # Model settings, search parameters, etc. # Session state is_active = Column(Boolean, default=True) message_count = Column(Integer, default=0) total_processing_time = Column(Float, default=0.0) # Analytics avg_response_time = Column(Float) user_satisfaction = Column(Integer) # 1-5 rating feedback_notes = Column(Text) # Timestamps created_at = Column(DateTime, server_default=func.now()) last_message_at = Column(DateTime) ended_at = Column(DateTime) # Relationships user = relationship("backend.models.user.User") summary = relationship("backend.models.summary.Summary") messages = relationship("backend.models.chat.ChatMessage", back_populates="session", cascade="all, delete-orphan") # Indexes __table_args__ = ( Index('ix_chat_sessions_user_id', 'user_id'), Index('ix_chat_sessions_video_id', 'video_id'), Index('ix_chat_sessions_is_active', 'is_active'), {'extend_existing': True} ) def __repr__(self): return f"" class ChatMessage(Model): """Individual chat message within a session.""" __tablename__ = "chat_messages" id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) session_id = Column(String(36), ForeignKey("chat_sessions.id", ondelete="CASCADE"), nullable=False) # Message content message_type = Column(String(20), nullable=False) # user, assistant, system content = Column(Text, nullable=False) original_query = Column(Text) # Original user query if this is an assistant response # RAG context context_chunks = Column(JSON) # List of chunk IDs used for response sources = Column(JSON) # Array of {chunk_id, timestamp, relevance_score} total_sources = Column(Integer, default=0) # AI metadata model_used = Column(String(100)) prompt_tokens = Column(Integer) completion_tokens = Column(Integer) total_tokens = Column(Integer) cost_usd = Column(Float) # Processing metadata processing_time_seconds = Column(Float) search_time_seconds = Column(Float) generation_time_seconds = Column(Float) # User interaction user_rating = Column(Integer) # 1-5 thumbs up/down user_feedback = Column(Text) is_helpful = Column(Boolean) # Timestamps created_at = Column(DateTime, server_default=func.now()) # Relationships session = relationship("backend.models.chat.ChatSession", back_populates="messages") # Indexes __table_args__ = ( Index('ix_chat_messages_session_id', 'session_id'), Index('ix_chat_messages_message_type', 'message_type'), Index('ix_chat_messages_created_at', 'created_at'), {'extend_existing': True} ) def __repr__(self): return f"" @property def formatted_sources(self) -> List[Dict[str, Any]]: """Format sources with timestamp links.""" if not self.sources: return [] formatted = [] for source in self.sources: if isinstance(source, dict): chunk_id = source.get('chunk_id') timestamp = source.get('timestamp') score = source.get('relevance_score', 0.0) # Format timestamp as [HH:MM:SS] link if timestamp: hours = int(timestamp // 3600) minutes = int((timestamp % 3600) // 60) seconds = int(timestamp % 60) time_str = f"[{hours:02d}:{minutes:02d}:{seconds:02d}]" else: time_str = "[00:00:00]" formatted.append({ 'chunk_id': chunk_id, 'timestamp': timestamp, 'timestamp_formatted': time_str, 'relevance_score': round(score, 3), 'youtube_link': f"https://youtube.com/watch?v={self.session.video_id}&t={int(timestamp)}s" if timestamp else None }) return formatted class VideoChunk(Model): """Video content chunks for ChromaDB vector storage.""" __tablename__ = "video_chunks" id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4())) video_id = Column(String(20), nullable=False) # YouTube video ID summary_id = Column(String(36), ForeignKey("summaries.id"), nullable=True) # Chunk metadata chunk_index = Column(Integer, nullable=False) chunk_type = Column(String(50), nullable=False) # transcript, summary, metadata start_timestamp = Column(Float) # Start time in seconds end_timestamp = Column(Float) # End time in seconds # Content content = Column(Text, nullable=False) content_length = Column(Integer) content_hash = Column(String(64)) # For deduplication # ChromaDB integration chromadb_id = Column(String(100)) # ID in ChromaDB collection embedding_model = Column(String(100)) # Model used for embedding embedding_created_at = Column(DateTime) # Processing metadata created_at = Column(DateTime, server_default=func.now()) updated_at = Column(DateTime, onupdate=func.now()) # Relationships summary = relationship("backend.models.summary.Summary") # Indexes __table_args__ = ( Index('ix_video_chunks_video_id', 'video_id'), Index('ix_video_chunks_hash', 'content_hash'), Index('ix_video_chunks_timestamps', 'start_timestamp', 'end_timestamp'), {'extend_existing': True} ) def __repr__(self): return f"" @property def timestamp_range(self) -> str: """Format timestamp range for display.""" if self.start_timestamp is not None and self.end_timestamp is not None: start_h = int(self.start_timestamp // 3600) start_m = int((self.start_timestamp % 3600) // 60) start_s = int(self.start_timestamp % 60) end_h = int(self.end_timestamp // 3600) end_m = int((self.end_timestamp % 3600) // 60) end_e = int(self.end_timestamp % 60) return f"[{start_h:02d}:{start_m:02d}:{start_s:02d}] - [{end_h:02d}:{end_m:02d}:{end_e:02d}]" return "[00:00:00] - [00:00:00]"