youtube-summarizer/backend/models/cache.py

101 lines
3.3 KiB
Python

"""Cache models for storing transcripts and summaries."""
from sqlalchemy import Column, String, Text, DateTime, Float, Integer, JSON, Index
from sqlalchemy.ext.declarative import declarative_base
from datetime import datetime
# Declarative base class that the cache models in this module inherit from.
Base = declarative_base()
class CachedTranscript(Base):
    """Cache storage for video transcripts.

    Rows carry single-column indexes on ``video_id`` and ``expires_at`` plus
    a composite ``(video_id, language)`` index.

    Note:
        The JSON metadata is exposed on the model as ``transcript_metadata``
        while the underlying database column is still named ``metadata``.
        The Python attribute cannot be called ``metadata``: the Declarative
        API reserves that name for the table ``MetaData`` collection and
        raises ``InvalidRequestError`` while the class is being defined.
    """

    __tablename__ = "cached_transcripts"

    id = Column(Integer, primary_key=True)
    video_id = Column(String(20), nullable=False, index=True)
    language = Column(String(10), nullable=False, default="en")

    # Content
    content = Column(Text, nullable=False)
    # Explicit column name keeps the schema's "metadata" column while
    # avoiding the reserved declarative attribute name.
    transcript_metadata = Column("metadata", JSON, default=dict)
    extraction_method = Column(String(50), nullable=False)

    # Cache management
    # ``datetime.utcnow`` is passed as a callable, so it is evaluated for
    # each inserted row rather than once at import time.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    expires_at = Column(DateTime, nullable=False, index=True)
    access_count = Column(Integer, default=1)
    last_accessed = Column(DateTime, default=datetime.utcnow)

    # Performance tracking
    size_bytes = Column(Integer, nullable=False, default=0)

    # Composite index for efficient lookups
    __table_args__ = (
        Index('idx_video_language', 'video_id', 'language'),
    )
class CachedSummary(Base):
    """Persisted cache entry holding one AI-generated summary.

    An entry is addressed by the pair ``(transcript_hash, config_hash)``;
    both hashes are indexed individually and together.
    """

    __tablename__ = "cached_summaries"

    # Lookup by the (transcript, configuration) hash pair.
    __table_args__ = (
        Index("idx_transcript_config_hash", "transcript_hash", "config_hash"),
    )

    id = Column(Integer, primary_key=True)
    transcript_hash = Column(String(32), index=True, nullable=False)
    config_hash = Column(String(32), index=True, nullable=False)

    # --- Summary payload ---
    summary = Column(Text, nullable=False)
    key_points = Column(JSON, default=list)
    main_themes = Column(JSON, default=list)
    actionable_insights = Column(JSON, default=list)
    confidence_score = Column(Float, default=0.0)

    # --- Bookkeeping about how the summary was produced ---
    processing_metadata = Column(JSON, default=dict)
    cost_data = Column(JSON, default=dict)
    cache_metadata = Column(JSON, default=dict)

    # --- Cache lifecycle ---
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    expires_at = Column(DateTime, index=True, nullable=False)
    access_count = Column(Integer, default=1)
    last_accessed = Column(DateTime, default=datetime.utcnow)

    # --- Size accounting ---
    size_bytes = Column(Integer, default=0, nullable=False)
class CacheAnalytics(Base):
    """Row of cache performance metrics recorded for a given ``date``.

    Every counter and gauge defaults to zero.
    """

    __tablename__ = "cache_analytics"

    id = Column(Integer, primary_key=True)
    date = Column(DateTime, index=True, nullable=False)

    # --- Hit / miss counters ---
    transcript_hits = Column(Integer, default=0)
    transcript_misses = Column(Integer, default=0)
    summary_hits = Column(Integer, default=0)
    summary_misses = Column(Integer, default=0)

    # --- Performance gauges ---
    average_response_time_ms = Column(Float, default=0.0)
    total_cache_size_mb = Column(Float, default=0.0)

    # --- Estimated savings ---
    estimated_api_cost_saved_usd = Column(Float, default=0.0)
    estimated_time_saved_seconds = Column(Float, default=0.0)

    # --- Resource footprint ---
    redis_memory_mb = Column(Float, default=0.0)
    database_size_mb = Column(Float, default=0.0)

    created_at = Column(DateTime, default=datetime.utcnow)