youtube-summarizer/backend/models/playlist_models.py

134 lines
5.5 KiB
Python

"""Database models for playlist and multi-video analysis."""
from sqlalchemy import Column, String, Integer, Text, DateTime, Float, Boolean, ForeignKey, JSON
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.types import TypeDecorator, CHAR
import uuid
from datetime import datetime
from backend.models.base import Model
class GUID(TypeDecorator):
    """Backend-agnostic GUID column type.

    On PostgreSQL the value is stored using the dialect's ``UUID`` type; on
    every other dialect it is stored as a ``CHAR(32)`` holding the 32 hex
    digits of the UUID.
    """

    impl = CHAR
    cache_ok = True

    def load_dialect_impl(self, dialect):
        """Return the dialect-specific type used to persist the value."""
        if dialect.name == "postgresql":
            return dialect.type_descriptor(UUID())
        return dialect.type_descriptor(CHAR(32))

    def process_bind_param(self, value, dialect):
        """Convert a Python-side value into the form sent to the database.

        ``None`` passes through unchanged. PostgreSQL receives ``str(value)``;
        other dialects receive the 32-character lowercase hex form.
        """
        if value is None:
            return None
        if dialect.name == "postgresql":
            return str(value)
        # Anything that is not already a UUID is parsed by uuid.UUID first.
        parsed = value if isinstance(value, uuid.UUID) else uuid.UUID(value)
        # UUID.hex is the zero-padded 32-digit hex rendering of UUID.int.
        return parsed.hex

    def process_result_value(self, value, dialect):
        """Convert a database value back into a ``uuid.UUID`` (or ``None``)."""
        if value is None or isinstance(value, uuid.UUID):
            return value
        return uuid.UUID(value)
class Playlist(Model):
    """A YouTube playlist row plus the fields used to track its analysis."""

    __tablename__ = "playlists"
    __table_args__ = {"extend_existing": True}

    # Primary key is generated client-side as a random UUID.
    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    # Owning user; the foreign key is declared with ON DELETE SET NULL.
    user_id = Column(
        GUID,
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    # YouTube playlist ID (indexed).
    playlist_id = Column(String(50), nullable=True, index=True)
    playlist_url = Column(Text)
    title = Column(String(500))
    channel_name = Column(String(200))
    video_count = Column(Integer)
    # Total duration in seconds.
    total_duration = Column(Integer)
    analyzed_at = Column(DateTime)
    # Filled in on the Python side at insert time.
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationships
    user = relationship("backend.models.user.User")
    videos = relationship(
        "backend.models.playlist_models.PlaylistVideo",
        back_populates="playlist",
        cascade="all, delete-orphan",
    )
    # uselist=False: exposed as a single object rather than a list.
    multi_video_analysis = relationship(
        "backend.models.playlist_models.MultiVideoAnalysis",
        back_populates="playlist",
        uselist=False,
    )

    def __repr__(self):
        """Return a short debug representation (id, title, video count)."""
        return f"<Playlist(id={self.id}, title={self.title}, videos={self.video_count})>"
class PlaylistVideo(Model):
    """One video entry belonging to a playlist."""

    __tablename__ = "playlist_videos"
    __table_args__ = {"extend_existing": True}

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    # Parent playlist; the foreign key is declared with ON DELETE CASCADE.
    playlist_id = Column(
        GUID,
        ForeignKey("playlists.id", ondelete="CASCADE"),
        nullable=False,
    )
    video_id = Column(String(20), nullable=False)
    title = Column(String(500))
    position = Column(Integer, nullable=False)
    # Duration in ISO 8601 format (PT4M13S).
    duration = Column(String(20))
    upload_date = Column(DateTime)
    # One of: pending, processing, completed, failed.
    analysis_status = Column(String(20), default="pending")
    agent_analysis_id = Column(GUID, ForeignKey("agent_summaries.id"))
    error_message = Column(Text)
    # Timestamps default to the database's now(); updated_at also has an
    # onupdate of now().
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(
        DateTime,
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    playlist = relationship(
        "backend.models.playlist_models.Playlist",
        back_populates="videos",
    )
    agent_analysis = relationship("backend.models.agent_models.AgentSummary")

    def __repr__(self):
        """Return a short debug representation (id, video_id, position)."""
        return f"<PlaylistVideo(id={self.id}, video_id={self.video_id}, position={self.position})>"
class MultiVideoAnalysis(Model):
    """Stored result of an analysis spanning several videos."""

    __tablename__ = "multi_video_analyses"
    __table_args__ = {"extend_existing": True}

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    # Optional link to a playlist (nullable).
    playlist_id = Column(GUID, ForeignKey("playlists.id"), nullable=True)
    # One of: playlist, channel, custom.
    analysis_type = Column(String(50), nullable=False)
    # JSON array of video IDs.
    video_ids = Column(JSON)

    # Analysis results
    common_themes = Column(JSON)
    content_progression = Column(JSON)
    key_insights = Column(JSON)
    agent_perspectives = Column(JSON)
    synthesis_summary = Column(Text)

    # Metadata
    videos_analyzed = Column(Integer, default=0)
    analysis_duration_seconds = Column(Float)
    # Overall confidence in analysis.
    confidence_score = Column(Float)
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(
        DateTime,
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Relationships
    playlist = relationship(
        "backend.models.playlist_models.Playlist",
        back_populates="multi_video_analysis",
    )

    def __repr__(self):
        """Return a short debug representation (id, type, videos analyzed)."""
        return f"<MultiVideoAnalysis(id={self.id}, type={self.analysis_type}, videos={self.videos_analyzed})>"
# Update the Playlist model to include new relationships
# Note: This extends the existing Playlist model from the migration
class PlaylistExtension:
    """Holder for relationship definitions intended for the Playlist model.

    This is a plain class (it has no declarative base), so the attributes
    below are only definitions meant to be attached to Playlist, e.g. via
    monkey patching or inheritance.
    """

    # Videos of the playlist.
    videos = relationship(
        "backend.models.playlist_models.PlaylistVideo",
        back_populates="playlist",
        cascade="all, delete-orphan",
    )
    # Single (uselist=False) cross-video analysis for the playlist.
    multi_video_analysis = relationship(
        "backend.models.playlist_models.MultiVideoAnalysis",
        back_populates="playlist",
        uselist=False,
    )