# youtube-summarizer/backend/alembic/versions/add_epic_4_features.py

"""Add Epic 4 features: multi-agent analysis, enhanced exports, RAG chat
Revision ID: add_epic_4_features
Revises: 0ee25b86d28b
Create Date: 2025-08-27 10:00:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# Revision identifiers consumed by Alembic when it builds the migration graph.
# Identifier of this migration.
revision: str = 'add_epic_4_features'
# Identifier of the migration this one revises (its parent).
down_revision: Union[str, None] = '0ee25b86d28b'
# No branch labels are attached to this revision.
branch_labels: Union[str, Sequence[str], None] = None
# No additional revisions are declared as dependencies.
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the Epic 4 schema.

    Creates eleven new tables (multi-agent analysis, playlists, prompt
    templates and experiments, export tracking, RAG chat) together with their
    single-column, non-unique indexes, then adds six nullable columns and two
    indexes to the existing ``summaries`` table.

    Tables that reference each other are created parent-first.  ``summaries``
    and ``users`` are referenced by foreign keys but not created here, so they
    are expected to exist already from earlier revisions.
    """

    def _index(index_name: str, table_name: str, column_name: str) -> None:
        # Every index in this migration is non-unique and covers one column.
        op.create_index(op.f(index_name), table_name, [column_name], unique=False)

    # 1. Agent Summaries - multi-agent analysis results
    op.create_table(
        'agent_summaries',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('summary_id', sa.String(length=36), nullable=False),
        # agent_type: technical, business, user, synthesis
        sa.Column('agent_type', sa.String(length=20), nullable=False),
        sa.Column('agent_summary', sa.Text(), nullable=True),
        sa.Column('key_insights', sa.JSON(), nullable=True),
        sa.Column('focus_areas', sa.JSON(), nullable=True),
        sa.Column('recommendations', sa.JSON(), nullable=True),
        sa.Column('confidence_score', sa.Float(), nullable=True),
        sa.Column('processing_time_seconds', sa.Float(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['summary_id'], ['summaries.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_agent_summaries_summary_id', 'agent_summaries', 'summary_id')
    _index('ix_agent_summaries_agent_type', 'agent_summaries', 'agent_type')

    # 2. Playlists - multi-video analysis
    op.create_table(
        'playlists',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('user_id', sa.String(length=36), nullable=True),
        sa.Column('playlist_id', sa.String(length=50), nullable=True),
        sa.Column('playlist_url', sa.Text(), nullable=True),
        sa.Column('title', sa.String(length=500), nullable=True),
        sa.Column('channel_name', sa.String(length=200), nullable=True),
        sa.Column('video_count', sa.Integer(), nullable=True),
        sa.Column('total_duration', sa.Integer(), nullable=True),
        sa.Column('analyzed_at', sa.DateTime(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='SET NULL'),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_playlists_user_id', 'playlists', 'user_id')
    _index('ix_playlists_playlist_id', 'playlists', 'playlist_id')

    # 3. Playlist Analysis - cross-video analysis results
    op.create_table(
        'playlist_analysis',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('playlist_id', sa.String(length=36), nullable=False),
        sa.Column('themes', sa.JSON(), nullable=True),
        sa.Column('content_progression', sa.JSON(), nullable=True),
        sa.Column('key_insights', sa.JSON(), nullable=True),
        sa.Column('agent_perspectives', sa.JSON(), nullable=True),
        sa.Column('synthesis_summary', sa.Text(), nullable=True),
        sa.Column('quality_score', sa.Float(), nullable=True),
        sa.Column('processing_time_seconds', sa.Float(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['playlist_id'], ['playlists.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_playlist_analysis_playlist_id', 'playlist_analysis', 'playlist_id')

    # 4. Prompt Templates - custom AI model configurations
    op.create_table(
        'prompt_templates',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('user_id', sa.String(length=36), nullable=True),
        sa.Column('name', sa.String(length=200), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('prompt_text', sa.Text(), nullable=False),
        # domain_category: educational, business, technical, etc.
        sa.Column('domain_category', sa.String(length=50), nullable=True),
        # model_config: temperature, max_tokens, etc.
        sa.Column('model_config', sa.JSON(), nullable=True),
        sa.Column('is_public', sa.Boolean(), nullable=True),
        sa.Column('usage_count', sa.Integer(), nullable=True),
        sa.Column('rating', sa.Float(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_prompt_templates_user_id', 'prompt_templates', 'user_id')
    _index('ix_prompt_templates_domain_category', 'prompt_templates', 'domain_category')
    _index('ix_prompt_templates_is_public', 'prompt_templates', 'is_public')

    # 5. Prompt Experiments - A/B testing framework
    op.create_table(
        'prompt_experiments',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('name', sa.String(length=200), nullable=False),
        sa.Column('description', sa.Text(), nullable=True),
        sa.Column('baseline_template_id', sa.String(length=36), nullable=True),
        sa.Column('variant_template_id', sa.String(length=36), nullable=True),
        # status: active, completed, paused
        sa.Column('status', sa.String(length=20), nullable=True),
        # success_metric: quality_score, user_rating, processing_time
        sa.Column('success_metric', sa.String(length=50), nullable=True),
        sa.Column('statistical_significance', sa.Float(), nullable=True),
        sa.Column('baseline_score', sa.Float(), nullable=True),
        sa.Column('variant_score', sa.Float(), nullable=True),
        sa.Column('sample_size', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(
            ['baseline_template_id'], ['prompt_templates.id'], ondelete='SET NULL'
        ),
        sa.ForeignKeyConstraint(
            ['variant_template_id'], ['prompt_templates.id'], ondelete='SET NULL'
        ),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_prompt_experiments_status', 'prompt_experiments', 'status')

    # 6. Export Metadata - enhanced export tracking
    op.create_table(
        'export_metadata',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('summary_id', sa.String(length=36), nullable=False),
        sa.Column('template_id', sa.String(length=36), nullable=True),
        # export_type: markdown, pdf, json, html
        sa.Column('export_type', sa.String(length=20), nullable=False),
        sa.Column('executive_summary', sa.Text(), nullable=True),
        sa.Column('section_count', sa.Integer(), nullable=True),
        sa.Column('timestamp_count', sa.Integer(), nullable=True),
        sa.Column('word_count', sa.Integer(), nullable=True),
        sa.Column('processing_time_seconds', sa.Float(), nullable=True),
        sa.Column('quality_score', sa.Float(), nullable=True),
        sa.Column('export_config', sa.JSON(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['summary_id'], ['summaries.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(
            ['template_id'], ['prompt_templates.id'], ondelete='SET NULL'
        ),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_export_metadata_summary_id', 'export_metadata', 'summary_id')
    _index('ix_export_metadata_export_type', 'export_metadata', 'export_type')

    # 7. Summary Sections - timestamped sections for enhanced export
    op.create_table(
        'summary_sections',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('summary_id', sa.String(length=36), nullable=False),
        sa.Column('section_index', sa.Integer(), nullable=False),
        sa.Column('title', sa.String(length=300), nullable=True),
        sa.Column('start_timestamp', sa.Integer(), nullable=True),  # seconds
        sa.Column('end_timestamp', sa.Integer(), nullable=True),
        sa.Column('content', sa.Text(), nullable=True),
        sa.Column('summary', sa.Text(), nullable=True),
        sa.Column('key_points', sa.JSON(), nullable=True),
        sa.Column('youtube_link', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['summary_id'], ['summaries.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_summary_sections_summary_id', 'summary_sections', 'summary_id')
    _index('ix_summary_sections_section_index', 'summary_sections', 'section_index')

    # 8. Chat Sessions - RAG chat sessions
    op.create_table(
        'chat_sessions',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('user_id', sa.String(length=36), nullable=True),
        sa.Column('video_id', sa.String(length=20), nullable=False),
        sa.Column('summary_id', sa.String(length=36), nullable=True),
        sa.Column('session_name', sa.String(length=200), nullable=True),
        sa.Column('total_messages', sa.Integer(), nullable=True),
        sa.Column('is_active', sa.Boolean(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['summary_id'], ['summaries.id'], ondelete='SET NULL'),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_chat_sessions_user_id', 'chat_sessions', 'user_id')
    _index('ix_chat_sessions_video_id', 'chat_sessions', 'video_id')
    _index('ix_chat_sessions_is_active', 'chat_sessions', 'is_active')

    # 9. Chat Messages - individual chat messages
    op.create_table(
        'chat_messages',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('session_id', sa.String(length=36), nullable=False),
        # message_type: user, assistant, system
        sa.Column('message_type', sa.String(length=20), nullable=False),
        sa.Column('content', sa.Text(), nullable=False),
        # sources: array of {chunk_id, timestamp, relevance_score}
        sa.Column('sources', sa.JSON(), nullable=True),
        sa.Column('processing_time_seconds', sa.Float(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['session_id'], ['chat_sessions.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_chat_messages_session_id', 'chat_messages', 'session_id')
    _index('ix_chat_messages_message_type', 'chat_messages', 'message_type')

    # 10. Video Chunks - vector embeddings for RAG (ChromaDB metadata reference)
    op.create_table(
        'video_chunks',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('video_id', sa.String(length=20), nullable=False),
        sa.Column('chunk_index', sa.Integer(), nullable=False),
        sa.Column('chunk_text', sa.Text(), nullable=False),
        sa.Column('start_timestamp', sa.Integer(), nullable=True),  # seconds
        sa.Column('end_timestamp', sa.Integer(), nullable=True),
        sa.Column('word_count', sa.Integer(), nullable=True),
        # embedding_id: ChromaDB document ID
        sa.Column('embedding_id', sa.String(length=100), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_video_chunks_video_id', 'video_chunks', 'video_id')
    _index('ix_video_chunks_chunk_index', 'video_chunks', 'chunk_index')
    _index('ix_video_chunks_embedding_id', 'video_chunks', 'embedding_id')

    # 11. RAG Analytics - performance tracking
    op.create_table(
        'rag_analytics',
        sa.Column('id', sa.String(length=36), nullable=False),
        sa.Column('video_id', sa.String(length=20), nullable=False),
        sa.Column('question', sa.Text(), nullable=False),
        sa.Column('retrieval_count', sa.Integer(), nullable=True),
        sa.Column('relevance_scores', sa.JSON(), nullable=True),
        sa.Column('response_quality_score', sa.Float(), nullable=True),
        sa.Column('user_feedback', sa.Integer(), nullable=True),  # 1-5 rating
        sa.Column('processing_time_seconds', sa.Float(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    _index('ix_rag_analytics_video_id', 'rag_analytics', 'video_id')
    _index('ix_rag_analytics_user_feedback', 'rag_analytics', 'user_feedback')

    # 12. Extend the existing summaries table with the Epic 4 columns.
    summary_additions = (
        # transcript_source: youtube, whisper, both
        sa.Column('transcript_source', sa.String(length=20), nullable=True),
        sa.Column('transcript_quality_score', sa.Float(), nullable=True),
        sa.Column('processing_method', sa.String(length=50), nullable=True),
        sa.Column('multi_agent_analysis', sa.Boolean(), nullable=True),
        sa.Column('enhanced_export_available', sa.Boolean(), nullable=True),
        sa.Column('rag_enabled', sa.Boolean(), nullable=True),
    )
    for new_column in summary_additions:
        op.add_column('summaries', new_column)

    # Indexes on the new summaries columns.
    _index('ix_summaries_transcript_source', 'summaries', 'transcript_source')
    _index('ix_summaries_multi_agent_analysis', 'summaries', 'multi_agent_analysis')
def downgrade() -> None:
    """Revert the Epic 4 schema changes.

    First removes the two indexes and six columns that were added to
    ``summaries``, then drops each Epic 4 table after dropping its indexes.
    Tables are dropped so that a referencing table always goes before the
    table its foreign keys point at.
    """
    # Indexes on the added summaries columns go first, then the columns.
    for index_name in (
        'ix_summaries_multi_agent_analysis',
        'ix_summaries_transcript_source',
    ):
        op.drop_index(op.f(index_name), table_name='summaries')

    for column_name in (
        'rag_enabled',
        'enhanced_export_available',
        'multi_agent_analysis',
        'processing_method',
        'transcript_quality_score',
        'transcript_source',
    ):
        op.drop_column('summaries', column_name)

    # (table, indexes to drop before the table), in reverse dependency order.
    teardown_plan = (
        ('rag_analytics', (
            'ix_rag_analytics_user_feedback',
            'ix_rag_analytics_video_id',
        )),
        ('video_chunks', (
            'ix_video_chunks_embedding_id',
            'ix_video_chunks_chunk_index',
            'ix_video_chunks_video_id',
        )),
        ('chat_messages', (
            'ix_chat_messages_message_type',
            'ix_chat_messages_session_id',
        )),
        ('chat_sessions', (
            'ix_chat_sessions_is_active',
            'ix_chat_sessions_video_id',
            'ix_chat_sessions_user_id',
        )),
        ('summary_sections', (
            'ix_summary_sections_section_index',
            'ix_summary_sections_summary_id',
        )),
        ('export_metadata', (
            'ix_export_metadata_export_type',
            'ix_export_metadata_summary_id',
        )),
        ('prompt_experiments', (
            'ix_prompt_experiments_status',
        )),
        ('prompt_templates', (
            'ix_prompt_templates_is_public',
            'ix_prompt_templates_domain_category',
            'ix_prompt_templates_user_id',
        )),
        ('playlist_analysis', (
            'ix_playlist_analysis_playlist_id',
        )),
        ('playlists', (
            'ix_playlists_playlist_id',
            'ix_playlists_user_id',
        )),
        ('agent_summaries', (
            'ix_agent_summaries_agent_type',
            'ix_agent_summaries_summary_id',
        )),
    )
    for table_name, index_names in teardown_plan:
        for index_name in index_names:
            op.drop_index(op.f(index_name), table_name=table_name)
        op.drop_table(table_name)