youtube-summarizer/backend/alembic/versions/add_batch_processing_tables.py

"""Add batch processing tables
Revision ID: add_batch_processing_001
Revises: add_history_fields_001
Create Date: 2025-08-27 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import sqlite

# revision identifiers, used by Alembic.
revision = 'add_batch_processing_001'
down_revision = 'add_history_fields_001'
branch_labels = None
depends_on = None

def upgrade() -> None:
    # Create batch_jobs table
    op.create_table('batch_jobs',
        sa.Column('id', sa.String(), nullable=False),
        sa.Column('user_id', sa.String(), nullable=False),
        sa.Column('name', sa.String(length=255), nullable=True),
        sa.Column('status', sa.String(length=50), nullable=True),
        sa.Column('urls', sa.JSON(), nullable=False),
        sa.Column('model', sa.String(length=50), nullable=True),
        sa.Column('summary_length', sa.String(length=20), nullable=True),
        sa.Column('options', sa.JSON(), nullable=True),
        sa.Column('total_videos', sa.Integer(), nullable=False),
        sa.Column('completed_videos', sa.Integer(), nullable=True),
        sa.Column('failed_videos', sa.Integer(), nullable=True),
        sa.Column('skipped_videos', sa.Integer(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.Column('started_at', sa.DateTime(), nullable=True),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.Column('estimated_completion', sa.DateTime(), nullable=True),
        sa.Column('total_processing_time', sa.Float(), nullable=True),
        sa.Column('results', sa.JSON(), nullable=True),
        sa.Column('export_url', sa.String(length=500), nullable=True),
        sa.Column('total_cost_usd', sa.Float(), nullable=True),
        sa.ForeignKeyConstraint(['user_id'], ['users.id']),
        sa.PrimaryKeyConstraint('id')
    )
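    # Note on column types: `urls` and `results` use SQLAlchemy's generic JSON type, which
    # SQLite stores as serialized TEXT. The counter columns (completed/failed/skipped_videos)
    # and total_cost_usd are created nullable here and backfilled at the end of this migration.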
    # Create batch_job_items table
    op.create_table('batch_job_items',
        sa.Column('id', sa.String(), nullable=False),
        sa.Column('batch_job_id', sa.String(), nullable=False),
        sa.Column('summary_id', sa.String(), nullable=True),
        sa.Column('url', sa.String(length=500), nullable=False),
        sa.Column('position', sa.Integer(), nullable=False),
        sa.Column('status', sa.String(length=50), nullable=True),
        sa.Column('video_id', sa.String(length=20), nullable=True),
        sa.Column('video_title', sa.String(length=500), nullable=True),
        sa.Column('channel_name', sa.String(length=255), nullable=True),
        sa.Column('duration_seconds', sa.Integer(), nullable=True),
        sa.Column('started_at', sa.DateTime(), nullable=True),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.Column('processing_time_seconds', sa.Float(), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('error_type', sa.String(length=100), nullable=True),
        sa.Column('retry_count', sa.Integer(), nullable=True),
        sa.Column('max_retries', sa.Integer(), nullable=True),
        sa.Column('cost_usd', sa.Float(), nullable=True),
        sa.ForeignKeyConstraint(['batch_job_id'], ['batch_jobs.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['summary_id'], ['summaries.id']),
        sa.PrimaryKeyConstraint('id')
    )
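    # Items are owned by their batch: the CASCADE on batch_job_id removes items when the
    # parent job is deleted (SQLite enforces this only when PRAGMA foreign_keys=ON).
    # summary_id is nullable, presumably so an item can exist before its summary is created.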
    # Create indexes for performance
    op.create_index('idx_batch_jobs_user_status', 'batch_jobs', ['user_id', 'status'])
    op.create_index('idx_batch_jobs_created_at', 'batch_jobs', ['created_at'])
    op.create_index('idx_batch_job_items_batch_status', 'batch_job_items', ['batch_job_id', 'status'])
    op.create_index('idx_batch_job_items_position', 'batch_job_items', ['batch_job_id', 'position'])
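    # The composite indexes target the expected read paths (an assumption about the
    # application code, not something this migration enforces): listing a user's batches
    # by status, ordering batches by creation time, and reading a batch's items by status
    # or in submission order, e.g.
    #   SELECT * FROM batch_job_items WHERE batch_job_id = :id ORDER BY position;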
    # Backfill NULLs in the counter, retry, and cost columns with their starting values.
    # The tables are created empty by this migration, so these statements are no-ops on a
    # fresh database; note they backfill rows rather than adding column-level defaults.
    op.execute("UPDATE batch_jobs SET completed_videos = 0 WHERE completed_videos IS NULL")
    op.execute("UPDATE batch_jobs SET failed_videos = 0 WHERE failed_videos IS NULL")
    op.execute("UPDATE batch_jobs SET skipped_videos = 0 WHERE skipped_videos IS NULL")
    op.execute("UPDATE batch_jobs SET total_cost_usd = 0.0 WHERE total_cost_usd IS NULL")
    op.execute("UPDATE batch_job_items SET retry_count = 0 WHERE retry_count IS NULL")
    op.execute("UPDATE batch_job_items SET max_retries = 2 WHERE max_retries IS NULL")
    op.execute("UPDATE batch_job_items SET cost_usd = 0.0 WHERE cost_usd IS NULL")

def downgrade() -> None:
    # Drop indexes
    op.drop_index('idx_batch_job_items_position', table_name='batch_job_items')
    op.drop_index('idx_batch_job_items_batch_status', table_name='batch_job_items')
    op.drop_index('idx_batch_jobs_created_at', table_name='batch_jobs')
    op.drop_index('idx_batch_jobs_user_status', table_name='batch_jobs')
    # Drop tables
    op.drop_table('batch_job_items')
    op.drop_table('batch_jobs')
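
# Typical usage (a sketch, assuming the standard Alembic setup under youtube-summarizer/backend
# with this revision on the migration path after add_history_fields_001):
#
#   alembic upgrade head                         # apply this revision
#   alembic downgrade add_history_fields_001     # revert to the previous revision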