"""Add v2 schema Revision ID: 20241230_add_v2_schema Revises: dcdfa10e65bd Create Date: 2024-12-30 10:00:00.000000 """ from alembic import op import sqlalchemy as sa from sqlalchemy.dialects.postgresql import JSONB # revision identifiers, used by Alembic. revision = '20241230_add_v2_schema' down_revision = 'dcdfa10e65bd' branch_labels = None depends_on = None def upgrade() -> None: """Upgrade to v2 schema. Creates new tables for speaker profiles and v2 processing jobs, and adds v2-specific columns to the transcription_results table. """ # Create speaker_profiles table op.create_table( 'speaker_profiles', sa.Column('id', sa.Integer(), nullable=False), sa.Column('name', sa.String(255), nullable=False), sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP')), sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP')), sa.Column('characteristics', JSONB, nullable=True), sa.Column('embedding', sa.Text(), nullable=True), sa.Column('sample_count', sa.Integer(), server_default='0'), sa.Column('user_id', sa.Integer(), nullable=True), sa.PrimaryKeyConstraint('id') ) # Create indexes for speaker_profiles op.create_index('ix_speaker_profiles_name', 'speaker_profiles', ['name']) op.create_index('ix_speaker_profiles_user_id', 'speaker_profiles', ['user_id']) # Create v2_processing_jobs table op.create_table( 'v2_processing_jobs', sa.Column('id', sa.Integer(), nullable=False), sa.Column('status', sa.String(50), server_default='pending', nullable=False), sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP')), sa.Column('updated_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP')), sa.Column('completed_at', sa.TIMESTAMP(timezone=True), nullable=True), sa.Column('transcript_id', sa.UUID(), nullable=True), sa.Column('job_type', sa.String(50), nullable=False), sa.Column('parameters', JSONB, nullable=True), sa.Column('progress', sa.Float(), server_default='0'), sa.Column('error_message', sa.Text(), nullable=True), sa.Column('result_data', JSONB, nullable=True), sa.PrimaryKeyConstraint('id') ) # Create indexes for v2_processing_jobs op.create_index('ix_v2_processing_jobs_status', 'v2_processing_jobs', ['status']) op.create_index('ix_v2_processing_jobs_transcript_id', 'v2_processing_jobs', ['transcript_id']) op.create_index('ix_v2_processing_jobs_job_type', 'v2_processing_jobs', ['job_type']) # Add foreign key constraint for v2_processing_jobs op.create_foreign_key( 'fk_v2_processing_jobs_transcript_id', 'v2_processing_jobs', 'transcription_results', ['transcript_id'], ['id'], ondelete='CASCADE' ) # Add v2 columns to transcription_results table op.add_column('transcription_results', sa.Column('pipeline_version', sa.String(20), nullable=True)) op.add_column('transcription_results', sa.Column('enhanced_content', JSONB, nullable=True)) op.add_column('transcription_results', sa.Column('diarization_content', JSONB, nullable=True)) op.add_column('transcription_results', sa.Column('merged_content', JSONB, nullable=True)) op.add_column('transcription_results', sa.Column('domain_used', sa.String(100), nullable=True)) op.add_column('transcription_results', sa.Column('accuracy_estimate', sa.Float(), nullable=True)) op.add_column('transcription_results', sa.Column('speaker_count', sa.Integer(), nullable=True)) op.add_column('transcription_results', sa.Column('quality_warnings', JSONB, nullable=True)) op.add_column('transcription_results', sa.Column('processing_metadata', JSONB, nullable=True)) # Create indexes for new v2 columns op.create_index('ix_transcription_results_pipeline_version', 'transcription_results', ['pipeline_version']) op.create_index('ix_transcription_results_domain_used', 'transcription_results', ['domain_used']) op.create_index('ix_transcription_results_speaker_count', 'transcription_results', ['speaker_count']) # Update existing transcripts to have pipeline_version = 'v1' op.execute(""" UPDATE transcription_results SET pipeline_version = 'v1' WHERE pipeline_version IS NULL """) def downgrade() -> None: """Downgrade from v2 schema. Removes v2-specific columns and tables, reverting to v1 schema. """ # Remove indexes for v2 columns op.drop_index('ix_transcription_results_speaker_count', 'transcription_results') op.drop_index('ix_transcription_results_domain_used', 'transcription_results') op.drop_index('ix_transcription_results_pipeline_version', 'transcription_results') # Remove v2 columns from transcription_results table op.drop_column('transcription_results', 'processing_metadata') op.drop_column('transcription_results', 'quality_warnings') op.drop_column('transcription_results', 'speaker_count') op.drop_column('transcription_results', 'accuracy_estimate') op.drop_column('transcription_results', 'domain_used') op.drop_column('transcription_results', 'merged_content') op.drop_column('transcription_results', 'diarization_content') op.drop_column('transcription_results', 'enhanced_content') op.drop_column('transcription_results', 'pipeline_version') # Remove foreign key constraint for v2_processing_jobs op.drop_constraint('fk_v2_processing_jobs_transcript_id', 'v2_processing_jobs', type_='foreignkey') # Remove indexes for v2_processing_jobs op.drop_index('ix_v2_processing_jobs_job_type', 'v2_processing_jobs') op.drop_index('ix_v2_processing_jobs_transcript_id', 'v2_processing_jobs') op.drop_index('ix_v2_processing_jobs_status', 'v2_processing_jobs') # Drop v2_processing_jobs table op.drop_table('v2_processing_jobs') # Remove indexes for speaker_profiles op.drop_index('ix_speaker_profiles_user_id', 'speaker_profiles') op.drop_index('ix_speaker_profiles_name', 'speaker_profiles') # Drop speaker_profiles table op.drop_table('speaker_profiles')