trax/migrations/versions/20241230_add_v2_schema.py

135 lines
6.2 KiB
Python

"""Add v2 schema
Revision ID: 20241230_add_v2_schema
Revises: dcdfa10e65bd
Create Date: 2024-12-30 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
# Revision identifiers, used by Alembic.
# `revision` names this migration; `down_revision` is the revision it
# revises (see "Revises" in the module docstring).
revision = '20241230_add_v2_schema'
down_revision = 'dcdfa10e65bd'
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Upgrade to the v2 schema.

    Steps, in order:

    1. Create the ``speaker_profiles`` table and its indexes.
    2. Create the ``v2_processing_jobs`` table, its indexes, and a foreign
       key from ``transcript_id`` to ``transcription_results.id``.
    3. Add the v2 columns to ``transcription_results``.
    4. Backfill ``pipeline_version = 'v1'`` on every existing transcript.
    5. Create the indexes on the new ``transcription_results`` columns.

    The backfill deliberately runs *before* the indexes are created, so the
    index on ``pipeline_version`` is built once from the final data instead
    of being maintained row by row while the full-table UPDATE runs.
    """
    # --- speaker_profiles --------------------------------------------------
    op.create_table(
        'speaker_profiles',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(255), nullable=False),
        # NOTE: both timestamps only get a default at INSERT time; nothing in
        # this migration refreshes `updated_at` on UPDATE.
        sa.Column(
            'created_at',
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text('CURRENT_TIMESTAMP'),
        ),
        sa.Column(
            'updated_at',
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text('CURRENT_TIMESTAMP'),
        ),
        sa.Column('characteristics', JSONB, nullable=True),
        sa.Column('embedding', sa.Text(), nullable=True),
        sa.Column('sample_count', sa.Integer(), server_default='0'),
        sa.Column('user_id', sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column_name in (
        ('ix_speaker_profiles_name', 'name'),
        ('ix_speaker_profiles_user_id', 'user_id'),
    ):
        op.create_index(index_name, 'speaker_profiles', [column_name])

    # --- v2_processing_jobs ------------------------------------------------
    op.create_table(
        'v2_processing_jobs',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('status', sa.String(50), server_default='pending', nullable=False),
        sa.Column(
            'created_at',
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text('CURRENT_TIMESTAMP'),
        ),
        sa.Column(
            'updated_at',
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text('CURRENT_TIMESTAMP'),
        ),
        sa.Column('completed_at', sa.TIMESTAMP(timezone=True), nullable=True),
        sa.Column('transcript_id', sa.UUID(), nullable=True),
        sa.Column('job_type', sa.String(50), nullable=False),
        sa.Column('parameters', JSONB, nullable=True),
        sa.Column('progress', sa.Float(), server_default='0'),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('result_data', JSONB, nullable=True),
        sa.PrimaryKeyConstraint('id'),
    )
    for index_name, column_name in (
        ('ix_v2_processing_jobs_status', 'status'),
        ('ix_v2_processing_jobs_transcript_id', 'transcript_id'),
        ('ix_v2_processing_jobs_job_type', 'job_type'),
    ):
        op.create_index(index_name, 'v2_processing_jobs', [column_name])

    # Deleting a transcription result also deletes the jobs that point at it.
    op.create_foreign_key(
        'fk_v2_processing_jobs_transcript_id',
        'v2_processing_jobs', 'transcription_results',
        ['transcript_id'], ['id'],
        ondelete='CASCADE',
    )

    # --- transcription_results: new v2 columns -----------------------------
    # All columns are nullable, so existing rows need no values up front.
    for new_column in (
        sa.Column('pipeline_version', sa.String(20), nullable=True),
        sa.Column('enhanced_content', JSONB, nullable=True),
        sa.Column('diarization_content', JSONB, nullable=True),
        sa.Column('merged_content', JSONB, nullable=True),
        sa.Column('domain_used', sa.String(100), nullable=True),
        sa.Column('accuracy_estimate', sa.Float(), nullable=True),
        sa.Column('speaker_count', sa.Integer(), nullable=True),
        sa.Column('quality_warnings', JSONB, nullable=True),
        sa.Column('processing_metadata', JSONB, nullable=True),
    ):
        op.add_column('transcription_results', new_column)

    # Mark every pre-existing transcript as produced by the v1 pipeline.
    # Done before the indexes below exist so the UPDATE does not have to
    # maintain the pipeline_version index for each rewritten row.
    op.execute("""
        UPDATE transcription_results
        SET pipeline_version = 'v1'
        WHERE pipeline_version IS NULL
    """)

    # Indexes on the new columns, built in one pass over the backfilled data.
    for index_name, column_name in (
        ('ix_transcription_results_pipeline_version', 'pipeline_version'),
        ('ix_transcription_results_domain_used', 'domain_used'),
        ('ix_transcription_results_speaker_count', 'speaker_count'),
    ):
        op.create_index(index_name, 'transcription_results', [column_name])
def downgrade() -> None:
    """Revert the database from the v2 schema to the v1 schema.

    Undoes ``upgrade`` in reverse: the v2 indexes and columns are removed
    from ``transcription_results``, then ``v2_processing_jobs`` (foreign
    key, indexes, table) and finally ``speaker_profiles`` (indexes, table)
    are dropped. Any data held in the dropped columns and tables is
    discarded.
    """
    results_table = 'transcription_results'
    jobs_table = 'v2_processing_jobs'
    profiles_table = 'speaker_profiles'

    # transcription_results: drop the indexes first, then the columns they
    # cover, in the reverse of the order they were added.
    for index_name in (
        'ix_transcription_results_speaker_count',
        'ix_transcription_results_domain_used',
        'ix_transcription_results_pipeline_version',
    ):
        op.drop_index(index_name, results_table)

    for column_name in (
        'processing_metadata',
        'quality_warnings',
        'speaker_count',
        'accuracy_estimate',
        'domain_used',
        'merged_content',
        'diarization_content',
        'enhanced_content',
        'pipeline_version',
    ):
        op.drop_column(results_table, column_name)

    # v2_processing_jobs: foreign key, then indexes, then the table itself.
    op.drop_constraint(
        'fk_v2_processing_jobs_transcript_id',
        jobs_table,
        type_='foreignkey',
    )
    for index_name in (
        'ix_v2_processing_jobs_job_type',
        'ix_v2_processing_jobs_transcript_id',
        'ix_v2_processing_jobs_status',
    ):
        op.drop_index(index_name, jobs_table)
    op.drop_table(jobs_table)

    # speaker_profiles: indexes, then the table.
    for index_name in (
        'ix_speaker_profiles_user_id',
        'ix_speaker_profiles_name',
    ):
        op.drop_index(index_name, profiles_table)
    op.drop_table(profiles_table)