135 lines
6.2 KiB
Python
135 lines
6.2 KiB
Python
"""Add v2 schema
|
|
|
|
Revision ID: 20241230_add_v2_schema
|
|
Revises: dcdfa10e65bd
|
|
Create Date: 2024-12-30 10:00:00.000000
|
|
|
|
"""
|
|
from alembic import op
|
|
import sqlalchemy as sa
|
|
from sqlalchemy.dialects.postgresql import JSONB
|
|
|
|
# Alembic revision metadata.
# `revision` is this migration's ID; `down_revision` is the ID of the
# migration it is applied on top of.
revision = '20241230_add_v2_schema'
down_revision = 'dcdfa10e65bd'
# Neither a branch label nor a dependency on another revision is declared.
branch_labels = None
depends_on = None
|
|
|
|
|
|
def upgrade() -> None:
    """Apply the v2 schema.

    Operations, in execution order:

    1. Create ``speaker_profiles`` and index its ``name`` and ``user_id``
       columns.
    2. Create ``v2_processing_jobs``, index its ``status``,
       ``transcript_id`` and ``job_type`` columns, and add a foreign key
       from ``transcript_id`` to ``transcription_results.id`` with
       ``ON DELETE CASCADE``.
    3. Add nine nullable v2 columns to ``transcription_results`` and index
       ``pipeline_version``, ``domain_used`` and ``speaker_count``.
    4. Set ``pipeline_version`` to ``'v1'`` on every row where it is NULL.
    """

    def timestamp_column(column_name, **column_kwargs):
        # Timezone-aware timestamp column; callers pass the server default
        # (or nullability) they need as keyword arguments.
        return sa.Column(column_name, sa.TIMESTAMP(timezone=True), **column_kwargs)

    # Built fresh for each column so no clause object is shared between them.
    def current_timestamp():
        return sa.text('CURRENT_TIMESTAMP')

    # --- speaker_profiles ---------------------------------------------------
    speaker_profile_columns = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('name', sa.String(255), nullable=False),
        timestamp_column('created_at', server_default=current_timestamp()),
        timestamp_column('updated_at', server_default=current_timestamp()),
        sa.Column('characteristics', JSONB, nullable=True),
        sa.Column('embedding', sa.Text(), nullable=True),
        sa.Column('sample_count', sa.Integer(), server_default='0'),
        sa.Column('user_id', sa.Integer(), nullable=True),
    ]
    op.create_table(
        'speaker_profiles',
        *speaker_profile_columns,
        sa.PrimaryKeyConstraint('id'),
    )

    for index_name, column_name in (
        ('ix_speaker_profiles_name', 'name'),
        ('ix_speaker_profiles_user_id', 'user_id'),
    ):
        op.create_index(index_name, 'speaker_profiles', [column_name])

    # --- v2_processing_jobs -------------------------------------------------
    processing_job_columns = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('status', sa.String(50), server_default='pending', nullable=False),
        timestamp_column('created_at', server_default=current_timestamp()),
        timestamp_column('updated_at', server_default=current_timestamp()),
        timestamp_column('completed_at', nullable=True),
        sa.Column('transcript_id', sa.UUID(), nullable=True),
        sa.Column('job_type', sa.String(50), nullable=False),
        sa.Column('parameters', JSONB, nullable=True),
        sa.Column('progress', sa.Float(), server_default='0'),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('result_data', JSONB, nullable=True),
    ]
    op.create_table(
        'v2_processing_jobs',
        *processing_job_columns,
        sa.PrimaryKeyConstraint('id'),
    )

    for index_name, column_name in (
        ('ix_v2_processing_jobs_status', 'status'),
        ('ix_v2_processing_jobs_transcript_id', 'transcript_id'),
        ('ix_v2_processing_jobs_job_type', 'job_type'),
    ):
        op.create_index(index_name, 'v2_processing_jobs', [column_name])

    # Deleting a transcription_results row also deletes the jobs that
    # reference it (ON DELETE CASCADE).
    op.create_foreign_key(
        'fk_v2_processing_jobs_transcript_id',
        'v2_processing_jobs',
        'transcription_results',
        ['transcript_id'],
        ['id'],
        ondelete='CASCADE',
    )

    # --- transcription_results: v2 columns ----------------------------------
    # All new columns are nullable, so existing rows need no values for them.
    v2_result_columns = (
        ('pipeline_version', sa.String(20)),
        ('enhanced_content', JSONB),
        ('diarization_content', JSONB),
        ('merged_content', JSONB),
        ('domain_used', sa.String(100)),
        ('accuracy_estimate', sa.Float()),
        ('speaker_count', sa.Integer()),
        ('quality_warnings', JSONB),
        ('processing_metadata', JSONB),
    )
    for column_name, column_type in v2_result_columns:
        op.add_column(
            'transcription_results',
            sa.Column(column_name, column_type, nullable=True),
        )

    for index_name, column_name in (
        ('ix_transcription_results_pipeline_version', 'pipeline_version'),
        ('ix_transcription_results_domain_used', 'domain_used'),
        ('ix_transcription_results_speaker_count', 'speaker_count'),
    ):
        op.create_index(index_name, 'transcription_results', [column_name])

    # Backfill: rows that existed before this migration are tagged as 'v1'.
    op.execute("""
        UPDATE transcription_results
        SET pipeline_version = 'v1'
        WHERE pipeline_version IS NULL
    """)
|
|
|
|
|
|
def downgrade() -> None:
    """Revert the v2 schema.

    Undoes ``upgrade()`` in reverse order: drops the v2 indexes and columns
    on ``transcription_results``, then the foreign key, indexes and table
    for ``v2_processing_jobs``, and finally the indexes and table for
    ``speaker_profiles``. Data stored in the dropped columns and tables,
    including the ``'v1'`` pipeline_version backfill, is discarded.
    """
    results_table = 'transcription_results'
    jobs_table = 'v2_processing_jobs'
    profiles_table = 'speaker_profiles'

    # --- transcription_results ----------------------------------------------
    # Indexes go first, then the columns they were built on.
    for index_name in (
        'ix_transcription_results_speaker_count',
        'ix_transcription_results_domain_used',
        'ix_transcription_results_pipeline_version',
    ):
        op.drop_index(index_name, results_table)

    for column_name in (
        'processing_metadata',
        'quality_warnings',
        'speaker_count',
        'accuracy_estimate',
        'domain_used',
        'merged_content',
        'diarization_content',
        'enhanced_content',
        'pipeline_version',
    ):
        op.drop_column(results_table, column_name)

    # --- v2_processing_jobs -------------------------------------------------
    op.drop_constraint(
        'fk_v2_processing_jobs_transcript_id',
        jobs_table,
        type_='foreignkey',
    )

    for index_name in (
        'ix_v2_processing_jobs_job_type',
        'ix_v2_processing_jobs_transcript_id',
        'ix_v2_processing_jobs_status',
    ):
        op.drop_index(index_name, jobs_table)

    op.drop_table(jobs_table)

    # --- speaker_profiles ---------------------------------------------------
    for index_name in (
        'ix_speaker_profiles_user_id',
        'ix_speaker_profiles_name',
    ):
        op.drop_index(index_name, profiles_table)

    op.drop_table(profiles_table)
|