# youtube-summarizer/backend/models/batch_job.py

# 128 lines
# 4.8 KiB
# Python

"""
Batch job models for processing multiple YouTube videos
"""
from sqlalchemy import Column, String, Integer, JSON, DateTime, ForeignKey, Text, Float
from sqlalchemy.orm import relationship
from datetime import datetime
import uuid
from backend.models.base import Model
class BatchJob(Model):
    """Model for batch video processing jobs.

    One row per submitted batch. It stores the request configuration (URLs,
    model, summary length, extra options), aggregate progress counters,
    timing information, results and accumulated cost. The individual videos
    of the batch are reachable through ``items``.
    """

    __tablename__ = "batch_jobs"

    # Primary key and user reference
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String, ForeignKey("users.id"), nullable=False)

    # Job metadata
    name = Column(String(255))
    # One of: pending, processing, completed, cancelled, failed
    status = Column(String(50), default="pending")

    # Configuration
    urls = Column(JSON, nullable=False)  # List of YouTube URLs
    model = Column(String(50), default="deepseek")
    summary_length = Column(String(20), default="standard")
    # Additional options like focus_areas, include_timestamps
    options = Column(JSON)

    # Progress tracking
    total_videos = Column(Integer, nullable=False)
    completed_videos = Column(Integer, default=0)
    failed_videos = Column(Integer, default=0)
    skipped_videos = Column(Integer, default=0)

    # Timing
    created_at = Column(DateTime, default=datetime.utcnow)
    started_at = Column(DateTime)
    completed_at = Column(DateTime)
    estimated_completion = Column(DateTime)
    total_processing_time = Column(Float)  # in seconds

    # Results
    results = Column(JSON)  # Array of {url, summary_id, status, error}
    export_url = Column(String(500))

    # Cost tracking
    total_cost_usd = Column(Float, default=0.0)

    # Relationships
    user = relationship("backend.models.user.User", back_populates="batch_jobs")
    # Items are owned by the job: removing the job (or detaching an item from
    # it) deletes the item as well.
    items = relationship(
        "backend.models.batch_job.BatchJobItem",
        back_populates="batch_job",
        cascade="all, delete-orphan",
    )

    def to_dict(self):
        """Convert to dictionary for API responses.

        Returns:
            A dict of plain values. Timestamps are ISO-8601 strings, or
            ``None`` when the corresponding column is unset.
        """
        return {
            "id": self.id,
            "name": self.name,
            "status": self.status,
            "total_videos": self.total_videos,
            "completed_videos": self.completed_videos,
            "failed_videos": self.failed_videos,
            "progress_percentage": self.get_progress_percentage(),
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "started_at": self.started_at.isoformat() if self.started_at else None,
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
            "export_url": self.export_url,
            "total_cost_usd": self.total_cost_usd,
        }

    def get_progress_percentage(self):
        """Calculate progress percentage.

        A video counts towards progress once it has either completed or
        failed.

        Counters that are still ``None`` are treated as 0: column defaults
        are applied when the row is inserted, so a newly constructed job that
        has not been flushed yet has no counter values, and adding or
        dividing ``None`` would raise ``TypeError``.

        Returns:
            0 when ``total_videos`` is unset or zero, otherwise the
            percentage rounded to one decimal place.
        """
        total = self.total_videos
        if not total:
            return 0
        # NOTE(review): skipped_videos is not part of this sum, so a batch
        # with skipped items may never report 100% - confirm this is intended.
        finished = (self.completed_videos or 0) + (self.failed_videos or 0)
        return round(finished / total * 100, 1)
class BatchJobItem(Model):
    """A single video entry belonging to a batch job.

    Each row records the URL to process, its position in the batch, the
    processing outcome (status, timing, error details, retries, cost) and an
    optional link to the summary row.
    """

    __tablename__ = "batch_job_items"

    # --- Keys ---------------------------------------------------------------
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    batch_job_id = Column(
        String,
        ForeignKey("batch_jobs.id", ondelete="CASCADE"),
        nullable=False,
    )
    summary_id = Column(String, ForeignKey("summaries.id"), nullable=True)

    # --- Item details -------------------------------------------------------
    url = Column(String(500), nullable=False)
    position = Column(Integer, nullable=False)  # order within the batch
    # One of: pending, processing, completed, failed, skipped
    status = Column(String(50), default="pending")

    # --- Video metadata (populated during processing) -----------------------
    video_id = Column(String(20))
    video_title = Column(String(500))
    channel_name = Column(String(255))
    duration_seconds = Column(Integer)

    # --- Processing details -------------------------------------------------
    started_at = Column(DateTime)
    completed_at = Column(DateTime)
    processing_time_seconds = Column(Float)

    # --- Error tracking -----------------------------------------------------
    error_message = Column(Text)
    # e.g. validation_error, api_error, timeout
    error_type = Column(String(100))
    retry_count = Column(Integer, default=0)
    max_retries = Column(Integer, default=2)

    # --- Cost tracking ------------------------------------------------------
    cost_usd = Column(Float, default=0.0)

    # --- Relationships ------------------------------------------------------
    batch_job = relationship(
        "backend.models.batch_job.BatchJob",
        back_populates="items",
    )
    summary = relationship("backend.models.summary.Summary")

    def to_dict(self):
        """Serialize the API-facing fields of this item into a plain dict.

        Returns:
            A dict keyed by attribute name, holding the current value of each
            exposed attribute.
        """
        exposed_fields = (
            "id",
            "url",
            "position",
            "status",
            "video_title",
            "error_message",
            "summary_id",
            "retry_count",
            "processing_time_seconds",
        )
        return {field: getattr(self, field) for field in exposed_fields}