128 lines
4.8 KiB
Python
128 lines
4.8 KiB
Python
"""
SQLAlchemy models for batch processing of multiple YouTube videos:
the batch job itself (BatchJob) and its per-video items (BatchJobItem).
"""
|
|
from sqlalchemy import Column, String, Integer, JSON, DateTime, ForeignKey, Text, Float
|
|
from sqlalchemy.orm import relationship
|
|
from datetime import datetime
|
|
import uuid
|
|
|
|
from backend.models.base import Model
|
|
|
|
|
|
class BatchJob(Model):
    """Model for batch video processing jobs.

    One row describes a whole batch: the submitted URLs and summarization
    settings, aggregate progress counters, timing information, the collected
    results and the accumulated cost. The individual videos of the batch are
    stored as ``BatchJobItem`` rows reachable through ``items``.
    """

    __tablename__ = "batch_jobs"

    # Primary key and user reference
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String, ForeignKey("users.id"), nullable=False)

    # Job metadata
    name = Column(String(255))
    status = Column(String(50), default="pending")  # pending, processing, completed, cancelled, failed

    # Configuration
    urls = Column(JSON, nullable=False)  # List of YouTube URLs
    model = Column(String(50), default="deepseek")
    summary_length = Column(String(20), default="standard")
    options = Column(JSON)  # Additional options like focus_areas, include_timestamps

    # Progress tracking
    total_videos = Column(Integer, nullable=False)
    completed_videos = Column(Integer, default=0)
    failed_videos = Column(Integer, default=0)
    skipped_videos = Column(Integer, default=0)

    # Timing
    created_at = Column(DateTime, default=datetime.utcnow)
    started_at = Column(DateTime)
    completed_at = Column(DateTime)
    estimated_completion = Column(DateTime)
    total_processing_time = Column(Float)  # in seconds

    # Results
    results = Column(JSON)  # Array of {url, summary_id, status, error}
    export_url = Column(String(500))

    # Cost tracking
    total_cost_usd = Column(Float, default=0.0)

    # Relationships
    user = relationship("backend.models.user.User", back_populates="batch_jobs")
    items = relationship(
        "backend.models.batch_job.BatchJobItem",
        back_populates="batch_job",
        cascade="all, delete-orphan",
    )

    def to_dict(self):
        """Convert to dictionary for API responses.

        Returns:
            dict: Identification, status, progress counters, the computed
            ``progress_percentage``, ISO-8601 timestamps (``None`` when the
            timestamp is unset), export URL and total cost.
        """
        return {
            "id": self.id,
            "name": self.name,
            "status": self.status,
            "total_videos": self.total_videos,
            "completed_videos": self.completed_videos,
            "failed_videos": self.failed_videos,
            "progress_percentage": self.get_progress_percentage(),
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "started_at": self.started_at.isoformat() if self.started_at else None,
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
            "export_url": self.export_url,
            "total_cost_usd": self.total_cost_usd,
        }

    def get_progress_percentage(self):
        """Calculate progress percentage.

        Progress is the share of videos that are either completed or failed,
        relative to ``total_videos``, rounded to one decimal place.

        Returns:
            int | float: ``0`` when there are no videos (or the total is not
            set yet), otherwise the percentage in the range produced by the
            counters.
        """
        # Column defaults are applied by SQLAlchemy at INSERT time, so on an
        # instance that has not been flushed yet the counters are still None.
        # Treat missing counters as zero instead of raising TypeError.
        total = self.total_videos or 0
        if not total:
            return 0
        finished = (self.completed_videos or 0) + (self.failed_videos or 0)
        # NOTE(review): skipped_videos is not counted as progress here, same
        # as before — confirm whether skipped items should count as finished.
        return round(finished / total * 100, 1)
|
|
|
|
|
|
class BatchJobItem(Model):
    """A single video entry belonging to a batch job.

    Each row references its parent job through ``batch_job_id`` and, once a
    summary exists, the summary through ``summary_id``.
    """

    __tablename__ = "batch_job_items"

    # --- Keys -------------------------------------------------------------
    id = Column(
        String,
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    batch_job_id = Column(
        String,
        ForeignKey("batch_jobs.id", ondelete="CASCADE"),
        nullable=False,
    )
    summary_id = Column(String, ForeignKey("summaries.id"), nullable=True)

    # --- What to process --------------------------------------------------
    url = Column(String(500), nullable=False)
    position = Column(Integer, nullable=False)  # ordering of the item inside the batch
    # One of: pending, processing, completed, failed, skipped
    status = Column(String(50), default="pending")

    # --- Video metadata, filled in while the item is processed ------------
    video_id = Column(String(20))
    video_title = Column(String(500))
    channel_name = Column(String(255))
    duration_seconds = Column(Integer)

    # --- Processing timing ------------------------------------------------
    started_at = Column(DateTime)
    completed_at = Column(DateTime)
    processing_time_seconds = Column(Float)

    # --- Failure bookkeeping ----------------------------------------------
    error_message = Column(Text)
    # e.g. validation_error, api_error, timeout
    error_type = Column(String(100))
    retry_count = Column(Integer, default=0)
    max_retries = Column(Integer, default=2)

    # --- Cost ---------------------------------------------------------------
    cost_usd = Column(Float, default=0.0)

    # --- ORM relationships --------------------------------------------------
    batch_job = relationship(
        "backend.models.batch_job.BatchJob",
        back_populates="items",
    )
    summary = relationship("backend.models.summary.Summary")

    def to_dict(self):
        """Return the API-facing fields of this item as a plain dictionary.

        The keys are emitted in a fixed order and map one-to-one onto the
        attributes of the same name.
        """
        exposed_fields = (
            "id",
            "url",
            "position",
            "status",
            "video_title",
            "error_message",
            "summary_id",
            "retry_count",
            "processing_time_seconds",
        )
        return {field: getattr(self, field) for field in exposed_fields}