# youtube-summarizer/backend/models/job_history.py
"""Job history models for persistent storage-based job tracking."""
from pydantic import BaseModel, Field
from typing import Optional, Dict, Any, List
from datetime import datetime
from enum import Enum
class JobStatus(str, Enum):
    """Overall lifecycle state of a summarization job.

    Subclassing ``str`` keeps members directly comparable to (and
    serializable as) their plain string values.
    """

    COMPLETED = "completed"
    PROCESSING = "processing"
    FAILED = "failed"
class ProcessingStatus(str, Enum):
    """State of one processing step (transcript or summary).

    ``str`` mixin makes members interchangeable with their raw string
    values in comparisons and JSON output.
    """

    COMPLETED = "completed"
    FAILED = "failed"
    PENDING = "pending"
    NOT_STARTED = "not_started"
class VideoInfo(BaseModel):
    """Metadata describing the source video of a job."""

    title: str
    url: str
    # Length of the video in seconds, when known.
    duration: Optional[int] = None
    # Thumbnail image URL, when available.
    thumbnail: Optional[str] = None
    # Channel name, when available.
    channel: Optional[str] = None
    video_id: str
class ProcessingDetails(BaseModel):
    """Per-step processing state for a job.

    Each step is tracked as a free-form dict so extra diagnostic keys can
    be attached without a schema migration; both start in
    ``ProcessingStatus.NOT_STARTED`` with every result field unset.
    """

    # Transcript-extraction step: status, extraction method, segment
    # count, elapsed time, and last error (all unset until the step runs).
    transcript: Dict[str, Any] = Field(default_factory=lambda: {
        "status": ProcessingStatus.NOT_STARTED,
        "method": None,
        "segments_count": None,
        "processing_time": None,
        "error": None
    })
    # Summarization step: status, model used, elapsed time, last error.
    summary: Dict[str, Any] = Field(default_factory=lambda: {
        "status": ProcessingStatus.NOT_STARTED,
        "model": None,
        "processing_time": None,
        "error": None
    })
    created_at: datetime
    last_processed_at: datetime
class JobFiles(BaseModel):
    """On-disk artifact paths produced for a job (all optional)."""

    # Downloaded audio file.
    audio: Optional[str] = None
    # JSON sidecar with audio metadata.
    audio_metadata: Optional[str] = None
    # Plain-text transcript.
    transcript: Optional[str] = None
    # Transcript JSON including per-segment data.
    transcript_json: Optional[str] = None
    # Summary file (reserved for future use per original note).
    summary: Optional[str] = None
class JobMetrics(BaseModel):
    """Aggregate size/timing metrics recorded for a job; all fields are
    optional because metrics accumulate as processing progresses."""

    file_size_mb: Optional[float] = None
    processing_time_seconds: Optional[float] = None
    word_count: Optional[int] = None
    segment_count: Optional[int] = None
    audio_duration_seconds: Optional[float] = None
class JobMetadata(BaseModel):
    """Full persisted record for a single job.

    ``id`` is the video_id, which doubles as the job's primary key in the
    history index.
    """

    id: str
    status: JobStatus
    video_info: VideoInfo
    processing: ProcessingDetails
    files: JobFiles
    metadata: JobMetrics

    # History/UX extras layered on top of the core job record.
    notes: Optional[str] = None
    tags: List[str] = Field(default_factory=list)
    is_starred: bool = False
    last_accessed: Optional[datetime] = None
    access_count: int = 0

    class Config:
        # Serialize enum members as their plain string values.
        use_enum_values = True
        # Emit datetimes as ISO-8601 strings in JSON output.
        json_encoders = {
            datetime: lambda v: v.isoformat()
        }
class JobHistoryIndex(BaseModel):
    """Master index over every stored job.

    Holds the ordered list of known video_ids plus summary statistics
    about the collection as a whole.
    """

    # Schema version of the index file.
    version: str = "1.0"
    total_jobs: int
    last_updated: datetime
    # video_ids of every job in the store.
    jobs: List[str]

    # Collection-level statistics (filled in lazily).
    total_storage_mb: Optional[float] = None
    oldest_job: Optional[datetime] = None
    newest_job: Optional[datetime] = None

    class Config:
        # Emit datetimes as ISO-8601 strings in JSON output.
        json_encoders = {
            datetime: lambda v: v.isoformat()
        }
class JobHistoryQuery(BaseModel):
    """Validated query parameters for the job-history listing endpoint."""

    # Pagination: 1-based page number, 1-50 items per page (default 15).
    page: int = Field(1, ge=1)
    page_size: int = Field(15, ge=1, le=50)
    # Free-text search term.
    search: Optional[str] = None
    # Restrict to these job statuses.
    status_filter: Optional[List[JobStatus]] = None
    # Inclusive creation-date window.
    date_from: Optional[datetime] = None
    date_to: Optional[datetime] = None
    # Sort key and direction, constrained to a whitelist via regex.
    sort_by: str = Field("created_at", pattern="^(created_at|title|duration|processing_time|word_count)$")
    sort_order: str = Field("desc", pattern="^(asc|desc)$")
    # Show only starred jobs.
    starred_only: bool = False
    # Require all/any of these tags (matching semantics decided by the API layer).
    tags: Optional[List[str]] = None
class JobHistoryResponse(BaseModel):
    """Paginated payload returned by the job-history listing endpoint."""

    # The page of job records.
    jobs: List[JobMetadata]
    # Total matching jobs across all pages.
    total: int
    # Echoed pagination state plus navigation hints.
    page: int
    page_size: int
    total_pages: int
    has_next: bool
    has_previous: bool
class JobDetailResponse(BaseModel):
    """Payload returned by the single-job detail endpoint."""

    job: JobMetadata
    # Raw transcript text, when loaded.
    transcript_content: Optional[str] = None
    # Transcript broken into segment dicts, when loaded.
    transcript_segments: Optional[List[Dict[str, Any]]] = None
    # Summary text, when loaded.
    summary_content: Optional[str] = None
    # Map of artifact name -> whether the file exists on disk.
    file_exists: Dict[str, bool] = Field(default_factory=dict)