"""Job history models for persistent storage-based job tracking.""" from pydantic import BaseModel, Field from typing import Optional, Dict, Any, List from datetime import datetime from enum import Enum class JobStatus(str, Enum): """Job processing status.""" COMPLETED = "completed" PROCESSING = "processing" FAILED = "failed" class ProcessingStatus(str, Enum): """Individual processing step status.""" COMPLETED = "completed" FAILED = "failed" PENDING = "pending" NOT_STARTED = "not_started" class VideoInfo(BaseModel): """Video information metadata.""" title: str url: str duration: Optional[int] = None # Duration in seconds thumbnail: Optional[str] = None channel: Optional[str] = None video_id: str class ProcessingDetails(BaseModel): """Details about processing steps.""" transcript: Dict[str, Any] = Field(default_factory=lambda: { "status": ProcessingStatus.NOT_STARTED, "method": None, "segments_count": None, "processing_time": None, "error": None }) summary: Dict[str, Any] = Field(default_factory=lambda: { "status": ProcessingStatus.NOT_STARTED, "model": None, "processing_time": None, "error": None }) created_at: datetime last_processed_at: datetime class JobFiles(BaseModel): """File paths associated with the job.""" audio: Optional[str] = None # Path to audio file audio_metadata: Optional[str] = None # Path to audio metadata JSON transcript: Optional[str] = None # Path to transcript text file transcript_json: Optional[str] = None # Path to transcript JSON with segments summary: Optional[str] = None # Path to summary file (future) class JobMetrics(BaseModel): """Job processing metrics.""" file_size_mb: Optional[float] = None processing_time_seconds: Optional[float] = None word_count: Optional[int] = None segment_count: Optional[int] = None audio_duration_seconds: Optional[float] = None class JobMetadata(BaseModel): """Complete job metadata schema.""" id: str # video_id status: JobStatus video_info: VideoInfo processing: ProcessingDetails files: JobFiles metadata: JobMetrics # Additional history features notes: Optional[str] = None tags: List[str] = Field(default_factory=list) is_starred: bool = False last_accessed: Optional[datetime] = None access_count: int = 0 class Config: use_enum_values = True json_encoders = { datetime: lambda v: v.isoformat() } class JobHistoryIndex(BaseModel): """Master index of all jobs.""" version: str = "1.0" total_jobs: int last_updated: datetime jobs: List[str] # List of video_ids # Index metadata total_storage_mb: Optional[float] = None oldest_job: Optional[datetime] = None newest_job: Optional[datetime] = None class Config: json_encoders = { datetime: lambda v: v.isoformat() } class JobHistoryQuery(BaseModel): """Query parameters for job history API.""" page: int = Field(1, ge=1) page_size: int = Field(15, ge=1, le=50) search: Optional[str] = None status_filter: Optional[List[JobStatus]] = None date_from: Optional[datetime] = None date_to: Optional[datetime] = None sort_by: str = Field("created_at", pattern="^(created_at|title|duration|processing_time|word_count)$") sort_order: str = Field("desc", pattern="^(asc|desc)$") starred_only: bool = False tags: Optional[List[str]] = None class JobHistoryResponse(BaseModel): """Response for job history list API.""" jobs: List[JobMetadata] total: int page: int page_size: int total_pages: int has_next: bool has_previous: bool class JobDetailResponse(BaseModel): """Response for individual job detail API.""" job: JobMetadata transcript_content: Optional[str] = None transcript_segments: Optional[List[Dict[str, Any]]] = None summary_content: Optional[str] = None file_exists: Dict[str, bool] = Field(default_factory=dict)