# 143 lines, 4.1 KiB, Python
"""Job history models for persistent storage-based job tracking."""
|
|
|
|
from pydantic import BaseModel, Field
|
|
from typing import Optional, Dict, Any, List
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
|
|
|
|
class JobStatus(str, Enum):
    """Job processing status.

    Members subclass ``str``, so each one compares equal to its plain
    string value (for example ``JobStatus.FAILED == "failed"``).
    """

    COMPLETED = "completed"
    PROCESSING = "processing"
    FAILED = "failed"
|
|
|
|
|
|
class ProcessingStatus(str, Enum):
    """Individual processing step status.

    Members subclass ``str``, so each one compares equal to its plain
    string value (for example ``ProcessingStatus.PENDING == "pending"``).
    """

    COMPLETED = "completed"
    FAILED = "failed"
    PENDING = "pending"
    NOT_STARTED = "not_started"
|
|
|
|
|
|
class VideoInfo(BaseModel):
    """Video information metadata.

    ``title``, ``url`` and ``video_id`` are required; the remaining fields
    are optional and default to ``None``.
    """

    title: str
    url: str
    duration: Optional[int] = None  # Duration in seconds
    thumbnail: Optional[str] = None
    channel: Optional[str] = None
    video_id: str
|
|
|
|
|
|
class ProcessingDetails(BaseModel):
    """Details about processing steps.

    ``transcript`` and ``summary`` are free-form dictionaries whose default
    value marks the step as not started with every other key set to
    ``None``. ``created_at`` and ``last_processed_at`` are required.
    """

    # default_factory builds a fresh dict for every instance, so instances
    # never share (and accidentally mutate) the same default object.
    transcript: Dict[str, Any] = Field(default_factory=lambda: {
        "status": ProcessingStatus.NOT_STARTED,
        "method": None,
        "segments_count": None,
        "processing_time": None,
        "error": None,
    })
    summary: Dict[str, Any] = Field(default_factory=lambda: {
        "status": ProcessingStatus.NOT_STARTED,
        "model": None,
        "processing_time": None,
        "error": None,
    })
    created_at: datetime
    last_processed_at: datetime
|
|
|
|
|
|
class JobFiles(BaseModel):
    """File paths associated with the job.

    Every field is optional and defaults to ``None``.
    """

    audio: Optional[str] = None  # Path to audio file
    audio_metadata: Optional[str] = None  # Path to audio metadata JSON
    transcript: Optional[str] = None  # Path to transcript text file
    transcript_json: Optional[str] = None  # Path to transcript JSON with segments
    summary: Optional[str] = None  # Path to summary file (future)
|
|
|
|
|
|
class JobMetrics(BaseModel):
    """Job processing metrics.

    Every metric is optional and defaults to ``None``; units are carried
    in the field names (``_mb``, ``_seconds``).
    """

    file_size_mb: Optional[float] = None
    processing_time_seconds: Optional[float] = None
    word_count: Optional[int] = None
    segment_count: Optional[int] = None
    audio_duration_seconds: Optional[float] = None
|
|
|
|
|
|
class JobMetadata(BaseModel):
    """Complete job metadata schema.

    Combines the job's status, video information, per-step processing
    details, file paths and metrics with the additional history fields
    (notes, tags, starring and access tracking).
    """

    id: str  # video_id
    status: JobStatus
    video_info: VideoInfo
    processing: ProcessingDetails
    files: JobFiles
    metadata: JobMetrics

    # Additional history features
    notes: Optional[str] = None
    tags: List[str] = Field(default_factory=list)  # fresh list per instance
    is_starred: bool = False
    last_accessed: Optional[datetime] = None
    access_count: int = 0

    class Config:
        # NOTE(review): `class Config` / `json_encoders` is the Pydantic v1
        # spelling, while `Field(pattern=...)` in this file suggests v2,
        # where both are deprecated. Confirm the target version before
        # migrating to `model_config`.
        use_enum_values = True
        # Serialize datetimes as ISO 8601 strings in JSON output.
        json_encoders = {
            datetime: lambda v: v.isoformat(),
        }
|
|
|
|
|
|
class JobHistoryIndex(BaseModel):
    """Master index of all jobs.

    ``total_jobs``, ``last_updated`` and ``jobs`` are required; ``version``
    defaults to ``"1.0"`` and the index metadata fields default to ``None``.
    """

    version: str = "1.0"
    total_jobs: int
    last_updated: datetime
    jobs: List[str]  # List of video_ids

    # Index metadata
    total_storage_mb: Optional[float] = None
    oldest_job: Optional[datetime] = None
    newest_job: Optional[datetime] = None

    class Config:
        # Serialize datetimes as ISO 8601 strings in JSON output.
        json_encoders = {
            datetime: lambda v: v.isoformat(),
        }
|
|
|
|
|
|
class JobHistoryQuery(BaseModel):
    """Query parameters for job history API.

    Covers pagination, free-text search, status/date/tag filters and
    sorting. Every field has a default, so an empty query is valid.
    """

    # Pagination: page is at least 1; page_size is between 1 and 50.
    page: int = Field(1, ge=1)
    page_size: int = Field(15, ge=1, le=50)

    # Filters; None means the filter is not set.
    search: Optional[str] = None
    status_filter: Optional[List[JobStatus]] = None
    date_from: Optional[datetime] = None
    date_to: Optional[datetime] = None

    # Sorting: both values are restricted to the alternatives in the pattern.
    sort_by: str = Field(
        "created_at",
        pattern="^(created_at|title|duration|processing_time|word_count)$",
    )
    sort_order: str = Field("desc", pattern="^(asc|desc)$")

    starred_only: bool = False
    tags: Optional[List[str]] = None
|
|
|
|
|
|
class JobHistoryResponse(BaseModel):
    """Response for job history list API.

    Carries one page of jobs together with the pagination bookkeeping;
    every field is required.
    """

    jobs: List[JobMetadata]
    total: int
    page: int
    page_size: int
    total_pages: int
    has_next: bool
    has_previous: bool
|
|
|
|
|
|
class JobDetailResponse(BaseModel):
    """Response for individual job detail API.

    Only ``job`` is required; the content fields default to ``None`` and
    ``file_exists`` defaults to an empty dict.
    """

    job: JobMetadata
    transcript_content: Optional[str] = None
    transcript_segments: Optional[List[Dict[str, Any]]] = None
    summary_content: Optional[str] = None
    # NOTE(review): presumably keyed by the JobFiles field names; confirm
    # against the code that populates it.
    file_exists: Dict[str, bool] = Field(default_factory=dict)