# youtube-summarizer/backend/models/video.py
#
# 311 lines
# 12 KiB
# Python

"""
Video data models for request/response handling.
"""
from pydantic import BaseModel, Field, HttpUrl
from typing import Optional, List, Dict, Any
from datetime import datetime
from enum import Enum
class VideoQuality(str, Enum):
    """Video quality options."""

    # Mixing in ``str`` makes each member compare equal to its plain string
    # value, so ``VideoQuality.MEDIUM_720P == "720p"`` holds and the enum can
    # be built directly from that string: ``VideoQuality("720p")``.
    BEST = "best"
    HIGH_1080P = "1080p"
    MEDIUM_720P = "720p"
    LOW_480P = "480p"
    AUDIO_ONLY = "audio"
class DownloadStatus(str, Enum):
    """Download status states."""

    # ``str`` mixin: members compare equal to their lowercase string values
    # and can be looked up from them, e.g. ``DownloadStatus("failed")``.
    PENDING = "pending"
    DOWNLOADING = "downloading"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
class VideoDownloadRequest(BaseModel):
    """Request model for video download."""

    # ``url`` is the only required field; the remaining options fall back to
    # the defaults below. ``HttpUrl`` checks URL shape only -- nothing in this
    # model restricts the host to YouTube.
    url: HttpUrl = Field(..., description="YouTube video URL")
    quality: VideoQuality = Field(VideoQuality.MEDIUM_720P, description="Video quality to download")
    extract_audio: bool = Field(True, description="Extract audio from video")
    force_download: bool = Field(False, description="Force re-download even if cached")
    keep_video: bool = Field(True, description="Keep video after processing")

    class Config:
        # Sample payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
                "quality": "720p",
                "extract_audio": True,
                "force_download": False,
                "keep_video": True,
            },
        }
class VideoInfo(BaseModel):
    """Video information model."""

    # Required identity / core metadata.
    video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")
    channel: str = Field(..., description="Channel name")
    duration: int = Field(..., description="Duration in seconds")

    # Optional extras; each one defaults to None when omitted.
    thumbnail_url: Optional[str] = Field(default=None, description="Thumbnail URL")
    description: Optional[str] = Field(default=None, description="Video description")
    view_count: Optional[int] = Field(default=None, description="View count")
    upload_date: Optional[str] = Field(default=None, description="Upload date")

    # Unlike the optionals above, an omitted ``tags`` becomes a fresh empty
    # list rather than None (an explicit None is still accepted by the type).
    tags: Optional[List[str]] = Field(
        default_factory=list,
        description="Video tags",
    )
class DownloadProgress(BaseModel):
    """Download progress information."""

    video_id: str = Field(..., description="Video ID")
    status: DownloadStatus = Field(..., description="Current status")

    # Progress read-outs are carried as strings, not numbers.
    # NOTE(review): presumably pre-formatted by the downloader -- confirm.
    percent: Optional[str] = Field(default=None, description="Download percentage")
    speed: Optional[str] = Field(default=None, description="Download speed")
    eta: Optional[str] = Field(default=None, description="Estimated time remaining")

    # Raw byte counters.
    downloaded_bytes: Optional[int] = Field(default=None, description="Bytes downloaded")
    total_bytes: Optional[int] = Field(default=None, description="Total file size")

    # Stamped at model creation with ``datetime.now()``: a naive, local-time
    # value carrying no timezone information.
    timestamp: datetime = Field(
        default_factory=datetime.now,
        description="Last update time",
    )
    error: Optional[str] = Field(default=None, description="Error message if failed")
class VideoResponse(BaseModel):
    """Response model for successful video download."""

    # What was downloaded.
    video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")

    # Where it was stored; ``audio_path`` is None unless one is supplied.
    video_path: str = Field(..., description="Path to downloaded video")
    audio_path: Optional[str] = Field(default=None, description="Path to extracted audio")

    # Download facts. ``download_date`` and ``quality`` are plain strings in
    # this model (not ``datetime`` / ``VideoQuality``).
    download_date: str = Field(..., description="Download timestamp")
    size_mb: float = Field(..., description="File size in MB")
    duration: int = Field(..., description="Duration in seconds")
    quality: str = Field(..., description="Downloaded quality")
    cached: bool = Field(False, description="Was already cached")

    class Config:
        # Sample payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "video_id": "dQw4w9WgXcQ",
                "title": "Rick Astley - Never Gonna Give You Up",
                "video_path": "/data/youtube-videos/videos/dQw4w9WgXcQ.mp4",
                "audio_path": "/data/youtube-videos/audio/dQw4w9WgXcQ.mp3",
                "download_date": "2025-01-26T12:00:00",
                "size_mb": 25.6,
                "duration": 213,
                "quality": "720p",
                "cached": False,
            },
        }
class StorageStats(BaseModel):
    """Storage statistics model."""

    # Usage totals (the same quantity reported in both MB and GB).
    total_videos: int = Field(..., description="Total number of videos")
    total_size_mb: float = Field(..., description="Total storage used in MB")
    total_size_gb: float = Field(..., description="Total storage used in GB")

    # Capacity and headroom.
    max_size_gb: float = Field(..., description="Maximum allowed storage in GB")
    available_mb: float = Field(..., description="Available storage in MB")
    available_gb: float = Field(..., description="Available storage in GB")
    usage_percent: float = Field(..., description="Storage usage percentage")

    # Settings reported alongside the numbers.
    video_quality: str = Field(..., description="Default video quality")
    keep_videos: bool = Field(..., description="Keep videos after processing")

    # Optional per-category breakdown: category name -> size in MB.
    by_category: Optional[Dict[str, float]] = Field(default=None, description="Storage usage by category in MB")

    class Config:
        # Sample payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "total_videos": 42,
                "total_size_mb": 5120.5,
                "total_size_gb": 5.0,
                "max_size_gb": 10.0,
                "available_mb": 5119.5,
                "available_gb": 5.0,
                "usage_percent": 50.0,
                "video_quality": "720p",
                "keep_videos": True,
                "by_category": {
                    "videos": 4500.0,
                    "audio": 500.0,
                    "metadata": 20.5,
                    "thumbnails": 100.0,
                },
            },
        }
class CleanupRequest(BaseModel):
    """Request model for storage cleanup."""

    # Optional target amount to reclaim. ``ge=0`` rejects negative byte
    # counts at validation time instead of passing them on to the cleanup
    # code.
    bytes_to_free: Optional[int] = Field(
        None,
        ge=0,
        description="Specific number of bytes to free"
    )

    # Independent switches, all enabled by default.
    cleanup_old_files: bool = Field(
        default=True,
        description="Remove old files"
    )
    cleanup_temp: bool = Field(
        default=True,
        description="Clean temporary files"
    )
    cleanup_orphaned: bool = Field(
        default=True,
        description="Remove orphaned files not in cache"
    )

    # Age cut-off for "old" files. ``ge=0`` rejects a negative threshold,
    # which has no sensible meaning as an age in days.
    # NOTE(review): how the consumer applies this threshold is not visible
    # here -- confirm that 0 ("everything is old") is an acceptable input.
    days_threshold: int = Field(
        default=30,
        ge=0,
        description="Age threshold in days for old files"
    )
class CleanupResponse(BaseModel):
    """Response model for cleanup operation."""

    # Space reclaimed, reported in three units.
    bytes_freed: int = Field(..., description="Total bytes freed")
    mb_freed: float = Field(..., description="Total MB freed")
    gb_freed: float = Field(..., description="Total GB freed")

    # File counts: the overall total is required; the per-kind counters
    # default to zero when not supplied.
    files_removed: int = Field(..., description="Number of files removed")
    old_files_removed: int = Field(default=0, description="Old files removed")
    orphaned_files_removed: int = Field(default=0, description="Orphaned files removed")
    temp_files_removed: int = Field(default=0, description="Temporary files removed")

    class Config:
        # Sample payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "bytes_freed": 536870912,
                "mb_freed": 512.0,
                "gb_freed": 0.5,
                "files_removed": 15,
                "old_files_removed": 10,
                "orphaned_files_removed": 3,
                "temp_files_removed": 2,
            },
        }
class CachedVideo(BaseModel):
    """Model for cached video information."""

    # Cache key. The field name shadows the ``hash`` builtin inside this
    # class body only; it is part of the public schema and is kept as-is.
    hash: str = Field(..., description="Cache hash")

    # Video metadata.
    video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")
    channel: str = Field(..., description="Channel name")
    duration: int = Field(..., description="Duration in seconds")

    # Stored artefacts; ``audio_path`` defaults to None.
    video_path: str = Field(..., description="Path to video file")
    audio_path: Optional[str] = Field(default=None, description="Path to audio file")

    # Provenance and size.
    download_date: str = Field(..., description="Download date")
    size_bytes: int = Field(..., description="File size in bytes")
    url: str = Field(..., description="Original YouTube URL")
    quality: str = Field(..., description="Video quality")

    # State flags: ``exists`` must be supplied, ``keep`` defaults to False.
    exists: bool = Field(..., description="File still exists on disk")
    keep: bool = Field(False, description="Protected from cleanup")
class BatchDownloadRequest(BaseModel):
    """Request model for batch video downloads."""

    # Required list of URLs. No length constraint is declared, so an empty
    # list passes validation.
    urls: List[HttpUrl] = Field(..., description="List of YouTube URLs")

    # Options shared by every item in the batch.
    quality: VideoQuality = Field(VideoQuality.MEDIUM_720P, description="Video quality for all downloads")
    extract_audio: bool = Field(True, description="Extract audio from all videos")
    continue_on_error: bool = Field(True, description="Continue downloading if one fails")

    class Config:
        # Sample payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "urls": [
                    "https://www.youtube.com/watch?v=video1",
                    "https://www.youtube.com/watch?v=video2",
                ],
                "quality": "720p",
                "extract_audio": True,
                "continue_on_error": True,
            },
        }
class BatchDownloadResponse(BaseModel):
    """Response model for batch downloads."""

    # Aggregate counters; all four are required.
    total: int = Field(..., description="Total videos to download")
    successful: int = Field(..., description="Successfully downloaded")
    failed: int = Field(..., description="Failed downloads")
    skipped: int = Field(..., description="Skipped (already cached)")

    # One free-form dict per video; the model enforces no per-item schema.
    results: List[Dict[str, Any]] = Field(
        ...,
        description="Individual results",
    )

    class Config:
        # Sample payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "total": 5,
                "successful": 3,
                "failed": 1,
                "skipped": 1,
                "results": [
                    {"video_id": "abc123", "status": "success"},
                    {"video_id": "def456", "status": "cached"},
                    {
                        "video_id": "ghi789",
                        "status": "failed",
                        "error": "Video unavailable",
                    },
                ],
            },
        }
class VideoArchiveRequest(BaseModel):
    """Request to archive a video."""

    video_id: str = Field(..., description="Video ID to archive")

    # NOTE(review): ``archive_dir`` is an unconstrained string here; if the
    # consumer joins it onto a filesystem path, it should reject separators
    # and ".." itself -- confirm against the caller.
    archive_dir: str = Field(
        "archive",
        description="Archive directory name",
    )
class VideoRestoreRequest(BaseModel):
    """Request to restore a video from archive."""

    video_id: str = Field(..., description="Video ID to restore")

    # NOTE(review): ``archive_dir`` is an unconstrained string here; if the
    # consumer joins it onto a filesystem path, it should reject separators
    # and ".." itself -- confirm against the caller.
    archive_dir: str = Field(
        "archive",
        description="Archive directory name",
    )
class VideoSummary(BaseModel):
    """Video summary model."""

    # Required video metadata.
    video_id: str = Field(..., description="YouTube video ID")
    title: str = Field(..., description="Video title")
    channel: str = Field(..., description="Channel name")
    duration: int = Field(..., description="Duration in seconds")

    # Summary content; each part defaults to None when omitted.
    transcript: Optional[str] = Field(None, description="Video transcript")
    summary: Optional[str] = Field(None, description="AI-generated summary")
    key_points: Optional[List[str]] = Field(None, description="Key points from video")

    # Stamped at model creation with ``datetime.now()``: a naive, local-time
    # value carrying no timezone information.
    created_at: datetime = Field(default_factory=datetime.now, description="Creation timestamp")
    model_used: Optional[str] = Field(None, description="AI model used for summary")

    class Config:
        # ``model_used`` starts with ``model_``, a prefix Pydantic v2 can
        # treat as a protected namespace and warn about at class creation.
        # Clearing the protected namespaces keeps the public field name
        # unchanged while avoiding that warning.
        protected_namespaces = ()

        # Sample payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "video_id": "dQw4w9WgXcQ",
                "title": "Rick Astley - Never Gonna Give You Up",
                "channel": "RickAstleyVEVO",
                "duration": 213,
                "transcript": "Never gonna give you up...",
                "summary": "A classic music video featuring...",
                "key_points": ["Catchy pop song", "Famous for internet meme"],
                "created_at": "2025-01-26T12:00:00",
                "model_used": "claude-3-5-haiku"
            }
        }