""" Data models for the YouTube Summarizer SDK """ from typing import List, Optional, Dict, Any, Union from datetime import datetime from enum import Enum from dataclasses import dataclass, field from pydantic import BaseModel, HttpUrl, Field, validator class TranscriptSource(str, Enum): """Transcript source options""" YOUTUBE = "youtube" WHISPER = "whisper" BOTH = "both" class WhisperModelSize(str, Enum): """Whisper model size options""" TINY = "tiny" BASE = "base" SMALL = "small" MEDIUM = "medium" LARGE = "large" class ProcessingPriority(str, Enum): """Processing priority levels""" LOW = "low" NORMAL = "normal" HIGH = "high" URGENT = "urgent" class JobStatus(str, Enum): """Job processing status""" QUEUED = "queued" PROCESSING = "processing" COMPLETED = "completed" FAILED = "failed" CANCELLED = "cancelled" class WebhookEvent(str, Enum): """Webhook event types""" JOB_STARTED = "job.started" JOB_PROGRESS = "job.progress" JOB_COMPLETED = "job.completed" JOB_FAILED = "job.failed" BATCH_COMPLETED = "batch.completed" # Request Models class TranscriptRequest(BaseModel): """Request model for transcript extraction""" video_url: HttpUrl = Field(..., description="YouTube video URL") transcript_source: TranscriptSource = Field( default=TranscriptSource.YOUTUBE, description="Transcript source" ) whisper_model_size: Optional[WhisperModelSize] = Field( default=WhisperModelSize.SMALL, description="Whisper model size for AI transcription" ) priority: ProcessingPriority = Field( default=ProcessingPriority.NORMAL, description="Processing priority" ) webhook_url: Optional[HttpUrl] = Field( None, description="Webhook URL for notifications" ) include_quality_analysis: bool = Field( default=True, description="Include transcript quality analysis" ) custom_prompt: Optional[str] = Field( None, description="Custom processing prompt" ) tags: List[str] = Field( default_factory=list, description="Custom tags for organization" ) @validator('video_url') def validate_youtube_url(cls, v): """Validate YouTube URL format""" url_str = str(v) valid_patterns = [ 'youtube.com/watch', 'youtu.be/', 'youtube.com/embed', 'm.youtube.com/watch' ] if not any(pattern in url_str for pattern in valid_patterns): raise ValueError('Invalid YouTube URL format') return v class BatchProcessingRequest(BaseModel): """Request model for batch processing""" video_urls: List[HttpUrl] = Field( ..., min_items=1, max_items=1000, description="List of YouTube video URLs" ) batch_name: str = Field(..., description="Batch job name") transcript_source: TranscriptSource = Field( default=TranscriptSource.YOUTUBE, description="Transcript source for all videos" ) priority: ProcessingPriority = Field( default=ProcessingPriority.NORMAL, description="Processing priority" ) webhook_url: Optional[HttpUrl] = Field( None, description="Webhook URL for batch notifications" ) parallel_processing: bool = Field( default=False, description="Enable parallel processing" ) max_concurrent_jobs: int = Field( default=5, ge=1, le=50, description="Maximum concurrent jobs" ) # Response Models class JobResponse(BaseModel): """Response model for job creation""" job_id: str status: JobStatus priority: ProcessingPriority created_at: datetime estimated_completion: Optional[datetime] progress_percentage: float current_stage: str webhook_url: Optional[str] metadata: Dict[str, Any] class BatchJobResponse(BaseModel): """Response model for batch job creation""" batch_id: str status: JobStatus video_count: int priority: ProcessingPriority estimated_completion: Optional[datetime] parallel_processing: bool webhook_url: Optional[str] metadata: Dict[str, Any] class TranscriptResult(BaseModel): """Transcript extraction result""" job_id: str video_id: str video_url: str transcript_source: TranscriptSource transcript: Optional[str] segments: Optional[List[Dict[str, Any]]] processing_time_seconds: float quality_score: Optional[float] confidence_score: Optional[float] metadata: Dict[str, Any] class QualityComparison(BaseModel): """Quality comparison between transcript sources""" similarity_score: float punctuation_improvement_score: float capitalization_improvement_score: float technical_terms_improved: List[str] recommendation: TranscriptSource class DualTranscriptResult(BaseModel): """Result for dual transcript extraction""" job_id: str video_id: str video_url: str youtube_transcript: Optional[str] whisper_transcript: Optional[str] youtube_segments: Optional[List[Dict[str, Any]]] whisper_segments: Optional[List[Dict[str, Any]]] quality_comparison: Optional[QualityComparison] processing_time_seconds: float metadata: Dict[str, Any] class APIUsageStats(BaseModel): """API usage statistics""" total_requests: int requests_today: int requests_this_month: int average_response_time_ms: float success_rate: float rate_limit_remaining: int quota_reset_time: datetime class ProcessingTimeEstimate(BaseModel): """Processing time estimate""" estimated_time_seconds: float estimated_cost: Optional[float] factors: Optional[Dict[str, float]] # Webhook Models class WebhookPayload(BaseModel): """Webhook notification payload""" event: WebhookEvent timestamp: datetime data: Dict[str, Any] @dataclass class SDKConfig: """SDK configuration""" api_key: str base_url: str = "https://api.youtube-summarizer.com" timeout: float = 60.0 max_retries: int = 3 retry_delay: float = 1.0 verify_ssl: bool = True user_agent: str = field(default_factory=lambda: f"youtube-summarizer-python-sdk/4.2.0") @dataclass class WebSocketConfig: """WebSocket configuration for real-time updates""" url: str = "wss://api.youtube-summarizer.com/ws" auto_reconnect: bool = True max_reconnect_attempts: int = 5 heartbeat_interval: float = 30.0 reconnect_delay: float = 5.0 # MCP-specific models class MCPToolRequest(BaseModel): """MCP tool request model""" name: str arguments: Dict[str, Any] class MCPToolResult(BaseModel): """MCP tool result model""" content: List[Dict[str, Any]] is_error: bool = False class MCPResourceRequest(BaseModel): """MCP resource request model""" uri: str class MCPResourceResult(BaseModel): """MCP resource result model""" contents: List[Dict[str, Any]]