from pydantic import BaseModel, Field
from typing import Optional, List, Dict, Any
from datetime import datetime
from enum import Enum


class TranscriptSource(str, Enum):
    """Transcript source options for dual transcript functionality."""
    YOUTUBE = "youtube"
    WHISPER = "whisper"
    BOTH = "both"


class ExtractionMethod(str, Enum):
    """Supported transcript extraction methods."""
    YOUTUBE_API = "youtube_api"
    AUTO_CAPTIONS = "auto_captions"
    WHISPER_AUDIO = "whisper_audio"
    WHISPER_API = "whisper_api"
    MOCK = "mock"
    FAILED = "failed"


class TranscriptSegment(BaseModel):
    """A single timed segment of a transcript."""
    text: str
    start: float
    duration: float

    @property
    def end(self) -> float:
        return self.start + self.duration


class TranscriptMetadata(BaseModel):
    """Metadata describing an extracted transcript."""
    word_count: int
    estimated_reading_time: int  # in seconds
    language: str
    has_timestamps: bool
    extraction_method: ExtractionMethod
    processing_time_seconds: float


class TranscriptChunk(BaseModel):
    """A chunk of transcript text for downstream processing."""
    chunk_index: int
    text: str
    start_time: Optional[float] = None
    end_time: Optional[float] = None
    token_count: int


class TranscriptResult(BaseModel):
    """Result of a single transcript extraction attempt."""
    video_id: str
    transcript: Optional[str] = None
    segments: Optional[List[TranscriptSegment]] = None
    metadata: Optional[TranscriptMetadata] = None
    method: ExtractionMethod
    success: bool
    from_cache: bool = False
    error: Optional[Dict[str, Any]] = None


class TranscriptRequest(BaseModel):
    """Request model for single-source transcript extraction."""
    video_id: str = Field(..., description="YouTube video ID")
    language_preference: str = Field("en", description="Preferred transcript language")
    include_metadata: bool = Field(True, description="Include transcript metadata")


class TranscriptResponse(BaseModel):
    """API response for single-source transcript extraction."""
    video_id: str
    transcript: Optional[str] = None
    segments: Optional[List[TranscriptSegment]] = None
    metadata: Optional[TranscriptMetadata] = None
    extraction_method: str
    language: str
    word_count: int
    cached: bool
    processing_time_seconds: float
    error: Optional[Dict[str, Any]] = None


class JobResponse(BaseModel):
    """Response returned when a background job is created."""
    job_id: str
    status: str
    message: str


class JobStatusResponse(BaseModel):
    """Status of a background transcript job."""
    job_id: str
    status: str  # "pending", "processing", "completed", "failed"
    progress_percentage: int
    current_step: Optional[str] = None
    result: Optional[TranscriptResponse] = None
    error: Optional[Dict[str, Any]] = None


# Dual Transcript Models for Enhanced Functionality

class DualTranscriptSegment(BaseModel):
    """Enhanced transcript segment with confidence and speaker info."""
    start_time: float
    end_time: float
    text: str
    confidence: Optional[float] = None
    speaker: Optional[str] = None

    @property
    def duration(self) -> float:
        """Get duration of the segment in seconds."""
        return self.end_time - self.start_time


class DualTranscriptMetadata(BaseModel):
    """Enhanced metadata for dual transcript functionality."""
    video_id: str
    language: str
    word_count: int
    total_segments: int
    has_timestamps: bool
    extraction_method: str
    processing_time_seconds: float = 0.0
    quality_score: float = 0.0
    confidence_score: float = 0.0
    estimated_reading_time_minutes: Optional[float] = None

    def model_post_init(self, __context) -> None:
        """Calculate derived fields after initialization."""
        if self.estimated_reading_time_minutes is None:
            # Average reading speed: 200 words per minute
            self.estimated_reading_time_minutes = self.word_count / 200.0


class TranscriptComparison(BaseModel):
    """Comparison metrics between two transcripts."""
    word_count_difference: int
    similarity_score: float  # 0-1 scale
    punctuation_improvement_score: float  # 0-1 scale
    capitalization_improvement_score: float  # 0-1 scale
    processing_time_ratio: float  # whisper_time / youtube_time
    quality_difference: float  # whisper_quality - youtube_quality
    confidence_difference: float  # whisper_confidence - youtube_confidence
    recommendation: str  # "youtube", "whisper", or "both"
    significant_differences: List[str]
    technical_terms_improved: List[str]


class DualTranscriptResult(BaseModel):
    """Result from dual transcript extraction."""
    video_id: str
    source: TranscriptSource
    youtube_transcript: Optional[List[DualTranscriptSegment]] = None
    youtube_metadata: Optional[DualTranscriptMetadata] = None
    whisper_transcript: Optional[List[DualTranscriptSegment]] = None
    whisper_metadata: Optional[DualTranscriptMetadata] = None
    comparison: Optional[TranscriptComparison] = None
    processing_time_seconds: float
    success: bool
    error: Optional[str] = None

    @property
    def has_youtube(self) -> bool:
        """Check if YouTube transcript is available."""
        return self.youtube_transcript is not None and len(self.youtube_transcript) > 0

    @property
    def has_whisper(self) -> bool:
        """Check if Whisper transcript is available."""
        return self.whisper_transcript is not None and len(self.whisper_transcript) > 0

    @property
    def has_comparison(self) -> bool:
        """Check if comparison data is available."""
        return self.comparison is not None

    def get_transcript(self, source: str) -> Optional[List[DualTranscriptSegment]]:
        """Get transcript by source name."""
        if source == "youtube":
            return self.youtube_transcript
        elif source == "whisper":
            return self.whisper_transcript
        else:
            return None

    def get_metadata(self, source: str) -> Optional[DualTranscriptMetadata]:
        """Get metadata by source name."""
        if source == "youtube":
            return self.youtube_metadata
        elif source == "whisper":
            return self.whisper_metadata
        else:
            return None


class DualTranscriptRequest(BaseModel):
    """Request model for dual transcript extraction."""
    video_url: str
    transcript_source: TranscriptSource
    whisper_model_size: str = "small"  # For Whisper: tiny, base, small, medium, large
    include_metadata: bool = True
    include_comparison: bool = True  # Only relevant when source is BOTH


class ProcessingTimeEstimate(BaseModel):
    """Processing time estimates for different transcript sources."""
    youtube_seconds: Optional[float] = None
    whisper_seconds: Optional[float] = None
    total_seconds: Optional[float] = None
    estimated_completion: Optional[str] = None  # ISO timestamp


# Response models for API

class DualTranscriptResponse(BaseModel):
    """API response for dual transcript extraction."""
    video_id: str
    source: TranscriptSource
    youtube_transcript: Optional[List[DualTranscriptSegment]] = None
    youtube_metadata: Optional[DualTranscriptMetadata] = None
    whisper_transcript: Optional[List[DualTranscriptSegment]] = None
    whisper_metadata: Optional[DualTranscriptMetadata] = None
    comparison: Optional[TranscriptComparison] = None
    processing_time_seconds: float
    success: bool
    error: Optional[str] = None
    has_youtube: bool = False
    has_whisper: bool = False
    has_comparison: bool = False

    def model_post_init(self, __context) -> None:
        """Calculate derived properties after initialization."""
        self.has_youtube = self.youtube_transcript is not None and len(self.youtube_transcript) > 0
        self.has_whisper = self.whisper_transcript is not None and len(self.whisper_transcript) > 0
        self.has_comparison = self.comparison is not None
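

# Minimal usage sketch (illustrative only): the video ID, timings, and text
# below are made-up values showing how DualTranscriptResult and its helpers
# are expected to be used; they are not taken from a real extraction run.
if __name__ == "__main__":
    segment = DualTranscriptSegment(
        start_time=0.0,
        end_time=2.5,
        text="Hello and welcome to the channel.",
        confidence=0.92,
    )
    result = DualTranscriptResult(
        video_id="dQw4w9WgXcQ",  # example ID, not a real extraction
        source=TranscriptSource.WHISPER,
        whisper_transcript=[segment],
        processing_time_seconds=1.2,
        success=True,
    )
    print(result.has_whisper)                # True
    print(result.get_transcript("whisper"))  # [DualTranscriptSegment(...)]
    print(segment.duration)                  # 2.5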