"""
Enhanced Transcript Service with local video file support.
Integrates with VideoDownloadService for local file-based transcription.
"""

import asyncio
import json
import logging
from pathlib import Path
from typing import Any, Dict, Optional
from urllib.parse import parse_qs, urlparse

from backend.models.transcript import (
    TranscriptResult,
    TranscriptMetadata,
    TranscriptSegment,
    ExtractionMethod
)
from backend.core.exceptions import (
    TranscriptExtractionError,
    ErrorCode
)
from backend.services.transcript_service import TranscriptService
from backend.services.video_download_service import VideoDownloadService, VideoDownloadError
from backend.services.mock_cache import MockCacheClient

logger = logging.getLogger(__name__)

# Time-to-live (seconds) applied to every transcript written to the cache.
_TRANSCRIPT_CACHE_TTL_SECONDS = 86400

# First path segment of YouTube URLs that carry the video ID in the path.
_PATH_ID_PREFIXES = ("embed", "shorts", "live", "v")


class MockWhisperService:
    """Mock Whisper service for local audio transcription."""

    def __init__(self):
        self.model_name = "base"
        self.language = "en"

    async def transcribe_audio(self, audio_path: Path) -> Dict[str, Any]:
        """
        Mock transcription of audio file.

        In production, this would use OpenAI Whisper or similar.

        Args:
            audio_path: Path to audio file. Only its ``name`` is used, to
                label the generated text; the file itself is never read.

        Returns:
            Dict with keys ``text``, ``segments`` (each with ``text``,
            ``start``, ``end``), ``language`` and ``duration``.
        """
        await asyncio.sleep(1.0)  # Simulate processing time

        # The text is reproduced verbatim, including the single line break
        # before the final sentence.
        transcript_text = (
            f"[Transcribed from local audio: {audio_path.name}] "
            "This is a high-quality transcription from the downloaded video. "
            "Local transcription provides better accuracy than online methods. "
            "The video discusses important topics including: "
            "- Advanced machine learning techniques "
            "- Modern software architecture patterns "
            "- Best practices for scalable applications "
            "- Performance optimization strategies "
            "Using local files ensures we can process videos even if they're "
            "removed from YouTube, and we get consistent quality across all "
            "transcriptions. \n"
            "This mock transcript demonstrates the enhanced capabilities of "
            "local processing, which would include proper timestamps and "
            "speaker detection in production."
        )

        return {
            "text": transcript_text,
            "segments": [
                {
                    "text": "This is a high-quality transcription from the downloaded video.",
                    "start": 0.0,
                    "end": 4.0
                },
                {
                    "text": "Local transcription provides better accuracy than online methods.",
                    "start": 4.0,
                    "end": 8.0
                },
                {
                    "text": "The video discusses important topics including advanced machine learning techniques.",
                    "start": 8.0,
                    "end": 13.0
                }
            ],
            "language": "en",
            "duration": 120.0  # Mock duration
        }


class EnhancedTranscriptService(TranscriptService):
    """
    Enhanced transcript service that prioritizes local video files.

    Extraction priority:
    1. Check for locally downloaded video/audio files
    2. Fall back to YouTube Transcript API
    3. Download video and extract audio if needed
    4. Use Whisper for transcription
    """

    def __init__(
        self,
        video_service: Optional[VideoDownloadService] = None,
        cache_client: Optional[MockCacheClient] = None,
        whisper_service: Optional[MockWhisperService] = None
    ):
        """
        Initialize enhanced transcript service.

        Args:
            video_service: Video download service for local files
            cache_client: Cache client for transcript caching
            whisper_service: Whisper service for local transcription
        """
        super().__init__(cache_client=cache_client)
        self.video_service = video_service or VideoDownloadService()
        self.whisper_service = whisper_service or MockWhisperService()

        # Update success rates to prefer local files
        self._method_success_rates = {
            "local_file": 0.95,       # 95% success with local files
            "youtube_api": 0.7,       # 70% success with YouTube API
            "auto_captions": 0.5,     # 50% success with auto-captions
            "whisper_download": 0.9   # 90% success with download + Whisper
        }

    def _extract_video_id_from_url(self, url: str) -> str:
        """
        Extract video ID from YouTube URL.

        Recognises ``youtube.com/watch?...v=ID`` (with ``v`` anywhere in the
        query string), ``youtu.be/ID`` and the path-based forms
        ``/embed/ID``, ``/shorts/ID``, ``/live/ID`` and ``/v/ID``. Query
        parameters and ``#fragment`` suffixes are not part of the result.

        Args:
            url: YouTube URL (scheme optional) or a bare video ID.

        Returns:
            The video ID, or ``url`` unchanged when no ID can be located
            (the input is then assumed to already be a video ID).
        """
        # urlparse only fills in the host when a scheme is present.
        candidate = url if "://" in url else "https://" + url.lstrip("/")
        try:
            parsed = urlparse(candidate)
            host = (parsed.hostname or "").lower()
        except ValueError:
            # Malformed netloc; treat the input as an opaque ID.
            return url

        path_parts = [part for part in parsed.path.split("/") if part]

        if host == "youtu.be" or host.endswith(".youtu.be"):
            if path_parts:
                return path_parts[0]
        elif host == "youtube.com" or host.endswith(".youtube.com"):
            ids = parse_qs(parsed.query).get("v")
            if ids and ids[0]:
                return ids[0]
            if len(path_parts) >= 2 and path_parts[0] in _PATH_ID_PREFIXES:
                return path_parts[1]

        # Assume it's already a video ID
        return url

    async def _load_cached_transcript(
        self,
        cache_key: str,
        video_id: str
    ) -> Optional[TranscriptResult]:
        """Return the cached transcript for ``cache_key``, or None on a miss.

        An entry that cannot be decoded or validated is logged and treated
        as a miss so that extraction can proceed instead of failing.
        """
        cached_result = await self.cache_client.get(cache_key)
        if not cached_result:
            return None
        try:
            # JSONDecodeError and pydantic's ValidationError both derive
            # from ValueError.
            result = TranscriptResult.model_validate(json.loads(cached_result))
        except (TypeError, ValueError) as e:
            logger.warning(f"Ignoring invalid cached transcript for {video_id}: {e}")
            return None
        logger.info(f"Transcript cache hit for {video_id}")
        return result

    async def _store_transcript(self, cache_key: str, result: TranscriptResult) -> None:
        """Write ``result`` to the cache as JSON with the standard TTL."""
        await self.cache_client.set(
            cache_key,
            result.model_dump_json(),
            ttl=_TRANSCRIPT_CACHE_TTL_SECONDS
        )

    @staticmethod
    def _build_whisper_result(
        video_id: str,
        transcription: Dict[str, Any],
        processing_time: float
    ) -> TranscriptResult:
        """Convert a Whisper-style transcription dict into a TranscriptResult.

        Args:
            video_id: Video ID recorded on the result.
            transcription: Dict with a required ``text`` key and optional
                ``segments``, ``language`` (default "en") and ``duration``
                (default 0) keys.
            processing_time: Value stored as the result's processing time.

        Raises:
            KeyError: If ``text`` (or a segment's ``text``/``start``/``end``)
                is missing.
        """
        segments = [
            TranscriptSegment(
                text=seg["text"],
                start=seg["start"],
                duration=seg["end"] - seg["start"]
            )
            for seg in transcription.get("segments", [])
        ]
        transcript_text = transcription["text"]
        language = transcription.get("language", "en")

        metadata = TranscriptMetadata(
            language=language,
            duration=transcription.get("duration", 0),
            word_count=len(transcript_text.split()),
            has_timestamps=bool(segments)
        )

        return TranscriptResult(
            video_id=video_id,
            transcript=transcript_text,
            segments=segments,
            metadata=metadata,
            method=ExtractionMethod.WHISPER_AUDIO,
            language=language,
            success=True,
            from_cache=False,
            processing_time=processing_time
        )

    async def extract_transcript(
        self,
        video_id_or_url: str,
        language_preference: str = "en",
        force_download: bool = False
    ) -> TranscriptResult:
        """
        Extract transcript with local file priority.

        Order of attempts: cache, local files (if the video is already
        downloaded), download + transcribe (only when ``force_download``),
        the parent class's extraction, and finally download + transcribe as
        a last resort. Every successful result is written to the cache.

        Args:
            video_id_or_url: YouTube video ID or URL
            language_preference: Preferred language for transcript
            force_download: Force download even if online methods work

        Returns:
            TranscriptResult with transcript and metadata

        Raises:
            TranscriptExtractionError: If every method fails.
        """
        # Determine if input is URL or video ID
        if "youtube.com" in video_id_or_url or "youtu.be" in video_id_or_url:
            url = video_id_or_url
            video_id = self._extract_video_id_from_url(url)
        else:
            video_id = video_id_or_url
            url = f"https://www.youtube.com/watch?v={video_id}"

        # Check cache first
        cache_key = f"transcript:{video_id}:{language_preference}"
        cached = await self._load_cached_transcript(cache_key, video_id)
        if cached is not None:
            return cached

        # Try local file first if available
        if self.video_service.is_video_downloaded(video_id):
            logger.info(f"Using local files for transcript extraction: {video_id}")
            local_result = await self._extract_from_local_video(video_id)
            if local_result:
                await self._store_transcript(cache_key, local_result)
                return local_result

        # If force_download, download the video first
        if force_download:
            logger.info(f"Force downloading video for transcription: {video_id}")
            download_result = await self._download_and_transcribe(url, video_id)
            if download_result:
                await self._store_transcript(cache_key, download_result)
                return download_result

        # Try YouTube API methods (from parent class)
        try:
            logger.info(f"Attempting YouTube API transcript extraction for {video_id}")
            api_result = await super().extract_transcript(video_id, language_preference)

            # Cache the result
            await self._store_transcript(cache_key, api_result)
            return api_result

        except TranscriptExtractionError as e:
            logger.warning(f"YouTube API methods failed: {e}")

            # As last resort, download video and transcribe
            logger.info(f"Falling back to download and transcribe for {video_id}")
            download_result = await self._download_and_transcribe(url, video_id)
            if download_result:
                await self._store_transcript(cache_key, download_result)
                return download_result

            # If all methods fail, raise error
            raise TranscriptExtractionError(
                message="Unable to extract transcript through any method",
                error_code=ErrorCode.TRANSCRIPT_UNAVAILABLE,
                details={
                    "video_id": video_id,
                    "attempted_methods": [
                        "local_file", "youtube_api", "auto_captions", "download_and_transcribe"
                    ],
                    "suggestions": [
                        "Check if video is available and public",
                        "Try again later",
                        "Enable captions on the video"
                    ]
                }
            )

    async def _extract_from_local_video(self, video_id: str) -> Optional[TranscriptResult]:
        """
        Extract transcript from locally stored video/audio files.

        Only an existing audio file is transcribed. When just the video file
        is present, nothing is extracted and None is returned.

        Args:
            video_id: YouTube video ID

        Returns:
            TranscriptResult or None if extraction fails
        """
        try:
            # Get cached video info
            video_hash = self.video_service._get_video_hash(video_id)
            cached_info = self.video_service.cache.get(video_hash)

            if not cached_info:
                logger.warning(f"No cache info for downloaded video {video_id}")
                return None

            # Check for audio file
            audio_path = cached_info.get('audio_path')
            if audio_path:
                audio_file = Path(audio_path)
                if audio_file.exists():
                    logger.info(f"Transcribing from local audio: {audio_file}")

                    # Transcribe using Whisper
                    transcription = await self.whisper_service.transcribe_audio(audio_file)

                    return self._build_whisper_result(
                        video_id,
                        transcription,
                        processing_time=1.0  # Mock processing time
                    )

            # If no audio file, check for video file
            video_path = cached_info.get('video_path')
            if video_path:
                video_file = Path(video_path)
                if video_file.exists():
                    logger.info(f"Video found but no audio extracted yet: {video_file}")
                    # Could extract audio here if needed
                    return None

            return None

        except Exception as e:
            # Best-effort: callers fall back to other methods on None, so
            # keep the traceback in the log rather than propagating.
            logger.error(f"Error extracting from local video {video_id}: {e}", exc_info=True)
            return None

    async def _download_and_transcribe(self, url: str, video_id: str) -> Optional[TranscriptResult]:
        """
        Download video and transcribe the audio.

        Args:
            url: YouTube URL
            video_id: Video ID

        Returns:
            TranscriptResult or None if fails
        """
        try:
            logger.info(f"Downloading video for transcription: {video_id}")

            # Download video with audio extraction
            _video_path, audio_path = await self.video_service.download_video(
                url=url,
                extract_audio=True,
                force=False
            )

            if audio_path and audio_path.exists():
                logger.info(f"Audio extracted, transcribing: {audio_path}")

                # Transcribe using Whisper
                transcription = await self.whisper_service.transcribe_audio(audio_path)

                return self._build_whisper_result(
                    video_id,
                    transcription,
                    processing_time=2.0  # Mock processing time
                )

            logger.warning(f"Download succeeded but no audio extracted for {video_id}")
            return None

        except VideoDownloadError as e:
            logger.error(f"Failed to download video {video_id}: {e}")
            return None
        except Exception as e:
            # Unexpected failure: keep the traceback, still report None so
            # the caller can try its remaining options.
            logger.error(
                f"Error in download and transcribe for {video_id}: {e}",
                exc_info=True
            )
            return None

    async def get_transcript_with_priority(
        self,
        video_id: str,
        prefer_local: bool = True,
        download_if_missing: bool = False
    ) -> TranscriptResult:
        """
        Get transcript with configurable priority.

        Unlike ``extract_transcript``, this method neither reads nor writes
        the transcript cache itself.

        Args:
            video_id: YouTube video ID
            prefer_local: Prefer local files over API
            download_if_missing: Download video if not available locally

        Returns:
            TranscriptResult

        Raises:
            TranscriptExtractionError: Re-raised from the parent extraction
                when it fails and no download result is obtained.
        """
        url = f"https://www.youtube.com/watch?v={video_id}"

        if prefer_local and self.video_service.is_video_downloaded(video_id):
            # Try local first
            local_result = await self._extract_from_local_video(video_id)
            if local_result:
                return local_result

        # Try API methods
        try:
            return await super().extract_transcript(video_id)
        except TranscriptExtractionError:
            if download_if_missing:
                # Download and transcribe
                download_result = await self._download_and_transcribe(url, video_id)
                if download_result:
                    return download_result
            raise

    def get_extraction_stats(self) -> Dict[str, Any]:
        """Get statistics about extraction methods and success rates."""
        return {
            # Copy so callers cannot mutate the service's internal table.
            "method_success_rates": dict(self._method_success_rates),
            "cached_videos": len(self.video_service.cache),
            "total_storage_mb": self.video_service.get_storage_stats()['total_size_mb'],
            "preferred_method": "local_file" if self.video_service.cache else "youtube_api"
        }