"""Cache management service for pipeline results and intermediate data."""

import hashlib
import json
import logging
import os
import sys
import time
from dataclasses import asdict, is_dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)

# Add library path to import MemoryCache
lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../lib'))
if lib_path not in sys.path:
    sys.path.insert(0, lib_path)

try:
    from ai_assistant_lib.utils.helpers.cache import MemoryCache
except ImportError:
    # Fallback used only when the library is not importable.
    class MemoryCache:
        """Minimal in-process stand-in for the library's ``MemoryCache``.

        Exposes the async methods ``CacheManager`` calls (``get``, ``set``,
        ``delete``, ``clear``, ``stats``). Entries expire after their TTL and
        the number of entries is bounded by ``max_size``.
        """

        def __init__(self, default_ttl=3600, max_size=10000):
            """Create an empty cache.

            Args:
                default_ttl: Lifetime in seconds applied when ``set`` gets no ttl
                max_size: Maximum number of entries kept at once
            """
            # key -> (value, deadline on the monotonic clock)
            self._cache = {}
            self.default_ttl = default_ttl
            self.max_size = max_size
            self._hits = 0
            self._misses = 0

        def _purge_expired(self):
            """Drop every entry whose deadline has passed."""
            now = time.monotonic()
            expired = [
                key for key, (_, deadline) in self._cache.items()
                if deadline <= now
            ]
            for key in expired:
                del self._cache[key]

        async def get(self, key):
            """Return the live value stored under ``key``, or None."""
            entry = self._cache.get(key)
            if entry is not None:
                value, deadline = entry
                if time.monotonic() < deadline:
                    self._hits += 1
                    return value
                # Expired: remove lazily so the slot is freed.
                del self._cache[key]
            self._misses += 1
            return None

        async def set(self, key, value, ttl=None):
            """Store ``value`` under ``key`` for ``ttl`` seconds.

            A ttl of None falls back to ``default_ttl``.
            """
            lifetime = self.default_ttl if ttl is None else ttl
            # Re-inserting moves the key to the end of the dict, so eviction
            # order follows write recency.
            self._cache.pop(key, None)
            if len(self._cache) >= self.max_size:
                self._purge_expired()
            # Still full after purging: evict the oldest-written entries.
            while self._cache and len(self._cache) >= self.max_size:
                del self._cache[next(iter(self._cache))]
            self._cache[key] = (value, time.monotonic() + lifetime)

        async def delete(self, key):
            """Remove ``key``; return True only if a live entry was removed."""
            entry = self._cache.pop(key, None)
            return entry is not None and time.monotonic() < entry[1]

        async def clear(self):
            """Remove all entries."""
            self._cache.clear()

        async def stats(self):
            """Return the live entry count and hit/miss rates of ``get`` calls."""
            self._purge_expired()
            lookups = self._hits + self._misses
            return {
                "size": len(self._cache),
                "hit_rate": self._hits / lookups if lookups else 0.0,
                "miss_rate": self._misses / lookups if lookups else 0.0,
            }


class CacheManager:
    """Manages caching of pipeline results and intermediate data using AI Assistant Library."""

    def __init__(self, default_ttl: int = 3600):
        """Initialize cache manager.

        Args:
            default_ttl: Default time-to-live for cache entries in seconds
        """
        self.default_ttl = default_ttl
        self._cache = MemoryCache(default_ttl=default_ttl, max_size=10000)

    def _generate_key(self, prefix: str, identifier: str) -> str:
        """Generate cache key with prefix."""
        return f"{prefix}:{identifier}"

    async def _store(self, prefix: str, identifier: str, value: Any,
                     ttl: Optional[int], label: str) -> bool:
        """Write ``value`` under ``prefix:identifier`` on a best-effort basis.

        Args:
            prefix: Key namespace (e.g. "transcript")
            identifier: Identifier within the namespace
            value: Data to store
            ttl: Time-to-live in seconds (uses default if None)
            label: Human-readable name of the data, used in the failure log

        Returns:
            True if cached successfully, False if the write raised
        """
        try:
            key = self._generate_key(prefix, identifier)
            # A falsy ttl (None or 0) falls back to the default, as before.
            await self._cache.set(key, value, ttl=ttl or self.default_ttl)
        except Exception:
            # Caching is best-effort: report the failure, never propagate it.
            logger.exception("Failed to cache %s", label)
            return False
        return True

    async def cache_pipeline_result(self, job_id: str, result: Any,
                                    ttl: Optional[int] = None) -> bool:
        """Cache pipeline result.

        Args:
            job_id: Pipeline job ID
            result: Pipeline result object
            ttl: Time-to-live in seconds (uses default if None)

        Returns:
            True if cached successfully
        """
        try:
            # Convert dataclass instances to plain dicts. Dataclass *classes*
            # are excluded because asdict() only accepts instances.
            if is_dataclass(result) and not isinstance(result, type):
                result_data = asdict(result)
            else:
                result_data = result
        except Exception:
            logger.exception("Failed to cache pipeline result")
            return False
        return await self._store(
            "pipeline_result", job_id, result_data, ttl, "pipeline result"
        )

    async def get_cached_pipeline_result(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Get cached pipeline result.

        Args:
            job_id: Pipeline job ID

        Returns:
            Cached result data or None if not found/expired
        """
        key = self._generate_key("pipeline_result", job_id)
        return await self._cache.get(key)

    async def cache_transcript(self, video_id: str, transcript: str,
                               metadata: Optional[Dict[str, Any]] = None,
                               ttl: Optional[int] = None) -> bool:
        """Cache transcript data.

        Args:
            video_id: YouTube video ID
            transcript: Transcript text
            metadata: Optional metadata
            ttl: Time-to-live in seconds

        Returns:
            True if cached successfully
        """
        data = {
            "transcript": transcript,
            "metadata": metadata or {},
            "video_id": video_id,
        }
        return await self._store("transcript", video_id, data, ttl, "transcript")

    async def get_cached_transcript(self, video_id: str) -> Optional[Dict[str, Any]]:
        """Get cached transcript.

        Args:
            video_id: YouTube video ID

        Returns:
            Cached transcript data or None if not found/expired
        """
        key = self._generate_key("transcript", video_id)
        return await self._cache.get(key)

    async def cache_video_metadata(self, video_id: str, metadata: Dict[str, Any],
                                   ttl: Optional[int] = None) -> bool:
        """Cache video metadata.

        Args:
            video_id: YouTube video ID
            metadata: Video metadata
            ttl: Time-to-live in seconds

        Returns:
            True if cached successfully
        """
        return await self._store(
            "video_metadata", video_id, metadata, ttl, "video metadata"
        )

    async def get_cached_video_metadata(self, video_id: str) -> Optional[Dict[str, Any]]:
        """Get cached video metadata.

        Args:
            video_id: YouTube video ID

        Returns:
            Cached metadata or None if not found/expired
        """
        key = self._generate_key("video_metadata", video_id)
        return await self._cache.get(key)

    async def cache_summary(self, cache_key: str, summary_data: Dict[str, Any],
                            ttl: Optional[int] = None) -> bool:
        """Cache summary data with custom key.

        Args:
            cache_key: Custom cache key (e.g., hash of transcript + config)
            summary_data: Summary result data
            ttl: Time-to-live in seconds

        Returns:
            True if cached successfully
        """
        return await self._store("summary", cache_key, summary_data, ttl, "summary")

    async def get_cached_summary(self, cache_key: str) -> Optional[Dict[str, Any]]:
        """Get cached summary data.

        Args:
            cache_key: Custom cache key

        Returns:
            Cached summary data or None if not found/expired
        """
        key = self._generate_key("summary", cache_key)
        return await self._cache.get(key)

    def generate_summary_cache_key(self, video_id: str, config: Dict[str, Any]) -> str:
        """Generate cache key for summary based on video ID and configuration.

        Args:
            video_id: YouTube video ID
            config: Summary configuration (must be JSON-serializable)

        Returns:
            Cache key string (first 16 hex characters of a SHA-256 digest)
        """
        # sort_keys makes the key independent of dict insertion order.
        config_str = json.dumps(config, sort_keys=True)
        key_input = f"{video_id}:{config_str}"
        return hashlib.sha256(key_input.encode()).hexdigest()[:16]

    async def invalidate_video_cache(self, video_id: str) -> int:
        """Invalidate all cache entries for a video.

        Args:
            video_id: YouTube video ID

        Returns:
            Number of entries invalidated
        """
        # NOTE(review): cache_pipeline_result stores entries under a job ID, so
        # the "pipeline_result" key below only matches when the job ID equals
        # the video ID - confirm against callers.
        keys = [
            self._generate_key("transcript", video_id),
            self._generate_key("video_metadata", video_id),
            self._generate_key("pipeline_result", video_id),
        ]

        removed_count = 0
        for key in keys:
            if await self._cache.delete(key):
                removed_count += 1

        # For summary keys with video_id, we'd need to scan all keys.
        # This is a limitation of the current MemoryCache interface;
        # in production, we'd use pattern-based invalidation.
        return removed_count

    async def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics.

        Returns:
            Cache statistics dictionary
        """
        stats = await self._cache.stats()
        return {
            "total_entries": stats.get("size", 0),
            "entries_by_type": {},  # Not available in current MemoryCache interface
            "default_ttl_seconds": self.default_ttl,
            "hit_rate": stats.get("hit_rate", 0.0),
            "miss_rate": stats.get("miss_rate", 0.0),
        }

    async def clear_cache(self) -> int:
        """Clear all cache entries.

        Returns:
            Number of entries cleared
        """
        stats = await self._cache.stats()
        count = stats.get("size", 0)
        await self._cache.clear()
        return count