"""Service for managing file-based summary storage.""" import json import os from pathlib import Path from typing import List, Dict, Optional, Any from datetime import datetime import logging logger = logging.getLogger(__name__) class SummaryStorageService: """Service for managing summary files in the file system.""" def __init__(self, base_storage_path: str = "video_storage/summaries"): self.base_path = Path(base_storage_path) self.base_path.mkdir(parents=True, exist_ok=True) def get_video_summary_dir(self, video_id: str) -> Path: """Get the directory path for a video's summaries.""" return self.base_path / video_id def list_summaries(self, video_id: str) -> List[Dict[str, Any]]: """List all summaries for a given video ID.""" video_dir = self.get_video_summary_dir(video_id) if not video_dir.exists(): return [] summaries = [] # Find all JSON summary files summary_files = list(video_dir.glob("summary_*.json")) for summary_file in sorted(summary_files): try: with open(summary_file, 'r', encoding='utf-8') as f: summary_data = json.load(f) # Add file metadata file_stat = summary_file.stat() summary_data.update({ "file_path": str(summary_file.relative_to(self.base_path)), "file_size_bytes": file_stat.st_size, "file_created_at": datetime.fromtimestamp(file_stat.st_ctime).isoformat(), "file_modified_at": datetime.fromtimestamp(file_stat.st_mtime).isoformat() }) summaries.append(summary_data) except (json.JSONDecodeError, KeyError, OSError) as e: logger.warning(f"Failed to load summary file {summary_file}: {e}") continue # Sort by generated_at timestamp, most recent first summaries.sort( key=lambda x: x.get('generated_at', '1970-01-01T00:00:00'), reverse=True ) return summaries def get_summary(self, video_id: str, timestamp: str) -> Optional[Dict[str, Any]]: """Get a specific summary by video ID and timestamp.""" video_dir = self.get_video_summary_dir(video_id) # Try to find the summary file by timestamp summary_file = video_dir / f"summary_{timestamp}.json" if not summary_file.exists(): # If exact timestamp not found, try to find by partial match matching_files = list(video_dir.glob(f"summary_*{timestamp}*.json")) if not matching_files: return None summary_file = matching_files[0] try: with open(summary_file, 'r', encoding='utf-8') as f: summary_data = json.load(f) # Add file metadata file_stat = summary_file.stat() summary_data.update({ "file_path": str(summary_file.relative_to(self.base_path)), "file_size_bytes": file_stat.st_size, "file_created_at": datetime.fromtimestamp(file_stat.st_ctime).isoformat(), "file_modified_at": datetime.fromtimestamp(file_stat.st_mtime).isoformat() }) return summary_data except (json.JSONDecodeError, KeyError, OSError) as e: logger.error(f"Failed to load summary file {summary_file}: {e}") return None def save_summary( self, video_id: str, summary_data: Dict[str, Any], timestamp: Optional[str] = None ) -> str: """Save a summary to the file system.""" video_dir = self.get_video_summary_dir(video_id) video_dir.mkdir(parents=True, exist_ok=True) # Generate timestamp if not provided if not timestamp: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") summary_file = video_dir / f"summary_{timestamp}.json" # Ensure video_id and generated_at are set summary_data["video_id"] = video_id if "generated_at" not in summary_data: summary_data["generated_at"] = datetime.now().isoformat() try: with open(summary_file, 'w', encoding='utf-8') as f: json.dump(summary_data, f, indent=2, ensure_ascii=False) logger.info(f"Saved summary for video {video_id} to {summary_file}") return 
str(summary_file.relative_to(self.base_path)) except OSError as e: logger.error(f"Failed to save summary file {summary_file}: {e}") raise def delete_summary(self, video_id: str, timestamp: str) -> bool: """Delete a specific summary file.""" video_dir = self.get_video_summary_dir(video_id) summary_file = video_dir / f"summary_{timestamp}.json" try: if summary_file.exists(): summary_file.unlink() logger.info(f"Deleted summary file {summary_file}") # Clean up directory if empty if video_dir.exists() and not any(video_dir.iterdir()): video_dir.rmdir() logger.info(f"Removed empty directory {video_dir}") return True else: logger.warning(f"Summary file {summary_file} not found") return False except OSError as e: logger.error(f"Failed to delete summary file {summary_file}: {e}") return False def get_videos_with_summaries(self) -> List[str]: """Get list of video IDs that have summaries.""" if not self.base_path.exists(): return [] video_ids = [] for video_dir in self.base_path.iterdir(): if video_dir.is_dir(): # Check if directory has any summary files summary_files = list(video_dir.glob("summary_*.json")) if summary_files: video_ids.append(video_dir.name) return sorted(video_ids) def get_summary_stats(self) -> Dict[str, Any]: """Get statistics about stored summaries.""" video_ids = self.get_videos_with_summaries() total_summaries = 0 total_size_bytes = 0 model_counts = {} for video_id in video_ids: summaries = self.list_summaries(video_id) total_summaries += len(summaries) for summary in summaries: total_size_bytes += summary.get("file_size_bytes", 0) model = summary.get("model", "unknown") model_counts[model] = model_counts.get(model, 0) + 1 return { "total_videos_with_summaries": len(video_ids), "total_summaries": total_summaries, "total_size_bytes": total_size_bytes, "total_size_mb": round(total_size_bytes / (1024 * 1024), 2), "model_distribution": model_counts, "video_ids": video_ids } # Global instance storage_service = SummaryStorageService()
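

# Minimal usage sketch for local experimentation. The video ID and the summary
# fields below ("summary_text", "model") are illustrative assumptions, not part
# of this module's contract; callers may store any JSON-serializable fields.
if __name__ == "__main__":
    demo_service = SummaryStorageService()

    # Save a summary; the returned path is relative to the base storage path.
    saved_path = demo_service.save_summary(
        "demo_video",
        {"summary_text": "Example summary", "model": "example-model"},
    )
    print(f"Saved summary at: {saved_path}")

    # List summaries for the video (most recent first) and show overall stats.
    print(demo_service.list_summaries("demo_video"))
    print(demo_service.get_summary_stats())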