197 lines
7.3 KiB
Python
197 lines
7.3 KiB
Python
"""Service for managing file-based summary storage."""
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
from typing import List, Dict, Optional, Any
|
|
from datetime import datetime
|
|
import logging
|
|
|
|
# Module-level logger named after this module; used by the service below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class SummaryStorageService:
    """Service for managing summary files in the file system.

    Each summary is stored as a JSON object at
    ``<base_path>/<video_id>/summary_<timestamp>.json``.

    NOTE(review): ``video_id`` and ``timestamp`` are interpolated into file
    paths (and, in ``get_summary``, into a glob pattern) without validation.
    Confirm that callers never pass untrusted values containing path
    separators, ``..`` or glob metacharacters.
    """

    def __init__(self, base_storage_path: str = "video_storage/summaries"):
        """Create the service and make sure the base directory exists.

        Args:
            base_storage_path: Root directory under which one sub-directory
                per video is kept.
        """
        self.base_path = Path(base_storage_path)
        self.base_path.mkdir(parents=True, exist_ok=True)

    def get_video_summary_dir(self, video_id: str) -> Path:
        """Get the directory path for a video's summaries."""
        return self.base_path / video_id

    def _load_summary_file(self, summary_file: Path) -> Dict[str, Any]:
        """Read one summary file and attach file metadata to its contents.

        Raises:
            ValueError: If the file is not valid UTF-8 JSON (this includes
                ``json.JSONDecodeError`` and ``UnicodeDecodeError``) or its
                top-level value is not a JSON object.
            OSError: If the file cannot be opened or stat'ed.
        """
        with open(summary_file, 'r', encoding='utf-8') as f:
            summary_data = json.load(f)

        # A list/string/number would make ``.update`` below raise an
        # AttributeError that callers do not expect; report it as bad data.
        if not isinstance(summary_data, dict):
            raise ValueError(
                f"expected a JSON object, got {type(summary_data).__name__}"
            )

        # Add file metadata
        file_stat = summary_file.stat()
        summary_data.update({
            "file_path": str(summary_file.relative_to(self.base_path)),
            "file_size_bytes": file_stat.st_size,
            "file_created_at": datetime.fromtimestamp(file_stat.st_ctime).isoformat(),
            "file_modified_at": datetime.fromtimestamp(file_stat.st_mtime).isoformat(),
        })
        return summary_data

    @staticmethod
    def _generated_at_key(summary: Dict[str, Any]) -> str:
        """Return a string sort key for a summary's ``generated_at`` field.

        Missing, null or non-string values fall back to the epoch so that
        mixed data can never make the sort raise ``TypeError``.
        """
        value = summary.get('generated_at')
        return value if isinstance(value, str) else '1970-01-01T00:00:00'

    def list_summaries(self, video_id: str) -> List[Dict[str, Any]]:
        """List all summaries for a given video ID.

        Files that cannot be read or parsed are logged and skipped.

        Returns:
            The loaded summaries (each extended with ``file_*`` metadata),
            sorted by ``generated_at`` with the most recent first. Empty when
            the video has no summary directory.
        """
        video_dir = self.get_video_summary_dir(video_id)

        if not video_dir.exists():
            return []

        summaries = []

        # Iterate in filename order so ties in the sort below stay stable.
        for summary_file in sorted(video_dir.glob("summary_*.json")):
            try:
                summaries.append(self._load_summary_file(summary_file))
            except (ValueError, KeyError, OSError) as e:
                logger.warning(f"Failed to load summary file {summary_file}: {e}")

        # Sort by generated_at timestamp, most recent first
        summaries.sort(key=self._generated_at_key, reverse=True)

        return summaries

    def get_summary(self, video_id: str, timestamp: str) -> Optional[Dict[str, Any]]:
        """Get a specific summary by video ID and timestamp.

        An exact ``summary_<timestamp>.json`` is preferred. Otherwise the
        first file (in sorted filename order) whose name contains
        ``timestamp`` is used.

        Returns:
            The summary extended with ``file_*`` metadata, or ``None`` when
            no file matches or the file cannot be loaded.
        """
        video_dir = self.get_video_summary_dir(video_id)

        # Try to find the summary file by timestamp
        summary_file = video_dir / f"summary_{timestamp}.json"

        if not summary_file.exists():
            # If exact timestamp not found, try to find by partial match.
            # An empty timestamp would produce "summary_**.json", which
            # pathlib before Python 3.13 rejects with ValueError.
            pattern = f"summary_*{timestamp}*.json" if timestamp else "summary_*.json"
            # glob order is unspecified; sort so the choice is deterministic.
            matching_files = sorted(video_dir.glob(pattern))
            if not matching_files:
                return None
            summary_file = matching_files[0]

        try:
            return self._load_summary_file(summary_file)
        except (ValueError, KeyError, OSError) as e:
            logger.error(f"Failed to load summary file {summary_file}: {e}")
            return None

    def save_summary(
        self,
        video_id: str,
        summary_data: Dict[str, Any],
        timestamp: Optional[str] = None
    ) -> str:
        """Save a summary to the file system.

        ``summary_data`` is updated in place: ``video_id`` is always set and
        ``generated_at`` is filled in when absent.

        Args:
            video_id: Video the summary belongs to.
            summary_data: JSON-serialisable summary contents.
            timestamp: File name suffix. When given, an existing file with
                that timestamp is overwritten. When omitted, the current
                local time (second resolution) is used and a numeric suffix
                is appended if that name is already taken.

        Returns:
            Path of the written file relative to the base storage path.

        Raises:
            TypeError: If ``summary_data`` is not JSON-serialisable. Nothing
                is written in that case.
            OSError: If the directory or file cannot be written.
        """
        # Ensure video_id and generated_at are set
        summary_data["video_id"] = video_id
        if "generated_at" not in summary_data:
            summary_data["generated_at"] = datetime.now().isoformat()

        # Serialise before touching the disk: opening the target in 'w' mode
        # first would truncate an existing summary and leave a corrupt file
        # behind if serialisation then failed.
        payload = json.dumps(summary_data, indent=2, ensure_ascii=False)

        video_dir = self.get_video_summary_dir(video_id)
        video_dir.mkdir(parents=True, exist_ok=True)

        # Generate timestamp if not provided
        auto_named = not timestamp
        if auto_named:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        summary_file = video_dir / f"summary_{timestamp}.json"

        if auto_named:
            # Two saves within the same second must not overwrite each other.
            suffix = 0
            while summary_file.exists():
                suffix += 1
                summary_file = video_dir / f"summary_{timestamp}_{suffix}.json"

        try:
            with open(summary_file, 'w', encoding='utf-8') as f:
                f.write(payload)
        except OSError as e:
            logger.error(f"Failed to save summary file {summary_file}: {e}")
            raise

        logger.info(f"Saved summary for video {video_id} to {summary_file}")
        return str(summary_file.relative_to(self.base_path))

    def delete_summary(self, video_id: str, timestamp: str) -> bool:
        """Delete a specific summary file.

        The video directory is removed as well when it ends up empty; a
        failure of that cleanup does not affect the return value.

        Returns:
            ``True`` if the file was deleted, ``False`` if it did not exist
            or could not be deleted.
        """
        video_dir = self.get_video_summary_dir(video_id)
        summary_file = video_dir / f"summary_{timestamp}.json"

        # Attempt the unlink directly instead of checking exists() first, so
        # a concurrent delete cannot slip in between check and removal.
        try:
            summary_file.unlink()
        except (FileNotFoundError, NotADirectoryError):
            logger.warning(f"Summary file {summary_file} not found")
            return False
        except OSError as e:
            logger.error(f"Failed to delete summary file {summary_file}: {e}")
            return False

        logger.info(f"Deleted summary file {summary_file}")

        # Clean up directory if empty. This is best effort: the summary is
        # already gone, so a cleanup failure must not be reported as a
        # failed delete.
        try:
            if video_dir.exists() and not any(video_dir.iterdir()):
                video_dir.rmdir()
                logger.info(f"Removed empty directory {video_dir}")
        except OSError as e:
            logger.warning(f"Failed to remove empty directory {video_dir}: {e}")

        return True

    def get_videos_with_summaries(self) -> List[str]:
        """Get list of video IDs that have summaries.

        Returns:
            Sorted names of the sub-directories of the base path that
            contain at least one ``summary_*.json`` file.
        """
        if not self.base_path.exists():
            return []

        # any() stops at the first matching file instead of listing them all.
        return sorted(
            entry.name
            for entry in self.base_path.iterdir()
            if entry.is_dir() and any(entry.glob("summary_*.json"))
        )

    def get_summary_stats(self) -> Dict[str, Any]:
        """Get statistics about stored summaries.

        Only summaries that load successfully are counted.

        Returns:
            A dict with the number of videos and summaries, the total size
            in bytes and MiB (rounded to two decimals), a per-model count
            (``"unknown"`` when a summary has no ``model`` key) and the
            sorted list of video IDs.
        """
        video_ids = self.get_videos_with_summaries()

        total_summaries = 0
        total_size_bytes = 0
        model_counts: Dict[Any, int] = {}

        for video_id in video_ids:
            summaries = self.list_summaries(video_id)
            total_summaries += len(summaries)

            for summary in summaries:
                total_size_bytes += summary.get("file_size_bytes", 0)
                model = summary.get("model", "unknown")
                model_counts[model] = model_counts.get(model, 0) + 1

        return {
            "total_videos_with_summaries": len(video_ids),
            "total_summaries": total_summaries,
            "total_size_bytes": total_size_bytes,
            "total_size_mb": round(total_size_bytes / (1024 * 1024), 2),
            "model_distribution": model_counts,
            "video_ids": video_ids,
        }
|
|
|
|
|
|
# Global instance
# Shared service using the default storage path; constructing it creates the
# base directory as a side effect of importing this module.
storage_service = SummaryStorageService()