"""Unified database storage service for summaries.""" import json import logging from datetime import datetime from typing import List, Dict, Optional, Any from sqlalchemy import create_engine, desc from sqlalchemy.orm import sessionmaker, Session from sqlalchemy.exc import SQLAlchemyError from backend.core.config import settings from backend.core.database_registry import registry from backend.models import Summary from backend.models.pipeline import PipelineResult logger = logging.getLogger(__name__) class DatabaseStorageService: """Unified storage service for summaries using SQLite database.""" def __init__(self): """Initialize database connection.""" self.engine = create_engine(settings.DATABASE_URL) registry.create_all_tables(self.engine) self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine) logger.info("DatabaseStorageService initialized with database: %s", settings.DATABASE_URL) def get_session(self) -> Session: """Get a database session.""" return self.SessionLocal() def save_summary_from_pipeline(self, pipeline_result: PipelineResult) -> Summary: """Save pipeline result to database. Args: pipeline_result: Completed pipeline result Returns: Saved Summary model instance """ with self.get_session() as session: try: # Extract data from pipeline result summary_content = "" key_points = [] main_themes = [] if pipeline_result.summary: if isinstance(pipeline_result.summary, dict): summary_content = pipeline_result.summary.get('content', '') key_points = pipeline_result.summary.get('key_points', []) main_themes = pipeline_result.summary.get('main_themes', []) else: summary_content = str(pipeline_result.summary) # Extract quality score quality_score = None if pipeline_result.quality_metrics: quality_score = pipeline_result.quality_metrics.overall_score # Create Summary instance summary = Summary( video_id=pipeline_result.video_id, video_url=pipeline_result.video_url, video_title=pipeline_result.metadata.get('title') if pipeline_result.metadata else None, channel_name=pipeline_result.metadata.get('channel') if pipeline_result.metadata else None, video_duration=pipeline_result.metadata.get('duration_seconds') if pipeline_result.metadata else None, transcript=pipeline_result.transcript, summary=summary_content, key_points=key_points, main_themes=main_themes, model_used=pipeline_result.model_used or 'deepseek', processing_time=pipeline_result.processing_time, quality_score=quality_score, summary_length=pipeline_result.config.summary_length if pipeline_result.config else 'standard', focus_areas=pipeline_result.config.focus_areas if pipeline_result.config else [], source='frontend', # Mark as created via frontend/API job_id=pipeline_result.job_id, created_at=datetime.utcnow() ) session.add(summary) session.commit() session.refresh(summary) logger.info(f"Saved summary {summary.id} for video {summary.video_id}") return summary except SQLAlchemyError as e: logger.error(f"Database error saving summary: {e}") session.rollback() raise except Exception as e: logger.error(f"Error saving summary: {e}") session.rollback() raise def save_summary_from_dict(self, summary_data: Dict[str, Any]) -> Summary: """Save summary from dictionary (for CLI compatibility). Args: summary_data: Dictionary containing summary data Returns: Saved Summary model instance """ with self.get_session() as session: try: # Ensure required fields have defaults summary_data.setdefault('source', 'cli') summary_data.setdefault('created_at', datetime.utcnow()) # Handle list fields that might be strings for field in ['key_points', 'main_themes', 'focus_areas']: if field in summary_data and isinstance(summary_data[field], str): try: summary_data[field] = json.loads(summary_data[field]) except json.JSONDecodeError: summary_data[field] = [] summary = Summary(**summary_data) session.add(summary) session.commit() session.refresh(summary) logger.info(f"Saved summary {summary.id} from dict") return summary except SQLAlchemyError as e: logger.error(f"Database error saving summary from dict: {e}") session.rollback() raise def get_summary(self, summary_id: str) -> Optional[Summary]: """Get a specific summary by ID. Args: summary_id: UUID of the summary Returns: Summary instance or None if not found """ with self.get_session() as session: return session.query(Summary).filter_by(id=summary_id).first() def get_summary_by_video(self, video_id: str) -> List[Summary]: """Get all summaries for a specific video ID. Args: video_id: YouTube video ID Returns: List of Summary instances """ with self.get_session() as session: return session.query(Summary).filter_by(video_id=video_id).order_by(desc(Summary.created_at)).all() def list_summaries( self, limit: int = 10, skip: int = 0, model: Optional[str] = None, source: Optional[str] = None, user_id: Optional[str] = None ) -> List[Summary]: """List summaries with optional filtering. Args: limit: Maximum number of results skip: Number of results to skip model: Filter by AI model used source: Filter by source (frontend/cli/api) user_id: Filter by user ID Returns: List of Summary instances """ with self.get_session() as session: query = session.query(Summary) # Apply filters if model: query = query.filter_by(model_used=model) if source: query = query.filter_by(source=source) if user_id: query = query.filter_by(user_id=user_id) # Order by creation date (newest first) and apply pagination return query.order_by(desc(Summary.created_at)).offset(skip).limit(limit).all() def search_summaries( self, query: str, limit: int = 10 ) -> List[Summary]: """Search summaries by title or content. Args: query: Search query string limit: Maximum number of results Returns: List of matching Summary instances """ with self.get_session() as session: search_pattern = f"%{query}%" return session.query(Summary).filter( (Summary.video_title.ilike(search_pattern)) | (Summary.summary.ilike(search_pattern)) ).limit(limit).all() def get_summary_stats(self) -> Dict[str, Any]: """Get statistics about stored summaries. Returns: Dictionary with summary statistics """ with self.get_session() as session: from sqlalchemy import func total_count = session.query(Summary).count() # Model distribution model_stats = session.query( Summary.model_used, func.count(Summary.id) ).group_by(Summary.model_used).all() # Source distribution source_stats = session.query( Summary.source, func.count(Summary.id) ).group_by(Summary.source).all() # Recent activity (last 7 days) from datetime import timedelta recent_date = datetime.utcnow() - timedelta(days=7) recent_count = session.query(Summary).filter( Summary.created_at >= recent_date ).count() # Average scores avg_quality = session.query(func.avg(Summary.quality_score)).scalar() avg_processing_time = session.query(func.avg(Summary.processing_time)).scalar() return { "total_summaries": total_count, "recent_summaries_7d": recent_count, "model_distribution": dict(model_stats), "source_distribution": dict(source_stats), "average_quality_score": round(avg_quality, 2) if avg_quality else None, "average_processing_time": round(avg_processing_time, 2) if avg_processing_time else None } def update_summary(self, summary_id: str, updates: Dict[str, Any]) -> Optional[Summary]: """Update an existing summary. Args: summary_id: UUID of the summary to update updates: Dictionary of fields to update Returns: Updated Summary instance or None if not found """ with self.get_session() as session: summary = session.query(Summary).filter_by(id=summary_id).first() if summary: for key, value in updates.items(): if hasattr(summary, key): setattr(summary, key, value) summary.updated_at = datetime.utcnow() session.commit() session.refresh(summary) logger.info(f"Updated summary {summary_id}") return summary def delete_summary(self, summary_id: str) -> bool: """Delete a summary from database. Args: summary_id: UUID of the summary to delete Returns: True if deleted, False if not found """ with self.get_session() as session: summary = session.query(Summary).filter_by(id=summary_id).first() if summary: session.delete(summary) session.commit() logger.info(f"Deleted summary {summary_id}") return True return False # Global instance for easy access database_storage_service = DatabaseStorageService()