"""Unified database storage service for summaries."""
|
|
|
|
import json
import logging
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

from sqlalchemy import create_engine, desc, func
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session, sessionmaker

from backend.core.config import settings
from backend.core.database_registry import registry
from backend.models import Summary
from backend.models.pipeline import PipelineResult
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class DatabaseStorageService:
    """Unified storage service for summaries using SQLite database."""

    def __init__(self):
        """Initialize the engine, create all registered tables, and build a session factory."""
        self.engine = create_engine(settings.DATABASE_URL)
        registry.create_all_tables(self.engine)
        self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
        logger.info("DatabaseStorageService initialized with database: %s", settings.DATABASE_URL)

    def get_session(self) -> Session:
        """Get a new database session.

        Returns:
            A fresh Session; the caller is responsible for closing it
            (typically via ``with self.get_session() as session:``).
        """
        return self.SessionLocal()

    def save_summary_from_pipeline(self, pipeline_result: PipelineResult) -> Summary:
        """Save pipeline result to database.

        Args:
            pipeline_result: Completed pipeline result

        Returns:
            Saved Summary model instance

        Raises:
            SQLAlchemyError: If the database write fails (after rollback).
        """
        with self.get_session() as session:
            try:
                # Extract summary text/lists; pipeline may return a dict or a plain value.
                summary_content = ""
                key_points: List[Any] = []
                main_themes: List[Any] = []

                if pipeline_result.summary:
                    if isinstance(pipeline_result.summary, dict):
                        summary_content = pipeline_result.summary.get('content', '')
                        key_points = pipeline_result.summary.get('key_points', [])
                        main_themes = pipeline_result.summary.get('main_themes', [])
                    else:
                        summary_content = str(pipeline_result.summary)

                # Quality score is optional — only present when metrics were computed.
                quality_score = None
                if pipeline_result.quality_metrics:
                    quality_score = pipeline_result.quality_metrics.overall_score

                # Normalize optional containers once instead of per-field ternaries.
                metadata = pipeline_result.metadata or {}
                config = pipeline_result.config

                summary = Summary(
                    video_id=pipeline_result.video_id,
                    video_url=pipeline_result.video_url,
                    video_title=metadata.get('title'),
                    channel_name=metadata.get('channel'),
                    video_duration=metadata.get('duration_seconds'),
                    transcript=pipeline_result.transcript,
                    summary=summary_content,
                    key_points=key_points,
                    main_themes=main_themes,
                    model_used=pipeline_result.model_used or 'deepseek',
                    processing_time=pipeline_result.processing_time,
                    quality_score=quality_score,
                    summary_length=config.summary_length if config else 'standard',
                    focus_areas=config.focus_areas if config else [],
                    source='frontend',  # Mark as created via frontend/API
                    job_id=pipeline_result.job_id,
                    # NOTE(review): naive UTC timestamp kept for compatibility with
                    # existing rows; datetime.utcnow() is deprecated in Python 3.12.
                    created_at=datetime.utcnow()
                )

                session.add(summary)
                session.commit()
                session.refresh(summary)

                logger.info("Saved summary %s for video %s", summary.id, summary.video_id)
                return summary

            except SQLAlchemyError:
                logger.exception("Database error saving summary")
                session.rollback()
                raise
            except Exception:
                logger.exception("Error saving summary")
                session.rollback()
                raise

    def save_summary_from_dict(self, summary_data: Dict[str, Any]) -> Summary:
        """Save summary from dictionary (for CLI compatibility).

        Args:
            summary_data: Dictionary containing summary data. The caller's
                dictionary is not mutated; defaults and JSON decoding are
                applied to an internal copy.

        Returns:
            Saved Summary model instance

        Raises:
            SQLAlchemyError: If the database write fails (after rollback).
        """
        with self.get_session() as session:
            try:
                # Work on a shallow copy so the caller's dict is not mutated.
                data = dict(summary_data)

                # Ensure required fields have defaults
                data.setdefault('source', 'cli')
                data.setdefault('created_at', datetime.utcnow())

                # Handle list fields that might arrive as JSON-encoded strings.
                for field in ('key_points', 'main_themes', 'focus_areas'):
                    if field in data and isinstance(data[field], str):
                        try:
                            data[field] = json.loads(data[field])
                        except json.JSONDecodeError:
                            # Unparseable payload degrades to an empty list
                            # rather than failing the whole save.
                            data[field] = []

                summary = Summary(**data)
                session.add(summary)
                session.commit()
                session.refresh(summary)

                logger.info("Saved summary %s from dict", summary.id)
                return summary

            except SQLAlchemyError:
                logger.exception("Database error saving summary from dict")
                session.rollback()
                raise
            except Exception:
                # Mirror save_summary_from_pipeline: roll back on any failure
                # (e.g. unexpected keyword in Summary(**data)) and re-raise.
                logger.exception("Error saving summary from dict")
                session.rollback()
                raise

    def get_summary(self, summary_id: str) -> Optional[Summary]:
        """Get a specific summary by ID.

        Args:
            summary_id: UUID of the summary

        Returns:
            Summary instance or None if not found
        """
        with self.get_session() as session:
            return session.query(Summary).filter_by(id=summary_id).first()

    def get_summary_by_video(self, video_id: str) -> List[Summary]:
        """Get all summaries for a specific video ID.

        Args:
            video_id: YouTube video ID

        Returns:
            List of Summary instances, newest first
        """
        with self.get_session() as session:
            return (
                session.query(Summary)
                .filter_by(video_id=video_id)
                .order_by(desc(Summary.created_at))
                .all()
            )

    def list_summaries(
        self,
        limit: int = 10,
        skip: int = 0,
        model: Optional[str] = None,
        source: Optional[str] = None,
        user_id: Optional[str] = None
    ) -> List[Summary]:
        """List summaries with optional filtering.

        Args:
            limit: Maximum number of results
            skip: Number of results to skip
            model: Filter by AI model used
            source: Filter by source (frontend/cli/api)
            user_id: Filter by user ID

        Returns:
            List of Summary instances, newest first
        """
        with self.get_session() as session:
            query = session.query(Summary)

            # Apply only the filters that were provided.
            if model:
                query = query.filter_by(model_used=model)
            if source:
                query = query.filter_by(source=source)
            if user_id:
                query = query.filter_by(user_id=user_id)

            # Order by creation date (newest first) and apply pagination
            return query.order_by(desc(Summary.created_at)).offset(skip).limit(limit).all()

    def search_summaries(
        self,
        query: str,
        limit: int = 10
    ) -> List[Summary]:
        """Search summaries by title or content.

        Args:
            query: Search query string (matched case-insensitively as a substring)
            limit: Maximum number of results

        Returns:
            List of matching Summary instances
        """
        with self.get_session() as session:
            search_pattern = f"%{query}%"
            return session.query(Summary).filter(
                (Summary.video_title.ilike(search_pattern)) |
                (Summary.summary.ilike(search_pattern))
            ).limit(limit).all()

    def get_summary_stats(self) -> Dict[str, Any]:
        """Get statistics about stored summaries.

        Returns:
            Dictionary with totals, 7-day activity, per-model and per-source
            distributions, and average quality/processing-time scores.
        """
        with self.get_session() as session:
            total_count = session.query(Summary).count()

            # Model distribution
            model_stats = session.query(
                Summary.model_used,
                func.count(Summary.id)
            ).group_by(Summary.model_used).all()

            # Source distribution
            source_stats = session.query(
                Summary.source,
                func.count(Summary.id)
            ).group_by(Summary.source).all()

            # Recent activity (last 7 days)
            recent_date = datetime.utcnow() - timedelta(days=7)
            recent_count = session.query(Summary).filter(
                Summary.created_at >= recent_date
            ).count()

            # Average scores (SQL AVG returns None when there are no rows).
            avg_quality = session.query(func.avg(Summary.quality_score)).scalar()
            avg_processing_time = session.query(func.avg(Summary.processing_time)).scalar()

            return {
                "total_summaries": total_count,
                "recent_summaries_7d": recent_count,
                "model_distribution": dict(model_stats),
                "source_distribution": dict(source_stats),
                # `is not None` (not truthiness) so a legitimate 0.0 average
                # is reported instead of being collapsed to None.
                "average_quality_score": round(avg_quality, 2) if avg_quality is not None else None,
                "average_processing_time": round(avg_processing_time, 2) if avg_processing_time is not None else None,
            }

    def update_summary(self, summary_id: str, updates: Dict[str, Any]) -> Optional[Summary]:
        """Update an existing summary.

        Unknown keys in ``updates`` are silently ignored; only attributes
        that exist on the model are written.

        Args:
            summary_id: UUID of the summary to update
            updates: Dictionary of fields to update

        Returns:
            Updated Summary instance or None if not found
        """
        with self.get_session() as session:
            summary = session.query(Summary).filter_by(id=summary_id).first()
            if summary:
                for key, value in updates.items():
                    if hasattr(summary, key):
                        setattr(summary, key, value)
                summary.updated_at = datetime.utcnow()
                session.commit()
                session.refresh(summary)
                logger.info("Updated summary %s", summary_id)
            return summary

    def delete_summary(self, summary_id: str) -> bool:
        """Delete a summary from database.

        Args:
            summary_id: UUID of the summary to delete

        Returns:
            True if deleted, False if not found
        """
        with self.get_session() as session:
            summary = session.query(Summary).filter_by(id=summary_id).first()
            if summary:
                session.delete(summary)
                session.commit()
                logger.info("Deleted summary %s", summary_id)
                return True
            return False
|
|
|
|
|
|
# Global instance for easy access
# NOTE: instantiating here connects to the database and creates tables at
# import time (see DatabaseStorageService.__init__).
database_storage_service = DatabaseStorageService()