# youtube-summarizer/backend/api/summaries.py
#
# 633 lines
# 20 KiB
# Python

"""Summary history management API endpoints."""
from typing import List, Optional, Dict, Any
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, HTTPException, status, Query, BackgroundTasks
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_, desc, func
import json
import csv
import io
import zipfile
from pathlib import Path
from backend.core.database import get_db
from backend.models.summary import Summary
from backend.models.user import User
from backend.api.dependencies import get_current_user, get_current_active_user
from backend.services.export_service import ExportService
# Request/Response models
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime
class SummaryResponse(BaseModel):
    """Serialized view of a stored summary as returned by the API.

    Instances are built from ORM ``Summary`` rows with ``model_validate``,
    so attribute-based population is enabled in the model configuration.
    """

    # Pydantic v2 style configuration. The nested ``class Config`` form is
    # deprecated in v2 and emits a warning at import time; a plain dict is
    # accepted for ``model_config`` and needs no extra import.
    model_config = {"from_attributes": True}

    # --- identity and source video ---
    id: str
    video_id: str
    video_title: str
    video_url: str
    video_duration: Optional[int]  # presumably seconds; stats divide by 60 to get minutes
    channel_name: Optional[str]
    published_at: Optional[datetime]

    # --- generated content ---
    summary: str
    key_points: Optional[List[str]]
    main_themes: Optional[List[str]]
    model_used: Optional[str]
    confidence_score: Optional[float]
    quality_score: Optional[float]

    # --- user-managed fields ---
    is_starred: bool
    notes: Optional[str]
    tags: Optional[List[str]]

    # --- sharing ---
    shared_token: Optional[str]  # token used by the /shared/{token} route
    is_public: bool
    view_count: int  # incremented each time the shared summary is fetched

    # --- bookkeeping ---
    created_at: datetime
    updated_at: datetime
class SummaryListResponse(BaseModel):
    """One page of summaries together with the paging metadata."""

    summaries: List[SummaryResponse]  # rows on the requested page
    total: int  # number of rows matching the query across all pages
    page: int  # page number that was requested (1-based)
    page_size: int  # page size that was requested
    has_more: bool  # True when rows remain beyond this page
class SummaryUpdateRequest(BaseModel):
    """Partial-update payload for a summary.

    Every field is optional; a field left as ``None`` is not touched by the
    update endpoint.
    """

    is_starred: Optional[bool] = Field(default=None)
    notes: Optional[str] = Field(default=None)
    tags: Optional[List[str]] = Field(default=None)
    is_public: Optional[bool] = Field(default=None)
class SummarySearchRequest(BaseModel):
    """Filter and ordering options accepted by the advanced search endpoint."""

    # Free-text and attribute filters; ``None`` / ``False`` means "no filter".
    query: Optional[str] = None
    start_date: Optional[datetime] = None
    end_date: Optional[datetime] = None
    tags: Optional[List[str]] = None
    model: Optional[str] = None
    starred_only: bool = False

    # Ordering. Intended sort keys: created_at, title, duration.
    sort_by: str = "created_at"
    # Direction: "asc" or "desc".
    sort_order: str = "desc"
class BulkDeleteRequest(BaseModel):
    """Payload listing the summaries to remove in a single call."""

    summary_ids: List[str]  # IDs of the summaries to delete
class ShareRequest(BaseModel):
    """Options supplied when creating a share link for a summary."""

    # Whether the summary should be publicly reachable through its token.
    is_public: bool = True
    # Lifetime of the link in days; None means the link never expires.
    expires_in_days: Optional[int] = None
class ExportRequest(BaseModel):
    """Options controlling a summary export."""

    # Output format: "json", "csv" or "zip".
    format: str = "json"
    # Restrict the export to these IDs; None exports all of the user's summaries.
    summary_ids: Optional[List[str]] = None
    # When true, the transcript is added to each exported record.
    include_transcript: bool = False
class UserStatsResponse(BaseModel):
    """Aggregate statistics over all of a user's summaries."""

    total_summaries: int
    starred_count: int
    total_duration_minutes: int  # summed video durations, floor-divided by 60
    total_cost_usd: float  # summed per-summary cost, rounded to 2 decimals
    models_used: Dict[str, int]  # model name -> number of summaries
    summaries_by_month: Dict[str, int]  # "YYYY-MM" -> number of summaries
    top_channels: List[Dict[str, Any]]  # up to five {"name", "count"} entries
    average_quality_score: float  # rounded to 2 decimals
# Router that collects every summary-history endpoint under /api/summaries.
router = APIRouter(
    prefix="/api/summaries",
    tags=["summaries"],
)
@router.get("", response_model=SummaryListResponse)
async def list_summaries(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    starred_only: bool = False,
    search: Optional[str] = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user),
):
    """Return one page of the caller's summaries, newest first.

    Args:
        page: 1-based page number.
        page_size: Rows per page (1-100).
        starred_only: When true, restrict the listing to starred summaries.
        search: Optional substring matched case-insensitively against the
            video title, the summary text and the channel name.
        db: Database session.
        current_user: Authenticated active user; only their rows are listed.

    Returns:
        A ``SummaryListResponse`` holding the page rows, the total number of
        matching rows and a ``has_more`` flag.
    """
    # Collect every WHERE criterion first; filter(*criteria) ANDs them.
    criteria = [Summary.user_id == current_user.id]
    if starred_only:
        criteria.append(Summary.is_starred == True)  # noqa: E712 - SQL expression
    if search:
        like_pattern = f"%{search}%"
        criteria.append(
            or_(
                Summary.video_title.ilike(like_pattern),
                Summary.summary.ilike(like_pattern),
                Summary.channel_name.ilike(like_pattern),
            )
        )
    matching = db.query(Summary).filter(*criteria)

    # Count before paginating so the total covers all pages.
    total = matching.count()

    skip = (page - 1) * page_size
    page_rows = (
        matching.order_by(desc(Summary.created_at))
        .offset(skip)
        .limit(page_size)
        .all()
    )

    return SummaryListResponse(
        summaries=[SummaryResponse.model_validate(row) for row in page_rows],
        total=total,
        page=page,
        page_size=page_size,
        has_more=(skip + len(page_rows)) < total,
    )
@router.get("/starred", response_model=List[SummaryResponse])
async def get_starred_summaries(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user),
):
    """Return every starred summary owned by the caller, newest first.

    Args:
        db: Database session.
        current_user: Authenticated active user whose summaries are queried.

    Returns:
        A list of ``SummaryResponse`` objects (unpaginated).
    """
    starred_query = db.query(Summary).filter(
        Summary.user_id == current_user.id,
        Summary.is_starred == True,  # noqa: E712 - SQL expression
    )
    starred_rows = starred_query.order_by(desc(Summary.created_at)).all()

    responses = []
    for row in starred_rows:
        responses.append(SummaryResponse.model_validate(row))
    return responses
@router.get("/stats", response_model=UserStatsResponse)
async def get_user_stats(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """Compute aggregate statistics over all of the caller's summaries.

    Args:
        db: Database session.
        current_user: Authenticated active user whose summaries are aggregated.

    Returns:
        A ``UserStatsResponse``. When the user has no summaries every counter
        is zero and every collection is empty.
    """
    # Function-local import so this block does not depend on the module's
    # import list.
    from collections import Counter

    summaries = db.query(Summary).filter(Summary.user_id == current_user.id).all()

    if not summaries:
        return UserStatsResponse(
            total_summaries=0,
            starred_count=0,
            total_duration_minutes=0,
            total_cost_usd=0,
            models_used={},
            summaries_by_month={},
            top_channels=[],
            average_quality_score=0
        )

    # Simple totals; missing durations and costs count as zero.
    starred_count = sum(1 for s in summaries if s.is_starred)
    total_duration = sum(s.video_duration or 0 for s in summaries)
    total_cost = sum(s.cost_usd or 0 for s in summaries)

    # Frequency tables. Rows without a model or channel name are skipped.
    models_used = Counter(s.model_used for s in summaries if s.model_used)
    summaries_by_month = Counter(s.created_at.strftime("%Y-%m") for s in summaries)
    channel_counts = Counter(s.channel_name for s in summaries if s.channel_name)

    # Five most frequent channels; sorted() is stable, so ties keep
    # first-seen order.
    ranked_channels = sorted(
        channel_counts.items(), key=lambda item: item[1], reverse=True
    )
    top_channels = [
        {"name": name, "count": count} for name, count in ranked_channels[:5]
    ]

    # Average only over rows that actually have a score. The check is
    # ``is not None`` so that a real score of 0.0 still counts; a truthiness
    # test would drop it and inflate the average.
    quality_scores = [
        s.quality_score for s in summaries if s.quality_score is not None
    ]
    avg_quality = sum(quality_scores) / len(quality_scores) if quality_scores else 0

    return UserStatsResponse(
        total_summaries=len(summaries),
        starred_count=starred_count,
        total_duration_minutes=total_duration // 60,
        total_cost_usd=round(total_cost, 2),
        models_used=dict(models_used),
        summaries_by_month=dict(summaries_by_month),
        top_channels=top_channels,
        average_quality_score=round(avg_quality, 2)
    )
@router.get("/{summary_id}", response_model=SummaryResponse)
async def get_summary(
    summary_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user),
):
    """Fetch one of the caller's summaries by its ID.

    Args:
        summary_id: ID of the summary to fetch.
        db: Database session.
        current_user: Authenticated active user; the summary must be theirs.

    Returns:
        The matching summary as a ``SummaryResponse``.

    Raises:
        HTTPException: 404 when no summary with that ID belongs to the user.
    """
    owned_match = db.query(Summary).filter(
        Summary.id == summary_id,
        Summary.user_id == current_user.id,
    )
    found = owned_match.first()

    if found:
        return SummaryResponse.model_validate(found)

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Summary not found",
    )
@router.put("/{summary_id}", response_model=SummaryResponse)
async def update_summary(
    summary_id: str,
    update_data: SummaryUpdateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user),
):
    """Apply a partial update (star, notes, tags, visibility) to a summary.

    Args:
        summary_id: ID of the summary to modify.
        update_data: Fields to change; fields left as ``None`` are untouched.
        db: Database session.
        current_user: Authenticated active user; the summary must be theirs.

    Returns:
        The refreshed summary as a ``SummaryResponse``.

    Raises:
        HTTPException: 404 when no summary with that ID belongs to the user.
    """
    target = (
        db.query(Summary)
        .filter(Summary.id == summary_id, Summary.user_id == current_user.id)
        .first()
    )
    if not target:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Summary not found",
        )

    # Copy across only the fields the client actually supplied.
    for field_name in ("is_starred", "notes", "tags", "is_public"):
        new_value = getattr(update_data, field_name)
        if new_value is not None:
            setattr(target, field_name, new_value)

    target.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(target)
    return SummaryResponse.model_validate(target)
@router.delete("/{summary_id}")
async def delete_summary(
    summary_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user),
):
    """Remove one of the caller's summaries.

    Args:
        summary_id: ID of the summary to delete.
        db: Database session.
        current_user: Authenticated active user; the summary must be theirs.

    Returns:
        A dict with a confirmation ``message``.

    Raises:
        HTTPException: 404 when no summary with that ID belongs to the user.
    """
    doomed = (
        db.query(Summary)
        .filter(Summary.id == summary_id, Summary.user_id == current_user.id)
        .first()
    )
    if not doomed:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Summary not found",
        )

    db.delete(doomed)
    db.commit()
    return {"message": "Summary deleted successfully"}
@router.post("/bulk-delete")
async def bulk_delete_summaries(
    request: BulkDeleteRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """Delete several of the caller's summaries in one transaction.

    The operation is all-or-nothing: if any requested ID is unknown or belongs
    to another user, nothing is deleted.

    Args:
        request: Payload carrying the IDs to delete.
        db: Database session.
        current_user: Authenticated active user; every summary must be theirs.

    Returns:
        A dict with a ``message`` stating how many summaries were deleted.

    Raises:
        HTTPException: 400 when at least one requested ID does not resolve to
            a summary owned by the user.
    """
    # De-duplicate while preserving order. Without this, a request that
    # repeats an ID fails the length comparison below, because the query
    # returns each row only once. A list (not a set) is passed to in_() for
    # compatibility with older SQLAlchemy releases.
    requested_ids = list(dict.fromkeys(request.summary_ids))

    # Fetch only rows owned by the caller; foreign IDs simply do not match.
    summaries = db.query(Summary)\
        .filter(Summary.id.in_(requested_ids))\
        .filter(Summary.user_id == current_user.id)\
        .all()

    if len(summaries) != len(requested_ids):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Some summaries not found or don't belong to you"
        )

    for summary in summaries:
        db.delete(summary)
    db.commit()

    return {"message": f"Deleted {len(summaries)} summaries successfully"}
@router.post("/search", response_model=SummaryListResponse)
async def search_summaries(
    search_params: SummarySearchRequest,
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """Search the caller's summaries with combined filters and sorting.

    Args:
        search_params: Text, date-range, tag, model and starred filters plus
            the sort key and direction.
        page: 1-based page number.
        page_size: Rows per page (1-100).
        db: Database session.
        current_user: Authenticated active user; only their rows are searched.

    Returns:
        A ``SummaryListResponse`` holding the page rows, the total number of
        matching rows and a ``has_more`` flag.
    """
    # Function-local import: the module-level sqlalchemy import does not
    # include inspect().
    from sqlalchemy import inspect as sa_inspect

    query = db.query(Summary).filter(Summary.user_id == current_user.id)

    # Text search: case-insensitive substring match over several columns.
    if search_params.query:
        search_pattern = f"%{search_params.query}%"
        query = query.filter(
            or_(
                Summary.video_title.ilike(search_pattern),
                Summary.summary.ilike(search_pattern),
                Summary.channel_name.ilike(search_pattern),
                Summary.notes.ilike(search_pattern)
            )
        )

    # Date range filter (both bounds inclusive).
    if search_params.start_date:
        query = query.filter(Summary.created_at >= search_params.start_date)
    if search_params.end_date:
        query = query.filter(Summary.created_at <= search_params.end_date)

    # Tags filter: every requested tag must be present.
    if search_params.tags:
        # This is a simple implementation - could be improved with proper JSON queries
        for tag in search_params.tags:
            query = query.filter(Summary.tags.contains([tag]))

    # Model filter
    if search_params.model:
        query = query.filter(Summary.model_used == search_params.model)

    # Starred filter
    if search_params.starred_only:
        query = query.filter(Summary.is_starred == True)  # noqa: E712

    # Sorting. The sort key comes straight from the client, so it is resolved
    # against the model's mapped column attributes rather than via a bare
    # getattr(), which would also accept methods, relationships or dunder
    # names and fail at query time. The documented short names are mapped to
    # the real column attributes; anything unknown falls back to created_at.
    sort_aliases = {"title": "video_title", "duration": "video_duration"}
    sort_key = sort_aliases.get(search_params.sort_by, search_params.sort_by)
    sortable_keys = {prop.key for prop in sa_inspect(Summary).column_attrs}
    if sort_key in sortable_keys:
        sort_column = getattr(Summary, sort_key)
    else:
        sort_column = Summary.created_at

    # Any direction other than "asc" is treated as descending.
    if search_params.sort_order == "asc":
        query = query.order_by(sort_column)
    else:
        query = query.order_by(desc(sort_column))

    # Count before paginating so the total covers all pages.
    total = query.count()

    offset = (page - 1) * page_size
    summaries = query.offset(offset).limit(page_size).all()
    has_more = (offset + len(summaries)) < total

    return SummaryListResponse(
        summaries=[SummaryResponse.model_validate(s) for s in summaries],
        total=total,
        page=page,
        page_size=page_size,
        has_more=has_more
    )
# The response mixes strings with the boolean ``is_public``, so the value type
# must be ``Any``. ``Dict[str, str]`` makes response validation reject the
# boolean under Pydantic v2, and the request fails after the commit.
@router.post("/{summary_id}/share", response_model=Dict[str, Any])
async def share_summary(
    summary_id: str,
    share_request: ShareRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """Create (or reuse) a share token for a summary and set its visibility.

    Args:
        summary_id: ID of the summary to share.
        share_request: Desired visibility for the summary.
        db: Database session.
        current_user: Authenticated active user; the summary must be theirs.

    Returns:
        A dict with ``share_url`` (str), ``token`` (str) and ``is_public``
        (bool).

    Raises:
        HTTPException: 404 when no summary with that ID belongs to the user.
    """
    summary = db.query(Summary)\
        .filter(Summary.id == summary_id)\
        .filter(Summary.user_id == current_user.id)\
        .first()

    if not summary:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Summary not found"
        )

    # Reuse an existing token so previously distributed links keep working.
    if not summary.shared_token:
        # presumably sets summary.shared_token - verify against the model
        summary.generate_share_token()

    # NOTE(review): share_request.expires_in_days is accepted but never
    # applied here; links do not expire.
    summary.is_public = share_request.is_public
    summary.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(summary)

    # Build share URL (adjust based on your frontend URL)
    base_url = "http://localhost:3000"  # This should come from config
    share_url = f"{base_url}/shared/{summary.shared_token}"

    return {
        "share_url": share_url,
        "token": summary.shared_token,
        "is_public": summary.is_public
    }
@router.get("/shared/{token}", response_model=SummaryResponse)
async def get_shared_summary(
    token: str,
    db: Session = Depends(get_db),
):
    """Return a publicly shared summary looked up by its share token.

    No authentication is required. Each successful lookup increases the
    summary's view counter by one.

    Args:
        token: Share token identifying the summary.
        db: Database session.

    Returns:
        The shared summary as a ``SummaryResponse``.

    Raises:
        HTTPException: 404 when the token is unknown or the summary is not
            public.
    """
    shared = (
        db.query(Summary)
        .filter(
            Summary.shared_token == token,
            Summary.is_public == True,  # noqa: E712 - SQL expression
        )
        .first()
    )
    if not shared:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Shared summary not found or not public",
        )

    # A missing counter is treated as zero before incrementing.
    views_so_far = shared.view_count or 0
    shared.view_count = views_so_far + 1
    db.commit()

    return SummaryResponse.model_validate(shared)
# Characters replaced with "-" in ZIP member names: path separators, characters
# that are invalid in file names on common platforms, and ASCII control
# characters.
_UNSAFE_FILENAME_CHARS = {ord(ch): "-" for ch in '/\\:*?"<>|'}
_UNSAFE_FILENAME_CHARS.update({code: "-" for code in range(32)})


def _safe_archive_name(title, max_length=50):
    """Return a ZIP-member-safe file stem derived from a video title."""
    stem = (title or "untitled")[:max_length].translate(_UNSAFE_FILENAME_CHARS)
    # Leading/trailing dots and spaces are awkward or invalid on some platforms.
    return stem.strip(" .") or "untitled"


def _build_json_export(summaries, include_transcript):
    """Serialize summaries to a pretty-printed JSON array string."""
    records = []
    for summary in summaries:
        record = {
            "id": summary.id,
            "video_title": summary.video_title,
            "video_url": summary.video_url,
            "channel": summary.channel_name,
            "summary": summary.summary,
            "key_points": summary.key_points,
            "main_themes": summary.main_themes,
            "notes": summary.notes,
            "tags": summary.tags,
            "created_at": summary.created_at.isoformat(),
            "model": summary.model_used
        }
        if include_transcript:
            record["transcript"] = summary.transcript
        records.append(record)
    return json.dumps(records, indent=2, default=str)


def _build_csv_export(summaries, include_transcript):
    """Write summaries as CSV into a rewound in-memory text buffer."""
    buffer = io.StringIO()
    writer = csv.writer(buffer)

    headers = ["ID", "Video Title", "URL", "Channel", "Summary", "Key Points",
               "Main Themes", "Notes", "Tags", "Created At", "Model"]
    if include_transcript:
        headers.append("Transcript")
    writer.writerow(headers)

    for summary in summaries:
        # List-valued fields are JSON-encoded so each stays in a single cell.
        row = [
            summary.id,
            summary.video_title,
            summary.video_url,
            summary.channel_name,
            summary.summary,
            json.dumps(summary.key_points) if summary.key_points else "",
            json.dumps(summary.main_themes) if summary.main_themes else "",
            summary.notes or "",
            json.dumps(summary.tags) if summary.tags else "",
            summary.created_at.isoformat(),
            summary.model_used or ""
        ]
        if include_transcript:
            row.append(summary.transcript or "")
        writer.writerow(row)

    buffer.seek(0)
    return buffer


def _render_markdown(summary):
    """Render one summary as the Markdown document stored in the ZIP export."""
    # f-strings (rather than '- ' + item) tolerate non-string list items.
    key_points = "\n".join(f"- {point}" for point in (summary.key_points or []))
    main_themes = "\n".join(f"- {theme}" for theme in (summary.main_themes or []))
    lines = [
        f"# {summary.video_title}",
        f"**URL**: {summary.video_url}",
        f"**Channel**: {summary.channel_name}",
        f"**Date**: {summary.created_at.strftime('%Y-%m-%d')}",
        f"**Model**: {summary.model_used}",
        "## Summary",
        f"{summary.summary}",
        "## Key Points",
        key_points,
        "## Main Themes",
        main_themes,
    ]
    document = "\n".join(lines) + "\n"
    if summary.notes:
        document += f"\n## Notes\n\n{summary.notes}\n"
    return document


def _build_zip_export(summaries):
    """Build a rewound in-memory ZIP: one Markdown file per summary plus a JSON index."""
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
        index = []
        for position, summary in enumerate(summaries, start=1):
            # The numeric prefix keeps names unique even when titles collide.
            filename = f"{position:03d}_{_safe_archive_name(summary.video_title)}.md"
            zipf.writestr(f"summaries/{filename}", _render_markdown(summary))
            index.append({
                "id": summary.id,
                "video_title": summary.video_title,
                "video_url": summary.video_url,
                "summary": summary.summary,
                "created_at": summary.created_at.isoformat()
            })
        zipf.writestr("summaries.json", json.dumps(index, indent=2, default=str))
    # Rewind only after the archive is closed and its central directory written.
    zip_buffer.seek(0)
    return zip_buffer


@router.post("/export")
async def export_summaries(
    export_request: ExportRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_active_user)
):
    """Export the caller's summaries as a JSON, CSV or ZIP download.

    Args:
        export_request: Output format, optional list of summary IDs to
            restrict the export to, and whether to include transcripts. The
            ZIP format does not include transcripts.
        db: Database session.
        current_user: Authenticated active user; only their rows are exported.

    Returns:
        A ``StreamingResponse`` with an attachment ``Content-Disposition``.

    Raises:
        HTTPException: 404 when no summaries match, 400 when the requested
            format is not one of "json", "csv" or "zip".
    """
    query = db.query(Summary).filter(Summary.user_id == current_user.id)
    if export_request.summary_ids:
        query = query.filter(Summary.id.in_(export_request.summary_ids))
    summaries = query.order_by(desc(Summary.created_at)).all()

    if not summaries:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No summaries to export"
        )

    if export_request.format == "json":
        json_str = _build_json_export(summaries, export_request.include_transcript)
        return StreamingResponse(
            io.StringIO(json_str),
            media_type="application/json",
            headers={"Content-Disposition": "attachment; filename=summaries_export.json"}
        )

    if export_request.format == "csv":
        return StreamingResponse(
            _build_csv_export(summaries, export_request.include_transcript),
            media_type="text/csv",
            headers={"Content-Disposition": "attachment; filename=summaries_export.csv"}
        )

    if export_request.format == "zip":
        return StreamingResponse(
            _build_zip_export(summaries),
            media_type="application/zip",
            headers={"Content-Disposition": "attachment; filename=summaries_export.zip"}
        )

    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail=f"Unsupported export format: {export_request.format}"
    )