1042 lines
40 KiB
Python
1042 lines
40 KiB
Python
#!/usr/bin/env python3
|
|
"""YouTube Summarizer Backend CLI Tool
|
|
|
|
A command-line interface for managing YouTube video summaries.
|
|
Supports regenerating summaries, adding new ones, and using custom prompts.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Optional, Dict, Any, List
|
|
import logging
|
|
import click
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
from rich.prompt import Prompt, Confirm
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.orm import sessionmaker, Session
|
|
|
|
# Add parent directory to path for imports
|
|
sys.path.append(str(Path(__file__).parent.parent))
|
|
|
|
from backend.core.config import settings
|
|
from backend.core.database_registry import registry
|
|
from backend.models import Summary, User
|
|
from backend.services.summary_pipeline import SummaryPipeline
|
|
from backend.services.video_service import VideoService
|
|
from backend.services.transcript_service import TranscriptService
|
|
from backend.services.anthropic_summarizer import AnthropicSummarizer
|
|
from backend.services.deepseek_summarizer import DeepSeekSummarizer
|
|
from backend.services.gemini_summarizer import GeminiSummarizer
|
|
from backend.services.openai_summarizer import OpenAISummarizer
|
|
from backend.services.cache_manager import CacheManager
|
|
from backend.services.notification_service import NotificationService
|
|
from backend.services.summary_storage import SummaryStorageService
|
|
from backend.models.pipeline import PipelineConfig
|
|
from backend.mermaid_renderer import MermaidRenderer, DiagramEnhancer, render_summary_diagrams
|
|
|
|
# Shared Rich console; every command in this module prints through it.
console = Console()
# Configure root logging at import time (INFO by default; the `--debug`
# flag on the `cli` group switches the root logger to DEBUG).
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class SummaryManager:
    """Data-access helper for ``Summary`` rows.

    Builds an engine from ``settings.DATABASE_URL``, asks the registry to
    create all tables, and then serves each operation from its own
    short-lived session.
    """

    def __init__(self):
        self.engine = create_engine(settings.DATABASE_URL)
        registry.create_all_tables(self.engine)
        self.SessionLocal = sessionmaker(
            bind=self.engine,
            autocommit=False,
            autoflush=False,
        )
        self.storage_service = SummaryStorageService()

    def get_session(self) -> Session:
        """Return a new session from the session factory."""
        return self.SessionLocal()

    def list_summaries(self, limit: int = 10, user_id: Optional[str] = None) -> List[Summary]:
        """Return up to ``limit`` summaries, newest first, optionally for one user."""
        with self.get_session() as db:
            rows = db.query(Summary)
            if user_id:
                rows = rows.filter_by(user_id=user_id)
            newest_first = rows.order_by(Summary.created_at.desc())
            return newest_first.limit(limit).all()

    def get_summary(self, summary_id: str) -> Optional[Summary]:
        """Return the summary with the given ID, or ``None`` if there is none."""
        with self.get_session() as db:
            matching = db.query(Summary).filter_by(id=summary_id)
            return matching.first()

    def get_summary_by_video(self, video_id: str) -> List[Summary]:
        """Return every summary stored for the given video ID."""
        with self.get_session() as db:
            matching = db.query(Summary).filter_by(video_id=video_id)
            return matching.all()

    def save_summary(self, summary_data: Dict[str, Any]) -> Summary:
        """Persist a new summary by delegating to the database storage service."""
        # Imported here rather than at module level, as in the original code.
        from backend.services.database_storage_service import database_storage_service

        return database_storage_service.save_summary_from_dict(summary_data)

    def update_summary(self, summary_id: str, updates: Dict[str, Any]) -> Optional[Summary]:
        """Apply ``updates`` to an existing summary and return it.

        Keys that are not attributes of the row are ignored. Returns ``None``
        when no summary has the given ID.
        """
        with self.get_session() as db:
            record = db.query(Summary).filter_by(id=summary_id).first()
            if not record:
                return record

            for field, new_value in updates.items():
                if hasattr(record, field):
                    setattr(record, field, new_value)
            record.updated_at = datetime.utcnow()
            db.commit()
            # Reload so the returned object carries the committed values.
            db.refresh(record)
            return record

    def delete_summary(self, summary_id: str) -> bool:
        """Delete the summary with the given ID; return whether a row was removed."""
        with self.get_session() as db:
            record = db.query(Summary).filter_by(id=summary_id).first()
            if not record:
                return False
            db.delete(record)
            db.commit()
            return True
|
|
|
|
|
|
class SummaryPipelineCLI:
    """Command-line front end for the summary pipeline.

    Creates the video, transcript, cache, notification and AI services,
    wires them into a ``SummaryPipeline``, and exposes a coroutine that
    runs one video to completion while showing a spinner.
    """

    def __init__(self, model: str = "deepseek"):
        self.model = model
        self.setup_services()

    def setup_services(self):
        """Instantiate the services, the pipeline and the summary manager."""
        self.video_service = VideoService()
        self.transcript_service = TranscriptService()
        self.cache_manager = CacheManager()
        self.notification_service = NotificationService()

        # Model name -> summarizer class; any other name falls back to DeepSeek.
        summarizer_classes = {
            "anthropic": AnthropicSummarizer,
            "gemini": GeminiSummarizer,
            "openai": OpenAISummarizer,
        }
        summarizer_cls = summarizer_classes.get(self.model, DeepSeekSummarizer)
        self.ai_service = summarizer_cls()

        self.pipeline = SummaryPipeline(
            video_service=self.video_service,
            transcript_service=self.transcript_service,
            ai_service=self.ai_service,
            cache_manager=self.cache_manager,
            notification_service=self.notification_service
        )

        self.summary_manager = SummaryManager()

    async def process_video(
        self,
        video_url: str,
        custom_prompt: Optional[str] = None,
        summary_length: str = "standard",
        focus_areas: Optional[List[str]] = None,
        include_diagrams: bool = False
    ) -> Dict[str, Any]:
        """Run one video through the pipeline and return the result's ``__dict__``.

        Uses ``custom_prompt`` when given, otherwise the default prompt
        (with diagram instructions if ``include_diagrams`` is set). Polls the
        pipeline once per second until the job completes or fails.

        Raises:
            Exception: carrying ``result.error`` when the job status is "failed".
        """
        config = PipelineConfig(
            summary_length=summary_length,
            focus_areas=focus_areas or [],
            include_timestamps=True,
            enable_notifications=False
        )

        final_prompt = custom_prompt if custom_prompt else self._build_default_prompt(include_diagrams)
        if final_prompt:
            # The prompt is handed to the summarizer through this attribute.
            self.ai_service.custom_prompt = final_prompt

        job_id = await self.pipeline.process_video(video_url, config)

        spinner = Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console
        )
        with spinner as progress:
            task = progress.add_task("[cyan]Processing video...", total=None)

            while True:
                result = await self.pipeline.get_pipeline_result(job_id)
                if not result:
                    # No result yet; wait before polling again.
                    await asyncio.sleep(1)
                    continue

                status = result.status
                if status == "completed":
                    progress.update(task, description="[green]✓ Processing completed!")
                    return result.__dict__
                if status == "failed":
                    progress.update(task, description=f"[red]✗ Processing failed: {result.error}")
                    raise Exception(result.error)

                progress.update(task, description=f"[yellow]Processing: {status}")
                await asyncio.sleep(1)

    def _build_default_prompt(self, include_diagrams: bool) -> str:
        """Return the default prompt, with Mermaid instructions appended on request."""
        base = """
        Provide a comprehensive summary of this video content.
        Include key points, main themes, and actionable insights.
        """

        if include_diagrams:
            diagram_section = """

            IMPORTANT: Include relevant Mermaid diagrams where they would help visualize the content.
            Consider creating diagrams for:
            - Process flows or sequences discussed in the video
            - Hierarchical structures or relationships
            - Timeline of events or concepts
            - Mind maps of main ideas and connections
            - State diagrams for systems or workflows
            - Entity relationship diagrams for data structures

            Format diagrams as:
            ```mermaid
            [diagram code]
            ```

            Choose diagram types intelligently based on the content:
            - Use flowchart for processes and decision trees
            - Use sequence diagrams for interactions between components
            - Use mind maps for conceptual relationships
            - Use timeline for chronological information
            - Use pie/bar charts for statistical data if mentioned

            The AI should automatically determine if and when diagrams would enhance understanding.
            """
            return base + diagram_section

        return base
|
|
|
|
|
|
@click.group()
@click.option('--debug', is_flag=True, help='Enable debug logging')
def cli(debug):
    """YouTube Summarizer Backend CLI Tool"""
    # The docstring above is the group's --help text; keep it as is.
    if not debug:
        return
    # Raise the root logger to DEBUG so every module's logger is affected.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    console.print("[yellow]Debug mode enabled[/yellow]")
|
|
|
|
|
|
@cli.command()
@click.option('--limit', '-l', default=10, help='Number of summaries to show')
@click.option('--user-id', '-u', help='Filter by user ID')
@click.option('--video-id', '-v', help='Filter by video ID')
def list(limit, user_id, video_id):
    """List existing summaries"""
    # NOTE: defining this command rebinds the module-level name ``list`` to a
    # click command, so code in this module must not call the builtin as
    # ``list(...)``. The function name is kept so the CLI command stays "list".
    #
    # Prints a table of summaries: all summaries of one video when --video-id
    # is given (--limit is not applied in that case), otherwise the most
    # recent ``limit`` summaries, optionally filtered by --user-id.
    from rich.markup import escape  # titles may contain "[...]", which Rich would parse as markup

    manager = SummaryManager()

    if video_id:
        summaries = manager.get_summary_by_video(video_id)
    else:
        summaries = manager.list_summaries(limit=limit, user_id=user_id)

    if not summaries:
        console.print("[yellow]No summaries found[/yellow]")
        return

    table = Table(title="YouTube Summaries")
    table.add_column("ID", style="cyan", width=36)
    table.add_column("Video Title", style="green", width=40)
    table.add_column("Model", style="yellow")
    table.add_column("Created", style="magenta")
    table.add_column("Quality", style="blue")

    for summary in summaries:
        # A score of 0.0 is a real score; only a missing score shows as N/A.
        score = summary.quality_score
        quality = f"{score:.2f}" if score is not None else "N/A"

        # Rows without a creation timestamp must not crash the listing.
        created_at = summary.created_at
        created = created_at.strftime("%Y-%m-%d %H:%M") if created_at else "Unknown"

        # Keep the title within the 40-character column.
        title = summary.video_title or "Unknown"
        if len(title) > 40:
            title = title[:37] + "..."

        table.add_row(
            str(summary.id),  # add_row only accepts renderables such as str
            escape(title),
            escape(summary.model_used or "Unknown"),
            created,
            quality,
        )

    console.print(table)
|
|
|
|
|
|
@cli.command()
@click.argument('summary_id')
@click.option('--export', '-e', is_flag=True, help='Export to JSON file')
@click.option('--render-diagrams', '-r', is_flag=True, help='Render Mermaid diagrams if present')
@click.option('--suggest-diagrams', '-s', is_flag=True, help='Suggest diagrams based on content')
def show(summary_id, export, render_diagrams, suggest_diagrams):
    """Show details of a specific summary"""
    # Prints the stored fields of one summary. Optionally renders embedded
    # Mermaid diagrams to diagrams/<id>, prints diagram suggestions, and
    # exports the summary to a timestamped JSON file in the working directory.
    #
    # Stored text (titles, summary body, key points, Mermaid templates) is
    # printed either escaped or with markup disabled: Rich would otherwise
    # treat "[...]" in that text as style tags, dropping it or raising
    # MarkupError on an unmatched "[/...]".
    from rich.markup import escape

    manager = SummaryManager()
    summary = manager.get_summary(summary_id)

    if not summary:
        console.print(f"[red]Summary not found: {escape(summary_id)}[/red]")
        return

    console.print("\n[bold cyan]Summary Details[/bold cyan]")
    details = (
        ("ID", summary.id),
        ("Video", summary.video_title),
        ("URL", summary.video_url),
        ("Model", summary.model_used),
        ("Created", summary.created_at),
    )
    for label, value in details:
        console.print(f"[yellow]{label}:[/yellow] {escape(str(value))}")

    if summary.summary:
        console.print("\n[bold green]Summary:[/bold green]")
        console.print(summary.summary, markup=False)

        # Render diagrams only when asked and when the text contains a Mermaid fence.
        if render_diagrams and '```mermaid' in summary.summary:
            console.print("\n[bold cyan]📊 Rendering Mermaid Diagrams...[/bold cyan]")
            output_dir = f"diagrams/{summary.id}"
            results = render_summary_diagrams(summary.summary, output_dir)

            if results:
                console.print(f"[green]✓ Rendered {len(results)} diagram(s) to {output_dir}[/green]")

    if summary.key_points:
        console.print("\n[bold green]Key Points:[/bold green]")
        for point in summary.key_points:
            console.print(f" • {point}", markup=False)

    if suggest_diagrams and summary.summary:
        console.print("\n[bold cyan]📊 Diagram Suggestions:[/bold cyan]")
        suggestions = DiagramEnhancer.suggest_diagrams(summary.summary)

        if suggestions:
            for suggestion in suggestions:
                console.print(f"\n[yellow]Type:[/yellow] {escape(str(suggestion['type']))}")
                console.print(f"[yellow]Reason:[/yellow] {escape(str(suggestion['reason']))}")
                console.print("[dim]Template:[/dim]")
                console.print(f"```mermaid\n{suggestion['template']}\n```", markup=False)
        else:
            console.print("[dim]No specific diagram suggestions based on content[/dim]")

        # Also print a structure diagram built from key points and themes.
        if summary.key_points or summary.main_themes:
            console.print("\n[yellow]Summary Structure Diagram:[/yellow]")
            structure_diagram = DiagramEnhancer.create_summary_structure_diagram(
                summary.key_points or [],
                summary.main_themes or []
            )
            console.print(f"```mermaid\n{structure_diagram}\n```", markup=False)

    if export:
        filename = f"summary_{summary.id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        export_data = {
            "id": summary.id,
            "video_id": summary.video_id,
            "video_title": summary.video_title,
            "video_url": summary.video_url,
            "summary": summary.summary,
            "key_points": summary.key_points,
            "main_themes": summary.main_themes,
            "model_used": summary.model_used,
            "created_at": summary.created_at.isoformat() if summary.created_at else None
        }

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(export_data, f, indent=2)

        console.print(f"\n[green]✓ Exported to {filename}[/green]")
|
|
|
|
|
|
@cli.command()
@click.argument('video_url')
@click.option('--model', '-m', default='deepseek',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='AI model to use')
@click.option('--length', '-l', default='standard',
              type=click.Choice(['brief', 'standard', 'detailed']),
              help='Summary length')
@click.option('--prompt', '-p', help='Custom prompt for summarization')
@click.option('--focus', '-f', multiple=True, help='Focus areas (can specify multiple)')
@click.option('--diagrams', '-d', is_flag=True, help='Include Mermaid diagrams in summary')
def add(video_url, model, length, prompt, focus, diagrams):
    """Add a new video summary"""
    # Runs the pipeline for ``video_url`` with the chosen model, saves the
    # result through the summary manager, and prints the new ID, title and a
    # 500-character preview. Any exception is reported and re-raised.
    from rich.markup import escape  # stored titles may contain "[...]"

    console.print(f"\n[cyan]Adding new summary for: {video_url}[/cyan]")
    if diagrams:
        console.print("[yellow]📊 Diagram generation enabled[/yellow]")

    # ``focus`` is a tuple (click ``multiple=True``). Copy it with unpacking,
    # not ``list(focus)``: in this module the name ``list`` is rebound to the
    # ``list`` CLI command, so calling it would invoke that command instead
    # of the builtin.
    focus_areas = [*focus]

    pipeline_cli = SummaryPipelineCLI(model=model)

    async def process():
        try:
            result = await pipeline_cli.process_video(
                video_url=video_url,
                custom_prompt=prompt,
                summary_length=length,
                focus_areas=focus_areas or None,
                include_diagrams=diagrams
            )

            generated = result.get("summary", {})
            summary_data = {
                "video_id": result.get("video_id"),
                "video_url": result.get("video_url"),
                "video_title": result.get("metadata", {}).get("title"),
                "transcript": result.get("transcript"),
                "summary": generated.get("content"),
                "key_points": generated.get("key_points"),
                "main_themes": generated.get("main_themes"),
                "model_used": model,
                "processing_time": result.get("processing_time"),
                "quality_score": result.get("quality_metrics", {}).get("overall_score"),
                "summary_length": length,
                "focus_areas": focus_areas
            }

            saved_summary = pipeline_cli.summary_manager.save_summary(summary_data)

            console.print("\n[green]✓ Summary created successfully![/green]")
            console.print(f"[yellow]Summary ID:[/yellow] {saved_summary.id}")
            console.print(f"[yellow]Video Title:[/yellow] {escape(str(saved_summary.video_title))}")

            if saved_summary.summary:
                console.print("\n[bold]Summary Preview:[/bold]")
                text = saved_summary.summary
                preview = text[:500] + "..." if len(text) > 500 else text
                # Summary text may contain Mermaid "[...]"; print it verbatim.
                console.print(preview, markup=False)

            return saved_summary

        except Exception as e:
            # Markup is disabled so brackets in the message cannot raise a
            # MarkupError that would mask the original exception.
            console.print(f"Error: {e}", style="red", markup=False)
            raise

    asyncio.run(process())
|
|
|
|
|
|
@cli.command()
@click.argument('summary_id')
@click.option('--model', '-m', default='deepseek',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='AI model to use')
@click.option('--prompt', '-p', help='Custom prompt for regeneration')
@click.option('--length', '-l',
              type=click.Choice(['brief', 'standard', 'detailed']),
              help='New summary length')
@click.option('--diagrams', '-d', is_flag=True, help='Include Mermaid diagrams in summary')
def regenerate(summary_id, model, prompt, length, diagrams):
    """Regenerate an existing summary"""
    # Looks up the summary, asks for confirmation, re-runs the pipeline on
    # its video URL and overwrites the stored summary fields. ``--length``
    # falls back to the stored length, then to 'standard'.
    from rich.markup import escape  # stored titles may contain "[...]"

    manager = SummaryManager()
    summary = manager.get_summary(summary_id)

    if not summary:
        console.print(f"[red]Summary not found: {escape(summary_id)}[/red]")
        return

    console.print(f"\n[cyan]Regenerating summary for: {escape(str(summary.video_title))}[/cyan]")
    console.print(f"[yellow]Original model:[/yellow] {escape(str(summary.model_used))}")
    console.print(f"[yellow]New model:[/yellow] {model}")
    if diagrams:
        console.print("[yellow]📊 Diagram generation enabled[/yellow]")

    if not Confirm.ask("Continue with regeneration?"):
        console.print("[yellow]Regeneration cancelled[/yellow]")
        return

    pipeline_cli = SummaryPipelineCLI(model=model)

    async def process():
        try:
            result = await pipeline_cli.process_video(
                video_url=summary.video_url,
                custom_prompt=prompt,
                summary_length=length or summary.summary_length or 'standard',
                focus_areas=summary.focus_areas,
                include_diagrams=diagrams
            )

            generated = result.get("summary", {})
            updates = {
                "summary": generated.get("content"),
                "key_points": generated.get("key_points"),
                "main_themes": generated.get("main_themes"),
                "model_used": model,
                "processing_time": result.get("processing_time"),
                "quality_score": result.get("quality_metrics", {}).get("overall_score"),
            }
            # Only overwrite the stored length when a new one was requested.
            if length:
                updates["summary_length"] = length

            updated_summary = manager.update_summary(summary_id, updates)
            if updated_summary is None:
                # update_summary returns None when the row no longer exists.
                console.print(f"[red]Summary not found: {escape(summary_id)}[/red]")
                return None

            console.print("\n[green]✓ Summary regenerated successfully![/green]")

            if updated_summary.summary:
                console.print("\n[bold]New Summary Preview:[/bold]")
                text = updated_summary.summary
                preview = text[:500] + "..." if len(text) > 500 else text
                # Summary text may contain Mermaid "[...]"; print it verbatim.
                console.print(preview, markup=False)

            return updated_summary

        except Exception as e:
            # Markup is disabled so brackets in the message cannot raise a
            # MarkupError that would mask the original exception.
            console.print(f"Error: {e}", style="red", markup=False)
            raise

    asyncio.run(process())
|
|
|
|
|
|
@cli.command()
@click.argument('summary_id')
def delete(summary_id):
    """Delete a summary"""
    # Shows the summary, asks for confirmation, then deletes it.
    manager = SummaryManager()
    summary = manager.get_summary(summary_id)

    if not summary:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    console.print(f"\n[yellow]Summary to delete:[/yellow]")
    console.print(f" ID: {summary.id}")
    console.print(f" Video: {summary.video_title}")
    console.print(f" Created: {summary.created_at}")

    confirmed = Confirm.ask("\n[red]Are you sure you want to delete this summary?[/red]")
    if not confirmed:
        console.print("[yellow]Deletion cancelled[/yellow]")
        return

    deleted = manager.delete_summary(summary_id)
    outcome = (
        f"[green]✓ Summary deleted successfully[/green]"
        if deleted
        else f"[red]Failed to delete summary[/red]"
    )
    console.print(outcome)
|
|
|
|
|
|
@cli.command()
@click.option('--days', '-d', default=30, help='Days to keep summaries')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted without deleting')
def cleanup(days, dry_run):
    """Clean up old summaries"""
    # Lists summaries created more than ``days`` days ago and, unless
    # --dry-run is given, deletes them after confirmation.
    #
    # The module-level import only brings in ``datetime``; without this local
    # import the cutoff computation raised NameError on ``timedelta``.
    from datetime import timedelta

    manager = SummaryManager()
    cutoff_date = datetime.utcnow() - timedelta(days=days)

    # One session covers both the query and the deletes, so the queried
    # objects are still attached when they are deleted.
    with manager.get_session() as session:
        old_summaries = session.query(Summary).filter(
            Summary.created_at < cutoff_date
        ).all()

        if not old_summaries:
            console.print(f"[green]No summaries older than {days} days found[/green]")
            return

        console.print(f"\n[yellow]Found {len(old_summaries)} summaries older than {days} days:[/yellow]")

        for summary in old_summaries:
            # The line has no markup of its own; disable parsing so "[...]"
            # in a title is shown verbatim.
            console.print(
                f" • {summary.id}: {summary.video_title} (created {summary.created_at})",
                markup=False,
            )

        if dry_run:
            console.print("\n[cyan]Dry run - no summaries deleted[/cyan]")
            return

        if not Confirm.ask(f"\n[red]Delete {len(old_summaries)} summaries?[/red]"):
            console.print("[yellow]Cleanup cancelled[/yellow]")
            return

        for summary in old_summaries:
            session.delete(summary)
        session.commit()
        console.print(f"[green]✓ Deleted {len(old_summaries)} summaries[/green]")
|
|
|
|
|
|
@cli.command()
def stats():
    """Show summary statistics"""
    # Prints the total count, the count from the last 7 days, the average
    # quality score, the per-model distribution, and the storage service's
    # statistics.
    #
    # The module-level import only brings in ``datetime``; without this local
    # import the 7-day window computation raised NameError on ``timedelta``.
    from datetime import timedelta

    from sqlalchemy import func

    manager = SummaryManager()
    storage_stats = manager.storage_service.get_summary_stats()

    with manager.get_session() as session:
        total_summaries = session.query(Summary).count()

        # Number of summaries per model.
        model_stats = session.query(
            Summary.model_used,
            func.count(Summary.id)
        ).group_by(Summary.model_used).all()

        avg_quality = session.query(func.avg(Summary.quality_score)).scalar()

        recent_date = datetime.utcnow() - timedelta(days=7)
        recent_count = session.query(Summary).filter(
            Summary.created_at >= recent_date
        ).count()

    console.print("\n[bold cyan]YouTube Summarizer Statistics[/bold cyan]")
    console.print(f"[yellow]Total Summaries:[/yellow] {total_summaries}")
    console.print(f"[yellow]Recent (7 days):[/yellow] {recent_count}")
    # The average is None only when there are no scores; 0.0 is a valid
    # average and must not be reported as N/A.
    if avg_quality is not None:
        console.print(f"[yellow]Average Quality:[/yellow] {avg_quality:.2f}")
    else:
        console.print("[yellow]Average Quality:[/yellow] N/A")

    if model_stats:
        console.print("\n[bold]Model Distribution:[/bold]")
        for model, count in model_stats:
            console.print(f" • {model or 'Unknown'}: {count}")

    if storage_stats:
        console.print("\n[bold]Storage Statistics:[/bold]")
        console.print(f" • Videos with summaries: {storage_stats['total_videos_with_summaries']}")
        console.print(f" • Total storage: {storage_stats['total_size_mb']:.2f} MB")
|
|
|
|
|
|
@cli.command()
@click.option('--prompt', '-p', required=True, help='Custom prompt template')
@click.option('--name', '-n', required=True, help='Name for this prompt template')
@click.option('--description', '-d', help='Description of the prompt')
def save_prompt(prompt, name, description):
    """Save a custom prompt template for reuse"""
    # Templates are kept in prompts.json in the current working directory,
    # keyed by name; saving under an existing name overwrites that entry.
    prompts_file = Path("prompts.json")

    prompts = {}
    if prompts_file.exists():
        with open(prompts_file, 'r') as existing:
            prompts = json.load(existing)

    entry = {
        "prompt": prompt,
        "description": description,
        "created_at": datetime.now().isoformat()
    }
    prompts[name] = entry

    with open(prompts_file, 'w') as target:
        json.dump(prompts, target, indent=2)

    console.print(f"[green]✓ Prompt template '{name}' saved successfully[/green]")
|
|
|
|
|
|
@cli.command()
def list_prompts():
    """List saved prompt templates"""
    # Reads prompts.json from the current working directory and prints one
    # table row per saved template.
    from rich.markup import escape  # names/descriptions are user text and may contain "[...]"

    prompts_file = Path("prompts.json")

    if not prompts_file.exists():
        console.print("[yellow]No saved prompts found[/yellow]")
        return

    with open(prompts_file, 'r') as f:
        prompts = json.load(f)

    if not prompts:
        console.print("[yellow]No saved prompts found[/yellow]")
        return

    table = Table(title="Saved Prompt Templates")
    table.add_column("Name", style="cyan")
    table.add_column("Description", style="green")
    table.add_column("Created", style="magenta")

    for name, data in prompts.items():
        created = datetime.fromisoformat(data['created_at']).strftime("%Y-%m-%d %H:%M")
        # save_prompt always writes the "description" key, with None when no
        # description was given, so a .get() default never applies; fall back
        # on a falsy value instead.
        description = data.get('description') or 'N/A'
        table.add_row(
            escape(name),
            escape(description),
            created
        )

    console.print(table)
|
|
|
|
|
|
@cli.command()
@click.argument('summary_id')
@click.option('--interactive', '-i', is_flag=True, help='Interactive refinement mode')
@click.option('--model', '-m',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='Switch to different model')
def refine(summary_id, interactive, model):
    """Iteratively refine a summary until satisfied

    This command allows you to refine an existing summary multiple times
    with different instructions until you're satisfied with the result.
    """
    # Interactive mode loops over instructions ('done' ends, 'undo' restores
    # the previous version from an in-memory history); otherwise a single
    # refinement is applied. Each refinement re-runs the pipeline on the
    # video with a prompt built from the current summary and the instruction,
    # then stores the result.
    from rich.markup import escape  # stored/user text may contain "[...]"

    manager = SummaryManager()
    summary = manager.get_summary(summary_id)

    if not summary:
        console.print(f"[red]Summary not found: {escape(summary_id)}[/red]")
        return

    def preview_of(text):
        # First 500 characters plus an ellipsis; tolerates missing text.
        text = text or ""
        return text[:500] + "..." if len(text) > 500 else text

    async def run_refinement(pipeline_cli, current, prompt_text, error_label):
        # Re-run the pipeline with the refinement prompt and store the result.
        # Returns the updated summary, or None on failure (after printing it).
        try:
            result = await pipeline_cli.process_video(
                video_url=current.video_url,
                custom_prompt=prompt_text,
                summary_length=current.summary_length or 'standard',
                focus_areas=current.focus_areas
            )
            generated = result.get("summary", {})
            updates = {
                "summary": generated.get("content"),
                "key_points": generated.get("key_points"),
                "main_themes": generated.get("main_themes"),
                "model_used": current_model
            }
            return manager.update_summary(summary_id, updates)
        except Exception as e:
            # Markup is disabled so brackets in the message cannot raise a
            # MarkupError inside the handler.
            console.print(f"{error_label}: {e}", style="red", markup=False)
            return None

    console.print("\n[bold cyan]Refining Summary[/bold cyan]")
    console.print(f"[yellow]Video:[/yellow] {escape(str(summary.video_title))}")
    console.print(f"[yellow]Current Model:[/yellow] {escape(str(summary.model_used))}")

    console.print("\n[bold]Current Summary:[/bold]")
    if summary.summary:
        console.print(preview_of(summary.summary), markup=False)

    # Snapshots taken before each successful refinement, newest last.
    refinement_history = []
    current_model = model or summary.model_used or 'deepseek'

    if interactive:
        console.print("\n[cyan]Interactive Refinement Mode[/cyan]")
        console.print("Enter refinement instructions or type 'done' to finish, 'undo' to revert last change")

        while True:
            console.print("\n" + "="*50)
            instruction = Prompt.ask("\n[green]Refinement instruction[/green]")
            command = instruction.lower()

            if command == 'done':
                console.print("[green]✓ Refinement complete![/green]")
                break

            if command == 'undo':
                if not refinement_history:
                    console.print("[yellow]No previous versions to revert to[/yellow]")
                    continue

                previous = refinement_history.pop()
                reverted = manager.update_summary(summary_id, {
                    "summary": previous['summary'],
                    "key_points": previous.get('key_points'),
                    "main_themes": previous.get('main_themes')
                })
                if reverted is None:
                    # The row no longer exists; continuing would dereference None.
                    console.print(f"[red]Summary not found: {escape(summary_id)}[/red]")
                    return
                summary = reverted
                console.print("[yellow]✓ Reverted to previous version[/yellow]")
                continue

            # Snapshot the current state so this refinement can be undone.
            refinement_history.append({
                "summary": summary.summary,
                "key_points": summary.key_points,
                "main_themes": summary.main_themes
            })

            console.print(f"\n[cyan]Refining with: {escape(instruction[:50])}...[/cyan]")

            pipeline_cli = SummaryPipelineCLI(model=current_model)

            refinement_prompt = f"""
            Original summary:
            {summary.summary}

            Refinement instruction:
            {instruction}

            Please provide an improved summary based on the refinement instruction above.
            Maintain the same structure and key information unless specifically asked to change.
            """

            updated = asyncio.run(
                run_refinement(pipeline_cli, summary, refinement_prompt, "Error during refinement")
            )

            if not updated:
                # Nothing changed, so drop the snapshot; otherwise 'undo'
                # would "revert" to the state we are already in.
                refinement_history.pop()
                continue

            summary = updated
            console.print("\n[green]✓ Refinement applied![/green]")
            console.print("\n[bold]Updated Summary:[/bold]")
            console.print(preview_of(summary.summary), markup=False)

            if Confirm.ask("\n[yellow]Are you satisfied with this refinement?[/yellow]"):
                console.print("[green]✓ Great! Summary refined successfully![/green]")
                break
            console.print("[cyan]Let's continue refining...[/cyan]")
    else:
        # Non-interactive mode - single refinement
        instruction = Prompt.ask("[green]Enter refinement instruction[/green]")

        pipeline_cli = SummaryPipelineCLI(model=current_model)

        refinement_prompt = f"""
        Original summary:
        {summary.summary}

        Refinement instruction:
        {instruction}

        Please provide an improved summary based on the refinement instruction above.
        """

        updated = asyncio.run(
            run_refinement(pipeline_cli, summary, refinement_prompt, "Error")
        )

        if updated:
            console.print("\n[green]✓ Summary refined successfully![/green]")
            console.print("\n[bold]Updated Summary:[/bold]")
            console.print(updated.summary, markup=False)
|
|
|
|
|
|
def _read_batch_urls(input_file):
    """Collect the URLs for a batch run.

    When ``input_file`` is given, it is read as UTF-8 text with one URL per
    line. Otherwise URLs are read interactively from stdin until a blank line
    or end-of-input. Surrounding whitespace is stripped and blank entries are
    never returned.
    """
    if input_file:
        # Read from file
        with open(input_file, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f if line.strip()]

    # Interactive mode - ask for URLs
    console.print("[cyan]Enter YouTube URLs (one per line, empty line to finish):[/cyan]")
    urls = []
    while True:
        try:
            # Strip BEFORE the emptiness test so a line holding only spaces
            # ends the list instead of being queued as an empty URL.
            url = input().strip()
        except EOFError:
            # stdin was closed (Ctrl-D / exhausted pipe): treat as "finished"
            # rather than aborting the command with a traceback.
            break
        if not url:
            break
        urls.append(url)
    return urls


@cli.command()
@click.option('--input-file', '-i', type=click.Path(exists=True), help='File containing list of URLs')
@click.option('--model', '-m', default='deepseek',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='AI model to use')
@click.option('--prompt', '-p', help='Custom prompt for all videos')
@click.option('--length', '-l', default='standard',
              type=click.Choice(['brief', 'standard', 'detailed']),
              help='Summary length for all videos')
def batch(input_file, model, prompt, length):
    """Process multiple videos in batch

    Provide a file with one YouTube URL per line to process multiple videos.
    """
    # NOTE: the docstring above is rendered by click as the command's --help
    # text, so developer-facing documentation lives in comments instead.
    #
    # input_file: optional path to a URL list; when omitted, URLs are read
    #             interactively from stdin.
    # model:      name passed to SummaryPipelineCLI and recorded per summary.
    # prompt:     optional custom prompt forwarded for every video.
    # length:     summary length forwarded for every video.
    #
    # Videos are processed one after another; a failure on one URL is reported
    # and recorded, and processing continues with the next URL. A JSON report
    # named batch_results_<timestamp>.json is written to the working directory.
    urls = _read_batch_urls(input_file)

    if not urls:
        console.print("[yellow]No URLs provided[/yellow]")
        return

    console.print(f"\n[cyan]Processing {len(urls)} videos with model: {model}[/cyan]")

    pipeline_cli = SummaryPipelineCLI(model=model)

    async def process_batch():
        """Run the pipeline for each URL in order and return per-URL outcomes."""
        results = []
        for i, url in enumerate(urls, 1):
            console.print(f"\n[yellow]Processing {i}/{len(urls)}: {url}[/yellow]")

            try:
                result = await pipeline_cli.process_video(
                    video_url=url,
                    custom_prompt=prompt,
                    summary_length=length
                )

                summary_part = result.get("summary", {})

                # Save to database
                summary_data = {
                    "video_id": result.get("video_id"),
                    "video_url": url,
                    "video_title": result.get("metadata", {}).get("title"),
                    "transcript": result.get("transcript"),
                    "summary": summary_part.get("content"),
                    "key_points": summary_part.get("key_points"),
                    "main_themes": summary_part.get("main_themes"),
                    "model_used": model,
                    "processing_time": result.get("processing_time"),
                    "quality_score": result.get("quality_metrics", {}).get("overall_score"),
                    "summary_length": length
                }

                saved = pipeline_cli.summary_manager.save_summary(summary_data)
                results.append({
                    "url": url,
                    "id": saved.id,
                    "title": saved.video_title,
                    "status": "success"
                })

                console.print(f"[green]✓ Processed: {saved.video_title}[/green]")

            except Exception as e:
                # Deliberately broad: one bad video must not abort the rest of
                # the batch. The error is surfaced and kept in the report.
                console.print(f"[red]✗ Failed: {e}[/red]")
                results.append({
                    "url": url,
                    "status": "failed",
                    "error": str(e)
                })

        return results

    # Run batch processing
    final_results = asyncio.run(process_batch())

    # Display summary
    console.print("\n" + "="*50)
    console.print("[bold cyan]Batch Processing Complete[/bold cyan]")

    success_count = sum(1 for r in final_results if r['status'] == 'success')
    failed_count = sum(1 for r in final_results if r['status'] == 'failed')

    console.print(f"[green]Successful:[/green] {success_count}")
    console.print(f"[red]Failed:[/red] {failed_count}")

    # Save results to file
    output_file = f"batch_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(final_results, f, indent=2)

    console.print(f"\n[yellow]Results saved to:[/yellow] {output_file}")
|
|
|
|
|
|
@cli.command()
@click.argument('summary_id')
@click.argument('other_summary_id')
def compare(summary_id, other_summary_id):
    """Compare two different summaries

    Useful for comparing summaries generated with different models or prompts.
    """
    # NOTE: the docstring above is rendered by click as the command's --help
    # text, so developer-facing documentation lives in comments instead.
    #
    # summary_id / other_summary_id: identifiers looked up through
    # SummaryManager.get_summary. If either lookup returns nothing, an error
    # is printed and the command returns without output.
    manager = SummaryManager()
    summary1 = manager.get_summary(summary_id)
    summary2 = manager.get_summary(other_summary_id)

    if not summary1:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    if not summary2:
        console.print(f"[red]Summary not found: {other_summary_id}[/red]")
        return

    def format_number(value, suffix=""):
        """Render a numeric field with two decimals, or "N/A" when unset."""
        # Explicit None test: a score or duration of 0 is a real value and
        # must not be reported as missing.
        if value is None:
            return "N/A"
        return f"{value:.2f}{suffix}"

    def format_created(summary):
        """Render the creation timestamp, or "N/A" when unset."""
        if summary.created_at:
            return summary.created_at.strftime("%Y-%m-%d %H:%M")
        return "N/A"

    def truncate_text(text, length=200):
        """Shorten text to ``length`` characters for the preview row."""
        if not text:
            return "N/A"
        return text[:length] + "..." if len(text) > length else text

    # Create comparison table
    table = Table(title="Summary Comparison", show_lines=True)
    table.add_column("Attribute", style="cyan", width=20)
    table.add_column(f"Summary 1\n{summary_id[:8]}...", style="green", width=40)
    table.add_column(f"Summary 2\n{other_summary_id[:8]}...", style="yellow", width=40)

    # Add comparison rows (attribute label, value for summary 1, value for summary 2)
    rows = [
        ("Video", summary1.video_title or "N/A", summary2.video_title or "N/A"),
        ("Model", summary1.model_used or "Unknown", summary2.model_used or "Unknown"),
        ("Length", summary1.summary_length or "standard", summary2.summary_length or "standard"),
        (
            "Quality Score",
            format_number(summary1.quality_score),
            format_number(summary2.quality_score),
        ),
        (
            "Processing Time",
            format_number(summary1.processing_time, "s"),
            format_number(summary2.processing_time, "s"),
        ),
        ("Created", format_created(summary1), format_created(summary2)),
        # Add summary preview
        ("Summary Preview", truncate_text(summary1.summary), truncate_text(summary2.summary)),
    ]
    for row in rows:
        table.add_row(*row)

    console.print(table)

    # Show key points comparison if available
    if summary1.key_points or summary2.key_points:
        console.print("\n[bold]Key Points Comparison:[/bold]")

        points1 = summary1.key_points or []
        points2 = summary2.key_points or []
        max_points = max(len(points1), len(points2))

        # Index-based walk so the shorter list is padded with a dim "N/A".
        for i in range(max_points):
            point1 = points1[i] if i < len(points1) else "[dim]N/A[/dim]"
            point2 = points2[i] if i < len(points2) else "[dim]N/A[/dim]"

            console.print(f"\n[cyan]Point {i+1}:[/cyan]")
            console.print(f"  [green]Summary 1:[/green] {point1}")
            console.print(f"  [yellow]Summary 2:[/yellow] {point2}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script (not imported): dispatch to the click command group.
    cli()