youtube-summarizer/backend/cli.py

1042 lines
40 KiB
Python

#!/usr/bin/env python3
"""YouTube Summarizer Backend CLI Tool
A command-line interface for managing YouTube video summaries.
Supports regenerating summaries, adding new ones, and using custom prompts.
"""
import asyncio
import json
import os
import sys
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, Any, List
import logging
import click
from rich.console import Console
from rich.table import Table
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.prompt import Prompt, Confirm
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, Session
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
from backend.core.config import settings
from backend.core.database_registry import registry
from backend.models import Summary, User
from backend.services.summary_pipeline import SummaryPipeline
from backend.services.video_service import VideoService
from backend.services.transcript_service import TranscriptService
from backend.services.anthropic_summarizer import AnthropicSummarizer
from backend.services.deepseek_summarizer import DeepSeekSummarizer
from backend.services.gemini_summarizer import GeminiSummarizer
from backend.services.openai_summarizer import OpenAISummarizer
from backend.services.cache_manager import CacheManager
from backend.services.notification_service import NotificationService
from backend.services.summary_storage import SummaryStorageService
from backend.models.pipeline import PipelineConfig
from backend.mermaid_renderer import MermaidRenderer, DiagramEnhancer, render_summary_diagrams
# Initialize Rich console
# A single module-level console is shared by every command for user-facing output.
console = Console()
# Root logging defaults to INFO; the `--debug` flag on the `cli` group switches it to DEBUG.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-scoped logger.
logger = logging.getLogger(__name__)
class SummaryManager:
    """Database access layer for ``Summary`` rows.

    Builds an engine from ``settings.DATABASE_URL``, asks the registry to
    create all tables, and exposes small query/update helpers. The read,
    update and delete helpers each open their own session via
    ``get_session``; saving is delegated to the database storage service.
    """

    def __init__(self):
        self.engine = create_engine(settings.DATABASE_URL)
        registry.create_all_tables(self.engine)
        self.SessionLocal = sessionmaker(
            autocommit=False,
            autoflush=False,
            bind=self.engine,
        )
        self.storage_service = SummaryStorageService()

    def get_session(self) -> Session:
        """Return a new session produced by the configured session factory."""
        return self.SessionLocal()

    def list_summaries(self, limit: int = 10, user_id: Optional[str] = None) -> List[Summary]:
        """Return up to ``limit`` summaries, newest first, optionally for one user."""
        with self.get_session() as db:
            rows = db.query(Summary)
            if user_id:
                rows = rows.filter_by(user_id=user_id)
            newest_first = rows.order_by(Summary.created_at.desc())
            return newest_first.limit(limit).all()

    def get_summary(self, summary_id: str) -> Optional[Summary]:
        """Return the summary with the given ID, or ``None`` when there is none."""
        with self.get_session() as db:
            matching = db.query(Summary).filter_by(id=summary_id)
            return matching.first()

    def get_summary_by_video(self, video_id: str) -> List[Summary]:
        """Return every summary stored for the given video ID."""
        with self.get_session() as db:
            matching = db.query(Summary).filter_by(video_id=video_id)
            return matching.all()

    def save_summary(self, summary_data: Dict[str, Any]) -> Summary:
        """Persist a new summary by handing the dict to the database storage service."""
        # Imported lazily, as in the original code, rather than at module import time.
        from backend.services.database_storage_service import database_storage_service
        return database_storage_service.save_summary_from_dict(summary_data)

    def update_summary(self, summary_id: str, updates: Dict[str, Any]) -> Optional[Summary]:
        """Apply ``updates`` to an existing summary and return the refreshed row.

        Keys that are not attributes of the row are ignored. Returns the
        falsy lookup result (``None``) when no row matches ``summary_id``.
        """
        with self.get_session() as db:
            row = db.query(Summary).filter_by(id=summary_id).first()
            if not row:
                return row
            for field, new_value in updates.items():
                if hasattr(row, field):
                    setattr(row, field, new_value)
            row.updated_at = datetime.utcnow()
            db.commit()
            # Reload attributes so they stay readable after the session closes.
            db.refresh(row)
            return row

    def delete_summary(self, summary_id: str) -> bool:
        """Delete the summary with the given ID; return whether a row was removed."""
        with self.get_session() as db:
            row = db.query(Summary).filter_by(id=summary_id).first()
            if not row:
                return False
            db.delete(row)
            db.commit()
            return True
class SummaryPipelineCLI:
    """Thin CLI-side wrapper that wires services into a ``SummaryPipeline``."""

    def __init__(self, model: str = "deepseek"):
        self.model = model
        self.setup_services()

    def setup_services(self):
        """Instantiate the services, the AI backend, the pipeline and the DB manager."""
        self.video_service = VideoService()
        self.transcript_service = TranscriptService()
        self.cache_manager = CacheManager()
        self.notification_service = NotificationService()

        # Map model names to summarizer classes; anything unknown falls back to DeepSeek.
        summarizers = {
            "anthropic": AnthropicSummarizer,
            "gemini": GeminiSummarizer,
            "openai": OpenAISummarizer,
        }
        summarizer_cls = summarizers.get(self.model, DeepSeekSummarizer)
        self.ai_service = summarizer_cls()

        self.pipeline = SummaryPipeline(
            video_service=self.video_service,
            transcript_service=self.transcript_service,
            ai_service=self.ai_service,
            cache_manager=self.cache_manager,
            notification_service=self.notification_service,
        )
        self.summary_manager = SummaryManager()

    async def process_video(
        self,
        video_url: str,
        custom_prompt: Optional[str] = None,
        summary_length: str = "standard",
        focus_areas: Optional[List[str]] = None,
        include_diagrams: bool = False
    ) -> Dict[str, Any]:
        """Submit a video to the pipeline and poll until it completes or fails.

        Returns the ``__dict__`` of the completed pipeline result. Raises
        ``Exception`` carrying the result's error when the job status is
        "failed". Polls once per second with no upper time limit.
        """
        pipeline_config = PipelineConfig(
            summary_length=summary_length,
            focus_areas=focus_areas or [],
            include_timestamps=True,
            enable_notifications=False,
        )

        # A caller-supplied prompt wins; otherwise use the built-in default.
        prompt_text = custom_prompt or self._build_default_prompt(include_diagrams)
        if prompt_text:
            self.ai_service.custom_prompt = prompt_text

        job_id = await self.pipeline.process_video(video_url, pipeline_config)

        spinner = Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        )
        with spinner as progress:
            task = progress.add_task("[cyan]Processing video...", total=None)
            while True:
                outcome = await self.pipeline.get_pipeline_result(job_id)
                if outcome:
                    if outcome.status == "completed":
                        progress.update(task, description="[green]✓ Processing completed!")
                        return outcome.__dict__
                    if outcome.status == "failed":
                        progress.update(task, description=f"[red]✗ Processing failed: {outcome.error}")
                        raise Exception(outcome.error)
                    progress.update(task, description=f"[yellow]Processing: {outcome.status}")
                await asyncio.sleep(1)

    def _build_default_prompt(self, include_diagrams: bool) -> str:
        """Return the default prompt, with Mermaid instructions appended on request."""
        sections = ["""
Provide a comprehensive summary of this video content.
Include key points, main themes, and actionable insights.
"""]
        if include_diagrams:
            sections.append("""
IMPORTANT: Include relevant Mermaid diagrams where they would help visualize the content.
Consider creating diagrams for:
- Process flows or sequences discussed in the video
- Hierarchical structures or relationships
- Timeline of events or concepts
- Mind maps of main ideas and connections
- State diagrams for systems or workflows
- Entity relationship diagrams for data structures
Format diagrams as:
```mermaid
[diagram code]
```
Choose diagram types intelligently based on the content:
- Use flowchart for processes and decision trees
- Use sequence diagrams for interactions between components
- Use mind maps for conceptual relationships
- Use timeline for chronological information
- Use pie/bar charts for statistical data if mentioned
The AI should automatically determine if and when diagrams would enhance understanding.
""")
        return "".join(sections)
@click.group()
@click.option('--debug', is_flag=True, help='Enable debug logging')
def cli(debug):
    """YouTube Summarizer Backend CLI Tool"""
    # Root command group; the only global option toggles verbose logging.
    if not debug:
        return
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    console.print("[yellow]Debug mode enabled[/yellow]")
@cli.command()
@click.option('--limit', '-l', default=10, help='Number of summaries to show')
@click.option('--user-id', '-u', help='Filter by user ID')
@click.option('--video-id', '-v', help='Filter by video ID')
def list(limit, user_id, video_id):
    """List existing summaries"""
    # NOTE: this function name becomes the click command name, so it is kept;
    # it does shadow the builtin `list` at module scope, so code later in this
    # module must not rely on calling `list(...)` as the builtin.
    #
    # With --video-id, every summary of that video is shown (limit/user-id are
    # not applied); otherwise the newest `limit` summaries are listed,
    # optionally filtered by user.
    manager = SummaryManager()
    if video_id:
        summaries = manager.get_summary_by_video(video_id)
    else:
        summaries = manager.list_summaries(limit=limit, user_id=user_id)

    if not summaries:
        console.print("[yellow]No summaries found[/yellow]")
        return

    # Create table
    table = Table(title="YouTube Summaries")
    table.add_column("ID", style="cyan", width=36)
    table.add_column("Video Title", style="green", width=40)
    table.add_column("Model", style="yellow")
    table.add_column("Created", style="magenta")
    table.add_column("Quality", style="blue")

    for summary in summaries:
        quality = f"{summary.quality_score:.2f}" if summary.quality_score else "N/A"
        # Guard against rows without a timestamp, as the export and compare
        # commands already do, instead of crashing the whole listing.
        created = summary.created_at.strftime("%Y-%m-%d %H:%M") if summary.created_at else "N/A"
        raw_title = summary.video_title or ""
        # Titles longer than the 40-column cell are cut to 37 chars plus an ellipsis.
        title = raw_title[:37] + "..." if len(raw_title) > 40 else raw_title
        table.add_row(
            summary.id,
            title or "Unknown",
            summary.model_used or "Unknown",
            created,
            quality,
        )

    console.print(table)
@cli.command()
@click.argument('summary_id')
@click.option('--export', '-e', is_flag=True, help='Export to JSON file')
@click.option('--render-diagrams', '-r', is_flag=True, help='Render Mermaid diagrams if present')
@click.option('--suggest-diagrams', '-s', is_flag=True, help='Suggest diagrams based on content')
def show(summary_id, export, render_diagrams, suggest_diagrams):
    """Show details of a specific summary"""
    # Prints the stored summary, optionally renders or suggests Mermaid
    # diagrams, and optionally exports the record to a timestamped JSON file.
    record = SummaryManager().get_summary(summary_id)
    if not record:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    # Header: one labelled line per identifying field.
    console.print("\n[bold cyan]Summary Details[/bold cyan]")
    header_fields = (
        ("ID", record.id),
        ("Video", record.video_title),
        ("URL", record.video_url),
        ("Model", record.model_used),
        ("Created", record.created_at),
    )
    for label, value in header_fields:
        console.print(f"[yellow]{label}:[/yellow] {value}")

    if record.summary:
        console.print("\n[bold green]Summary:[/bold green]")
        console.print(record.summary)
        # Rendering only makes sense when the text embeds a Mermaid code fence.
        if render_diagrams and '```mermaid' in record.summary:
            console.print("\n[bold cyan]📊 Rendering Mermaid Diagrams...[/bold cyan]")
            output_dir = f"diagrams/{record.id}"
            rendered = render_summary_diagrams(record.summary, output_dir)
            if rendered:
                console.print(f"[green]✓ Rendered {len(rendered)} diagram(s) to {output_dir}[/green]")

    if record.key_points:
        console.print("\n[bold green]Key Points:[/bold green]")
        for point in record.key_points:
            console.print(f"{point}")

    if suggest_diagrams and record.summary:
        console.print("\n[bold cyan]📊 Diagram Suggestions:[/bold cyan]")
        ideas = DiagramEnhancer.suggest_diagrams(record.summary)
        if not ideas:
            console.print("[dim]No specific diagram suggestions based on content[/dim]")
        else:
            for idea in ideas:
                console.print(f"\n[yellow]Type:[/yellow] {idea['type']}")
                console.print(f"[yellow]Reason:[/yellow] {idea['reason']}")
                console.print("[dim]Template:[/dim]")
                console.print(f"```mermaid\n{idea['template']}\n```")
        # Also offer a structure diagram built from key points and themes.
        if record.key_points or record.main_themes:
            console.print("\n[yellow]Summary Structure Diagram:[/yellow]")
            structure_diagram = DiagramEnhancer.create_summary_structure_diagram(
                record.key_points or [],
                record.main_themes or [],
            )
            console.print(f"```mermaid\n{structure_diagram}\n```")

    if not export:
        return

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"summary_{record.id}_{stamp}.json"
    plain_fields = (
        "id", "video_id", "video_title", "video_url",
        "summary", "key_points", "main_themes", "model_used",
    )
    export_data = {field: getattr(record, field) for field in plain_fields}
    export_data["created_at"] = record.created_at.isoformat() if record.created_at else None
    with open(filename, 'w') as out:
        json.dump(export_data, out, indent=2)
    console.print(f"\n[green]✓ Exported to {filename}[/green]")
@cli.command()
@click.argument('video_url')
@click.option('--model', '-m', default='deepseek',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='AI model to use')
@click.option('--length', '-l', default='standard',
              type=click.Choice(['brief', 'standard', 'detailed']),
              help='Summary length')
@click.option('--prompt', '-p', help='Custom prompt for summarization')
@click.option('--focus', '-f', multiple=True, help='Focus areas (can specify multiple)')
@click.option('--diagrams', '-d', is_flag=True, help='Include Mermaid diagrams in summary')
def add(video_url, model, length, prompt, focus, diagrams):
    """Add a new video summary"""
    # Runs the video through the pipeline with the chosen model, stores the
    # result in the database and prints a preview of up to 500 characters.
    # Any processing error is printed and then re-raised.
    console.print(f"\n[cyan]Adding new summary for: {video_url}[/cyan]")
    if diagrams:
        console.print("[yellow]📊 Diagram generation enabled[/yellow]")

    # click delivers `multiple=True` options as a tuple. Unpack it instead of
    # calling `list(...)`: in this module the name `list` is bound to the
    # `list` CLI command, so `list(focus)` would invoke that command rather
    # than build a list.
    focus_areas = [*focus]

    # Initialize CLI with selected model
    pipeline_cli = SummaryPipelineCLI(model=model)

    # Run async processing
    async def process():
        try:
            result = await pipeline_cli.process_video(
                video_url=video_url,
                custom_prompt=prompt,
                summary_length=length,
                focus_areas=focus_areas or None,
                include_diagrams=diagrams
            )
            # Save to database
            summary_data = {
                "video_id": result.get("video_id"),
                "video_url": result.get("video_url"),
                "video_title": result.get("metadata", {}).get("title"),
                "transcript": result.get("transcript"),
                "summary": result.get("summary", {}).get("content"),
                "key_points": result.get("summary", {}).get("key_points"),
                "main_themes": result.get("summary", {}).get("main_themes"),
                "model_used": model,
                "processing_time": result.get("processing_time"),
                "quality_score": result.get("quality_metrics", {}).get("overall_score"),
                "summary_length": length,
                "focus_areas": focus_areas
            }
            saved_summary = pipeline_cli.summary_manager.save_summary(summary_data)
            console.print(f"\n[green]✓ Summary created successfully![/green]")
            console.print(f"[yellow]Summary ID:[/yellow] {saved_summary.id}")
            console.print(f"[yellow]Video Title:[/yellow] {saved_summary.video_title}")
            # Display summary preview
            if saved_summary.summary:
                console.print(f"\n[bold]Summary Preview:[/bold]")
                preview = saved_summary.summary[:500] + "..." if len(saved_summary.summary) > 500 else saved_summary.summary
                console.print(preview)
            return saved_summary
        except Exception as e:
            console.print(f"[red]Error: {e}[/red]")
            raise

    # Run the async function
    asyncio.run(process())
@cli.command()
@click.argument('summary_id')
@click.option('--model', '-m', default='deepseek',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='AI model to use')
@click.option('--prompt', '-p', help='Custom prompt for regeneration')
@click.option('--length', '-l',
              type=click.Choice(['brief', 'standard', 'detailed']),
              help='New summary length')
@click.option('--diagrams', '-d', is_flag=True, help='Include Mermaid diagrams in summary')
def regenerate(summary_id, model, prompt, length, diagrams):
    """Regenerate an existing summary"""
    # Re-runs the pipeline for the stored video URL after confirmation and
    # overwrites the stored summary fields with the new result.
    manager = SummaryManager()
    existing = manager.get_summary(summary_id)
    if not existing:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    console.print(f"\n[cyan]Regenerating summary for: {existing.video_title}[/cyan]")
    console.print(f"[yellow]Original model:[/yellow] {existing.model_used}")
    console.print(f"[yellow]New model:[/yellow] {model}")
    if diagrams:
        console.print("[yellow]📊 Diagram generation enabled[/yellow]")

    proceed = Confirm.ask("Continue with regeneration?")
    if not proceed:
        console.print("[yellow]Regeneration cancelled[/yellow]")
        return

    runner = SummaryPipelineCLI(model=model)

    async def process():
        try:
            # An explicit --length wins, then the stored length, then 'standard'.
            result = await runner.process_video(
                video_url=existing.video_url,
                custom_prompt=prompt,
                summary_length=length or existing.summary_length or 'standard',
                focus_areas=existing.focus_areas,
                include_diagrams=diagrams,
            )
            generated = result.get("summary", {})
            updates = {
                "summary": generated.get("content"),
                "key_points": generated.get("key_points"),
                "main_themes": generated.get("main_themes"),
                "model_used": model,
                "processing_time": result.get("processing_time"),
                "quality_score": result.get("quality_metrics", {}).get("overall_score"),
            }
            # Only overwrite the stored length when the user asked for a new one.
            if length:
                updates["summary_length"] = length
            refreshed = manager.update_summary(summary_id, updates)
            console.print("\n[green]✓ Summary regenerated successfully![/green]")
            if refreshed.summary:
                console.print("\n[bold]New Summary Preview:[/bold]")
                if len(refreshed.summary) > 500:
                    preview = refreshed.summary[:500] + "..."
                else:
                    preview = refreshed.summary
                console.print(preview)
            return refreshed
        except Exception as e:
            console.print(f"[red]Error: {e}[/red]")
            raise

    asyncio.run(process())
@cli.command()
@click.argument('summary_id')
def delete(summary_id):
    """Delete a summary"""
    # Shows the record, asks for confirmation, then removes it.
    manager = SummaryManager()
    target = manager.get_summary(summary_id)
    if not target:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    console.print("\n[yellow]Summary to delete:[/yellow]")
    for line in (
        f" ID: {target.id}",
        f" Video: {target.video_title}",
        f" Created: {target.created_at}",
    ):
        console.print(line)

    confirmed = Confirm.ask("\n[red]Are you sure you want to delete this summary?[/red]")
    if not confirmed:
        console.print("[yellow]Deletion cancelled[/yellow]")
        return

    if manager.delete_summary(summary_id):
        console.print("[green]✓ Summary deleted successfully[/green]")
    else:
        console.print("[red]Failed to delete summary[/red]")
@cli.command()
@click.option('--days', '-d', default=30, help='Days to keep summaries')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted without deleting')
def cleanup(days, dry_run):
    """Clean up old summaries"""
    # `timedelta` is not among the module-level imports (only `datetime` is),
    # so import it here; without this the command fails with NameError.
    from datetime import timedelta

    # Lists summaries created more than `days` days ago and, unless --dry-run
    # is given, deletes them after an explicit confirmation.
    manager = SummaryManager()
    cutoff_date = datetime.utcnow() - timedelta(days=days)

    with manager.get_session() as session:
        old_summaries = session.query(Summary).filter(
            Summary.created_at < cutoff_date
        ).all()

        if not old_summaries:
            console.print(f"[green]No summaries older than {days} days found[/green]")
            return

        console.print(f"\n[yellow]Found {len(old_summaries)} summaries older than {days} days:[/yellow]")
        for summary in old_summaries:
            console.print(f"{summary.id}: {summary.video_title} (created {summary.created_at})")

        if dry_run:
            console.print("\n[cyan]Dry run - no summaries deleted[/cyan]")
            return

        if not Confirm.ask(f"\n[red]Delete {len(old_summaries)} summaries?[/red]"):
            console.print("[yellow]Cleanup cancelled[/yellow]")
            return

        # Delete inside the same session that loaded the rows, then commit once.
        for summary in old_summaries:
            session.delete(summary)
        session.commit()
        console.print(f"[green]✓ Deleted {len(old_summaries)} summaries[/green]")
@cli.command()
def stats():
    """Show summary statistics"""
    # `timedelta` is not among the module-level imports (only `datetime` is),
    # so import it here; without this the command fails with NameError.
    from datetime import timedelta
    from sqlalchemy import func

    # Prints total count, 7-day activity, average quality, per-model counts
    # and whatever the storage service reports.
    manager = SummaryManager()
    storage_stats = manager.storage_service.get_summary_stats()

    with manager.get_session() as session:
        total_summaries = session.query(Summary).count()

        # Get model distribution
        model_stats = session.query(
            Summary.model_used,
            func.count(Summary.id)
        ).group_by(Summary.model_used).all()

        # Get average quality score
        avg_quality = session.query(func.avg(Summary.quality_score)).scalar()

        # Get recent activity
        recent_date = datetime.utcnow() - timedelta(days=7)
        recent_count = session.query(Summary).filter(
            Summary.created_at >= recent_date
        ).count()

    # Display statistics
    console.print("\n[bold cyan]YouTube Summarizer Statistics[/bold cyan]")
    console.print(f"[yellow]Total Summaries:[/yellow] {total_summaries}")
    console.print(f"[yellow]Recent (7 days):[/yellow] {recent_count}")
    # The average is None when no row has a quality score.
    console.print(f"[yellow]Average Quality:[/yellow] {avg_quality:.2f}" if avg_quality else "[yellow]Average Quality:[/yellow] N/A")

    if model_stats:
        console.print("\n[bold]Model Distribution:[/bold]")
        for model, count in model_stats:
            console.print(f"{model or 'Unknown'}: {count}")

    if storage_stats:
        console.print("\n[bold]Storage Statistics:[/bold]")
        console.print(f" • Videos with summaries: {storage_stats['total_videos_with_summaries']}")
        console.print(f" • Total storage: {storage_stats['total_size_mb']:.2f} MB")
@cli.command()
@click.option('--prompt', '-p', required=True, help='Custom prompt template')
@click.option('--name', '-n', required=True, help='Name for this prompt template')
@click.option('--description', '-d', help='Description of the prompt')
def save_prompt(prompt, name, description):
    """Save a custom prompt template for reuse"""
    # Templates live in prompts.json in the current working directory, keyed
    # by name; saving under an existing name replaces that entry.
    store_path = Path("prompts.json")

    templates = {}
    if store_path.exists():
        with open(store_path, 'r') as src:
            templates = json.load(src)

    entry = {
        "prompt": prompt,
        "description": description,
        "created_at": datetime.now().isoformat(),
    }
    templates[name] = entry

    with open(store_path, 'w') as dst:
        json.dump(templates, dst, indent=2)

    console.print(f"[green]✓ Prompt template '{name}' saved successfully[/green]")
@cli.command()
def list_prompts():
    """List saved prompt templates"""
    # Reads prompts.json from the current working directory and renders one
    # table row per saved template.
    prompts_file = Path("prompts.json")
    if not prompts_file.exists():
        console.print("[yellow]No saved prompts found[/yellow]")
        return

    with open(prompts_file, 'r') as f:
        prompts = json.load(f)

    if not prompts:
        console.print("[yellow]No saved prompts found[/yellow]")
        return

    table = Table(title="Saved Prompt Templates")
    table.add_column("Name", style="cyan")
    table.add_column("Description", style="green")
    table.add_column("Created", style="magenta")

    for name, data in prompts.items():
        # Entries written without a timestamp (e.g. edited by hand) show "N/A"
        # instead of aborting the listing with KeyError.
        created_raw = data.get('created_at')
        created = (
            datetime.fromisoformat(created_raw).strftime("%Y-%m-%d %H:%M")
            if created_raw else "N/A"
        )
        # `save_prompt` always writes the "description" key and stores None
        # when the option is omitted, so a `.get(..., 'N/A')` default never
        # applies; fall back on any empty value instead.
        description = data.get('description') or 'N/A'
        table.add_row(name, description, created)

    console.print(table)
@cli.command()
@click.argument('summary_id')
@click.option('--interactive', '-i', is_flag=True, help='Interactive refinement mode')
@click.option('--model', '-m',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='Switch to different model')
def refine(summary_id, interactive, model):
    """Iteratively refine a summary until satisfied
    This command allows you to refine an existing summary multiple times
    with different instructions until you're satisfied with the result.
    """
    manager = SummaryManager()
    summary = manager.get_summary(summary_id)
    if not summary:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    console.print(f"\n[bold cyan]Refining Summary[/bold cyan]")
    console.print(f"[yellow]Video:[/yellow] {summary.video_title}")
    console.print(f"[yellow]Current Model:[/yellow] {summary.model_used}")

    # Display current summary
    console.print(f"\n[bold]Current Summary:[/bold]")
    if summary.summary:
        console.print(summary.summary[:500] + "..." if len(summary.summary) > 500 else summary.summary)

    # --model wins, then the model stored with the summary, then deepseek.
    current_model = model or summary.model_used or 'deepseek'

    if not interactive:
        # Non-interactive mode - single refinement
        instruction = Prompt.ask("[green]Enter refinement instruction[/green]")
        updated = _apply_refinement(
            manager, summary, summary_id, current_model,
            instruction, keep_structure=False, error_label="Error",
        )
        if updated:
            console.print(f"\n[green]✓ Summary refined successfully![/green]")
            console.print(f"\n[bold]Updated Summary:[/bold]")
            console.print(updated.summary)
        return

    # Snapshots of earlier states, newest last, used by 'undo'.
    refinement_history = []
    console.print("\n[cyan]Interactive Refinement Mode[/cyan]")
    console.print("Enter refinement instructions or type 'done' to finish, 'undo' to revert last change")

    while True:
        console.print("\n" + "="*50)
        instruction = Prompt.ask("\n[green]Refinement instruction[/green]")

        if instruction.lower() == 'done':
            console.print("[green]✓ Refinement complete![/green]")
            break

        if instruction.lower() == 'undo':
            if refinement_history:
                # Revert to previous version
                previous = refinement_history.pop()
                updates = {
                    "summary": previous['summary'],
                    "key_points": previous.get('key_points'),
                    "main_themes": previous.get('main_themes')
                }
                summary = manager.update_summary(summary_id, updates)
                console.print("[yellow]✓ Reverted to previous version[/yellow]")
            else:
                console.print("[yellow]No previous versions to revert to[/yellow]")
            continue

        # Save current state before refinement
        refinement_history.append({
            "summary": summary.summary,
            "key_points": summary.key_points,
            "main_themes": summary.main_themes
        })

        # Perform refinement
        console.print(f"\n[cyan]Refining with: {instruction[:50]}...[/cyan]")
        updated = _apply_refinement(
            manager, summary, summary_id, current_model,
            instruction, keep_structure=True, error_label="Error during refinement",
        )
        if not updated:
            # Nothing was changed, so drop the snapshot just taken; otherwise
            # the next 'undo' would claim to revert while restoring the very
            # same text.
            refinement_history.pop()
            continue

        summary = updated
        console.print(f"\n[green]✓ Refinement applied![/green]")
        console.print(f"\n[bold]Updated Summary:[/bold]")
        preview = summary.summary[:500] + "..." if len(summary.summary) > 500 else summary.summary
        console.print(preview)

        # Ask for satisfaction
        if not Confirm.ask("\n[yellow]Are you satisfied with this refinement?[/yellow]"):
            console.print("[cyan]Let's continue refining...[/cyan]")
        else:
            console.print("[green]✓ Great! Summary refined successfully![/green]")
            break


def _build_refinement_prompt(summary_text, instruction, keep_structure):
    """Compose the prompt sent to the model for one refinement pass.

    ``keep_structure`` appends the extra "maintain the same structure" line
    that only the interactive mode uses.
    """
    prompt = f"""
Original summary:
{summary_text}
Refinement instruction:
{instruction}
Please provide an improved summary based on the refinement instruction above.
"""
    if keep_structure:
        prompt += "Maintain the same structure and key information unless specifically asked to change.\n"
    return prompt


def _apply_refinement(manager, summary, summary_id, model, instruction, keep_structure, error_label):
    """Run one refinement through the pipeline and persist the result.

    Returns the updated summary row, or ``None`` when processing raised; the
    error is printed prefixed with ``error_label``.
    """
    pipeline_cli = SummaryPipelineCLI(model=model)
    refinement_prompt = _build_refinement_prompt(summary.summary, instruction, keep_structure)

    async def refine_summary():
        try:
            result = await pipeline_cli.process_video(
                video_url=summary.video_url,
                custom_prompt=refinement_prompt,
                summary_length=summary.summary_length or 'standard',
                focus_areas=summary.focus_areas
            )
            updates = {
                "summary": result.get("summary", {}).get("content"),
                "key_points": result.get("summary", {}).get("key_points"),
                "main_themes": result.get("summary", {}).get("main_themes"),
                "model_used": model
            }
            return manager.update_summary(summary_id, updates)
        except Exception as e:
            console.print(f"[red]{error_label}: {e}[/red]")
            return None

    return asyncio.run(refine_summary())
@cli.command()
@click.option('--input-file', '-i', type=click.Path(exists=True), help='File containing list of URLs')
@click.option('--model', '-m', default='deepseek',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='AI model to use')
@click.option('--prompt', '-p', help='Custom prompt for all videos')
@click.option('--length', '-l', default='standard',
              type=click.Choice(['brief', 'standard', 'detailed']),
              help='Summary length for all videos')
def batch(input_file, model, prompt, length):
    """Process multiple videos in batch
    Provide a file with one YouTube URL per line to process multiple videos.
    """
    if not input_file:
        # Interactive mode - ask for URLs
        console.print("[cyan]Enter YouTube URLs (one per line, empty line to finish):[/cyan]")
        urls = []
        while True:
            try:
                url = input().strip()
            except EOFError:
                # End of input (Ctrl-D or exhausted pipe) finishes entry like an empty line.
                break
            # Strip before testing so a whitespace-only line ends entry
            # instead of being queued as an empty URL.
            if not url:
                break
            urls.append(url)
    else:
        # Read from file
        with open(input_file, 'r') as f:
            urls = [line.strip() for line in f if line.strip()]

    if not urls:
        console.print("[yellow]No URLs provided[/yellow]")
        return

    console.print(f"\n[cyan]Processing {len(urls)} videos with model: {model}[/cyan]")
    pipeline_cli = SummaryPipelineCLI(model=model)
    results = []

    async def process_batch():
        # Videos are processed one after another; a failure is recorded and
        # the batch carries on with the next URL.
        for i, url in enumerate(urls, 1):
            console.print(f"\n[yellow]Processing {i}/{len(urls)}: {url}[/yellow]")
            try:
                result = await pipeline_cli.process_video(
                    video_url=url,
                    custom_prompt=prompt,
                    summary_length=length
                )
                # Save to database
                summary_data = {
                    "video_id": result.get("video_id"),
                    "video_url": url,
                    "video_title": result.get("metadata", {}).get("title"),
                    "transcript": result.get("transcript"),
                    "summary": result.get("summary", {}).get("content"),
                    "key_points": result.get("summary", {}).get("key_points"),
                    "main_themes": result.get("summary", {}).get("main_themes"),
                    "model_used": model,
                    "processing_time": result.get("processing_time"),
                    "quality_score": result.get("quality_metrics", {}).get("overall_score"),
                    "summary_length": length
                }
                saved = pipeline_cli.summary_manager.save_summary(summary_data)
                results.append({
                    "url": url,
                    "id": saved.id,
                    "title": saved.video_title,
                    "status": "success"
                })
                console.print(f"[green]✓ Processed: {saved.video_title}[/green]")
            except Exception as e:
                console.print(f"[red]✗ Failed: {e}[/red]")
                results.append({
                    "url": url,
                    "status": "failed",
                    "error": str(e)
                })
        return results

    # Run batch processing
    final_results = asyncio.run(process_batch())

    # Display summary
    console.print("\n" + "="*50)
    console.print("[bold cyan]Batch Processing Complete[/bold cyan]")
    success_count = len([r for r in final_results if r['status'] == 'success'])
    failed_count = len([r for r in final_results if r['status'] == 'failed'])
    console.print(f"[green]Successful:[/green] {success_count}")
    console.print(f"[red]Failed:[/red] {failed_count}")

    # Save results to file
    output_file = f"batch_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(output_file, 'w') as f:
        json.dump(final_results, f, indent=2)
    console.print(f"\n[yellow]Results saved to:[/yellow] {output_file}")
@cli.command()
@click.argument('summary_id')
@click.argument('other_summary_id')
def compare(summary_id, other_summary_id):
    """Compare two different summaries
    Useful for comparing summaries generated with different models or prompts.
    """
    manager = SummaryManager()
    summary1 = manager.get_summary(summary_id)
    summary2 = manager.get_summary(other_summary_id)
    # Report the first missing ID only, checking the first argument first.
    for wanted_id, found in ((summary_id, summary1), (other_summary_id, summary2)):
        if not found:
            console.print(f"[red]Summary not found: {wanted_id}[/red]")
            return

    table = Table(title="Summary Comparison", show_lines=True)
    table.add_column("Attribute", style="cyan", width=20)
    table.add_column(f"Summary 1\n{summary_id[:8]}...", style="green", width=40)
    table.add_column(f"Summary 2\n{other_summary_id[:8]}...", style="yellow", width=40)

    def truncate_text(text, length=200):
        # Shorten long text for the preview cell; empty text shows as N/A.
        if not text:
            return "N/A"
        if len(text) > length:
            return text[:length] + "..."
        return text

    def add_pair(label, render):
        # Add one row by rendering the same attribute for both summaries.
        table.add_row(label, render(summary1), render(summary2))

    add_pair("Video", lambda s: s.video_title or "N/A")
    add_pair("Model", lambda s: s.model_used or "Unknown")
    add_pair("Length", lambda s: s.summary_length or "standard")
    add_pair("Quality Score", lambda s: f"{s.quality_score:.2f}" if s.quality_score else "N/A")
    add_pair("Processing Time", lambda s: f"{s.processing_time:.2f}s" if s.processing_time else "N/A")
    add_pair("Created", lambda s: s.created_at.strftime("%Y-%m-%d %H:%M") if s.created_at else "N/A")
    add_pair("Summary Preview", lambda s: truncate_text(s.summary))
    console.print(table)

    # Key points are compared position by position when either side has any.
    if not (summary1.key_points or summary2.key_points):
        return
    console.print("\n[bold]Key Points Comparison:[/bold]")
    left_points = summary1.key_points or []
    right_points = summary2.key_points or []
    placeholder = "[dim]N/A[/dim]"
    for idx in range(max(len(left_points), len(right_points))):
        left = left_points[idx] if idx < len(left_points) else placeholder
        right = right_points[idx] if idx < len(right_points) else placeholder
        console.print(f"\n[cyan]Point {idx+1}:[/cyan]")
        console.print(f" [green]Summary 1:[/green] {left}")
        console.print(f" [yellow]Summary 2:[/yellow] {right}")
# Script entry point: invoke the click command group when run directly.
if __name__ == "__main__":
    cli()