#!/usr/bin/env python3
"""YouTube Summarizer Backend CLI Tool

A command-line interface for managing YouTube video summaries.
Supports regenerating summaries, adding new ones, and using custom prompts.
"""

import asyncio
import json
import os
import sys
from pathlib import Path
from datetime import datetime, timedelta
from itertools import zip_longest
from typing import Optional, Dict, Any, List
import logging

import click
from rich.console import Console
from rich.table import Table
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.prompt import Prompt, Confirm
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, Session

# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))

from backend.core.config import settings
from backend.core.database_registry import registry
from backend.models import Summary, User
from backend.services.summary_pipeline import SummaryPipeline
from backend.services.video_service import VideoService
from backend.services.transcript_service import TranscriptService
from backend.services.anthropic_summarizer import AnthropicSummarizer
from backend.services.deepseek_summarizer import DeepSeekSummarizer
from backend.services.gemini_summarizer import GeminiSummarizer
from backend.services.openai_summarizer import OpenAISummarizer
from backend.services.cache_manager import CacheManager
from backend.services.notification_service import NotificationService
from backend.services.summary_storage import SummaryStorageService
from backend.models.pipeline import PipelineConfig
from backend.mermaid_renderer import MermaidRenderer, DiagramEnhancer, render_summary_diagrams

# Initialize Rich console
console = Console()
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Number of characters shown when previewing a summary on the terminal.
_PREVIEW_CHARS = 500


def _preview(text: Optional[str], limit: int = _PREVIEW_CHARS) -> str:
    """Return *text* cut to *limit* characters, with "..." appended when cut.

    ``None`` is treated as an empty string so callers can pass a nullable
    column value directly.
    """
    text = text or ""
    return text[:limit] + "..." if len(text) > limit else text


def _extract_summary_fields(result: Dict[str, Any]) -> Dict[str, Any]:
    """Pull the summary text, key points and themes out of a pipeline result.

    The ``summary`` entry may be missing *or* explicitly ``None``; both are
    treated as an empty mapping so the lookup never raises AttributeError.
    """
    summary_part = result.get("summary") or {}
    return {
        "summary": summary_part.get("content"),
        "key_points": summary_part.get("key_points"),
        "main_themes": summary_part.get("main_themes"),
    }


def _build_summary_record(
    result: Dict[str, Any],
    *,
    video_url: Optional[str],
    model: str,
    length: str,
    focus_areas: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Assemble the dict handed to ``SummaryManager.save_summary``.

    ``focus_areas`` is only added to the record when it is not ``None``.
    """
    record: Dict[str, Any] = {
        "video_id": result.get("video_id"),
        "video_url": video_url,
        "video_title": (result.get("metadata") or {}).get("title"),
        "transcript": result.get("transcript"),
        **_extract_summary_fields(result),
        "model_used": model,
        "processing_time": result.get("processing_time"),
        "quality_score": (result.get("quality_metrics") or {}).get("overall_score"),
        "summary_length": length,
    }
    if focus_areas is not None:
        record["focus_areas"] = focus_areas
    return record


class SummaryManager:
    """Manages database operations for summaries."""

    def __init__(self):
        """Create the engine, make sure all tables exist and build a session factory."""
        self.engine = create_engine(settings.DATABASE_URL)
        registry.create_all_tables(self.engine)
        self.SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self.engine)
        self.storage_service = SummaryStorageService()

    def get_session(self) -> Session:
        """Get a database session."""
        return self.SessionLocal()

    def list_summaries(self, limit: int = 10, user_id: Optional[str] = None) -> List[Summary]:
        """List recent summaries from database, newest first.

        Args:
            limit: Maximum number of rows to return.
            user_id: When given, only summaries with this ``user_id`` are returned.
        """
        with self.get_session() as session:
            query = session.query(Summary)
            if user_id:
                query = query.filter_by(user_id=user_id)
            return query.order_by(Summary.created_at.desc()).limit(limit).all()

    def get_summary(self, summary_id: str) -> Optional[Summary]:
        """Get a specific summary by ID, or ``None`` when no row matches."""
        with self.get_session() as session:
            return session.query(Summary).filter_by(id=summary_id).first()

    def get_summary_by_video(self, video_id: str) -> List[Summary]:
        """Get all summaries for a specific video."""
        with self.get_session() as session:
            return session.query(Summary).filter_by(video_id=video_id).all()

    def save_summary(self, summary_data: Dict[str, Any]) -> Summary:
        """Save a new summary to database using unified storage service."""
        # Imported here, as in the original code, rather than at module import time.
        from backend.services.database_storage_service import database_storage_service
        return database_storage_service.save_summary_from_dict(summary_data)

    def update_summary(self, summary_id: str, updates: Dict[str, Any]) -> Optional[Summary]:
        """Update an existing summary.

        Keys in *updates* that are not attributes of the row are ignored.

        Returns:
            The refreshed row, or ``None`` when no summary has *summary_id*.
        """
        with self.get_session() as session:
            summary = session.query(Summary).filter_by(id=summary_id).first()
            if summary:
                for key, value in updates.items():
                    if hasattr(summary, key):
                        setattr(summary, key, value)
                summary.updated_at = datetime.utcnow()
                session.commit()
                # Reload attributes before the session closes so the returned
                # object can still be read by the caller.
                session.refresh(summary)
            return summary

    def delete_summary(self, summary_id: str) -> bool:
        """Delete a summary from database; return whether a row was deleted."""
        with self.get_session() as session:
            summary = session.query(Summary).filter_by(id=summary_id).first()
            if summary:
                session.delete(summary)
                session.commit()
                return True
            return False


class SummaryPipelineCLI:
    """CLI wrapper for the summary pipeline."""

    def __init__(self, model: str = "deepseek"):
        self.model = model
        self.setup_services()

    def setup_services(self):
        """Initialize all required services."""
        # Initialize services
        self.video_service = VideoService()
        self.transcript_service = TranscriptService()
        self.cache_manager = CacheManager()
        self.notification_service = NotificationService()

        # Select AI service based on model; anything unrecognised uses DeepSeek
        if self.model == "anthropic":
            self.ai_service = AnthropicSummarizer()
        elif self.model == "gemini":
            self.ai_service = GeminiSummarizer()
        elif self.model == "openai":
            self.ai_service = OpenAISummarizer()
        else:
            self.ai_service = DeepSeekSummarizer()

        # Initialize pipeline
        self.pipeline = SummaryPipeline(
            video_service=self.video_service,
            transcript_service=self.transcript_service,
            ai_service=self.ai_service,
            cache_manager=self.cache_manager,
            notification_service=self.notification_service
        )
        self.summary_manager = SummaryManager()

    async def process_video(
        self,
        video_url: str,
        custom_prompt: Optional[str] = None,
        summary_length: str = "standard",
        focus_areas: Optional[List[str]] = None,
        include_diagrams: bool = False
    ) -> Dict[str, Any]:
        """Process a video through the summary pipeline.

        Starts a pipeline job and polls it once per second until it reports
        ``completed`` or ``failed``.

        Args:
            video_url: URL of the video to summarize.
            custom_prompt: Prompt to use instead of the default one.
            summary_length: Length setting forwarded to ``PipelineConfig``.
            focus_areas: Focus areas forwarded to ``PipelineConfig``.
            include_diagrams: Add Mermaid instructions to the default prompt
                (ignored when *custom_prompt* is given).

        Returns:
            The ``__dict__`` of the completed pipeline result.

        Raises:
            RuntimeError: When the pipeline reports the job as failed.
        """
        # Create configuration
        config = PipelineConfig(
            summary_length=summary_length,
            focus_areas=focus_areas or [],
            include_timestamps=True,
            enable_notifications=False
        )

        # A custom prompt wins; otherwise build the default (optionally with diagrams)
        final_prompt = custom_prompt or self._build_default_prompt(include_diagrams)
        if final_prompt:
            self.ai_service.custom_prompt = final_prompt

        # Start processing
        job_id = await self.pipeline.process_video(video_url, config)

        # Wait for completion with progress updates
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console
        ) as progress:
            task = progress.add_task("[cyan]Processing video...", total=None)
            while True:
                result = await self.pipeline.get_pipeline_result(job_id)
                if result:
                    if result.status == "completed":
                        progress.update(task, description="[green]✓ Processing completed!")
                        return result.__dict__
                    if result.status == "failed":
                        progress.update(task, description=f"[red]✗ Processing failed: {result.error}")
                        # RuntimeError is an Exception, so existing handlers still catch it.
                        raise RuntimeError(result.error)
                    progress.update(task, description=f"[yellow]Processing: {result.status}")
                await asyncio.sleep(1)

    def _build_default_prompt(self, include_diagrams: bool) -> str:
        """Build default prompt with optional diagram instructions."""
        base_prompt = """
        Provide a comprehensive summary of this video content.
        Include key points, main themes, and actionable insights.
        """
        if not include_diagrams:
            return base_prompt

        diagram_prompt = """
        IMPORTANT: Include relevant Mermaid diagrams where they would help visualize the content.
        Consider creating diagrams for:
        - Process flows or sequences discussed in the video
        - Hierarchical structures or relationships
        - Timeline of events or concepts
        - Mind maps of main ideas and connections
        - State diagrams for systems or workflows
        - Entity relationship diagrams for data structures

        Format diagrams as:
        ```mermaid
        [diagram code]
        ```

        Choose diagram types intelligently based on the content:
        - Use flowchart for processes and decision trees
        - Use sequence diagrams for interactions between components
        - Use mind maps for conceptual relationships
        - Use timeline for chronological information
        - Use pie/bar charts for statistical data if mentioned

        The AI should automatically determine if and when diagrams would enhance understanding.
        """
        return base_prompt + diagram_prompt


def _apply_refinement(
    manager: SummaryManager,
    summary: Summary,
    summary_id: str,
    instruction: str,
    model: str,
    *,
    preserve_structure: bool,
    error_label: str,
) -> Optional[Summary]:
    """Run one refinement pass and persist the result.

    Builds a prompt from the current summary text and *instruction*, sends
    the video through the pipeline again and stores the new text.

    Args:
        manager: Used to write the refined summary back.
        summary: Current state of the summary being refined.
        summary_id: ID of the row to update.
        instruction: The user's refinement instruction.
        model: Model name for the pipeline; also stored as ``model_used``.
        preserve_structure: Append the "maintain the same structure" sentence.
        error_label: Prefix for the message printed when the pass fails.

    Returns:
        The updated summary, or ``None`` when processing failed or the row
        no longer exists.
    """
    pipeline_cli = SummaryPipelineCLI(model=model)

    refinement_prompt = f"""
    Original summary:
    {summary.summary}

    Refinement instruction:
    {instruction}

    Please provide an improved summary based on the refinement instruction above.
    """
    if preserve_structure:
        refinement_prompt += (
            "Maintain the same structure and key information "
            "unless specifically asked to change.\n"
        )

    async def refine_summary():
        result = await pipeline_cli.process_video(
            video_url=summary.video_url,
            custom_prompt=refinement_prompt,
            summary_length=summary.summary_length or 'standard',
            focus_areas=summary.focus_areas
        )
        updates = _extract_summary_fields(result)
        updates["model_used"] = model
        return manager.update_summary(summary_id, updates)

    try:
        return asyncio.run(refine_summary())
    except Exception as e:
        # Best-effort: a failed pass is reported and leaves the stored summary untouched.
        console.print(f"[red]{error_label}: {e}[/red]")
        return None


@click.group()
@click.option('--debug', is_flag=True, help='Enable debug logging')
def cli(debug):
    """YouTube Summarizer Backend CLI Tool"""
    if debug:
        logging.getLogger().setLevel(logging.DEBUG)
        console.print("[yellow]Debug mode enabled[/yellow]")


# Registered as "list", but the function has its own name so the builtin
# ``list`` is not shadowed for the rest of the module (``add`` relies on it).
@cli.command('list')
@click.option('--limit', '-l', default=10, help='Number of summaries to show')
@click.option('--user-id', '-u', help='Filter by user ID')
@click.option('--video-id', '-v', help='Filter by video ID')
def list_summaries(limit, user_id, video_id):
    """List existing summaries"""
    manager = SummaryManager()

    if video_id:
        summaries = manager.get_summary_by_video(video_id)
    else:
        summaries = manager.list_summaries(limit=limit, user_id=user_id)

    if not summaries:
        console.print("[yellow]No summaries found[/yellow]")
        return

    # Create table
    table = Table(title="YouTube Summaries")
    table.add_column("ID", style="cyan", width=36)
    table.add_column("Video Title", style="green", width=40)
    table.add_column("Model", style="yellow")
    table.add_column("Created", style="magenta")
    table.add_column("Quality", style="blue")

    for summary in summaries:
        # A score of 0.0 is a real value, so test against None rather than truthiness.
        quality = f"{summary.quality_score:.2f}" if summary.quality_score is not None else "N/A"
        created = summary.created_at.strftime("%Y-%m-%d %H:%M") if summary.created_at else "N/A"
        title = summary.video_title or "Unknown"
        if len(title) > 40:
            title = title[:37] + "..."

        table.add_row(
            str(summary.id),
            title,
            summary.model_used or "Unknown",
            created,
            quality
        )

    console.print(table)


@cli.command()
@click.argument('summary_id')
@click.option('--export', '-e', is_flag=True, help='Export to JSON file')
@click.option('--render-diagrams', '-r', is_flag=True, help='Render Mermaid diagrams if present')
@click.option('--suggest-diagrams', '-s', is_flag=True, help='Suggest diagrams based on content')
def show(summary_id, export, render_diagrams, suggest_diagrams):
    """Show details of a specific summary"""
    manager = SummaryManager()
    summary = manager.get_summary(summary_id)

    if not summary:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    # Display summary details
    console.print("\n[bold cyan]Summary Details[/bold cyan]")
    console.print(f"[yellow]ID:[/yellow] {summary.id}")
    console.print(f"[yellow]Video:[/yellow] {summary.video_title}")
    console.print(f"[yellow]URL:[/yellow] {summary.video_url}")
    console.print(f"[yellow]Model:[/yellow] {summary.model_used}")
    console.print(f"[yellow]Created:[/yellow] {summary.created_at}")

    if summary.summary:
        console.print("\n[bold green]Summary:[/bold green]")
        console.print(summary.summary)

        # Check for and render diagrams if requested
        if render_diagrams and '```mermaid' in summary.summary:
            console.print("\n[bold cyan]📊 Rendering Mermaid Diagrams...[/bold cyan]")
            output_dir = f"diagrams/{summary.id}"
            results = render_summary_diagrams(summary.summary, output_dir)
            if results:
                console.print(f"[green]✓ Rendered {len(results)} diagram(s) to {output_dir}[/green]")

    if summary.key_points:
        console.print("\n[bold green]Key Points:[/bold green]")
        for point in summary.key_points:
            console.print(f"  • {point}")

    # Suggest diagrams if requested
    if suggest_diagrams and summary.summary:
        console.print("\n[bold cyan]📊 Diagram Suggestions:[/bold cyan]")
        suggestions = DiagramEnhancer.suggest_diagrams(summary.summary)

        if suggestions:
            for suggestion in suggestions:
                console.print(f"\n[yellow]Type:[/yellow] {suggestion['type']}")
                console.print(f"[yellow]Reason:[/yellow] {suggestion['reason']}")
                console.print("[dim]Template:[/dim]")
                console.print(f"```mermaid\n{suggestion['template']}\n```")
        else:
            console.print("[dim]No specific diagram suggestions based on content[/dim]")

        # Also create a summary structure diagram
        if summary.key_points or summary.main_themes:
            console.print("\n[yellow]Summary Structure Diagram:[/yellow]")
            structure_diagram = DiagramEnhancer.create_summary_structure_diagram(
                summary.key_points or [],
                summary.main_themes or []
            )
            console.print(f"```mermaid\n{structure_diagram}\n```")

    if export:
        filename = f"summary_{summary.id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        export_data = {
            "id": summary.id,
            "video_id": summary.video_id,
            "video_title": summary.video_title,
            "video_url": summary.video_url,
            "summary": summary.summary,
            "key_points": summary.key_points,
            "main_themes": summary.main_themes,
            "model_used": summary.model_used,
            "created_at": summary.created_at.isoformat() if summary.created_at else None
        }

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(export_data, f, indent=2)

        console.print(f"\n[green]✓ Exported to {filename}[/green]")


@cli.command()
@click.argument('video_url')
@click.option('--model', '-m', default='deepseek',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='AI model to use')
@click.option('--length', '-l', default='standard',
              type=click.Choice(['brief', 'standard', 'detailed']),
              help='Summary length')
@click.option('--prompt', '-p', help='Custom prompt for summarization')
@click.option('--focus', '-f', multiple=True, help='Focus areas (can specify multiple)')
@click.option('--diagrams', '-d', is_flag=True, help='Include Mermaid diagrams in summary')
def add(video_url, model, length, prompt, focus, diagrams):
    """Add a new video summary"""
    console.print(f"\n[cyan]Adding new summary for: {video_url}[/cyan]")
    if diagrams:
        console.print("[yellow]📊 Diagram generation enabled[/yellow]")

    # Initialize CLI with selected model
    pipeline_cli = SummaryPipelineCLI(model=model)

    # click delivers a tuple for a multiple=True option
    focus_areas = list(focus)

    # Run async processing
    async def process():
        try:
            result = await pipeline_cli.process_video(
                video_url=video_url,
                custom_prompt=prompt,
                summary_length=length,
                focus_areas=focus_areas or None,
                include_diagrams=diagrams
            )

            # Save to database
            summary_data = _build_summary_record(
                result,
                video_url=result.get("video_url"),
                model=model,
                length=length,
                focus_areas=focus_areas,
            )
            saved_summary = pipeline_cli.summary_manager.save_summary(summary_data)

            console.print("\n[green]✓ Summary created successfully![/green]")
            console.print(f"[yellow]Summary ID:[/yellow] {saved_summary.id}")
            console.print(f"[yellow]Video Title:[/yellow] {saved_summary.video_title}")

            # Display summary preview
            if saved_summary.summary:
                console.print("\n[bold]Summary Preview:[/bold]")
                console.print(_preview(saved_summary.summary))

            return saved_summary

        except Exception as e:
            console.print(f"[red]Error: {e}[/red]")
            raise

    # Run the async function
    asyncio.run(process())


@cli.command()
@click.argument('summary_id')
@click.option('--model', '-m', default='deepseek',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='AI model to use')
@click.option('--prompt', '-p', help='Custom prompt for regeneration')
@click.option('--length', '-l',
              type=click.Choice(['brief', 'standard', 'detailed']),
              help='New summary length')
@click.option('--diagrams', '-d', is_flag=True, help='Include Mermaid diagrams in summary')
def regenerate(summary_id, model, prompt, length, diagrams):
    """Regenerate an existing summary"""
    manager = SummaryManager()
    summary = manager.get_summary(summary_id)

    if not summary:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    console.print(f"\n[cyan]Regenerating summary for: {summary.video_title}[/cyan]")
    console.print(f"[yellow]Original model:[/yellow] {summary.model_used}")
    console.print(f"[yellow]New model:[/yellow] {model}")
    if diagrams:
        console.print("[yellow]📊 Diagram generation enabled[/yellow]")

    if not Confirm.ask("Continue with regeneration?"):
        console.print("[yellow]Regeneration cancelled[/yellow]")
        return

    # Initialize CLI with selected model
    pipeline_cli = SummaryPipelineCLI(model=model)

    # Run async processing
    async def process():
        try:
            result = await pipeline_cli.process_video(
                video_url=summary.video_url,
                custom_prompt=prompt,
                summary_length=length or summary.summary_length or 'standard',
                focus_areas=summary.focus_areas,
                include_diagrams=diagrams
            )

            # Update existing summary
            updates = _extract_summary_fields(result)
            updates.update({
                "model_used": model,
                "processing_time": result.get("processing_time"),
                "quality_score": (result.get("quality_metrics") or {}).get("overall_score"),
            })
            if length:
                updates["summary_length"] = length

            updated_summary = manager.update_summary(summary_id, updates)
            if updated_summary is None:
                # The row disappeared while the pipeline was running.
                console.print(f"[red]Summary not found: {summary_id}[/red]")
                return None

            console.print("\n[green]✓ Summary regenerated successfully![/green]")

            # Display new summary preview
            if updated_summary.summary:
                console.print("\n[bold]New Summary Preview:[/bold]")
                console.print(_preview(updated_summary.summary))

            return updated_summary

        except Exception as e:
            console.print(f"[red]Error: {e}[/red]")
            raise

    # Run the async function
    asyncio.run(process())


@cli.command()
@click.argument('summary_id')
def delete(summary_id):
    """Delete a summary"""
    manager = SummaryManager()
    summary = manager.get_summary(summary_id)

    if not summary:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    console.print("\n[yellow]Summary to delete:[/yellow]")
    console.print(f"  ID: {summary.id}")
    console.print(f"  Video: {summary.video_title}")
    console.print(f"  Created: {summary.created_at}")

    if not Confirm.ask("\n[red]Are you sure you want to delete this summary?[/red]"):
        console.print("[yellow]Deletion cancelled[/yellow]")
        return

    if manager.delete_summary(summary_id):
        console.print("[green]✓ Summary deleted successfully[/green]")
    else:
        console.print("[red]Failed to delete summary[/red]")


@cli.command()
@click.option('--days', '-d', default=30, help='Days to keep summaries')
@click.option('--dry-run', is_flag=True, help='Show what would be deleted without deleting')
def cleanup(days, dry_run):
    """Clean up old summaries"""
    manager = SummaryManager()
    cutoff_date = datetime.utcnow() - timedelta(days=days)

    with manager.get_session() as session:
        old_summaries = session.query(Summary).filter(
            Summary.created_at < cutoff_date
        ).all()

        if not old_summaries:
            console.print(f"[green]No summaries older than {days} days found[/green]")
            return

        console.print(f"\n[yellow]Found {len(old_summaries)} summaries older than {days} days:[/yellow]")
        for summary in old_summaries:
            console.print(f"  • {summary.id}: {summary.video_title} (created {summary.created_at})")

        if dry_run:
            console.print("\n[cyan]Dry run - no summaries deleted[/cyan]")
        elif Confirm.ask(f"\n[red]Delete {len(old_summaries)} summaries?[/red]"):
            for summary in old_summaries:
                session.delete(summary)
            session.commit()
            console.print(f"[green]✓ Deleted {len(old_summaries)} summaries[/green]")
        else:
            console.print("[yellow]Cleanup cancelled[/yellow]")


@cli.command()
def stats():
    """Show summary statistics"""
    manager = SummaryManager()
    storage_stats = manager.storage_service.get_summary_stats()

    with manager.get_session() as session:
        total_summaries = session.query(Summary).count()

        # Get model distribution
        from sqlalchemy import func
        model_stats = session.query(
            Summary.model_used,
            func.count(Summary.id)
        ).group_by(Summary.model_used).all()

        # Get average quality score
        avg_quality = session.query(func.avg(Summary.quality_score)).scalar()

        # Get recent activity
        recent_date = datetime.utcnow() - timedelta(days=7)
        recent_count = session.query(Summary).filter(
            Summary.created_at >= recent_date
        ).count()

    # Display statistics
    console.print("\n[bold cyan]YouTube Summarizer Statistics[/bold cyan]")
    console.print(f"[yellow]Total Summaries:[/yellow] {total_summaries}")
    console.print(f"[yellow]Recent (7 days):[/yellow] {recent_count}")
    # The average is None when no rows carry a score; 0.0 is a legitimate average.
    if avg_quality is not None:
        console.print(f"[yellow]Average Quality:[/yellow] {avg_quality:.2f}")
    else:
        console.print("[yellow]Average Quality:[/yellow] N/A")

    if model_stats:
        console.print("\n[bold]Model Distribution:[/bold]")
        for model, count in model_stats:
            console.print(f"  • {model or 'Unknown'}: {count}")

    if storage_stats:
        console.print("\n[bold]Storage Statistics:[/bold]")
        console.print(f"  • Videos with summaries: {storage_stats['total_videos_with_summaries']}")
        console.print(f"  • Total storage: {storage_stats['total_size_mb']:.2f} MB")


@cli.command()
@click.option('--prompt', '-p', required=True, help='Custom prompt template')
@click.option('--name', '-n', required=True, help='Name for this prompt template')
@click.option('--description', '-d', help='Description of the prompt')
def save_prompt(prompt, name, description):
    """Save a custom prompt template for reuse"""
    # Save to a JSON file for now
    prompts_file = Path("prompts.json")

    if prompts_file.exists():
        with open(prompts_file, 'r', encoding='utf-8') as f:
            prompts = json.load(f)
    else:
        prompts = {}

    prompts[name] = {
        "prompt": prompt,
        "description": description,
        "created_at": datetime.now().isoformat()
    }

    with open(prompts_file, 'w', encoding='utf-8') as f:
        json.dump(prompts, f, indent=2)

    console.print(f"[green]✓ Prompt template '{name}' saved successfully[/green]")


@cli.command()
def list_prompts():
    """List saved prompt templates"""
    prompts_file = Path("prompts.json")

    if not prompts_file.exists():
        console.print("[yellow]No saved prompts found[/yellow]")
        return

    with open(prompts_file, 'r', encoding='utf-8') as f:
        prompts = json.load(f)

    if not prompts:
        console.print("[yellow]No saved prompts found[/yellow]")
        return

    table = Table(title="Saved Prompt Templates")
    table.add_column("Name", style="cyan")
    table.add_column("Description", style="green")
    table.add_column("Created", style="magenta")

    for name, data in prompts.items():
        created = datetime.fromisoformat(data['created_at']).strftime("%Y-%m-%d %H:%M")
        table.add_row(
            name,
            # save_prompt always writes the key, possibly as null, so a
            # dict.get default would never apply.
            data.get('description') or 'N/A',
            created
        )

    console.print(table)


@cli.command()
@click.argument('summary_id')
@click.option('--interactive', '-i', is_flag=True, help='Interactive refinement mode')
@click.option('--model', '-m',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='Switch to different model')
def refine(summary_id, interactive, model):
    """Iteratively refine a summary until satisfied

    This command allows you to refine an existing summary multiple times
    with different instructions until you're satisfied with the result.
    """
    manager = SummaryManager()
    summary = manager.get_summary(summary_id)

    if not summary:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    console.print("\n[bold cyan]Refining Summary[/bold cyan]")
    console.print(f"[yellow]Video:[/yellow] {summary.video_title}")
    console.print(f"[yellow]Current Model:[/yellow] {summary.model_used}")

    # Display current summary
    console.print("\n[bold]Current Summary:[/bold]")
    if summary.summary:
        console.print(_preview(summary.summary))

    current_model = model or summary.model_used or 'deepseek'

    if not interactive:
        # Non-interactive mode - single refinement
        instruction = Prompt.ask("[green]Enter refinement instruction[/green]")
        updated = _apply_refinement(
            manager, summary, summary_id, instruction, current_model,
            preserve_structure=False, error_label="Error",
        )
        if updated:
            console.print("\n[green]✓ Summary refined successfully![/green]")
            console.print("\n[bold]Updated Summary:[/bold]")
            console.print(updated.summary)
        return

    # Snapshots taken before each refinement, newest last, for 'undo'
    refinement_history = []

    console.print("\n[cyan]Interactive Refinement Mode[/cyan]")
    console.print("Enter refinement instructions or type 'done' to finish, 'undo' to revert last change")

    while True:
        console.print("\n" + "="*50)
        instruction = Prompt.ask("\n[green]Refinement instruction[/green]")
        command = instruction.lower()

        if command == 'done':
            console.print("[green]✓ Refinement complete![/green]")
            break

        if command == 'undo':
            if not refinement_history:
                console.print("[yellow]No previous versions to revert to[/yellow]")
                continue
            # Revert to previous version
            previous = refinement_history.pop()
            reverted = manager.update_summary(summary_id, {
                "summary": previous['summary'],
                "key_points": previous.get('key_points'),
                "main_themes": previous.get('main_themes')
            })
            if reverted is None:
                # The row no longer exists; nothing further can be refined.
                console.print(f"[red]Summary not found: {summary_id}[/red]")
                break
            summary = reverted
            console.print("[yellow]✓ Reverted to previous version[/yellow]")
            continue

        # Save current state before refinement
        refinement_history.append({
            "summary": summary.summary,
            "key_points": summary.key_points,
            "main_themes": summary.main_themes
        })

        # Perform refinement
        console.print(f"\n[cyan]Refining with: {instruction[:50]}...[/cyan]")
        updated = _apply_refinement(
            manager, summary, summary_id, instruction, current_model,
            preserve_structure=True, error_label="Error during refinement",
        )

        if not updated:
            # Nothing changed, so drop the snapshot; otherwise 'undo' would
            # "revert" to the state we are already in.
            refinement_history.pop()
            continue

        summary = updated
        console.print("\n[green]✓ Refinement applied![/green]")
        console.print("\n[bold]Updated Summary:[/bold]")
        console.print(_preview(summary.summary))

        # Ask for satisfaction
        if Confirm.ask("\n[yellow]Are you satisfied with this refinement?[/yellow]"):
            console.print("[green]✓ Great! Summary refined successfully![/green]")
            break
        console.print("[cyan]Let's continue refining...[/cyan]")


@cli.command()
@click.option('--input-file', '-i', type=click.Path(exists=True), help='File containing list of URLs')
@click.option('--model', '-m', default='deepseek',
              type=click.Choice(['deepseek', 'anthropic', 'openai', 'gemini']),
              help='AI model to use')
@click.option('--prompt', '-p', help='Custom prompt for all videos')
@click.option('--length', '-l', default='standard',
              type=click.Choice(['brief', 'standard', 'detailed']),
              help='Summary length for all videos')
def batch(input_file, model, prompt, length):
    """Process multiple videos in batch

    Provide a file with one YouTube URL per line to process multiple videos.
    """
    if not input_file:
        # Interactive mode - ask for URLs
        console.print("[cyan]Enter YouTube URLs (one per line, empty line to finish):[/cyan]")
        urls = []
        while True:
            # Strip before testing so a whitespace-only line ends input
            # instead of being queued as an empty URL.
            url = input().strip()
            if not url:
                break
            urls.append(url)
    else:
        # Read from file
        with open(input_file, 'r') as f:
            urls = [line.strip() for line in f if line.strip()]

    if not urls:
        console.print("[yellow]No URLs provided[/yellow]")
        return

    console.print(f"\n[cyan]Processing {len(urls)} videos with model: {model}[/cyan]")

    pipeline_cli = SummaryPipelineCLI(model=model)
    results = []

    async def process_batch():
        for i, url in enumerate(urls, 1):
            console.print(f"\n[yellow]Processing {i}/{len(urls)}: {url}[/yellow]")

            try:
                result = await pipeline_cli.process_video(
                    video_url=url,
                    custom_prompt=prompt,
                    summary_length=length
                )

                # Save to database
                summary_data = _build_summary_record(
                    result, video_url=url, model=model, length=length,
                )
                saved = pipeline_cli.summary_manager.save_summary(summary_data)
                results.append({
                    "url": url,
                    "id": saved.id,
                    "title": saved.video_title,
                    "status": "success"
                })

                console.print(f"[green]✓ Processed: {saved.video_title}[/green]")

            except Exception as e:
                # One failing video must not abort the rest of the batch.
                console.print(f"[red]✗ Failed: {e}[/red]")
                results.append({
                    "url": url,
                    "status": "failed",
                    "error": str(e)
                })

        return results

    # Run batch processing
    final_results = asyncio.run(process_batch())

    # Display summary
    console.print("\n" + "="*50)
    console.print("[bold cyan]Batch Processing Complete[/bold cyan]")

    success_count = sum(1 for r in final_results if r['status'] == 'success')
    failed_count = sum(1 for r in final_results if r['status'] == 'failed')

    console.print(f"[green]Successful:[/green] {success_count}")
    console.print(f"[red]Failed:[/red] {failed_count}")

    # Save results to file
    output_file = f"batch_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(final_results, f, indent=2)

    console.print(f"\n[yellow]Results saved to:[/yellow] {output_file}")


@cli.command()
@click.argument('summary_id')
@click.argument('other_summary_id')
def compare(summary_id, other_summary_id):
    """Compare two different summaries

    Useful for comparing summaries generated with different models or prompts.
    """
    manager = SummaryManager()

    summary1 = manager.get_summary(summary_id)
    summary2 = manager.get_summary(other_summary_id)

    if not summary1:
        console.print(f"[red]Summary not found: {summary_id}[/red]")
        return

    if not summary2:
        console.print(f"[red]Summary not found: {other_summary_id}[/red]")
        return

    def quality_text(item):
        # A score of 0.0 is a real value, so test against None.
        return f"{item.quality_score:.2f}" if item.quality_score is not None else "N/A"

    def time_text(item):
        return f"{item.processing_time:.2f}s" if item.processing_time else "N/A"

    def created_text(item):
        return item.created_at.strftime("%Y-%m-%d %H:%M") if item.created_at else "N/A"

    def truncate_text(text, length=200):
        if not text:
            return "N/A"
        return _preview(text, length)

    # Create comparison table
    table = Table(title="Summary Comparison", show_lines=True)
    table.add_column("Attribute", style="cyan", width=20)
    table.add_column(f"Summary 1\n{summary_id[:8]}...", style="green", width=40)
    table.add_column(f"Summary 2\n{other_summary_id[:8]}...", style="yellow", width=40)

    # Add comparison rows
    table.add_row("Video", summary1.video_title or "N/A", summary2.video_title or "N/A")
    table.add_row("Model", summary1.model_used or "Unknown", summary2.model_used or "Unknown")
    table.add_row("Length", summary1.summary_length or "standard", summary2.summary_length or "standard")
    table.add_row("Quality Score", quality_text(summary1), quality_text(summary2))
    table.add_row("Processing Time", time_text(summary1), time_text(summary2))
    table.add_row("Created", created_text(summary1), created_text(summary2))
    table.add_row("Summary Preview", truncate_text(summary1.summary), truncate_text(summary2.summary))

    console.print(table)

    # Show key points comparison if available
    if summary1.key_points or summary2.key_points:
        console.print("\n[bold]Key Points Comparison:[/bold]")

        points1 = summary1.key_points or []
        points2 = summary2.key_points or []

        # Pair points by position; the shorter list is padded with a dim "N/A"
        paired = zip_longest(points1, points2, fillvalue="[dim]N/A[/dim]")
        for number, (point1, point2) in enumerate(paired, 1):
            console.print(f"\n[cyan]Point {number}:[/cyan]")
            console.print(f"  [green]Summary 1:[/green] {point1}")
            console.print(f"  [yellow]Summary 2:[/yellow] {point2}")


if __name__ == "__main__":
    cli()