#!/usr/bin/env python3
"""Generate summary for a video using the DeepSeek API.

The script loads a transcript JSON file, POSTs its text to the local
summarization endpoint, stores the JSON response under
``video_storage/summaries/<video_id>/`` (relative to the current working
directory) and prints a formatted report.

Usage::

    python <this script> [video_id] [transcript_path]

Both arguments are optional; without them the built-in default video and
transcript location are used.
"""

import argparse
import asyncio
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Optional, Union

import httpx

# Add backend to path (the directory one level above this script's folder).
sys.path.append(str(Path(__file__).parent.parent))

# Endpoint and request timeout used when the caller does not override them.
API_URL = "http://localhost:8000/api/summarize"
API_TIMEOUT_SECONDS = 60.0

# Video: "Me at the zoo" - First YouTube video
DEFAULT_VIDEO_ID = "jNQXAC9IVRw"
# Directory searched for "<video_id>.json" when no transcript path is given.
DEFAULT_TRANSCRIPT_DIR = Path(
    "/Users/enias/projects/my-ai-projects/apps/youtube-summarizer/"
    "video_storage/transcripts"
)


def _extract_transcript_text(transcript_data: Any) -> str:
    """Return the plain text contained in decoded transcript JSON.

    A list is treated as segments whose ``text`` values are joined with single
    spaces; a dict contributes its ``text`` value; a bare string is returned
    unchanged. Non-dict segments and missing/null ``text`` values are
    tolerated instead of raising. Any other shape yields an empty string.
    """
    if isinstance(transcript_data, list):
        return " ".join(
            str(segment.get("text") or "")
            for segment in transcript_data
            if isinstance(segment, dict)
        )
    if isinstance(transcript_data, dict):
        return str(transcript_data.get("text") or "")
    if isinstance(transcript_data, str):
        return transcript_data
    return ""


async def generate_summary_via_api(
    video_id: str,
    transcript_path: str,
    *,
    api_url: str = API_URL,
    timeout: float = API_TIMEOUT_SECONDS,
) -> Optional[dict]:
    """Generate summary using the API endpoint.

    Args:
        video_id: Identifier used in log output and in the summary directory
            name.
        transcript_path: Path of the transcript JSON file to summarize.
        api_url: Summarization endpoint that receives the POST request.
        timeout: Timeout in seconds passed to the HTTP client.

    Returns:
        The decoded JSON response on success. ``None`` when the transcript
        cannot be loaded or is empty, the request fails, the endpoint answers
        with a status other than 200, the response is not valid JSON, or the
        summary file cannot be written. Every failure is reported on stdout.
    """
    print(f"\n📹 Processing video: {video_id}")

    # Load transcript. JSON text is UTF-8, so do not rely on the locale default.
    try:
        with open(transcript_path, "r", encoding="utf-8") as f:
            transcript_data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"❌ Could not load transcript {transcript_path}: {e}")
        return None

    # Extract text from segments
    transcript_text = _extract_transcript_text(transcript_data)

    print(f"📝 Transcript length: {len(transcript_text)} characters")
    print(f"📝 First 200 chars: {transcript_text[:200]}...")

    if not transcript_text.strip():
        # Nothing to summarize; skip the network round trip.
        print("❌ Transcript contains no text; nothing to summarize")
        return None

    payload = {
        "transcript": transcript_text,
        "length": "standard",
        "focus_areas": ["key points", "main topics", "conclusions"],
        "include_timestamps": False,
    }

    # Call API to generate summary
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            print("🤖 Calling DeepSeek API to generate summary...")
            response = await client.post(api_url, json=payload)
    except httpx.HTTPError as e:
        print(f"❌ Error calling API: {e}")
        return None

    if response.status_code != 200:
        print(f"❌ API Error: {response.status_code}")
        print(f"Response: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError as e:
        print(f"❌ Error calling API: response is not valid JSON: {e}")
        return None

    # Save summary to file
    summary_dir = Path(f"video_storage/summaries/{video_id}")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    summary_file = summary_dir / f"summary_{timestamp}.json"
    try:
        summary_dir.mkdir(parents=True, exist_ok=True)
        with open(summary_file, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2)
    except OSError as e:
        print(f"❌ Error saving summary to {summary_file}: {e}")
        return None

    print(f"✅ Summary saved to: {summary_file}")
    return result


def format_summary(result: dict) -> None:
    """Format summary for display.

    Prints the summary text, key points, main themes and actionable insights
    found in ``result``, followed by the confidence score and processing cost
    when those are present. Does nothing for an empty or ``None`` result.
    Sections whose value is missing or null are printed with no entries.
    """
    if not result:
        return

    section_rule = "-" * 40

    print("\n" + "=" * 60)
    print("📊 SUMMARY RESULTS")
    print("=" * 60)

    print("\n📝 Main Summary:")
    print(section_rule)
    print(result.get("summary") or "No summary available")

    print("\n🎯 Key Points:")
    print(section_rule)
    for i, point in enumerate(result.get("key_points") or [], 1):
        print(f"{i}. {point}")

    print("\n📚 Main Themes:")
    print(section_rule)
    for theme in result.get("main_themes") or []:
        print(f"• {theme}")

    print("\n💡 Actionable Insights:")
    print(section_rule)
    for insight in result.get("actionable_insights") or []:
        print(f"→ {insight}")

    # Only format values that are actually numeric; a null score would
    # otherwise raise inside the format spec.
    confidence = result.get("confidence_score")
    if isinstance(confidence, (int, float)):
        print(f"\n📊 Confidence Score: {confidence:.2%}")

    cost_data = result.get("cost_data")
    if isinstance(cost_data, dict):
        cost = cost_data.get("total_cost_usd") or 0
        print(f"💰 Processing Cost: ${cost:.6f}")


async def main(
    video_id: str = DEFAULT_VIDEO_ID,
    transcript_path: Optional[Union[str, Path]] = None,
) -> None:
    """Summarize one video and print the result.

    Args:
        video_id: Video to process; defaults to ``DEFAULT_VIDEO_ID``.
        transcript_path: Transcript JSON file. When omitted,
            ``DEFAULT_TRANSCRIPT_DIR / "<video_id>.json"`` is used.
    """
    if transcript_path is None:
        transcript_path = DEFAULT_TRANSCRIPT_DIR / f"{video_id}.json"
    transcript_path = Path(transcript_path)

    print("🚀 YouTube Video Summary Generator (via API)")
    print("=" * 60)
    print(f"Video ID: {video_id}")
    print(f"Transcript: {transcript_path}")

    if not transcript_path.is_file():
        print(f"❌ Transcript file not found: {transcript_path}")
        return

    result = await generate_summary_via_api(video_id, str(transcript_path))

    if result:
        format_summary(result)
        print("\n✅ Summary generation complete!")
    else:
        print("\n❌ Failed to generate summary")


def _parse_args(argv: Optional[list] = None) -> argparse.Namespace:
    """Parse the optional ``video_id`` and ``transcript_path`` CLI arguments."""
    parser = argparse.ArgumentParser(
        description="Generate a summary for a video transcript via the API."
    )
    parser.add_argument(
        "video_id",
        nargs="?",
        default=DEFAULT_VIDEO_ID,
        help="video identifier (default: %(default)s)",
    )
    parser.add_argument(
        "transcript_path",
        nargs="?",
        type=Path,
        default=None,
        help="transcript JSON file (default: <video_id>.json in the "
        "default transcript directory)",
    )
    return parser.parse_args(argv)


if __name__ == "__main__":
    _cli_args = _parse_args()
    asyncio.run(main(_cli_args.video_id, _cli_args.transcript_path))