133 lines
4.3 KiB
Python
133 lines
4.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Generate summary for a video using the DeepSeek API."""
|
|
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import httpx
|
|
|
|
# Add backend to path
|
|
sys.path.append(str(Path(__file__).parent.parent))
|
|
|
|
def _extract_transcript_text(transcript_data):
    """Flatten loaded transcript JSON into one text string.

    A list is treated as a sequence of segment dicts whose ``text`` values
    are joined with single spaces; a dict is treated as a document with a
    top-level ``text`` key. Missing or null ``text`` values and non-dict
    segments contribute an empty string, and any other payload type
    yields an empty string.
    """
    if isinstance(transcript_data, list):
        return ' '.join(
            str(segment.get('text') or '') if isinstance(segment, dict) else ''
            for segment in transcript_data
        )
    if isinstance(transcript_data, dict):
        return str(transcript_data.get('text') or '')
    return ''


async def generate_summary_via_api(video_id: str, transcript_path: str):
    """Generate a summary for a transcript using the local API endpoint.

    Loads the transcript JSON from ``transcript_path``, posts its text to
    ``http://localhost:8000/api/summarize`` and, on an HTTP 200 response,
    writes the returned JSON to a timestamped file under
    ``video_storage/summaries/<video_id>/`` (relative to the current
    working directory).

    Args:
        video_id: Identifier used for log output and for the name of the
            directory the summary is saved in.
        transcript_path: Path to the transcript JSON file.

    Returns:
        The decoded JSON response on success, or ``None`` when the API
        responds with a non-200 status or when the request, response
        decoding, or saving fails.

    Raises:
        OSError: If the transcript file cannot be opened.
        json.JSONDecodeError: If the transcript file is not valid JSON.
    """
    print(f"\n📹 Processing video: {video_id}")

    # Load transcript. The encoding is explicit because the platform default
    # is locale-dependent and JSON files are conventionally UTF-8.
    with open(transcript_path, 'r', encoding='utf-8') as f:
        transcript_data = json.load(f)

    # Extract text from segments
    transcript_text = _extract_transcript_text(transcript_data)

    print(f"📝 Transcript length: {len(transcript_text)} characters")
    print(f"📝 First 200 chars: {transcript_text[:200]}...")

    # Call API to generate summary
    async with httpx.AsyncClient(timeout=60.0) as client:
        try:
            print("🤖 Calling DeepSeek API to generate summary...")

            response = await client.post(
                "http://localhost:8000/api/summarize",
                json={
                    "transcript": transcript_text,
                    "length": "standard",
                    "focus_areas": ["key points", "main topics", "conclusions"],
                    "include_timestamps": False,
                },
            )

            if response.status_code != 200:
                print(f"❌ API Error: {response.status_code}")
                print(f"Response: {response.text}")
                return None

            result = response.json()

            # Save summary to file
            summary_dir = Path(f"video_storage/summaries/{video_id}")
            summary_dir.mkdir(parents=True, exist_ok=True)

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            summary_file = summary_dir / f"summary_{timestamp}.json"

            with open(summary_file, 'w', encoding='utf-8') as f:
                json.dump(result, f, indent=2)

            print(f"✅ Summary saved to: {summary_file}")
            return result

        except Exception as e:
            # Deliberate best-effort boundary: any failure while requesting,
            # decoding or saving is reported and signalled to the caller
            # as None instead of propagating.
            print(f"❌ Error calling API: {e}")
            return None
|
|
|
|
def format_summary(result: dict):
    """Print a summary result to stdout in a human-readable layout.

    Args:
        result: Summary payload. The keys read are ``summary``,
            ``key_points``, ``main_themes``, ``actionable_insights``,
            ``confidence_score`` and ``cost_data`` (whose
            ``total_cost_usd`` entry is shown). Every key is optional, and
            a value of ``None`` is treated the same as a missing key.

    Returns:
        None. Nothing is printed when ``result`` is empty or ``None``.
    """
    if not result:
        return

    print("\n" + "="*60)
    print("📊 SUMMARY RESULTS")
    print("="*60)

    print("\n📝 Main Summary:")
    print("-" * 40)
    # `or` rather than a .get() default, so an explicit null in the payload
    # also falls back to the placeholder text.
    print(result.get('summary') or 'No summary available')

    print("\n🎯 Key Points:")
    print("-" * 40)
    for i, point in enumerate(result.get('key_points') or [], 1):
        print(f"{i}. {point}")

    print("\n📚 Main Themes:")
    print("-" * 40)
    for theme in result.get('main_themes') or []:
        print(f"• {theme}")

    print("\n💡 Actionable Insights:")
    print("-" * 40)
    for insight in result.get('actionable_insights') or []:
        print(f"→ {insight}")

    # A null score cannot be formatted as a percentage, so it is skipped.
    confidence = result.get('confidence_score')
    if confidence is not None:
        print(f"\n📊 Confidence Score: {confidence:.2%}")

    # Only a dict-shaped cost_data carries a cost to report.
    cost_data = result.get('cost_data')
    if isinstance(cost_data, dict):
        cost = cost_data.get('total_cost_usd') or 0
        print(f"💰 Processing Cost: ${cost:.6f}")
|
|
|
|
async def main(video_id: str = "jNQXAC9IVRw", transcript_path=None):
    """Generate and display a summary for one video's transcript.

    Args:
        video_id: Video whose transcript is summarized. The default is
            "Me at the zoo", the first YouTube video.
        transcript_path: Path (``str`` or ``Path``) to the transcript JSON
            file. When omitted, ``<video_id>.json`` inside the default
            transcripts directory is used.

    Returns:
        None. Progress and results are printed to stdout; if the
        transcript file does not exist, an error is printed and nothing
        else is done.
    """
    if transcript_path is None:
        # Default storage location used by this script.
        transcripts_dir = Path("/Users/enias/projects/my-ai-projects/apps/youtube-summarizer/video_storage/transcripts")
        transcript_path = transcripts_dir / f"{video_id}.json"
    else:
        transcript_path = Path(transcript_path)

    print("🚀 YouTube Video Summary Generator (via API)")
    print("=" * 60)
    print(f"Video ID: {video_id}")
    print(f"Transcript: {transcript_path}")

    if not transcript_path.exists():
        print(f"❌ Transcript file not found: {transcript_path}")
        return

    result = await generate_summary_via_api(video_id, str(transcript_path))

    if result:
        format_summary(result)
        print("\n✅ Summary generation complete!")
    else:
        print("\n❌ Failed to generate summary")
|
|
|
|
# Run the async entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())