youtube-summarizer/test_real_youtube_video.py

226 lines
8.2 KiB
Python

#!/usr/bin/env python3
"""Test Gemini with real YouTube video: https://www.youtube.com/watch?v=DCquejfz04A"""
import requests
import json
import time
from typing import Dict, Any
def test_youtube_video_with_gemini():
    """Run an end-to-end Gemini smoke test against a real YouTube video.

    Steps:
      1. Query the local backend for available models and confirm a
         Google (Gemini) provider entry is present.
      2. Submit the video for processing, forcing the ``gemini-1.5-pro`` model.
      3. Delegate to ``monitor_processing`` to poll the job to completion.

    Returns:
        bool: True when the full pipeline succeeds, False on any failure
        (backend down, Gemini unavailable, validation error, job failure).
    """
    video_url = "https://www.youtube.com/watch?v=DCquejfz04A"
    print("🎬 Testing Gemini with Real YouTube Video")
    print(f"📺 Video URL: {video_url}")

    # Step 1: Check if backend is running and models are available
    print("\n=== Step 1: Verify Backend Status ===")
    try:
        # Explicit timeout: without it requests waits forever if the
        # backend process is hung rather than down.
        response = requests.get(
            "http://localhost:8000/api/models/available", timeout=10
        )
        if response.status_code != 200:
            print(f"❌ Backend not available: {response.status_code}")
            return False
        models = response.json()
        gemini_available = False
        for model in models['models']:
            if model['provider'] == 'google':
                gemini_available = True
                print(f"✅ Gemini found: {model['display_name']}")
                print(f" Context: {model['context_window']:,} tokens")
                print(f" Pricing: ${model['pricing']['input_per_1k']:.4f}/${model['pricing']['output_per_1k']:.4f} per 1K")
                break
        if not gemini_available:
            print("❌ Gemini not available")
            return False
    except Exception as e:
        # Broad catch is deliberate: any connection/JSON error means the
        # precondition (running backend) failed, so the test aborts.
        print(f"❌ Error checking backend: {e}")
        return False

    # Step 2: Start video processing
    print("\n=== Step 2: Start Video Processing ===")
    process_request = {
        "video_url": video_url,
        "options": {
            "ai_model": "gemini-1.5-pro",  # Force Gemini
            "summary_length": "standard",
            "focus_areas": ["main points", "key insights"],
            "include_timestamps": True
        }
    }
    try:
        print("🚀 Starting video processing with Gemini...")
        response = requests.post(
            "http://localhost:8000/api/process",
            json=process_request,
            headers={"Content-Type": "application/json"},
            timeout=30  # submission should be quick; processing is async
        )
        print(f"📨 Response status: {response.status_code}")
        if response.status_code == 200:
            result = response.json()
            job_id = result.get('job_id')
            if job_id:
                print(f"✅ Job started: {job_id}")
                return monitor_processing(job_id, video_url)
            print("❌ No job ID returned")
            return False
        elif response.status_code == 422:
            # FastAPI-style validation error: show the detail payload.
            error_detail = response.json()
            print(f"❌ Validation error: {error_detail}")
            return False
        else:
            print(f"❌ Processing failed: {response.status_code}")
            print(f"Response: {response.text}")
            return False
    except Exception as e:
        print(f"❌ Error starting processing: {e}")
        return False
def monitor_processing(job_id: str, video_url: str) -> bool:
    """Poll the backend for job status until it completes, fails, or times out.

    Args:
        job_id: Identifier returned by the ``/api/process`` endpoint.
        video_url: Original video URL (kept for interface parity; not used
            in the polling loop itself).

    Returns:
        bool: True if the job reached ``completed`` and results were
        displayed; False on failure, HTTP error, exception, or timeout.
    """
    print(f"\n=== Step 3: Monitor Processing (Job {job_id}) ===")
    max_wait_time = 300  # 5 minutes max
    check_interval = 5  # Check every 5 seconds
    elapsed_time = 0
    while elapsed_time < max_wait_time:
        try:
            # Timeout keeps a hung backend from stalling the loop forever.
            response = requests.get(
                f"http://localhost:8000/api/process/{job_id}", timeout=10
            )
            if response.status_code != 200:
                print(f"❌ Error checking status: {response.status_code}")
                return False
            result = response.json()
            status = result.get('status', 'unknown')
            print(f"⏳ Status: {status} (elapsed: {elapsed_time}s)")
            if status == 'completed':
                print("✅ Processing completed!")
                display_results(result)
                return True
            if status == 'failed':
                error = result.get('error', 'Unknown error')
                print(f"❌ Processing failed: {error}")
                return False
            if status in ['processing', 'pending', 'in_progress']:
                # Show progress if available
                if 'progress' in result:
                    progress = result['progress']
                    print(f" Progress: {progress.get('current_step', 'N/A')} - {progress.get('message', '')}")
            else:
                print(f"⚠️ Unknown status: {status}")
            # Single sleep/accounting path replaces the duplicated
            # sleep + elapsed_time blocks in each branch above.
            time.sleep(check_interval)
            elapsed_time += check_interval
        except Exception as e:
            print(f"❌ Error monitoring job: {e}")
            return False
    print(f"⏰ Timeout after {max_wait_time} seconds")
    return False
def display_results(result: Dict[str, Any]):
    """Render a completed processing result to stdout.

    Prints, for each section present in ``result``: video metadata,
    AI model and cost statistics (with a callout when the input exceeded
    50k tokens), a 300-character summary preview, up to five key points,
    and up to three chapters.
    """
    print("\n🎉 === GEMINI RESULTS ===")

    # Basic video metadata, if the backend attached any.
    if 'metadata' in result:
        meta = result['metadata']
        print(f"📺 Video Title: {meta.get('title', 'N/A')}")
        print(f"⏱️ Duration: {meta.get('duration', 'N/A')}")
        print(f"👀 Views: {meta.get('view_count', 'N/A')}")

    # Model, latency, and token/cost accounting.
    if 'ai_processing' in result:
        ai = result['ai_processing']
        print(f"\n🤖 AI Model Used: {ai.get('model_used', 'N/A')}")
        print(f"⚡ Processing Time: {ai.get('processing_time_seconds', 0):.2f}s")
        if 'cost_data' in ai:
            cost = ai['cost_data']
            in_tokens = cost.get('input_tokens', 0)
            out_tokens = cost.get('output_tokens', 0)
            print(f"💰 Total Cost: ${cost.get('total_cost_usd', 0):.4f}")
            print(f"📄 Input Tokens: {in_tokens:,}")
            print(f"📝 Output Tokens: {out_tokens:,}")
            # Call out when the job needed more than 50k input tokens —
            # the single-pass large-context selling point.
            if in_tokens > 50000:
                print(f"🚀 LARGE CONTEXT ADVANTAGE: {in_tokens:,} tokens processed in single pass!")
                print(" (Other models would require chunking)")

    # Truncated summary preview.
    if 'summary' in result:
        print("\n📋 Summary:")
        print(f" {result['summary'][:300]}...")

    # Up to five key points, with an overflow note.
    if 'key_points' in result:
        points = result['key_points']
        print(f"\n🔑 Key Points ({len(points)} found):")
        for idx, point in enumerate(points[:5], 1):
            print(f" {idx}. {point}")
        if len(points) > 5:
            print(f" ... and {len(points) - 5} more")

    # Up to three chapters, with an overflow note.
    if 'chapters' in result:
        chapters = result['chapters']
        print(f"\n📚 Chapters ({len(chapters)} found):")
        for chapter in chapters[:3]:
            print(f" {chapter.get('timestamp', 'N/A')}: {chapter.get('title', 'N/A')}")
        if len(chapters) > 3:
            print(f" ... and {len(chapters) - 3} more")
if __name__ == "__main__":
    # Script entry point: run the end-to-end test and report the outcome.
    banner = "=" * 60
    print("🎬 YouTube Video Processing Test with Gemini 2M Context")
    print(banner)
    if test_youtube_video_with_gemini():
        for line in (
            "\n🎉 ✅ SUCCESS! Gemini processed the YouTube video successfully!",
            "✅ 2M token context window demonstrated",
            "✅ Real-world video processing completed",
            "✅ Large context advantage confirmed",
        ):
            print(line)
    else:
        print("\n❌ Test failed - see details above")
    print("\n" + banner)