trax/examples/youtube_metadata_example.py

100 lines
3.0 KiB
Python

#!/usr/bin/env python3
"""Example script demonstrating YouTube metadata extraction.
This script shows how to use the YouTube metadata extraction service
to extract and store metadata from YouTube URLs.
"""
import asyncio
import logging
from pathlib import Path
# Make the project's ``src`` directory importable when this file is run as a
# script: ``Path(__file__).parent.parent / "src"`` is the ``src`` folder located
# one level above the directory containing this file.
import sys
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
# Project-local imports. NOTE(review): these are presumably resolved through
# the ``sys.path`` entry inserted above, so they must stay below that line.
from services.youtube_service import YouTubeMetadataService
from repositories.youtube_repository import YouTubeRepository
# Configure logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _format_duration(seconds) -> str:
    """Return *seconds* formatted as ``M:SS``, or ``"unknown"`` when it is None.

    The value is truncated to whole seconds first so that a float duration
    does not break the ``:02d`` format specifier.
    """
    if seconds is None:
        return "unknown"
    minutes, secs = divmod(int(seconds), 60)
    return f"{minutes}:{secs:02d}"


def _truncate(text: str, limit: int = 50) -> str:
    """Return *text* cut to *limit* characters, adding ``...`` only if it was cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


async def main() -> None:
    """Run the YouTube metadata extraction example end to end.

    Steps, in order:

    1. Create and initialize a ``YouTubeMetadataService`` and print its
       health status; stop early if it reports yt-dlp as unavailable.
    2. Call ``extract_and_store_metadata`` for each URL in ``test_urls`` and
       print a short summary, reporting (not raising) per-URL failures.
    3. Print the statistics returned by ``YouTubeRepository.get_statistics``.
    4. Print up to five videos returned by ``YouTubeRepository.list_all``.
    """
    print("🎥 YouTube Metadata Extraction Example")
    print("=" * 50)

    # Initialize service
    service = YouTubeMetadataService()
    await service.initialize()

    # Check service health
    health = service.get_health_status()
    print(f"Service Status: {health['status']}")
    print(f"yt-dlp Available: {health['yt_dlp_available']}")
    if not health['yt_dlp_available']:
        print("⚠️ yt-dlp not available. Please install it first:")
        print(" pip install yt-dlp")
        return

    # Example YouTube URLs (replace with real URLs for testing)
    test_urls = [
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # Rick Roll (for testing)
        # Add more URLs here for testing
    ]
    print(f"\n📋 Processing {len(test_urls)} YouTube URLs...")
    for i, url in enumerate(test_urls, 1):
        print(f"\n{i}. Processing: {url}")
        try:
            # Extract and store metadata
            video = await service.extract_and_store_metadata(url)
            print(" ✅ Success!")
            print(f" 📺 Title: {video.title}")
            print(f" 👤 Channel: {video.channel}")
            print(f" ⏱️ Duration: {_format_duration(video.duration_seconds)}")
            print(f" 🆔 YouTube ID: {video.youtube_id}")
        except Exception as e:
            # Best-effort example: one bad URL must not abort the whole run.
            # The traceback is kept at DEBUG level so it is not lost entirely.
            logger.debug("Metadata extraction failed for %s", url, exc_info=True)
            print(f" ❌ Error: {e}")

    # Show statistics
    print("\n📊 Database Statistics")
    print("-" * 30)
    repo = YouTubeRepository()
    stats = await repo.get_statistics()
    print(f"Total Videos: {stats['total_videos']}")
    print(f"Total Duration: {stats['total_duration_hours']:.1f} hours")
    if stats['top_channels']:
        print("\nTop Channels:")
        # Only the first three entries are shown.
        for channel in stats['top_channels'][:3]:
            print(f"{channel['channel']}: {channel['count']} videos")

    # List recent videos
    print("\n📺 Recent Videos")
    print("-" * 30)
    videos = await repo.list_all(limit=5)
    if videos:
        for video in videos:
            duration = _format_duration(video.duration_seconds)
            print(f"{_truncate(video.title)} ({duration}) - {video.channel}")
    else:
        print(" No videos found in database.")
    print("\n✨ Example completed!")
# Script entry point: run the async example only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())