100 lines
3.0 KiB
Python
100 lines
3.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Example script demonstrating YouTube metadata extraction.
|
|
|
|
This script shows how to use the YouTube metadata extraction service
|
|
to extract and store metadata from YouTube URLs.
|
|
"""
|
|
|
|
import asyncio
import logging
import sys
from pathlib import Path

# The project packages live in the ``src`` directory next to this script's
# parent directory; put it first on the import path so the project imports
# below resolve when the script is run directly.
_SRC_DIR = Path(__file__).parent.parent / "src"
sys.path.insert(0, str(_SRC_DIR))

# These imports must stay below the sys.path tweak above.
from services.youtube_service import YouTubeMetadataService
from repositories.youtube_repository import YouTubeRepository

# Emit INFO-level (and above) log records, and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _format_duration(seconds):
    """Render a duration given in seconds as ``M:SS``.

    Returns ``"?:??"`` when *seconds* is ``None`` so that a record without a
    duration does not raise while being printed. Non-integer values are
    truncated to whole seconds before formatting.
    """
    if seconds is None:
        return "?:??"
    minutes, secs = divmod(int(seconds), 60)
    return f"{minutes}:{secs:02d}"


def _shorten(text, limit=50):
    """Return *text* cut to *limit* characters, adding ``...`` only if cut.

    A missing or empty *text* is rendered as ``"(untitled)"``.
    """
    if not text:
        return "(untitled)"
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


async def main():
    """Run the end-to-end YouTube metadata extraction example.

    Steps, in order:

    1. Create and initialize a ``YouTubeMetadataService`` and print its
       health status; stop early with install instructions when the health
       report says yt-dlp is unavailable.
    2. Extract and store metadata for each URL in ``test_urls``, printing a
       short summary per video. A failure for one URL is reported and the
       loop moves on to the next URL.
    3. Print aggregate statistics obtained from ``YouTubeRepository``.
    4. Print up to five videos returned by ``YouTubeRepository.list_all``.
    """
    print("🎥 YouTube Metadata Extraction Example")
    print("=" * 50)

    # Initialize service
    service = YouTubeMetadataService()
    await service.initialize()

    # Check service health
    health = service.get_health_status()
    print(f"Service Status: {health['status']}")
    print(f"yt-dlp Available: {health['yt_dlp_available']}")

    if not health['yt_dlp_available']:
        print("⚠️ yt-dlp not available. Please install it first:")
        print(" pip install yt-dlp")
        return

    # Example YouTube URLs (replace with real URLs for testing)
    test_urls = [
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # Rick Roll (for testing)
        # Add more URLs here for testing
    ]

    print(f"\n📋 Processing {len(test_urls)} YouTube URLs...")

    for i, url in enumerate(test_urls, 1):
        print(f"\n{i}. Processing: {url}")

        try:
            # Extract and store metadata
            video = await service.extract_and_store_metadata(url)

            print(" ✅ Success!")
            print(f" 📺 Title: {video.title}")
            print(f" 👤 Channel: {video.channel}")
            print(f" ⏱️ Duration: {_format_duration(video.duration_seconds)}")
            print(f" 🆔 YouTube ID: {video.youtube_id}")
        except Exception as e:
            # Deliberately broad: this is the per-URL boundary of a demo
            # script, and one failing URL must not abort the remaining ones.
            print(f" ❌ Error: {e}")
            # Keep the traceback reachable (at DEBUG level) instead of
            # discarding it entirely.
            logger.debug("Metadata extraction failed for %s", url, exc_info=True)

    # Show statistics
    print("\n📊 Database Statistics")
    print("-" * 30)

    repo = YouTubeRepository()
    stats = await repo.get_statistics()

    print(f"Total Videos: {stats['total_videos']}")
    print(f"Total Duration: {stats['total_duration_hours']:.1f} hours")

    if stats['top_channels']:
        print("\nTop Channels:")
        for channel in stats['top_channels'][:3]:
            print(f" • {channel['channel']}: {channel['count']} videos")

    # List recent videos
    print("\n📺 Recent Videos")
    print("-" * 30)

    videos = await repo.list_all(limit=5)

    if videos:
        for video in videos:
            # This loop is not wrapped in try/except, so both helpers are
            # written to tolerate missing values rather than raise.
            duration = _format_duration(video.duration_seconds)
            print(f" • {_shorten(video.title)} ({duration}) - {video.channel}")
    else:
        print(" No videos found in database.")

    print("\n✨ Example completed!")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: runs only when the file is executed directly (not
    # when imported); asyncio.run() runs the main() coroutine to completion.
    asyncio.run(main())
|