#!/usr/bin/env python3
"""Example script demonstrating YouTube metadata extraction.

This script shows how to use the YouTube metadata extraction service
to extract and store metadata from YouTube URLs.
"""

import asyncio
import logging
from pathlib import Path

# The project packages live in ../src relative to this script, so that
# directory has to be on sys.path before the project imports below run.
import sys
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from services.youtube_service import YouTubeMetadataService  # noqa: E402
from repositories.youtube_repository import YouTubeRepository  # noqa: E402

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Maximum number of title characters shown in the "Recent Videos" listing.
_TITLE_PREVIEW_LENGTH = 50


def _format_duration(seconds):
    """Return *seconds* formatted as ``M:SS``.

    NOTE(review): the video model is not visible from this script; this
    assumes ``duration_seconds`` may be ``None`` or a float for some
    videos -- confirm against the model. ``None`` yields ``"unknown"``
    and fractional seconds are truncated, so formatting never raises for
    those values.
    """
    if seconds is None:
        return "unknown"
    minutes, remainder = divmod(int(seconds), 60)
    return f"{minutes}:{remainder:02d}"


def _preview_title(title, limit=_TITLE_PREVIEW_LENGTH):
    """Return *title* cut to *limit* characters, with ``...`` only if cut."""
    if len(title) <= limit:
        return title
    return f"{title[:limit]}..."


async def main():
    """Run the example end to end.

    Initializes the metadata service and prints its health status. If the
    health report says yt-dlp is unavailable, prints install instructions
    and returns early. Otherwise extracts and stores metadata for each
    test URL, then prints repository statistics and up to five videos
    returned by the repository listing.
    """
    print("🎥 YouTube Metadata Extraction Example")
    print("=" * 50)

    # Initialize service
    service = YouTubeMetadataService()
    await service.initialize()

    # Check service health
    health = service.get_health_status()
    print(f"Service Status: {health['status']}")
    print(f"yt-dlp Available: {health['yt_dlp_available']}")

    if not health['yt_dlp_available']:
        print("⚠️ yt-dlp not available. Please install it first:")
        print(" pip install yt-dlp")
        return

    # Example YouTube URLs (replace with real URLs for testing)
    test_urls = [
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # Rick Roll (for testing)
        # Add more URLs here for testing
    ]

    print(f"\n📋 Processing {len(test_urls)} YouTube URLs...")

    for i, url in enumerate(test_urls, 1):
        print(f"\n{i}. Processing: {url}")

        try:
            # Extract and store metadata
            video = await service.extract_and_store_metadata(url)

            print(" ✅ Success!")
            print(f" 📺 Title: {video.title}")
            print(f" 👤 Channel: {video.channel}")
            print(f" ⏱️ Duration: {_format_duration(video.duration_seconds)}")
            print(f" 🆔 YouTube ID: {video.youtube_id}")
        except Exception as e:
            # Deliberately broad: this is a best-effort demo loop, so one
            # failing URL must not prevent the remaining URLs from running.
            print(f" ❌ Error: {e}")
            # Keep the traceback reachable without changing default output
            # (the root logger is configured at INFO above).
            logger.debug("Failed to process %s", url, exc_info=True)

    # Show statistics
    print("\n📊 Database Statistics")
    print("-" * 30)

    repo = YouTubeRepository()
    stats = await repo.get_statistics()

    print(f"Total Videos: {stats['total_videos']}")
    print(f"Total Duration: {stats['total_duration_hours']:.1f} hours")

    if stats['top_channels']:
        print("\nTop Channels:")
        # Only the first three entries are shown.
        for channel in stats['top_channels'][:3]:
            print(f" • {channel['channel']}: {channel['count']} videos")

    # List recent videos
    print("\n📺 Recent Videos")
    print("-" * 30)

    videos = await repo.list_all(limit=5)
    if videos:
        for video in videos:
            duration = _format_duration(video.duration_seconds)
            print(f" • {_preview_title(video.title)} ({duration}) - {video.channel}")
    else:
        print(" No videos found in database.")

    print("\n✨ Example completed!")


if __name__ == "__main__":
    asyncio.run(main())