# File listing metadata: 156 lines, 5.3 KiB, Python
import re
|
|
import logging
|
|
from typing import Optional, List, Dict, Any
|
|
from backend.core.exceptions import (
|
|
UserInputError,
|
|
ValidationError,
|
|
UnsupportedFormatError,
|
|
ErrorCode
|
|
)
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class VideoService:
    """Extract and validate YouTube video IDs from user-supplied URLs.

    The service recognises watch, short-link (youtu.be) and embed URLs,
    rejects playlist URLs with a dedicated error, and checks that the
    extracted ID has the 11-character format this service expects.
    """

    # Optional scheme followed by any chain of subdomains ("www.", "m.", ...).
    # The negative lookbehind prevents a match from starting in the middle of
    # a longer hostname, so look-alike domains such as "notyoutube.com" or
    # "fakeyoutu.be" are not mistaken for YouTube. Matching is still
    # unanchored (re.search), so a URL embedded in surrounding text is found.
    _HOST_PREFIX = r'(?<![\w.-])(?:https?://)?(?:[\w-]+\.)*'

    # Capture group for a candidate video ID of ANY length; the exact length
    # is validated afterwards so a wrong-length ID gets a specific error.
    _ID_GROUP = r'([a-zA-Z0-9_-]+)'

    def __init__(self):
        # Human-readable examples returned to callers in error details.
        self.supported_formats = [
            "https://youtube.com/watch?v=VIDEO_ID",
            "https://youtu.be/VIDEO_ID",
            "https://youtube.com/embed/VIDEO_ID",
            "https://m.youtube.com/watch?v=VIDEO_ID",
            "https://www.youtube.com/watch?v=VIDEO_ID",
        ]

        host = self._HOST_PREFIX
        video_id = self._ID_GROUP

        # One pattern per URL shape; tried in order, first match wins.
        self.url_patterns = [
            # Watch page. "v=" directly after "?" is tried first so the first
            # "v" parameter wins; otherwise "v" may follow other parameters
            # (e.g. "watch?feature=share&v=ID").
            host + r'youtube\.com/watch\?(?:v=|[^#\s]*?&v=)' + video_id,
            # Short link.
            host + r'youtu\.be/' + video_id,
            # Embedded player.
            host + r'youtube\.com/embed/' + video_id,
        ]

        self.playlist_patterns = [
            # Dedicated playlist page.
            host + r'youtube\.com/playlist\?list=',
            # Watch page carrying a "list" parameter at any position in the
            # query, including immediately after "?".
            host + r'youtube\.com/watch\?(?:.*[?&])?list=',
        ]

    def extract_video_id(self, url: str) -> str:
        """
        Extract YouTube video ID from various URL formats.

        Args:
            url: YouTube URL to parse. Leading/trailing whitespace is ignored.

        Returns:
            str: 11-character video ID

        Raises:
            ValidationError: If the URL is empty, is not a string, does not
                match a supported YouTube URL shape, or carries a video ID
                that is not exactly 11 valid characters.
            UnsupportedFormatError: If the URL is a playlist URL.
        """
        # Strip before the emptiness check so whitespace-only input is
        # reported as empty rather than as a malformed URL.
        cleaned = url.strip() if isinstance(url, str) else url

        if not cleaned:
            raise ValidationError(
                message="URL cannot be empty",
                details={"url": url, "supported_formats": self.supported_formats},
            )

        if not isinstance(cleaned, str):
            # Fail with the service's own error type instead of an
            # AttributeError/TypeError from the string operations below.
            raise ValidationError(
                message="URL must be a string",
                details={"url": url, "supported_formats": self.supported_formats},
            )

        url = cleaned

        # Playlists are checked first: a watch URL with a "list" parameter
        # also contains a video ID but is deliberately rejected.
        if self._is_playlist_url(url):
            logger.info("Playlist URL detected: %s", url)
            raise UnsupportedFormatError(
                message="Playlist URLs are not yet supported. Please provide a single video URL.",
                details={
                    "url": url,
                    "detected_type": "playlist",
                    "suggestion": "You can copy the URL of a specific video from the playlist instead",
                    "supported_formats": self.supported_formats,
                },
            )

        video_id = self._find_video_id(url)

        if video_id is None:
            # Any URL (YouTube or not) that matches no supported shape.
            raise ValidationError(
                message="Invalid YouTube URL format",
                details={
                    "url": url,
                    "supported_formats": self.supported_formats,
                    "suggestion": "Please provide a valid YouTube video URL with a video ID",
                },
            )

        if not self._validate_video_id(video_id):
            raise ValidationError(
                message=f"Invalid video ID format: {video_id}",
                details={
                    "url": url,
                    "video_id": video_id,
                    "expected_format": "11 characters containing letters, numbers, underscores, or hyphens",
                },
            )

        logger.info("Successfully extracted video ID: %s from URL: %s", video_id, url)
        return video_id

    def _find_video_id(self, url: str) -> Optional[str]:
        """
        Return the raw ID captured by the first matching URL pattern.

        The returned value is NOT length-validated; callers must pass it
        through ``_validate_video_id``.

        Args:
            url: URL (already stripped) to search.

        Returns:
            Optional[str]: Captured candidate ID, or None if no pattern matches
        """
        for pattern in self.url_patterns:
            match = re.search(pattern, url)
            if match:
                return match.group(1)
        return None

    def _validate_video_id(self, video_id: str) -> bool:
        """
        Validate that video ID is exactly 11 characters with valid format.

        Args:
            video_id: Video ID to validate

        Returns:
            bool: True if valid, False otherwise (including None or empty)
        """
        # Video IDs contain letters, numbers, underscores, and hyphens;
        # fullmatch enforces both the alphabet and the exact length.
        return (
            isinstance(video_id, str)
            and re.fullmatch(r'[a-zA-Z0-9_-]{11}', video_id) is not None
        )

    def _is_playlist_url(self, url: str) -> bool:
        """
        Check if URL is a playlist URL.

        Args:
            url: URL to check

        Returns:
            bool: True if playlist URL, False otherwise
        """
        return any(re.search(pattern, url) for pattern in self.playlist_patterns)

    def normalize_url(self, video_id: str) -> str:
        """
        Create a normalized YouTube URL from a video ID.

        Args:
            video_id: YouTube video ID (not validated here)

        Returns:
            str: Normalized YouTube URL
        """
        return f"https://youtube.com/watch?v={video_id}"

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported URL formats.

        Returns:
            List[str]: A copy of the supported format examples, so callers
                cannot mutate the service's own list
        """
        return list(self.supported_formats)