"""
Storage Manager for video file organization and cleanup.

Handles directory management, file operations, and storage optimization.
"""

import os
import shutil
import json
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
import logging

# Module-level logger shared by everything defined in this file.
logger = logging.getLogger(__name__)


class StorageManager:
    """Manages storage for downloaded videos and audio files.

    Everything lives under ``base_dir``, which is split into the category
    sub-directories named in ``_CATEGORIES``. Archived videos are moved to
    ``base_dir/<archive_dir>/<video_id>/``.
    """

    # Sub-directories created under ``base_dir``; also the key order of
    # get_storage_usage() (followed by 'total').
    _CATEGORIES = ("videos", "audio", "metadata", "thumbnails", "temp")

    # (category sub-directory, file extension) of each per-video file that
    # archive_video() and restore_from_archive() move as a unit.
    _ARCHIVED_FILES = (
        ("videos", ".mp4"),
        ("audio", ".mp3"),
        ("metadata", ".json"),
        ("thumbnails", ".jpg"),
    )

    def __init__(self, base_dir: str = "data/youtube-videos"):
        """
        Initialize storage manager.

        Creates the directory layout immediately, so constructing an
        instance touches the filesystem.

        Args:
            base_dir: Base directory for all storage operations
        """
        self.base_dir = Path(base_dir)
        self.ensure_directory_structure()

    def ensure_directory_structure(self) -> None:
        """Ensure the base directory and every category sub-directory exist."""
        directories = [self.base_dir]
        directories.extend(self.base_dir / name for name in self._CATEGORIES)

        for directory in directories:
            directory.mkdir(parents=True, exist_ok=True)
            logger.debug("Ensured directory exists: %s", directory)

    def get_video_directory(self, video_id: str) -> Path:
        """
        Get or create directory for a specific video.

        Args:
            video_id: YouTube video ID

        Returns:
            Path to video directory (``base_dir/videos/<video_id>``)
        """
        video_dir = self.base_dir / "videos" / video_id
        video_dir.mkdir(parents=True, exist_ok=True)
        return video_dir

    def calculate_directory_size(self, directory: Path) -> int:
        """
        Calculate total size of all files in a directory (recursively).

        Files that cannot be stat'ed are logged and skipped rather than
        aborting the whole walk.

        Args:
            directory: Path to directory

        Returns:
            Total size in bytes; 0 if the directory does not exist
        """
        if not directory.exists():
            return 0

        total_size = 0
        for path in directory.rglob('*'):
            if not path.is_file():
                continue
            try:
                total_size += path.stat().st_size
            except OSError:
                # PermissionError is an OSError subclass; the file may also
                # have vanished between is_file() and stat().
                logger.warning("Could not access file: %s", path)

        return total_size

    def get_storage_usage(self) -> Dict[str, int]:
        """
        Get storage usage by category.

        Returns:
            Dictionary mapping each category name to its size in bytes,
            plus 'total' for everything under the base directory (which
            also counts files outside the category folders).
        """
        usage = {
            name: self.calculate_directory_size(self.base_dir / name)
            for name in self._CATEGORIES
        }
        usage['total'] = self.calculate_directory_size(self.base_dir)
        return usage

    def find_old_files(self, days: int = 30) -> List[Path]:
        """
        Find files older than specified days.

        Only the 'videos', 'audio' and 'thumbnails' categories are searched
        (recursively); age is judged by modification time.

        Args:
            days: Age threshold in days

        Returns:
            List of file paths older than threshold
        """
        old_files = []
        threshold = datetime.now() - timedelta(days=days)

        for category in ('videos', 'audio', 'thumbnails'):
            directory = self.base_dir / category
            if not directory.exists():
                continue
            for path in directory.rglob('*'):
                if not path.is_file():
                    continue
                try:
                    mtime = datetime.fromtimestamp(path.stat().st_mtime)
                except OSError:
                    logger.warning("Could not check file age: %s", path)
                    continue
                if mtime < threshold:
                    old_files.append(path)

        return old_files

    def cleanup_temp_files(self) -> int:
        """
        Clean up temporary files.

        Removes the whole temp directory and recreates it empty.

        Returns:
            Number of bytes freed; 0 if the temp directory is missing or
            the cleanup failed
        """
        temp_dir = self.base_dir / "temp"
        if not temp_dir.exists():
            return 0

        # Measure before deleting: afterwards there is nothing left to count.
        bytes_freed = self.calculate_directory_size(temp_dir)
        try:
            shutil.rmtree(temp_dir)
            temp_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            logger.error("Error cleaning temp directory: %s", e)
            return 0

        logger.info(
            "Cleaned temp directory, freed %.2f MB", bytes_freed / (1024 * 1024)
        )
        return bytes_freed

    def cleanup_orphaned_files(self, cache: Dict) -> int:
        """
        Remove files not referenced in cache.

        Only files directly inside the 'videos', 'audio', 'metadata' and
        'thumbnails' directories are considered (sub-directories are left
        alone). Paths are compared in resolved form, so a cached path that
        is spelled differently from the scanned one (relative vs absolute,
        ``..`` segments, symlinks) still protects its file.

        Note: an empty cache marks every scanned file as orphaned.

        Args:
            cache: Video cache dictionary; each value may carry
                'video_path', 'audio_path' and 'video_id'

        Returns:
            Number of bytes freed
        """
        referenced_files = set()
        for info in cache.values():
            for key in ('video_path', 'audio_path'):
                cached_path = info.get(key)
                if cached_path:
                    referenced_files.add(Path(cached_path).resolve())

            # Side-car files are derived from the video ID.
            video_id = info.get('video_id')
            if video_id:
                sidecars = (
                    self.base_dir / "videos" / f"{video_id}.info.json",
                    # Same layout archive_video() uses; without these every
                    # file in metadata/ would count as orphaned.
                    self.base_dir / "metadata" / f"{video_id}.json",
                    self.base_dir / "metadata" / f"{video_id}.info.json",
                    self.base_dir / "thumbnails" / f"{video_id}.jpg",
                )
                referenced_files.update(path.resolve() for path in sidecars)

        bytes_freed = 0
        for category in ('videos', 'audio', 'metadata', 'thumbnails'):
            directory = self.base_dir / category
            if not directory.exists():
                continue
            for path in directory.glob('*'):
                if not path.is_file() or path.resolve() in referenced_files:
                    continue
                try:
                    size = path.stat().st_size
                    path.unlink()
                except OSError as e:
                    logger.error("Error removing orphaned file %s: %s", path, e)
                else:
                    bytes_freed += size
                    logger.info("Removed orphaned file: %s", path)

        return bytes_freed

    def move_file_safe(self, source: Path, destination: Path) -> bool:
        """
        Safely move a file with error handling.

        Creates the destination's parent directory if needed. Failures are
        logged, never raised.

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if successful, False otherwise
        """
        # Accept plain strings as well as Path objects.
        source, destination = Path(source), Path(destination)
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(source), str(destination))
        except OSError as e:  # shutil.Error is an OSError subclass
            logger.error(
                "Error moving file from %s to %s: %s", source, destination, e
            )
            return False

        logger.debug("Moved %s to %s", source, destination)
        return True

    def copy_file_safe(self, source: Path, destination: Path) -> bool:
        """
        Safely copy a file (with metadata) with error handling.

        Creates the destination's parent directory if needed. Failures are
        logged, never raised.

        Args:
            source: Source file path
            destination: Destination file path

        Returns:
            True if successful, False otherwise
        """
        # Accept plain strings as well as Path objects.
        source, destination = Path(source), Path(destination)
        try:
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(source), str(destination))
        except OSError as e:  # shutil.Error is an OSError subclass
            logger.error(
                "Error copying file from %s to %s: %s", source, destination, e
            )
            return False

        logger.debug("Copied %s to %s", source, destination)
        return True

    def get_file_info(self, file_path: Path) -> Optional[Dict]:
        """
        Get detailed information about a file.

        Args:
            file_path: Path to the file

        Returns:
            File information dictionary, or None if the file doesn't exist
            or cannot be inspected. 'created' uses the platform's birth
            time when stat exposes it and falls back to ``st_ctime``
            (metadata-change time on Unix) otherwise.
        """
        try:
            stat = file_path.stat()
            # st_ctime is not a creation time on Unix; prefer st_birthtime.
            created_ts = getattr(stat, 'st_birthtime', stat.st_ctime)
            return {
                'path': str(file_path),
                'name': file_path.name,
                'size_bytes': stat.st_size,
                'size_mb': stat.st_size / (1024 * 1024),
                'created': datetime.fromtimestamp(created_ts).isoformat(),
                'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
                'accessed': datetime.fromtimestamp(stat.st_atime).isoformat(),
                'extension': file_path.suffix,
                'parent': str(file_path.parent)
            }
        except (FileNotFoundError, NotADirectoryError):
            # A missing file is an expected outcome, not an error.
            return None
        except (OSError, OverflowError, ValueError) as e:
            logger.error("Error getting file info for %s: %s", file_path, e)
            return None

    def _move_existing(self, moves: List[Tuple[Path, Path]]) -> bool:
        """Move every (source, destination) pair whose source exists.

        Keeps going after a failure so as many files as possible are moved.

        Returns:
            False if any attempted move failed, True otherwise
        """
        success = True
        for source, destination in moves:
            if source.exists() and not self.move_file_safe(source, destination):
                success = False
        return success

    def archive_video(self, video_id: str, archive_dir: str = "archive") -> bool:
        """
        Archive a video and its associated files.

        Moves ``<video_id>.mp4/.mp3/.json/.jpg`` from their category
        directories into ``base_dir/<archive_dir>/<video_id>/``. Files
        that do not exist are skipped.

        Args:
            video_id: YouTube video ID
            archive_dir: Archive directory name

        Returns:
            True if successful, False otherwise

        Raises:
            OSError: If the archive directory cannot be created
        """
        archive_path = self.base_dir / archive_dir / video_id
        archive_path.mkdir(parents=True, exist_ok=True)

        files_to_archive = [
            (
                self.base_dir / category / f"{video_id}{extension}",
                archive_path / f"{video_id}{extension}",
            )
            for category, extension in self._ARCHIVED_FILES
        ]

        success = self._move_existing(files_to_archive)
        if success:
            logger.info("Archived video %s to %s", video_id, archive_path)

        return success

    def restore_from_archive(self, video_id: str, archive_dir: str = "archive") -> bool:
        """
        Restore a video from archive.

        Moves the archived files back to their category directories and
        removes the archive directory if it ends up empty.

        Args:
            video_id: YouTube video ID
            archive_dir: Archive directory name

        Returns:
            True if successful, False otherwise (including when no archive
            exists for the video)
        """
        archive_path = self.base_dir / archive_dir / video_id

        if not archive_path.exists():
            logger.error("Archive not found for video %s", video_id)
            return False

        files_to_restore = [
            (
                archive_path / f"{video_id}{extension}",
                self.base_dir / category / f"{video_id}{extension}",
            )
            for category, extension in self._ARCHIVED_FILES
        ]

        success = self._move_existing(files_to_restore)
        if success:
            # Remove empty archive directory
            try:
                archive_path.rmdir()
            except OSError:
                pass  # Directory not empty, that's okay

            logger.info("Restored video %s from archive", video_id)

        return success

    def get_disk_usage(self) -> Dict:
        """
        Get disk usage statistics for the storage directory.

        Returns:
            Disk usage information with the keys 'total_bytes', 'total_gb',
            'used_bytes', 'used_gb', 'free_bytes', 'free_gb' and
            'percent_used'. On failure the same keys are present with zero
            values, plus 'error' holding the error message.
        """
        try:
            stat = shutil.disk_usage(self.base_dir)
        except OSError as e:
            logger.error("Error getting disk usage: %s", e)
            # Same shape as the success case so callers can index safely.
            return {
                'error': str(e),
                'total_bytes': 0,
                'total_gb': 0.0,
                'used_bytes': 0,
                'used_gb': 0.0,
                'free_bytes': 0,
                'free_gb': 0.0,
                'percent_used': 0
            }

        return {
            'total_bytes': stat.total,
            'total_gb': stat.total / (1024 ** 3),
            'used_bytes': stat.used,
            'used_gb': stat.used / (1024 ** 3),
            'free_bytes': stat.free,
            'free_gb': stat.free / (1024 ** 3),
            'percent_used': (stat.used / stat.total * 100) if stat.total > 0 else 0
        }