youtube-summarizer/backend/api/history.py

273 lines
10 KiB
Python

"""API endpoints for job history management."""
from fastapi import APIRouter, HTTPException, Depends, Query
from typing import List, Optional
import logging
from datetime import datetime
from backend.models.job_history import (
JobHistoryQuery, JobHistoryResponse, JobDetailResponse,
JobStatus, JobMetadata
)
from backend.services.job_history_service import JobHistoryService
from backend.config.video_download_config import VideoDownloadConfig
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router collecting every endpoint in this file under the /api/history prefix.
router = APIRouter(prefix="/api/history", tags=["history"])
# Dependency for job history service
def get_job_history_service() -> JobHistoryService:
    """Create the JobHistoryService injected into the endpoints via Depends.

    Every call builds a fresh VideoDownloadConfig and passes it to the
    service constructor.
    """
    return JobHistoryService(VideoDownloadConfig())
@router.post("/initialize", summary="Initialize job history index")
async def initialize_history(
    service: JobHistoryService = Depends(get_job_history_service)
):
    """Initialize the job history index, or rebuild it, from existing files.

    Returns a confirmation message once the service has finished. Any
    exception raised by the service is logged and surfaced as HTTP 500.
    """
    try:
        await service.initialize_index()
    except Exception as exc:
        logger.error(f"Failed to initialize history index: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to initialize history: {str(exc)}")
    # Only reached when initialize_index() completed without raising.
    return {"message": "Job history index initialized successfully"}
@router.get("", response_model=JobHistoryResponse, summary="Get job history")
async def get_job_history(
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(15, ge=1, le=50, description="Items per page"),
    search: Optional[str] = Query(None, description="Search in title, video ID, or channel"),
    status: Optional[List[JobStatus]] = Query(None, description="Filter by job status"),
    date_from: Optional[datetime] = Query(None, description="Filter jobs from this date"),
    date_to: Optional[datetime] = Query(None, description="Filter jobs to this date"),
    sort_by: str = Query("created_at", pattern="^(created_at|title|duration|processing_time|word_count)$", description="Sort field"),
    sort_order: str = Query("desc", pattern="^(asc|desc)$", description="Sort order"),
    starred_only: bool = Query(False, description="Show only starred jobs"),
    tags: Optional[List[str]] = Query(None, description="Filter by tags"),
    service: JobHistoryService = Depends(get_job_history_service)
):
    """Return one page of job history, filtered and sorted per the query string.

    The query parameters are bundled into a JobHistoryQuery and handed to the
    service; note that the ``status`` parameter is passed on as
    ``status_filter``. Any exception raised while building the query or
    fetching the results is logged and reported as HTTP 500.
    """
    try:
        history_query = JobHistoryQuery(
            page=page, page_size=page_size,
            search=search, status_filter=status,
            date_from=date_from, date_to=date_to,
            sort_by=sort_by, sort_order=sort_order,
            starred_only=starred_only, tags=tags,
        )
        return await service.get_job_history(history_query)
    except Exception as exc:
        logger.error(f"Failed to get job history: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to get job history: {str(exc)}")
@router.get("/{video_id}", response_model=JobDetailResponse, summary="Get job details")
async def get_job_detail(
    video_id: str,
    service: JobHistoryService = Depends(get_job_history_service)
):
    """Return the detailed record for the job identified by ``video_id``.

    Responds with HTTP 404 when the service returns nothing for that ID and
    with HTTP 500 when the lookup raises any other exception.
    """
    try:
        found = await service.get_job_detail(video_id)
        if found:
            return found
        raise HTTPException(status_code=404, detail=f"Job {video_id} not found")
    except HTTPException:
        # Let the deliberate 404 above pass through untouched.
        raise
    except Exception as exc:
        logger.error(f"Failed to get job detail for {video_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to get job detail: {str(exc)}")
@router.patch("/{video_id}", response_model=JobMetadata, summary="Update job")
async def update_job(
    video_id: str,
    is_starred: Optional[bool] = None,
    notes: Optional[str] = None,
    tags: Optional[List[str]] = None,
    service: JobHistoryService = Depends(get_job_history_service)
):
    """Apply the supplied metadata changes (star flag, notes, tags) to a job.

    Only fields that were actually provided (i.e. are not None) are forwarded
    to the service. Responds with HTTP 400 when no field was provided, 404
    when the service returns nothing for ``video_id``, and 500 on any other
    failure.

    NOTE(review): ``tags`` is a list parameter declared without ``Query(...)``,
    so FastAPI reads it from the request body while ``is_starred`` and
    ``notes`` come from the query string -- confirm this split is intended.
    """
    try:
        provided = {"is_starred": is_starred, "notes": notes, "tags": tags}
        updates = {
            field: value for field, value in provided.items() if value is not None
        }
        if not updates:
            raise HTTPException(status_code=400, detail="No updates provided")
        result = await service.update_job(video_id, **updates)
        if result:
            return result
        raise HTTPException(status_code=404, detail=f"Job {video_id} not found")
    except HTTPException:
        # The 400/404 raised above are intentional; do not wrap them in a 500.
        raise
    except Exception as exc:
        logger.error(f"Failed to update job {video_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to update job: {str(exc)}")
@router.delete("/{video_id}", summary="Delete job")
async def delete_job(
    video_id: str,
    delete_files: bool = Query(False, description="Also delete associated files"),
    service: JobHistoryService = Depends(get_job_history_service)
):
    """Remove a job and, when ``delete_files`` is set, its associated files.

    Returns a confirmation message that echoes ``delete_files``. Responds
    with HTTP 404 when the service reports failure and with HTTP 500 when the
    deletion raises any other exception.
    """
    try:
        deleted = await service.delete_job(video_id, delete_files=delete_files)
        if deleted:
            return {"message": f"Job {video_id} deleted successfully", "files_deleted": delete_files}
        raise HTTPException(status_code=404, detail=f"Job {video_id} not found")
    except HTTPException:
        # Preserve the 404 raised above.
        raise
    except Exception as exc:
        logger.error(f"Failed to delete job {video_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to delete job: {str(exc)}")
@router.get("/{video_id}/files/{file_type}", summary="Download job file")
async def download_job_file(
    video_id: str,
    file_type: str,
    service: JobHistoryService = Depends(get_job_history_service)
):
    """Download a specific file associated with a job.

    Args:
        video_id: Identifier of the job whose file is requested.
        file_type: One of "audio", "transcript", "transcript_json" or
            "summary".
        service: Injected job history service used to look the job up.

    Returns:
        A FileResponse for the requested file, offered to the client as
        "<video_id>_<file_type><extension>".

    Raises:
        HTTPException: 404 when the job does not exist, when the job has no
            file of that type, or when the recorded path is not a regular
            file on disk; 400 when ``file_type`` is not recognised; 500 for
            any other failure.
    """
    try:
        from fastapi.responses import FileResponse
        job_detail = await service.get_job_detail(video_id)
        if not job_detail:
            raise HTTPException(status_code=404, detail=f"Job {video_id} not found")
        # Map file types to the paths recorded on the job
        job_files = job_detail.job.files
        file_mapping = {
            "audio": job_files.audio,
            "transcript": job_files.transcript,
            "transcript_json": job_files.transcript_json,
            "summary": job_files.summary
        }
        if file_type not in file_mapping:
            raise HTTPException(status_code=400, detail=f"Invalid file type: {file_type}")
        file_path = file_mapping[file_type]
        if not file_path:
            raise HTTPException(status_code=404, detail=f"File {file_type} not available for job {video_id}")
        # Recorded paths are joined onto the storage base directory
        config = VideoDownloadConfig()
        storage_dirs = config.get_storage_dirs()
        full_path = storage_dirs["base"] / file_path
        # is_file() rather than exists(): a directory at this path would pass
        # exists() and then make FileResponse fail while sending the body.
        if not full_path.is_file():
            raise HTTPException(status_code=404, detail=f"File {file_type} not found on disk")
        # Determine media type
        media_types = {
            "audio": "audio/mpeg",
            "transcript": "text/plain",
            "transcript_json": "application/json",
            "summary": "text/plain"
        }
        return FileResponse(
            path=str(full_path),
            media_type=media_types.get(file_type, "application/octet-stream"),
            # Path.suffix already carries its leading dot (or is empty), so
            # appending it directly avoids a dangling "." for suffix-less files.
            filename=f"{video_id}_{file_type}{full_path.suffix}"
        )
    except HTTPException:
        # The 400/404 responses above are intentional; do not wrap them.
        raise
    except Exception as exc:
        # logger.exception keeps the traceback that logger.error would drop.
        logger.exception("Failed to download file %s for job %s: %s", file_type, video_id, exc)
        raise HTTPException(status_code=500, detail=f"Failed to download file: {str(exc)}") from exc
@router.post("/{video_id}/reprocess", summary="Reprocess job")
async def reprocess_job(
    video_id: str,
    regenerate_transcript: bool = Query(False, description="Regenerate transcript"),
    generate_summary: bool = Query(False, description="Generate summary"),
    service: JobHistoryService = Depends(get_job_history_service)
):
    """Acknowledge a reprocessing request for a job (placeholder endpoint).

    No reprocessing is performed here: the handler only checks that the job
    exists and echoes back which actions were requested. Responds with HTTP
    404 for an unknown job, 400 when neither action flag is set, and 500 on
    any other failure.
    """
    try:
        # Placeholder: a future version would hand off to the transcript and
        # summary services instead of just describing the request.
        existing = await service.get_job_detail(video_id)
        if not existing:
            raise HTTPException(status_code=404, detail=f"Job {video_id} not found")
        requested = (
            ("regenerate transcript", regenerate_transcript),
            ("generate summary", generate_summary),
        )
        actions = [label for label, wanted in requested if wanted]
        if not actions:
            raise HTTPException(status_code=400, detail="No reprocessing actions specified")
        # "status" is a fixed value for now; nothing is actually enqueued.
        response = {
            "message": f"Reprocessing requested for job {video_id}",
            "actions": actions,
            "status": "queued",
            "note": "Reprocessing implementation pending - would integrate with existing services"
        }
        return response
    except HTTPException:
        raise
    except Exception as exc:
        logger.error(f"Failed to reprocess job {video_id}: {exc}")
        raise HTTPException(status_code=500, detail=f"Failed to reprocess job: {str(exc)}")
@router.get("/stats/overview", summary="Get history statistics")
async def get_history_stats(
    service: JobHistoryService = Depends(get_job_history_service)
):
    """Get overview statistics for job history.

    Returns:
        A dict with the keys "total_jobs", "total_storage_mb", "oldest_job",
        "newest_job" and "last_updated". When no index is available the
        counters are 0 and the remaining values are None, so the response
        always has the same set of keys.

    Raises:
        HTTPException: 500 when loading the index fails.
    """
    try:
        # NOTE(review): this calls the service's private _load_index();
        # a public accessor on JobHistoryService would be preferable.
        index = await service._load_index()
        if not index:
            return {
                "total_jobs": 0,
                "total_storage_mb": 0,
                "oldest_job": None,
                "newest_job": None,
                # Present for parity with the populated response below.
                "last_updated": None
            }
        return {
            "total_jobs": index.total_jobs,
            "total_storage_mb": index.total_storage_mb,
            "oldest_job": index.oldest_job,
            "newest_job": index.newest_job,
            "last_updated": index.last_updated
        }
    except Exception as exc:
        # logger.exception keeps the traceback that logger.error would drop.
        logger.exception("Failed to get history stats: %s", exc)
        raise HTTPException(status_code=500, detail=f"Failed to get history stats: {str(exc)}") from exc