# Source listing metadata: 129 lines, 4.6 KiB, Python
import asyncio
import sys
from pathlib import Path

import pytest
from fastapi.testclient import TestClient

# Add parent directory to path for imports.
# This must run before the ``backend`` import below, which is why that import
# cannot be grouped with the others at the top of the file.
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))

from backend.main import app  # noqa: E402  (depends on the sys.path entry above)


class TestTranscriptAPI:
    """Endpoint tests for the transcript API, exercised through ``TestClient``.

    Each test sends a request to ``app`` and checks the HTTP status code and
    the keys/values of the JSON body that comes back.
    """

    @pytest.fixture
    def client(self):
        """Return a ``TestClient`` wrapping the application under test."""
        return TestClient(app)

    def test_get_transcript_success(self, client):
        """Test successful transcript retrieval"""
        response = client.get("/api/transcripts/dQw4w9WgXcQ")

        assert response.status_code == 200
        data = response.json()

        assert data["video_id"] == "dQw4w9WgXcQ"
        assert "transcript" in data
        assert data["extraction_method"] in (
            "youtube_api",
            "auto_captions",
            "whisper_audio",
            "mock",
        )
        assert data["word_count"] > 0
        assert "processing_time_seconds" in data

    def test_get_transcript_with_language(self, client):
        """Test transcript retrieval with language preference"""
        response = client.get("/api/transcripts/test123?language_preference=es")

        assert response.status_code == 200
        data = response.json()

        assert data["language"] == "es"
        assert data["video_id"] == "test123"

    def test_async_transcript_extraction(self, client):
        """Test background transcript extraction job"""
        # Start async extraction
        response = client.post(
            "/api/transcripts/extract",
            json={
                "video_id": "test123",
                "language_preference": "en",
                "include_metadata": True,
            },
        )

        assert response.status_code == 200
        data = response.json()

        assert "job_id" in data
        assert data["status"] == "processing"
        assert data["message"] == "Transcript extraction started"

        job_id = data["job_id"]

        # Check job status using the id handed back by the extract call
        status_response = client.get(f"/api/transcripts/jobs/{job_id}")
        assert status_response.status_code == 200

        status_data = status_response.json()
        assert status_data["job_id"] == job_id
        # The job may be in any lifecycle state by the time it is polled.
        assert status_data["status"] in (
            "pending",
            "processing",
            "completed",
            "failed",
        )
        assert "progress_percentage" in status_data

    def test_job_status_not_found(self, client):
        """Test job status for non-existent job"""
        response = client.get("/api/transcripts/jobs/non-existent-job-id")

        assert response.status_code == 404
        assert "not found" in response.json()["detail"].lower()

    def test_chunk_transcript(self, client):
        """Test transcript chunking for large content"""
        response = client.post("/api/transcripts/dQw4w9WgXcQ/chunk?max_tokens=100")

        assert response.status_code == 200
        data = response.json()

        assert data["video_id"] == "dQw4w9WgXcQ"
        assert "total_chunks" in data
        assert "chunks" in data
        assert isinstance(data["chunks"], list)

        # Validate every returned chunk, not just the first one; an empty
        # list is still accepted because the loop body simply never runs.
        for chunk in data["chunks"]:
            assert "chunk_index" in chunk
            assert "text" in chunk
            assert "token_count" in chunk

    def test_cache_stats(self, client):
        """Test cache statistics endpoint"""
        response = client.get("/api/transcripts/cache/stats")

        assert response.status_code == 200
        data = response.json()

        assert "total_keys" in data
        assert "active_keys" in data
        assert "cache_size_bytes" in data

    def test_transcript_metadata_included(self, client):
        """Test that metadata is included when requested"""
        response = client.get("/api/transcripts/test123?include_metadata=true")

        assert response.status_code == 200
        data = response.json()

        if data["transcript"]:  # If transcript was successfully extracted
            assert "metadata" in data
            metadata = data["metadata"]
            assert "word_count" in metadata
            assert "language" in metadata
            assert "extraction_method" in metadata

    def test_transcript_metadata_excluded(self, client):
        """Test that the endpoint still responds when metadata is not requested.

        This is a smoke check only: it does not assert that the ``metadata``
        field is absent or empty.
        """
        response = client.get("/api/transcripts/test123?include_metadata=false")

        assert response.status_code == 200
        data = response.json()

        # Metadata might still be None even if not requested
        # The important thing is the endpoint works
        assert "video_id" in data
        assert "extraction_method" in data