youtube-summarizer/backend/tests/integration/test_summarization_api.py

255 lines
10 KiB
Python

"""Integration tests for summarization API."""
import json
import pytest
import os
from unittest.mock import patch, MagicMock
from fastapi.testclient import TestClient
from backend.main import app
from backend.services.ai_service import SummaryLength
# Shared test client wrapping the app imported from backend.main; every test
# in this module sends its HTTP requests through it.
client = TestClient(app)
class TestSummarizationAPI:
    """Test cases for summarization API endpoints.

    All requests go through the module-level ``client``. Tests that need a
    summary replace ``get_ai_service`` in ``backend.api.summarization`` with
    the ``mock_ai_service`` fixture, so no real AI backend is contacted.
    """

    # Endpoint under test and the factory that is patched to inject the mock.
    SUMMARIZE_URL = "/api/summarize"
    AI_SERVICE_TARGET = "backend.api.summarization.get_ai_service"

    @pytest.fixture(autouse=True)
    def setup(self):
        """Setup test environment.

        Patches ``os.getenv`` for the duration of each test: the replacement
        returns ``"test-api-key"`` for ``DEEPSEEK_API_KEY`` and the
        caller-supplied default (``None`` when omitted) for any other name.
        """

        def getenv_side_effect(key, default=None):
            # Return test key for DEEPSEEK_API_KEY, the default for others
            if key == "DEEPSEEK_API_KEY":
                return "test-api-key"
            return default

        with patch("os.getenv", side_effect=getenv_side_effect):
            yield

    @pytest.fixture
    def mock_ai_service(self):
        """Mock AI service for testing.

        Returns:
            A ``MagicMock`` whose ``estimate_cost`` returns ``0.05`` and whose
            ``generate_summary`` returns a canned result object exposing the
            summary text, key points, themes, insights, confidence score,
            processing metadata and cost data asserted on by the tests.
        """
        mock_service = MagicMock()
        mock_service.estimate_cost.return_value = 0.05
        mock_service.generate_summary.return_value = MagicMock(
            summary="This is a test summary.",
            key_points=["Point 1", "Point 2", "Point 3"],
            main_themes=["Theme 1", "Theme 2"],
            actionable_insights=["Insight 1", "Insight 2"],
            confidence_score=0.92,
            processing_metadata={
                "model": "claude-3-5-haiku-20241022",
                "processing_time_seconds": 2.5,
                "input_tokens": 100,
                "output_tokens": 50,
                "total_tokens": 150,
                "chunks_processed": 1,
            },
            cost_data={
                "input_cost_usd": 0.015,
                "output_cost_usd": 0.030,
                "total_cost_usd": 0.045,
                "cost_per_summary": 0.045,
            },
        )
        return mock_service

    def test_summarize_success_sync(self, mock_ai_service):
        """Test successful synchronous summarization."""
        with patch(self.AI_SERVICE_TARGET, return_value=mock_ai_service):
            response = client.post(self.SUMMARIZE_URL, json={
                "transcript": "This is a test transcript with enough content to be meaningful for summarization testing.",
                "length": "standard",
                "async_processing": False,
            })

        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "completed"
        assert data["summary"] == "This is a test summary."
        assert len(data["key_points"]) == 3
        assert len(data["main_themes"]) == 2
        assert len(data["actionable_insights"]) == 2
        assert data["confidence_score"] == 0.92
        assert "processing_metadata" in data
        assert "cost_data" in data

    def test_summarize_success_async(self, mock_ai_service):
        """Test successful asynchronous summarization."""
        with patch(self.AI_SERVICE_TARGET, return_value=mock_ai_service):
            response = client.post(self.SUMMARIZE_URL, json={
                "transcript": "This is a test transcript with enough content for async processing.",
                "length": "detailed",
                "async_processing": True,
            })

        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "processing"
        assert "summary_id" in data
        assert data["summary"] is None

    def test_summarize_with_focus_areas(self, mock_ai_service):
        """Test summarization with focus areas."""
        with patch(self.AI_SERVICE_TARGET, return_value=mock_ai_service):
            response = client.post(self.SUMMARIZE_URL, json={
                "transcript": "This is a technical tutorial about machine learning and AI.",
                "length": "standard",
                "focus_areas": ["technical", "educational"],
                "async_processing": False,
            })

        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "completed"

    def test_summarize_transcript_too_short(self):
        """Test error for transcript too short."""
        response = client.post(self.SUMMARIZE_URL, json={
            "transcript": "Short.",
            "length": "standard",
        })

        assert response.status_code == 400
        assert "too short" in response.json()["detail"]

    def test_summarize_cost_limit_exceeded(self, mock_ai_service):
        """Test cost limit exceeded error."""
        mock_ai_service.estimate_cost.return_value = 1.50  # Above $1.00 limit

        with patch(self.AI_SERVICE_TARGET, return_value=mock_ai_service):
            response = client.post(self.SUMMARIZE_URL, json={
                "transcript": "This is a very long transcript that would be expensive to process." * 100,
                "length": "detailed",
            })

        assert response.status_code == 400
        data = response.json()
        assert "cost limit exceeded" in data["detail"]["error"].lower()
        assert "cost_reduction_tips" in data["detail"]["details"]

    def test_summarize_long_transcript_forces_async(self, mock_ai_service):
        """Test that very long transcripts force async processing."""
        # Create a very long transcript
        long_transcript = "This is a sentence in a very long transcript. " * 2000

        with patch(self.AI_SERVICE_TARGET, return_value=mock_ai_service):
            response = client.post(self.SUMMARIZE_URL, json={
                "transcript": long_transcript,
                "length": "standard",
                "async_processing": False,  # Should be overridden
            })

        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "processing"  # Forced to async
        assert "summary_id" in data

    def test_get_summary_success(self, mock_ai_service):
        """Test getting completed async summary.

        Creates an async job, injects a completed result for its id into
        ``job_results``, then fetches it through ``GET /api/summaries/{id}``.
        """
        # Result store the simulated completion is written into.
        from backend.api.summarization import job_results

        completed_result = {
            "status": "completed",
            "summary": "Async summary completed.",
            "key_points": ["Async point 1", "Async point 2"],
            "main_themes": ["Async theme"],
            "actionable_insights": ["Async insight"],
            "confidence_score": 0.88,
            "processing_metadata": {"model": "claude-3-5-haiku-20241022"},
            "cost_data": {"total_cost_usd": 0.05},
        }

        with patch(self.AI_SERVICE_TARGET, return_value=mock_ai_service):
            # First create an async summary
            response = client.post(self.SUMMARIZE_URL, json={
                "transcript": "This is a test transcript for async processing.",
                "length": "standard",
                "async_processing": True,
            })
            # Fail here with a clear status mismatch rather than a KeyError below.
            assert response.status_code == 200
            summary_id = response.json()["summary_id"]

            # Simulate completion by directly setting the result. patch.dict
            # puts job_results back to its prior contents on exit, so the
            # injected entry does not leak into other tests.
            with patch.dict(job_results, {summary_id: completed_result}):
                # Get the completed summary
                response = client.get(f"/api/summaries/{summary_id}")

                assert response.status_code == 200
                data = response.json()
                assert data["status"] == "completed"
                assert data["summary"] == "Async summary completed."

    def test_get_summary_not_found(self):
        """Test getting non-existent summary."""
        response = client.get("/api/summaries/non-existent-id")

        assert response.status_code == 404
        assert response.json()["detail"] == "Summary not found"

    def test_summarize_without_deepseek_key(self):
        """Test error when DeepSeek API key is not configured."""

        def getenv_without_keys(key, default=None):
            # No variable is set, but honour the caller's default so unrelated
            # os.getenv(name, default) lookups still get a usable value.
            return default

        with patch("os.getenv", side_effect=getenv_without_keys):  # No API key
            response = client.post(self.SUMMARIZE_URL, json={
                "transcript": "This is a test transcript.",
                "length": "standard",
            })

        assert response.status_code == 500
        assert "DeepSeek API key not configured" in response.json()["detail"]

    def test_summarize_ai_service_error(self, mock_ai_service):
        """Test handling of AI service errors."""
        from backend.core.exceptions import AIServiceError, ErrorCode

        mock_ai_service.generate_summary.side_effect = AIServiceError(
            message="AI service failed",
            error_code=ErrorCode.AI_SERVICE_ERROR,
            details={"error_type": "APIError"},
        )

        with patch(self.AI_SERVICE_TARGET, return_value=mock_ai_service):
            response = client.post(self.SUMMARIZE_URL, json={
                "transcript": "This is a test transcript that will cause an error.",
                "length": "standard",
                "async_processing": False,
            })

        assert response.status_code == 500
        data = response.json()
        assert "AI service error" in data["detail"]["error"]
        assert data["detail"]["code"] == "AI_SERVICE_ERROR"

    def test_request_validation(self):
        """Test request validation.

        Each malformed payload must be rejected with HTTP 422.
        """
        invalid_payloads = [
            # Test missing transcript
            ("missing transcript", {
                "length": "standard",
            }),
            # Test invalid length
            ("invalid length", {
                "transcript": "Valid transcript content here.",
                "length": "invalid_length",
            }),
            # Test invalid focus areas (should be list)
            ("focus_areas not a list", {
                "transcript": "Valid transcript content here.",
                "length": "standard",
                "focus_areas": "not_a_list",
            }),
        ]

        for case, payload in invalid_payloads:
            response = client.post(self.SUMMARIZE_URL, json=payload)
            assert response.status_code == 422, case