youtube-summarizer/backend/tests/integration/test_pipeline_api.py

520 lines
21 KiB
Python

"""Integration tests for Pipeline API endpoints."""
import pytest
from fastapi.testclient import TestClient
from unittest.mock import Mock, AsyncMock, patch
import asyncio
from datetime import datetime
from backend.main import app
from backend.models.pipeline import PipelineStage
@pytest.fixture
def client():
    """Provide a ``TestClient`` wrapping the application under test."""
    test_client = TestClient(app)
    return test_client
@pytest.fixture
def mock_dependencies():
    """Build stand-ins for every collaborator of the pipeline.

    Returns:
        A dict keyed by service name. Each value is a ``Mock`` whose
        coroutine methods are ``AsyncMock`` objects with canned results.
    """
    # Video service: resolves the id and returns fixed metadata.
    video_service = Mock()
    video_service.extract_video_id = AsyncMock(return_value="test_video_id")
    video_service.get_video_metadata = AsyncMock(
        return_value={
            "title": "Test Video",
            "description": "Test description",
            "category": "Education"
        }
    )

    # Transcript service: yields an object exposing a ``transcript`` attribute.
    transcript_service = Mock()
    transcript_service.extract_transcript = AsyncMock(
        return_value=Mock(transcript="Test transcript content")
    )

    # AI service: yields an object carrying the canned summary fields.
    canned_summary = Mock(
        summary="Test summary",
        key_points=["Point 1", "Point 2"],
        main_themes=["Theme 1"],
        actionable_insights=["Insight 1"],
        confidence_score=0.8,
        processing_metadata={"tokens": 100},
        cost_data={"cost": 0.01}
    )
    ai_service = Mock()
    ai_service.generate_summary = AsyncMock(return_value=canned_summary)

    # Cache manager: always a miss on read, always succeeds on write.
    cache_manager = Mock()
    cache_manager.get_cached_pipeline_result = AsyncMock(return_value=None)
    cache_manager.cache_pipeline_result = AsyncMock(return_value=True)

    # Notification service: both notification kinds report success.
    notification_service = Mock()
    notification_service.send_completion_notification = AsyncMock(return_value=True)
    notification_service.send_progress_notification = AsyncMock(return_value=True)

    return {
        'video_service': video_service,
        'transcript_service': transcript_service,
        'ai_service': ai_service,
        'cache_manager': cache_manager,
        'notification_service': notification_service
    }
class TestPipelineAPI:
    """Test suite for Pipeline API endpoints.

    Most tests replace ``backend.api.pipeline.get_summary_pipeline`` (and,
    where the endpoint needs them, the cache, notification and websocket
    collaborators) with mocks, then drive the endpoint through ``client``
    and assert on the HTTP status and JSON body.

    Coroutine methods are mocked with ``AsyncMock`` and verified with the
    ``assert_awaited_*`` family: ``assert_called_*`` would also pass if the
    endpoint created the coroutine but never awaited it.
    """

    def test_process_video_endpoint(self, client):
        """Test POST /api/process endpoint."""
        request_data = {
            "video_url": "https://youtube.com/watch?v=test123",
            "summary_length": "standard",
            "focus_areas": ["main points"],
            "include_timestamps": False,
            "enable_notifications": True,
            "quality_threshold": 0.7
        }
        # Pin the API key so this happy path does not depend on the ambient
        # environment: test_process_video_missing_api_key shows the endpoint
        # answers 500 when the key is absent.
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline, \
                patch.dict('os.environ', {'DEEPSEEK_API_KEY': 'test_key'}):
            mock_pipeline = Mock()
            mock_pipeline.process_video = AsyncMock(return_value="test_job_id")
            mock_get_pipeline.return_value = mock_pipeline
            response = client.post("/api/process", json=request_data)
        assert response.status_code == 200
        data = response.json()
        assert data["job_id"] == "test_job_id"
        assert data["status"] == "processing"
        assert data["message"] == "Video processing started"
        assert "estimated_completion_time" in data
        # Verify the pipeline coroutine was actually awaited exactly once.
        mock_pipeline.process_video.assert_awaited_once()

    def test_process_video_invalid_url(self, client):
        """Test process endpoint with invalid URL."""
        request_data = {
            "video_url": "not-a-valid-url",
            "summary_length": "standard"
        }
        response = client.post("/api/process", json=request_data)
        # Should fail validation
        assert response.status_code == 422  # Validation error

    def test_process_video_missing_api_key(self, client):
        """Test process endpoint without API key configured."""
        request_data = {
            "video_url": "https://youtube.com/watch?v=test123"
        }
        with patch.dict('os.environ', {}, clear=True):
            # Clear environment to simulate missing API key
            response = client.post("/api/process", json=request_data)
        assert response.status_code == 500
        assert "DeepSeek API key not configured" in response.json()["detail"]

    def test_get_pipeline_status_running(self, client):
        """Test GET /api/process/{job_id} for running job."""
        job_id = "test_job_123"
        # Mock running pipeline result
        mock_result = Mock()
        mock_result.job_id = job_id
        mock_result.status = PipelineStage.GENERATING_SUMMARY
        mock_result.video_metadata = {"title": "Test Video"}
        mock_result.processing_time_seconds = None
        mock_result.error = None
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline:
            mock_pipeline = Mock()
            mock_pipeline.get_pipeline_result = AsyncMock(return_value=mock_result)
            mock_get_pipeline.return_value = mock_pipeline
            response = client.get(f"/api/process/{job_id}")
        assert response.status_code == 200
        data = response.json()
        assert data["job_id"] == job_id
        assert data["status"] == "generating_summary"
        assert data["progress_percentage"] == 75  # Based on stage mapping
        assert "current_message" in data
        assert data["video_metadata"]["title"] == "Test Video"
        assert data["result"] is None  # Not completed yet

    def test_get_pipeline_status_completed(self, client):
        """Test GET /api/process/{job_id} for completed job."""
        job_id = "completed_job_456"
        # Mock completed pipeline result
        mock_result = Mock()
        mock_result.job_id = job_id
        mock_result.status = PipelineStage.COMPLETED
        mock_result.video_metadata = {"title": "Completed Video"}
        mock_result.processing_time_seconds = 120.5
        mock_result.summary = "Final summary"
        mock_result.key_points = ["Key point 1", "Key point 2"]
        mock_result.main_themes = ["Theme 1"]
        mock_result.actionable_insights = ["Action 1"]
        mock_result.confidence_score = 0.9
        mock_result.quality_score = 0.85
        mock_result.cost_data = {"total_cost": 0.05}
        mock_result.error = None
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline:
            mock_pipeline = Mock()
            mock_pipeline.get_pipeline_result = AsyncMock(return_value=mock_result)
            mock_get_pipeline.return_value = mock_pipeline
            response = client.get(f"/api/process/{job_id}")
        assert response.status_code == 200
        data = response.json()
        assert data["job_id"] == job_id
        assert data["status"] == "completed"
        assert data["progress_percentage"] == 100
        assert data["processing_time_seconds"] == 120.5
        # Check result data
        result = data["result"]
        assert result["summary"] == "Final summary"
        assert len(result["key_points"]) == 2
        assert len(result["main_themes"]) == 1
        assert result["confidence_score"] == 0.9
        assert result["quality_score"] == 0.85

    def test_get_pipeline_status_failed(self, client):
        """Test GET /api/process/{job_id} for failed job."""
        job_id = "failed_job_789"
        # Mock failed pipeline result
        mock_result = Mock()
        mock_result.job_id = job_id
        mock_result.status = PipelineStage.FAILED
        mock_result.video_metadata = None
        mock_result.processing_time_seconds = 45.0
        mock_result.error = {
            "message": "Video not accessible",
            "type": "VideoAccessError",
            "stage": "extracting_transcript",
            "retry_count": 2
        }
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline:
            mock_pipeline = Mock()
            mock_pipeline.get_pipeline_result = AsyncMock(return_value=mock_result)
            mock_get_pipeline.return_value = mock_pipeline
            response = client.get(f"/api/process/{job_id}")
        assert response.status_code == 200
        data = response.json()
        assert data["job_id"] == job_id
        assert data["status"] == "failed"
        assert data["progress_percentage"] == 0
        # Check error data
        error = data["error"]
        assert error["message"] == "Video not accessible"
        assert error["type"] == "VideoAccessError"
        assert error["retry_count"] == 2

    def test_get_pipeline_status_not_found(self, client):
        """Test GET /api/process/{job_id} for non-existent job."""
        job_id = "non_existent_job"
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline:
            mock_pipeline = Mock()
            # A ``None`` result is how the mocked pipeline signals "unknown job".
            mock_pipeline.get_pipeline_result = AsyncMock(return_value=None)
            mock_get_pipeline.return_value = mock_pipeline
            response = client.get(f"/api/process/{job_id}")
        assert response.status_code == 404
        assert "Pipeline job not found" in response.json()["detail"]

    def test_cancel_pipeline_success(self, client):
        """Test DELETE /api/process/{job_id} for successful cancellation."""
        job_id = "cancel_job_123"
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline:
            mock_pipeline = Mock()
            mock_pipeline.cancel_pipeline = AsyncMock(return_value=True)
            mock_get_pipeline.return_value = mock_pipeline
            response = client.delete(f"/api/process/{job_id}")
        assert response.status_code == 200
        data = response.json()
        assert data["message"] == "Pipeline cancelled successfully"
        # The cancellation coroutine must have been awaited with the job id.
        mock_pipeline.cancel_pipeline.assert_awaited_once_with(job_id)

    def test_cancel_pipeline_not_found(self, client):
        """Test DELETE /api/process/{job_id} for non-existent job."""
        job_id = "non_existent_job"
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline:
            mock_pipeline = Mock()
            # ``False`` from cancel_pipeline is expected to map to a 404.
            mock_pipeline.cancel_pipeline = AsyncMock(return_value=False)
            mock_get_pipeline.return_value = mock_pipeline
            response = client.delete(f"/api/process/{job_id}")
        assert response.status_code == 404
        assert "Pipeline job not found" in response.json()["detail"]

    def test_pipeline_history_endpoint(self, client):
        """Test GET /api/process/{job_id}/history endpoint."""
        job_id = "history_job_123"
        # Mock pipeline result with history
        mock_result = Mock()
        mock_result.job_id = job_id
        mock_result.started_at = datetime(2025, 1, 25, 10, 0, 0)
        mock_result.completed_at = datetime(2025, 1, 25, 10, 2, 30)
        mock_result.processing_time_seconds = 150.0
        mock_result.retry_count = 1
        mock_result.status = PipelineStage.COMPLETED
        mock_result.video_url = "https://youtube.com/watch?v=test123"
        mock_result.video_id = "test123"
        mock_result.error = None
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline:
            mock_pipeline = Mock()
            mock_pipeline.get_pipeline_result = AsyncMock(return_value=mock_result)
            mock_get_pipeline.return_value = mock_pipeline
            response = client.get(f"/api/process/{job_id}/history")
        assert response.status_code == 200
        data = response.json()
        assert data["job_id"] == job_id
        # ``started_at`` is exposed as ``created_at``, in ISO-8601 form.
        assert data["created_at"] == "2025-01-25T10:00:00"
        assert data["completed_at"] == "2025-01-25T10:02:30"
        assert data["processing_time_seconds"] == 150.0
        assert data["retry_count"] == 1
        assert data["final_status"] == "completed"
        assert data["video_url"] == "https://youtube.com/watch?v=test123"
        assert data["error_history"] == []

    def test_pipeline_stats_endpoint(self, client):
        """Test GET /api/stats endpoint."""
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline, \
                patch('backend.api.pipeline.get_cache_manager') as mock_get_cache, \
                patch('backend.api.pipeline.get_notification_service') as mock_get_notif, \
                patch('backend.api.pipeline.websocket_manager') as mock_ws:
            # Setup mocks
            mock_pipeline = Mock()
            mock_pipeline.get_active_jobs.return_value = ["job1", "job2"]
            mock_get_pipeline.return_value = mock_pipeline
            mock_cache = Mock()
            mock_cache.get_cache_stats = AsyncMock(return_value={
                "total_entries": 10,
                "entries_by_type": {"pipeline_result": 5, "transcript": 3, "metadata": 2}
            })
            mock_get_cache.return_value = mock_cache
            mock_notif = Mock()
            mock_notif.get_notification_stats.return_value = {
                "total_notifications": 25,
                "notifications_by_type": {"completion": 10, "error": 2, "progress": 13}
            }
            mock_get_notif.return_value = mock_notif
            mock_ws.get_stats.return_value = {
                "total_connections": 3,
                "job_connections": {"job1": 2, "job2": 1}
            }
            response = client.get("/api/stats")
        assert response.status_code == 200
        data = response.json()
        assert data["active_jobs"]["count"] == 2
        assert data["active_jobs"]["job_ids"] == ["job1", "job2"]
        assert data["cache"]["total_entries"] == 10
        assert data["notifications"]["total_notifications"] == 25
        assert data["websockets"]["total_connections"] == 3
        assert "timestamp" in data

    def test_cleanup_endpoint(self, client):
        """Test POST /api/cleanup endpoint."""
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline, \
                patch('backend.api.pipeline.get_cache_manager') as mock_get_cache, \
                patch('backend.api.pipeline.get_notification_service') as mock_get_notif:
            # Setup mocks
            mock_pipeline = Mock()
            mock_pipeline.cleanup_completed_jobs = AsyncMock()
            mock_get_pipeline.return_value = mock_pipeline
            mock_cache = Mock()
            mock_get_cache.return_value = mock_cache
            mock_notif = Mock()
            mock_notif.clear_history.return_value = None
            mock_get_notif.return_value = mock_notif
            response = client.post("/api/cleanup", params={"max_age_hours": 48})
        assert response.status_code == 200
        data = response.json()
        assert data["message"] == "Cleanup completed successfully"
        assert data["max_age_hours"] == 48
        assert "timestamp" in data
        # Verify cleanup methods were called; the pipeline cleanup is a
        # coroutine, so check that it was awaited with the requested age.
        mock_pipeline.cleanup_completed_jobs.assert_awaited_once_with(48)
        mock_notif.clear_history.assert_called_once()

    def test_health_check_healthy(self, client):
        """Test GET /api/health endpoint when healthy."""
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline, \
                patch.dict('os.environ', {'DEEPSEEK_API_KEY': 'test_key'}):
            mock_pipeline = Mock()
            mock_pipeline.get_active_jobs.return_value = ["job1"]
            mock_get_pipeline.return_value = mock_pipeline
            response = client.get("/api/health")
        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "healthy"
        assert data["active_jobs"] == 1
        assert data["deepseek_api_available"] is True
        assert "timestamp" in data

    def test_health_check_degraded(self, client):
        """Test GET /api/health endpoint when degraded."""
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline, \
                patch.dict('os.environ', {}, clear=True):  # No API key
            mock_pipeline = Mock()
            mock_pipeline.get_active_jobs.return_value = []
            mock_get_pipeline.return_value = mock_pipeline
            response = client.get("/api/health")
        assert response.status_code == 200
        data = response.json()
        assert data["status"] == "degraded"
        assert data["active_jobs"] == 0
        assert data["deepseek_api_available"] is False
        assert data["warning"] == "DeepSeek API key not configured"
class TestPipelineAPIErrorHandling:
    """Checks that failures inside the pipeline surface as HTTP errors."""

    def test_process_video_service_error(self, client):
        """POST /api/process answers 500 when ``process_video`` raises."""
        failing_pipeline = Mock()
        failing_pipeline.process_video = AsyncMock(
            side_effect=Exception("Service temporarily unavailable")
        )
        payload = {"video_url": "https://youtube.com/watch?v=test123"}
        with patch('backend.api.pipeline.get_summary_pipeline',
                   return_value=failing_pipeline):
            response = client.post("/api/process", json=payload)
        assert response.status_code == 500
        # The detail carries both the endpoint's prefix and the original cause.
        detail = response.json()["detail"]
        assert "Failed to start processing" in detail
        assert "Service temporarily unavailable" in detail

    def test_stats_endpoint_error(self, client):
        """GET /api/stats answers 500 when listing active jobs raises."""
        failing_pipeline = Mock()
        failing_pipeline.get_active_jobs = Mock(
            side_effect=Exception("Database error")
        )
        with patch('backend.api.pipeline.get_summary_pipeline',
                   return_value=failing_pipeline):
            response = client.get("/api/stats")
        assert response.status_code == 500
        detail = response.json()["detail"]
        assert "Failed to retrieve statistics" in detail

    def test_cleanup_endpoint_error(self, client):
        """POST /api/cleanup answers 500 when the job cleanup raises."""
        failing_pipeline = Mock()
        failing_pipeline.cleanup_completed_jobs = AsyncMock(
            side_effect=Exception("Cleanup failed")
        )
        with patch('backend.api.pipeline.get_summary_pipeline',
                   return_value=failing_pipeline):
            response = client.post("/api/cleanup")
        assert response.status_code == 500
        detail = response.json()["detail"]
        assert "Cleanup failed" in detail

    def test_health_check_error(self, client):
        """GET /api/health answers 503 when listing active jobs raises."""
        failing_pipeline = Mock()
        failing_pipeline.get_active_jobs = Mock(
            side_effect=Exception("Health check failed")
        )
        with patch('backend.api.pipeline.get_summary_pipeline',
                   return_value=failing_pipeline):
            response = client.get("/api/health")
        assert response.status_code == 503
        detail = response.json()["detail"]
        assert "Health check failed" in detail
class TestPipelineAPIValidation:
    """Test request validation in Pipeline API.

    The "either 200 or 422" tests mock the pipeline and pin the API key, so
    an accepted request ends in a deterministic 200. Without that, an
    accepted request would reach the real pipeline, or fail with the 500
    the endpoint returns when the DeepSeek key is missing.
    """

    def test_process_video_missing_url(self, client):
        """Test process endpoint with missing URL."""
        request_data = {
            "summary_length": "standard"
        }
        response = client.post("/api/process", json=request_data)
        assert response.status_code == 422  # Validation error

    def test_process_video_invalid_summary_length(self, client):
        """Test process endpoint with invalid summary length."""
        request_data = {
            "video_url": "https://youtube.com/watch?v=test123",
            "summary_length": "invalid_length"
        }
        # Mock the pipeline (as the quality-threshold test does) so that, if
        # the request model accepts any string, no real processing starts.
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline, \
                patch.dict('os.environ', {'DEEPSEEK_API_KEY': 'test_key'}):
            mock_pipeline = Mock()
            mock_pipeline.process_video = AsyncMock(return_value="test_job")
            mock_get_pipeline.return_value = mock_pipeline
            response = client.post("/api/process", json=request_data)
        # Should still work as validation may be handled in service layer
        # or accept any string value
        assert response.status_code in [200, 422]

    def test_process_video_invalid_quality_threshold(self, client):
        """Test process endpoint with invalid quality threshold."""
        request_data = {
            "video_url": "https://youtube.com/watch?v=test123",
            "quality_threshold": 1.5  # Should be 0.0-1.0
        }
        # Pin the API key so a pass-through request cannot fail with the
        # "API key not configured" 500 instead of the expected 200.
        with patch('backend.api.pipeline.get_summary_pipeline') as mock_get_pipeline, \
                patch.dict('os.environ', {'DEEPSEEK_API_KEY': 'test_key'}):
            mock_pipeline = Mock()
            mock_pipeline.process_video = AsyncMock(return_value="test_job")
            mock_get_pipeline.return_value = mock_pipeline
            response = client.post("/api/process", json=request_data)
        # Should either validate or pass through
        assert response.status_code in [200, 422]