510 lines
18 KiB
Python
510 lines
18 KiB
Python
"""Unit tests for SummaryPipeline orchestration service."""
|
|
import pytest
|
|
import asyncio
|
|
from unittest.mock import Mock, AsyncMock, patch
|
|
from datetime import datetime, timedelta
|
|
|
|
from backend.services.summary_pipeline import SummaryPipeline
|
|
from backend.models.pipeline import (
|
|
PipelineStage, PipelineConfig, PipelineResult, ContentAnalysis
|
|
)
|
|
from backend.services.ai_service import SummaryResult
|
|
|
|
|
|
@pytest.fixture
def mock_video_service():
    """Provide a VideoService stand-in with canned video-id and metadata responses."""
    # Fixed metadata payload returned for any video lookup.
    canned_metadata = {
        "title": "Test Video",
        "description": "Test description",
        "duration": "PT10M30S",
        "category": "Education",
        "tags": ["test", "tutorial"],
        "language": "en",
    }
    service = Mock()
    service.extract_video_id = AsyncMock(return_value="test_video_id")
    service.get_video_metadata = AsyncMock(return_value=canned_metadata)
    return service
|
|
|
|
|
|
@pytest.fixture
def mock_transcript_service():
    """Provide a TranscriptService stand-in that returns a fixed transcript."""
    transcript_text = (
        "This is a test transcript with educational content. "
        "We will learn about important concepts and examples."
    )
    service = Mock()
    # extract_transcript resolves to an object exposing a `.transcript` attribute.
    service.extract_transcript = AsyncMock(
        return_value=Mock(transcript=transcript_text)
    )
    return service
|
|
|
|
|
|
@pytest.fixture
def mock_ai_service():
    """Provide an AnthropicSummarizer stand-in yielding one fixed SummaryResult."""
    canned_summary = SummaryResult(
        summary="Test summary of the video content",
        key_points=["Point 1", "Point 2", "Point 3"],
        main_themes=["Theme 1", "Theme 2"],
        actionable_insights=["Insight 1"],
        confidence_score=0.85,
        processing_metadata={"tokens_used": 1000},
        cost_data={"total_cost": 0.01},
    )
    service = Mock()
    service.generate_summary = AsyncMock(return_value=canned_summary)
    return service
|
|
|
|
|
|
@pytest.fixture
def mock_cache_manager():
    """Provide a CacheManager stand-in: every read misses, every write succeeds."""
    cache = Mock()
    # Reads: simulate a cold cache so the pipeline always does real work.
    cache.get_cached_pipeline_result = AsyncMock(return_value=None)
    cache.get_cached_video_metadata = AsyncMock(return_value=None)
    # Writes: always report success.
    cache.cache_pipeline_result = AsyncMock(return_value=True)
    cache.cache_video_metadata = AsyncMock(return_value=True)
    return cache
|
|
|
|
|
|
@pytest.fixture
def mock_notification_service():
    """Provide a NotificationService stand-in whose send methods always succeed."""
    service = Mock()
    # All three notification channels resolve to True (delivered).
    for method_name in (
        "send_completion_notification",
        "send_error_notification",
        "send_progress_notification",
    ):
        setattr(service, method_name, AsyncMock(return_value=True))
    return service
|
|
|
|
|
|
@pytest.fixture
def pipeline(mock_video_service, mock_transcript_service, mock_ai_service,
             mock_cache_manager, mock_notification_service):
    """Assemble a SummaryPipeline wired to the mocked collaborators above."""
    collaborators = dict(
        video_service=mock_video_service,
        transcript_service=mock_transcript_service,
        ai_service=mock_ai_service,
        cache_manager=mock_cache_manager,
        notification_service=mock_notification_service,
    )
    return SummaryPipeline(**collaborators)
|
|
|
|
|
|
class TestSummaryPipeline:
    """Test suite for SummaryPipeline class.

    These tests treat SummaryPipeline as an async orchestrator: process_video
    returns a job id immediately and work presumably continues in a background
    task (the polling loops below suggest this — confirm against the pipeline
    implementation).
    """

    @pytest.mark.asyncio
    async def test_process_video_initialization(self, pipeline):
        """Test video processing initialization: job id, tracking, initial state."""
        video_url = "https://youtube.com/watch?v=test123"
        config = PipelineConfig(summary_length="standard")

        job_id = await pipeline.process_video(video_url, config)

        # Verify a non-empty job ID is generated
        assert job_id is not None
        assert len(job_id) > 0

        # Verify the job is tracked in the active-jobs registry
        assert job_id in pipeline.active_jobs

        # Verify initial state of the tracked PipelineResult
        result = pipeline.active_jobs[job_id]
        assert result.job_id == job_id
        assert result.video_url == video_url
        assert result.status == PipelineStage.INITIALIZED
        assert result.started_at is not None

    @pytest.mark.asyncio
    async def test_process_video_with_progress_callback(self, pipeline):
        """Test video processing with progress callback registration.

        NOTE(review): progress_updates is collected but never asserted on —
        only callback *registration* is verified here.
        """
        video_url = "https://youtube.com/watch?v=test123"
        progress_updates = []

        async def progress_callback(job_id, progress):
            progress_updates.append((job_id, progress))

        job_id = await pipeline.process_video(
            video_url,
            progress_callback=progress_callback
        )

        # Allow some processing time for the background task to start
        await asyncio.sleep(0.1)

        # Verify the callback is registered for this job
        assert job_id in pipeline.progress_callbacks
        assert len(pipeline.progress_callbacks[job_id]) == 1

    @pytest.mark.asyncio
    async def test_successful_pipeline_execution(self, pipeline):
        """Test complete successful pipeline execution end to end."""
        video_url = "https://youtube.com/watch?v=test123"
        config = PipelineConfig(
            summary_length="standard",
            enable_notifications=True
        )

        # Start processing (returns immediately with a job id)
        job_id = await pipeline.process_video(video_url, config)

        # Poll until the job reaches a terminal stage, bounded so a hung
        # pipeline cannot hang the test suite.
        max_wait = 5.0
        waited = 0.0
        while waited < max_wait:
            result = pipeline.active_jobs.get(job_id)
            if result and result.status in [PipelineStage.COMPLETED, PipelineStage.FAILED]:
                break
            await asyncio.sleep(0.1)
            waited += 0.1

        # Verify successful completion and that all result fields are populated
        result = pipeline.active_jobs[job_id]
        assert result.status == PipelineStage.COMPLETED
        assert result.video_id == "test_video_id"
        assert result.summary is not None
        assert result.key_points is not None
        assert result.main_themes is not None
        assert result.quality_score is not None
        assert result.completed_at is not None

        # Verify each collaborator was invoked exactly once along the happy path
        pipeline.video_service.extract_video_id.assert_called_once()
        pipeline.transcript_service.extract_transcript.assert_called_once()
        pipeline.ai_service.generate_summary.assert_called_once()
        pipeline.cache_manager.cache_pipeline_result.assert_called_once()
        pipeline.notification_service.send_completion_notification.assert_called_once()

    @pytest.mark.asyncio
    async def test_pipeline_error_handling(self, pipeline):
        """Test pipeline error handling and retry accounting on hard failure."""
        # Make the very first stage (video-id extraction) fail every time
        pipeline.video_service.extract_video_id.side_effect = Exception("Video not found")

        video_url = "https://youtube.com/watch?v=invalid"
        config = PipelineConfig(max_retries=1)

        job_id = await pipeline.process_video(video_url, config)

        # Poll until the job is marked FAILED (bounded wait)
        max_wait = 5.0
        waited = 0.0
        while waited < max_wait:
            result = pipeline.active_jobs.get(job_id)
            if result and result.status == PipelineStage.FAILED:
                break
            await asyncio.sleep(0.1)
            waited += 0.1

        # Verify failure state: error payload recorded, retries bounded by config
        result = pipeline.active_jobs[job_id]
        assert result.status == PipelineStage.FAILED
        assert result.error is not None
        assert "Video not found" in result.error["message"]
        assert result.retry_count <= config.max_retries

    @pytest.mark.asyncio
    async def test_content_analysis(self, pipeline):
        """Test content analysis: technical classification and indicator extraction."""
        transcript = """
        This is a technical tutorial about programming concepts.
        We will learn about algorithms, functions, and databases.
        The implementation details are complex but important to understand.
        """

        metadata = {
            "title": "Programming Tutorial",
            "category": "Education",
            "tags": ["programming", "tutorial", "technical"],
            "language": "en"
        }

        analysis = await pipeline._analyze_content_characteristics(transcript, metadata)

        assert isinstance(analysis, ContentAnalysis)
        assert analysis.content_type == "technical"
        assert analysis.word_count > 0
        assert analysis.language == "en"
        # Indicators are singular forms, so the analyzer presumably stems or
        # matches keyword roots ("algorithms" -> "algorithm") — confirm.
        assert len(analysis.technical_indicators) > 0
        assert "algorithm" in analysis.technical_indicators
        assert "function" in analysis.technical_indicators

    def test_config_optimization(self, pipeline):
        """Test configuration optimization driven by content analysis."""
        base_config = PipelineConfig(
            summary_length="standard",
            focus_areas=[],
            quality_threshold=0.7
        )

        # Technical, fairly complex content (complexity_score=0.8)
        technical_analysis = ContentAnalysis(
            transcript_length=5000,
            word_count=1000,
            estimated_reading_time=4.0,
            complexity_score=0.8,
            content_type="technical",
            language="en",
            technical_indicators=["algorithm", "function", "code"],
            educational_indicators=[],
            entertainment_indicators=[]
        )

        optimized_config = pipeline._optimize_config_for_content(
            base_config, technical_analysis
        )

        assert optimized_config.summary_length == "standard"  # Not changed for 1000 words
        assert "technical concepts" in optimized_config.focus_areas
        assert optimized_config.quality_threshold < base_config.quality_threshold  # Lowered due to complexity

    @pytest.mark.asyncio
    async def test_quality_validation(self, pipeline):
        """Test summary quality validation yields a sane, bounded score."""
        # A well-formed completed result with several points/themes/insights
        result = PipelineResult(
            job_id="test",
            video_url="test",
            video_id="test",
            status=PipelineStage.COMPLETED,
            summary="This is a good summary with appropriate length and detail.",
            key_points=["Point 1", "Point 2", "Point 3", "Point 4"],
            main_themes=["Theme 1", "Theme 2"],
            actionable_insights=["Insight 1"],
            confidence_score=0.9
        )

        # Neutral, mid-complexity content profile
        analysis = ContentAnalysis(
            transcript_length=10000,
            word_count=2000,
            estimated_reading_time=8.0,
            complexity_score=0.5,
            content_type="general",
            language="en",
            technical_indicators=[],
            educational_indicators=[],
            entertainment_indicators=[]
        )

        quality_score = await pipeline._validate_summary_quality(result, analysis)

        assert 0.0 <= quality_score <= 1.0
        assert quality_score > 0.5  # Should be reasonably high for good summary

    @pytest.mark.asyncio
    async def test_pipeline_cancellation(self, pipeline):
        """Test pipeline cancellation of an in-flight job."""
        video_url = "https://youtube.com/watch?v=test123"

        # Start processing
        job_id = await pipeline.process_video(video_url)

        # Verify the job is active before cancelling
        assert job_id in pipeline.active_jobs

        # Cancel the job
        success = await pipeline.cancel_pipeline(job_id)

        assert success is True

        # Verify the job is marked cancelled with a completion timestamp
        result = pipeline.active_jobs[job_id]
        assert result.status == PipelineStage.CANCELLED
        assert result.completed_at is not None

    @pytest.mark.asyncio
    async def test_get_pipeline_result(self, pipeline):
        """Test getting a pipeline result for unknown and active jobs."""
        video_url = "https://youtube.com/watch?v=test123"

        # Unknown job id -> None, not an exception
        result = await pipeline.get_pipeline_result("non_existent")
        assert result is None

        # Active job -> its tracked result
        job_id = await pipeline.process_video(video_url)
        result = await pipeline.get_pipeline_result(job_id)

        assert result is not None
        assert result.job_id == job_id

    def test_iso_duration_parsing(self, pipeline):
        """Test ISO 8601 duration parsing to total seconds."""
        # Various duration formats; invalid input yields 0 rather than raising
        assert pipeline._parse_iso_duration("PT10M30S") == 630  # 10:30
        assert pipeline._parse_iso_duration("PT1H5M") == 3900  # 1:05:00
        assert pipeline._parse_iso_duration("PT45S") == 45  # 0:45
        assert pipeline._parse_iso_duration("PT2H") == 7200  # 2:00:00
        assert pipeline._parse_iso_duration("invalid") == 0  # Invalid format

    @pytest.mark.asyncio
    async def test_cache_integration(self, pipeline):
        """Test that the pipeline consults the cache for prior results."""
        # Simulate a cache hit: a previously completed result for this URL
        cached_result = {
            "job_id": "cached_job",
            "video_url": "https://youtube.com/watch?v=cached",
            "video_id": "cached_id",
            "status": PipelineStage.COMPLETED.value,
            "summary": "Cached summary",
            "key_points": ["Cached point 1", "Cached point 2"],
            "quality_score": 0.8,
            "completed_at": datetime.utcnow().isoformat()
        }

        pipeline.cache_manager.get_cached_pipeline_result.return_value = cached_result

        video_url = "https://youtube.com/watch?v=cached"
        job_id = await pipeline.process_video(video_url)

        # Give the background task time to perform the cache lookup
        await asyncio.sleep(0.2)

        # Only asserts the cache was consulted, not that the cached payload
        # was restored into the job result.
        pipeline.cache_manager.get_cached_pipeline_result.assert_called()

    @pytest.mark.asyncio
    async def test_notification_integration(self, pipeline):
        """Test notification service integration on a successful run."""
        video_url = "https://youtube.com/watch?v=test123"
        config = PipelineConfig(enable_notifications=True)

        job_id = await pipeline.process_video(video_url, config)

        # Poll until the job completes (bounded wait)
        max_wait = 5.0
        waited = 0.0
        while waited < max_wait:
            result = pipeline.active_jobs.get(job_id)
            if result and result.status == PipelineStage.COMPLETED:
                break
            await asyncio.sleep(0.1)
            waited += 0.1

        # Progress notifications fire at least once; completion exactly once
        pipeline.notification_service.send_progress_notification.assert_called()
        pipeline.notification_service.send_completion_notification.assert_called_once()

    @pytest.mark.asyncio
    async def test_cleanup_completed_jobs(self, pipeline):
        """Test cleanup removes jobs older than the age cutoff, keeps recent ones."""
        # One job completed 25 hours ago (past the 24h cutoff) ...
        old_time = datetime.utcnow() - timedelta(hours=25)

        pipeline.active_jobs["old_job"] = PipelineResult(
            job_id="old_job",
            video_url="test",
            video_id="test",
            status=PipelineStage.COMPLETED,
            completed_at=old_time
        )

        # ... and one completed an hour ago (within the cutoff)
        pipeline.active_jobs["recent_job"] = PipelineResult(
            job_id="recent_job",
            video_url="test",
            video_id="test",
            status=PipelineStage.COMPLETED,
            completed_at=datetime.utcnow() - timedelta(hours=1)
        )

        # Cleanup jobs older than 24 hours
        await pipeline.cleanup_completed_jobs(max_age_hours=24)

        # Verify the old job was removed but the recent job remains
        assert "old_job" not in pipeline.active_jobs
        assert "recent_job" in pipeline.active_jobs

    def test_get_active_jobs(self, pipeline):
        """Test getting the list of active job ids."""
        # Initially no jobs
        assert pipeline.get_active_jobs() == []

        # Seed the registry directly; get_active_jobs only needs the keys
        pipeline.active_jobs["job1"] = Mock()
        pipeline.active_jobs["job2"] = Mock()

        active_jobs = pipeline.get_active_jobs()
        assert len(active_jobs) == 2
        assert "job1" in active_jobs
        assert "job2" in active_jobs
|
|
|
|
|
class TestPipelineEdgeCases:
    """Test edge cases and error conditions."""

    @pytest.mark.asyncio
    async def test_empty_transcript_handling(self, pipeline):
        """Test handling of an empty transcript.

        Only asserts the job object exists — the expected terminal status for
        an empty transcript is not pinned down here (confirm desired behavior
        against the pipeline implementation).
        """
        pipeline.transcript_service.extract_transcript.return_value = Mock(
            transcript=""
        )

        video_url = "https://youtube.com/watch?v=empty"
        job_id = await pipeline.process_video(video_url)

        # Give the background task time to run
        await asyncio.sleep(0.2)

        # Should handle empty transcript gracefully (no crash, job tracked)
        result = pipeline.active_jobs.get(job_id)
        assert result is not None

    @pytest.mark.asyncio
    async def test_invalid_video_url_handling(self, pipeline):
        """Test handling of invalid video URLs: fail fast with the error recorded."""
        pipeline.video_service.extract_video_id.side_effect = ValueError("Invalid URL")

        video_url = "https://not-youtube.com/watch?v=invalid"
        config = PipelineConfig(max_retries=0)  # No retries for faster test

        job_id = await pipeline.process_video(video_url, config)

        # With retries disabled, 0.2s is assumed long enough to reach FAILED
        await asyncio.sleep(0.2)

        result = pipeline.active_jobs.get(job_id)
        assert result.status == PipelineStage.FAILED
        assert "Invalid URL" in result.error["message"]

    @pytest.mark.asyncio
    async def test_ai_service_failure_with_retry(self, pipeline):
        """Test AI service failure followed by a successful retry."""
        # AI service fails on the first call, succeeds on subsequent ones
        call_count = 0

        async def failing_generate_summary(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                raise Exception("API rate limit exceeded")
            return SummaryResult(
                summary="Retry successful",
                key_points=["Point 1"],
                main_themes=["Theme 1"],
                actionable_insights=[],
                confidence_score=0.7,
                processing_metadata={},
                cost_data={}
            )

        pipeline.ai_service.generate_summary.side_effect = failing_generate_summary

        video_url = "https://youtube.com/watch?v=retry_test"
        config = PipelineConfig(max_retries=1)

        job_id = await pipeline.process_video(video_url, config)

        # Poll until a terminal stage; generous bound to allow retry backoff
        max_wait = 10.0
        waited = 0.0
        while waited < max_wait:
            result = pipeline.active_jobs.get(job_id)
            if result and result.status in [PipelineStage.COMPLETED, PipelineStage.FAILED]:
                break
            await asyncio.sleep(0.1)
            waited += 0.1

        result = pipeline.active_jobs[job_id]
        # NOTE(review): these assertions are conditional on COMPLETED, so the
        # test passes vacuously if the job fails or times out — consider
        # asserting the terminal status unconditionally.
        if result.status == PipelineStage.COMPLETED:
            assert result.retry_count > 0
            assert call_count > 1