540 lines
19 KiB
Python
540 lines
19 KiB
Python
"""Unit tests for export service and exporters."""
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
from unittest.mock import Mock, AsyncMock, patch, MagicMock
|
|
from datetime import datetime
|
|
import tempfile
|
|
import os
|
|
import json
|
|
from pathlib import Path
|
|
import zipfile
|
|
|
|
from backend.services.export_service import (
|
|
ExportService,
|
|
ExportFormat,
|
|
ExportStatus,
|
|
ExportRequest,
|
|
BulkExportRequest,
|
|
ExportResult,
|
|
BaseExporter
|
|
)
|
|
|
|
|
|
@pytest.fixture
def export_service():
    """Yield an ``ExportService`` that writes into a throwaway temp directory.

    The directory is created before the service is built and removed again
    once the requesting test has finished.
    """
    scratch = tempfile.TemporaryDirectory()
    try:
        yield ExportService(export_dir=scratch.name)
    finally:
        # Mirrors what leaving a ``with TemporaryDirectory()`` block would do.
        scratch.cleanup()
|
|
|
|
|
|
@pytest.fixture
def sample_summary_data():
    """Provide a fully populated summary payload for the export tests.

    Returns a fresh dict on every request, containing video identification,
    video metadata, the summary text with its key points / themes / insights,
    and processing and cost bookkeeping.
    """
    video_metadata = {
        "title": "Test Video Title",
        "channel_name": "Test Channel",
        "duration": "10:30",
        "published_at": "2025-01-25",
        "views": 1000,
        "likes": 100,
    }
    key_points = ["First key point", "Second key point", "Third key point"]
    main_themes = ["Technology", "Innovation"]
    actionable_insights = ["Implement the new system", "Review the documentation"]
    processing_metadata = {
        "model": "claude-3-5-haiku",
        "processing_time_seconds": 5.2,
        "tokens_used": 1500,
    }
    cost_data = {
        "input_cost": 0.015,
        "output_cost": 0.03,
        "total_cost": 0.045,
    }

    # Key order is kept identical to the original literal.
    payload = {
        "video_id": "abc123",
        "video_url": "https://youtube.com/watch?v=abc123",
        "video_metadata": video_metadata,
        "summary": "This is a test summary of the video content. It covers important topics.",
        "key_points": key_points,
        "main_themes": main_themes,
        "actionable_insights": actionable_insights,
        "confidence_score": 0.95,
        "processing_metadata": processing_metadata,
        "cost_data": cost_data,
        "created_at": "2025-01-25T10:00:00Z",
    }
    return payload
|
|
|
|
|
|
class TestExportService:
    """Test ExportService class.

    Covers single-summary exports in each text format, custom branding,
    bulk (ZIP) exports with the different ``organize_by`` modes, the
    failure path for an unavailable format, filename sanitization, status
    lookup and cleanup of old exports.
    """

    @pytest.mark.asyncio
    async def test_export_summary_markdown(self, export_service, sample_summary_data):
        """Test exporting summary to Markdown format."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.MARKDOWN,
            include_metadata=True,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.format == ExportFormat.MARKDOWN
        assert result.file_path is not None
        assert os.path.exists(result.file_path)
        assert result.file_size_bytes > 0
        assert result.download_url == f"/api/export/download/{result.export_id}"

        # Check file content
        with open(result.file_path, 'r') as f:
            content = f.read()
        assert "Test Video Title" in content
        assert "Test Channel" in content
        assert "First key point" in content

    @pytest.mark.asyncio
    async def test_export_summary_json(self, export_service, sample_summary_data):
        """Test exporting summary to JSON format."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.JSON,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.format == ExportFormat.JSON
        assert result.file_path.endswith('.json')

        # Validate JSON structure
        with open(result.file_path, 'r') as f:
            json_data = json.load(f)
        assert "youtube_summarizer_export" in json_data
        assert json_data["video"]["id"] == "abc123"
        assert json_data["summary"]["text"] == sample_summary_data["summary"]

    @pytest.mark.asyncio
    async def test_export_summary_text(self, export_service, sample_summary_data):
        """Test exporting summary to plain text format."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.PLAIN_TEXT,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.txt')

        with open(result.file_path, 'r') as f:
            content = f.read()
        assert "Test Video Title" in content
        # Case-insensitive check for a "summary" heading/label.
        assert "SUMMARY" in content.upper()

    @pytest.mark.asyncio
    async def test_export_summary_html(self, export_service, sample_summary_data):
        """Test exporting summary to HTML format."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.HTML,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.html')

        with open(result.file_path, 'r') as f:
            content = f.read()
        assert "<html" in content.lower()
        assert "Test Video Title" in content
        assert "<h1>" in content or "<h2>" in content

    @pytest.mark.asyncio
    async def test_export_with_custom_branding(self, export_service, sample_summary_data):
        """Test export with custom branding."""
        branding = {
            "company_name": "Test Company",
            "logo_url": "https://example.com/logo.png",
            "primary_color": "#007bff",
        }

        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.MARKDOWN,
            custom_branding=branding,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED

        with open(result.file_path, 'r') as f:
            content = f.read()
        assert "Test Company" in content

    @pytest.mark.asyncio
    async def test_bulk_export_multiple_formats(self, export_service, sample_summary_data):
        """Test bulk export with multiple formats."""
        # Give the second summary its own nested ``video_metadata`` dict.
        # ``dict.copy()`` is shallow, so editing the nested title on a copy
        # would rename the first summary as well and both entries would end
        # up describing the same video.
        second_summary = {
            **sample_summary_data,
            "video_metadata": {
                **sample_summary_data["video_metadata"],
                "title": "Second Video",
            },
        }
        summaries = [sample_summary_data, second_summary]

        request = BulkExportRequest(
            summary_ids=["test1", "test2"],
            formats=[ExportFormat.MARKDOWN, ExportFormat.JSON],
            organize_by="format",
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.zip')
        assert os.path.exists(result.file_path)
        assert result.file_size_bytes > 0

        # Verify ZIP contents
        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()
        # Should have 2 videos * 2 formats = 4 files
        assert len(namelist) >= 4
        assert any('markdown' in name for name in namelist)
        assert any('json' in name for name in namelist)

    @pytest.mark.asyncio
    async def test_bulk_export_organize_by_date(self, export_service, sample_summary_data):
        """Test bulk export organized by date."""
        summaries = [sample_summary_data]

        request = BulkExportRequest(
            summary_ids=["test1"],
            formats=[ExportFormat.MARKDOWN],
            organize_by="date",
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED

        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()
        # Should have date folder
        assert any('2025-01-25' in name for name in namelist)

    @pytest.mark.asyncio
    async def test_bulk_export_organize_by_video(self, export_service, sample_summary_data):
        """Test bulk export organized by video."""
        summaries = [sample_summary_data]

        request = BulkExportRequest(
            summary_ids=["test1"],
            formats=[ExportFormat.MARKDOWN, ExportFormat.JSON],
            organize_by="video",
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED

        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()
        # Should have video folder with both formats inside
        assert any('Test_Video_Title' in name for name in namelist)

    @pytest.mark.asyncio
    async def test_export_invalid_format(self, export_service, sample_summary_data):
        """Test export with unavailable format (PDF without reportlab)."""
        # Remove PDF exporter to simulate missing dependency; a no-op when
        # the exporter was never registered.
        export_service.exporters.pop(ExportFormat.PDF, None)

        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.PDF,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.FAILED
        assert "not available" in result.error

    def test_sanitize_filename(self, export_service):
        """Test filename sanitization."""
        dangerous_names = [
            "Video: Test <script>alert('xss')</script>",
            "../../etc/passwd",
            "C:\\Windows\\System32\\config",
            "Video|with*special?chars",
            "Video\x00with\x1fcontrol\x80chars",
        ]
        # Characters that must never survive sanitization.
        forbidden_chars = ('<', '>', ':', '"', '/', '\\', '|', '?', '*', '\x00')

        for name in dangerous_names:
            sanitized = export_service._sanitize_filename(name)

            # Check no dangerous characters remain
            for char in forbidden_chars:
                assert char not in sanitized

            # Check reasonable length
            assert len(sanitized) <= 100
            # No leading/trailing whitespace
            assert sanitized.strip() == sanitized

    def test_get_export_status(self, export_service):
        """Test getting export status."""
        export_id = "test-export-123"
        result = ExportResult(
            export_id=export_id,
            status=ExportStatus.COMPLETED,
            format=ExportFormat.MARKDOWN,
        )

        export_service.active_exports[export_id] = result

        retrieved = export_service.get_export_status(export_id)
        assert retrieved == result

        # Test non-existent export
        assert export_service.get_export_status("non-existent") is None

    @pytest.mark.asyncio
    async def test_cleanup_old_exports(self, export_service):
        """Test cleanup of old export files."""
        # Create old and new exports
        old_export = ExportResult(
            export_id="old",
            status=ExportStatus.COMPLETED,
            format=ExportFormat.MARKDOWN,
            created_at=datetime(2020, 1, 1),  # Very old
        )

        # NOTE(review): ``datetime.utcnow()`` is deprecated since Python 3.12.
        # It is kept (naive) here because how the service compares timestamps
        # is not visible from this file -- confirm before switching to an
        # aware datetime.
        new_export = ExportResult(
            export_id="new",
            status=ExportStatus.COMPLETED,
            format=ExportFormat.MARKDOWN,
            created_at=datetime.utcnow(),  # Current
        )

        # Create actual files (delete=False so they outlive the handle)
        with tempfile.NamedTemporaryFile(delete=False, dir=export_service.export_dir) as f:
            old_export.file_path = f.name
            f.write(b"old content")

        with tempfile.NamedTemporaryFile(delete=False, dir=export_service.export_dir) as f:
            new_export.file_path = f.name
            f.write(b"new content")

        export_service.active_exports["old"] = old_export
        export_service.active_exports["new"] = new_export

        # Run cleanup
        await export_service.cleanup_old_exports(max_age_hours=24)

        # Old export should be removed
        assert "old" not in export_service.active_exports
        assert not os.path.exists(old_export.file_path)

        # New export should remain
        assert "new" in export_service.active_exports
        assert os.path.exists(new_export.file_path)

        # Clean up test file
        os.remove(new_export.file_path)
|
|
|
|
|
|
class TestExporters:
    """Test individual exporters.

    Each test calls an exporter's ``export`` coroutine directly, inspects the
    file it reports, and always deletes that file afterwards -- including
    when an assertion fails.
    """

    @staticmethod
    def _discard(file_path):
        """Delete ``file_path`` if it exists.

        Guarded so that a missing file (e.g. the existence assertion itself
        failed) does not raise from a ``finally`` block and hide the real
        test failure.
        """
        if os.path.exists(file_path):
            os.remove(file_path)

    @pytest.mark.asyncio
    async def test_markdown_exporter(self, sample_summary_data):
        """Test Markdown exporter."""
        from backend.services.exporters.markdown_exporter import MarkdownExporter

        exporter = MarkdownExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)
            assert file_path.endswith('.md')

            with open(file_path, 'r') as f:
                content = f.read()

            # Check markdown formatting
            assert '# ' in content  # Headers
            assert '- ' in content  # Lists
            assert '**' in content or '__' in content  # Bold text
        finally:
            # Cleanup
            self._discard(file_path)

    @pytest.mark.asyncio
    async def test_json_exporter_structure(self, sample_summary_data):
        """Test JSON exporter structure."""
        from backend.services.exporters.json_exporter import JSONExporter

        exporter = JSONExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)

            with open(file_path, 'r') as f:
                data = json.load(f)

            # Verify structure
            assert "youtube_summarizer_export" in data
            assert "version" in data["youtube_summarizer_export"]
            assert "video" in data
            assert "summary" in data
            assert "processing" in data

            # Verify data integrity
            assert data["video"]["id"] == sample_summary_data["video_id"]
            assert data["summary"]["confidence_score"] == sample_summary_data["confidence_score"]
        finally:
            self._discard(file_path)

    @pytest.mark.asyncio
    async def test_html_exporter_structure(self, sample_summary_data):
        """Test HTML exporter structure."""
        from backend.services.exporters.html_exporter import HTMLExporter

        exporter = HTMLExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)

            with open(file_path, 'r') as f:
                content = f.read()

            # Check HTML structure
            assert '<!DOCTYPE html>' in content
            assert '<html' in content
            assert '<head>' in content
            assert '<body>' in content
            assert '</html>' in content

            # Check content presence
            assert sample_summary_data["video_metadata"]["title"] in content
            assert sample_summary_data["summary"] in content
        finally:
            self._discard(file_path)

    @pytest.mark.asyncio
    async def test_text_exporter_simplicity(self, sample_summary_data):
        """Test plain text exporter."""
        from backend.services.exporters.text_exporter import PlainTextExporter

        exporter = PlainTextExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)
            assert file_path.endswith('.txt')

            with open(file_path, 'r') as f:
                content = f.read()

            # Should be plain text without markup
            assert '<' not in content  # No HTML
            # No markdown headers. The former
            # ``'#' not in content or '# ' not in content`` reduces to this
            # single check: text without '#' cannot contain '# ' either.
            assert '# ' not in content
            assert '**' not in content  # No markdown bold

            # But should have content
            assert sample_summary_data["summary"] in content
            assert "KEY POINTS" in content.upper()
        finally:
            self._discard(file_path)

    @pytest.mark.asyncio
    async def test_pdf_exporter_if_available(self, sample_summary_data):
        """Test PDF exporter if reportlab is installed."""
        try:
            from backend.services.exporters.pdf_exporter import PDFExporter
            # Imported only to probe availability; the name itself is unused.
            import reportlab  # noqa: F401
        except ImportError:
            pytest.skip("PDF export requires reportlab")

        exporter = PDFExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)
            assert file_path.endswith('.pdf')

            # Check it's a valid PDF (basic check)
            with open(file_path, 'rb') as f:
                header = f.read(5)
            assert header == b'%PDF-'  # PDF magic bytes
        finally:
            self._discard(file_path)
|
|
|
|
|
|
class TestBulkExport:
    """Test bulk export functionality.

    Exercises ``ExportService.bulk_export_summaries`` for timestamp
    bookkeeping, an empty input list, and every registered export format.
    """

    @pytest.mark.asyncio
    async def test_bulk_export_progress(self, export_service, sample_summary_data):
        """Test bulk export with progress tracking."""
        # The same summary object three times is enough for this test.
        summaries = [sample_summary_data for _ in range(3)]

        request = BulkExportRequest(
            summary_ids=["id1", "id2", "id3"],
            formats=[ExportFormat.MARKDOWN],
            organize_by="format",
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.created_at is not None
        assert result.completed_at is not None
        # ``>=`` rather than ``>``: on a coarse system clock both timestamps
        # can be identical, and a strict comparison would fail spuriously.
        assert result.completed_at >= result.created_at

    @pytest.mark.asyncio
    async def test_bulk_export_empty_list(self, export_service):
        """Test bulk export with empty summary list."""
        request = BulkExportRequest(
            summary_ids=[],
            formats=[ExportFormat.MARKDOWN],
            organize_by="format",
        )

        result = await export_service.bulk_export_summaries([], request)

        # Should complete successfully but with minimal content
        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.zip')

    @pytest.mark.asyncio
    async def test_bulk_export_mixed_formats(self, export_service, sample_summary_data):
        """Test bulk export with all available formats."""
        summaries = [sample_summary_data]

        # Get all available formats
        available_formats = list(export_service.exporters.keys())

        request = BulkExportRequest(
            summary_ids=["test1"],
            formats=available_formats,
            organize_by="format",
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED

        # Verify each format is in the archive
        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()

        # ``export_format`` instead of ``format`` to avoid shadowing the builtin.
        for export_format in available_formats:
            assert any(export_format.value in name for name in namelist)