"""Unit tests for export service and exporters."""
import pytest
import pytest_asyncio
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from datetime import datetime
import tempfile
import os
import json
from pathlib import Path
import zipfile
from backend.services.export_service import (
ExportService,
ExportFormat,
ExportStatus,
ExportRequest,
BulkExportRequest,
ExportResult,
BaseExporter
)
@pytest.fixture
def export_service():
    """Yield an ``ExportService`` whose export directory is a throwaway temp dir.

    The directory (and anything the service wrote into it) is deleted when the
    ``TemporaryDirectory`` context exits, i.e. after the requesting test ends.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        # No explicit teardown needed: leaving the ``with`` removes the directory.
        yield ExportService(export_dir=scratch_dir)
@pytest.fixture
def sample_summary_data():
    """Build a fully populated summary record used as input by the export tests.

    A new dictionary is constructed on every call. It carries the video
    identifiers and metadata, the summary text with its key points, themes and
    insights, plus processing and cost details.
    """
    video_metadata = {
        "title": "Test Video Title",
        "channel_name": "Test Channel",
        "duration": "10:30",
        "published_at": "2025-01-25",
        "views": 1000,
        "likes": 100,
    }
    processing_metadata = {
        "model": "claude-3-5-haiku",
        "processing_time_seconds": 5.2,
        "tokens_used": 1500,
    }
    cost_data = {
        "input_cost": 0.015,
        "output_cost": 0.03,
        "total_cost": 0.045,
    }

    # Keyword order below mirrors the key order of the record.
    return dict(
        video_id="abc123",
        video_url="https://youtube.com/watch?v=abc123",
        video_metadata=video_metadata,
        summary="This is a test summary of the video content. It covers important topics.",
        key_points=["First key point", "Second key point", "Third key point"],
        main_themes=["Technology", "Innovation"],
        actionable_insights=["Implement the new system", "Review the documentation"],
        confidence_score=0.95,
        processing_metadata=processing_metadata,
        cost_data=cost_data,
        created_at="2025-01-25T10:00:00Z",
    )
class TestExportService:
    """Tests for ``ExportService``: single exports, bulk exports and helpers."""

    @staticmethod
    def _read_text(path):
        """Return the text content of ``path``.

        NOTE(review): decodes as UTF-8 instead of the platform default so the
        tests behave the same on every OS; assumes the exporters write UTF-8 —
        confirm against the exporter implementations.
        """
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    @pytest.mark.asyncio
    async def test_export_summary_markdown(self, export_service, sample_summary_data):
        """Test exporting summary to Markdown format."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.MARKDOWN,
            include_metadata=True,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.format == ExportFormat.MARKDOWN
        assert result.file_path is not None
        assert os.path.exists(result.file_path)
        assert result.file_size_bytes > 0
        assert result.download_url == f"/api/export/download/{result.export_id}"

        # Check file content
        content = self._read_text(result.file_path)
        assert "Test Video Title" in content
        assert "Test Channel" in content
        assert "First key point" in content

    @pytest.mark.asyncio
    async def test_export_summary_json(self, export_service, sample_summary_data):
        """Test exporting summary to JSON format."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.JSON,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.format == ExportFormat.JSON
        assert result.file_path.endswith('.json')

        # Validate JSON structure
        json_data = json.loads(self._read_text(result.file_path))
        assert "youtube_summarizer_export" in json_data
        assert json_data["video"]["id"] == "abc123"
        assert json_data["summary"]["text"] == sample_summary_data["summary"]

    @pytest.mark.asyncio
    async def test_export_summary_text(self, export_service, sample_summary_data):
        """Test exporting summary to plain text format."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.PLAIN_TEXT,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.txt')

        content = self._read_text(result.file_path)
        assert "Test Video Title" in content
        assert "SUMMARY" in content.upper()

    @pytest.mark.asyncio
    async def test_export_summary_html(self, export_service, sample_summary_data):
        """Test exporting summary to HTML format."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.HTML,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.html')

        # NOTE(review): the markers checked here had been stripped from the
        # file (leaving an always-true `"" in content` and a broken string
        # literal). The doctype / <html> check below is a reconstruction —
        # confirm it matches what the HTML exporter emits.
        lowered = self._read_text(result.file_path).lower()
        assert "<!doctype html" in lowered or "<html" in lowered

    @pytest.mark.asyncio
    async def test_export_with_custom_branding(self, export_service, sample_summary_data):
        """Test export with custom branding."""
        branding = {
            "company_name": "Test Company",
            "logo_url": "https://example.com/logo.png",
            "primary_color": "#007bff",
        }
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.MARKDOWN,
            custom_branding=branding,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert "Test Company" in self._read_text(result.file_path)

    @pytest.mark.asyncio
    async def test_bulk_export_multiple_formats(self, export_service, sample_summary_data):
        """Test bulk export with multiple formats."""
        # Build the second record with its own ``video_metadata`` dict. A
        # shallow ``.copy()`` would share the nested dict, so retitling the
        # second video would silently retitle the first one (and the fixture).
        second_summary = {
            **sample_summary_data,
            "video_metadata": {
                **sample_summary_data["video_metadata"],
                "title": "Second Video",
            },
        }
        summaries = [sample_summary_data, second_summary]
        request = BulkExportRequest(
            summary_ids=["test1", "test2"],
            formats=[ExportFormat.MARKDOWN, ExportFormat.JSON],
            organize_by="format",
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.zip')
        assert os.path.exists(result.file_path)
        assert result.file_size_bytes > 0

        # Verify ZIP contents
        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()

        # Should have 2 videos * 2 formats = 4 files
        assert len(namelist) >= 4
        assert any('markdown' in name for name in namelist)
        assert any('json' in name for name in namelist)

    @pytest.mark.asyncio
    async def test_bulk_export_organize_by_date(self, export_service, sample_summary_data):
        """Test bulk export organized by date."""
        request = BulkExportRequest(
            summary_ids=["test1"],
            formats=[ExportFormat.MARKDOWN],
            organize_by="date",
        )

        result = await export_service.bulk_export_summaries([sample_summary_data], request)

        assert result.status == ExportStatus.COMPLETED
        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()

        # Should have date folder
        assert any('2025-01-25' in name for name in namelist)

    @pytest.mark.asyncio
    async def test_bulk_export_organize_by_video(self, export_service, sample_summary_data):
        """Test bulk export organized by video."""
        request = BulkExportRequest(
            summary_ids=["test1"],
            formats=[ExportFormat.MARKDOWN, ExportFormat.JSON],
            organize_by="video",
        )

        result = await export_service.bulk_export_summaries([sample_summary_data], request)

        assert result.status == ExportStatus.COMPLETED
        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()

        # Should have video folder with both formats inside
        assert any('Test_Video_Title' in name for name in namelist)

    @pytest.mark.asyncio
    async def test_export_invalid_format(self, export_service, sample_summary_data):
        """Test export with unavailable format (PDF without reportlab)."""
        # Remove PDF exporter to simulate missing dependency
        export_service.exporters.pop(ExportFormat.PDF, None)
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.PDF,
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.FAILED
        assert "not available" in result.error

    def test_sanitize_filename(self, export_service):
        """Test filename sanitization."""
        dangerous_names = [
            # NOTE(review): the markup in this entry had been stripped from the
            # file, so no input contained '<' or '>' even though both are
            # asserted on below. The payload is a reconstruction.
            "Video: Test <script>alert('xss')</script>",
            "../../etc/passwd",
            "C:\\Windows\\System32\\config",
            "Video|with*special?chars",
            "Video\x00with\x1fcontrol\x80chars",
        ]
        forbidden_chars = ('<', '>', ':', '"', '/', '\\', '|', '?', '*', '\x00')

        for name in dangerous_names:
            sanitized = export_service._sanitize_filename(name)

            # Check no dangerous characters remain
            for char in forbidden_chars:
                assert char not in sanitized, f"{char!r} left in {sanitized!r}"

            # Check reasonable length
            assert len(sanitized) <= 100
            assert sanitized.strip() == sanitized

    def test_get_export_status(self, export_service):
        """Test getting export status."""
        export_id = "test-export-123"
        result = ExportResult(
            export_id=export_id,
            status=ExportStatus.COMPLETED,
            format=ExportFormat.MARKDOWN,
        )
        export_service.active_exports[export_id] = result

        assert export_service.get_export_status(export_id) == result

        # Test non-existent export
        assert export_service.get_export_status("non-existent") is None

    @pytest.mark.asyncio
    async def test_cleanup_old_exports(self, export_service):
        """Test cleanup of old export files."""
        # Create old and new exports
        old_export = ExportResult(
            export_id="old",
            status=ExportStatus.COMPLETED,
            format=ExportFormat.MARKDOWN,
            created_at=datetime(2020, 1, 1),  # Very old
        )
        new_export = ExportResult(
            export_id="new",
            status=ExportStatus.COMPLETED,
            format=ExportFormat.MARKDOWN,
            # NOTE(review): kept as a naive UTC timestamp; presumably the
            # service compares against naive datetimes — confirm before
            # switching to a timezone-aware value.
            created_at=datetime.utcnow(),  # Current
        )

        # Create actual files
        with tempfile.NamedTemporaryFile(delete=False, dir=export_service.export_dir) as f:
            old_export.file_path = f.name
            f.write(b"old content")
        with tempfile.NamedTemporaryFile(delete=False, dir=export_service.export_dir) as f:
            new_export.file_path = f.name
            f.write(b"new content")

        export_service.active_exports["old"] = old_export
        export_service.active_exports["new"] = new_export

        # Run cleanup
        await export_service.cleanup_old_exports(max_age_hours=24)

        # Old export should be removed
        assert "old" not in export_service.active_exports
        assert not os.path.exists(old_export.file_path)

        # New export should remain
        assert "new" in export_service.active_exports
        assert os.path.exists(new_export.file_path)

        # Clean up test file
        os.remove(new_export.file_path)
class TestExporters:
    """Tests that call each exporter class directly, without ``ExportService``."""

    @staticmethod
    def _read_text(path):
        """Return the text content of ``path``.

        NOTE(review): decodes as UTF-8 instead of the platform default;
        assumes the exporters write UTF-8 — confirm.
        """
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    @staticmethod
    def _remove_if_exists(path):
        """Delete ``path`` if it exists, so a failed test leaves no file behind."""
        if os.path.exists(path):
            os.remove(path)

    @pytest.mark.asyncio
    async def test_markdown_exporter(self, sample_summary_data):
        """Test Markdown exporter."""
        from backend.services.exporters.markdown_exporter import MarkdownExporter

        exporter = MarkdownExporter()
        file_path = await exporter.export(sample_summary_data)
        try:
            assert os.path.exists(file_path)
            assert file_path.endswith('.md')

            content = self._read_text(file_path)

            # Check markdown formatting
            assert '# ' in content  # Headers
            assert '- ' in content  # Lists
            assert '**' in content or '__' in content  # Bold text
        finally:
            # Cleanup runs even when an assertion above fails.
            self._remove_if_exists(file_path)

    @pytest.mark.asyncio
    async def test_json_exporter_structure(self, sample_summary_data):
        """Test JSON exporter structure."""
        from backend.services.exporters.json_exporter import JSONExporter

        exporter = JSONExporter()
        file_path = await exporter.export(sample_summary_data)
        try:
            assert os.path.exists(file_path)

            data = json.loads(self._read_text(file_path))

            # Verify structure
            assert "youtube_summarizer_export" in data
            assert "version" in data["youtube_summarizer_export"]
            assert "video" in data
            assert "summary" in data
            assert "processing" in data

            # Verify data integrity
            assert data["video"]["id"] == sample_summary_data["video_id"]
            assert data["summary"]["confidence_score"] == sample_summary_data["confidence_score"]
        finally:
            self._remove_if_exists(file_path)

    @pytest.mark.asyncio
    async def test_html_exporter_structure(self, sample_summary_data):
        """Test HTML exporter structure."""
        from backend.services.exporters.html_exporter import HTMLExporter

        exporter = HTMLExporter()
        file_path = await exporter.export(sample_summary_data)
        try:
            assert os.path.exists(file_path)

            content = self._read_text(file_path)

            # Check HTML structure.
            # NOTE(review): the tag literals had been stripped from the file,
            # leaving four always-true `'' in content` checks. The tags below
            # are a reconstruction, matched case-insensitively and without the
            # closing '>' so attributes are tolerated — confirm against the
            # exporter's actual output.
            lowered = content.lower()
            assert '<!doctype html' in lowered
            assert '<html' in lowered
            assert '<head' in lowered
            assert '<body' in lowered

            # Check content presence
            assert sample_summary_data["video_metadata"]["title"] in content
            assert sample_summary_data["summary"] in content
        finally:
            self._remove_if_exists(file_path)

    @pytest.mark.asyncio
    async def test_text_exporter_simplicity(self, sample_summary_data):
        """Test plain text exporter."""
        from backend.services.exporters.text_exporter import PlainTextExporter

        exporter = PlainTextExporter()
        file_path = await exporter.export(sample_summary_data)
        try:
            assert os.path.exists(file_path)
            assert file_path.endswith('.txt')

            content = self._read_text(file_path)

            # Should be plain text without markup
            assert '<' not in content  # No HTML
            # Same truth value as the former `'#' not in c or '# ' not in c`:
            # a text without '#' cannot contain '# ' either.
            assert '# ' not in content  # No markdown headers
            assert '**' not in content  # No markdown bold

            # But should have content
            assert sample_summary_data["summary"] in content
            assert "KEY POINTS" in content.upper()
        finally:
            self._remove_if_exists(file_path)

    @pytest.mark.asyncio
    async def test_pdf_exporter_if_available(self, sample_summary_data):
        """Test PDF exporter if reportlab is installed."""
        try:
            from backend.services.exporters.pdf_exporter import PDFExporter
            import reportlab  # noqa: F401 - imported only to detect availability
        except ImportError:
            pytest.skip("PDF export requires reportlab")

        exporter = PDFExporter()
        file_path = await exporter.export(sample_summary_data)
        try:
            assert os.path.exists(file_path)
            assert file_path.endswith('.pdf')

            # Check it's a valid PDF (basic check)
            with open(file_path, 'rb') as f:
                header = f.read(5)
            assert header == b'%PDF-'  # PDF magic bytes
        finally:
            self._remove_if_exists(file_path)
class TestBulkExport:
    """Tests for ``ExportService.bulk_export_summaries`` beyond the basic cases."""

    @pytest.mark.asyncio
    async def test_bulk_export_progress(self, export_service, sample_summary_data):
        """Test bulk export with progress tracking."""
        # The same record three times; it is only read, never mutated here.
        summaries = [sample_summary_data] * 3
        request = BulkExportRequest(
            summary_ids=["id1", "id2", "id3"],
            formats=[ExportFormat.MARKDOWN],
            organize_by="format",
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.created_at is not None
        assert result.completed_at is not None
        # ">=" rather than ">": both timestamps can land on the same clock
        # tick, and completion must simply never precede creation.
        assert result.completed_at >= result.created_at

    @pytest.mark.asyncio
    async def test_bulk_export_empty_list(self, export_service):
        """Test bulk export with empty summary list."""
        request = BulkExportRequest(
            summary_ids=[],
            formats=[ExportFormat.MARKDOWN],
            organize_by="format",
        )

        result = await export_service.bulk_export_summaries([], request)

        # Should complete successfully but with minimal content
        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.zip')

    @pytest.mark.asyncio
    async def test_bulk_export_mixed_formats(self, export_service, sample_summary_data):
        """Test bulk export with all available formats."""
        # Get all available formats
        available_formats = list(export_service.exporters.keys())
        request = BulkExportRequest(
            summary_ids=["test1"],
            formats=available_formats,
            organize_by="format",
        )

        result = await export_service.bulk_export_summaries([sample_summary_data], request)

        assert result.status == ExportStatus.COMPLETED

        # Verify each format is in the archive
        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()

        # Loop variable is not called ``format`` so the builtin stays usable.
        for export_format in available_formats:
            assert any(export_format.value in name for name in namelist), (
                f"no archive entry for format {export_format.value!r}"
            )