"""Unit tests for export service and exporters.""" import pytest import pytest_asyncio from unittest.mock import Mock, AsyncMock, patch, MagicMock from datetime import datetime import tempfile import os import json from pathlib import Path import zipfile from backend.services.export_service import ( ExportService, ExportFormat, ExportStatus, ExportRequest, BulkExportRequest, ExportResult, BaseExporter ) @pytest.fixture def export_service(): """Create an export service instance with temp directory.""" with tempfile.TemporaryDirectory() as temp_dir: service = ExportService(export_dir=temp_dir) yield service # Cleanup happens automatically with TemporaryDirectory @pytest.fixture def sample_summary_data(): """Sample summary data for testing.""" return { "video_id": "abc123", "video_url": "https://youtube.com/watch?v=abc123", "video_metadata": { "title": "Test Video Title", "channel_name": "Test Channel", "duration": "10:30", "published_at": "2025-01-25", "views": 1000, "likes": 100 }, "summary": "This is a test summary of the video content. It covers important topics.", "key_points": [ "First key point", "Second key point", "Third key point" ], "main_themes": [ "Technology", "Innovation" ], "actionable_insights": [ "Implement the new system", "Review the documentation" ], "confidence_score": 0.95, "processing_metadata": { "model": "claude-3-5-haiku", "processing_time_seconds": 5.2, "tokens_used": 1500 }, "cost_data": { "input_cost": 0.015, "output_cost": 0.03, "total_cost": 0.045 }, "created_at": "2025-01-25T10:00:00Z" } class TestExportService: """Test ExportService class.""" @pytest.mark.asyncio async def test_export_summary_markdown(self, export_service, sample_summary_data): """Test exporting summary to Markdown format.""" request = ExportRequest( summary_id="test123", format=ExportFormat.MARKDOWN, include_metadata=True ) result = await export_service.export_summary(sample_summary_data, request) assert result.status == ExportStatus.COMPLETED assert result.format == ExportFormat.MARKDOWN assert result.file_path is not None assert os.path.exists(result.file_path) assert result.file_size_bytes > 0 assert result.download_url == f"/api/export/download/{result.export_id}" # Check file content with open(result.file_path, 'r') as f: content = f.read() assert "Test Video Title" in content assert "Test Channel" in content assert "First key point" in content @pytest.mark.asyncio async def test_export_summary_json(self, export_service, sample_summary_data): """Test exporting summary to JSON format.""" request = ExportRequest( summary_id="test123", format=ExportFormat.JSON ) result = await export_service.export_summary(sample_summary_data, request) assert result.status == ExportStatus.COMPLETED assert result.format == ExportFormat.JSON assert result.file_path.endswith('.json') # Validate JSON structure with open(result.file_path, 'r') as f: json_data = json.load(f) assert "youtube_summarizer_export" in json_data assert json_data["video"]["id"] == "abc123" assert json_data["summary"]["text"] == sample_summary_data["summary"] @pytest.mark.asyncio async def test_export_summary_text(self, export_service, sample_summary_data): """Test exporting summary to plain text format.""" request = ExportRequest( summary_id="test123", format=ExportFormat.PLAIN_TEXT ) result = await export_service.export_summary(sample_summary_data, request) assert result.status == ExportStatus.COMPLETED assert result.file_path.endswith('.txt') with open(result.file_path, 'r') as f: content = f.read() assert "Test Video Title" in content assert "SUMMARY" in content.upper() @pytest.mark.asyncio async def test_export_summary_html(self, export_service, sample_summary_data): """Test exporting summary to HTML format.""" request = ExportRequest( summary_id="test123", format=ExportFormat.HTML ) result = await export_service.export_summary(sample_summary_data, request) assert result.status == ExportStatus.COMPLETED assert result.file_path.endswith('.html') with open(result.file_path, 'r') as f: content = f.read() assert "" in content or "

" in content @pytest.mark.asyncio async def test_export_with_custom_branding(self, export_service, sample_summary_data): """Test export with custom branding.""" branding = { "company_name": "Test Company", "logo_url": "https://example.com/logo.png", "primary_color": "#007bff" } request = ExportRequest( summary_id="test123", format=ExportFormat.MARKDOWN, custom_branding=branding ) result = await export_service.export_summary(sample_summary_data, request) assert result.status == ExportStatus.COMPLETED with open(result.file_path, 'r') as f: content = f.read() assert "Test Company" in content @pytest.mark.asyncio async def test_bulk_export_multiple_formats(self, export_service, sample_summary_data): """Test bulk export with multiple formats.""" summaries = [sample_summary_data, sample_summary_data.copy()] summaries[1]["video_metadata"]["title"] = "Second Video" request = BulkExportRequest( summary_ids=["test1", "test2"], formats=[ExportFormat.MARKDOWN, ExportFormat.JSON], organize_by="format" ) result = await export_service.bulk_export_summaries(summaries, request) assert result.status == ExportStatus.COMPLETED assert result.file_path.endswith('.zip') assert os.path.exists(result.file_path) assert result.file_size_bytes > 0 # Verify ZIP contents with zipfile.ZipFile(result.file_path, 'r') as zf: namelist = zf.namelist() # Should have 2 videos * 2 formats = 4 files assert len(namelist) >= 4 assert any('markdown' in name for name in namelist) assert any('json' in name for name in namelist) @pytest.mark.asyncio async def test_bulk_export_organize_by_date(self, export_service, sample_summary_data): """Test bulk export organized by date.""" summaries = [sample_summary_data] request = BulkExportRequest( summary_ids=["test1"], formats=[ExportFormat.MARKDOWN], organize_by="date" ) result = await export_service.bulk_export_summaries(summaries, request) assert result.status == ExportStatus.COMPLETED with zipfile.ZipFile(result.file_path, 'r') as zf: namelist = zf.namelist() # Should have date folder assert any('2025-01-25' in name for name in namelist) @pytest.mark.asyncio async def test_bulk_export_organize_by_video(self, export_service, sample_summary_data): """Test bulk export organized by video.""" summaries = [sample_summary_data] request = BulkExportRequest( summary_ids=["test1"], formats=[ExportFormat.MARKDOWN, ExportFormat.JSON], organize_by="video" ) result = await export_service.bulk_export_summaries(summaries, request) assert result.status == ExportStatus.COMPLETED with zipfile.ZipFile(result.file_path, 'r') as zf: namelist = zf.namelist() # Should have video folder with both formats inside assert any('Test_Video_Title' in name for name in namelist) @pytest.mark.asyncio async def test_export_invalid_format(self, export_service, sample_summary_data): """Test export with unavailable format (PDF without reportlab).""" # Remove PDF exporter to simulate missing dependency if ExportFormat.PDF in export_service.exporters: del export_service.exporters[ExportFormat.PDF] request = ExportRequest( summary_id="test123", format=ExportFormat.PDF ) result = await export_service.export_summary(sample_summary_data, request) assert result.status == ExportStatus.FAILED assert "not available" in result.error def test_sanitize_filename(self, export_service): """Test filename sanitization.""" dangerous_names = [ "Video: Test ", "../../etc/passwd", "C:\\Windows\\System32\\config", "Video|with*special?chars", "Video\x00with\x1fcontrol\x80chars" ] for name in dangerous_names: sanitized = export_service._sanitize_filename(name) # Check no dangerous characters remain assert '<' not in sanitized assert '>' not in sanitized assert ':' not in sanitized assert '"' not in sanitized assert '/' not in sanitized assert '\\' not in sanitized assert '|' not in sanitized assert '?' not in sanitized assert '*' not in sanitized assert '\x00' not in sanitized # Check reasonable length assert len(sanitized) <= 100 assert sanitized.strip() == sanitized def test_get_export_status(self, export_service): """Test getting export status.""" export_id = "test-export-123" result = ExportResult( export_id=export_id, status=ExportStatus.COMPLETED, format=ExportFormat.MARKDOWN ) export_service.active_exports[export_id] = result retrieved = export_service.get_export_status(export_id) assert retrieved == result # Test non-existent export assert export_service.get_export_status("non-existent") is None @pytest.mark.asyncio async def test_cleanup_old_exports(self, export_service): """Test cleanup of old export files.""" # Create old and new exports old_export = ExportResult( export_id="old", status=ExportStatus.COMPLETED, format=ExportFormat.MARKDOWN, created_at=datetime(2020, 1, 1) # Very old ) new_export = ExportResult( export_id="new", status=ExportStatus.COMPLETED, format=ExportFormat.MARKDOWN, created_at=datetime.utcnow() # Current ) # Create actual files with tempfile.NamedTemporaryFile(delete=False, dir=export_service.export_dir) as f: old_export.file_path = f.name f.write(b"old content") with tempfile.NamedTemporaryFile(delete=False, dir=export_service.export_dir) as f: new_export.file_path = f.name f.write(b"new content") export_service.active_exports["old"] = old_export export_service.active_exports["new"] = new_export # Run cleanup await export_service.cleanup_old_exports(max_age_hours=24) # Old export should be removed assert "old" not in export_service.active_exports assert not os.path.exists(old_export.file_path) # New export should remain assert "new" in export_service.active_exports assert os.path.exists(new_export.file_path) # Clean up test file os.remove(new_export.file_path) class TestExporters: """Test individual exporters.""" @pytest.mark.asyncio async def test_markdown_exporter(self, sample_summary_data): """Test Markdown exporter.""" from backend.services.exporters.markdown_exporter import MarkdownExporter exporter = MarkdownExporter() file_path = await exporter.export(sample_summary_data) assert os.path.exists(file_path) assert file_path.endswith('.md') with open(file_path, 'r') as f: content = f.read() # Check markdown formatting assert '# ' in content # Headers assert '- ' in content # Lists assert '**' in content or '__' in content # Bold text # Cleanup os.remove(file_path) @pytest.mark.asyncio async def test_json_exporter_structure(self, sample_summary_data): """Test JSON exporter structure.""" from backend.services.exporters.json_exporter import JSONExporter exporter = JSONExporter() file_path = await exporter.export(sample_summary_data) assert os.path.exists(file_path) with open(file_path, 'r') as f: data = json.load(f) # Verify structure assert "youtube_summarizer_export" in data assert "version" in data["youtube_summarizer_export"] assert "video" in data assert "summary" in data assert "processing" in data # Verify data integrity assert data["video"]["id"] == sample_summary_data["video_id"] assert data["summary"]["confidence_score"] == sample_summary_data["confidence_score"] os.remove(file_path) @pytest.mark.asyncio async def test_html_exporter_structure(self, sample_summary_data): """Test HTML exporter structure.""" from backend.services.exporters.html_exporter import HTMLExporter exporter = HTMLExporter() file_path = await exporter.export(sample_summary_data) assert os.path.exists(file_path) with open(file_path, 'r') as f: content = f.read() # Check HTML structure assert '' in content assert '' in content assert '' in content assert '' in content # Check content presence assert sample_summary_data["video_metadata"]["title"] in content assert sample_summary_data["summary"] in content os.remove(file_path) @pytest.mark.asyncio async def test_text_exporter_simplicity(self, sample_summary_data): """Test plain text exporter.""" from backend.services.exporters.text_exporter import PlainTextExporter exporter = PlainTextExporter() file_path = await exporter.export(sample_summary_data) assert os.path.exists(file_path) assert file_path.endswith('.txt') with open(file_path, 'r') as f: content = f.read() # Should be plain text without markup assert '<' not in content # No HTML assert '#' not in content or '# ' not in content # No markdown headers assert '**' not in content # No markdown bold # But should have content assert sample_summary_data["summary"] in content assert "KEY POINTS" in content.upper() os.remove(file_path) @pytest.mark.asyncio async def test_pdf_exporter_if_available(self, sample_summary_data): """Test PDF exporter if reportlab is installed.""" try: from backend.services.exporters.pdf_exporter import PDFExporter import reportlab except ImportError: pytest.skip("PDF export requires reportlab") exporter = PDFExporter() file_path = await exporter.export(sample_summary_data) assert os.path.exists(file_path) assert file_path.endswith('.pdf') # Check it's a valid PDF (basic check) with open(file_path, 'rb') as f: header = f.read(5) assert header == b'%PDF-' # PDF magic bytes os.remove(file_path) class TestBulkExport: """Test bulk export functionality.""" @pytest.mark.asyncio async def test_bulk_export_progress(self, export_service, sample_summary_data): """Test bulk export with progress tracking.""" summaries = [sample_summary_data for _ in range(3)] request = BulkExportRequest( summary_ids=["id1", "id2", "id3"], formats=[ExportFormat.MARKDOWN], organize_by="format" ) result = await export_service.bulk_export_summaries(summaries, request) assert result.status == ExportStatus.COMPLETED assert result.created_at is not None assert result.completed_at is not None assert result.completed_at > result.created_at @pytest.mark.asyncio async def test_bulk_export_empty_list(self, export_service): """Test bulk export with empty summary list.""" request = BulkExportRequest( summary_ids=[], formats=[ExportFormat.MARKDOWN], organize_by="format" ) result = await export_service.bulk_export_summaries([], request) # Should complete successfully but with minimal content assert result.status == ExportStatus.COMPLETED assert result.file_path.endswith('.zip') @pytest.mark.asyncio async def test_bulk_export_mixed_formats(self, export_service, sample_summary_data): """Test bulk export with all available formats.""" summaries = [sample_summary_data] # Get all available formats available_formats = list(export_service.exporters.keys()) request = BulkExportRequest( summary_ids=["test1"], formats=available_formats, organize_by="format" ) result = await export_service.bulk_export_summaries(summaries, request) assert result.status == ExportStatus.COMPLETED # Verify each format is in the archive with zipfile.ZipFile(result.file_path, 'r') as zf: namelist = zf.namelist() for format in available_formats: assert any(format.value in name for name in namelist)