# youtube-summarizer/backend/tests/unit/test_export_service.py

"""Unit tests for export service and exporters."""

import pytest
import pytest_asyncio
from unittest.mock import Mock, AsyncMock, patch, MagicMock
from datetime import datetime
import tempfile
import os
import json
from pathlib import Path
import zipfile

from backend.services.export_service import (
    ExportService,
    ExportFormat,
    ExportStatus,
    ExportRequest,
    BulkExportRequest,
    ExportResult,
    BaseExporter
)


@pytest.fixture
def export_service():
    """Yield an ExportService whose export directory is a throwaway temp dir.

    The directory is created before the test and removed, together with
    anything left inside it, once the ``TemporaryDirectory`` context exits
    after the test has finished.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        yield ExportService(export_dir=scratch_dir)


@pytest.fixture
def sample_summary_data():
    """Build one fully populated summary record used as export input."""
    # Each nested section is assembled separately, then combined below in the
    # same key order as the flat record the tests expect.
    video_metadata = {
        "title": "Test Video Title",
        "channel_name": "Test Channel",
        "duration": "10:30",
        "published_at": "2025-01-25",
        "views": 1000,
        "likes": 100,
    }
    key_points = ["First key point", "Second key point", "Third key point"]
    main_themes = ["Technology", "Innovation"]
    actionable_insights = [
        "Implement the new system",
        "Review the documentation",
    ]
    processing_metadata = {
        "model": "claude-3-5-haiku",
        "processing_time_seconds": 5.2,
        "tokens_used": 1500,
    }
    cost_data = {
        "input_cost": 0.015,
        "output_cost": 0.03,
        "total_cost": 0.045,
    }

    return {
        "video_id": "abc123",
        "video_url": "https://youtube.com/watch?v=abc123",
        "video_metadata": video_metadata,
        "summary": "This is a test summary of the video content. It covers important topics.",
        "key_points": key_points,
        "main_themes": main_themes,
        "actionable_insights": actionable_insights,
        "confidence_score": 0.95,
        "processing_metadata": processing_metadata,
        "cost_data": cost_data,
        "created_at": "2025-01-25T10:00:00Z",
    }


class TestExportService:
    """Tests for ExportService: single exports, bulk exports and housekeeping."""

    @pytest.mark.asyncio
    async def test_export_summary_markdown(self, export_service, sample_summary_data):
        """A Markdown export completes, writes a non-empty file and exposes a download URL."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.MARKDOWN,
            include_metadata=True
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.format == ExportFormat.MARKDOWN
        assert result.file_path is not None
        assert os.path.exists(result.file_path)
        assert result.file_size_bytes > 0
        assert result.download_url == f"/api/export/download/{result.export_id}"

        # The exported document must carry the video's identifying details.
        with open(result.file_path, 'r') as f:
            content = f.read()
        assert "Test Video Title" in content
        assert "Test Channel" in content
        assert "First key point" in content

    @pytest.mark.asyncio
    async def test_export_summary_json(self, export_service, sample_summary_data):
        """A JSON export completes and produces a parseable document with the expected keys."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.JSON
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.format == ExportFormat.JSON
        assert result.file_path.endswith('.json')

        # Validate JSON structure
        with open(result.file_path, 'r') as f:
            json_data = json.load(f)
        assert "youtube_summarizer_export" in json_data
        assert json_data["video"]["id"] == "abc123"
        assert json_data["summary"]["text"] == sample_summary_data["summary"]

    @pytest.mark.asyncio
    async def test_export_summary_text(self, export_service, sample_summary_data):
        """A plain-text export completes and yields a .txt file with title and summary section."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.PLAIN_TEXT
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.txt')

        with open(result.file_path, 'r') as f:
            content = f.read()
        assert "Test Video Title" in content
        # Case-insensitive: the heading's exact capitalisation is not pinned.
        assert "SUMMARY" in content.upper()

    @pytest.mark.asyncio
    async def test_export_summary_html(self, export_service, sample_summary_data):
        """An HTML export completes and yields an .html file with markup and the title."""
        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.HTML
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.html')

        with open(result.file_path, 'r') as f:
            content = f.read()
        assert "<html" in content.lower()
        assert "Test Video Title" in content
        assert "<h1>" in content or "<h2>" in content

    @pytest.mark.asyncio
    async def test_export_with_custom_branding(self, export_service, sample_summary_data):
        """Custom branding passed on the request shows up in the exported Markdown."""
        branding = {
            "company_name": "Test Company",
            "logo_url": "https://example.com/logo.png",
            "primary_color": "#007bff"
        }

        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.MARKDOWN,
            custom_branding=branding
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.COMPLETED

        with open(result.file_path, 'r') as f:
            content = f.read()
        assert "Test Company" in content

    @pytest.mark.asyncio
    async def test_bulk_export_multiple_formats(self, export_service, sample_summary_data):
        """Bulk-exporting two summaries in two formats yields a ZIP with at least four entries."""
        # dict.copy() is shallow, so the nested "video_metadata" dict would be
        # shared and retitling the copy would retitle the original as well.
        # Give the second summary its own metadata dict instead.
        second_summary = {
            **sample_summary_data,
            "video_metadata": {
                **sample_summary_data["video_metadata"],
                "title": "Second Video",
            },
        }
        summaries = [sample_summary_data, second_summary]

        request = BulkExportRequest(
            summary_ids=["test1", "test2"],
            formats=[ExportFormat.MARKDOWN, ExportFormat.JSON],
            organize_by="format"
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.zip')
        assert os.path.exists(result.file_path)
        assert result.file_size_bytes > 0

        # Verify ZIP contents
        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()
        # Should have 2 videos * 2 formats = 4 files
        assert len(namelist) >= 4
        assert any('markdown' in name for name in namelist)
        assert any('json' in name for name in namelist)

    @pytest.mark.asyncio
    async def test_bulk_export_organize_by_date(self, export_service, sample_summary_data):
        """With organize_by="date", some archive entry name contains the summary's date."""
        summaries = [sample_summary_data]

        request = BulkExportRequest(
            summary_ids=["test1"],
            formats=[ExportFormat.MARKDOWN],
            organize_by="date"
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED

        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()
        # Should have date folder
        assert any('2025-01-25' in name for name in namelist)

    @pytest.mark.asyncio
    async def test_bulk_export_organize_by_video(self, export_service, sample_summary_data):
        """With organize_by="video", some archive entry name contains the sanitized title."""
        summaries = [sample_summary_data]

        request = BulkExportRequest(
            summary_ids=["test1"],
            formats=[ExportFormat.MARKDOWN, ExportFormat.JSON],
            organize_by="video"
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED

        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()
        # Should have video folder with both formats inside
        assert any('Test_Video_Title' in name for name in namelist)

    @pytest.mark.asyncio
    async def test_export_invalid_format(self, export_service, sample_summary_data):
        """Requesting a format with no registered exporter fails with a "not available" error."""
        # Remove PDF exporter to simulate missing dependency (e.g. reportlab
        # not installed); pop() tolerates it already being absent.
        export_service.exporters.pop(ExportFormat.PDF, None)

        request = ExportRequest(
            summary_id="test123",
            format=ExportFormat.PDF
        )

        result = await export_service.export_summary(sample_summary_data, request)

        assert result.status == ExportStatus.FAILED
        assert "not available" in result.error

    def test_sanitize_filename(self, export_service):
        """Sanitized filenames contain no path/shell-hostile characters and stay short."""
        dangerous_names = [
            "Video: Test <script>alert('xss')</script>",
            "../../etc/passwd",
            "C:\\Windows\\System32\\config",
            "Video|with*special?chars",
            "Video\x00with\x1fcontrol\x80chars"
        ]
        # Characters that must never survive sanitization.
        forbidden_chars = ('<', '>', ':', '"', '/', '\\', '|', '?', '*', '\x00')

        for name in dangerous_names:
            sanitized = export_service._sanitize_filename(name)
            for char in forbidden_chars:
                assert char not in sanitized, (
                    f"{char!r} survived sanitization of {name!r}: {sanitized!r}"
                )
            # Check reasonable length
            assert len(sanitized) <= 100
            # No leading/trailing whitespace may remain.
            assert sanitized.strip() == sanitized

    def test_get_export_status(self, export_service):
        """get_export_status returns the tracked result, or None for an unknown id."""
        export_id = "test-export-123"
        result = ExportResult(
            export_id=export_id,
            status=ExportStatus.COMPLETED,
            format=ExportFormat.MARKDOWN
        )

        export_service.active_exports[export_id] = result

        retrieved = export_service.get_export_status(export_id)
        assert retrieved == result

        # Test non-existent export
        assert export_service.get_export_status("non-existent") is None

    @pytest.mark.asyncio
    async def test_cleanup_old_exports(self, export_service):
        """cleanup_old_exports drops expired exports and their files but keeps fresh ones."""
        # NOTE(review): created_at values are naive datetimes; presumably the
        # service compares them against a naive UTC "now" — confirm before
        # switching to timezone-aware values.
        old_export = ExportResult(
            export_id="old",
            status=ExportStatus.COMPLETED,
            format=ExportFormat.MARKDOWN,
            created_at=datetime(2020, 1, 1)  # Very old
        )

        new_export = ExportResult(
            export_id="new",
            status=ExportStatus.COMPLETED,
            format=ExportFormat.MARKDOWN,
            created_at=datetime.utcnow()  # Current
        )

        # Create actual files; delete=False so they outlive the with-block
        # and cleanup has something real to remove.
        with tempfile.NamedTemporaryFile(delete=False, dir=export_service.export_dir) as f:
            old_export.file_path = f.name
            f.write(b"old content")

        with tempfile.NamedTemporaryFile(delete=False, dir=export_service.export_dir) as f:
            new_export.file_path = f.name
            f.write(b"new content")

        export_service.active_exports["old"] = old_export
        export_service.active_exports["new"] = new_export

        # Run cleanup
        await export_service.cleanup_old_exports(max_age_hours=24)

        # Old export should be removed
        assert "old" not in export_service.active_exports
        assert not os.path.exists(old_export.file_path)

        # New export should remain
        assert "new" in export_service.active_exports
        assert os.path.exists(new_export.file_path)

        # Clean up test file
        os.remove(new_export.file_path)


class TestExporters:
    """Tests that drive each exporter class directly, without ExportService."""

    @staticmethod
    def _remove_file(file_path):
        """Delete *file_path* if it exists; a missing or empty path is ignored.

        Used from ``finally`` blocks so a failed assertion neither leaves the
        exported file behind nor gets masked by a FileNotFoundError.
        """
        if file_path and os.path.exists(file_path):
            os.remove(file_path)

    @pytest.mark.asyncio
    async def test_markdown_exporter(self, sample_summary_data):
        """MarkdownExporter writes a .md file containing headers, lists and bold text."""
        from backend.services.exporters.markdown_exporter import MarkdownExporter

        exporter = MarkdownExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)
            assert file_path.endswith('.md')

            with open(file_path, 'r') as f:
                content = f.read()

            # Check markdown formatting
            assert '# ' in content  # Headers
            assert '- ' in content  # Lists
            assert '**' in content or '__' in content  # Bold text
        finally:
            # Cleanup runs even when an assertion above fails.
            self._remove_file(file_path)

    @pytest.mark.asyncio
    async def test_json_exporter_structure(self, sample_summary_data):
        """JSONExporter writes a document with the expected top-level sections and values."""
        from backend.services.exporters.json_exporter import JSONExporter

        exporter = JSONExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)

            with open(file_path, 'r') as f:
                data = json.load(f)

            # Verify structure
            assert "youtube_summarizer_export" in data
            assert "version" in data["youtube_summarizer_export"]
            assert "video" in data
            assert "summary" in data
            assert "processing" in data

            # Verify data integrity
            assert data["video"]["id"] == sample_summary_data["video_id"]
            assert data["summary"]["confidence_score"] == sample_summary_data["confidence_score"]
        finally:
            self._remove_file(file_path)

    @pytest.mark.asyncio
    async def test_html_exporter_structure(self, sample_summary_data):
        """HTMLExporter writes a complete HTML document that includes title and summary."""
        from backend.services.exporters.html_exporter import HTMLExporter

        exporter = HTMLExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)

            with open(file_path, 'r') as f:
                content = f.read()

            # Check HTML structure
            assert '<!DOCTYPE html>' in content
            assert '<html' in content
            assert '<head>' in content
            assert '<body>' in content
            assert '</html>' in content

            # Check content presence
            assert sample_summary_data["video_metadata"]["title"] in content
            assert sample_summary_data["summary"] in content
        finally:
            self._remove_file(file_path)

    @pytest.mark.asyncio
    async def test_text_exporter_simplicity(self, sample_summary_data):
        """PlainTextExporter writes a .txt file free of HTML and Markdown markup."""
        from backend.services.exporters.text_exporter import PlainTextExporter

        exporter = PlainTextExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)
            assert file_path.endswith('.txt')

            with open(file_path, 'r') as f:
                content = f.read()

            # Should be plain text without markup
            assert '<' not in content  # No HTML
            # A bare '#' is tolerated; only "# " (a Markdown header) is not.
            # This is what the former "'#' not in ... or '# ' not in ..."
            # check reduced to.
            assert '# ' not in content  # No markdown headers
            assert '**' not in content  # No markdown bold

            # But should have content
            assert sample_summary_data["summary"] in content
            assert "KEY POINTS" in content.upper()
        finally:
            self._remove_file(file_path)

    @pytest.mark.asyncio
    async def test_pdf_exporter_if_available(self, sample_summary_data):
        """PDFExporter writes a file starting with the PDF magic bytes; skipped without reportlab."""
        try:
            from backend.services.exporters.pdf_exporter import PDFExporter
            # Imported only to probe that the optional dependency is installed.
            import reportlab  # noqa: F401
        except ImportError:
            pytest.skip("PDF export requires reportlab")

        exporter = PDFExporter()
        file_path = await exporter.export(sample_summary_data)

        try:
            assert os.path.exists(file_path)
            assert file_path.endswith('.pdf')

            # Check it's a valid PDF (basic check)
            with open(file_path, 'rb') as f:
                header = f.read(5)
            assert header == b'%PDF-'  # PDF magic bytes
        finally:
            self._remove_file(file_path)


class TestBulkExport:
    """Tests for ExportService.bulk_export_summaries edge cases."""

    @pytest.mark.asyncio
    async def test_bulk_export_progress(self, export_service, sample_summary_data):
        """A completed bulk export carries both timestamps, in chronological order."""
        summaries = [sample_summary_data for _ in range(3)]

        request = BulkExportRequest(
            summary_ids=["id1", "id2", "id3"],
            formats=[ExportFormat.MARKDOWN],
            organize_by="format"
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED
        assert result.created_at is not None
        assert result.completed_at is not None
        # ">=" rather than ">": on a coarse clock both timestamps can fall on
        # the same tick, and that must not fail the test.
        assert result.completed_at >= result.created_at

    @pytest.mark.asyncio
    async def test_bulk_export_empty_list(self, export_service):
        """Bulk-exporting no summaries still completes and produces a ZIP path."""
        request = BulkExportRequest(
            summary_ids=[],
            formats=[ExportFormat.MARKDOWN],
            organize_by="format"
        )

        result = await export_service.bulk_export_summaries([], request)

        # Should complete successfully but with minimal content
        assert result.status == ExportStatus.COMPLETED
        assert result.file_path.endswith('.zip')

    @pytest.mark.asyncio
    async def test_bulk_export_mixed_formats(self, export_service, sample_summary_data):
        """Every format registered on the service appears somewhere in the archive."""
        summaries = [sample_summary_data]

        # Get all available formats (whatever exporters this service registered)
        available_formats = list(export_service.exporters.keys())

        request = BulkExportRequest(
            summary_ids=["test1"],
            formats=available_formats,
            organize_by="format"
        )

        result = await export_service.bulk_export_summaries(summaries, request)

        assert result.status == ExportStatus.COMPLETED

        # Verify each format is in the archive
        with zipfile.ZipFile(result.file_path, 'r') as zf:
            namelist = zf.namelist()
        # Loop variable deliberately not called "format" to avoid shadowing
        # the builtin.
        for export_format in available_formats:
            assert any(export_format.value in name for name in namelist)