# youtube-summarizer/backend/services/exporters/markdown_exporter.py

"""
Markdown Exporter for YouTube Summaries
Exports summaries to clean, formatted Markdown documents
"""

import tempfile
from typing import Dict, Any, Optional
from ..export_service import BaseExporter


class MarkdownExporter(BaseExporter):
    """Export summaries to Markdown format"""

    async def export(
        self,
        summary_data: Dict[str, Any],
        template: Optional[str] = None,
        branding: Optional[Dict[str, Any]] = None
    ) -> str:
        """Render a summary as Markdown and write it to a temporary file.

        Args:
            summary_data: Raw summary payload; it is passed through
                ``self._prepare_summary_data`` before rendering.
            template: Optional custom template text. When truthy it is rendered
                via ``_render_custom_template``; otherwise the default layout
                is used.
            branding: Optional branding options, forwarded to the default
                layout only (a custom template does not receive them).

        Returns:
            Path of the ``.md`` file that was written. The file is created with
            ``delete=False``, so it outlives this call and is not removed here.
        """
        data = self._prepare_summary_data(summary_data)

        # Use custom template if provided, otherwise default
        if template:
            content = await self._render_custom_template(template, data)
        else:
            content = self._render_default_template(data, branding)

        # Pin UTF-8: without an explicit encoding the file uses the platform's
        # locale encoding, which can fail on (or mangle) non-ASCII titles and
        # summary text.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.md', delete=False, encoding='utf-8'
        ) as f:
            f.write(content)

        # The handle is closed (and flushed) at this point; only the path is
        # handed back.
        return f.name

    async def _render_custom_template(self, template: str, data: Dict[str, Any]) -> str:
        """Render custom template with data"""
        from jinja2 import Template
        try:
            # Use Jinja2 for proper template rendering
            jinja_template = Template(template)
            return jinja_template.render(**data)
        except Exception as e:
            # Fallback to simple replacement if Jinja2 fails
            content = template
            for key, value in data.items():
                content = content.replace(f"{{{{{key}}}}}", str(value))
            return content

    def _render_default_template(self, data: Dict[str, Any], branding: Optional[Dict[str, Any]]) -> str:
        """Render default Markdown template"""

        video_metadata = data.get("video_metadata", {})
        processing_metadata = data.get("processing_metadata", {})

        # Header with branding
        header = ""
        if branding and branding.get("company_name"):
            header = f"*Generated by {branding['company_name']} using YouTube Summarizer*\n\n"

        markdown = f"""{header}# YouTube Video Summary

## Video Information
- **Title**: {video_metadata.get('title', 'N/A')}
- **URL**: {data.get('video_url', 'N/A')}
- **Channel**: {video_metadata.get('channel_name', 'N/A')}
- **Duration**: {self._format_duration(video_metadata.get('duration'))}
- **Published**: {video_metadata.get('published_at', 'N/A')}
- **Views**: {self._format_number(video_metadata.get('view_count'))}

## Summary

{data.get('summary', 'No summary available')}

## Key Points

"""

        # Add key points
        key_points = data.get('key_points', [])
        if key_points:
            for point in key_points:
                markdown += f"- {point}\n"
        else:
            markdown += "*No key points identified*\n"

        markdown += "\n## Main Themes\n\n"

        # Add main themes
        main_themes = data.get('main_themes', [])
        if main_themes:
            for theme in main_themes:
                markdown += f"- **{theme}**\n"
        else:
            markdown += "*No main themes identified*\n"

        markdown += "\n## Actionable Insights\n\n"

        # Add actionable insights
        insights = data.get('actionable_insights', [])
        if insights:
            for i, insight in enumerate(insights, 1):
                markdown += f"{i}. {insight}\n"
        else:
            markdown += "*No actionable insights identified*\n"

        # Add chapters/timestamps if available
        chapters = data.get('chapters', [])
        if chapters:
            markdown += "\n## Chapter Breakdown\n\n"
            for chapter in chapters:
                timestamp = chapter.get('timestamp', '')
                title = chapter.get('title', '')
                summary = chapter.get('summary', '')
                markdown += f"### [{timestamp}] {title}\n{summary}\n\n"

        # Add metadata footer
        markdown += f"""

---

## Processing Information
- **AI Model**: {processing_metadata.get('model', 'N/A')}
- **Processing Time**: {self._format_duration(processing_metadata.get('processing_time_seconds'))}
- **Confidence Score**: {self._format_percentage(data.get('confidence_score'))}
- **Token Usage**: {processing_metadata.get('tokens_used', 'N/A')}
- **Generated**: {data.get('export_metadata', {}).get('exported_at', 'N/A')}

*Summary generated by YouTube Summarizer - Transform video content into actionable insights*
"""

        return markdown

    def _format_duration(self, duration: Optional[Any]) -> str:
        """Format duration from seconds or string to human-readable format"""
        if not duration:
            return 'N/A'

        # If it's already a string (like "10:30"), return it
        if isinstance(duration, str):
            return duration

        # If it's a number, format as seconds
        try:
            seconds = int(duration)
            hours = seconds // 3600
            minutes = (seconds % 3600) // 60
            seconds = seconds % 60

            if hours > 0:
                return f"{hours}h {minutes}m {seconds}s"
            elif minutes > 0:
                return f"{minutes}m {seconds}s"
            else:
                return f"{seconds}s"
        except (ValueError, TypeError):
            return str(duration) if duration else 'N/A'

    def _format_number(self, number: Optional[int]) -> str:
        """Format large numbers with commas"""
        if number is None:
            return 'N/A'
        return f"{number:,}"

    def _format_percentage(self, value: Optional[float]) -> str:
        """Format decimal as percentage"""
        if value is None:
            return 'N/A'
        return f"{value * 100:.1f}%"

    def get_file_extension(self) -> str:
        return "md"