# youtube-summarizer/backend/services/exporters/text_exporter.py

"""
Plain Text Exporter for YouTube Summaries
Exports summaries to simple, readable plain text format
"""

import tempfile
from typing import Dict, Any, Optional
from ..export_service import BaseExporter


class PlainTextExporter(BaseExporter):
    """Export summaries to plain text format"""

    async def export(
        self,
        summary_data: Dict[str, Any],
        template: Optional[str] = None,
        branding: Optional[Dict[str, Any]] = None
    ) -> str:
        """Render a summary as plain text and write it to a temporary file.

        Args:
            summary_data: Raw summary payload; it is passed through
                ``self._prepare_summary_data`` before rendering.
            template: Optional custom template with ``{{key}}`` placeholders.
                When given it is used instead of the default layout, and
                ``branding`` is not applied.
            branding: Optional branding options for the default layout.

        Returns:
            Path of the ``.txt`` file that was written. The file is created
            with ``delete=False``, so it is not removed automatically.
        """
        data = self._prepare_summary_data(summary_data)

        # A custom template takes precedence over the built-in layout.
        if template:
            content = await self._render_custom_template(template, data)
        else:
            content = self._render_default_template(data, branding)

        # Pin the encoding: without it the platform default is used, which
        # can fail on (or mangle) non-ASCII titles and summaries.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding='utf-8'
        ) as f:
            f.write(content)
            return f.name

    async def _render_custom_template(self, template: str, data: Dict[str, Any]) -> str:
        """Render custom template with data"""
        content = template
        for key, value in data.items():
            content = content.replace(f"{{{{{key}}}}}", str(value))
        return content

    def _render_default_template(self, data: Dict[str, Any], branding: Optional[Dict[str, Any]]) -> str:
        """Render default plain text template"""

        video_metadata = data.get("video_metadata", {})
        processing_metadata = data.get("processing_metadata", {})

        # Header
        text = "=" * 80 + "\n"
        text += "YOUTUBE VIDEO SUMMARY\n"
        text += "=" * 80 + "\n\n"

        # Branding
        if branding and branding.get("company_name"):
            text += f"Generated by {branding['company_name']} using YouTube Summarizer\n"
            text += "-" * 80 + "\n\n"

        # Video Information
        text += "VIDEO INFORMATION\n"
        text += "-" * 40 + "\n"
        text += f"Title: {video_metadata.get('title', 'N/A')}\n"
        text += f"Channel: {video_metadata.get('channel_name', 'N/A')}\n"
        text += f"URL: {data.get('video_url', 'N/A')}\n"
        text += f"Duration: {self._format_duration(video_metadata.get('duration'))}\n"
        text += f"Published: {video_metadata.get('published_at', 'N/A')}\n"
        text += f"Views: {self._format_number(video_metadata.get('view_count'))}\n"
        text += "\n"

        # Summary
        text += "SUMMARY\n"
        text += "-" * 40 + "\n"
        text += self._wrap_text(data.get('summary', 'No summary available'), width=80)
        text += "\n\n"

        # Key Points
        text += "KEY POINTS\n"
        text += "-" * 40 + "\n"
        key_points = data.get('key_points', [])
        if key_points:
            for i, point in enumerate(key_points, 1):
                text += f"{i}. {self._wrap_text(point, width=76, indent=3)}\n"
        else:
            text += "No key points identified\n"
        text += "\n"

        # Main Themes
        text += "MAIN THEMES\n"
        text += "-" * 40 + "\n"
        main_themes = data.get('main_themes', [])
        if main_themes:
            for theme in main_themes:
                text += f"* {theme}\n"
        else:
            text += "No main themes identified\n"
        text += "\n"

        # Actionable Insights
        text += "ACTIONABLE INSIGHTS\n"
        text += "-" * 40 + "\n"
        insights = data.get('actionable_insights', [])
        if insights:
            for i, insight in enumerate(insights, 1):
                text += f"{i}. {self._wrap_text(insight, width=76, indent=3)}\n"
        else:
            text += "No actionable insights identified\n"
        text += "\n"

        # Chapters (if available)
        chapters = data.get('chapters', [])
        if chapters:
            text += "CHAPTER BREAKDOWN\n"
            text += "-" * 40 + "\n"
            for chapter in chapters:
                timestamp = chapter.get('timestamp', '')
                title = chapter.get('title', '')
                summary = chapter.get('summary', '')
                text += f"[{timestamp}] {title}\n"
                if summary:
                    text += f"   {self._wrap_text(summary, width=77, indent=3)}\n"
                text += "\n"

        # Footer
        text += "=" * 80 + "\n"
        text += "PROCESSING INFORMATION\n"
        text += "-" * 40 + "\n"
        text += f"AI Model: {processing_metadata.get('model', 'N/A')}\n"
        text += f"Processing Time: {self._format_duration(processing_metadata.get('processing_time_seconds'))}\n"
        text += f"Confidence Score: {self._format_percentage(data.get('confidence_score'))}\n"
        text += f"Generated: {data.get('export_metadata', {}).get('exported_at', 'N/A')}\n"
        text += "\n"
        text += "=" * 80 + "\n"
        text += "Summary generated by YouTube Summarizer\n"
        text += "Transform video content into actionable insights\n"
        text += "=" * 80 + "\n"

        return text

    def _wrap_text(self, text: str, width: int = 80, indent: int = 0) -> str:
        """Wrap text to specified width with optional indentation"""
        if not text:
            return ""

        import textwrap
        wrapper = textwrap.TextWrapper(
            width=width,
            subsequent_indent=' ' * indent,
            break_long_words=False,
            break_on_hyphens=False
        )

        paragraphs = text.split('\n')
        wrapped_paragraphs = []

        for paragraph in paragraphs:
            if paragraph.strip():
                wrapped = wrapper.fill(paragraph)
                wrapped_paragraphs.append(wrapped)
            else:
                wrapped_paragraphs.append('')

        return '\n'.join(wrapped_paragraphs)

    def _format_duration(self, duration: Optional[Any]) -> str:
        """Format duration from seconds to human-readable format"""
        if not duration:
            return 'N/A'

        # Handle string format (e.g., "10:30")
        if isinstance(duration, str):
            return duration

        # Handle numeric format (seconds)
        try:
            seconds = int(duration)
        except (ValueError, TypeError):
            return 'N/A'

        hours = seconds // 3600
        minutes = (seconds % 3600) // 60
        secs = seconds % 60

        if hours > 0:
            return f"{hours}h {minutes}m {secs}s"
        elif minutes > 0:
            return f"{minutes}m {secs}s"
        else:
            return f"{secs}s"

    def _format_number(self, number: Optional[int]) -> str:
        """Format large numbers with commas"""
        if number is None:
            return 'N/A'
        return f"{number:,}"

    def _format_percentage(self, value: Optional[float]) -> str:
        """Format decimal as percentage"""
        if value is None:
            return 'N/A'
        return f"{value * 100:.1f}%"

    def get_file_extension(self) -> str:
        return "txt"