# Source: youtube-summarizer/backend/services/exporters/text_exporter.py
# (file-viewer header: 202 lines, 7.0 KiB, Python)

"""
Plain Text Exporter for YouTube Summaries
Exports summaries to simple, readable plain text format
"""
import tempfile
from typing import Dict, Any, Optional
from ..export_service import BaseExporter
class PlainTextExporter(BaseExporter):
    """Export summaries to plain text format.

    The exporter renders either a caller-supplied template (simple
    ``{{key}}`` substitution) or a built-in fixed-width report, writes the
    result to a temporary ``.txt`` file and returns that file's path.
    """

    async def export(
        self,
        summary_data: Dict[str, Any],
        template: Optional[str] = None,
        branding: Optional[Dict[str, Any]] = None
    ) -> str:
        """Export a summary to a plain text file.

        Args:
            summary_data: Raw summary payload; it is passed through
                ``self._prepare_summary_data`` before rendering (that helper
                is not defined in this class -- presumably inherited from
                ``BaseExporter``).
            template: Optional custom template. When truthy it is rendered
                with ``_render_custom_template``; otherwise the default
                layout is used.
            branding: Optional branding options. Only ``company_name`` is
                read, and only by the default layout.

        Returns:
            Path of the temporary file holding the rendered text. The file
            is created with ``delete=False``, so it is not removed
            automatically.
        """
        data = self._prepare_summary_data(summary_data)

        if template:
            content = await self._render_custom_template(template, data)
        else:
            content = self._render_default_template(data, branding)

        # Explicit UTF-8: titles and summaries routinely contain non-ASCII
        # characters, which would fail to encode under a non-UTF-8 locale.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding='utf-8'
        ) as f:
            f.write(content)
        # Return only after the file has been flushed and closed.
        return f.name

    async def _render_custom_template(self, template: str, data: Dict[str, Any]) -> str:
        """Render a custom template by replacing ``{{key}}`` placeholders.

        Every top-level key of ``data`` is substituted with ``str(value)``.
        Placeholders without a matching key are left untouched.
        """
        content = template
        for key, value in data.items():
            content = content.replace(f"{{{{{key}}}}}", str(value))
        return content

    def _render_default_template(self, data: Dict[str, Any], branding: Optional[Dict[str, Any]]) -> str:
        """Render the built-in fixed-width plain text report.

        Sections, in order: header, optional branding line, video
        information, summary, key points, main themes, actionable insights,
        optional chapter breakdown, and a processing-information footer.

        Args:
            data: Prepared summary data (a dict; missing or ``None`` fields
                are rendered as ``N/A`` or a "none identified" line).
            branding: Optional dict; a truthy ``company_name`` adds a
                "Generated by ..." line under the header.

        Returns:
            The complete report, terminated by a newline.
        """
        # ``or {}`` also covers keys that are present but explicitly None.
        video_metadata = data.get("video_metadata") or {}
        processing_metadata = data.get("processing_metadata") or {}
        export_metadata = data.get("export_metadata") or {}

        heavy_rule = "=" * 80
        light_rule = "-" * 40

        # Each element is one output line (wrapped text may span several).
        lines = [heavy_rule, "YOUTUBE VIDEO SUMMARY", heavy_rule, ""]

        # Branding
        if branding and branding.get("company_name"):
            lines += [
                f"Generated by {branding['company_name']} using YouTube Summarizer",
                "-" * 80,
                "",
            ]

        # Video Information
        lines += [
            "VIDEO INFORMATION",
            light_rule,
            f"Title: {video_metadata.get('title') or 'N/A'}",
            f"Channel: {video_metadata.get('channel_name') or 'N/A'}",
            f"URL: {data.get('video_url') or 'N/A'}",
            f"Duration: {self._format_duration(video_metadata.get('duration'))}",
            f"Published: {video_metadata.get('published_at') or 'N/A'}",
            f"Views: {self._format_number(video_metadata.get('view_count'))}",
            "",
        ]

        # Summary (fall back when the field is missing, None or empty)
        summary_text = data.get('summary') or 'No summary available'
        lines += [
            "SUMMARY",
            light_rule,
            self._wrap_text(summary_text, width=80),
            "",
        ]

        # Key Points
        lines += ["KEY POINTS", light_rule]
        lines += self._numbered_items(data.get('key_points')) or ["No key points identified"]
        lines.append("")

        # Main Themes
        lines += ["MAIN THEMES", light_rule]
        main_themes = data.get('main_themes') or []
        lines += [f"* {theme}" for theme in main_themes] or ["No main themes identified"]
        lines.append("")

        # Actionable Insights
        lines += ["ACTIONABLE INSIGHTS", light_rule]
        lines += self._numbered_items(data.get('actionable_insights')) or [
            "No actionable insights identified"
        ]
        lines.append("")

        # Chapters (section is omitted entirely when there are none)
        chapters = data.get('chapters') or []
        if chapters:
            lines += ["CHAPTER BREAKDOWN", light_rule]
            for chapter in chapters:
                lines.append(f"[{chapter.get('timestamp', '')}] {chapter.get('title', '')}")
                chapter_summary = chapter.get('summary', '')
                if chapter_summary:
                    # Three-space prefix so the first line lines up with the
                    # continuation lines (indent=3); 3 + 77 keeps it within 80.
                    lines.append(
                        "   " + self._wrap_text(chapter_summary, width=77, indent=3)
                    )
                lines.append("")

        # Footer
        lines += [
            heavy_rule,
            "PROCESSING INFORMATION",
            light_rule,
            f"AI Model: {processing_metadata.get('model') or 'N/A'}",
            f"Processing Time: {self._format_duration(processing_metadata.get('processing_time_seconds'))}",
            f"Confidence Score: {self._format_percentage(data.get('confidence_score'))}",
            f"Generated: {export_metadata.get('exported_at') or 'N/A'}",
            "",
            heavy_rule,
            "Summary generated by YouTube Summarizer",
            "Transform video content into actionable insights",
            heavy_rule,
        ]

        return "\n".join(lines) + "\n"

    def _numbered_items(self, items: Optional[Any]) -> list:
        """Return ``items`` as wrapped ``"N. text"`` lines (empty list if none)."""
        return [
            f"{i}. {self._wrap_text(item, width=76, indent=3)}"
            for i, item in enumerate(items or [], 1)
        ]

    def _wrap_text(self, text: str, width: int = 80, indent: int = 0) -> str:
        """Wrap text to the given width, paragraph by paragraph.

        Args:
            text: Text to wrap. Falsy input yields ``""``; non-string input
                is converted with ``str()``.
            width: Maximum line width, including any indentation.
            indent: Number of spaces prepended to every line of a paragraph
                except its first.

        Returns:
            The wrapped text. Paragraphs are separated on ``"\\n"``; blank
            lines are preserved, and long words / hyphenated words are never
            split.
        """
        import textwrap

        if not text:
            return ""

        wrapper = textwrap.TextWrapper(
            width=width,
            subsequent_indent=' ' * indent,
            break_long_words=False,
            break_on_hyphens=False
        )
        return '\n'.join(
            wrapper.fill(paragraph) if paragraph.strip() else ''
            for paragraph in str(text).split('\n')
        )

    def _format_duration(self, duration: Optional[Any]) -> str:
        """Format a duration in seconds as ``"1h 2m 3s"`` / ``"2m 3s"`` / ``"3s"``.

        Falsy input (``None``, ``0``, ``""``) returns ``'N/A'``. Strings
        (e.g. ``"10:30"``) are returned unchanged. Values that cannot be
        converted to a non-negative whole number of seconds return ``'N/A'``.
        """
        if not duration:
            return 'N/A'

        # Already formatted (e.g. "10:30")
        if isinstance(duration, str):
            return duration

        try:
            seconds = int(duration)
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float('inf')); ValueError: int(float('nan'))
            return 'N/A'
        if seconds < 0:
            return 'N/A'

        hours, remainder = divmod(seconds, 3600)
        minutes, secs = divmod(remainder, 60)
        if hours > 0:
            return f"{hours}h {minutes}m {secs}s"
        if minutes > 0:
            return f"{minutes}m {secs}s"
        return f"{secs}s"

    def _format_number(self, number: Optional[int]) -> str:
        """Format a number with thousands separators (``1234567`` -> ``1,234,567``).

        ``None`` returns ``'N/A'``. Integer-like strings (optionally already
        containing commas) are parsed first; strings that are not integers
        are returned unchanged, or ``'N/A'`` when empty.
        """
        if number is None:
            return 'N/A'

        if isinstance(number, str):
            try:
                number = int(number.replace(',', '').strip())
            except ValueError:
                return number or 'N/A'

        try:
            return f"{number:,}"
        except (TypeError, ValueError):
            # Type that does not support the ',' format option.
            return str(number)

    def _format_percentage(self, value: Optional[float]) -> str:
        """Format a fraction as a percentage with one decimal (``0.853`` -> ``85.3%``).

        ``None`` and values that cannot be converted to ``float`` return
        ``'N/A'``.
        """
        if value is None:
            return 'N/A'
        try:
            return f"{float(value) * 100:.1f}%"
        except (TypeError, ValueError):
            return 'N/A'

    def get_file_extension(self) -> str:
        """Return the file extension (without a leading dot) for this format."""
        return "txt"