# youtube-summarizer/backend/services/exporters/markdown_exporter.py
#
# 171 lines
# 5.8 KiB
# Python

"""
Markdown Exporter for YouTube Summaries
Exports summaries to clean, formatted Markdown documents
"""
import tempfile
from typing import Dict, Any, Optional
from ..export_service import BaseExporter
class MarkdownExporter(BaseExporter):
    """Export summaries to Markdown format.

    A summary is rendered either through a caller-supplied template or
    through the built-in default layout, then written to a temporary
    ``.md`` file whose path is returned to the caller.
    """

    async def export(
        self,
        summary_data: Dict[str, Any],
        template: Optional[str] = None,
        branding: Optional[Dict[str, Any]] = None
    ) -> str:
        """Render a summary as Markdown and write it to a temporary file.

        Args:
            summary_data: Raw summary data; it is normalised through
                ``self._prepare_summary_data`` before rendering (that helper
                is not defined in this class -- presumably inherited from
                ``BaseExporter``).
            template: Optional custom template text. When truthy it is used
                instead of the default layout (and ``branding`` is ignored).
            branding: Optional branding options for the default layout; only
                the ``company_name`` key is read.

        Returns:
            Path of the written ``.md`` file. The file is created with
            ``delete=False``, so removing it is the caller's responsibility.
        """
        data = self._prepare_summary_data(summary_data)

        # Use custom template if provided, otherwise default
        if template:
            content = await self._render_custom_template(template, data)
        else:
            content = self._render_default_template(data, branding)

        # Write to temporary file. The encoding is pinned to UTF-8 because
        # titles and summaries routinely contain non-ASCII text, and the
        # platform default codec may be unable to encode it.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.md', delete=False, encoding='utf-8'
        ) as f:
            f.write(content)
        return f.name

    async def _render_custom_template(self, template: str, data: Dict[str, Any]) -> str:
        """Render a custom template with the prepared summary data.

        Rendering is attempted with Jinja2 first. If that raises, the failure
        is logged and a plain ``{{key}}`` placeholder substitution is used
        instead.

        Args:
            template: Template source text.
            data: Mapping whose items are exposed to the template.

        Returns:
            The rendered text.
        """
        import logging
        import re

        from jinja2 import Template

        # NOTE(review): the template is rendered with a plain, non-sandboxed
        # jinja2.Template. If templates can originate from untrusted users,
        # consider jinja2.sandbox.SandboxedEnvironment instead.
        try:
            # Use Jinja2 for proper template rendering
            return Template(template).render(**data)
        except Exception:
            # Deliberate best-effort: any Jinja2 failure degrades to simple
            # substitution, but the cause is recorded rather than lost.
            logging.getLogger(__name__).warning(
                "Jinja2 rendering of custom template failed; "
                "falling back to plain placeholder substitution",
                exc_info=True,
            )

        # Single pass over the template: accepts optional whitespace inside
        # the braces ("{{ title }}") and never re-expands placeholder-looking
        # text that came from the data values themselves.
        placeholder = re.compile(r"\{\{\s*(.+?)\s*\}\}")

        def _substitute(match):
            key = match.group(1)
            if key in data:
                return str(data[key])
            # Unknown placeholders are left untouched.
            return match.group(0)

        return placeholder.sub(_substitute, template)

    def _render_default_template(self, data: Dict[str, Any], branding: Optional[Dict[str, Any]]) -> str:
        """Render the built-in Markdown layout.

        Sections are separated by blank lines so that headings, lists and the
        closing tagline are parsed as distinct blocks.

        Args:
            data: Prepared summary data. Keys read here: ``video_metadata``,
                ``processing_metadata``, ``export_metadata``, ``video_url``,
                ``summary``, ``key_points``, ``main_themes``,
                ``actionable_insights``, ``chapters``, ``confidence_score``.
                Missing keys and ``None`` values fall back to placeholders.
            branding: Optional mapping; a truthy ``company_name`` adds an
                attribution line above the title.

        Returns:
            The complete Markdown document, ending with a newline.
        """
        # "or {}" / "or []" (rather than a .get default) also covers keys
        # that are present but explicitly set to None.
        video_metadata = data.get("video_metadata") or {}
        processing_metadata = data.get("processing_metadata") or {}
        export_metadata = data.get("export_metadata") or {}

        lines = []

        # Header with branding
        if branding and branding.get("company_name"):
            lines += [
                f"*Generated by {branding['company_name']} using YouTube Summarizer*",
                "",
            ]

        lines += [
            "# YouTube Video Summary",
            "",
            "## Video Information",
            "",
            f"- **Title**: {video_metadata.get('title', 'N/A')}",
            f"- **URL**: {data.get('video_url', 'N/A')}",
            f"- **Channel**: {video_metadata.get('channel_name', 'N/A')}",
            f"- **Duration**: {self._format_duration(video_metadata.get('duration'))}",
            f"- **Published**: {video_metadata.get('published_at', 'N/A')}",
            f"- **Views**: {self._format_number(video_metadata.get('view_count'))}",
            "",
            "## Summary",
            "",
            str(data.get('summary') or 'No summary available'),
            "",
            "## Key Points",
            "",
        ]

        # Add key points
        key_points = data.get('key_points') or []
        if key_points:
            lines.extend(f"- {point}" for point in key_points)
        else:
            lines.append("*No key points identified*")

        # Add main themes
        lines += ["", "## Main Themes", ""]
        main_themes = data.get('main_themes') or []
        if main_themes:
            lines.extend(f"- **{theme}**" for theme in main_themes)
        else:
            lines.append("*No main themes identified*")

        # Add actionable insights
        lines += ["", "## Actionable Insights", ""]
        insights = data.get('actionable_insights') or []
        if insights:
            lines.extend(
                f"{i}. {insight}" for i, insight in enumerate(insights, 1)
            )
        else:
            lines.append("*No actionable insights identified*")

        # Add chapters/timestamps if available
        chapters = data.get('chapters') or []
        if chapters:
            lines += ["", "## Chapter Breakdown"]
            for chapter in chapters:
                timestamp = chapter.get('timestamp', '')
                title = chapter.get('title', '')
                summary = chapter.get('summary', '')
                lines += ["", f"### [{timestamp}] {title}", "", str(summary)]

        # Add metadata footer. The blank line before the tagline keeps it
        # from being folded into the last list item as a continuation line.
        lines += [
            "",
            "---",
            "",
            "## Processing Information",
            "",
            f"- **AI Model**: {processing_metadata.get('model', 'N/A')}",
            f"- **Processing Time**: {self._format_duration(processing_metadata.get('processing_time_seconds'))}",
            f"- **Confidence Score**: {self._format_percentage(data.get('confidence_score'))}",
            f"- **Token Usage**: {processing_metadata.get('tokens_used', 'N/A')}",
            f"- **Generated**: {export_metadata.get('exported_at', 'N/A')}",
            "",
            "*Summary generated by YouTube Summarizer - Transform video content into actionable insights*",
            "",
        ]

        return "\n".join(lines)

    def _format_duration(self, duration: Optional[Any]) -> str:
        """Format a duration given in seconds (or as a string) for display.

        Args:
            duration: Number of seconds, an already formatted string such as
                ``"10:30"``, or ``None``.

        Returns:
            ``'N/A'`` for any falsy value (``None``, ``0``, ``""``); the
            string unchanged if a string was given; otherwise ``"Xh Ym Zs"``,
            ``"Ym Zs"`` or ``"Zs"`` depending on magnitude. Values that cannot
            be converted to an integer are returned via ``str()``.
        """
        if not duration:
            return 'N/A'

        # If it's already a string (like "10:30"), return it
        if isinstance(duration, str):
            return duration

        try:
            total_seconds = int(duration)
        except (ValueError, TypeError, OverflowError):
            # ValueError: NaN; OverflowError: infinity; TypeError: non-numeric.
            return str(duration)

        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)

        if hours > 0:
            return f"{hours}h {minutes}m {seconds}s"
        if minutes > 0:
            return f"{minutes}m {seconds}s"
        return f"{seconds}s"

    def _format_number(self, number: Optional[int]) -> str:
        """Format a number with thousands separators.

        Args:
            number: The value to format, or ``None``. Numeric strings are
                accepted as well, since upstream APIs may deliver counts as
                text.

        Returns:
            ``'N/A'`` for ``None``; the comma-grouped number when the value
            is numeric (or an integer-like string); otherwise ``str(number)``.
        """
        if number is None:
            return 'N/A'

        if isinstance(number, str):
            # The ',' format spec is rejected for str, so coerce first.
            try:
                number = int(number)
            except ValueError:
                return number

        try:
            return f"{number:,}"
        except (ValueError, TypeError):
            return str(number)

    def _format_percentage(self, value: Optional[float]) -> str:
        """Format a fraction as a percentage with one decimal place.

        Args:
            value: Fraction to format (``0.5`` becomes ``"50.0%"``), or
                ``None``.

        Returns:
            ``'N/A'`` for ``None``; the formatted percentage when the value
            can be converted to ``float``; otherwise ``str(value)``.
        """
        if value is None:
            return 'N/A'

        try:
            return f"{float(value) * 100:.1f}%"
        except (ValueError, TypeError):
            return str(value)

    def get_file_extension(self) -> str:
        """Return the file extension used for exported files (no dot)."""
        return "md"