# 171 lines, 5.8 KiB, Python
"""
|
|
Markdown Exporter for YouTube Summaries
|
|
Exports summaries to clean, formatted Markdown documents
|
|
"""

import logging
import tempfile
from typing import Dict, Any, Optional

from ..export_service import BaseExporter


class MarkdownExporter(BaseExporter):
    """Export summaries to Markdown format.

    The rendered document comes either from a caller-supplied template
    (rendered with Jinja2) or from the built-in default layout, and is
    written to a temporary ``.md`` file whose path is returned.
    """

    async def export(
        self,
        summary_data: Dict[str, Any],
        template: Optional[str] = None,
        branding: Optional[Dict[str, Any]] = None
    ) -> str:
        """Export a summary to a Markdown file.

        Args:
            summary_data: Raw summary data; it is passed through
                ``self._prepare_summary_data`` (defined outside this class)
                before rendering.
            template: Optional custom template source. When falsy, the
                default layout is used.
            branding: Optional branding options; only ``company_name`` is
                read, and only by the default layout.

        Returns:
            Path of the temporary ``.md`` file. The file is created with
            ``delete=False``, so removing it is left to the caller.
        """
        data = self._prepare_summary_data(summary_data)

        # Use custom template if provided, otherwise default
        if template:
            content = await self._render_custom_template(template, data)
        else:
            content = self._render_default_template(data, branding)

        # Write as UTF-8 explicitly: the locale default encoding may be unable
        # to represent non-ASCII characters in titles or summaries.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.md', delete=False, encoding='utf-8'
        ) as f:
            f.write(content)

        return f.name

    async def _render_custom_template(self, template: str, data: Dict[str, Any]) -> str:
        """Render a custom template with the prepared data.

        The template is rendered with Jinja2 inside a sandboxed environment,
        because the template text is supplied by the caller and must not be
        able to reach unsafe attributes of the objects in ``data``.

        If rendering fails for any reason, the failure is logged and a plain
        ``{{key}}`` -> ``str(value)`` substitution over the top-level keys of
        ``data`` is used instead (best-effort fallback).

        Args:
            template: Template source text.
            data: Values exposed to the template as top-level variables.

        Returns:
            The rendered document.
        """
        # Imported lazily so the dependency is only needed for custom templates.
        from jinja2.sandbox import SandboxedEnvironment

        try:
            return SandboxedEnvironment().from_string(template).render(**data)
        except Exception:
            # Deliberate best-effort: keep exporting, but leave a trace of why
            # the real renderer was not used.
            logging.getLogger(__name__).warning(
                "Jinja2 rendering of custom template failed; "
                "falling back to simple placeholder replacement",
                exc_info=True,
            )
            content = template
            for key, value in data.items():
                content = content.replace("{{" + str(key) + "}}", str(value))
            return content

    def _render_default_template(self, data: Dict[str, Any], branding: Optional[Dict[str, Any]]) -> str:
        """Render the default Markdown layout.

        Sections, in order: optional branding line, video information,
        summary, key points, main themes, actionable insights, optional
        chapter breakdown, and a processing-information footer.

        Args:
            data: Prepared summary data. Missing keys, and keys explicitly
                set to ``None``, are rendered as ``N/A`` or as the
                "none identified" placeholder of their section.
            branding: Optional mapping; when it has a truthy
                ``company_name`` a "Generated by" line is put first.

        Returns:
            The complete Markdown document.
        """
        # `or {}` rather than a .get() default: the key may be present but None.
        video_metadata = data.get("video_metadata") or {}
        processing_metadata = data.get("processing_metadata") or {}
        export_metadata = data.get("export_metadata") or {}

        # Header with branding
        header = ""
        if branding and branding.get("company_name"):
            header = f"*Generated by {branding['company_name']} using YouTube Summarizer*\n\n"

        # Collect the pieces and join once at the end.
        parts = [
            f"{header}# YouTube Video Summary\n"
            "\n"
            "## Video Information\n"
            f"- **Title**: {video_metadata.get('title', 'N/A')}\n"
            f"- **URL**: {data.get('video_url', 'N/A')}\n"
            f"- **Channel**: {video_metadata.get('channel_name', 'N/A')}\n"
            f"- **Duration**: {self._format_duration(video_metadata.get('duration'))}\n"
            f"- **Published**: {video_metadata.get('published_at', 'N/A')}\n"
            f"- **Views**: {self._format_number(video_metadata.get('view_count'))}\n"
            "\n"
            "## Summary\n"
            "\n"
            f"{data.get('summary', 'No summary available')}\n"
            "\n"
            "## Key Points\n"
            "\n"
        ]

        # Add key points
        key_points = data.get('key_points') or []
        if key_points:
            parts.extend(f"- {point}\n" for point in key_points)
        else:
            parts.append("*No key points identified*\n")

        parts.append("\n## Main Themes\n\n")

        # Add main themes
        main_themes = data.get('main_themes') or []
        if main_themes:
            parts.extend(f"- **{theme}**\n" for theme in main_themes)
        else:
            parts.append("*No main themes identified*\n")

        parts.append("\n## Actionable Insights\n\n")

        # Add actionable insights (numbered from 1)
        insights = data.get('actionable_insights') or []
        if insights:
            parts.extend(
                f"{i}. {insight}\n" for i, insight in enumerate(insights, 1)
            )
        else:
            parts.append("*No actionable insights identified*\n")

        # Add chapters/timestamps if available; the section is omitted
        # entirely when there are none.
        chapters = data.get('chapters') or []
        if chapters:
            parts.append("\n## Chapter Breakdown\n\n")
            for chapter in chapters:
                timestamp = chapter.get('timestamp', '')
                title = chapter.get('title', '')
                summary = chapter.get('summary', '')
                parts.append(f"### [{timestamp}] {title}\n{summary}\n\n")

        # Add metadata footer
        parts.append(
            "\n"
            "\n"
            "---\n"
            "\n"
            "## Processing Information\n"
            f"- **AI Model**: {processing_metadata.get('model', 'N/A')}\n"
            f"- **Processing Time**: {self._format_duration(processing_metadata.get('processing_time_seconds'))}\n"
            f"- **Confidence Score**: {self._format_percentage(data.get('confidence_score'))}\n"
            f"- **Token Usage**: {processing_metadata.get('tokens_used', 'N/A')}\n"
            f"- **Generated**: {export_metadata.get('exported_at', 'N/A')}\n"
            "\n"
            "*Summary generated by YouTube Summarizer - Transform video content into actionable insights*\n"
        )

        return "".join(parts)

    def _format_duration(self, duration: Optional[Any]) -> str:
        """Format a duration as human-readable text.

        Args:
            duration: A number of seconds, an already formatted string
                (like "10:30"), or ``None``.

        Returns:
            ``'N/A'`` for any falsy value (``None``, ``0``, ``''``); the
            string unchanged if a string was given; otherwise
            ``"{h}h {m}m {s}s"``, ``"{m}m {s}s"`` or ``"{s}s"`` depending on
            the magnitude. Values that cannot be converted to an integer are
            returned via ``str()``.
        """
        if not duration:
            return 'N/A'

        # If it's already a string (like "10:30"), return it
        if isinstance(duration, str):
            return duration

        # If it's a number, split whole seconds into h/m/s
        try:
            total_seconds = int(duration)
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers float('inf'); ValueError covers NaN.
            return str(duration)

        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)

        if hours > 0:
            return f"{hours}h {minutes}m {seconds}s"
        if minutes > 0:
            return f"{minutes}m {seconds}s"
        return f"{seconds}s"

    def _format_number(self, number: Optional[Any]) -> str:
        """Format large numbers with commas.

        Args:
            number: A number, a string holding an integer (e.g. ``"12345"``),
                or ``None``.

        Returns:
            ``'N/A'`` for ``None``; the value with thousands separators for
            numbers and integer strings; a non-numeric string unchanged.
        """
        if number is None:
            return 'N/A'

        # The ',' format option is rejected for str, so convert counts that
        # arrive as text before formatting.
        if isinstance(number, str):
            try:
                number = int(number.strip())
            except ValueError:
                return number

        return f"{number:,}"

    def _format_percentage(self, value: Optional[Any]) -> str:
        """Format a decimal fraction as a percentage with one decimal place.

        Args:
            value: A fraction such as ``0.87``, a string holding one, or
                ``None``.

        Returns:
            ``'N/A'`` for ``None``; otherwise ``value * 100`` followed by
            ``%`` (``0.87`` -> ``"87.0%"``). A non-numeric string is returned
            unchanged.
        """
        if value is None:
            return 'N/A'

        # Multiplying a str by 100 repeats it instead of scaling it, so
        # convert textual scores first.
        if isinstance(value, str):
            try:
                value = float(value)
            except ValueError:
                return value

        return f"{value * 100:.1f}%"

    def get_file_extension(self) -> str:
        """Return the file extension for this format, without a leading dot."""
        return "md"