# Source: youtube-summarizer/backend/services/exporters/html_exporter.py (534 lines, 16 KiB, Python)

"""
HTML Exporter for YouTube Summaries
Exports summaries to responsive HTML format with embedded styles
"""
import html as html_module
import re
import tempfile
from typing import Dict, Any, Optional

from ..export_service import BaseExporter
class HTMLExporter(BaseExporter):
    """Export summaries to a self-contained, responsive HTML document.

    The exporter either fills a caller-supplied template (``{{key}}``
    placeholders) or renders the built-in layout with embedded CSS.  All
    summary-derived text is HTML-escaped before it is written to the page.
    """

    # Brand colours used when no (valid) branding override is supplied.
    _DEFAULT_PRIMARY_COLOR = "#2563eb"
    _DEFAULT_SECONDARY_COLOR = "#1e40af"

    # Conservative whitelist for CSS colour values: hex, a bare colour name,
    # or rgb()/rgba()/hsl()/hsla() functional notation.  None of the accepted
    # forms can contain ';', '{', '}', '<' or '>', so a branding value cannot
    # break out of its declaration or out of the <style> element.
    _CSS_COLOR_RE = re.compile(
        r"#[0-9a-fA-F]{3,8}"
        r"|[a-zA-Z]+"
        r"|(?:rgb|hsl)a?\([0-9a-zA-Z\s.,%/+-]+\)"
    )

    async def export(
        self,
        summary_data: Dict[str, Any],
        template: Optional[str] = None,
        branding: Optional[Dict[str, Any]] = None
    ) -> str:
        """Render the summary as HTML and write it to a temporary file.

        Args:
            summary_data: Raw summary payload; normalised through
                ``self._prepare_summary_data`` before rendering.
            template: Optional custom template containing ``{{key}}``
                placeholders.  When empty or ``None`` the default layout
                is used.
            branding: Optional branding options for the default layout
                (``company_name``, ``primary_color``, ``secondary_color``).

        Returns:
            Path of the generated ``.html`` file.  The file is created with
            ``delete=False``, so it is not removed when it is closed.
        """
        data = self._prepare_summary_data(summary_data)

        # Use custom template if provided, otherwise default
        if template:
            content = await self._render_custom_template(template, data)
        else:
            content = self._render_default_template(data, branding)

        # The document declares charset UTF-8, so the file must be written as
        # UTF-8 regardless of the platform's default locale encoding.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.html', delete=False, encoding='utf-8'
        ) as f:
            f.write(content)
        return f.name

    async def _render_custom_template(self, template: str, data: Dict[str, Any]) -> str:
        """Fill ``{{key}}`` placeholders in *template* with values from *data*.

        Each value is converted with ``str()`` and HTML-escaped before it is
        inserted.  Substitution happens in a single pass, so placeholder-like
        text inside a value is never expanded a second time.

        Args:
            template: Template text containing ``{{key}}`` placeholders.
            data: Mapping of placeholder names to values.

        Returns:
            The template with every known placeholder replaced; unknown
            placeholders are left untouched.
        """
        if not data:
            return template

        replacements = {
            f"{{{{{key}}}}}": html_module.escape(str(value))
            for key, value in data.items()
        }
        # Longest token first so that overlapping tokens cannot shadow each other.
        tokens = sorted(replacements, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(token) for token in tokens))
        return pattern.sub(lambda match: replacements[match.group(0)], template)

    @staticmethod
    def _escape(text: Any) -> str:
        """Return *text* HTML-escaped, or ``'N/A'`` when it is ``None``."""
        if text is None:
            return 'N/A'
        return html_module.escape(str(text))

    @staticmethod
    def _safe_link_target(url: Any) -> str:
        """Return *url* if it is an http(s) URL, otherwise ``'#'``.

        Prevents schemes such as ``javascript:`` from ending up in ``href``.
        """
        if isinstance(url, str):
            candidate = url.strip()
            if candidate.lower().startswith(("http://", "https://")):
                return candidate
        return '#'

    @classmethod
    def _safe_css_color(cls, value: Any, fallback: str) -> str:
        """Return *value* if it looks like a plain CSS colour, else *fallback*."""
        if isinstance(value, str):
            candidate = value.strip()
            if cls._CSS_COLOR_RE.fullmatch(candidate):
                return candidate
        return fallback

    def _render_default_template(self, data: Dict[str, Any], branding: Optional[Dict[str, Any]]) -> str:
        """Render the built-in HTML layout with responsive design.

        Args:
            data: Prepared summary data.  Reads ``video_metadata``,
                ``processing_metadata``, ``export_metadata``, ``video_url``,
                ``summary``, ``key_points``, ``main_themes``,
                ``actionable_insights``, ``chapters`` and ``confidence_score``.
            branding: Optional branding options; ``company_name`` adds a
                "Generated by" line, colours are handled by
                ``_get_default_styles``.

        Returns:
            The complete HTML document as a string.
        """
        # ``or {}`` also covers keys that are present but explicitly None.
        video_metadata = data.get("video_metadata") or {}
        processing_metadata = data.get("processing_metadata") or {}
        export_metadata = data.get("export_metadata") or {}

        escape = self._escape

        company_name = branding.get("company_name") if branding else None
        branding_line = (
            f'<p class="branding">Generated by {escape(company_name)} using YouTube Summarizer</p>'
            if company_name else ''
        )

        # Only http(s) URLs become clickable; anything else links to '#'.
        link_target = self._safe_link_target(data.get('video_url'))

        # Generate HTML
        return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>YouTube Summary - {escape(video_metadata.get('title', 'Video'))}</title>
<style>
{self._get_default_styles(branding)}
</style>
</head>
<body>
<div class="container">
<header>
<h1>YouTube Video Summary</h1>
{branding_line}
</header>
<section class="video-info">
<h2>Video Information</h2>
<div class="info-grid">
<div class="info-item">
<span class="label">Title:</span>
<span class="value">{escape(video_metadata.get('title', 'N/A'))}</span>
</div>
<div class="info-item">
<span class="label">Channel:</span>
<span class="value">{escape(video_metadata.get('channel_name', 'N/A'))}</span>
</div>
<div class="info-item">
<span class="label">Duration:</span>
<span class="value">{escape(self._format_duration(video_metadata.get('duration')))}</span>
</div>
<div class="info-item">
<span class="label">Published:</span>
<span class="value">{escape(video_metadata.get('published_at', 'N/A'))}</span>
</div>
<div class="info-item">
<span class="label">Views:</span>
<span class="value">{escape(self._format_number(video_metadata.get('view_count')))}</span>
</div>
<div class="info-item full-width">
<span class="label">URL:</span>
<a href="{escape(link_target)}" target="_blank" rel="noopener noreferrer" class="value">{escape(data.get('video_url', 'N/A'))}</a>
</div>
</div>
</section>
<section class="summary">
<h2>Summary</h2>
<div class="content">
{self._format_paragraph(data.get('summary', 'No summary available'))}
</div>
</section>
<section class="key-points">
<h2>Key Points</h2>
<ul>
{self._format_list_items(data.get('key_points', []))}
</ul>
</section>
<section class="main-themes">
<h2>Main Themes</h2>
<div class="theme-tags">
{self._format_theme_tags(data.get('main_themes', []))}
</div>
</section>
<section class="actionable-insights">
<h2>Actionable Insights</h2>
<ol>
{self._format_list_items(data.get('actionable_insights', []), ordered=True)}
</ol>
</section>
{self._format_chapters_section(data.get('chapters', []))}
<footer>
<div class="processing-info">
<h3>Processing Information</h3>
<div class="info-grid">
<div class="info-item">
<span class="label">AI Model:</span>
<span class="value">{escape(processing_metadata.get('model', 'N/A'))}</span>
</div>
<div class="info-item">
<span class="label">Processing Time:</span>
<span class="value">{escape(self._format_duration(processing_metadata.get('processing_time_seconds')))}</span>
</div>
<div class="info-item">
<span class="label">Confidence Score:</span>
<span class="value">{escape(self._format_percentage(data.get('confidence_score')))}</span>
</div>
<div class="info-item">
<span class="label">Generated:</span>
<span class="value">{escape(export_metadata.get('exported_at', 'N/A'))}</span>
</div>
</div>
</div>
<p class="footer-text">Summary generated by YouTube Summarizer - Transform video content into actionable insights</p>
</footer>
</div>
</body>
</html>"""

    def _get_default_styles(self, branding: Optional[Dict[str, Any]]) -> str:
        """Get default CSS styles with optional branding customization.

        Args:
            branding: Optional mapping; ``primary_color`` and
                ``secondary_color`` override the default palette when they
                are plain CSS colour values.  Missing, ``None`` or
                suspicious values fall back to the defaults.

        Returns:
            The stylesheet text to embed in the document's ``<style>`` tag.
        """
        primary_color = self._DEFAULT_PRIMARY_COLOR
        secondary_color = self._DEFAULT_SECONDARY_COLOR
        if branding:
            # Validate before interpolating: these values land inside <style>.
            primary_color = self._safe_css_color(
                branding.get("primary_color"), primary_color
            )
            secondary_color = self._safe_css_color(
                branding.get("secondary_color"), secondary_color
            )

        return f"""
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6;
color: #333;
background: linear-gradient(135deg, #f5f5f5 0%, #e8e8e8 100%);
min-height: 100vh;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}}
header {{
text-align: center;
padding: 40px 0;
background: linear-gradient(135deg, {primary_color} 0%, {secondary_color} 100%);
color: white;
border-radius: 10px;
margin-bottom: 30px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}}
header h1 {{
font-size: 2.5em;
margin-bottom: 10px;
}}
.branding {{
font-size: 0.9em;
opacity: 0.9;
}}
section {{
background: white;
padding: 30px;
margin-bottom: 25px;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}}
section h2 {{
color: {primary_color};
font-size: 1.8em;
margin-bottom: 20px;
padding-bottom: 10px;
border-bottom: 2px solid #e5e5e5;
}}
section h3 {{
color: {secondary_color};
font-size: 1.3em;
margin-bottom: 15px;
}}
.info-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 20px;
margin-top: 20px;
}}
.info-item {{
display: flex;
align-items: center;
}}
.info-item.full-width {{
grid-column: 1 / -1;
}}
.info-item .label {{
font-weight: 600;
color: #666;
margin-right: 10px;
min-width: 100px;
}}
.info-item .value {{
flex: 1;
color: #333;
}}
.info-item a.value {{
color: {primary_color};
text-decoration: none;
word-break: break-all;
}}
.info-item a.value:hover {{
text-decoration: underline;
}}
.content {{
font-size: 1.1em;
line-height: 1.8;
color: #444;
}}
.content p {{
margin-bottom: 15px;
}}
ul, ol {{
padding-left: 30px;
margin-top: 15px;
}}
ul li, ol li {{
margin-bottom: 12px;
line-height: 1.7;
color: #444;
}}
.theme-tags {{
display: flex;
flex-wrap: wrap;
gap: 10px;
margin-top: 15px;
}}
.theme-tag {{
display: inline-block;
padding: 8px 16px;
background: {primary_color};
color: white;
border-radius: 20px;
font-size: 0.9em;
font-weight: 500;
}}
.chapters {{
margin-top: 30px;
}}
.chapter {{
margin-bottom: 25px;
padding: 20px;
background: #f9f9f9;
border-left: 4px solid {primary_color};
border-radius: 5px;
}}
.chapter-header {{
display: flex;
align-items: center;
margin-bottom: 10px;
}}
.timestamp {{
background: {primary_color};
color: white;
padding: 4px 10px;
border-radius: 4px;
margin-right: 15px;
font-size: 0.9em;
font-weight: 600;
}}
.chapter-title {{
font-size: 1.2em;
font-weight: 600;
color: #333;
}}
.chapter-summary {{
color: #666;
line-height: 1.6;
margin-top: 10px;
}}
footer {{
background: #2c2c2c;
color: #fff;
padding: 30px;
border-radius: 10px;
margin-top: 40px;
}}
footer h3 {{
color: #fff;
margin-bottom: 20px;
}}
footer .info-item .label {{
color: #bbb;
}}
footer .info-item .value {{
color: #fff;
}}
.footer-text {{
text-align: center;
margin-top: 30px;
padding-top: 20px;
border-top: 1px solid #444;
color: #999;
font-size: 0.9em;
}}
@media (max-width: 768px) {{
header h1 {{
font-size: 2em;
}}
section {{
padding: 20px;
}}
section h2 {{
font-size: 1.5em;
}}
.info-grid {{
grid-template-columns: 1fr;
}}
}}
@media print {{
body {{
background: white;
}}
header {{
background: none;
color: #333;
border: 2px solid #333;
}}
section {{
box-shadow: none;
border: 1px solid #ddd;
}}
footer {{
background: white;
color: #333;
border: 2px solid #333;
}}
}}
"""

    def _format_paragraph(self, text: str) -> str:
        """Format text into escaped HTML paragraphs.

        Paragraphs are separated by blank lines; ``\\r\\n`` and ``\\r`` line
        endings are normalised first so Windows-style text splits correctly.

        Returns:
            One ``<p>`` element per non-blank paragraph joined by newlines,
            or a "No content available" paragraph when there is no content.
        """
        placeholder = "<p>No content available</p>"
        if not text:
            return placeholder

        normalized = str(text).replace('\r\n', '\n').replace('\r', '\n')
        paragraphs = [
            f"<p>{html_module.escape(chunk.strip())}</p>"
            for chunk in normalized.split('\n\n')
            if chunk.strip()
        ]
        # Whitespace-only input yields no paragraphs; show the placeholder.
        return '\n'.join(paragraphs) if paragraphs else placeholder

    def _format_list_items(self, items: list, ordered: bool = False) -> str:
        """Format list items as escaped ``<li>`` elements.

        Args:
            items: Values to render; each is converted with ``str()``.
            ordered: Accepted for call-site compatibility.  It does not
                change the output because the caller supplies the
                surrounding ``<ul>``/``<ol>`` element.

        Returns:
            The ``<li>`` elements joined by newlines, or a single
            "No items available" item when *items* is empty.
        """
        if not items:
            return "<li>No items available</li>"
        return '\n'.join(
            f"<li>{html_module.escape(str(item))}</li>" for item in items
        )

    def _format_theme_tags(self, themes: list) -> str:
        """Format themes as escaped ``<span class="theme-tag">`` elements.

        Returns:
            The tags joined by newlines, or a single "No themes identified"
            tag when *themes* is empty.
        """
        if not themes:
            return '<span class="theme-tag">No themes identified</span>'
        return '\n'.join(
            f'<span class="theme-tag">{html_module.escape(str(theme))}</span>'
            for theme in themes
        )

    def _format_chapters_section(self, chapters: list) -> str:
        """Format the chapter breakdown section if chapters are available.

        Args:
            chapters: Sequence of mappings read via ``timestamp``, ``title``
                and ``summary`` keys.  Missing or ``None`` fields render as
                empty text; the summary block is omitted when it is empty.

        Returns:
            The ``<section class="chapters">`` markup, or ``""`` when there
            are no chapters.
        """
        if not chapters:
            return ""

        parts = [
            '\n<section class="chapters">\n'
            '<h2>Chapter Breakdown</h2>\n'
            '<div class="chapters-container">\n'
        ]
        for chapter in chapters:
            # ``or ''`` keeps an explicit None from rendering as "None".
            timestamp = html_module.escape(str(chapter.get('timestamp') or ''))
            title = html_module.escape(str(chapter.get('title') or ''))
            summary = html_module.escape(str(chapter.get('summary') or ''))
            summary_html = (
                f'<div class="chapter-summary">{summary}</div>' if summary else ''
            )
            parts.append(
                '\n<div class="chapter">\n'
                '<div class="chapter-header">\n'
                f'<span class="timestamp">{timestamp}</span>\n'
                f'<span class="chapter-title">{title}</span>\n'
                '</div>\n'
                f'{summary_html}\n'
                '</div>\n'
            )
        parts.append('\n</div>\n</section>\n')
        return ''.join(parts)

    def _format_duration(self, duration: Optional[Any]) -> str:
        """Format a duration in seconds as a human-readable string.

        Args:
            duration: Seconds as a number, or an already formatted string
                (e.g. ``"10:30"``), which is returned unchanged.

        Returns:
            ``"1h 2m 3s"``, ``"2m 3s"`` or ``"3s"``; ``'N/A'`` for falsy,
            negative, non-finite or non-numeric input.
        """
        if not duration:
            return 'N/A'

        # Handle string format (e.g., "10:30")
        if isinstance(duration, str):
            return duration

        # Handle numeric format (seconds); OverflowError covers float('inf').
        try:
            total_seconds = int(duration)
        except (ValueError, TypeError, OverflowError):
            return 'N/A'
        if total_seconds < 0:
            return 'N/A'

        minutes, secs = divmod(total_seconds, 60)
        hours, minutes = divmod(minutes, 60)
        if hours > 0:
            return f"{hours}h {minutes}m {secs}s"
        if minutes > 0:
            return f"{minutes}m {secs}s"
        return f"{secs}s"

    def _format_number(self, number: Optional[Any]) -> str:
        """Format a number with thousands separators.

        Numeric strings (for example counts delivered as text by an API)
        are converted to ``int`` first.

        Returns:
            The grouped number such as ``"1,234,567"``, ``'N/A'`` for
            ``None``, or ``str(number)`` when it cannot be formatted.
        """
        if number is None:
            return 'N/A'
        if isinstance(number, str):
            try:
                number = int(number.strip().replace(',', ''))
            except ValueError:
                return number
        try:
            return f"{number:,}"
        except (ValueError, TypeError):
            return str(number)

    def _format_percentage(self, value: Optional[float]) -> str:
        """Format a decimal fraction as a percentage with one decimal place.

        Returns:
            For example ``"85.6%"`` for ``0.856``; ``'N/A'`` when *value* is
            ``None`` or cannot be converted to ``float``.
        """
        if value is None:
            return 'N/A'
        try:
            ratio = float(value)
        except (TypeError, ValueError):
            return 'N/A'
        return f"{ratio * 100:.1f}%"

    def get_file_extension(self) -> str:
        """Return the file extension (without a dot) for this exporter."""
        return "html"