"""
PDF Exporter for YouTube Summaries
Exports summaries to professionally formatted PDF documents
Requires: pip install reportlab
"""
import html
import os
import tempfile
from typing import Dict, Any, Optional, List

from ..export_service import BaseExporter

try:
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
    from reportlab.lib import colors
    from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False


class PDFExporter(BaseExporter):
    """Export summaries to PDF format.

    The generated document contains, in order: a title, an optional branding
    line, a video-information table, the summary text, key points, main
    themes, actionable insights, an optional chapter breakdown on its own
    page, and a processing-information footer. Every page carries a centred
    page number.
    """

    # Fallback accent colour used when branding supplies none (or an invalid one).
    _DEFAULT_PRIMARY_COLOR = "#2563eb"
    # Maximum number of URL characters shown in the video-information table.
    _URL_DISPLAY_LIMIT = 50

    def __init__(self):
        """Fail fast when the optional ``reportlab`` dependency is missing.

        Raises:
            ImportError: if ``reportlab`` could not be imported.
        """
        # NOTE(review): BaseExporter.__init__ is not invoked here (it was not
        # in the original either); confirm the base class needs no set-up.
        if not REPORTLAB_AVAILABLE:
            raise ImportError("reportlab is required for PDF export. Install with: pip install reportlab")

    async def export(
        self,
        summary_data: Dict[str, Any],
        template: Optional[str] = None,
        branding: Optional[Dict[str, Any]] = None
    ) -> str:
        """Render ``summary_data`` to a temporary PDF file.

        Args:
            summary_data: Raw summary payload; it is normalised through
                ``self._prepare_summary_data`` before rendering.
            template: Accepted for interface compatibility; not used by this
                exporter.
            branding: Optional mapping; the keys ``primary_color`` and
                ``company_name`` are honoured when present.

        Returns:
            Path of the generated ``.pdf`` file. The file is not deleted
            automatically; the caller owns it.

        Raises:
            Exception: anything raised while building the document is
                re-raised after the partially written file is removed.
        """
        data = self._prepare_summary_data(summary_data)
        video_title = (data.get('video_metadata') or {}).get('title', 'Video')

        # Only reserve a unique path here and close the handle immediately:
        # ReportLab opens the path itself, and a second open of a still-open
        # NamedTemporaryFile fails on Windows.
        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:
            output_path = f.name

        try:
            doc = SimpleDocTemplate(
                output_path,
                pagesize=A4,
                leftMargin=1*inch,
                rightMargin=1*inch,
                topMargin=1*inch,
                bottomMargin=1*inch,
                title=f"YouTube Summary - {video_title}",
                author="YouTube Summarizer"
            )
            story = self._build_pdf_content(data, branding)
            doc.build(story, onFirstPage=self._add_page_number, onLaterPages=self._add_page_number)
        except Exception:
            # Do not leave an empty or half-written temp file behind.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise
        return output_path

    def _build_pdf_content(self, data: Dict[str, Any], branding: Optional[Dict[str, Any]]) -> List:
        """Build the ordered list of flowables that make up the document.

        Args:
            data: Prepared summary data (mapping).
            branding: Optional branding mapping (``primary_color``,
                ``company_name``).

        Returns:
            List of ReportLab flowables ready for ``doc.build``.
        """
        styles = getSampleStyleSheet()
        primary_color = self._resolve_primary_color(branding)

        # Custom styles
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Title'],
            fontSize=24,
            textColor=primary_color,
            spaceAfter=30,
            alignment=TA_CENTER
        )
        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading2'],
            fontSize=14,
            textColor=primary_color,
            spaceBefore=20,
            spaceAfter=10,
            leftIndent=0
        )
        subheading_style = ParagraphStyle(
            'CustomSubHeading',
            parent=styles['Heading3'],
            fontSize=12,
            textColor=colors.darkgray,
            spaceBefore=15,
            spaceAfter=8
        )
        body_style = ParagraphStyle(
            'CustomBody',
            parent=styles['Normal'],
            fontSize=11,
            alignment=TA_JUSTIFY,
            spaceAfter=12
        )

        story = []

        # Title Page
        story.append(Paragraph("YouTube Video Summary", title_style))

        # Branding
        if branding and branding.get("company_name"):
            branding_style = ParagraphStyle(
                'Branding',
                parent=styles['Normal'],
                fontSize=10,
                textColor=colors.gray,
                alignment=TA_CENTER
            )
            # Paragraph text is parsed as markup, so the name must be escaped.
            company_name = self._safe_str(branding['company_name'])
            story.append(Paragraph(f"Generated by {company_name} using YouTube Summarizer", branding_style))
        story.append(Spacer(1, 30))

        # Video Information Table
        story.append(self._build_video_table(data))
        story.append(Spacer(1, 40))

        # Summary
        story.append(Paragraph("Summary", heading_style))
        summary_text = data.get('summary') or 'No summary available'
        story.append(Paragraph(self._safe_str(summary_text), body_style))
        story.append(Spacer(1, 20))

        # Key Points
        self._append_list_section(
            story, "Key Points", data.get('key_points', []),
            "No key points identified", heading_style, body_style
        )
        story.append(Spacer(1, 20))

        # Main Themes
        self._append_list_section(
            story, "Main Themes", data.get('main_themes', []),
            "No main themes identified", heading_style, body_style
        )
        story.append(Spacer(1, 20))

        # Actionable Insights
        self._append_list_section(
            story, "Actionable Insights", data.get('actionable_insights', []),
            "No actionable insights identified", heading_style, body_style,
            numbered=True
        )

        # Chapters (if available) - New Page
        chapters = data.get('chapters', [])
        if chapters:
            story.append(PageBreak())
            story.append(Paragraph("Chapter Breakdown", heading_style))
            for chapter in chapters:
                timestamp = self._safe_str(chapter.get('timestamp', ''))
                title = self._safe_str(chapter.get('title', ''))
                summary = chapter.get('summary', '')
                story.append(Paragraph(f"[{timestamp}] {title}", subheading_style))
                if summary:
                    story.append(Paragraph(self._safe_str(summary), body_style))
                story.append(Spacer(1, 10))

        # Footer - Processing Information
        story.append(Spacer(1, 40))
        footer_style = ParagraphStyle(
            'Footer',
            parent=styles['Normal'],
            fontSize=8,
            textColor=colors.grey
        )
        story.append(self._build_footer_table(data))
        story.append(Spacer(1, 20))
        story.append(Paragraph(
            "Summary generated by YouTube Summarizer - Transform video content into actionable insights",
            footer_style
        ))
        return story

    def _resolve_primary_color(self, branding: Optional[Dict[str, Any]]):
        """Return the branding accent colour, or the default when absent/invalid."""
        primary_color = colors.HexColor(self._DEFAULT_PRIMARY_COLOR)
        if branding and branding.get("primary_color"):
            try:
                primary_color = colors.HexColor(branding["primary_color"])
            except Exception:
                # Deliberate best-effort: a malformed branding colour must not
                # abort the export, so the default colour is kept.
                pass
        return primary_color

    def _append_list_section(self, story: List, heading: str, items: Any, empty_text: str,
                             heading_style: Any, body_style: Any, numbered: bool = False) -> None:
        """Append a heading followed by one paragraph per item.

        Items are prefixed with a bullet, or with ``1.``, ``2.``, ... when
        ``numbered`` is true. ``empty_text`` is shown when ``items`` is empty.
        """
        story.append(Paragraph(heading, heading_style))
        if not items:
            story.append(Paragraph(empty_text, body_style))
            return
        for index, item in enumerate(items, 1):
            marker = f"{index}." if numbered else "•"
            story.append(Paragraph(f"{marker} {self._safe_str(item)}", body_style))

    def _build_video_table(self, data: Dict[str, Any]):
        """Build the two-column table describing the source video."""
        video_metadata = data.get("video_metadata") or {}

        # Only add the ellipsis when the URL was actually shortened.
        url = self._plain_str(data.get('video_url', 'N/A'))
        if len(url) > self._URL_DISPLAY_LIMIT:
            url = url[:self._URL_DISPLAY_LIMIT] + "..."

        # Plain-string table cells are drawn verbatim (not parsed as markup),
        # so they must NOT be XML-escaped or the entities would be printed.
        video_info = [
            ["Video Title", self._plain_str(video_metadata.get('title', 'N/A'))],
            ["Channel", self._plain_str(video_metadata.get('channel_name', 'N/A'))],
            ["Duration", self._format_duration(video_metadata.get('duration'))],
            ["Published", self._plain_str(video_metadata.get('published_at', 'N/A'))],
            ["Views", self._format_number(video_metadata.get('view_count'))],
            ["URL", url]
        ]
        video_table = Table(video_info, colWidths=[2*inch, 4*inch])
        video_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (0, -1), colors.lightgrey),
            ('TEXTCOLOR', (0, 0), (0, -1), colors.black),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, -1), 10),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            ('ROWBACKGROUNDS', (0, 0), (-1, -1), [colors.white, colors.whitesmoke])
        ]))
        return video_table

    def _build_footer_table(self, data: Dict[str, Any]):
        """Build the small grey table with processing information."""
        processing_metadata = data.get("processing_metadata") or {}
        export_metadata = data.get('export_metadata') or {}
        footer_data = [
            ["AI Model", self._plain_str(processing_metadata.get('model', 'N/A'))],
            ["Processing Time", self._format_duration(processing_metadata.get('processing_time_seconds'))],
            ["Confidence Score", self._format_percentage(data.get('confidence_score'))],
            ["Token Usage", self._plain_str(processing_metadata.get('tokens_used', 'N/A'))],
            ["Generated", self._plain_str(export_metadata.get('exported_at', 'N/A'))]
        ]
        footer_table = Table(footer_data, colWidths=[1.5*inch, 2*inch])
        footer_table.setStyle(TableStyle([
            ('FONTSIZE', (0, 0), (-1, -1), 8),
            ('TEXTCOLOR', (0, 0), (-1, -1), colors.grey),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold')
        ]))
        return footer_table

    def _add_page_number(self, canvas, doc):
        """Draw a centred ``Page N`` label near the bottom of the current page."""
        canvas.saveState()
        canvas.setFont('Helvetica', 9)
        canvas.setFillColor(colors.grey)
        text = f"Page {canvas.getPageNumber()}"
        # Centre on the document's own page width instead of assuming A4.
        canvas.drawCentredString(doc.pagesize[0] / 2, 0.75 * inch, text)
        canvas.restoreState()

    @staticmethod
    def _plain_str(value: Any) -> str:
        """Convert ``value`` to text without escaping; ``None`` becomes ``'N/A'``."""
        return 'N/A' if value is None else str(value)

    def _safe_str(self, value: Any) -> str:
        """Convert ``value`` to text that is safe inside Paragraph markup.

        ``None`` becomes ``'N/A'``. The characters ``&``, ``<``, ``>``, ``"``
        and ``'`` are replaced by character references so that user-supplied
        text cannot be interpreted as (or break) paragraph markup.
        """
        if value is None:
            return 'N/A'
        # html.escape replaces '&' first, so already-escaped output is never
        # escaped twice within one call.
        return html.escape(str(value), quote=True)

    def _format_duration(self, duration: Optional[Any]) -> str:
        """Format a duration given in seconds as ``Xh Ym Zs``.

        Falsy values (``None``, ``0``, ``''``) yield ``'N/A'``. Strings are
        returned unchanged (e.g. an already formatted ``"10:30"``). Values
        that cannot be converted to a non-negative whole number of seconds
        yield ``'N/A'``.
        """
        if not duration:
            return 'N/A'
        # Handle string format (e.g., "10:30")
        if isinstance(duration, str):
            return duration
        # Handle numeric format (seconds)
        try:
            seconds = int(duration)
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers float('inf'); ValueError covers NaN.
            return 'N/A'
        if seconds < 0:
            return 'N/A'
        hours, remainder = divmod(seconds, 3600)
        minutes, secs = divmod(remainder, 60)
        if hours > 0:
            return f"{hours}h {minutes}m {secs}s"
        if minutes > 0:
            return f"{minutes}m {secs}s"
        return f"{secs}s"

    def _format_number(self, number: Optional[int]) -> str:
        """Format a number with thousands separators; ``None`` becomes ``'N/A'``.

        Numeric strings (e.g. ``"12345"``) are converted first; any other
        value that cannot be formatted is returned as plain text.
        """
        if number is None:
            return 'N/A'
        try:
            return f"{number:,}"
        except (ValueError, TypeError):
            pass
        try:
            return f"{int(str(number).strip()):,}"
        except ValueError:
            return str(number)

    def _format_percentage(self, value: Optional[float]) -> str:
        """Format a fraction (e.g. ``0.85``) as a percentage with one decimal.

        ``None`` and values that cannot be converted to ``float`` yield
        ``'N/A'``.
        """
        if value is None:
            return 'N/A'
        try:
            return f"{float(value) * 100:.1f}%"
        except (TypeError, ValueError):
            return 'N/A'

    def get_file_extension(self) -> str:
        """Return the file extension (without a dot) of exported files."""
        return "pdf"