307 lines
11 KiB
Python
307 lines
11 KiB
Python
"""
|
|
PDF Exporter for YouTube Summaries
|
|
Exports summaries to professionally formatted PDF documents
|
|
Requires: pip install reportlab
|
|
"""
|
|
|
|
import os
import tempfile
from typing import Dict, Any, Optional, List

from ..export_service import BaseExporter

try:
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
    from reportlab.lib import colors
    from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False


class PDFExporter(BaseExporter):
    """Export summaries to PDF format.

    The summary dictionary is normalised with ``_prepare_summary_data`` and
    rendered with ReportLab into an A4 document containing a title, an
    optional branding line, a video information table, the summary, key
    points, main themes, actionable insights, an optional chapter breakdown
    and a processing-information footer.
    """

    # Characters that are significant in ReportLab paragraph markup, mapped to
    # their entity form. A translation table escapes them in a single pass, so
    # the "&" introduced by one replacement is never escaped a second time.
    _XML_ESCAPES = str.maketrans({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    })

    # Longest URL (in characters) shown in the video information table.
    _MAX_URL_LENGTH = 50

    def __init__(self):
        """Fail fast when the optional reportlab dependency is missing.

        Raises:
            ImportError: If reportlab could not be imported when this module
                was loaded.
        """
        # NOTE(review): BaseExporter.__init__ is not invoked here (unchanged
        # from the previous implementation) - confirm the base class needs no
        # initialisation.
        if not REPORTLAB_AVAILABLE:
            raise ImportError("reportlab is required for PDF export. Install with: pip install reportlab")

    async def export(
        self,
        summary_data: Dict[str, Any],
        template: Optional[str] = None,
        branding: Optional[Dict[str, Any]] = None
    ) -> str:
        """Export to PDF.

        Args:
            summary_data: Raw summary payload; it is passed through
                ``_prepare_summary_data`` before rendering.
            template: Accepted for interface compatibility; not used by this
                exporter.
            branding: Optional branding settings. The keys read are
                ``primary_color`` and ``company_name``.

        Returns:
            Path of the generated PDF. The file is a temporary file that is
            not deleted automatically; the caller owns it.

        Raises:
            Exception: Any error raised while building the document is
                re-raised after the partially written file has been removed.
        """
        data = self._prepare_summary_data(summary_data)
        video_title = (data.get('video_metadata') or {}).get('title', 'Video')

        # Reserve a unique path and close our handle before ReportLab opens
        # the file by name: a second open of a still-open NamedTemporaryFile
        # is not possible on every platform.
        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:
            pdf_path = f.name

        try:
            doc = SimpleDocTemplate(
                pdf_path,
                pagesize=A4,
                leftMargin=1*inch,
                rightMargin=1*inch,
                topMargin=1*inch,
                bottomMargin=1*inch,
                title=f"YouTube Summary - {video_title}",
                author="YouTube Summarizer"
            )
            story = self._build_pdf_content(data, branding)
            doc.build(story, onFirstPage=self._add_page_number, onLaterPages=self._add_page_number)
        except Exception:
            # Do not leave an empty or half-written PDF behind on failure.
            try:
                os.unlink(pdf_path)
            except OSError:
                pass  # best effort; the original error is what matters
            raise

        return pdf_path

    def _build_pdf_content(self, data: Dict[str, Any], branding: Optional[Dict[str, Any]]) -> List:
        """Build PDF content elements.

        Args:
            data: Prepared summary data.
            branding: Optional branding settings (see ``export``).

        Returns:
            The list of ReportLab flowables, in reading order.
        """
        styles = self._build_styles(branding)
        story = []

        # Title page
        story.append(Paragraph("YouTube Video Summary", styles['title']))
        if branding and branding.get("company_name"):
            company_name = self._safe_str(branding['company_name'])
            story.append(Paragraph(
                f"Generated by {company_name} using YouTube Summarizer",
                styles['branding']
            ))
        story.append(Spacer(1, 30))

        # Video information table
        story.append(self._build_video_table(data, styles))
        story.append(Spacer(1, 40))

        # Summary
        story.append(Paragraph("Summary", styles['heading']))
        summary_text = data.get('summary', 'No summary available')
        story.append(Paragraph(self._safe_str(summary_text), styles['body']))
        story.append(Spacer(1, 20))

        # Key points
        story.extend(self._build_list_section(
            "Key Points", data.get('key_points'),
            "No key points identified", "• {text}", styles
        ))
        story.append(Spacer(1, 20))

        # Main themes
        story.extend(self._build_list_section(
            "Main Themes", data.get('main_themes'),
            "No main themes identified", "• <b>{text}</b>", styles
        ))
        story.append(Spacer(1, 20))

        # Actionable insights (numbered, no trailing spacer)
        story.extend(self._build_list_section(
            "Actionable Insights", data.get('actionable_insights'),
            "No actionable insights identified", "{index}. {text}", styles
        ))

        # Chapters (if available) start on a new page
        story.extend(self._build_chapter_section(data.get('chapters'), styles))

        # Footer - processing information
        story.extend(self._build_footer(data, styles))

        return story

    def _build_styles(self, branding: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Create the paragraph styles used by the document, keyed by role."""
        base = getSampleStyleSheet()

        primary_color = colors.HexColor("#2563eb")
        custom_color = branding.get("primary_color") if branding else None
        if custom_color:
            try:
                primary_color = colors.HexColor(custom_color)
            except (ValueError, TypeError):
                # An unparsable branding colour is not fatal: keep the default.
                pass

        return {
            'title': ParagraphStyle(
                'CustomTitle',
                parent=base['Title'],
                fontSize=24,
                textColor=primary_color,
                spaceAfter=30,
                alignment=TA_CENTER
            ),
            'heading': ParagraphStyle(
                'CustomHeading',
                parent=base['Heading2'],
                fontSize=14,
                textColor=primary_color,
                spaceBefore=20,
                spaceAfter=10,
                leftIndent=0
            ),
            'subheading': ParagraphStyle(
                'CustomSubHeading',
                parent=base['Heading3'],
                fontSize=12,
                textColor=colors.darkgray,
                spaceBefore=15,
                spaceAfter=8
            ),
            'body': ParagraphStyle(
                'CustomBody',
                parent=base['Normal'],
                fontSize=11,
                alignment=TA_JUSTIFY,
                spaceAfter=12
            ),
            'branding': ParagraphStyle(
                'Branding',
                parent=base['Normal'],
                fontSize=10,
                textColor=colors.gray,
                alignment=TA_CENTER
            ),
            'footer': ParagraphStyle(
                'Footer',
                parent=base['Normal'],
                fontSize=8,
                textColor=colors.grey
            ),
            # Styles for table value cells. Paragraph cells take their font
            # settings from their own style, so these repeat the font size and
            # colour that the table styles give to the plain label cells.
            'table_cell': ParagraphStyle(
                'TableCell',
                parent=base['Normal'],
                fontSize=10
            ),
            'footer_cell': ParagraphStyle(
                'FooterCell',
                parent=base['Normal'],
                fontSize=8,
                textColor=colors.grey
            ),
        }

    def _table_cell(self, value: Any, style: Any) -> Any:
        """Wrap *value* in an escaped Paragraph so it wraps inside a table cell."""
        return Paragraph(self._safe_str(value), style)

    def _build_video_table(self, data: Dict[str, Any], styles: Dict[str, Any]) -> Any:
        """Build the two-column table describing the video."""
        video_metadata = data.get("video_metadata") or {}

        # Shorten the raw URL *before* escaping so an entity is never cut in
        # half, and only add the ellipsis when something was actually removed.
        raw_url = data.get('video_url', 'N/A')
        url = 'N/A' if raw_url is None else str(raw_url)
        if len(url) > self._MAX_URL_LENGTH:
            url = url[:self._MAX_URL_LENGTH] + "..."

        rows = [
            ("Video Title", video_metadata.get('title', 'N/A')),
            ("Channel", video_metadata.get('channel_name', 'N/A')),
            ("Duration", self._format_duration(video_metadata.get('duration'))),
            ("Published", video_metadata.get('published_at', 'N/A')),
            ("Views", self._format_number(video_metadata.get('view_count'))),
            ("URL", url),
        ]
        cell_style = styles['table_cell']
        video_info = [[label, self._table_cell(value, cell_style)] for label, value in rows]

        video_table = Table(video_info, colWidths=[2*inch, 4*inch])
        video_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (0, -1), colors.lightgrey),
            ('TEXTCOLOR', (0, 0), (0, -1), colors.black),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, -1), 10),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
            # Stripe only the value column so the label column keeps its
            # light-grey fill instead of being painted over by the stripes.
            ('ROWBACKGROUNDS', (1, 0), (-1, -1), [colors.white, colors.whitesmoke])
        ]))
        return video_table

    def _build_list_section(
        self,
        heading: str,
        items: Optional[List[Any]],
        empty_message: str,
        item_template: str,
        styles: Dict[str, Any]
    ) -> List:
        """Build a heading followed by one paragraph per item.

        ``item_template`` is a ``str.format`` template that may use
        ``{index}`` (1-based position) and ``{text}`` (the escaped item).
        When *items* is empty or ``None`` a single paragraph containing
        *empty_message* is emitted instead.
        """
        flowables = [Paragraph(heading, styles['heading'])]
        if items:
            flowables.extend(
                Paragraph(
                    item_template.format(index=index, text=self._safe_str(item)),
                    styles['body']
                )
                for index, item in enumerate(items, 1)
            )
        else:
            flowables.append(Paragraph(empty_message, styles['body']))
        return flowables

    def _build_chapter_section(self, chapters: Optional[List[Dict[str, Any]]], styles: Dict[str, Any]) -> List:
        """Build the chapter breakdown page, or nothing when there are no chapters."""
        if not chapters:
            return []

        flowables = [PageBreak(), Paragraph("Chapter Breakdown", styles['heading'])]
        for chapter in chapters:
            # Timestamp and title are both interpolated into markup, so both
            # must be escaped.
            timestamp = self._safe_str(chapter.get('timestamp', ''))
            title = self._safe_str(chapter.get('title', ''))
            summary = chapter.get('summary', '')

            flowables.append(Paragraph(f"<b>[{timestamp}] {title}</b>", styles['subheading']))
            if summary:
                flowables.append(Paragraph(self._safe_str(summary), styles['body']))
            flowables.append(Spacer(1, 10))
        return flowables

    def _build_footer(self, data: Dict[str, Any], styles: Dict[str, Any]) -> List:
        """Build the processing-information table and the closing tagline."""
        processing_metadata = data.get("processing_metadata") or {}
        export_metadata = data.get('export_metadata') or {}

        rows = [
            ("AI Model", processing_metadata.get('model', 'N/A')),
            ("Processing Time", self._format_duration(processing_metadata.get('processing_time_seconds'))),
            ("Confidence Score", self._format_percentage(data.get('confidence_score'))),
            ("Token Usage", processing_metadata.get('tokens_used', 'N/A')),
            ("Generated", export_metadata.get('exported_at', 'N/A')),
        ]
        cell_style = styles['footer_cell']
        footer_data = [[label, self._table_cell(value, cell_style)] for label, value in rows]

        footer_table = Table(footer_data, colWidths=[1.5*inch, 2*inch])
        footer_table.setStyle(TableStyle([
            ('FONTSIZE', (0, 0), (-1, -1), 8),
            ('TEXTCOLOR', (0, 0), (-1, -1), colors.grey),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold')
        ]))

        return [
            Spacer(1, 40),
            footer_table,
            Spacer(1, 20),
            Paragraph(
                "Summary generated by YouTube Summarizer - Transform video content into actionable insights",
                styles['footer']
            ),
        ]

    def _add_page_number(self, canvas, doc):
        """Add page numbers to PDF.

        Draws ``Page N`` centred horizontally, 0.75 inch above the bottom edge.
        The width comes from the document's own page size, so the number stays
        centred if the page size is ever changed from A4.
        """
        canvas.saveState()
        canvas.setFont('Helvetica', 9)
        canvas.setFillColor(colors.grey)
        page_width = doc.pagesize[0]
        canvas.drawCentredString(page_width / 2, 0.75 * inch, f"Page {canvas.getPageNumber()}")
        canvas.restoreState()

    def _safe_str(self, value: Any) -> str:
        """Safely convert value to string and escape for PDF.

        Args:
            value: Any object; ``None`` is rendered as ``'N/A'``.

        Returns:
            ``str(value)`` with ``&``, ``<``, ``>``, ``"`` and ``'`` replaced
            by XML entities so the text can be embedded in Paragraph markup.
        """
        if value is None:
            return 'N/A'
        return str(value).translate(self._XML_ESCAPES)

    def _format_duration(self, duration: Optional[Any]) -> str:
        """Format duration from seconds to human-readable format.

        Args:
            duration: Seconds as a number, or an already formatted string
                (e.g. ``"10:30"``) which is returned unchanged.

        Returns:
            ``'N/A'`` for falsy or non-numeric input, otherwise
            ``"Hh Mm Ss"``, ``"Mm Ss"`` or ``"Ss"``.
        """
        if not duration:
            return 'N/A'

        # Pre-formatted strings are passed through untouched.
        if isinstance(duration, str):
            return duration

        try:
            seconds = int(duration)
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers infinite floats; ValueError covers NaN.
            return 'N/A'

        hours, remainder = divmod(seconds, 3600)
        minutes, secs = divmod(remainder, 60)

        if hours > 0:
            return f"{hours}h {minutes}m {secs}s"
        if minutes > 0:
            return f"{minutes}m {secs}s"
        return f"{secs}s"

    def _format_number(self, number: Optional[int]) -> str:
        """Format large numbers with commas.

        Numeric strings are converted first; anything that cannot be formatted
        as a number is returned as plain text instead of raising.
        """
        if number is None:
            return 'N/A'

        if isinstance(number, str):
            try:
                number = int(number.strip())
            except ValueError:
                return number

        try:
            return f"{number:,}"
        except (TypeError, ValueError):
            return str(number)

    def _format_percentage(self, value: Optional[float]) -> str:
        """Format decimal as percentage.

        Returns ``'N/A'`` when *value* is ``None`` or cannot be converted to
        a float.
        """
        if value is None:
            return 'N/A'
        try:
            return f"{float(value) * 100:.1f}%"
        except (TypeError, ValueError):
            return 'N/A'

    def get_file_extension(self) -> str:
        """Return the file extension (without a leading dot) of exported files."""
        return "pdf"