""" PDF Exporter for YouTube Summaries Exports summaries to professionally formatted PDF documents Requires: pip install reportlab """ import tempfile from typing import Dict, Any, Optional, List from ..export_service import BaseExporter try: from reportlab.lib.pagesizes import letter, A4 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle from reportlab.lib.units import inch from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak from reportlab.lib import colors from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER REPORTLAB_AVAILABLE = True except ImportError: REPORTLAB_AVAILABLE = False class PDFExporter(BaseExporter): """Export summaries to PDF format""" def __init__(self): if not REPORTLAB_AVAILABLE: raise ImportError("reportlab is required for PDF export. Install with: pip install reportlab") async def export( self, summary_data: Dict[str, Any], template: Optional[str] = None, branding: Optional[Dict[str, Any]] = None ) -> str: """Export to PDF""" data = self._prepare_summary_data(summary_data) with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f: doc = SimpleDocTemplate( f.name, pagesize=A4, leftMargin=1*inch, rightMargin=1*inch, topMargin=1*inch, bottomMargin=1*inch, title=f"YouTube Summary - {data.get('video_metadata', {}).get('title', 'Video')}", author="YouTube Summarizer" ) story = self._build_pdf_content(data, branding) doc.build(story, onFirstPage=self._add_page_number, onLaterPages=self._add_page_number) return f.name def _build_pdf_content(self, data: Dict[str, Any], branding: Optional[Dict[str, Any]]) -> List: """Build PDF content elements""" styles = getSampleStyleSheet() story = [] # Custom styles primary_color = colors.HexColor("#2563eb") if branding and branding.get("primary_color"): try: primary_color = colors.HexColor(branding["primary_color"]) except: pass title_style = ParagraphStyle( 'CustomTitle', parent=styles['Title'], fontSize=24, textColor=primary_color, spaceAfter=30, alignment=TA_CENTER ) heading_style = ParagraphStyle( 'CustomHeading', parent=styles['Heading2'], fontSize=14, textColor=primary_color, spaceBefore=20, spaceAfter=10, leftIndent=0 ) subheading_style = ParagraphStyle( 'CustomSubHeading', parent=styles['Heading3'], fontSize=12, textColor=colors.darkgray, spaceBefore=15, spaceAfter=8 ) body_style = ParagraphStyle( 'CustomBody', parent=styles['Normal'], fontSize=11, alignment=TA_JUSTIFY, spaceAfter=12 ) # Title Page story.append(Paragraph("YouTube Video Summary", title_style)) # Branding if branding and branding.get("company_name"): branding_style = ParagraphStyle( 'Branding', parent=styles['Normal'], fontSize=10, textColor=colors.gray, alignment=TA_CENTER ) story.append(Paragraph(f"Generated by {branding['company_name']} using YouTube Summarizer", branding_style)) story.append(Spacer(1, 30)) # Video Information Table video_metadata = data.get("video_metadata", {}) video_info = [ ["Video Title", self._safe_str(video_metadata.get('title', 'N/A'))], ["Channel", self._safe_str(video_metadata.get('channel_name', 'N/A'))], ["Duration", self._format_duration(video_metadata.get('duration'))], ["Published", self._safe_str(video_metadata.get('published_at', 'N/A'))], ["Views", self._format_number(video_metadata.get('view_count'))], ["URL", self._safe_str(data.get('video_url', 'N/A'))[:50] + "..."] ] video_table = Table(video_info, colWidths=[2*inch, 4*inch]) video_table.setStyle(TableStyle([ ('BACKGROUND', (0, 0), (0, -1), colors.lightgrey), ('TEXTCOLOR', (0, 0), (0, -1), colors.black), ('ALIGN', (0, 0), (-1, -1), 'LEFT'), ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'), ('FONTSIZE', (0, 0), (-1, -1), 10), ('GRID', (0, 0), (-1, -1), 1, colors.black), ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'), ('ROWBACKGROUNDS', (0, 0), (-1, -1), [colors.white, colors.whitesmoke]) ])) story.append(video_table) story.append(Spacer(1, 40)) # Summary story.append(Paragraph("Summary", heading_style)) summary_text = data.get('summary', 'No summary available') story.append(Paragraph(self._safe_str(summary_text), body_style)) story.append(Spacer(1, 20)) # Key Points story.append(Paragraph("Key Points", heading_style)) key_points = data.get('key_points', []) if key_points: for point in key_points: story.append(Paragraph(f"• {self._safe_str(point)}", body_style)) else: story.append(Paragraph("No key points identified", body_style)) story.append(Spacer(1, 20)) # Main Themes story.append(Paragraph("Main Themes", heading_style)) main_themes = data.get('main_themes', []) if main_themes: for theme in main_themes: story.append(Paragraph(f"• {self._safe_str(theme)}", body_style)) else: story.append(Paragraph("No main themes identified", body_style)) story.append(Spacer(1, 20)) # Actionable Insights story.append(Paragraph("Actionable Insights", heading_style)) insights = data.get('actionable_insights', []) if insights: for i, insight in enumerate(insights, 1): story.append(Paragraph(f"{i}. {self._safe_str(insight)}", body_style)) else: story.append(Paragraph("No actionable insights identified", body_style)) # Chapters (if available) - New Page chapters = data.get('chapters', []) if chapters: story.append(PageBreak()) story.append(Paragraph("Chapter Breakdown", heading_style)) for chapter in chapters: timestamp = chapter.get('timestamp', '') title = chapter.get('title', '') summary = chapter.get('summary', '') story.append(Paragraph(f"[{timestamp}] {self._safe_str(title)}", subheading_style)) if summary: story.append(Paragraph(self._safe_str(summary), body_style)) story.append(Spacer(1, 10)) # Footer - Processing Information story.append(Spacer(1, 40)) footer_style = ParagraphStyle( 'Footer', parent=styles['Normal'], fontSize=8, textColor=colors.grey ) processing_metadata = data.get("processing_metadata", {}) footer_data = [ ["AI Model", self._safe_str(processing_metadata.get('model', 'N/A'))], ["Processing Time", self._format_duration(processing_metadata.get('processing_time_seconds'))], ["Confidence Score", self._format_percentage(data.get('confidence_score'))], ["Token Usage", self._safe_str(processing_metadata.get('tokens_used', 'N/A'))], ["Generated", self._safe_str(data.get('export_metadata', {}).get('exported_at', 'N/A'))] ] footer_table = Table(footer_data, colWidths=[1.5*inch, 2*inch]) footer_table.setStyle(TableStyle([ ('FONTSIZE', (0, 0), (-1, -1), 8), ('TEXTCOLOR', (0, 0), (-1, -1), colors.grey), ('ALIGN', (0, 0), (-1, -1), 'LEFT'), ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold') ])) story.append(footer_table) story.append(Spacer(1, 20)) story.append(Paragraph( "Summary generated by YouTube Summarizer - Transform video content into actionable insights", footer_style )) return story def _add_page_number(self, canvas, doc): """Add page numbers to PDF""" canvas.saveState() canvas.setFont('Helvetica', 9) canvas.setFillColor(colors.grey) page_num = canvas.getPageNumber() text = f"Page {page_num}" canvas.drawCentredString(A4[0] / 2, 0.75 * inch, text) canvas.restoreState() def _safe_str(self, value: Any) -> str: """Safely convert value to string and escape for PDF""" if value is None: return 'N/A' # Convert to string str_value = str(value) # Replace problematic characters for PDF replacements = { '&': '&', '<': '<', '>': '>', '"': '"', "'": ''' } for old, new in replacements.items(): str_value = str_value.replace(old, new) return str_value def _format_duration(self, duration: Optional[Any]) -> str: """Format duration from seconds to human-readable format""" if not duration: return 'N/A' # Handle string format (e.g., "10:30") if isinstance(duration, str): return duration # Handle numeric format (seconds) try: seconds = int(duration) except (ValueError, TypeError): return 'N/A' hours = seconds // 3600 minutes = (seconds % 3600) // 60 secs = seconds % 60 if hours > 0: return f"{hours}h {minutes}m {secs}s" elif minutes > 0: return f"{minutes}m {secs}s" else: return f"{secs}s" def _format_number(self, number: Optional[int]) -> str: """Format large numbers with commas""" if number is None: return 'N/A' return f"{number:,}" def _format_percentage(self, value: Optional[float]) -> str: """Format decimal as percentage""" if value is None: return 'N/A' return f"{value * 100:.1f}%" def get_file_extension(self) -> str: return "pdf"