"""Enhanced Markdown Formatter for professional export documents. This service creates professional markdown documents with executive summaries, timestamped sections, table of contents, and consistent formatting. """ import asyncio import logging from datetime import datetime from typing import Dict, Any, List, Optional from dataclasses import dataclass from ..services.executive_summary_generator import ExecutiveSummary, ExecutiveSummaryGenerator from ..services.timestamp_processor import TimestampedSection, TimestampProcessor from ..core.exceptions import ServiceError logger = logging.getLogger(__name__) @dataclass class MarkdownExportConfig: """Configuration for markdown export.""" include_executive_summary: bool = True include_timestamps: bool = True include_toc: bool = True section_detail_level: str = "standard" # brief, standard, detailed include_metadata_header: bool = True include_footer: bool = True custom_template_id: Optional[str] = None @dataclass class EnhancedMarkdownExport: """Result of enhanced markdown export.""" markdown_content: str executive_summary: Optional[ExecutiveSummary] sections: List[TimestampedSection] table_of_contents: str metadata: Dict[str, Any] quality_score: float processing_time_seconds: float export_config: MarkdownExportConfig created_at: datetime class EnhancedMarkdownFormatter: """Service for creating professional markdown documents.""" def __init__( self, executive_generator: Optional[ExecutiveSummaryGenerator] = None, timestamp_processor: Optional[TimestampProcessor] = None ): """Initialize enhanced markdown formatter. Args: executive_generator: Service for executive summaries timestamp_processor: Service for timestamp processing """ self.executive_generator = executive_generator or ExecutiveSummaryGenerator() self.timestamp_processor = timestamp_processor or TimestampProcessor() # Formatting configuration self.max_line_length = 80 self.heading_levels = { "title": "#", "section": "##", "subsection": "###", "detail": "####" } logger.info("EnhancedMarkdownFormatter initialized") async def create_enhanced_export( self, video_title: str, video_url: str, content: str, transcript_data: List[Dict[str, Any]] = None, export_config: Optional[MarkdownExportConfig] = None ) -> EnhancedMarkdownExport: """Create comprehensive enhanced markdown export. Args: video_title: Title of the video video_url: YouTube video URL content: Main content/summary text transcript_data: Raw transcript data with timestamps export_config: Export configuration options Returns: Enhanced markdown export result """ start_time = datetime.now() config = export_config or MarkdownExportConfig() try: # Generate components in parallel where possible tasks = [] # Executive summary (if enabled) executive_summary = None if config.include_executive_summary: tasks.append(self._generate_executive_summary(content, video_title)) # Timestamp sections (if enabled and data available) sections = [] if config.include_timestamps and transcript_data: tasks.append(self._generate_timestamp_sections( transcript_data, video_url, video_title )) # Execute parallel tasks results = await asyncio.gather(*tasks, return_exceptions=True) # Process results result_idx = 0 if config.include_executive_summary: executive_summary = results[result_idx] if not isinstance(results[result_idx], Exception) else None result_idx += 1 if config.include_timestamps and transcript_data: section_result = results[result_idx] if not isinstance(results[result_idx], Exception) else None if section_result: sections = section_result.sections result_idx += 1 # Generate table of contents toc = "" if config.include_toc and sections: toc = await self.timestamp_processor.generate_table_of_contents(sections) # Assemble final markdown document markdown_content = await self._assemble_markdown_document( video_title=video_title, video_url=video_url, content=content, executive_summary=executive_summary, sections=sections, table_of_contents=toc, config=config ) # Calculate quality score quality_score = self._calculate_export_quality( executive_summary, sections, markdown_content ) # Generate metadata metadata = self._generate_export_metadata( video_title, video_url, executive_summary, sections, config ) processing_time = (datetime.now() - start_time).total_seconds() return EnhancedMarkdownExport( markdown_content=markdown_content, executive_summary=executive_summary, sections=sections, table_of_contents=toc, metadata=metadata, quality_score=quality_score, processing_time_seconds=processing_time, export_config=config, created_at=datetime.now() ) except Exception as e: logger.error(f"Error creating enhanced export: {e}") raise ServiceError(f"Enhanced export creation failed: {str(e)}") async def _generate_executive_summary( self, content: str, video_title: str ) -> Optional[ExecutiveSummary]: """Generate executive summary component.""" try: return await self.executive_generator.generate_executive_summary( content=content, video_title=video_title, summary_type="business" ) except Exception as e: logger.warning(f"Executive summary generation failed: {e}") return None async def _generate_timestamp_sections( self, transcript_data: List[Dict[str, Any]], video_url: str, video_title: str ): """Generate timestamp sections component.""" try: return await self.timestamp_processor.detect_semantic_sections( transcript_data=transcript_data, video_url=video_url, video_title=video_title ) except Exception as e: logger.warning(f"Timestamp section generation failed: {e}") return None async def _assemble_markdown_document( self, video_title: str, video_url: str, content: str, executive_summary: Optional[ExecutiveSummary], sections: List[TimestampedSection], table_of_contents: str, config: MarkdownExportConfig ) -> str: """Assemble final markdown document.""" document_parts = [] # 1. Metadata Header if config.include_metadata_header: if executive_summary: header = await self.executive_generator.generate_metadata_header( executive_summary, video_title, video_url ) else: header = self._generate_basic_header(video_title, video_url) document_parts.append(header) # 2. Executive Summary Section if config.include_executive_summary and executive_summary: exec_section = self._format_executive_summary_section(executive_summary) document_parts.append(exec_section) # 3. Table of Contents if config.include_toc and table_of_contents: document_parts.append(table_of_contents) # 4. Main Content Section main_content = self._format_main_content_section(content, config) document_parts.append(main_content) # 5. Timestamped Sections if config.include_timestamps and sections: sections_content = self._format_timestamped_sections(sections, config) document_parts.append(sections_content) # 6. Footer if config.include_footer: if executive_summary: footer = await self.executive_generator.generate_executive_footer(executive_summary) else: footer = self._generate_basic_footer() document_parts.append(footer) # Join all parts with proper spacing return '\n\n'.join(filter(None, document_parts)) def _generate_basic_header(self, video_title: str, video_url: str) -> str: """Generate basic header when executive summary not available.""" return f"""# {video_title} **Analysis Date**: {datetime.now().strftime("%B %d, %Y")} **Source**: {video_url} """ def _format_executive_summary_section(self, executive_summary: ExecutiveSummary) -> str: """Format executive summary as markdown section.""" section_parts = [ "## Executive Summary", "", executive_summary.overview ] # Add key metrics if available if executive_summary.key_metrics: metrics = executive_summary.key_metrics section_parts.extend([ "", "### Key Metrics", f"- **Duration**: {metrics.duration_minutes} minutes", f"- **Complexity**: {metrics.complexity_level.title()}", f"- **Main Topics**: {', '.join(metrics.main_topics[:3])}" ]) # Add business value if available if executive_summary.business_value: section_parts.extend([ "", "### Business Value", executive_summary.business_value ]) # Add action items if executive_summary.action_items: section_parts.extend([ "", "### Action Items" ]) for item in executive_summary.action_items: section_parts.append(f"- {item}") # Add strategic implications if executive_summary.strategic_implications: section_parts.extend([ "", "### Strategic Implications" ]) for implication in executive_summary.strategic_implications: section_parts.append(f"- {implication}") return '\n'.join(section_parts) def _format_main_content_section( self, content: str, config: MarkdownExportConfig ) -> str: """Format main content section.""" if config.section_detail_level == "brief": # Truncate content for brief format content_lines = content.split('\n') if len(content_lines) > 10: content = '\n'.join(content_lines[:10]) + "\n\n*[Content truncated for brief format]*" return f"""## Content Analysis {content}""" def _format_timestamped_sections( self, sections: List[TimestampedSection], config: MarkdownExportConfig ) -> str: """Format timestamped sections.""" if not sections: return "" section_parts = [ "## Detailed Sections", "" ] for section in sections: timestamp_display = self.timestamp_processor.seconds_to_timestamp(section.start_timestamp) # Section header with clickable timestamp section_header = f"### [{timestamp_display}] {section.title}" section_parts.append(section_header) section_parts.append("") # YouTube link section_parts.append(f"**[🎬 Jump to this section]({section.youtube_link})**") section_parts.append("") # Section summary if section.summary and config.section_detail_level != "brief": section_parts.append(f"*{section.summary}*") section_parts.append("") # Key points if section.key_points and config.section_detail_level == "detailed": section_parts.append("**Key Points:**") for point in section.key_points: section_parts.append(f"- {point}") section_parts.append("") # Section content (for detailed format) if config.section_detail_level == "detailed" and section.content: # Limit content length for readability content_preview = section.content[:500] if len(section.content) > 500: content_preview += "..." section_parts.append("**Content:**") section_parts.append(content_preview) section_parts.append("") return '\n'.join(section_parts) def _generate_basic_footer(self) -> str: """Generate basic footer when executive summary not available.""" return f""" --- **Generated**: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")} *This analysis was generated using AI and is intended for informational purposes.* """ def _calculate_export_quality( self, executive_summary: Optional[ExecutiveSummary], sections: List[TimestampedSection], markdown_content: str ) -> float: """Calculate overall quality score for export.""" quality_factors = [] # Executive summary quality if executive_summary: quality_factors.append(executive_summary.key_metrics.confidence_score) # Sections quality if sections: avg_section_quality = sum(s.confidence_score for s in sections) / len(sections) quality_factors.append(avg_section_quality) # Content length and structure content_length = len(markdown_content) if 1000 <= content_length <= 50000: # Good length range quality_factors.append(0.9) elif content_length < 1000: quality_factors.append(0.6) else: quality_factors.append(0.7) # Structure completeness structure_score = 0.0 if "# " in markdown_content: # Has title structure_score += 0.2 if "## " in markdown_content: # Has sections structure_score += 0.3 if "[" in markdown_content and "](" in markdown_content: # Has links structure_score += 0.3 if "**" in markdown_content: # Has bold formatting structure_score += 0.2 quality_factors.append(structure_score) # Return average quality score return sum(quality_factors) / len(quality_factors) if quality_factors else 0.5 def _generate_export_metadata( self, video_title: str, video_url: str, executive_summary: Optional[ExecutiveSummary], sections: List[TimestampedSection], config: MarkdownExportConfig ) -> Dict[str, Any]: """Generate metadata for export.""" metadata = { "video_title": video_title, "video_url": video_url, "export_format": "enhanced_markdown", "created_at": datetime.now().isoformat(), "config": { "include_executive_summary": config.include_executive_summary, "include_timestamps": config.include_timestamps, "include_toc": config.include_toc, "section_detail_level": config.section_detail_level } } if executive_summary: metadata["executive_summary"] = { "generated": True, "confidence_score": executive_summary.key_metrics.confidence_score, "processing_time": executive_summary.processing_time_seconds, "word_count": executive_summary.key_metrics.word_count } if sections: metadata["sections"] = { "total_sections": len(sections), "avg_confidence": sum(s.confidence_score for s in sections) / len(sections), "total_duration": max(s.end_timestamp for s in sections) if sections else 0 } return metadata async def create_table_of_contents_only( self, sections: List[TimestampedSection] ) -> str: """Create standalone table of contents.""" return await self.timestamp_processor.generate_table_of_contents(sections) def format_for_platform(self, markdown_content: str, platform: str) -> str: """Format markdown for specific platforms (GitHub, Notion, etc.).""" if platform.lower() == "github": # GitHub-specific formatting return self._format_for_github(markdown_content) elif platform.lower() == "notion": # Notion-specific formatting return self._format_for_notion(markdown_content) elif platform.lower() == "obsidian": # Obsidian-specific formatting return self._format_for_obsidian(markdown_content) else: return markdown_content def _format_for_github(self, content: str) -> str: """Optimize for GitHub markdown rendering.""" # GitHub supports most standard markdown features return content def _format_for_notion(self, content: str) -> str: """Optimize for Notion markdown import.""" # Notion has some limitations with complex markdown # Simplify some formatting for better compatibility content = content.replace("**[🎬", "[🎬") content = content.replace("]**", "]") return content def _format_for_obsidian(self, content: str) -> str: """Optimize for Obsidian markdown.""" # Obsidian supports wiki-style links and other features # Add backlink support if needed return content def get_formatter_stats(self) -> Dict[str, Any]: """Get formatter configuration and statistics.""" return { "service_name": "EnhancedMarkdownFormatter", "max_line_length": self.max_line_length, "heading_levels": self.heading_levels, "supported_platforms": ["github", "notion", "obsidian", "standard"] }