youtube-summarizer/backend/mermaid_renderer.py

442 lines
14 KiB
Python

#!/usr/bin/env python3
"""Mermaid Diagram Renderer
Utilities for extracting, rendering, and managing Mermaid diagrams from summaries.
"""
import logging
import os
import re
import subprocess
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# Module-level logger, named after this module so the host application can
# configure its verbosity independently.
logger = logging.getLogger(__name__)
class MermaidRenderer:
    """Extract Mermaid diagrams from text and render them to files or ASCII.

    Image rendering shells out to the Mermaid CLI (``mmdc``). ASCII rendering
    shells out to ``mermaid-ascii`` and falls back to a simple built-in text
    representation when that tool is missing or exits with an error.
    """

    # Fenced ```mermaid block. Trailing spaces/tabs after the language tag and
    # CRLF line endings are tolerated so Windows-authored text still matches.
    _BLOCK_PATTERN = re.compile(r'```mermaid[ \t]*\r?\n(.*?)```', re.DOTALL)

    # Title patterns, tried in order. Both are anchored to the start of a line
    # so the word "title" inside a node label (e.g. "A[Add subtitle text]")
    # is not mistaken for a title statement.
    _TITLE_PATTERNS = (
        # Mermaid title directive: "title Foo" or "pie [showData] title Foo".
        re.compile(
            r'^[ \t]*(?:pie[ \t]+(?:showData[ \t]+)?)?title[ \t]+(\S[^\n]*)',
            re.IGNORECASE | re.MULTILINE,
        ),
        # Comment-based title: "%% title: Foo".
        re.compile(
            r'^[ \t]*%%[ \t]*title:[ \t]*(\S[^\n]*)',
            re.IGNORECASE | re.MULTILINE,
        ),
    )

    # (lowercase keyword, reported type), checked in order as substrings of
    # the lowercased declaration line.
    _TYPE_KEYWORDS = (
        ('graph', 'flowchart'),
        ('flowchart', 'flowchart'),
        ('sequencediagram', 'sequence'),
        ('classdiagram', 'class'),
        ('statediagram', 'state'),
        ('erdiagram', 'er'),
        ('journey', 'journey'),
        ('gantt', 'gantt'),
        ('pie', 'pie'),
        ('mindmap', 'mindmap'),
        ('timeline', 'timeline'),
    )

    # Upper bound on the sanitized file stem so a very long title line cannot
    # exceed common filesystem name limits once the extension is appended.
    _MAX_FILENAME_STEM = 200

    def __init__(self, output_dir: str = "diagrams"):
        """Initialize the Mermaid renderer.

        Args:
            output_dir: Directory to save rendered diagrams. It is created
                (including parents) if it does not exist.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def extract_diagrams(self, text: str) -> List[Dict[str, Any]]:
        """Extract all Mermaid diagram blocks from text.

        Args:
            text: Text containing fenced ```mermaid blocks.

        Returns:
            One dictionary per block, in order of appearance, with keys
            ``code`` (stripped source), ``title`` (extracted title or
            ``diagram_<n>``), ``type`` and ``index`` (0-based).
        """
        if not text:
            return []

        diagrams = []
        for index, code in enumerate(self._BLOCK_PATTERN.findall(text)):
            diagrams.append({
                "code": code.strip(),
                # Untitled diagrams get a stable 1-based placeholder name.
                "title": self._extract_diagram_title(code) or f"diagram_{index + 1}",
                "type": self._detect_diagram_type(code),
                "index": index,
            })
        return diagrams

    def _extract_diagram_title(self, code: str) -> Optional[str]:
        """Return the diagram's title, or None when no title statement exists."""
        for pattern in self._TITLE_PATTERNS:
            match = pattern.search(code)
            if match:
                # strip() also removes a trailing "\r" from CRLF input.
                return match.group(1).strip()
        return None

    def _detect_diagram_type(self, code: str) -> str:
        """Classify the diagram by the keyword on its declaration line.

        The declaration line is the first line that is neither blank nor a
        ``%%`` comment/directive. Returns ``'generic'`` when no known keyword
        is found.
        """
        declaration = ''
        for line in code.splitlines():
            candidate = line.strip()
            if candidate and not candidate.startswith('%%'):
                declaration = candidate.lower()
                break

        # Keywords are lowercase because the declaration line was lowercased.
        for keyword, diagram_type in self._TYPE_KEYWORDS:
            if keyword in declaration:
                return diagram_type
        return 'generic'

    @staticmethod
    def _safe_filename(title: str) -> str:
        """Turn a diagram title into a filesystem-safe file stem."""
        stem = re.sub(r'[^a-zA-Z0-9_-]', '_', title)
        return stem[:MermaidRenderer._MAX_FILENAME_STEM] or 'diagram'

    @staticmethod
    def _write_temp_source(code: str) -> str:
        """Write diagram source to a temporary .mmd file and return its path.

        The file is written as UTF-8 regardless of the platform default so
        non-ASCII labels survive. The caller is responsible for deleting it.
        """
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.mmd', delete=False, encoding='utf-8'
        ) as handle:
            handle.write(code)
            return handle.name

    def render_diagram(
        self,
        diagram: Dict[str, Any],
        format: str = 'svg',
        theme: str = 'default'
    ) -> Optional[str]:
        """Render a Mermaid diagram to an image file.

        Args:
            diagram: Diagram dictionary from extract_diagrams.
            format: Output format (svg, png, pdf); used as the file extension.
            theme: Mermaid theme (default, dark, forest, neutral).

        Returns:
            Path to rendered image file, or None if the CLI is unavailable or
            rendering failed.
        """
        if not self._check_mermaid_cli():
            logger.warning("Mermaid CLI (mmdc) not found. Install with: npm install -g @mermaid-js/mermaid-cli")
            return None

        temp_input = self._write_temp_source(diagram['code'])
        try:
            output_file = self.output_dir / f"{self._safe_filename(diagram['title'])}.{format}"
            cmd = [
                'mmdc',
                '-i', temp_input,
                '-o', str(output_file),
                '-t', theme,
                # SVG output keeps a transparent background; other formats get white.
                '--backgroundColor', 'transparent' if format == 'svg' else 'white',
            ]
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode == 0:
                logger.info("Successfully rendered diagram: %s", output_file)
                return str(output_file)
            logger.error("Failed to render diagram: %s", result.stderr)
            return None
        finally:
            # Always remove the temporary source file.
            os.unlink(temp_input)

    def _check_mermaid_cli(self) -> bool:
        """Return True if ``mmdc --version`` runs and exits with status 0."""
        try:
            result = subprocess.run(['mmdc', '--version'], capture_output=True)
        except OSError:
            # Covers a missing binary as well as one that cannot be executed.
            return False
        return result.returncode == 0

    def render_to_ascii(self, diagram: Dict[str, Any]) -> Optional[str]:
        """Render a Mermaid diagram to ASCII art for terminal display.

        Args:
            diagram: Diagram dictionary from extract_diagrams.

        Returns:
            Output of ``mermaid-ascii`` when it succeeds, otherwise the simple
            built-in text representation.
        """
        try:
            temp_input = self._write_temp_source(diagram['code'])
            try:
                result = subprocess.run(
                    ['mermaid-ascii', '-f', temp_input],
                    capture_output=True,
                    text=True
                )
            finally:
                os.unlink(temp_input)
        except OSError:
            # mermaid-ascii is not installed or not runnable: use the fallback.
            return self._simple_ascii_fallback(diagram)

        if result.returncode == 0:
            return result.stdout
        return self._simple_ascii_fallback(diagram)

    def _simple_ascii_fallback(self, diagram: Dict[str, Any]) -> str:
        """Create a simple ASCII representation of the diagram structure.

        The output is a box (drawn with ``+``, ``-`` and ``|``) containing the
        upper-cased diagram type and the title, a blank line, and then every
        source line after the first, excluding blank lines and ``%%`` comments.
        Each line is stripped and re-indented by its original leading
        whitespace width, using spaces.
        """
        source_lines = diagram['code'].strip().split('\n')
        diagram_type = diagram['type'].upper()
        title = diagram['title']

        # Inner width leaves one space of padding on each side of the text.
        width = max(len(diagram_type), len(title)) + 4
        border = '+' + '-' * width + '+'

        ascii_art = [
            border,
            '| ' + diagram_type.center(width - 2) + ' |',
            '| ' + title.center(width - 2) + ' |',
            border,
            '',
        ]

        for line in source_lines[1:]:  # Skip the first line (diagram type)
            cleaned = line.strip()
            if cleaned and not cleaned.startswith('%%'):
                indent = len(line) - len(line.lstrip())
                ascii_art.append(' ' * indent + cleaned)
        return '\n'.join(ascii_art)

    def save_diagram_code(self, diagram: Dict[str, Any]) -> str:
        """Save diagram code to a .mmd file in the output directory.

        Args:
            diagram: Diagram dictionary from extract_diagrams.

        Returns:
            Path to saved .mmd file.
        """
        output_file = self.output_dir / f"{self._safe_filename(diagram['title'])}.mmd"
        # Explicit UTF-8 so the file does not depend on the platform default.
        output_file.write_text(diagram['code'], encoding='utf-8')
        return str(output_file)

    def extract_and_render_all(
        self,
        text: str,
        format: str = 'svg',
        theme: str = 'default',
        save_code: bool = True
    ) -> List[Dict[str, Any]]:
        """Extract and render all diagrams from text.

        Args:
            text: Text containing Mermaid diagrams.
            format: Output format for rendered images.
            theme: Mermaid theme.
            save_code: Whether to save .mmd files.

        Returns:
            One result dictionary per diagram with ``title``, ``type`` and
            ``index``, plus ``code_file``, ``image_file`` and ``ascii`` when
            the corresponding step produced output.
        """
        results = []
        for diagram in self.extract_diagrams(text):
            result = {
                "title": diagram['title'],
                "type": diagram['type'],
                "index": diagram['index'],
            }
            if save_code:
                result['code_file'] = self.save_diagram_code(diagram)

            rendered = self.render_diagram(diagram, format, theme)
            if rendered:
                result['image_file'] = rendered

            ascii_art = self.render_to_ascii(diagram)
            if ascii_art:
                result['ascii'] = ascii_art

            results.append(result)
        return results
class DiagramEnhancer:
    """Enhances summaries by suggesting and building Mermaid diagrams."""

    # Suggestion rules, evaluated in order:
    # (trigger keywords, diagram type, reason, template source).
    # Keywords are matched as case-insensitive substrings of the input text.
    _SUGGESTION_RULES = (
        (
            ('process', 'workflow', 'steps', 'procedure'),
            "flowchart",
            "Process or workflow detected",
            "\n".join((
                "graph TD",
                "A[Start] --> B[Step 1]",
                "B --> C[Step 2]",
                "C --> D[Decision]",
                "D -->|Yes| E[Option 1]",
                "D -->|No| F[Option 2]",
                "E --> G[End]",
                "F --> G",
            )),
        ),
        (
            ('timeline', 'history', 'chronological', 'evolution'),
            "timeline",
            "Timeline or chronological information detected",
            "\n".join((
                "timeline",
                "title Timeline of Events",
                "2020 : Event 1",
                "2021 : Event 2",
                "2022 : Event 3",
                "2023 : Event 4",
            )),
        ),
        (
            ('relationship', 'connection', 'interaction', 'between'),
            "mindmap",
            "Relationships or connections detected",
            # Mermaid mindmaps express hierarchy through indentation, so the
            # nesting below is part of the diagram's meaning.
            "\n".join((
                "mindmap",
                "  root((Central Concept))",
                "    Branch 1",
                "      Sub-item 1",
                "      Sub-item 2",
                "    Branch 2",
                "      Sub-item 3",
                "      Sub-item 4",
                "    Branch 3",
            )),
        ),
        (
            ('percentage', 'statistics', 'distribution', 'proportion'),
            "pie",
            "Statistical or proportional data detected",
            "\n".join((
                "pie title Distribution",
                '"Category A" : 30',
                '"Category B" : 25',
                '"Category C" : 25',
                '"Category D" : 20',
            )),
        ),
    )

    # Maximum number of themes / key points included in the structure diagram.
    _MAX_ITEMS = 5
    # Maximum number of characters kept from each theme / key point.
    _MAX_LABEL_LENGTH = 50

    @staticmethod
    def suggest_diagrams(text: str) -> List[Dict[str, str]]:
        """Analyze text and suggest appropriate Mermaid diagrams.

        Args:
            text: Summary text to analyze.

        Returns:
            List of suggestions, each with ``type``, ``reason`` and a
            ``template`` of Mermaid source. Empty when the text is empty or
            no trigger keyword occurs.
        """
        if not text:
            return []

        # Lowercase once instead of once per keyword.
        lowered = text.lower()
        return [
            {"type": diagram_type, "reason": reason, "template": template}
            for keywords, diagram_type, reason, template in DiagramEnhancer._SUGGESTION_RULES
            if any(keyword in lowered for keyword in keywords)
        ]

    @staticmethod
    def _mindmap_label(text: str) -> str:
        """Make text safe for a quoted, single-line mindmap node."""
        # Collapse all whitespace (including newlines) so the label stays on
        # one line, and swap double quotes so they cannot end the quoted label.
        single_line = " ".join(text.split())
        return single_line.replace('"', "'")[:DiagramEnhancer._MAX_LABEL_LENGTH]

    @staticmethod
    def create_summary_structure_diagram(key_points: List[str], main_themes: List[str]) -> str:
        """Create a mind map diagram of the summary structure.

        Args:
            key_points: List of key points from summary (first 5 are used).
            main_themes: List of main themes (first 5 are used).

        Returns:
            Mermaid mindmap code with a ``Summary`` root, a ``Themes`` branch
            and a ``Key Points`` branch. A branch is omitted when its list is
            empty.
        """
        label = DiagramEnhancer._mindmap_label
        limit = DiagramEnhancer._MAX_ITEMS

        # Each nesting level is indented two more spaces than its parent.
        diagram = ["mindmap", "  root((Summary))"]
        if main_themes:
            diagram.append("    Themes")
            for theme in main_themes[:limit]:
                diagram.append(f'      "{label(theme)}"')
        if key_points:
            diagram.append("    Key Points")
            for i, point in enumerate(key_points[:limit], 1):
                diagram.append(f'      "Point {i}: {label(point)}"')
        return '\n'.join(diagram)
# CLI Integration
def render_summary_diagrams(summary_text: str, output_dir: str = "diagrams"):
    """Extract every Mermaid diagram in a summary, render it, and print a report.

    Args:
        summary_text: Summary containing Mermaid diagrams
        output_dir: Directory to save rendered diagrams

    Returns:
        The list of per-diagram result dictionaries produced by
        MermaidRenderer.extract_and_render_all (empty when none were found).
    """
    outcomes = MermaidRenderer(output_dir).extract_and_render_all(summary_text)

    # Nothing extracted: report it and hand back the empty list.
    if not outcomes:
        print("\n📊 No Mermaid diagrams found in summary")
        return outcomes

    print(f"\n📊 Found and rendered {len(outcomes)} diagram(s):")
    for entry in outcomes:
        print(f"\n{entry['title']} ({entry['type']})")
        # Each artifact is optional; print only the ones that were produced.
        if 'image_file' in entry:
            print(f" Image: {entry['image_file']}")
        if 'code_file' in entry:
            print(f" Code: {entry['code_file']}")
        if 'ascii' in entry:
            print(f"\n ASCII Preview:\n{entry['ascii']}")
    return outcomes
if __name__ == "__main__":
    # Manual smoke test: a tiny summary containing one flowchart block.
    sample_summary = "\n".join([
        "",
        "# Video Summary",
        "This video explains the process of making coffee.",
        "```mermaid",
        "graph TD",
        "A[Start] --> B[Grind Beans]",
        "B --> C[Boil Water]",
        "C --> D[Pour Water]",
        "D --> E[Wait 4 minutes]",
        "E --> F[Enjoy Coffee]",
        "```",
        "The key points are...",
        "",
    ])
    render_summary_diagrams(sample_summary)