""" JSON Exporter for YouTube Summaries Exports summaries to structured JSON format with full metadata """ import json import tempfile from typing import Dict, Any, Optional from ..export_service import BaseExporter class JSONExporter(BaseExporter): """Export summaries to structured JSON format""" async def export( self, summary_data: Dict[str, Any], template: Optional[str] = None, branding: Optional[Dict[str, Any]] = None ) -> str: """Export to JSON""" data = self._prepare_summary_data(summary_data) # Structure data for JSON export json_data = { "youtube_summarizer_export": { "version": "1.0", "exported_at": data["export_metadata"]["exported_at"], "exporter_version": data["export_metadata"]["exporter_version"] }, "video": { "id": data.get("video_id"), "url": data.get("video_url"), "metadata": { "title": data.get("video_metadata", {}).get("title"), "channel": data.get("video_metadata", {}).get("channel_name"), "channel_id": data.get("video_metadata", {}).get("channel_id"), "duration_seconds": data.get("video_metadata", {}).get("duration"), "published_at": data.get("video_metadata", {}).get("published_at"), "view_count": data.get("video_metadata", {}).get("view_count"), "like_count": data.get("video_metadata", {}).get("like_count"), "comment_count": data.get("video_metadata", {}).get("comment_count"), "description": data.get("video_metadata", {}).get("description"), "tags": data.get("video_metadata", {}).get("tags", []), "thumbnail_url": data.get("video_metadata", {}).get("thumbnail_url"), "categories": data.get("video_metadata", {}).get("categories", []) } }, "transcript": { "language": data.get("transcript_language", "en"), "segments": data.get("transcript_segments", []), "full_text": data.get("transcript_text"), "word_count": data.get("word_count"), "duration_seconds": data.get("transcript_duration") }, "summary": { "text": data.get("summary"), "key_points": data.get("key_points", []), "main_themes": data.get("main_themes", []), "actionable_insights": data.get("actionable_insights", []), "confidence_score": data.get("confidence_score"), "quality_metrics": { "completeness": data.get("quality_metrics", {}).get("completeness"), "coherence": data.get("quality_metrics", {}).get("coherence"), "relevance": data.get("quality_metrics", {}).get("relevance"), "accuracy": data.get("quality_metrics", {}).get("accuracy") }, "sentiment_analysis": { "overall_sentiment": data.get("sentiment", {}).get("overall"), "positive_score": data.get("sentiment", {}).get("positive"), "negative_score": data.get("sentiment", {}).get("negative"), "neutral_score": data.get("sentiment", {}).get("neutral") }, "topics": data.get("topics", []), "entities": data.get("entities", []), "keywords": data.get("keywords", []) }, "chapters": data.get("chapters", []), "related_content": { "recommended_videos": data.get("recommended_videos", []), "related_topics": data.get("related_topics", []), "external_links": data.get("external_links", []) }, "processing": { "metadata": { "model": data.get("processing_metadata", {}).get("model"), "model_version": data.get("processing_metadata", {}).get("model_version"), "processing_time_seconds": data.get("processing_metadata", {}).get("processing_time_seconds"), "timestamp": data.get("processing_metadata", {}).get("timestamp"), "cache_hit": data.get("processing_metadata", {}).get("cache_hit", False), "pipeline_version": data.get("processing_metadata", {}).get("pipeline_version") }, "cost_data": { "input_tokens": data.get("cost_data", {}).get("input_tokens"), "output_tokens": data.get("cost_data", {}).get("output_tokens"), "total_tokens": data.get("cost_data", {}).get("total_tokens"), "estimated_cost_usd": data.get("cost_data", {}).get("estimated_cost_usd"), "model_pricing": data.get("cost_data", {}).get("model_pricing") }, "quality_score": data.get("quality_score"), "errors": data.get("processing_errors", []), "warnings": data.get("processing_warnings", []) }, "user_data": { "user_id": data.get("user_id"), "session_id": data.get("session_id"), "preferences": data.get("user_preferences", {}), "customization": data.get("customization", {}) }, "branding": branding, "export_options": { "template": template, "include_metadata": True, "format_version": "1.0" } } # Clean up None values for cleaner JSON json_data = self._clean_none_values(json_data) # Write to temporary file with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: json.dump(json_data, f, indent=2, default=str, ensure_ascii=False) return f.name def _clean_none_values(self, data: Any) -> Any: """Recursively remove None values from dictionaries""" if isinstance(data, dict): return { key: self._clean_none_values(value) for key, value in data.items() if value is not None } elif isinstance(data, list): return [self._clean_none_values(item) for item in data] else: return data def get_file_extension(self) -> str: return "json"