141 lines
6.6 KiB
Python
141 lines
6.6 KiB
Python
"""
|
|
JSON Exporter for YouTube Summaries
|
|
Exports summaries to structured JSON format with full metadata
|
|
"""
|
|
|
|
import json
|
|
import tempfile
|
|
from typing import Dict, Any, Optional
|
|
from ..export_service import BaseExporter
|
|
|
|
|
|
class JSONExporter(BaseExporter):
    """Export summaries to a structured JSON document written to a temp file."""

    async def export(
        self,
        summary_data: Dict[str, Any],
        template: Optional[str] = None,
        branding: Optional[Dict[str, Any]] = None
    ) -> str:
        """Serialise a summary to JSON and return the path of the written file.

        Args:
            summary_data: Raw summary payload; it is passed through
                ``self._prepare_summary_data`` before being mapped.
                NOTE(review): that helper is not defined in this class
                (presumably inherited from ``BaseExporter``); the result must
                contain ``export_metadata`` with ``exported_at`` and
                ``exporter_version``.
            template: Recorded verbatim under ``export_options.template``;
                omitted from the output when ``None``.
            branding: Recorded verbatim under ``branding``; omitted from the
                output when ``None``.

        Returns:
            Path of a ``.json`` temporary file. The file is created with
            ``delete=False``, so this method does not remove it.

        Raises:
            KeyError: If the prepared data lacks the required
                ``export_metadata`` entries.
        """
        data = self._prepare_summary_data(summary_data)

        # Required metadata: direct indexing so a missing entry fails loudly.
        export_meta = data["export_metadata"]

        # Optional nested sections. A section that is present but None is
        # treated as empty instead of raising AttributeError on `.get`.
        video_meta = self._section(data, "video_metadata")
        quality = self._section(data, "quality_metrics")
        sentiment = self._section(data, "sentiment")
        processing = self._section(data, "processing_metadata")
        cost = self._section(data, "cost_data")

        # Structure data for JSON export
        json_data = {
            "youtube_summarizer_export": {
                "version": "1.0",
                "exported_at": export_meta["exported_at"],
                "exporter_version": export_meta["exporter_version"]
            },
            "video": {
                "id": data.get("video_id"),
                "url": data.get("video_url"),
                "metadata": {
                    "title": video_meta.get("title"),
                    "channel": video_meta.get("channel_name"),
                    "channel_id": video_meta.get("channel_id"),
                    "duration_seconds": video_meta.get("duration"),
                    "published_at": video_meta.get("published_at"),
                    "view_count": video_meta.get("view_count"),
                    "like_count": video_meta.get("like_count"),
                    "comment_count": video_meta.get("comment_count"),
                    "description": video_meta.get("description"),
                    "tags": video_meta.get("tags", []),
                    "thumbnail_url": video_meta.get("thumbnail_url"),
                    "categories": video_meta.get("categories", [])
                }
            },
            "transcript": {
                "language": data.get("transcript_language", "en"),
                "segments": data.get("transcript_segments", []),
                "full_text": data.get("transcript_text"),
                "word_count": data.get("word_count"),
                "duration_seconds": data.get("transcript_duration")
            },
            "summary": {
                "text": data.get("summary"),
                "key_points": data.get("key_points", []),
                "main_themes": data.get("main_themes", []),
                "actionable_insights": data.get("actionable_insights", []),
                "confidence_score": data.get("confidence_score"),
                "quality_metrics": {
                    "completeness": quality.get("completeness"),
                    "coherence": quality.get("coherence"),
                    "relevance": quality.get("relevance"),
                    "accuracy": quality.get("accuracy")
                },
                "sentiment_analysis": {
                    "overall_sentiment": sentiment.get("overall"),
                    "positive_score": sentiment.get("positive"),
                    "negative_score": sentiment.get("negative"),
                    "neutral_score": sentiment.get("neutral")
                },
                "topics": data.get("topics", []),
                "entities": data.get("entities", []),
                "keywords": data.get("keywords", [])
            },
            "chapters": data.get("chapters", []),
            "related_content": {
                "recommended_videos": data.get("recommended_videos", []),
                "related_topics": data.get("related_topics", []),
                "external_links": data.get("external_links", [])
            },
            "processing": {
                "metadata": {
                    "model": processing.get("model"),
                    "model_version": processing.get("model_version"),
                    "processing_time_seconds": processing.get("processing_time_seconds"),
                    "timestamp": processing.get("timestamp"),
                    "cache_hit": processing.get("cache_hit", False),
                    "pipeline_version": processing.get("pipeline_version")
                },
                "cost_data": {
                    "input_tokens": cost.get("input_tokens"),
                    "output_tokens": cost.get("output_tokens"),
                    "total_tokens": cost.get("total_tokens"),
                    "estimated_cost_usd": cost.get("estimated_cost_usd"),
                    "model_pricing": cost.get("model_pricing")
                },
                "quality_score": data.get("quality_score"),
                "errors": data.get("processing_errors", []),
                "warnings": data.get("processing_warnings", [])
            },
            "user_data": {
                "user_id": data.get("user_id"),
                "session_id": data.get("session_id"),
                "preferences": data.get("user_preferences", {}),
                "customization": data.get("customization", {})
            },
            "branding": branding,
            "export_options": {
                "template": template,
                "include_metadata": True,
                "format_version": "1.0"
            }
        }

        # Clean up None values for cleaner JSON
        json_data = self._clean_none_values(json_data)

        # Serialise before creating the file so a serialisation error does
        # not leave an empty or partial temp file behind.
        # default=str stringifies any value json cannot encode natively.
        payload = json.dumps(json_data, indent=2, default=str, ensure_ascii=False)

        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be opened as UTF-8 explicitly rather than with the locale default.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.json', delete=False, encoding='utf-8'
        ) as f:
            f.write(payload)
            return f.name

    @staticmethod
    def _section(data: Dict[str, Any], key: str) -> Dict[str, Any]:
        """Return the nested mapping under ``key``, or ``{}`` if missing/None."""
        return data.get(key) or {}

    def _clean_none_values(self, data: Any) -> Any:
        """Recursively drop dictionary entries whose value is ``None``.

        Dicts and lists are walked recursively. ``None`` items inside lists
        are kept, and a dict left empty after cleaning is kept as ``{}``.
        Any other value is returned unchanged.
        """
        if isinstance(data, dict):
            return {
                key: self._clean_none_values(value)
                for key, value in data.items()
                if value is not None
            }
        elif isinstance(data, list):
            return [self._clean_none_values(item) for item in data]
        else:
            return data

    def get_file_extension(self) -> str:
        """Return the file extension (without a dot) for this exporter."""
        return "json"