"""
YouTube Summarizer MCP (Model Context Protocol) Integration

Provides MCP server connectivity and tool execution for AI development environments
"""

import asyncio
import json
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional, Union

from .models import (
    TranscriptRequest,
    BatchProcessingRequest,
    JobResponse,
    TranscriptResult,
    DualTranscriptResult,
    APIUsageStats,
    MCPToolRequest,
    MCPToolResult,
    MCPResourceRequest,
    MCPResourceResult,
)
from .client import YouTubeSummarizerClient
from .exceptions import YouTubeSummarizerError, ValidationError

logger = logging.getLogger(__name__)

# Resource URIs understood by YouTubeSummarizerMCP.read_resource.
_VIDEO_URI_PREFIX = "youtube-summarizer://video/"
_QUEUE_URI = "youtube-summarizer://queue"
_ANALYTICS_URI = "youtube-summarizer://analytics"


class YouTubeSummarizerMCP:
    """
    MCP (Model Context Protocol) interface for YouTube Summarizer

    Provides standardized tool and resource access for AI development
    environments like Claude Code, enabling seamless integration with
    MCP-compatible editors.

    Tool and resource failures are reported inside the returned result
    objects (``is_error=True`` for tools, a ``text/plain`` message for
    resources) rather than raised to the caller.
    """

    def __init__(self, client: YouTubeSummarizerClient):
        """
        Initialize MCP interface with YouTube Summarizer client

        Args:
            client: Configured YouTubeSummarizerClient instance
        """
        self.client = client
        self._tools = self._initialize_tools()
        self._resources = self._initialize_resources()

    def _initialize_tools(self) -> Dict[str, Dict[str, Any]]:
        """Initialize MCP tool definitions

        Returns:
            Mapping of tool name to a dict holding its ``description`` and a
            JSON-Schema style ``parameters`` object.
        """
        return {
            "extract_transcript": {
                "description": "Extract transcript from YouTube video with quality options",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "video_url": {
                            "type": "string",
                            "format": "uri",
                            "description": "YouTube video URL"
                        },
                        "transcript_source": {
                            "type": "string",
                            "enum": ["youtube", "whisper", "both"],
                            "default": "youtube",
                            "description": "Transcript source preference"
                        },
                        "whisper_model_size": {
                            "type": "string",
                            "enum": ["tiny", "base", "small", "medium", "large"],
                            "default": "small",
                            "description": "Whisper model size for AI transcription"
                        },
                        "priority": {
                            "type": "string",
                            "enum": ["low", "normal", "high", "urgent"],
                            "default": "normal",
                            "description": "Processing priority"
                        },
                        "include_quality_analysis": {
                            "type": "boolean",
                            "default": True,
                            "description": "Include transcript quality analysis"
                        },
                        "wait_for_completion": {
                            "type": "boolean",
                            "default": True,
                            "description": "Wait for processing to complete before returning"
                        },
                        "timeout_seconds": {
                            "type": "number",
                            "default": 300,
                            "description": "Maximum wait time in seconds"
                        }
                    },
                    "required": ["video_url"]
                }
            },
            "batch_process_videos": {
                "description": "Process multiple YouTube videos in batch",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "video_urls": {
                            "type": "array",
                            "items": {"type": "string", "format": "uri"},
                            "minItems": 1,
                            "maxItems": 100,
                            "description": "Array of YouTube video URLs"
                        },
                        "batch_name": {
                            "type": "string",
                            "description": "Descriptive name for the batch job"
                        },
                        "transcript_source": {
                            "type": "string",
                            "enum": ["youtube", "whisper", "both"],
                            "default": "youtube",
                            "description": "Transcript source for all videos"
                        },
                        "parallel_processing": {
                            "type": "boolean",
                            "default": False,
                            "description": "Enable parallel processing"
                        },
                        "max_concurrent_jobs": {
                            "type": "integer",
                            "minimum": 1,
                            "maximum": 10,
                            "default": 3,
                            "description": "Maximum concurrent jobs"
                        }
                    },
                    "required": ["video_urls", "batch_name"]
                }
            },
            "get_job_status": {
                "description": "Get processing job status and progress",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "job_id": {
                            "type": "string",
                            "description": "Job identifier returned from extract_transcript or batch_process_videos"
                        }
                    },
                    "required": ["job_id"]
                }
            },
            "get_processing_estimate": {
                "description": "Get processing time and cost estimate for a video",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "video_url": {
                            "type": "string",
                            "format": "uri",
                            "description": "YouTube video URL"
                        },
                        "transcript_source": {
                            "type": "string",
                            "enum": ["youtube", "whisper", "both"],
                            "default": "youtube",
                            "description": "Transcript source preference"
                        }
                    },
                    "required": ["video_url"]
                }
            },
            "search_summaries": {
                "description": "Search through processed summaries and transcripts",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query string"
                        },
                        "limit": {
                            "type": "integer",
                            "minimum": 1,
                            "maximum": 100,
                            "default": 10,
                            "description": "Maximum number of results"
                        },
                        "offset": {
                            "type": "integer",
                            "minimum": 0,
                            "default": 0,
                            "description": "Results offset for pagination"
                        }
                    },
                    "required": ["query"]
                }
            },
            "export_data": {
                "description": "Export user data in various formats",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "format": {
                            "type": "string",
                            "enum": ["json", "csv", "markdown", "pdf"],
                            "default": "json",
                            "description": "Export format"
                        },
                        "date_from": {
                            "type": "string",
                            "format": "date",
                            "description": "Start date for data export (YYYY-MM-DD)"
                        },
                        "date_to": {
                            "type": "string",
                            "format": "date",
                            "description": "End date for data export (YYYY-MM-DD)"
                        },
                        "include_transcripts": {
                            "type": "boolean",
                            "default": True,
                            "description": "Include transcript data in export"
                        }
                    }
                }
            }
        }

    def _initialize_resources(self) -> Dict[str, Dict[str, Any]]:
        """Initialize MCP resource definitions

        Returns:
            Mapping of resource name to a dict holding its ``description``,
            ``uri_template`` and ``mime_type``.
        """
        return {
            "video-metadata": {
                "description": "Get video metadata and processing information",
                "uri_template": "youtube-summarizer://video/{video_id}",
                "mime_type": "application/json"
            },
            "processing-queue": {
                "description": "View current processing queue and job statistics",
                "uri_template": "youtube-summarizer://queue",
                "mime_type": "application/json"
            },
            "analytics-dashboard": {
                "description": "Access usage analytics and performance metrics",
                "uri_template": "youtube-summarizer://analytics/{metric_type?}",
                "mime_type": "application/json"
            }
        }

    # Result-building helpers

    @staticmethod
    def _tool_error(message: str) -> MCPToolResult:
        """Build a tool result flagged as an error with a single text item."""
        return MCPToolResult(
            content=[{"type": "text", "text": message}],
            is_error=True
        )

    @staticmethod
    def _tool_success(summary: str, uri: str, payload: str) -> MCPToolResult:
        """Build a tool result with a text summary followed by one resource item."""
        return MCPToolResult(
            content=[
                {"type": "text", "text": summary},
                {"type": "resource", "resource": {"uri": uri, "text": payload}},
            ]
        )

    @staticmethod
    def _resource(uri: str, mime_type: str, text: str) -> MCPResourceResult:
        """Build a resource result holding exactly one content entry."""
        return MCPResourceResult(
            contents=[{"uri": uri, "mimeType": mime_type, "text": text}]
        )

    @staticmethod
    def _usage_summary(stats: Any) -> Dict[str, Any]:
        """Collect the usage counters shared by the queue and analytics resources."""
        return {
            "total_requests": stats.total_requests,
            "requests_today": stats.requests_today,
            "requests_this_month": stats.requests_this_month,
            "success_rate": stats.success_rate,
            "average_response_time_ms": stats.average_response_time_ms,
        }

    # Public MCP surface

    async def list_tools(self) -> List[Dict[str, Any]]:
        """
        List available MCP tools

        Returns:
            List of tool definitions, each with ``name``, ``description`` and
            ``inputSchema`` keys
        """
        return [
            {
                "name": name,
                "description": tool["description"],
                "inputSchema": tool["parameters"]
            }
            for name, tool in self._tools.items()
        ]

    async def list_resources(self) -> List[Dict[str, Any]]:
        """
        List available MCP resources

        Returns:
            List of resource definitions, each with ``name``, ``description``,
            ``uri`` (the URI template) and ``mimeType`` keys
        """
        return [
            {
                "name": name,
                "description": resource["description"],
                "uri": resource["uri_template"],
                "mimeType": resource["mime_type"]
            }
            for name, resource in self._resources.items()
        ]

    async def call_tool(self, request: MCPToolRequest) -> MCPToolResult:
        """
        Execute MCP tool

        Args:
            request: MCP tool request; ``request.name`` selects the tool and
                ``request.arguments`` is passed to its implementation

        Returns:
            Tool execution result. Unknown tool names and unexpected
            exceptions are returned as results with ``is_error=True``.
        """
        try:
            # Built per call so the table always holds methods bound to this
            # instance (including subclass overrides).
            handlers = {
                "extract_transcript": self._extract_transcript,
                "batch_process_videos": self._batch_process_videos,
                "get_job_status": self._get_job_status,
                "get_processing_estimate": self._get_processing_estimate,
                "search_summaries": self._search_summaries,
                "export_data": self._export_data,
            }
            handler = handlers.get(request.name)
            if handler is None:
                return self._tool_error(f"Unknown tool: {request.name}")
            return await handler(request.arguments)
        except Exception as e:
            # Protocol boundary: report the failure to the MCP caller instead
            # of raising, but keep the traceback in the log.
            logger.exception("Tool execution error: %s", e)
            return self._tool_error(f"Tool execution failed: {str(e)}")

    async def read_resource(self, request: MCPResourceRequest) -> MCPResourceResult:
        """
        Read MCP resource

        Recognised URIs:
            * ``youtube-summarizer://video/<video_id>``
            * ``youtube-summarizer://queue``
            * ``youtube-summarizer://analytics`` optionally followed by
              ``/<metric_type>``

        Args:
            request: MCP resource request; only ``request.uri`` is read

        Returns:
            Resource content. Unrecognised URIs and failures are returned as
            a ``text/plain`` entry describing the problem.
        """
        try:
            uri = request.uri
            if uri.startswith(_VIDEO_URI_PREFIX):
                video_id = uri.split("/")[-1]
                # An empty ID (URI ending in "/") names no video; fall
                # through to the not-found result below.
                if video_id:
                    return await self._get_video_resource(video_id)
            elif uri == _QUEUE_URI:
                return await self._get_queue_resource()
            elif uri == _ANALYTICS_URI or uri.startswith(_ANALYTICS_URI + "/"):
                # Only the part after the analytics base is inspected. The
                # scheme's own "//" must not be mistaken for a path
                # separator, otherwise the bare URI yields "analytics" as
                # the metric type.
                metric_type = uri[len(_ANALYTICS_URI):].split("/")[-1] or None
                return await self._get_analytics_resource(metric_type)

            return self._resource(uri, "text/plain", f"Resource not found: {uri}")
        except Exception as e:
            logger.exception("Resource read error: %s", e)
            return self._resource(
                request.uri, "text/plain", f"Resource read failed: {str(e)}"
            )

    # Tool implementations

    async def _extract_transcript(self, args: Dict[str, Any]) -> MCPToolResult:
        """Extract transcript tool implementation

        Submits a transcript job through ``self.client.extract_transcript``.
        When ``wait_for_completion`` is truthy (the default) it then awaits
        ``self.client.wait_for_job`` and returns the finished transcript;
        otherwise it returns the job's current status immediately.

        Args:
            args: Tool arguments; ``video_url`` is required, all other keys
                fall back to the defaults used below.

        Returns:
            Tool result; any exception is converted into an error result.
        """
        try:
            # Build transcript request
            transcript_request = TranscriptRequest(
                video_url=args["video_url"],
                transcript_source=args.get("transcript_source", "youtube"),
                whisper_model_size=args.get("whisper_model_size", "small"),
                priority=args.get("priority", "normal"),
                include_quality_analysis=args.get("include_quality_analysis", True)
            )

            # Submit job
            job = await self.client.extract_transcript(transcript_request)

            if not args.get("wait_for_completion", True):
                # Fire-and-forget: hand back the job handle only.
                eta = job.estimated_completion
                return self._tool_success(
                    f"Transcript extraction started. Job ID: {job.job_id}",
                    f"youtube-summarizer://job/{job.job_id}",
                    json.dumps({
                        "job_id": job.job_id,
                        "status": job.status,
                        "progress": job.progress_percentage,
                        "estimated_completion": eta.isoformat() if eta else None
                    }, indent=2)
                )

            # Wait for completion
            timeout = args.get("timeout_seconds", 300)
            result = await self.client.wait_for_job(job.job_id, timeout=timeout)
            return self._tool_success(
                f"Transcript extraction completed for {args['video_url']}",
                f"youtube-summarizer://video/{result.video_id}",
                json.dumps({
                    "job_id": job.job_id,
                    "video_url": result.video_url,
                    "transcript": result.transcript,
                    "processing_time": result.processing_time_seconds,
                    "quality_score": result.quality_score,
                    "confidence_score": result.confidence_score,
                    "metadata": result.metadata
                }, indent=2)
            )
        except Exception as e:
            return self._tool_error(f"Transcript extraction failed: {str(e)}")

    async def _batch_process_videos(self, args: Dict[str, Any]) -> MCPToolResult:
        """Batch process videos tool implementation

        Args:
            args: Tool arguments; ``video_urls`` and ``batch_name`` are
                required.

        Returns:
            Tool result describing the batch returned by
            ``self.client.batch_process``; any exception is converted into an
            error result.
        """
        try:
            batch_request = BatchProcessingRequest(
                video_urls=args["video_urls"],
                batch_name=args["batch_name"],
                transcript_source=args.get("transcript_source", "youtube"),
                parallel_processing=args.get("parallel_processing", False),
                max_concurrent_jobs=args.get("max_concurrent_jobs", 3)
            )

            batch_job = await self.client.batch_process(batch_request)
            eta = batch_job.estimated_completion

            return self._tool_success(
                f"Batch processing started: {args['batch_name']}",
                f"youtube-summarizer://batch/{batch_job.batch_id}",
                json.dumps({
                    "batch_id": batch_job.batch_id,
                    "batch_name": args["batch_name"],
                    "video_count": batch_job.video_count,
                    "status": batch_job.status,
                    "parallel_processing": batch_job.parallel_processing,
                    "estimated_completion": eta.isoformat() if eta else None
                }, indent=2)
            )
        except Exception as e:
            return self._tool_error(f"Batch processing failed: {str(e)}")

    async def _get_job_status(self, args: Dict[str, Any]) -> MCPToolResult:
        """Get job status tool implementation

        Args:
            args: Tool arguments; ``job_id`` is required.

        Returns:
            Tool result describing the status returned by
            ``self.client.get_job_status``; any exception is converted into
            an error result.
        """
        try:
            job_status = await self.client.get_job_status(args["job_id"])
            eta = job_status.estimated_completion

            return self._tool_success(
                f"Job {args['job_id']} status: {job_status.status} ({job_status.progress_percentage}% complete)",
                f"youtube-summarizer://job/{args['job_id']}",
                json.dumps({
                    "job_id": job_status.job_id,
                    "status": job_status.status,
                    "priority": job_status.priority,
                    "progress_percentage": job_status.progress_percentage,
                    "current_stage": job_status.current_stage,
                    "created_at": job_status.created_at.isoformat(),
                    "estimated_completion": eta.isoformat() if eta else None,
                    "metadata": job_status.metadata
                }, indent=2)
            )
        except Exception as e:
            return self._tool_error(f"Failed to get job status: {str(e)}")

    async def _get_processing_estimate(self, args: Dict[str, Any]) -> MCPToolResult:
        """Get processing estimate tool implementation

        Args:
            args: Tool arguments; ``video_url`` is required.

        Returns:
            Tool result with the estimate returned by
            ``self.client.get_processing_estimate``; any exception is
            converted into an error result.
        """
        try:
            # NOTE(review): transcript_source is echoed in the payload but not
            # forwarded to the client - confirm whether
            # client.get_processing_estimate accepts it.
            estimate = await self.client.get_processing_estimate(args["video_url"])

            return self._tool_success(
                f"Processing estimate for {args['video_url']}: {estimate.estimated_time_seconds:.1f} seconds",
                "youtube-summarizer://estimate",
                json.dumps({
                    "video_url": args["video_url"],
                    "transcript_source": args.get("transcript_source", "youtube"),
                    "estimated_time_seconds": estimate.estimated_time_seconds,
                    "estimated_cost": estimate.estimated_cost,
                    "factors": estimate.factors
                }, indent=2)
            )
        except Exception as e:
            return self._tool_error(f"Failed to get processing estimate: {str(e)}")

    async def _search_summaries(self, args: Dict[str, Any]) -> MCPToolResult:
        """Search summaries tool implementation

        Args:
            args: Tool arguments; ``query`` is required, ``limit`` defaults to
                10 and ``offset`` to 0.

        Returns:
            Tool result carrying the client's search response serialized as
            JSON; any exception is converted into an error result.
        """
        try:
            results = await self.client.search_summaries(
                query=args["query"],
                limit=args.get("limit", 10),
                offset=args.get("offset", 0)
            )

            return self._tool_success(
                f"Found {len(results.get('results', []))} results for query: {args['query']}",
                "youtube-summarizer://search",
                json.dumps(results, indent=2, default=str)
            )
        except Exception as e:
            return self._tool_error(f"Search failed: {str(e)}")

    async def _export_data(self, args: Dict[str, Any]) -> MCPToolResult:
        """Export data tool implementation

        Args:
            args: Tool arguments; ``format`` defaults to ``"json"``,
                ``date_from`` and ``date_to`` are optional.

        Returns:
            Tool result carrying the client's export response serialized as
            JSON; any exception is converted into an error result.
        """
        try:
            # NOTE(review): the tool schema advertises include_transcripts but
            # it is not forwarded here - confirm whether client.export_data
            # supports it.
            export_result = await self.client.export_data(
                format=args.get("format", "json"),
                date_from=args.get("date_from"),
                date_to=args.get("date_to")
            )

            return self._tool_success(
                f"Data export completed in {args.get('format', 'json')} format",
                "youtube-summarizer://export",
                json.dumps(export_result, indent=2, default=str)
            )
        except Exception as e:
            return self._tool_error(f"Export failed: {str(e)}")

    # Resource implementations

    async def _get_video_resource(self, video_id: str) -> MCPResourceResult:
        """Get video metadata resource

        Currently returns a placeholder document; no client call is made.

        Args:
            video_id: Identifier taken from the resource URI.
        """
        uri = f"youtube-summarizer://video/{video_id}"
        try:
            # This would need API endpoint support
            return self._resource(
                uri,
                "application/json",
                json.dumps({
                    "video_id": video_id,
                    "note": "Video metadata endpoint not yet implemented",
                    "timestamp": datetime.now().isoformat()
                }, indent=2)
            )
        except Exception as e:
            return self._resource(
                uri, "text/plain", f"Failed to load video resource: {str(e)}"
            )

    async def _get_queue_resource(self) -> MCPResourceResult:
        """Get processing queue resource

        Queue details are not available yet, so the document carries the
        usage statistics from ``self.client.get_usage_stats`` instead.
        """
        try:
            stats = await self.client.get_usage_stats()

            return self._resource(
                "youtube-summarizer://queue",
                "application/json",
                json.dumps({
                    "queue_stats": {
                        "note": "Processing queue details not yet implemented",
                        "usage_stats": self._usage_summary(stats),
                        "timestamp": datetime.now().isoformat()
                    }
                }, indent=2)
            )
        except Exception as e:
            return self._resource(
                "youtube-summarizer://queue",
                "text/plain",
                f"Failed to load queue resource: {str(e)}"
            )

    async def _get_analytics_resource(self, metric_type: Optional[str] = None) -> MCPResourceResult:
        """Get analytics resource

        Args:
            metric_type: Optional metric name from the resource URI. It is
                echoed back in the document and in the result URI; it does
                not filter the statistics.
        """
        uri = f"youtube-summarizer://analytics/{metric_type or ''}"
        try:
            stats = await self.client.get_usage_stats()

            usage = self._usage_summary(stats)
            usage["rate_limit_remaining"] = stats.rate_limit_remaining
            # Guarded like the other optional timestamps in this module, in
            # case no reset time is set.
            reset_time = stats.quota_reset_time
            usage["quota_reset_time"] = reset_time.isoformat() if reset_time else None

            analytics_data = {
                "usage_statistics": usage,
                "metric_type": metric_type,
                "timestamp": datetime.now().isoformat()
            }

            return self._resource(
                uri, "application/json", json.dumps(analytics_data, indent=2)
            )
        except Exception as e:
            return self._resource(
                uri, "text/plain", f"Failed to load analytics resource: {str(e)}"
            )


# Convenience function for MCP server creation
def create_mcp_interface(api_key: str, **kwargs) -> YouTubeSummarizerMCP:
    """
    Create MCP interface with default client configuration

    Args:
        api_key: Your API key
        **kwargs: Additional client configuration, passed to ``create_client``

    Returns:
        Configured MCP interface
    """
    # Imported at call time, as in the original implementation.
    from .client import create_client

    client = create_client(api_key, **kwargs)
    return YouTubeSummarizerMCP(client)