# youtube-summarizer/sdks/python/youtube_summarizer_sdk/mcp.py
#
# 712 lines
# 28 KiB
# Python

"""
YouTube Summarizer MCP (Model Context Protocol) Integration
Provides MCP server connectivity and tool execution for AI development environments
"""
import json
import logging
import asyncio
from typing import Dict, Any, List, Optional, Union
from datetime import datetime
from .models import (
TranscriptRequest, BatchProcessingRequest, JobResponse,
TranscriptResult, DualTranscriptResult, APIUsageStats,
MCPToolRequest, MCPToolResult, MCPResourceRequest, MCPResourceResult
)
from .client import YouTubeSummarizerClient
from .exceptions import YouTubeSummarizerError, ValidationError
# Module-level logger named after this module; used by the MCP interface to
# report tool-execution and resource-read failures.
logger = logging.getLogger(__name__)
class YouTubeSummarizerMCP:
    """
    MCP (Model Context Protocol) interface for YouTube Summarizer

    Provides standardized tool and resource access for AI development environments
    like Claude Code, enabling seamless integration with MCP-compatible editors.

    Tools are executed through ``call_tool`` and resources are read through
    ``read_resource``. Both catch ordinary exceptions and report the failure
    inside the returned result object instead of raising.
    """

    # URI layout shared by the resource definitions and ``read_resource``.
    _URI_SCHEME = "youtube-summarizer://"
    _VIDEO_URI_PREFIX = _URI_SCHEME + "video/"
    _QUEUE_URI = _URI_SCHEME + "queue"
    _ANALYTICS_URI = _URI_SCHEME + "analytics"

    def __init__(self, client: "YouTubeSummarizerClient"):
        """
        Initialize MCP interface with YouTube Summarizer client

        Args:
            client: Configured YouTubeSummarizerClient instance
        """
        self.client = client
        self._tools = self._initialize_tools()
        self._resources = self._initialize_resources()

    def _initialize_tools(self) -> Dict[str, Dict[str, Any]]:
        """
        Initialize MCP tool definitions

        Returns:
            Mapping of tool name to a dict holding its ``description`` and its
            JSON-Schema style ``parameters`` object.
        """
        return {
            "extract_transcript": {
                "description": "Extract transcript from YouTube video with quality options",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "video_url": {
                            "type": "string",
                            "format": "uri",
                            "description": "YouTube video URL"
                        },
                        "transcript_source": {
                            "type": "string",
                            "enum": ["youtube", "whisper", "both"],
                            "default": "youtube",
                            "description": "Transcript source preference"
                        },
                        "whisper_model_size": {
                            "type": "string",
                            "enum": ["tiny", "base", "small", "medium", "large"],
                            "default": "small",
                            "description": "Whisper model size for AI transcription"
                        },
                        "priority": {
                            "type": "string",
                            "enum": ["low", "normal", "high", "urgent"],
                            "default": "normal",
                            "description": "Processing priority"
                        },
                        "include_quality_analysis": {
                            "type": "boolean",
                            "default": True,
                            "description": "Include transcript quality analysis"
                        },
                        "wait_for_completion": {
                            "type": "boolean",
                            "default": True,
                            "description": "Wait for processing to complete before returning"
                        },
                        "timeout_seconds": {
                            "type": "number",
                            "default": 300,
                            "description": "Maximum wait time in seconds"
                        }
                    },
                    "required": ["video_url"]
                }
            },
            "batch_process_videos": {
                "description": "Process multiple YouTube videos in batch",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "video_urls": {
                            "type": "array",
                            "items": {"type": "string", "format": "uri"},
                            "minItems": 1,
                            "maxItems": 100,
                            "description": "Array of YouTube video URLs"
                        },
                        "batch_name": {
                            "type": "string",
                            "description": "Descriptive name for the batch job"
                        },
                        "transcript_source": {
                            "type": "string",
                            "enum": ["youtube", "whisper", "both"],
                            "default": "youtube",
                            "description": "Transcript source for all videos"
                        },
                        "parallel_processing": {
                            "type": "boolean",
                            "default": False,
                            "description": "Enable parallel processing"
                        },
                        "max_concurrent_jobs": {
                            "type": "integer",
                            "minimum": 1,
                            "maximum": 10,
                            "default": 3,
                            "description": "Maximum concurrent jobs"
                        }
                    },
                    "required": ["video_urls", "batch_name"]
                }
            },
            "get_job_status": {
                "description": "Get processing job status and progress",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "job_id": {
                            "type": "string",
                            "description": "Job identifier returned from extract_transcript or batch_process_videos"
                        }
                    },
                    "required": ["job_id"]
                }
            },
            "get_processing_estimate": {
                "description": "Get processing time and cost estimate for a video",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "video_url": {
                            "type": "string",
                            "format": "uri",
                            "description": "YouTube video URL"
                        },
                        "transcript_source": {
                            "type": "string",
                            "enum": ["youtube", "whisper", "both"],
                            "default": "youtube",
                            "description": "Transcript source preference"
                        }
                    },
                    "required": ["video_url"]
                }
            },
            "search_summaries": {
                "description": "Search through processed summaries and transcripts",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query string"
                        },
                        "limit": {
                            "type": "integer",
                            "minimum": 1,
                            "maximum": 100,
                            "default": 10,
                            "description": "Maximum number of results"
                        },
                        "offset": {
                            "type": "integer",
                            "minimum": 0,
                            "default": 0,
                            "description": "Results offset for pagination"
                        }
                    },
                    "required": ["query"]
                }
            },
            "export_data": {
                "description": "Export user data in various formats",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "format": {
                            "type": "string",
                            "enum": ["json", "csv", "markdown", "pdf"],
                            "default": "json",
                            "description": "Export format"
                        },
                        "date_from": {
                            "type": "string",
                            "format": "date",
                            "description": "Start date for data export (YYYY-MM-DD)"
                        },
                        "date_to": {
                            "type": "string",
                            "format": "date",
                            "description": "End date for data export (YYYY-MM-DD)"
                        },
                        "include_transcripts": {
                            "type": "boolean",
                            "default": True,
                            "description": "Include transcript data in export"
                        }
                    }
                }
            }
        }

    def _initialize_resources(self) -> Dict[str, Dict[str, Any]]:
        """
        Initialize MCP resource definitions

        Returns:
            Mapping of resource name to its ``description``, ``uri_template``
            and ``mime_type``.
        """
        return {
            "video-metadata": {
                "description": "Get video metadata and processing information",
                "uri_template": "youtube-summarizer://video/{video_id}",
                "mime_type": "application/json"
            },
            "processing-queue": {
                "description": "View current processing queue and job statistics",
                "uri_template": "youtube-summarizer://queue",
                "mime_type": "application/json"
            },
            "analytics-dashboard": {
                "description": "Access usage analytics and performance metrics",
                "uri_template": "youtube-summarizer://analytics/{metric_type?}",
                "mime_type": "application/json"
            }
        }

    async def list_tools(self) -> List[Dict[str, Any]]:
        """
        List available MCP tools

        Returns:
            List of tool definitions, each with ``name``, ``description`` and
            ``inputSchema`` keys
        """
        return [
            {
                "name": name,
                "description": tool["description"],
                "inputSchema": tool["parameters"]
            }
            for name, tool in self._tools.items()
        ]

    async def list_resources(self) -> List[Dict[str, Any]]:
        """
        List available MCP resources

        Returns:
            List of resource definitions, each with ``name``, ``description``,
            ``uri`` (the URI template) and ``mimeType`` keys
        """
        return [
            {
                "name": name,
                "description": resource["description"],
                "uri": resource["uri_template"],
                "mimeType": resource["mime_type"]
            }
            for name, resource in self._resources.items()
        ]

    # Shared helpers

    @staticmethod
    def _iso(value: Any) -> Optional[str]:
        """Return ``value.isoformat()``, or ``None`` when the value is missing."""
        return value.isoformat() if value else None

    @staticmethod
    def _usage_summary(stats: Any) -> Dict[str, Any]:
        """Collect the usage counters reported by both the queue and analytics resources."""
        return {
            "total_requests": stats.total_requests,
            "requests_today": stats.requests_today,
            "requests_this_month": stats.requests_this_month,
            "success_rate": stats.success_rate,
            "average_response_time_ms": stats.average_response_time_ms
        }

    @staticmethod
    def _tool_error(message: str) -> "MCPToolResult":
        """Build an error tool result carrying a single text item."""
        return MCPToolResult(
            content=[{"type": "text", "text": message}],
            is_error=True
        )

    @staticmethod
    def _tool_success(summary: str, uri: str, payload: Any) -> "MCPToolResult":
        """
        Build a successful tool result: a text summary plus a JSON resource

        ``default=str`` is used for every payload so that values json cannot
        encode natively (e.g. datetimes nested in metadata) do not turn an
        otherwise successful call into a reported failure.
        """
        return MCPToolResult(
            content=[
                {"type": "text", "text": summary},
                {
                    "type": "resource",
                    "resource": {
                        "uri": uri,
                        "text": json.dumps(payload, indent=2, default=str)
                    }
                }
            ]
        )

    @staticmethod
    def _resource_json(uri: str, payload: Any) -> "MCPResourceResult":
        """Build a resource result holding ``payload`` serialized as JSON."""
        return MCPResourceResult(
            contents=[{
                "uri": uri,
                "mimeType": "application/json",
                "text": json.dumps(payload, indent=2, default=str)
            }]
        )

    @staticmethod
    def _resource_text(uri: Any, message: str) -> "MCPResourceResult":
        """Build a plain-text resource result (used for not-found and failure messages)."""
        return MCPResourceResult(
            contents=[{
                "uri": uri,
                "mimeType": "text/plain",
                "text": message
            }]
        )

    def _missing_arguments(self, tool_name: str, arguments: Dict[str, Any]) -> List[str]:
        """
        Return the required argument names that are absent (or None) in ``arguments``

        The required names come from the ``required`` list of the tool's
        schema; unknown tools and tools without that list require nothing.
        """
        schema = self._tools.get(tool_name, {}).get("parameters", {})
        return [
            key for key in schema.get("required", [])
            if arguments.get(key) is None
        ]

    @classmethod
    def _parse_resource_uri(cls, uri: str) -> Optional[tuple]:
        """
        Split a resource URI into ``(kind, value)``

        Returns:
            ``("video", video_id)``, ``("queue", None)`` or
            ``("analytics", metric_type_or_None)``; ``None`` when the URI does
            not name a known resource (including a video URI without an id).
        """
        if uri.startswith(cls._VIDEO_URI_PREFIX):
            video_id = uri[len(cls._VIDEO_URI_PREFIX):].strip("/")
            return ("video", video_id) if video_id else None
        if uri == cls._QUEUE_URI:
            return ("queue", None)
        # Require an exact match or a "/" separator so that look-alike URIs
        # (e.g. ".../analyticsfoo") are rejected and the bare analytics URI
        # maps to "no metric type" rather than to the word "analytics".
        if uri == cls._ANALYTICS_URI or uri.startswith(cls._ANALYTICS_URI + "/"):
            metric_type = uri[len(cls._ANALYTICS_URI):].strip("/")
            return ("analytics", metric_type or None)
        return None

    # Public entry points

    async def call_tool(self, request: "MCPToolRequest") -> "MCPToolResult":
        """
        Execute MCP tool

        Args:
            request: MCP tool request; ``request.name`` selects the tool and
                ``request.arguments`` holds its arguments (``None`` is treated
                as no arguments)

        Returns:
            Tool execution result. Unknown tools, missing required arguments
            and exceptions are reported as results with ``is_error=True``.
        """
        try:
            handlers = {
                "extract_transcript": self._extract_transcript,
                "batch_process_videos": self._batch_process_videos,
                "get_job_status": self._get_job_status,
                "get_processing_estimate": self._get_processing_estimate,
                "search_summaries": self._search_summaries,
                "export_data": self._export_data
            }
            handler = handlers.get(request.name)
            if handler is None:
                return self._tool_error(f"Unknown tool: {request.name}")

            arguments = request.arguments or {}
            # Fail early with a readable message instead of a bare KeyError text.
            missing = self._missing_arguments(request.name, arguments)
            if missing:
                return self._tool_error(
                    f"Missing required argument(s) for {request.name}: {', '.join(missing)}"
                )
            return await handler(arguments)
        except Exception as e:
            logger.exception("Tool execution error: %s", e)
            return self._tool_error(f"Tool execution failed: {e}")

    async def read_resource(self, request: "MCPResourceRequest") -> "MCPResourceResult":
        """
        Read MCP resource

        Args:
            request: MCP resource request; ``request.uri`` selects the resource

        Returns:
            Resource content. Unknown URIs and exceptions are reported as
            ``text/plain`` content describing the problem.
        """
        try:
            parsed = self._parse_resource_uri(request.uri)
            if parsed is None:
                return self._resource_text(
                    request.uri, f"Resource not found: {request.uri}"
                )
            kind, value = parsed
            if kind == "video":
                return await self._get_video_resource(value)
            if kind == "queue":
                return await self._get_queue_resource()
            return await self._get_analytics_resource(value)
        except Exception as e:
            logger.exception("Resource read error: %s", e)
            return self._resource_text(request.uri, f"Resource read failed: {e}")

    # Tool implementations

    async def _extract_transcript(self, args: Dict[str, Any]) -> "MCPToolResult":
        """
        Extract transcript tool implementation

        Submits the job via ``client.extract_transcript`` and, unless
        ``wait_for_completion`` is false, waits for it with
        ``client.wait_for_job`` (``timeout_seconds``, default 300).
        """
        try:
            # Build transcript request
            transcript_request = TranscriptRequest(
                video_url=args["video_url"],
                transcript_source=args.get("transcript_source", "youtube"),
                whisper_model_size=args.get("whisper_model_size", "small"),
                priority=args.get("priority", "normal"),
                include_quality_analysis=args.get("include_quality_analysis", True)
            )
            # Submit job
            job = await self.client.extract_transcript(transcript_request)

            if not args.get("wait_for_completion", True):
                # Fire-and-forget: report the job handle only.
                return self._tool_success(
                    f"Transcript extraction started. Job ID: {job.job_id}",
                    f"youtube-summarizer://job/{job.job_id}",
                    {
                        "job_id": job.job_id,
                        "status": job.status,
                        "progress": job.progress_percentage,
                        "estimated_completion": self._iso(job.estimated_completion)
                    }
                )

            timeout = args.get("timeout_seconds", 300)
            result = await self.client.wait_for_job(job.job_id, timeout=timeout)
            return self._tool_success(
                f"Transcript extraction completed for {args['video_url']}",
                f"youtube-summarizer://video/{result.video_id}",
                {
                    "job_id": job.job_id,
                    "video_url": result.video_url,
                    "transcript": result.transcript,
                    "processing_time": result.processing_time_seconds,
                    "quality_score": result.quality_score,
                    "confidence_score": result.confidence_score,
                    "metadata": result.metadata
                }
            )
        except Exception as e:
            return self._tool_error(f"Transcript extraction failed: {e}")

    async def _batch_process_videos(self, args: Dict[str, Any]) -> "MCPToolResult":
        """
        Batch process videos tool implementation

        Submits the batch via ``client.batch_process`` and returns the batch
        handle without waiting for completion.
        """
        try:
            batch_request = BatchProcessingRequest(
                video_urls=args["video_urls"],
                batch_name=args["batch_name"],
                transcript_source=args.get("transcript_source", "youtube"),
                parallel_processing=args.get("parallel_processing", False),
                max_concurrent_jobs=args.get("max_concurrent_jobs", 3)
            )
            batch_job = await self.client.batch_process(batch_request)
            return self._tool_success(
                f"Batch processing started: {args['batch_name']}",
                f"youtube-summarizer://batch/{batch_job.batch_id}",
                {
                    "batch_id": batch_job.batch_id,
                    "batch_name": args["batch_name"],
                    "video_count": batch_job.video_count,
                    "status": batch_job.status,
                    "parallel_processing": batch_job.parallel_processing,
                    "estimated_completion": self._iso(batch_job.estimated_completion)
                }
            )
        except Exception as e:
            return self._tool_error(f"Batch processing failed: {e}")

    async def _get_job_status(self, args: Dict[str, Any]) -> "MCPToolResult":
        """Get job status tool implementation (wraps ``client.get_job_status``)"""
        try:
            job_status = await self.client.get_job_status(args["job_id"])
            return self._tool_success(
                f"Job {args['job_id']} status: {job_status.status} ({job_status.progress_percentage}% complete)",
                f"youtube-summarizer://job/{args['job_id']}",
                {
                    "job_id": job_status.job_id,
                    "status": job_status.status,
                    "priority": job_status.priority,
                    "progress_percentage": job_status.progress_percentage,
                    "current_stage": job_status.current_stage,
                    "created_at": self._iso(job_status.created_at),
                    "estimated_completion": self._iso(job_status.estimated_completion),
                    "metadata": job_status.metadata
                }
            )
        except Exception as e:
            return self._tool_error(f"Failed to get job status: {e}")

    async def _get_processing_estimate(self, args: Dict[str, Any]) -> "MCPToolResult":
        """Get processing estimate tool implementation (wraps ``client.get_processing_estimate``)"""
        try:
            # NOTE(review): ``transcript_source`` is echoed in the payload but is
            # not passed to the client, so the estimate may not reflect it.
            # Forward it once the client signature is confirmed to accept it.
            estimate = await self.client.get_processing_estimate(args["video_url"])
            return self._tool_success(
                f"Processing estimate for {args['video_url']}: {estimate.estimated_time_seconds:.1f} seconds",
                "youtube-summarizer://estimate",
                {
                    "video_url": args["video_url"],
                    "transcript_source": args.get("transcript_source", "youtube"),
                    "estimated_time_seconds": estimate.estimated_time_seconds,
                    "estimated_cost": estimate.estimated_cost,
                    "factors": estimate.factors
                }
            )
        except Exception as e:
            return self._tool_error(f"Failed to get processing estimate: {e}")

    async def _search_summaries(self, args: Dict[str, Any]) -> "MCPToolResult":
        """Search summaries tool implementation (wraps ``client.search_summaries``)"""
        try:
            results = await self.client.search_summaries(
                query=args["query"],
                limit=args.get("limit", 10),
                offset=args.get("offset", 0)
            )
            return self._tool_success(
                f"Found {len(results.get('results', []))} results for query: {args['query']}",
                "youtube-summarizer://search",
                results
            )
        except Exception as e:
            return self._tool_error(f"Search failed: {e}")

    async def _export_data(self, args: Dict[str, Any]) -> "MCPToolResult":
        """Export data tool implementation (wraps ``client.export_data``)"""
        try:
            export_format = args.get("format", "json")
            # NOTE(review): the tool schema advertises ``include_transcripts``
            # but it is not forwarded to the client here - confirm whether
            # ``client.export_data`` supports it.
            export_result = await self.client.export_data(
                format=export_format,
                date_from=args.get("date_from"),
                date_to=args.get("date_to")
            )
            return self._tool_success(
                f"Data export completed in {export_format} format",
                "youtube-summarizer://export",
                export_result
            )
        except Exception as e:
            return self._tool_error(f"Export failed: {e}")

    # Resource implementations

    async def _get_video_resource(self, video_id: str) -> "MCPResourceResult":
        """
        Get video metadata resource

        Currently a placeholder: returns the video id, a note that the
        endpoint is not implemented, and the current local timestamp.
        """
        # This would need API endpoint support
        return self._resource_json(
            f"youtube-summarizer://video/{video_id}",
            {
                "video_id": video_id,
                "note": "Video metadata endpoint not yet implemented",
                "timestamp": datetime.now().isoformat()
            }
        )

    async def _get_queue_resource(self) -> "MCPResourceResult":
        """
        Get processing queue resource

        Queue details are not implemented yet; the payload carries the usage
        counters from ``client.get_usage_stats`` instead.
        """
        try:
            stats = await self.client.get_usage_stats()
            return self._resource_json(
                self._QUEUE_URI,
                {
                    "queue_stats": {
                        "note": "Processing queue details not yet implemented",
                        "usage_stats": self._usage_summary(stats),
                        "timestamp": datetime.now().isoformat()
                    }
                }
            )
        except Exception as e:
            return self._resource_text(
                self._QUEUE_URI, f"Failed to load queue resource: {e}"
            )

    async def _get_analytics_resource(self, metric_type: Optional[str] = None) -> "MCPResourceResult":
        """
        Get analytics resource

        Args:
            metric_type: Optional metric name taken from the resource URI; it
                is echoed in the payload and in the result URI
        """
        # Only append a path segment when there is one, so the result URI for
        # the bare analytics resource has no trailing slash.
        uri = f"{self._ANALYTICS_URI}/{metric_type}" if metric_type else self._ANALYTICS_URI
        try:
            stats = await self.client.get_usage_stats()
            usage = self._usage_summary(stats)
            usage["rate_limit_remaining"] = stats.rate_limit_remaining
            # Guarded like the other optional timestamps in this class.
            usage["quota_reset_time"] = self._iso(stats.quota_reset_time)
            return self._resource_json(
                uri,
                {
                    "usage_statistics": usage,
                    "metric_type": metric_type,
                    "timestamp": datetime.now().isoformat()
                }
            )
        except Exception as e:
            return self._resource_text(uri, f"Failed to load analytics resource: {e}")
# Convenience function for MCP server creation
def create_mcp_interface(api_key: str, **kwargs) -> YouTubeSummarizerMCP:
    """
    Build an MCP interface backed by a freshly created client

    The client is obtained from ``create_client`` in the sibling ``client``
    module (imported here, at call time) and wrapped in a
    ``YouTubeSummarizerMCP``.

    Args:
        api_key: Your API key, passed as the first argument to ``create_client``
        **kwargs: Additional client configuration, forwarded to ``create_client``

    Returns:
        MCP interface wrapping the new client
    """
    from .client import create_client

    return YouTubeSummarizerMCP(create_client(api_key, **kwargs))