""" Webhook System for YouTube Summarizer Provides webhook registration, management, and delivery for autonomous operations """ import asyncio import json import logging import hmac import hashlib import time from typing import Any, Dict, List, Optional, Callable, Union from datetime import datetime, timedelta from enum import Enum from dataclasses import dataclass, field from urllib.parse import urlparse import httpx from pydantic import BaseModel, HttpUrl, Field logger = logging.getLogger(__name__) class WebhookEvent(str, Enum): """Supported webhook events""" TRANSCRIPTION_COMPLETED = "transcription.completed" TRANSCRIPTION_FAILED = "transcription.failed" SUMMARIZATION_COMPLETED = "summarization.completed" SUMMARIZATION_FAILED = "summarization.failed" BATCH_STARTED = "batch.started" BATCH_COMPLETED = "batch.completed" BATCH_FAILED = "batch.failed" VIDEO_PROCESSED = "video.processed" ERROR_OCCURRED = "error.occurred" SYSTEM_STATUS = "system.status" USER_QUOTA_EXCEEDED = "user.quota_exceeded" PROCESSING_DELAYED = "processing.delayed" class WebhookStatus(str, Enum): """Webhook delivery status""" PENDING = "pending" DELIVERED = "delivered" FAILED = "failed" RETRYING = "retrying" EXPIRED = "expired" class WebhookSecurityType(str, Enum): """Webhook security methods""" NONE = "none" HMAC_SHA256 = "hmac_sha256" BEARER_TOKEN = "bearer_token" API_KEY_HEADER = "api_key_header" @dataclass class WebhookConfig: """Webhook configuration""" url: str events: List[WebhookEvent] active: bool = True security_type: WebhookSecurityType = WebhookSecurityType.HMAC_SHA256 secret: Optional[str] = None headers: Dict[str, str] = field(default_factory=dict) timeout_seconds: int = 30 retry_attempts: int = 3 retry_delay_seconds: int = 5 filter_conditions: Optional[Dict[str, Any]] = None created_at: datetime = field(default_factory=datetime.now) updated_at: datetime = field(default_factory=datetime.now) @dataclass class WebhookDelivery: """Webhook delivery record""" id: str webhook_id: str event: WebhookEvent payload: Dict[str, Any] status: WebhookStatus = WebhookStatus.PENDING attempt_count: int = 0 last_attempt_at: Optional[datetime] = None delivered_at: Optional[datetime] = None response_status: Optional[int] = None response_body: Optional[str] = None error_message: Optional[str] = None created_at: datetime = field(default_factory=datetime.now) expires_at: datetime = field(default_factory=lambda: datetime.now() + timedelta(hours=24)) class WebhookPayload(BaseModel): """Standard webhook payload structure""" event: WebhookEvent timestamp: datetime = Field(default_factory=datetime.now) webhook_id: str delivery_id: str data: Dict[str, Any] metadata: Dict[str, Any] = Field(default_factory=dict) class WebhookManager: """Manages webhook registration, delivery, and retries""" def __init__(self): self.webhooks: Dict[str, WebhookConfig] = {} self.deliveries: Dict[str, WebhookDelivery] = {} self.event_handlers: Dict[WebhookEvent, List[Callable]] = {} self.delivery_queue: asyncio.Queue = asyncio.Queue() self.is_processing = False self.stats = { "total_deliveries": 0, "successful_deliveries": 0, "failed_deliveries": 0, "retry_attempts": 0, "average_response_time": 0.0 } # Start background processor asyncio.create_task(self._process_delivery_queue()) def register_webhook( self, webhook_id: str, url: str, events: List[WebhookEvent], security_type: WebhookSecurityType = WebhookSecurityType.HMAC_SHA256, secret: Optional[str] = None, **kwargs ) -> bool: """Register a new webhook""" try: # Validate URL parsed = urlparse(url) if not parsed.scheme or not parsed.netloc: raise ValueError("Invalid webhook URL") # Generate secret if not provided for HMAC if security_type == WebhookSecurityType.HMAC_SHA256 and not secret: secret = self._generate_secret() config = WebhookConfig( url=url, events=events, security_type=security_type, secret=secret, **kwargs ) self.webhooks[webhook_id] = config logger.info(f"Registered webhook {webhook_id} for events: {events}") return True except Exception as e: logger.error(f"Failed to register webhook {webhook_id}: {e}") return False def unregister_webhook(self, webhook_id: str) -> bool: """Unregister a webhook""" if webhook_id in self.webhooks: del self.webhooks[webhook_id] logger.info(f"Unregistered webhook {webhook_id}") return True return False def update_webhook(self, webhook_id: str, **updates) -> bool: """Update webhook configuration""" if webhook_id not in self.webhooks: return False config = self.webhooks[webhook_id] for key, value in updates.items(): if hasattr(config, key): setattr(config, key, value) config.updated_at = datetime.now() logger.info(f"Updated webhook {webhook_id}") return True def activate_webhook(self, webhook_id: str) -> bool: """Activate a webhook""" return self.update_webhook(webhook_id, active=True) def deactivate_webhook(self, webhook_id: str) -> bool: """Deactivate a webhook""" return self.update_webhook(webhook_id, active=False) async def trigger_event( self, event: WebhookEvent, data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None ) -> List[str]: """Trigger an event and queue webhook deliveries""" delivery_ids = [] metadata = metadata or {} # Find matching webhooks for webhook_id, config in self.webhooks.items(): if not config.active: continue if event not in config.events: continue # Apply filters if configured if config.filter_conditions and not self._matches_filters(data, config.filter_conditions): continue # Create delivery delivery_id = f"delivery_{int(time.time() * 1000)}_{webhook_id}" delivery = WebhookDelivery( id=delivery_id, webhook_id=webhook_id, event=event, payload=data ) self.deliveries[delivery_id] = delivery delivery_ids.append(delivery_id) # Queue for processing await self.delivery_queue.put(delivery_id) logger.info(f"Triggered event {event} - queued {len(delivery_ids)} deliveries") return delivery_ids async def _process_delivery_queue(self): """Background processor for webhook deliveries""" self.is_processing = True logger.info("Started webhook delivery processor") while True: try: # Get next delivery delivery_id = await self.delivery_queue.get() if delivery_id not in self.deliveries: continue delivery = self.deliveries[delivery_id] # Check if expired if datetime.now() > delivery.expires_at: delivery.status = WebhookStatus.EXPIRED logger.warning(f"Delivery {delivery_id} expired") continue # Attempt delivery await self._attempt_delivery(delivery) except Exception as e: logger.error(f"Error in delivery processor: {e}") await asyncio.sleep(1) # Brief pause on errors async def _attempt_delivery(self, delivery: WebhookDelivery): """Attempt to deliver a webhook""" webhook_id = delivery.webhook_id if webhook_id not in self.webhooks: logger.error(f"Webhook {webhook_id} not found for delivery {delivery.id}") return config = self.webhooks[webhook_id] delivery.attempt_count += 1 delivery.last_attempt_at = datetime.now() delivery.status = WebhookStatus.RETRYING if delivery.attempt_count > 1 else WebhookStatus.PENDING try: # Prepare payload payload = WebhookPayload( event=delivery.event, webhook_id=webhook_id, delivery_id=delivery.id, data=delivery.payload, metadata={ "attempt": delivery.attempt_count, "max_attempts": config.retry_attempts } ) # Prepare headers headers = config.headers.copy() headers["Content-Type"] = "application/json" headers["User-Agent"] = "YouTubeSummarizer-Webhook/1.0" headers["X-Webhook-Event"] = delivery.event.value headers["X-Webhook-Delivery"] = delivery.id headers["X-Webhook-Timestamp"] = str(int(payload.timestamp.timestamp())) # Add security headers payload_json = payload.json() if config.security_type == WebhookSecurityType.HMAC_SHA256 and config.secret: signature = self._create_hmac_signature(payload_json, config.secret) headers["X-Hub-Signature-256"] = f"sha256={signature}" elif config.security_type == WebhookSecurityType.BEARER_TOKEN and config.secret: headers["Authorization"] = f"Bearer {config.secret}" elif config.security_type == WebhookSecurityType.API_KEY_HEADER and config.secret: headers["X-API-Key"] = config.secret # Make HTTP request start_time = time.time() async with httpx.AsyncClient(timeout=config.timeout_seconds) as client: response = await client.post( config.url, content=payload_json, headers=headers ) response_time = time.time() - start_time # Update delivery record delivery.response_status = response.status_code delivery.response_body = response.text[:1000] # Limit body size # Check if successful if 200 <= response.status_code < 300: delivery.status = WebhookStatus.DELIVERED delivery.delivered_at = datetime.now() # Update stats self.stats["successful_deliveries"] += 1 self._update_average_response_time(response_time) logger.info(f"Successfully delivered webhook {delivery.id} to {config.url}") else: raise httpx.HTTPStatusError( f"HTTP {response.status_code}", request=response.request, response=response ) except Exception as e: logger.warning(f"Webhook delivery failed (attempt {delivery.attempt_count}): {e}") delivery.error_message = str(e) self.stats["retry_attempts"] += 1 # Check if we should retry if delivery.attempt_count < config.retry_attempts: # Schedule retry retry_delay = config.retry_delay_seconds * (2 ** (delivery.attempt_count - 1)) # Exponential backoff async def schedule_retry(): await asyncio.sleep(retry_delay) await self.delivery_queue.put(delivery.id) asyncio.create_task(schedule_retry()) logger.info(f"Scheduled retry for delivery {delivery.id} in {retry_delay}s") else: delivery.status = WebhookStatus.FAILED self.stats["failed_deliveries"] += 1 logger.error(f"Webhook delivery {delivery.id} permanently failed after {delivery.attempt_count} attempts") finally: self.stats["total_deliveries"] += 1 def _create_hmac_signature(self, payload: str, secret: str) -> str: """Create HMAC SHA256 signature for payload""" return hmac.new( secret.encode('utf-8'), payload.encode('utf-8'), hashlib.sha256 ).hexdigest() def _generate_secret(self) -> str: """Generate a secure secret for webhook signing""" import secrets return secrets.token_urlsafe(32) def _matches_filters(self, data: Dict[str, Any], filters: Dict[str, Any]) -> bool: """Check if data matches filter conditions""" for key, expected_value in filters.items(): if key not in data: return False actual_value = data[key] # Simple equality check (can be extended for more complex conditions) if isinstance(expected_value, dict): # Handle nested conditions if "$in" in expected_value: if actual_value not in expected_value["$in"]: return False elif "$gt" in expected_value: if actual_value <= expected_value["$gt"]: return False elif "$lt" in expected_value: if actual_value >= expected_value["$lt"]: return False else: if actual_value != expected_value: return False return True def _update_average_response_time(self, response_time: float): """Update rolling average response time""" current_avg = self.stats["average_response_time"] successful_count = self.stats["successful_deliveries"] if successful_count == 1: self.stats["average_response_time"] = response_time else: self.stats["average_response_time"] = ( (current_avg * (successful_count - 1) + response_time) / successful_count ) def get_webhook_status(self, webhook_id: str) -> Optional[Dict[str, Any]]: """Get webhook status and statistics""" if webhook_id not in self.webhooks: return None config = self.webhooks[webhook_id] # Calculate webhook-specific stats webhook_deliveries = [d for d in self.deliveries.values() if d.webhook_id == webhook_id] total = len(webhook_deliveries) successful = len([d for d in webhook_deliveries if d.status == WebhookStatus.DELIVERED]) failed = len([d for d in webhook_deliveries if d.status == WebhookStatus.FAILED]) pending = len([d for d in webhook_deliveries if d.status in [WebhookStatus.PENDING, WebhookStatus.RETRYING]]) return { "webhook_id": webhook_id, "url": config.url, "events": config.events, "active": config.active, "security_type": config.security_type, "created_at": config.created_at.isoformat(), "updated_at": config.updated_at.isoformat(), "statistics": { "total_deliveries": total, "successful_deliveries": successful, "failed_deliveries": failed, "pending_deliveries": pending, "success_rate": successful / total if total > 0 else 0.0 }, "recent_deliveries": [ { "id": d.id, "event": d.event, "status": d.status, "attempt_count": d.attempt_count, "created_at": d.created_at.isoformat(), "delivered_at": d.delivered_at.isoformat() if d.delivered_at else None } for d in sorted(webhook_deliveries, key=lambda x: x.created_at, reverse=True)[:10] ] } def get_delivery_status(self, delivery_id: str) -> Optional[Dict[str, Any]]: """Get specific delivery status""" if delivery_id not in self.deliveries: return None delivery = self.deliveries[delivery_id] return { "delivery_id": delivery.id, "webhook_id": delivery.webhook_id, "event": delivery.event, "status": delivery.status, "attempt_count": delivery.attempt_count, "last_attempt_at": delivery.last_attempt_at.isoformat() if delivery.last_attempt_at else None, "delivered_at": delivery.delivered_at.isoformat() if delivery.delivered_at else None, "response_status": delivery.response_status, "error_message": delivery.error_message, "created_at": delivery.created_at.isoformat(), "expires_at": delivery.expires_at.isoformat() } def get_system_stats(self) -> Dict[str, Any]: """Get overall webhook system statistics""" active_webhooks = len([w for w in self.webhooks.values() if w.active]) return { "webhook_manager_status": "running" if self.is_processing else "stopped", "total_webhooks": len(self.webhooks), "active_webhooks": active_webhooks, "total_deliveries": self.stats["total_deliveries"], "successful_deliveries": self.stats["successful_deliveries"], "failed_deliveries": self.stats["failed_deliveries"], "retry_attempts": self.stats["retry_attempts"], "success_rate": ( self.stats["successful_deliveries"] / self.stats["total_deliveries"] if self.stats["total_deliveries"] > 0 else 0.0 ), "average_response_time": round(self.stats["average_response_time"], 3), "queue_size": self.delivery_queue.qsize(), "pending_deliveries": len([ d for d in self.deliveries.values() if d.status in [WebhookStatus.PENDING, WebhookStatus.RETRYING] ]) } def cleanup_old_deliveries(self, days_old: int = 7): """Clean up old delivery records""" cutoff_date = datetime.now() - timedelta(days=days_old) old_deliveries = [ delivery_id for delivery_id, delivery in self.deliveries.items() if delivery.created_at < cutoff_date and delivery.status in [ WebhookStatus.DELIVERED, WebhookStatus.FAILED, WebhookStatus.EXPIRED ] ] for delivery_id in old_deliveries: del self.deliveries[delivery_id] logger.info(f"Cleaned up {len(old_deliveries)} old delivery records") return len(old_deliveries) # Global webhook manager instance webhook_manager = WebhookManager() # Convenience functions for common webhook operations async def register_webhook( webhook_id: str, url: str, events: List[WebhookEvent], secret: Optional[str] = None, **kwargs ) -> bool: """Register a webhook with the global manager""" return webhook_manager.register_webhook(webhook_id, url, events, secret=secret, **kwargs) async def trigger_event(event: WebhookEvent, data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None) -> List[str]: """Trigger an event with the global manager""" return await webhook_manager.trigger_event(event, data, metadata) def get_webhook_status(webhook_id: str) -> Optional[Dict[str, Any]]: """Get webhook status from global manager""" return webhook_manager.get_webhook_status(webhook_id) def get_system_stats() -> Dict[str, Any]: """Get system statistics from global manager""" return webhook_manager.get_system_stats()