# youtube-summarizer/backend/autonomous/webhook_system.py
#
# 533 lines
# 20 KiB
# Python

"""
Webhook System for YouTube Summarizer
Provides webhook registration, management, and delivery for autonomous operations
"""
import asyncio
import json
import logging
import hmac
import hashlib
import time
from typing import Any, Dict, List, Optional, Callable, Union
from datetime import datetime, timedelta
from enum import Enum
from dataclasses import dataclass, field
from urllib.parse import urlparse
import httpx
from pydantic import BaseModel, HttpUrl, Field
logger = logging.getLogger(__name__)
class WebhookEvent(str, Enum):
    """Event names a webhook can subscribe to.

    Members are ``str`` subclasses, so each one compares equal to its
    dotted wire name (for example ``"batch.completed"``).
    """

    # Transcription lifecycle
    TRANSCRIPTION_COMPLETED = "transcription.completed"
    TRANSCRIPTION_FAILED = "transcription.failed"

    # Summarization lifecycle
    SUMMARIZATION_COMPLETED = "summarization.completed"
    SUMMARIZATION_FAILED = "summarization.failed"

    # Batch lifecycle
    BATCH_STARTED = "batch.started"
    BATCH_COMPLETED = "batch.completed"
    BATCH_FAILED = "batch.failed"

    # Miscellaneous notifications
    VIDEO_PROCESSED = "video.processed"
    ERROR_OCCURRED = "error.occurred"
    SYSTEM_STATUS = "system.status"
    USER_QUOTA_EXCEEDED = "user.quota_exceeded"
    PROCESSING_DELAYED = "processing.delayed"
class WebhookStatus(str, Enum):
    """State of a single webhook delivery record."""

    # Not yet delivered / being re-attempted
    PENDING = "pending"
    DELIVERED = "delivered"
    FAILED = "failed"
    RETRYING = "retrying"
    EXPIRED = "expired"
class WebhookSecurityType(str, Enum):
    """How outgoing webhook requests are authenticated."""

    NONE = "none"
    HMAC_SHA256 = "hmac_sha256"
    BEARER_TOKEN = "bearer_token"
    API_KEY_HEADER = "api_key_header"
@dataclass
class WebhookConfig:
    """Configuration of one registered webhook endpoint."""

    # Destination the delivery payload is POSTed to.
    url: str
    # Events this webhook is subscribed to.
    events: List[WebhookEvent]
    # Inactive webhooks are skipped when events are triggered.
    active: bool = True
    security_type: WebhookSecurityType = WebhookSecurityType.HMAC_SHA256
    # Signing key / bearer token / API key, depending on ``security_type``.
    # Excluded from the generated ``repr`` so that logging or printing a
    # config object cannot leak the credential.
    secret: Optional[str] = field(default=None, repr=False)
    # Extra headers copied into every delivery request.
    headers: Dict[str, str] = field(default_factory=dict)
    # Timeout handed to the HTTP client for each delivery request.
    timeout_seconds: int = 30
    # Upper bound on total delivery attempts (the first try counts).
    retry_attempts: int = 3
    # Base delay for the exponential retry backoff.
    retry_delay_seconds: int = 5
    # Optional conditions matched against the event data before queuing.
    filter_conditions: Optional[Dict[str, Any]] = None
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
def _default_delivery_expiry() -> datetime:
    """Return the default expiry for a new delivery: 24 hours from now."""
    return datetime.now() + timedelta(hours=24)


@dataclass
class WebhookDelivery:
    """Record tracking one delivery of an event to one webhook."""

    # Identity and content
    id: str
    webhook_id: str
    event: WebhookEvent
    payload: Dict[str, Any]

    # Progress tracking
    status: WebhookStatus = WebhookStatus.PENDING
    attempt_count: int = 0
    last_attempt_at: Optional[datetime] = None
    delivered_at: Optional[datetime] = None

    # Outcome of the most recent attempt
    response_status: Optional[int] = None
    response_body: Optional[str] = None
    error_message: Optional[str] = None

    # Lifetime
    created_at: datetime = field(default_factory=datetime.now)
    expires_at: datetime = field(default_factory=_default_delivery_expiry)
class WebhookPayload(BaseModel):
    """Envelope serialized as the body of every webhook request."""

    # Which event fired.
    event: WebhookEvent
    # Defaults to the moment the payload object is created.
    timestamp: datetime = Field(default_factory=datetime.now)
    # Identifiers of the target webhook and of this delivery record.
    webhook_id: str
    delivery_id: str
    # Event-specific content.
    data: Dict[str, Any]
    # Additional information about the delivery; empty unless supplied.
    metadata: Dict[str, Any] = Field(default_factory=dict)
class WebhookManager:
    """Manages webhook registration, delivery, and retries.

    Deliveries are queued by :meth:`trigger_event` and sent one at a time
    by a background task that consumes ``delivery_queue``.  The task is
    started lazily: constructing a manager without a running event loop
    (for example at import time) is safe, and the processor is started the
    first time :meth:`trigger_event` runs inside a loop.
    """

    # Delivery is performed with an HTTP POST, so only these schemes work.
    _ALLOWED_URL_SCHEMES = ("http", "https")

    # Operators understood inside a filter condition dict.
    _FILTER_OPERATORS = ("$in", "$gt", "$lt")

    def __init__(self):
        self.webhooks: Dict[str, WebhookConfig] = {}
        self.deliveries: Dict[str, WebhookDelivery] = {}
        self.event_handlers: Dict[WebhookEvent, List[Callable]] = {}
        # NOTE(review): on Python < 3.10 an asyncio.Queue binds to the loop
        # that is current at construction time - confirm the target runtime
        # if managers are built before the application loop exists.
        self.delivery_queue: asyncio.Queue = asyncio.Queue()
        self.is_processing = False
        self.stats = {
            # Counts individual HTTP attempts (incremented once per attempt).
            "total_deliveries": 0,
            "successful_deliveries": 0,
            # Deliveries that exhausted all attempts.
            "failed_deliveries": 0,
            # Retries that were actually scheduled.
            "retry_attempts": 0,
            "average_response_time": 0.0
        }
        # Caller-supplied metadata per delivery id, merged into the payload.
        self._delivery_metadata: Dict[str, Dict[str, Any]] = {}
        # Monotonic counter that keeps delivery ids unique even when the
        # same webhook is triggered twice within one millisecond.
        self._delivery_seq = 0
        # Strong references so the event loop's weak task references cannot
        # let these tasks be garbage-collected while still running.
        self._processor_task: Optional[asyncio.Task] = None
        self._retry_tasks: set = set()
        # Start background processor (only possible inside a running loop)
        self._ensure_processor_started()

    def _ensure_processor_started(self) -> bool:
        """Start the queue processor if a loop is running and it is not active.

        Returns:
            True if the processor task is running after the call, False if
            no event loop is running yet (the start is deferred).
        """
        if self._processor_task is not None and not self._processor_task.done():
            return True
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No running loop (e.g. module import): defer until first use.
            return False
        self._processor_task = loop.create_task(self._process_delivery_queue())
        return True

    def _validate_url(self, url: str) -> None:
        """Raise ``ValueError`` unless ``url`` is an absolute http(s) URL."""
        parsed = urlparse(url)
        if not parsed.scheme or not parsed.netloc:
            raise ValueError("Invalid webhook URL")
        if parsed.scheme not in self._ALLOWED_URL_SCHEMES:
            raise ValueError(f"Unsupported webhook URL scheme: {parsed.scheme}")

    def register_webhook(
        self,
        webhook_id: str,
        url: str,
        events: List[WebhookEvent],
        security_type: WebhookSecurityType = WebhookSecurityType.HMAC_SHA256,
        secret: Optional[str] = None,
        **kwargs
    ) -> bool:
        """Register a new webhook (or replace the one with the same id).

        Args:
            webhook_id: Key under which the configuration is stored.
            url: Absolute http(s) URL deliveries are POSTed to.
            events: Events the webhook subscribes to.
            security_type: Authentication scheme for outgoing requests.
            secret: Credential for the scheme; generated automatically for
                HMAC when omitted.
            **kwargs: Additional ``WebhookConfig`` fields.

        Returns:
            True on success, False if validation or construction failed
            (the error is logged, not raised).
        """
        try:
            # Validate URL
            self._validate_url(url)
            # Generate secret if not provided for HMAC
            if security_type == WebhookSecurityType.HMAC_SHA256 and not secret:
                secret = self._generate_secret()
            config = WebhookConfig(
                url=url,
                events=events,
                security_type=security_type,
                secret=secret,
                **kwargs
            )
            self.webhooks[webhook_id] = config
            logger.info(f"Registered webhook {webhook_id} for events: {events}")
            return True
        except Exception as e:
            logger.error(f"Failed to register webhook {webhook_id}: {e}")
            return False

    def unregister_webhook(self, webhook_id: str) -> bool:
        """Remove a webhook; return False if the id is unknown."""
        if webhook_id in self.webhooks:
            del self.webhooks[webhook_id]
            logger.info(f"Unregistered webhook {webhook_id}")
            return True
        return False

    def update_webhook(self, webhook_id: str, **updates) -> bool:
        """Update fields of an existing webhook configuration.

        Keys that are not attributes of the configuration are ignored (with
        a warning).  A new ``url`` is validated the same way as on
        registration; an invalid one rejects the whole update.

        Returns:
            True if the webhook exists and the update was applied.
        """
        config = self.webhooks.get(webhook_id)
        if config is None:
            return False
        if "url" in updates:
            try:
                self._validate_url(updates["url"])
            except (ValueError, TypeError, AttributeError) as e:
                logger.error(f"Failed to update webhook {webhook_id}: {e}")
                return False
        for key, value in updates.items():
            if hasattr(config, key):
                setattr(config, key, value)
            else:
                logger.warning(f"Ignoring unknown webhook field '{key}' for {webhook_id}")
        config.updated_at = datetime.now()
        logger.info(f"Updated webhook {webhook_id}")
        return True

    def activate_webhook(self, webhook_id: str) -> bool:
        """Activate a webhook"""
        return self.update_webhook(webhook_id, active=True)

    def deactivate_webhook(self, webhook_id: str) -> bool:
        """Deactivate a webhook"""
        return self.update_webhook(webhook_id, active=False)

    async def trigger_event(
        self,
        event: WebhookEvent,
        data: Dict[str, Any],
        metadata: Optional[Dict[str, Any]] = None
    ) -> List[str]:
        """Queue a delivery for every active webhook subscribed to ``event``.

        Args:
            event: The event that occurred.
            data: Event content; also what filter conditions are matched
                against.
            metadata: Optional extra information included in the payload's
                ``metadata`` alongside the attempt counters.

        Returns:
            The ids of the deliveries that were queued.
        """
        # The manager may have been constructed outside a running loop.
        self._ensure_processor_started()
        delivery_ids = []
        metadata = metadata or {}
        # Iterate over a snapshot so registrations made while this
        # coroutine is suspended cannot invalidate the iteration.
        for webhook_id, config in list(self.webhooks.items()):
            if not config.active:
                continue
            if event not in config.events:
                continue
            # Apply filters if configured
            if config.filter_conditions and not self._matches_filters(data, config.filter_conditions):
                continue
            # Create delivery; the sequence suffix guarantees uniqueness.
            self._delivery_seq += 1
            delivery_id = (
                f"delivery_{int(time.time() * 1000)}_{webhook_id}_{self._delivery_seq}"
            )
            delivery = WebhookDelivery(
                id=delivery_id,
                webhook_id=webhook_id,
                event=event,
                payload=data
            )
            self.deliveries[delivery_id] = delivery
            if metadata:
                self._delivery_metadata[delivery_id] = dict(metadata)
            delivery_ids.append(delivery_id)
            # Queue for processing
            await self.delivery_queue.put(delivery_id)
        logger.info(f"Triggered event {event} - queued {len(delivery_ids)} deliveries")
        return delivery_ids

    async def _process_delivery_queue(self):
        """Background loop: take delivery ids off the queue and send them."""
        self.is_processing = True
        logger.info("Started webhook delivery processor")
        try:
            while True:
                # Get next delivery
                delivery_id = await self.delivery_queue.get()
                try:
                    delivery = self.deliveries.get(delivery_id)
                    if delivery is None:
                        continue
                    # Check if expired
                    if datetime.now() > delivery.expires_at:
                        delivery.status = WebhookStatus.EXPIRED
                        self._delivery_metadata.pop(delivery_id, None)
                        logger.warning(f"Delivery {delivery_id} expired")
                        continue
                    # Attempt delivery
                    await self._attempt_delivery(delivery)
                except Exception as e:
                    logger.error(f"Error in delivery processor: {e}")
                    await asyncio.sleep(1)  # Brief pause on errors
                finally:
                    # Keep the queue's unfinished-task count accurate.
                    self.delivery_queue.task_done()
        finally:
            # Reached when the task is cancelled or otherwise exits.
            self.is_processing = False

    def _build_headers(
        self,
        config: WebhookConfig,
        delivery: WebhookDelivery,
        payload: WebhookPayload,
        payload_json: str
    ) -> Dict[str, str]:
        """Build request headers, including the configured auth header."""
        headers = config.headers.copy()
        headers["Content-Type"] = "application/json"
        headers["User-Agent"] = "YouTubeSummarizer-Webhook/1.0"
        headers["X-Webhook-Event"] = delivery.event.value
        headers["X-Webhook-Delivery"] = delivery.id
        headers["X-Webhook-Timestamp"] = str(int(payload.timestamp.timestamp()))
        # Add security headers (none are added without a secret)
        if config.secret:
            if config.security_type == WebhookSecurityType.HMAC_SHA256:
                signature = self._create_hmac_signature(payload_json, config.secret)
                headers["X-Hub-Signature-256"] = f"sha256={signature}"
            elif config.security_type == WebhookSecurityType.BEARER_TOKEN:
                headers["Authorization"] = f"Bearer {config.secret}"
            elif config.security_type == WebhookSecurityType.API_KEY_HEADER:
                headers["X-API-Key"] = config.secret
        return headers

    def _schedule_retry(self, delivery_id: str, delay: float) -> None:
        """Re-queue ``delivery_id`` after ``delay`` seconds."""
        async def requeue():
            await asyncio.sleep(delay)
            await self.delivery_queue.put(delivery_id)

        task = asyncio.create_task(requeue())
        # Hold a reference until the task finishes so it cannot be
        # garbage-collected before the retry is queued.
        self._retry_tasks.add(task)
        task.add_done_callback(self._retry_tasks.discard)

    async def _attempt_delivery(self, delivery: WebhookDelivery):
        """Make one delivery attempt and record the outcome.

        A 2xx response marks the delivery ``DELIVERED``.  Any other outcome
        schedules a retry with exponential backoff until the webhook's
        ``retry_attempts`` limit is reached, after which the delivery is
        marked ``FAILED``.
        """
        webhook_id = delivery.webhook_id
        config = self.webhooks.get(webhook_id)
        if config is None:
            # The webhook was removed after the delivery was queued.  Mark
            # the record as failed so it does not stay pending forever and
            # can be removed by cleanup_old_deliveries().  No attempt was
            # made, so the attempt statistics are left untouched.
            logger.error(f"Webhook {webhook_id} not found for delivery {delivery.id}")
            delivery.status = WebhookStatus.FAILED
            delivery.error_message = f"Webhook {webhook_id} not found"
            self._delivery_metadata.pop(delivery.id, None)
            return
        delivery.attempt_count += 1
        delivery.last_attempt_at = datetime.now()
        delivery.status = WebhookStatus.RETRYING if delivery.attempt_count > 1 else WebhookStatus.PENDING
        try:
            # Prepare payload; the attempt counters take precedence over
            # caller-supplied metadata keys of the same name.
            payload = WebhookPayload(
                event=delivery.event,
                webhook_id=webhook_id,
                delivery_id=delivery.id,
                data=delivery.payload,
                metadata={
                    **self._delivery_metadata.get(delivery.id, {}),
                    "attempt": delivery.attempt_count,
                    "max_attempts": config.retry_attempts
                }
            )
            # The signature is computed over exactly the bytes that are sent.
            payload_json = payload.json()
            headers = self._build_headers(config, delivery, payload, payload_json)
            # Make HTTP request
            start_time = time.perf_counter()
            async with httpx.AsyncClient(timeout=config.timeout_seconds) as client:
                response = await client.post(
                    config.url,
                    content=payload_json,
                    headers=headers
                )
            response_time = time.perf_counter() - start_time
            # Update delivery record
            delivery.response_status = response.status_code
            delivery.response_body = response.text[:1000]  # Limit body size
            # Check if successful
            if 200 <= response.status_code < 300:
                delivery.status = WebhookStatus.DELIVERED
                delivery.delivered_at = datetime.now()
                self._delivery_metadata.pop(delivery.id, None)
                # Update stats
                self.stats["successful_deliveries"] += 1
                self._update_average_response_time(response_time)
                logger.info(f"Successfully delivered webhook {delivery.id} to {config.url}")
            else:
                raise httpx.HTTPStatusError(
                    f"HTTP {response.status_code}",
                    request=response.request,
                    response=response
                )
        except Exception as e:
            logger.warning(f"Webhook delivery failed (attempt {delivery.attempt_count}): {e}")
            delivery.error_message = str(e)
            # Check if we should retry
            if delivery.attempt_count < config.retry_attempts:
                # Exponential backoff: base, 2*base, 4*base, ...
                retry_delay = config.retry_delay_seconds * (2 ** (delivery.attempt_count - 1))
                self.stats["retry_attempts"] += 1
                self._schedule_retry(delivery.id, retry_delay)
                logger.info(f"Scheduled retry for delivery {delivery.id} in {retry_delay}s")
            else:
                delivery.status = WebhookStatus.FAILED
                self.stats["failed_deliveries"] += 1
                self._delivery_metadata.pop(delivery.id, None)
                logger.error(f"Webhook delivery {delivery.id} permanently failed after {delivery.attempt_count} attempts")
        finally:
            self.stats["total_deliveries"] += 1

    def _create_hmac_signature(self, payload: str, secret: str) -> str:
        """Return the hex HMAC-SHA256 of ``payload`` keyed with ``secret``."""
        return hmac.new(
            secret.encode('utf-8'),
            payload.encode('utf-8'),
            hashlib.sha256
        ).hexdigest()

    def _generate_secret(self) -> str:
        """Generate a random URL-safe secret for webhook signing."""
        import secrets
        return secrets.token_urlsafe(32)

    def _matches_filters(self, data: Dict[str, Any], filters: Dict[str, Any]) -> bool:
        """Check whether ``data`` satisfies every condition in ``filters``.

        Each filter key must be present in ``data``.  A condition is either
        a plain value (compared for equality) or a dict of operators:
        ``$in`` (membership), ``$gt`` and ``$lt`` (strict comparisons).  All
        operators present in one condition must hold, so
        ``{"$gt": 1, "$lt": 10}`` expresses a range.  A dict without any
        known operator is compared for equality like any other value, and
        values that cannot be compared count as a non-match.
        """
        for key, expected_value in filters.items():
            if key not in data:
                return False
            actual_value = data[key]
            is_operator_condition = isinstance(expected_value, dict) and any(
                op in expected_value for op in self._FILTER_OPERATORS
            )
            if not is_operator_condition:
                if actual_value != expected_value:
                    return False
                continue
            try:
                if "$in" in expected_value and actual_value not in expected_value["$in"]:
                    return False
                if "$gt" in expected_value and not (actual_value > expected_value["$gt"]):
                    return False
                if "$lt" in expected_value and not (actual_value < expected_value["$lt"]):
                    return False
            except TypeError:
                # e.g. comparing None with a number: treat as no match
                # rather than aborting the whole event trigger.
                return False
        return True

    def _update_average_response_time(self, response_time: float):
        """Fold ``response_time`` into the running mean of successful sends.

        Expects ``stats["successful_deliveries"]`` to already include the
        delivery being recorded.
        """
        current_avg = self.stats["average_response_time"]
        successful_count = self.stats["successful_deliveries"]
        if successful_count <= 1:
            # First sample (also guards against a zero divisor).
            self.stats["average_response_time"] = response_time
        else:
            self.stats["average_response_time"] = (
                (current_avg * (successful_count - 1) + response_time) / successful_count
            )

    def get_webhook_status(self, webhook_id: str) -> Optional[Dict[str, Any]]:
        """Return configuration, statistics and the 10 newest deliveries.

        Returns None if the webhook id is unknown.
        """
        config = self.webhooks.get(webhook_id)
        if config is None:
            return None
        # Calculate webhook-specific stats
        webhook_deliveries = [d for d in self.deliveries.values() if d.webhook_id == webhook_id]
        total = len(webhook_deliveries)
        successful = sum(1 for d in webhook_deliveries if d.status == WebhookStatus.DELIVERED)
        failed = sum(1 for d in webhook_deliveries if d.status == WebhookStatus.FAILED)
        pending = sum(
            1 for d in webhook_deliveries
            if d.status in (WebhookStatus.PENDING, WebhookStatus.RETRYING)
        )
        newest_first = sorted(webhook_deliveries, key=lambda x: x.created_at, reverse=True)
        return {
            "webhook_id": webhook_id,
            "url": config.url,
            "events": config.events,
            "active": config.active,
            "security_type": config.security_type,
            "created_at": config.created_at.isoformat(),
            "updated_at": config.updated_at.isoformat(),
            "statistics": {
                "total_deliveries": total,
                "successful_deliveries": successful,
                "failed_deliveries": failed,
                "pending_deliveries": pending,
                "success_rate": successful / total if total > 0 else 0.0
            },
            "recent_deliveries": [
                {
                    "id": d.id,
                    "event": d.event,
                    "status": d.status,
                    "attempt_count": d.attempt_count,
                    "created_at": d.created_at.isoformat(),
                    "delivered_at": d.delivered_at.isoformat() if d.delivered_at else None
                }
                for d in newest_first[:10]
            ]
        }

    def get_delivery_status(self, delivery_id: str) -> Optional[Dict[str, Any]]:
        """Return the state of one delivery, or None if the id is unknown."""
        delivery = self.deliveries.get(delivery_id)
        if delivery is None:
            return None
        return {
            "delivery_id": delivery.id,
            "webhook_id": delivery.webhook_id,
            "event": delivery.event,
            "status": delivery.status,
            "attempt_count": delivery.attempt_count,
            "last_attempt_at": delivery.last_attempt_at.isoformat() if delivery.last_attempt_at else None,
            "delivered_at": delivery.delivered_at.isoformat() if delivery.delivered_at else None,
            "response_status": delivery.response_status,
            "error_message": delivery.error_message,
            "created_at": delivery.created_at.isoformat(),
            "expires_at": delivery.expires_at.isoformat()
        }

    def get_system_stats(self) -> Dict[str, Any]:
        """Return manager-wide counters, queue size and processor state."""
        active_webhooks = sum(1 for w in self.webhooks.values() if w.active)
        total_attempts = self.stats["total_deliveries"]
        return {
            "webhook_manager_status": "running" if self.is_processing else "stopped",
            "total_webhooks": len(self.webhooks),
            "active_webhooks": active_webhooks,
            "total_deliveries": total_attempts,
            "successful_deliveries": self.stats["successful_deliveries"],
            "failed_deliveries": self.stats["failed_deliveries"],
            "retry_attempts": self.stats["retry_attempts"],
            "success_rate": (
                self.stats["successful_deliveries"] / total_attempts
                if total_attempts > 0 else 0.0
            ),
            "average_response_time": round(self.stats["average_response_time"], 3),
            "queue_size": self.delivery_queue.qsize(),
            "pending_deliveries": sum(
                1 for d in self.deliveries.values()
                if d.status in (WebhookStatus.PENDING, WebhookStatus.RETRYING)
            )
        }

    def cleanup_old_deliveries(self, days_old: int = 7) -> int:
        """Delete finished delivery records older than ``days_old`` days.

        Only records in a final state (delivered, failed or expired) are
        removed.

        Returns:
            The number of records removed.
        """
        cutoff_date = datetime.now() - timedelta(days=days_old)
        final_states = (
            WebhookStatus.DELIVERED, WebhookStatus.FAILED, WebhookStatus.EXPIRED
        )
        old_deliveries = [
            delivery_id for delivery_id, delivery in self.deliveries.items()
            if delivery.created_at < cutoff_date and delivery.status in final_states
        ]
        for delivery_id in old_deliveries:
            del self.deliveries[delivery_id]
            self._delivery_metadata.pop(delivery_id, None)
        logger.info(f"Cleaned up {len(old_deliveries)} old delivery records")
        return len(old_deliveries)
# Global webhook manager instance: the shared WebhookManager used by the
# module-level convenience functions. It is constructed as a side effect of
# importing this module.
webhook_manager = WebhookManager()
# Convenience functions for common webhook operations
async def register_webhook(
    webhook_id: str,
    url: str,
    events: List[WebhookEvent],
    secret: Optional[str] = None,
    **kwargs
) -> bool:
    """Register a webhook on the shared ``webhook_manager``.

    Extra keyword arguments are forwarded unchanged to the manager's
    ``register_webhook``; its boolean result is returned.
    """
    registered = webhook_manager.register_webhook(
        webhook_id,
        url,
        events,
        secret=secret,
        **kwargs
    )
    return registered
async def trigger_event(
    event: WebhookEvent,
    data: Dict[str, Any],
    metadata: Optional[Dict[str, Any]] = None
) -> List[str]:
    """Trigger ``event`` on the shared ``webhook_manager``.

    Returns the list of delivery ids produced by the manager.
    """
    delivery_ids = await webhook_manager.trigger_event(event, data, metadata)
    return delivery_ids
def get_webhook_status(webhook_id: str) -> Optional[Dict[str, Any]]:
    """Look up a webhook's status on the shared ``webhook_manager``."""
    status = webhook_manager.get_webhook_status(webhook_id)
    return status
def get_system_stats() -> Dict[str, Any]:
    """Fetch overall statistics from the shared ``webhook_manager``."""
    stats = webhook_manager.get_system_stats()
    return stats