533 lines
20 KiB
Python
533 lines
20 KiB
Python
"""
Webhook System for YouTube Summarizer

Provides webhook registration, management, and delivery for autonomous operations.
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import hmac
|
|
import hashlib
|
|
import time
|
|
from typing import Any, Dict, List, Optional, Callable, Union
|
|
from datetime import datetime, timedelta
|
|
from enum import Enum
|
|
from dataclasses import dataclass, field
|
|
from urllib.parse import urlparse
|
|
|
|
import httpx
|
|
from pydantic import BaseModel, HttpUrl, Field
|
|
|
|
# Module-level logger named after this module; used by every log call in this file.
logger = logging.getLogger(__name__)
|
|
|
|
class WebhookEvent(str, Enum):
    """Event names a webhook can subscribe to.

    Members also subclass ``str``, so each one compares equal to its dotted
    name (for example ``"batch.started"``).
    """

    # Transcription outcomes
    TRANSCRIPTION_COMPLETED = "transcription.completed"
    TRANSCRIPTION_FAILED = "transcription.failed"

    # Summarization outcomes
    SUMMARIZATION_COMPLETED = "summarization.completed"
    SUMMARIZATION_FAILED = "summarization.failed"

    # Batch lifecycle
    BATCH_STARTED = "batch.started"
    BATCH_COMPLETED = "batch.completed"
    BATCH_FAILED = "batch.failed"

    # Per-video and system-level notifications
    VIDEO_PROCESSED = "video.processed"
    ERROR_OCCURRED = "error.occurred"
    SYSTEM_STATUS = "system.status"
    USER_QUOTA_EXCEEDED = "user.quota_exceeded"
    PROCESSING_DELAYED = "processing.delayed"
|
|
|
|
class WebhookStatus(str, Enum):
    """Lifecycle states of a single webhook delivery.

    Members also subclass ``str`` and compare equal to their lowercase value.
    """

    # Not yet delivered
    PENDING = "pending"

    # Terminal states
    DELIVERED = "delivered"
    FAILED = "failed"

    # A further attempt is in progress after an earlier failure
    RETRYING = "retrying"

    # Passed its expiry time before it could be delivered
    EXPIRED = "expired"
|
|
|
|
class WebhookSecurityType(str, Enum):
    """Ways an outgoing webhook request can be authenticated.

    Members also subclass ``str`` and compare equal to their lowercase value.
    """

    # No authentication header is added
    NONE = "none"

    # Request body is signed with HMAC-SHA256 using the webhook secret
    HMAC_SHA256 = "hmac_sha256"

    # Secret is sent as an ``Authorization: Bearer`` header
    BEARER_TOKEN = "bearer_token"

    # Secret is sent as an ``X-API-Key`` header
    API_KEY_HEADER = "api_key_header"
|
|
|
|
@dataclass
class WebhookConfig:
    """Settings for one registered webhook endpoint.

    Only ``url`` and ``events`` are required; every other field has a default.
    Field order is part of the generated ``__init__`` signature.
    """

    # Destination the payload is POSTed to.
    url: str
    # Events this webhook is subscribed to.
    events: List[WebhookEvent]
    # Inactive webhooks are skipped when events are triggered.
    active: bool = True
    # How outgoing requests are authenticated.
    security_type: WebhookSecurityType = WebhookSecurityType.HMAC_SHA256
    # Signing key / token / API key, depending on ``security_type``.
    secret: Optional[str] = None
    # Extra HTTP headers sent with every delivery.
    headers: Dict[str, str] = field(default_factory=dict)
    # HTTP client timeout for one delivery attempt.
    timeout_seconds: int = 30
    # Maximum number of delivery attempts before the delivery is marked failed.
    retry_attempts: int = 3
    # Base delay between attempts; the manager doubles it after each failure.
    retry_delay_seconds: int = 5
    # Optional per-key conditions the event data must satisfy.
    filter_conditions: Optional[Dict[str, Any]] = None
    # Naive local timestamps taken from ``datetime.now`` at construction.
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
|
|
|
|
@dataclass
class WebhookDelivery:
    """Record of one event being delivered to one webhook.

    ``id``, ``webhook_id``, ``event`` and ``payload`` are required; the
    remaining fields track attempt progress and start out empty.
    """

    # Unique identifier of this delivery.
    id: str
    # Identifier of the webhook this delivery targets.
    webhook_id: str
    # Event that produced this delivery.
    event: WebhookEvent
    # Event data sent in the request body.
    payload: Dict[str, Any]
    # Current lifecycle state.
    status: WebhookStatus = WebhookStatus.PENDING
    # Number of delivery attempts made so far.
    attempt_count: int = 0
    # Time of the most recent attempt, if any.
    last_attempt_at: Optional[datetime] = None
    # Time of the successful delivery, if any.
    delivered_at: Optional[datetime] = None
    # HTTP status code of the most recent response, if one was received.
    response_status: Optional[int] = None
    # Response body of the most recent response (the manager truncates it).
    response_body: Optional[str] = None
    # Text of the most recent error, if any.
    error_message: Optional[str] = None
    # Naive local creation time.
    created_at: datetime = field(default_factory=datetime.now)
    # Defaults to 24 hours after construction.
    expires_at: datetime = field(default_factory=lambda: datetime.now() + timedelta(hours=24))
|
|
|
|
class WebhookPayload(BaseModel):
    """Body sent to a webhook endpoint for every delivery.

    ``timestamp`` defaults to ``datetime.now()`` when the model is created and
    ``metadata`` defaults to an empty dict; all other fields are required.
    """

    # Event being reported.
    event: WebhookEvent
    # When the payload object was built.
    timestamp: datetime = Field(default_factory=datetime.now)
    # Identifier of the receiving webhook.
    webhook_id: str
    # Identifier of the delivery record this payload belongs to.
    delivery_id: str
    # Event-specific data.
    data: Dict[str, Any]
    # Additional information about the delivery itself.
    metadata: Dict[str, Any] = Field(default_factory=dict)
|
|
|
|
class WebhookManager:
    """Manages webhook registration, delivery, and retries.

    Registered webhooks are kept in ``self.webhooks`` and delivery records in
    ``self.deliveries``; both are plain in-memory dicts. Delivery ids are put
    on ``self.delivery_queue`` and consumed one at a time by a single
    background task, which is started as soon as an event loop is running.
    """

    # Operator keys recognised inside a filter condition dict.
    _FILTER_OPERATORS = ("$in", "$gt", "$lt")

    def __init__(self):
        """Create empty registries and start the processor if a loop is running."""
        self.webhooks: Dict[str, WebhookConfig] = {}
        self.deliveries: Dict[str, WebhookDelivery] = {}
        self.event_handlers: Dict[WebhookEvent, List[Callable]] = {}
        # NOTE(review): on Python < 3.10 an asyncio.Queue binds to the event
        # loop that is current at construction time - confirm the target version.
        self.delivery_queue: asyncio.Queue = asyncio.Queue()
        self.is_processing = False
        self.stats = {
            "total_deliveries": 0,
            "successful_deliveries": 0,
            "failed_deliveries": 0,
            "retry_attempts": 0,
            "average_response_time": 0.0
        }

        # Caller-supplied metadata per delivery id, merged into the payload.
        self._delivery_metadata: Dict[str, Dict[str, Any]] = {}
        # Strong references: the event loop only keeps weak references to
        # tasks, so an unreferenced task may be garbage-collected mid-flight.
        self._processor_task: Optional[asyncio.Task] = None
        self._background_tasks: set = set()

        # Start background processor. This is a no-op when no event loop is
        # running (e.g. construction at import time); trigger_event retries it.
        self._ensure_processor_running()

    def _ensure_processor_running(self) -> bool:
        """Start the queue processor on the running loop if it is not active.

        Returns:
            True when a processor task is active after the call, False when
            no event loop is running and the start had to be deferred.
        """
        if self._processor_task is not None and not self._processor_task.done():
            return True

        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No running loop: creating a task now would raise.
            return False

        self._processor_task = loop.create_task(self._process_delivery_queue())
        return True

    def register_webhook(
        self,
        webhook_id: str,
        url: str,
        events: List[WebhookEvent],
        security_type: WebhookSecurityType = WebhookSecurityType.HMAC_SHA256,
        secret: Optional[str] = None,
        **kwargs
    ) -> bool:
        """Register a new webhook, replacing any existing one with the same id.

        Args:
            webhook_id: Key under which the configuration is stored.
            url: Absolute ``http``/``https`` URL that receives the deliveries.
            events: Events the webhook subscribes to.
            security_type: Authentication method for outgoing requests.
            secret: Secret for the chosen method. For HMAC a random secret is
                generated when none is given; it can be read back from
                ``self.webhooks[webhook_id].secret``.
            **kwargs: Further ``WebhookConfig`` fields.

        Returns:
            True on success. Any error (invalid URL, unknown config field, ...)
            is logged and reported as False rather than raised.
        """
        try:
            # Validate URL: deliveries are HTTP POSTs, so other schemes can never work.
            parsed = urlparse(url)
            if parsed.scheme not in ("http", "https") or not parsed.netloc:
                raise ValueError("Invalid webhook URL")

            # Generate secret if not provided for HMAC
            if security_type == WebhookSecurityType.HMAC_SHA256 and not secret:
                secret = self._generate_secret()

            config = WebhookConfig(
                url=url,
                events=events,
                security_type=security_type,
                secret=secret,
                **kwargs
            )

            self.webhooks[webhook_id] = config
            logger.info(f"Registered webhook {webhook_id} for events: {events}")
            return True

        except Exception as e:
            # Deliberate boundary: callers rely on a boolean result.
            logger.error(f"Failed to register webhook {webhook_id}: {e}")
            return False

    def unregister_webhook(self, webhook_id: str) -> bool:
        """Remove a webhook; return False when the id is unknown."""
        if webhook_id in self.webhooks:
            del self.webhooks[webhook_id]
            logger.info(f"Unregistered webhook {webhook_id}")
            return True
        return False

    def update_webhook(self, webhook_id: str, **updates) -> bool:
        """Update attributes of an existing webhook configuration.

        Keys that are not attributes of the configuration are skipped (and
        logged). ``updated_at`` is refreshed on every call.

        Returns:
            False when the webhook id is unknown, True otherwise.
        """
        if webhook_id not in self.webhooks:
            return False

        config = self.webhooks[webhook_id]
        for key, value in updates.items():
            if hasattr(config, key):
                setattr(config, key, value)
            else:
                logger.warning(f"Ignoring unknown webhook setting '{key}' for webhook {webhook_id}")

        config.updated_at = datetime.now()
        logger.info(f"Updated webhook {webhook_id}")
        return True

    def activate_webhook(self, webhook_id: str) -> bool:
        """Activate a webhook; return False when the id is unknown."""
        return self.update_webhook(webhook_id, active=True)

    def deactivate_webhook(self, webhook_id: str) -> bool:
        """Deactivate a webhook; return False when the id is unknown."""
        return self.update_webhook(webhook_id, active=False)

    async def trigger_event(
        self,
        event: WebhookEvent,
        data: Dict[str, Any],
        metadata: Optional[Dict[str, Any]] = None
    ) -> List[str]:
        """Queue a delivery of ``event`` for every matching webhook.

        A webhook matches when it is active, subscribed to ``event`` and its
        filter conditions (if any) accept ``data``.

        Args:
            event: Event being reported.
            data: Event data; stored as the delivery payload.
            metadata: Optional extra information included in the payload's
                ``metadata`` next to the attempt counters.

        Returns:
            Ids of the deliveries that were queued.
        """
        delivery_ids = []
        metadata = metadata or {}

        # The manager may have been constructed before any loop was running.
        self._ensure_processor_running()

        # Find matching webhooks
        for webhook_id, config in list(self.webhooks.items()):
            if not config.active:
                continue

            if event not in config.events:
                continue

            # Apply filters if configured
            if config.filter_conditions and not self._matches_filters(data, config.filter_conditions):
                continue

            # Create delivery. The id is millisecond-based, so add a numeric
            # suffix when the same webhook fires twice within one millisecond;
            # otherwise the later record would overwrite the earlier one.
            base_id = f"delivery_{int(time.time() * 1000)}_{webhook_id}"
            delivery_id = base_id
            suffix = 1
            while delivery_id in self.deliveries:
                delivery_id = f"{base_id}_{suffix}"
                suffix += 1

            delivery = WebhookDelivery(
                id=delivery_id,
                webhook_id=webhook_id,
                event=event,
                payload=data
            )

            self.deliveries[delivery_id] = delivery
            if metadata:
                self._delivery_metadata[delivery_id] = dict(metadata)
            delivery_ids.append(delivery_id)

            # Queue for processing
            await self.delivery_queue.put(delivery_id)

        logger.info(f"Triggered event {event} - queued {len(delivery_ids)} deliveries")
        return delivery_ids

    async def _process_delivery_queue(self):
        """Background loop that takes delivery ids off the queue and delivers them.

        Runs until the task is cancelled. Unknown ids are skipped, expired
        deliveries are marked ``EXPIRED``, and unexpected errors are logged
        followed by a one-second pause.
        """
        self.is_processing = True
        logger.info("Started webhook delivery processor")

        try:
            while True:
                took_item = False
                try:
                    # Get next delivery
                    delivery_id = await self.delivery_queue.get()
                    took_item = True

                    delivery = self.deliveries.get(delivery_id)
                    if delivery is None:
                        continue

                    # Check if expired
                    if datetime.now() > delivery.expires_at:
                        delivery.status = WebhookStatus.EXPIRED
                        logger.warning(f"Delivery {delivery_id} expired")
                        continue

                    # Attempt delivery
                    await self._attempt_delivery(delivery)

                except Exception as e:
                    logger.error(f"Error in delivery processor: {e}")
                    await asyncio.sleep(1)  # Brief pause on errors

                finally:
                    # Balance every successful get() so Queue.join() can complete.
                    if took_item:
                        self.delivery_queue.task_done()
        finally:
            # Reached on cancellation; keeps the reported status truthful.
            self.is_processing = False

    async def _attempt_delivery(self, delivery: WebhookDelivery):
        """Make one delivery attempt and record its outcome on ``delivery``.

        A 2xx response marks the delivery ``DELIVERED``. Any other outcome is
        a failure: while attempts remain a retry is scheduled with exponential
        backoff, otherwise the delivery is marked ``FAILED``.
        ``stats["total_deliveries"]`` counts attempts, not unique deliveries.
        """
        webhook_id = delivery.webhook_id

        config = self.webhooks.get(webhook_id)
        if config is None:
            logger.error(f"Webhook {webhook_id} not found for delivery {delivery.id}")
            # Without a target the delivery can never succeed; give it a
            # terminal state so it is not reported as pending forever.
            delivery.status = WebhookStatus.FAILED
            delivery.error_message = f"Webhook {webhook_id} not found"
            self.stats["failed_deliveries"] += 1
            return

        delivery.attempt_count += 1
        delivery.last_attempt_at = datetime.now()
        delivery.status = WebhookStatus.RETRYING if delivery.attempt_count > 1 else WebhookStatus.PENDING

        try:
            # Prepare payload; attempt counters win over caller metadata keys.
            payload = WebhookPayload(
                event=delivery.event,
                webhook_id=webhook_id,
                delivery_id=delivery.id,
                data=delivery.payload,
                metadata={
                    **self._delivery_metadata.get(delivery.id, {}),
                    "attempt": delivery.attempt_count,
                    "max_attempts": config.retry_attempts
                }
            )
            payload_json = payload.json()
            headers = self._build_headers(config, delivery, payload, payload_json)

            # Make HTTP request
            start_time = time.time()
            async with httpx.AsyncClient(timeout=config.timeout_seconds) as client:
                response = await client.post(
                    config.url,
                    content=payload_json,
                    headers=headers
                )
            response_time = time.time() - start_time

            # Update delivery record
            delivery.response_status = response.status_code
            delivery.response_body = response.text[:1000]  # Limit body size

            # Anything outside 2xx is handled as a failure below.
            if not 200 <= response.status_code < 300:
                raise httpx.HTTPStatusError(
                    f"HTTP {response.status_code}",
                    request=response.request,
                    response=response
                )

            delivery.status = WebhookStatus.DELIVERED
            delivery.delivered_at = datetime.now()

            # Update stats
            self.stats["successful_deliveries"] += 1
            self._update_average_response_time(response_time)

            logger.info(f"Successfully delivered webhook {delivery.id} to {config.url}")

        except Exception as e:
            logger.warning(f"Webhook delivery failed (attempt {delivery.attempt_count}): {e}")
            delivery.error_message = str(e)

            # Check if we should retry
            if delivery.attempt_count < config.retry_attempts:
                # Exponential backoff: base delay doubled per failed attempt.
                retry_delay = config.retry_delay_seconds * (2 ** (delivery.attempt_count - 1))
                delivery.status = WebhookStatus.RETRYING
                # Counted only when a retry is really scheduled.
                self.stats["retry_attempts"] += 1
                self._schedule_retry(delivery.id, retry_delay)
                logger.info(f"Scheduled retry for delivery {delivery.id} in {retry_delay}s")
            else:
                delivery.status = WebhookStatus.FAILED
                self.stats["failed_deliveries"] += 1
                logger.error(f"Webhook delivery {delivery.id} permanently failed after {delivery.attempt_count} attempts")

        finally:
            self.stats["total_deliveries"] += 1

    def _build_headers(self, config, delivery, payload, payload_json: str) -> Dict[str, str]:
        """Return the request headers for one delivery, including authentication."""
        headers = config.headers.copy()
        headers["Content-Type"] = "application/json"
        headers["User-Agent"] = "YouTubeSummarizer-Webhook/1.0"
        headers["X-Webhook-Event"] = delivery.event.value
        headers["X-Webhook-Delivery"] = delivery.id
        headers["X-Webhook-Timestamp"] = str(int(payload.timestamp.timestamp()))

        # Add security headers; without a secret no authentication is added.
        if config.secret:
            if config.security_type == WebhookSecurityType.HMAC_SHA256:
                signature = self._create_hmac_signature(payload_json, config.secret)
                headers["X-Hub-Signature-256"] = f"sha256={signature}"
            elif config.security_type == WebhookSecurityType.BEARER_TOKEN:
                headers["Authorization"] = f"Bearer {config.secret}"
            elif config.security_type == WebhookSecurityType.API_KEY_HEADER:
                headers["X-API-Key"] = config.secret

        return headers

    def _schedule_retry(self, delivery_id: str, delay_seconds: float) -> None:
        """Re-queue ``delivery_id`` after ``delay_seconds`` without blocking the caller."""
        async def requeue():
            await asyncio.sleep(delay_seconds)
            await self.delivery_queue.put(delivery_id)

        task = asyncio.create_task(requeue())
        # Hold a reference until the task finishes so it cannot be collected early.
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)

    def _create_hmac_signature(self, payload: str, secret: str) -> str:
        """Return the hex HMAC-SHA256 digest of ``payload`` keyed with ``secret``."""
        return hmac.new(
            secret.encode('utf-8'),
            payload.encode('utf-8'),
            hashlib.sha256
        ).hexdigest()

    def _generate_secret(self) -> str:
        """Generate a random URL-safe secret for webhook signing."""
        import secrets
        return secrets.token_urlsafe(32)

    def _matches_filters(self, data: Dict[str, Any], filters: Dict[str, Any]) -> bool:
        """Check whether ``data`` satisfies every filter condition.

        Each filter key must be present in ``data``. A condition is either a
        plain value (equality) or a dict with any of the operators ``$in``,
        ``$gt`` and ``$lt``, all of which must hold. A dict without any
        operator is compared for equality like any other value. Values that
        cannot be compared with the operand do not match.
        """
        for key, expected_value in filters.items():
            if key not in data:
                return False

            actual_value = data[key]

            is_operator_condition = isinstance(expected_value, dict) and any(
                operator in expected_value for operator in self._FILTER_OPERATORS
            )

            if not is_operator_condition:
                if actual_value != expected_value:
                    return False
                continue

            try:
                # Independent checks, so "$gt" and "$lt" can form a range.
                if "$in" in expected_value and actual_value not in expected_value["$in"]:
                    return False
                if "$gt" in expected_value and actual_value <= expected_value["$gt"]:
                    return False
                if "$lt" in expected_value and actual_value >= expected_value["$lt"]:
                    return False
            except TypeError:
                # Incomparable types (e.g. str vs int) simply do not match.
                return False

        return True

    def _update_average_response_time(self, response_time: float):
        """Fold ``response_time`` into the running mean of successful deliveries.

        Expects ``stats["successful_deliveries"]`` to already include the
        delivery that produced ``response_time``.
        """
        current_avg = self.stats["average_response_time"]
        successful_count = self.stats["successful_deliveries"]

        # "<= 1" also covers a zero count, which would otherwise divide by zero.
        if successful_count <= 1:
            self.stats["average_response_time"] = response_time
        else:
            self.stats["average_response_time"] = (
                (current_avg * (successful_count - 1) + response_time) / successful_count
            )

    def get_webhook_status(self, webhook_id: str) -> Optional[Dict[str, Any]]:
        """Return configuration, delivery statistics and the ten newest deliveries.

        Returns:
            A summary dict, or None when the webhook id is unknown.
        """
        if webhook_id not in self.webhooks:
            return None

        config = self.webhooks[webhook_id]

        # Calculate webhook-specific stats
        webhook_deliveries = [d for d in self.deliveries.values() if d.webhook_id == webhook_id]

        total = len(webhook_deliveries)
        successful = sum(1 for d in webhook_deliveries if d.status == WebhookStatus.DELIVERED)
        failed = sum(1 for d in webhook_deliveries if d.status == WebhookStatus.FAILED)
        pending = sum(
            1 for d in webhook_deliveries
            if d.status in [WebhookStatus.PENDING, WebhookStatus.RETRYING]
        )

        return {
            "webhook_id": webhook_id,
            "url": config.url,
            "events": config.events,
            "active": config.active,
            "security_type": config.security_type,
            "created_at": config.created_at.isoformat(),
            "updated_at": config.updated_at.isoformat(),
            "statistics": {
                "total_deliveries": total,
                "successful_deliveries": successful,
                "failed_deliveries": failed,
                "pending_deliveries": pending,
                "success_rate": successful / total if total > 0 else 0.0
            },
            "recent_deliveries": [
                {
                    "id": d.id,
                    "event": d.event,
                    "status": d.status,
                    "attempt_count": d.attempt_count,
                    "created_at": d.created_at.isoformat(),
                    "delivered_at": d.delivered_at.isoformat() if d.delivered_at else None
                }
                for d in sorted(webhook_deliveries, key=lambda x: x.created_at, reverse=True)[:10]
            ]
        }

    def get_delivery_status(self, delivery_id: str) -> Optional[Dict[str, Any]]:
        """Return the recorded state of one delivery, or None when the id is unknown."""
        if delivery_id not in self.deliveries:
            return None

        delivery = self.deliveries[delivery_id]

        return {
            "delivery_id": delivery.id,
            "webhook_id": delivery.webhook_id,
            "event": delivery.event,
            "status": delivery.status,
            "attempt_count": delivery.attempt_count,
            "last_attempt_at": delivery.last_attempt_at.isoformat() if delivery.last_attempt_at else None,
            "delivered_at": delivery.delivered_at.isoformat() if delivery.delivered_at else None,
            "response_status": delivery.response_status,
            "error_message": delivery.error_message,
            "created_at": delivery.created_at.isoformat(),
            "expires_at": delivery.expires_at.isoformat()
        }

    def get_system_stats(self) -> Dict[str, Any]:
        """Return manager-wide counters, queue size and processor status.

        ``success_rate`` is successful deliveries divided by delivery
        attempts, because ``total_deliveries`` is incremented per attempt.
        """
        active_webhooks = sum(1 for w in self.webhooks.values() if w.active)
        total_attempts = self.stats["total_deliveries"]

        return {
            "webhook_manager_status": "running" if self.is_processing else "stopped",
            "total_webhooks": len(self.webhooks),
            "active_webhooks": active_webhooks,
            "total_deliveries": total_attempts,
            "successful_deliveries": self.stats["successful_deliveries"],
            "failed_deliveries": self.stats["failed_deliveries"],
            "retry_attempts": self.stats["retry_attempts"],
            "success_rate": (
                self.stats["successful_deliveries"] / total_attempts
                if total_attempts > 0 else 0.0
            ),
            "average_response_time": round(self.stats["average_response_time"], 3),
            "queue_size": self.delivery_queue.qsize(),
            "pending_deliveries": sum(
                1 for d in self.deliveries.values()
                if d.status in [WebhookStatus.PENDING, WebhookStatus.RETRYING]
            )
        }

    def cleanup_old_deliveries(self, days_old: int = 7):
        """Delete finished delivery records older than ``days_old`` days.

        Only deliveries in a terminal state (delivered, failed or expired) are
        removed; pending and retrying ones are kept regardless of age.

        Returns:
            Number of records removed.
        """
        cutoff_date = datetime.now() - timedelta(days=days_old)

        old_deliveries = [
            delivery_id for delivery_id, delivery in self.deliveries.items()
            if delivery.created_at < cutoff_date and delivery.status in [
                WebhookStatus.DELIVERED, WebhookStatus.FAILED, WebhookStatus.EXPIRED
            ]
        ]

        for delivery_id in old_deliveries:
            del self.deliveries[delivery_id]
            # Drop the stored caller metadata together with its record.
            self._delivery_metadata.pop(delivery_id, None)

        logger.info(f"Cleaned up {len(old_deliveries)} old delivery records")
        return len(old_deliveries)
|
|
|
|
# Global webhook manager instance
|
|
# Shared by the module-level convenience functions below.
# NOTE(review): constructed at import time, so WebhookManager.__init__ runs
# whenever this module is imported.
webhook_manager = WebhookManager()
|
|
|
|
# Convenience functions for common webhook operations
|
|
|
|
async def register_webhook(
    webhook_id: str,
    url: str,
    events: List[WebhookEvent],
    secret: Optional[str] = None,
    **kwargs
) -> bool:
    """Register a webhook on the module-level ``webhook_manager``.

    All arguments are forwarded unchanged; extra keyword arguments are passed
    through to ``WebhookManager.register_webhook``.

    Returns:
        The boolean result reported by the manager.
    """
    registered = webhook_manager.register_webhook(
        webhook_id,
        url,
        events,
        secret=secret,
        **kwargs
    )
    return registered
|
|
|
|
async def trigger_event(
    event: WebhookEvent,
    data: Dict[str, Any],
    metadata: Optional[Dict[str, Any]] = None
) -> List[str]:
    """Trigger ``event`` on the module-level ``webhook_manager``.

    Returns:
        The delivery ids returned by ``WebhookManager.trigger_event``.
    """
    queued_ids = await webhook_manager.trigger_event(event, data, metadata)
    return queued_ids
|
|
|
|
def get_webhook_status(webhook_id: str) -> Optional[Dict[str, Any]]:
    """Look up a webhook's status on the module-level ``webhook_manager``.

    Returns:
        Whatever ``WebhookManager.get_webhook_status`` returns for the id.
    """
    status = webhook_manager.get_webhook_status(webhook_id)
    return status
|
|
|
|
def get_system_stats() -> Dict[str, Any]:
    """Return the statistics reported by the module-level ``webhook_manager``."""
    system_stats = webhook_manager.get_system_stats()
    return system_stats