youtube-summarizer/backend/autonomous/autonomous_controller.py

"""
Autonomous Operation Controller for YouTube Summarizer
Provides intelligent automation, scheduling, and autonomous processing capabilities
"""

import asyncio
import json
import logging
from typing import Any, Dict, List, Optional, Callable, Union
from datetime import datetime, timedelta
from enum import Enum
from dataclasses import dataclass, field
import uuid

from .webhook_system import WebhookEvent, trigger_event

# Import backend services
try:
    from ..services.dual_transcript_service import DualTranscriptService
    from ..services.summary_pipeline import SummaryPipeline
    from ..services.batch_processing_service import BatchProcessingService
    from ..models.transcript import TranscriptSource
    BACKEND_SERVICES_AVAILABLE = True
except ImportError:
    BACKEND_SERVICES_AVAILABLE = False

logger = logging.getLogger(__name__)

class AutomationTrigger(str, Enum):
    """Types of automation triggers"""
    SCHEDULED = "scheduled"          # Time-based scheduling
    EVENT_DRIVEN = "event_driven"    # Triggered by events
    QUEUE_BASED = "queue_based"      # Triggered by queue depth
    THRESHOLD_BASED = "threshold_based"  # Triggered by metrics
    WEBHOOK_TRIGGERED = "webhook_triggered"  # External webhook trigger
    USER_ACTIVITY = "user_activity"  # Based on user patterns

class AutomationAction(str, Enum):
    """Types of automation actions"""
    PROCESS_VIDEO = "process_video"
    BATCH_PROCESS = "batch_process"
    CLEANUP_CACHE = "cleanup_cache"
    GENERATE_REPORT = "generate_report"
    SCALE_RESOURCES = "scale_resources"
    SEND_NOTIFICATION = "send_notification"
    OPTIMIZE_PERFORMANCE = "optimize_performance"
    BACKUP_DATA = "backup_data"

class AutomationStatus(str, Enum):
    """Status of automation rules"""
    ACTIVE = "active"
    INACTIVE = "inactive"
    PAUSED = "paused"
    ERROR = "error"
    COMPLETED = "completed"

@dataclass
class AutomationRule:
    """Defines an automation rule"""
    id: str
    name: str
    description: str
    trigger: AutomationTrigger
    action: AutomationAction
    parameters: Dict[str, Any] = field(default_factory=dict)
    conditions: Dict[str, Any] = field(default_factory=dict)
    status: AutomationStatus = AutomationStatus.ACTIVE
    last_executed: Optional[datetime] = None
    execution_count: int = 0
    success_count: int = 0
    error_count: int = 0
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)

@dataclass
class AutomationExecution:
    """Records an automation execution"""
    id: str
    rule_id: str
    started_at: datetime
    completed_at: Optional[datetime] = None
    status: str = "running"
    result: Optional[Dict[str, Any]] = None
    error_message: Optional[str] = None
    context: Dict[str, Any] = field(default_factory=dict)

class AutonomousController:
    """Main controller for autonomous operations"""

    def __init__(self):
        self.rules: Dict[str, AutomationRule] = {}
        self.executions: Dict[str, AutomationExecution] = {}
        self.is_running = False
        self.scheduler_task = None
        self.metrics = {
            "total_executions": 0,
            "successful_executions": 0,
            "failed_executions": 0,
            "average_execution_time": 0.0,
            "rules_processed_today": 0
        }

        # Initialize services
        self._initialize_services()

        # Setup default automation rules
        self._setup_default_rules()

    def _initialize_services(self):
        """Initialize backend services"""
        if BACKEND_SERVICES_AVAILABLE:
            try:
                self.transcript_service = DualTranscriptService()
                self.batch_service = BatchProcessingService()
                # Pipeline service requires dependency injection
                self.pipeline_service = None
            except Exception as e:
                logger.warning(f"Could not initialize services: {e}")
                self.transcript_service = None
                self.batch_service = None
                self.pipeline_service = None
        else:
            self.transcript_service = None
            self.batch_service = None
            self.pipeline_service = None

    def _setup_default_rules(self):
        """Setup default automation rules"""

        # Daily cleanup rule
        self.add_rule(
            name="Daily Cache Cleanup",
            description="Clean up old cache entries daily at 2 AM",
            trigger=AutomationTrigger.SCHEDULED,
            action=AutomationAction.CLEANUP_CACHE,
            parameters={
                "schedule": "0 2 * * *",  # Daily at 2 AM
                "max_age_hours": 24,
                "cleanup_types": ["transcripts", "summaries", "metadata"]
            }
        )

        # Queue depth monitoring
        self.add_rule(
            name="Queue Depth Monitor",
            description="Trigger batch processing when queue exceeds threshold",
            trigger=AutomationTrigger.QUEUE_BASED,
            action=AutomationAction.BATCH_PROCESS,
            parameters={
                "queue_threshold": 10,
                "check_interval_minutes": 5,
                "batch_size": 5
            },
            conditions={
                "min_queue_age_minutes": 10,  # Wait 10 mins before processing
                "max_concurrent_batches": 3
            }
        )

        # Performance optimization
        self.add_rule(
            name="Performance Optimizer",
            description="Optimize performance based on system metrics",
            trigger=AutomationTrigger.THRESHOLD_BASED,
            action=AutomationAction.OPTIMIZE_PERFORMANCE,
            parameters={
                "cpu_threshold": 80,
                "memory_threshold": 85,
                "response_time_threshold": 5.0,
                "check_interval_minutes": 15
            }
        )

        # Daily report generation
        self.add_rule(
            name="Daily Report",
            description="Generate daily usage and performance report",
            trigger=AutomationTrigger.SCHEDULED,
            action=AutomationAction.GENERATE_REPORT,
            parameters={
                "schedule": "0 6 * * *",  # Daily at 6 AM
                "report_types": ["usage", "performance", "errors"],
                "recipients": ["admin"]
            }
        )

        # User activity monitoring
        self.add_rule(
            name="User Activity Monitor",
            description="Monitor user activity patterns and optimize accordingly",
            trigger=AutomationTrigger.USER_ACTIVITY,
            action=AutomationAction.SCALE_RESOURCES,
            parameters={
                "activity_window_hours": 1,
                "scale_threshold": 5,  # 5+ users in window
                "check_interval_minutes": 10
            }
        )

    def add_rule(
        self,
        name: str,
        description: str,
        trigger: AutomationTrigger,
        action: AutomationAction,
        parameters: Optional[Dict[str, Any]] = None,
        conditions: Optional[Dict[str, Any]] = None
    ) -> str:
        """Add a new automation rule"""

        rule_id = str(uuid.uuid4())
        rule = AutomationRule(
            id=rule_id,
            name=name,
            description=description,
            trigger=trigger,
            action=action,
            parameters=parameters or {},
            conditions=conditions or {}
        )

        self.rules[rule_id] = rule
        logger.info(f"Added automation rule: {name} ({rule_id})")
        return rule_id

    def update_rule(self, rule_id: str, **updates) -> bool:
        """Update an automation rule"""
        if rule_id not in self.rules:
            return False

        rule = self.rules[rule_id]
        for key, value in updates.items():
            if hasattr(rule, key):
                setattr(rule, key, value)

        rule.updated_at = datetime.now()
        logger.info(f"Updated automation rule: {rule_id}")
        return True

    def remove_rule(self, rule_id: str) -> bool:
        """Remove an automation rule"""
        if rule_id not in self.rules:
            return False

        rule = self.rules[rule_id]
        del self.rules[rule_id]
        logger.info(f"Removed automation rule: {rule.name} ({rule_id})")
        return True

    def activate_rule(self, rule_id: str) -> bool:
        """Activate an automation rule"""
        return self.update_rule(rule_id, status=AutomationStatus.ACTIVE)

    def deactivate_rule(self, rule_id: str) -> bool:
        """Deactivate an automation rule"""
        return self.update_rule(rule_id, status=AutomationStatus.INACTIVE)

    async def start(self):
        """Start the autonomous controller"""
        if self.is_running:
            logger.warning("Autonomous controller is already running")
            return

        self.is_running = True
        self.scheduler_task = asyncio.create_task(self._scheduler_loop())
        logger.info("Started autonomous controller")

        # Trigger startup event
        await trigger_event(WebhookEvent.SYSTEM_STATUS, {
            "status": "autonomous_controller_started",
            "active_rules": len([r for r in self.rules.values() if r.status == AutomationStatus.ACTIVE]),
            "timestamp": datetime.now().isoformat()
        })

    async def stop(self):
        """Stop the autonomous controller"""
        if not self.is_running:
            return

        self.is_running = False

        if self.scheduler_task:
            self.scheduler_task.cancel()
            try:
                await self.scheduler_task
            except asyncio.CancelledError:
                pass

        logger.info("Stopped autonomous controller")

        # Trigger shutdown event
        await trigger_event(WebhookEvent.SYSTEM_STATUS, {
            "status": "autonomous_controller_stopped",
            "total_executions": self.metrics["total_executions"],
            "timestamp": datetime.now().isoformat()
        })

    async def _scheduler_loop(self):
        """Main scheduler loop"""
        logger.info("Starting autonomous scheduler loop")

        while self.is_running:
            try:
                # Check all active rules
                for rule in self.rules.values():
                    if rule.status != AutomationStatus.ACTIVE:
                        continue

                    # Check if rule should be executed
                    if await self._should_execute_rule(rule):
                        await self._execute_rule(rule)

                # Clean up old executions
                await self._cleanup_old_executions()

                # Wait before next iteration
                await asyncio.sleep(30)  # Check every 30 seconds

            except Exception as e:
                logger.error(f"Error in scheduler loop: {e}")
                await asyncio.sleep(60)  # Longer pause on errors

    async def _should_execute_rule(self, rule: AutomationRule) -> bool:
        """Check if a rule should be executed"""
        try:
            if rule.trigger == AutomationTrigger.SCHEDULED:
                return self._check_schedule(rule)
            elif rule.trigger == AutomationTrigger.QUEUE_BASED:
                return await self._check_queue_conditions(rule)
            elif rule.trigger == AutomationTrigger.THRESHOLD_BASED:
                return await self._check_threshold_conditions(rule)
            elif rule.trigger == AutomationTrigger.USER_ACTIVITY:
                return await self._check_user_activity(rule)
            else:
                return False
        except Exception as e:
            logger.error(f"Error checking rule {rule.id}: {e}")
            return False

    def _check_schedule(self, rule: AutomationRule) -> bool:
        """Check if scheduled rule should execute"""
        # Simple time-based check (would use croniter in production)
        schedule = rule.parameters.get("schedule")
        if not schedule:
            return False

        # For demo, check if we haven't run in the last hour
        if rule.last_executed:
            time_since_last = datetime.now() - rule.last_executed
            return time_since_last > timedelta(hours=1)

        return True

    async def _check_queue_conditions(self, rule: AutomationRule) -> bool:
        """Check queue-based conditions"""
        threshold = rule.parameters.get("queue_threshold", 10)

        # Mock queue check (would connect to real queue in production)
        mock_queue_size = 15  # Simulated queue size

        if mock_queue_size >= threshold:
            # Check additional conditions
            min_age = rule.conditions.get("min_queue_age_minutes", 0)
            max_concurrent = rule.conditions.get("max_concurrent_batches", 5)

            # Mock checks
            queue_age_ok = True  # Would check actual queue age
            concurrent_ok = True  # Would check running batches

            return queue_age_ok and concurrent_ok

        return False

    async def _check_threshold_conditions(self, rule: AutomationRule) -> bool:
        """Check threshold-based conditions"""
        cpu_threshold = rule.parameters.get("cpu_threshold", 80)
        memory_threshold = rule.parameters.get("memory_threshold", 85)
        response_time_threshold = rule.parameters.get("response_time_threshold", 5.0)

        # Mock system metrics (would use real monitoring in production)
        mock_cpu = 75
        mock_memory = 82
        mock_response_time = 4.2

        return (mock_cpu > cpu_threshold or
                mock_memory > memory_threshold or
                mock_response_time > response_time_threshold)

    async def _check_user_activity(self, rule: AutomationRule) -> bool:
        """Check user activity patterns"""
        window_hours = rule.parameters.get("activity_window_hours", 1)
        scale_threshold = rule.parameters.get("scale_threshold", 5)

        # Mock user activity check
        mock_active_users = 7  # Would query real user activity

        return mock_active_users >= scale_threshold

    async def _execute_rule(self, rule: AutomationRule):
        """Execute an automation rule"""
        execution_id = str(uuid.uuid4())
        execution = AutomationExecution(
            id=execution_id,
            rule_id=rule.id,
            started_at=datetime.now()
        )

        self.executions[execution_id] = execution
        logger.info(f"Executing rule: {rule.name} ({rule.id})")

        try:
            # Execute the action
            result = await self._perform_action(rule.action, rule.parameters)

            # Update execution record
            execution.completed_at = datetime.now()
            execution.status = "completed"
            execution.result = result

            # Update rule stats
            rule.last_executed = datetime.now()
            rule.execution_count += 1
            rule.success_count += 1

            # Update system metrics
            self.metrics["total_executions"] += 1
            self.metrics["successful_executions"] += 1

            # Calculate execution time
            if execution.completed_at and execution.started_at:
                execution_time = (execution.completed_at - execution.started_at).total_seconds()
                self._update_average_execution_time(execution_time)

            logger.info(f"Successfully executed rule: {rule.name}")

            # Trigger success webhook
            await trigger_event(WebhookEvent.SYSTEM_STATUS, {
                "event_type": "automation_rule_executed",
                "rule_id": rule.id,
                "rule_name": rule.name,
                "execution_id": execution_id,
                "result": result,
                "timestamp": datetime.now().isoformat()
            })

        except Exception as e:
            # Update execution record
            execution.completed_at = datetime.now()
            execution.status = "failed"
            execution.error_message = str(e)

            # Update rule stats
            rule.error_count += 1

            # Update system metrics
            self.metrics["total_executions"] += 1
            self.metrics["failed_executions"] += 1

            logger.error(f"Failed to execute rule {rule.name}: {e}")

            # Trigger error webhook
            await trigger_event(WebhookEvent.ERROR_OCCURRED, {
                "error_type": "automation_rule_failed",
                "rule_id": rule.id,
                "rule_name": rule.name,
                "execution_id": execution_id,
                "error": str(e),
                "timestamp": datetime.now().isoformat()
            })

    async def _perform_action(self, action: AutomationAction, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Perform the specified automation action"""

        if action == AutomationAction.CLEANUP_CACHE:
            return await self._cleanup_cache_action(parameters)
        elif action == AutomationAction.BATCH_PROCESS:
            return await self._batch_process_action(parameters)
        elif action == AutomationAction.GENERATE_REPORT:
            return await self._generate_report_action(parameters)
        elif action == AutomationAction.SCALE_RESOURCES:
            return await self._scale_resources_action(parameters)
        elif action == AutomationAction.OPTIMIZE_PERFORMANCE:
            return await self._optimize_performance_action(parameters)
        elif action == AutomationAction.SEND_NOTIFICATION:
            return await self._send_notification_action(parameters)
        elif action == AutomationAction.BACKUP_DATA:
            return await self._backup_data_action(parameters)
        else:
            raise ValueError(f"Unknown action: {action}")

    async def _cleanup_cache_action(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Perform cache cleanup"""
        max_age_hours = parameters.get("max_age_hours", 24)
        cleanup_types = parameters.get("cleanup_types", ["transcripts", "summaries"])

        # Mock cleanup (would connect to real cache in production)
        cleaned_items = 0
        for cleanup_type in cleanup_types:
            # Simulate cleanup
            items_cleaned = 15  # Mock number
            cleaned_items += items_cleaned
            logger.info(f"Cleaned {items_cleaned} {cleanup_type} cache entries")

        return {
            "action": "cleanup_cache",
            "items_cleaned": cleaned_items,
            "cleanup_types": cleanup_types,
            "max_age_hours": max_age_hours
        }

    async def _batch_process_action(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Perform batch processing"""
        batch_size = parameters.get("batch_size", 5)

        # Mock batch processing
        mock_video_urls = [
            f"https://youtube.com/watch?v=mock_{i}"
            for i in range(batch_size)
        ]

        if self.batch_service and BACKEND_SERVICES_AVAILABLE:
            # Would use real batch service
            batch_id = f"auto_batch_{int(datetime.now().timestamp())}"
            logger.info(f"Started automated batch processing: {batch_id}")
        else:
            batch_id = f"mock_batch_{int(datetime.now().timestamp())}"

        return {
            "action": "batch_process",
            "batch_id": batch_id,
            "video_count": batch_size,
            "videos": mock_video_urls
        }

    async def _generate_report_action(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Generate system reports"""
        report_types = parameters.get("report_types", ["usage"])

        reports_generated = []
        for report_type in report_types:
            report_id = f"{report_type}_{datetime.now().strftime('%Y%m%d')}"

            # Mock report generation
            if report_type == "usage":
                report_data = {
                    "total_videos_processed": 145,
                    "total_transcripts": 132,
                    "total_summaries": 98,
                    "active_users": 23
                }
            elif report_type == "performance":
                report_data = {
                    "average_processing_time": 45.2,
                    "success_rate": 0.97,
                    "error_rate": 0.03,
                    "system_uptime": "99.8%"
                }
            elif report_type == "errors":
                report_data = {
                    "total_errors": 12,
                    "critical_errors": 2,
                    "warning_errors": 10,
                    "top_error_types": ["timeout", "api_limit"]
                }
            else:
                report_data = {"message": f"Unknown report type: {report_type}"}

            reports_generated.append({
                "report_id": report_id,
                "type": report_type,
                "data": report_data
            })

        return {
            "action": "generate_report",
            "reports": reports_generated,
            "generated_at": datetime.now().isoformat()
        }

    async def _scale_resources_action(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Scale system resources"""
        activity_window = parameters.get("activity_window_hours", 1)
        scale_threshold = parameters.get("scale_threshold", 5)

        # Mock resource scaling
        current_capacity = 100  # Mock current capacity
        recommended_capacity = 150  # Mock recommended

        return {
            "action": "scale_resources",
            "current_capacity": current_capacity,
            "recommended_capacity": recommended_capacity,
            "scaling_factor": 1.5,
            "activity_window_hours": activity_window
        }

    async def _optimize_performance_action(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Optimize system performance"""
        cpu_threshold = parameters.get("cpu_threshold", 80)
        memory_threshold = parameters.get("memory_threshold", 85)

        optimizations = []

        # Mock performance optimizations
        optimizations.append("Enabled connection pooling")
        optimizations.append("Increased cache TTL")
        optimizations.append("Reduced background task frequency")

        return {
            "action": "optimize_performance",
            "optimizations_applied": optimizations,
            "performance_improvement": "15%",
            "resource_usage_reduction": "12%"
        }

    async def _send_notification_action(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Send notifications"""
        recipients = parameters.get("recipients", ["admin"])
        message = parameters.get("message", "Automated notification")

        # Mock notification sending
        notifications_sent = len(recipients)

        return {
            "action": "send_notification",
            "recipients": recipients,
            "message": message,
            "notifications_sent": notifications_sent
        }

    async def _backup_data_action(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Backup system data"""
        backup_types = parameters.get("backup_types", ["database", "cache"])

        backups_created = []
        for backup_type in backup_types:
            backup_id = f"{backup_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
            backups_created.append({
                "backup_id": backup_id,
                "type": backup_type,
                "size_mb": 250  # Mock size
            })

        return {
            "action": "backup_data",
            "backups_created": backups_created,
            "total_size_mb": sum(b["size_mb"] for b in backups_created)
        }

    def _update_average_execution_time(self, execution_time: float):
        """Update average execution time"""
        current_avg = self.metrics["average_execution_time"]
        total_executions = self.metrics["total_executions"]

        if total_executions == 1:
            self.metrics["average_execution_time"] = execution_time
        else:
            self.metrics["average_execution_time"] = (
                (current_avg * (total_executions - 1) + execution_time) / total_executions
            )

    async def _cleanup_old_executions(self):
        """Clean up old execution records"""
        cutoff_date = datetime.now() - timedelta(days=7)

        old_executions = [
            exec_id for exec_id, execution in self.executions.items()
            if execution.started_at < cutoff_date and execution.status in ["completed", "failed"]
        ]

        for exec_id in old_executions:
            del self.executions[exec_id]

        if old_executions:
            logger.info(f"Cleaned up {len(old_executions)} old execution records")

    def get_rule_status(self, rule_id: str) -> Optional[Dict[str, Any]]:
        """Get status of a specific rule"""
        if rule_id not in self.rules:
            return None

        rule = self.rules[rule_id]

        return {
            "rule_id": rule.id,
            "name": rule.name,
            "description": rule.description,
            "trigger": rule.trigger,
            "action": rule.action,
            "status": rule.status,
            "last_executed": rule.last_executed.isoformat() if rule.last_executed else None,
            "execution_count": rule.execution_count,
            "success_count": rule.success_count,
            "error_count": rule.error_count,
            "success_rate": rule.success_count / rule.execution_count if rule.execution_count > 0 else 0.0,
            "created_at": rule.created_at.isoformat(),
            "updated_at": rule.updated_at.isoformat()
        }

    def get_system_status(self) -> Dict[str, Any]:
        """Get overall system status"""
        active_rules = len([r for r in self.rules.values() if r.status == AutomationStatus.ACTIVE])
        running_executions = len([e for e in self.executions.values() if e.status == "running"])

        return {
            "controller_status": "running" if self.is_running else "stopped",
            "total_rules": len(self.rules),
            "active_rules": active_rules,
            "running_executions": running_executions,
            "total_executions": self.metrics["total_executions"],
            "successful_executions": self.metrics["successful_executions"],
            "failed_executions": self.metrics["failed_executions"],
            "success_rate": (
                self.metrics["successful_executions"] / self.metrics["total_executions"]
                if self.metrics["total_executions"] > 0 else 0.0
            ),
            "average_execution_time": round(self.metrics["average_execution_time"], 3),
            "rules_processed_today": self.metrics["rules_processed_today"],
            "services_available": BACKEND_SERVICES_AVAILABLE
        }

    def get_execution_history(self, rule_id: Optional[str] = None, limit: int = 50) -> List[Dict[str, Any]]:
        """Get execution history"""
        executions = list(self.executions.values())

        if rule_id:
            executions = [e for e in executions if e.rule_id == rule_id]

        executions.sort(key=lambda x: x.started_at, reverse=True)
        executions = executions[:limit]

        return [
            {
                "execution_id": e.id,
                "rule_id": e.rule_id,
                "started_at": e.started_at.isoformat(),
                "completed_at": e.completed_at.isoformat() if e.completed_at else None,
                "status": e.status,
                "result": e.result,
                "error_message": e.error_message
            }
            for e in executions
        ]

# Global autonomous controller instance
autonomous_controller = AutonomousController()

# Convenience functions

async def start_autonomous_operations():
    """Start autonomous operations"""
    await autonomous_controller.start()

async def stop_autonomous_operations():
    """Stop autonomous operations"""
    await autonomous_controller.stop()

def get_automation_status() -> Dict[str, Any]:
    """Get automation system status"""
    return autonomous_controller.get_system_status()

async def trigger_manual_execution(rule_id: str) -> bool:
    """Manually trigger rule execution"""
    if rule_id not in autonomous_controller.rules:
        return False

    rule = autonomous_controller.rules[rule_id]
    await autonomous_controller._execute_rule(rule)
    return True