commit 955c57da03b8fa9ea971073364b14a49d3e804a5
Author: S
Date:   Thu Aug 14 00:44:46 2025 -0400

    Initial commit - YouTube Automation Tools

diff --git a/.cursor/mcp.json b/.cursor/mcp.json
new file mode 100644
index 0000000..b157908
--- /dev/null
+++ b/.cursor/mcp.json
@@ -0,0 +1,19 @@
+{
+  "mcpServers": {
+    "task-master-ai": {
+      "command": "npx",
+      "args": ["-y", "--package=task-master-ai", "task-master-ai"],
+      "env": {
+        "ANTHROPIC_API_KEY": "YOUR_ANTHROPIC_API_KEY_HERE",
+        "PERPLEXITY_API_KEY": "YOUR_PERPLEXITY_API_KEY_HERE",
+        "OPENAI_API_KEY": "YOUR_OPENAI_KEY_HERE",
+        "GOOGLE_API_KEY": "YOUR_GOOGLE_KEY_HERE",
+        "XAI_API_KEY": "YOUR_XAI_KEY_HERE",
+        "OPENROUTER_API_KEY": "YOUR_OPENROUTER_KEY_HERE",
+        "MISTRAL_API_KEY": "YOUR_MISTRAL_KEY_HERE",
+        "AZURE_OPENAI_API_KEY": "YOUR_AZURE_KEY_HERE",
+        "OLLAMA_API_KEY": "YOUR_OLLAMA_API_KEY_HERE"
+      }
+    }
+  }
+}
diff --git a/.cursor/rules/cursor_rules.mdc b/.cursor/rules/cursor_rules.mdc
new file mode 100644
index 0000000..7dfae3d
--- /dev/null
+++ b/.cursor/rules/cursor_rules.mdc
@@ -0,0 +1,53 @@
+---
+description: Guidelines for creating and maintaining Cursor rules to ensure consistency and effectiveness.
+globs: .cursor/rules/*.mdc
+alwaysApply: true
+---
+
+- **Required Rule Structure:**
+  ```markdown
+  ---
+  description: Clear, one-line description of what the rule enforces
+  globs: path/to/files/*.ext, other/path/**/*
+  alwaysApply: boolean
+  ---
+
+  - **Main Points in Bold**
+    - Sub-points with details
+    - Examples and explanations
+  ```
+
+- **File References:**
+  - Use `[filename](mdc:path/to/file)` ([filename](mdc:filename)) to reference files
+  - Example: [prisma.mdc](mdc:.cursor/rules/prisma.mdc) for rule references
+  - Example: [schema.prisma](mdc:prisma/schema.prisma) for code references
+
+- **Code Examples:**
+  - Use language-specific code blocks
+  ```typescript
+  // ✅ DO: Show good examples
+  const goodExample = true;
+
+  // ❌ DON'T: Show anti-patterns
+  const badExample = false;
+  ```
+
+- **Rule Content Guidelines:**
+  - Start with high-level overview
+  - Include specific, actionable requirements
+  - Show examples of correct implementation
+  - Reference existing code when possible
+  - Keep rules DRY by referencing other rules
+
+- **Rule Maintenance:**
+  - Update rules when new patterns emerge
+  - Add examples from actual codebase
+  - Remove outdated patterns
+  - Cross-reference related rules
+
+- **Best Practices:**
+  - Use bullet points for clarity
+  - Keep descriptions concise
+  - Include both DO and DON'T examples
+  - Reference actual code over theoretical examples
+  - Use consistent formatting across rules
\ No newline at end of file
diff --git a/.cursor/rules/self_improve.mdc b/.cursor/rules/self_improve.mdc
new file mode 100644
index 0000000..40b31b6
--- /dev/null
+++ b/.cursor/rules/self_improve.mdc
@@ -0,0 +1,72 @@
+---
+description: Guidelines for continuously improving Cursor rules based on emerging code patterns and best practices.
+globs: **/*
+alwaysApply: true
+---
+
+- **Rule Improvement Triggers:**
+  - New code patterns not covered by existing rules
+  - Repeated similar implementations across files
+  - Common error patterns that could be prevented
+  - New libraries or tools being used consistently
+  - Emerging best practices in the codebase
+
+- **Analysis Process:**
+  - Compare new code with existing rules
+  - Identify patterns that should be standardized
+  - Look for references to external documentation
+  - Check for consistent error handling patterns
+  - Monitor test patterns and coverage
+
+- **Rule Updates:**
+  - **Add New Rules When:**
+    - A new technology/pattern is used in 3+ files
+    - Common bugs could be prevented by a rule
+    - Code reviews repeatedly mention the same feedback
+    - New security or performance patterns emerge
+
+  - **Modify Existing Rules When:**
+    - Better examples exist in the codebase
+    - Additional edge cases are discovered
+    - Related rules have been updated
+    - Implementation details have changed
+
+- **Example Pattern Recognition:**
+  ```typescript
+  // If you see repeated patterns like:
+  const data = await prisma.user.findMany({
+    select: { id: true, email: true },
+    where: { status: 'ACTIVE' }
+  });
+
+  // Consider adding to [prisma.mdc](mdc:.cursor/rules/prisma.mdc):
+  // - Standard select fields
+  // - Common where conditions
+  // - Performance optimization patterns
+  ```
+
+- **Rule Quality Checks:**
+  - Rules should be actionable and specific
+  - Examples should come from actual code
+  - References should be up to date
+  - Patterns should be consistently enforced
+
+- **Continuous Improvement:**
+  - Monitor code review comments
+  - Track common development questions
+  - Update rules after major refactors
+  - Add links to relevant documentation
+  - Cross-reference related rules
+
+- **Rule Deprecation:**
+  - Mark outdated patterns as deprecated
+  - Remove rules that no longer apply
+  - Update references to deprecated rules
+  - Document migration paths for old patterns
+
+- **Documentation Updates:**
+  - Keep examples synchronized with code
+  - Update references to external docs
+  - Maintain links between related rules
+  - Document breaking changes
+Follow [cursor_rules.mdc](mdc:.cursor/rules/cursor_rules.mdc) for proper rule formatting and structure.
diff --git a/.cursor/rules/taskmaster/dev_workflow.mdc b/.cursor/rules/taskmaster/dev_workflow.mdc
new file mode 100644
index 0000000..84dd906
--- /dev/null
+++ b/.cursor/rules/taskmaster/dev_workflow.mdc
@@ -0,0 +1,424 @@
+---
+description: Guide for using Taskmaster to manage task-driven development workflows
+globs: **/*
+alwaysApply: true
+---
+
+# Taskmaster Development Workflow
+
+This guide outlines the standard process for using Taskmaster to manage software development projects. It is written as a set of instructions for you, the AI agent.
+
+- **Your Default Stance**: For most projects, the user can work directly within the `master` task context. Your initial actions should operate on this default context unless a clear pattern for multi-context work emerges.
+- **Your Goal**: Your role is to elevate the user's workflow by intelligently introducing advanced features like **Tagged Task Lists** when you detect the appropriate context. Do not force tags on the user; suggest them as a helpful solution to a specific need.
+
+## The Basic Loop
+The fundamental development cycle you will facilitate is:
+1. **`list`**: Show the user what needs to be done.
+2. **`next`**: Help the user decide what to work on.
+3. **`show <id>`**: Provide details for a specific task.
+4. **`expand <id>`**: Break down a complex task into smaller, manageable subtasks.
+5. **Implement**: The user writes the code and tests.
+6. **`update-subtask`**: Log progress and findings on behalf of the user.
+7. **`set-status`**: Mark tasks and subtasks as `done` as work is completed.
+8. **Repeat**.
+
+All your standard command executions should operate on the user's current task context, which defaults to `master`.
+
+---
+
+## Standard Development Workflow Process
+
+### Simple Workflow (Default Starting Point)
+
+For new projects or when users are getting started, operate within the `master` tag context:
+
+- Start new projects by running the `initialize_project` tool / `task-master init` or `parse_prd` / `task-master parse-prd --input='<prd-file.txt>'` (see @`taskmaster.mdc`) to generate an initial tasks.json with tagged structure
+- Configure rule sets during initialization with the `--rules` flag (e.g., `task-master init --rules cursor,windsurf`) or manage them later with the `task-master rules add/remove` commands
+- Begin coding sessions with `get_tasks` / `task-master list` (see @`taskmaster.mdc`) to see current tasks, status, and IDs
+- Determine the next task to work on using `next_task` / `task-master next` (see @`taskmaster.mdc`)
+- Analyze task complexity with `analyze_project_complexity` / `task-master analyze-complexity --research` (see @`taskmaster.mdc`) before breaking down tasks
+- Review the complexity report using `complexity_report` / `task-master complexity-report` (see @`taskmaster.mdc`)
+- Select tasks based on dependencies (all marked 'done'), priority level, and ID order
+- View specific task details using `get_task` / `task-master show <id>` (see @`taskmaster.mdc`) to understand implementation requirements
+- Break down complex tasks using `expand_task` / `task-master expand --id=<id> --force --research` (see @`taskmaster.mdc`) with appropriate flags like `--force` (to replace existing subtasks) and `--research`
+- Implement code following task details, dependencies, and project standards
+- Mark completed tasks with `set_task_status` / `task-master set-status --id=<id> --status=done` (see @`taskmaster.mdc`)
+- Update dependent tasks when implementation differs from the original plan using `update` / `task-master update --from=<id> --prompt="..."` or `update_task` / `task-master update-task --id=<id> --prompt="..."` (see @`taskmaster.mdc`)
+
+---
+
+## Leveling Up: Agent-Led Multi-Context Workflows
+
+While the basic workflow is powerful, your primary opportunity to add value is by identifying when to introduce **Tagged Task Lists**. These patterns are your tools for creating a more organized and efficient development environment for the user, especially if you detect agentic or parallel development happening across the same session.
+
+**Critical Principle**: Most users should never see a difference in their experience. Only introduce advanced workflows when you detect clear indicators that the project has evolved beyond simple task management.
+
+### When to Introduce Tags: Your Decision Patterns
+
+Here are the patterns to look for. When you detect one, you should propose the corresponding workflow to the user.
+
+#### Pattern 1: Simple Git Feature Branching
+This is the most common and direct use case for tags.
+
+- **Trigger**: The user creates a new git branch (e.g., `git checkout -b feature/user-auth`).
+- **Your Action**: Propose creating a new tag that mirrors the branch name to isolate the feature's tasks from `master`.
+- **Your Suggested Prompt**: *"I see you've created a new branch named 'feature/user-auth'. To keep all related tasks neatly organized and separate from your main list, I can create a corresponding task tag for you. This helps prevent merge conflicts in your `tasks.json` file later. Shall I create the 'feature-user-auth' tag?"*
+- **Tool to Use**: `task-master add-tag --from-branch`
+
+#### Pattern 2: Team Collaboration
+- **Trigger**: The user mentions working with teammates (e.g., "My teammate Alice is handling the database schema," or "I need to review Bob's work on the API.").
+- **Your Action**: Suggest creating a separate tag for the user's work to prevent conflicts with shared master context.
+- **Your Suggested Prompt**: *"Since you're working with Alice, I can create a separate task context for your work to avoid conflicts. This way, Alice can continue working with the master list while you have your own isolated context. When you're ready to merge your work, we can coordinate the tasks back to master. Shall I create a tag for your current work?"*
+- **Tool to Use**: `task-master add-tag my-work --copy-from-current --description="My tasks while collaborating with Alice"`
+
+#### Pattern 3: Experiments or Risky Refactors
+- **Trigger**: The user wants to try something that might not be kept (e.g., "I want to experiment with switching our state management library," or "Let's refactor the old API module, but I want to keep the current tasks as a reference.").
+- **Your Action**: Propose creating a sandboxed tag for the experimental work.
+- **Your Suggested Prompt**: *"This sounds like a great experiment. To keep these new tasks separate from our main plan, I can create a temporary 'experiment-zustand' tag for this work. If we decide not to proceed, we can simply delete the tag without affecting the main task list. Sound good?"*
+- **Tool to Use**: `task-master add-tag experiment-zustand --description="Exploring Zustand migration"`
+
+#### Pattern 4: Large Feature Initiatives (PRD-Driven)
+This is a more structured approach for significant new features or epics.
+
+- **Trigger**: The user describes a large, multi-step feature that would benefit from a formal plan.
+- **Your Action**: Propose a comprehensive, PRD-driven workflow.
+- **Your Suggested Prompt**: *"This sounds like a significant new feature. To manage this effectively, I suggest we create a dedicated task context for it. Here's the plan: I'll create a new tag called 'feature-xyz', then we can draft a Product Requirements Document (PRD) together to scope the work. Once the PRD is ready, I'll automatically generate all the necessary tasks within that new tag. How does that sound?"*
+- **Your Implementation Flow**:
+  1. **Create an empty tag**: `task-master add-tag feature-xyz --description "Tasks for the new XYZ feature"`. You can also start by creating a git branch if applicable, and then create the tag from that branch.
+  2. **Collaborate & Create PRD**: Work with the user to create a detailed PRD file (e.g., `.taskmaster/docs/feature-xyz-prd.txt`).
+  3. **Parse PRD into the new tag**: `task-master parse-prd .taskmaster/docs/feature-xyz-prd.txt --tag feature-xyz`
+  4. **Prepare the new task list**: Follow up by suggesting `analyze-complexity` and `expand-all` for the newly created tasks within the `feature-xyz` tag.
+
+#### Pattern 5: Version-Based Development
+Tailor your approach based on the project maturity indicated by tag names.
+
+- **Prototype/MVP Tags** (`prototype`, `mvp`, `poc`, `v0.x`):
+  - **Your Approach**: Focus on speed and functionality over perfection
+  - **Task Generation**: Create tasks that emphasize "get it working" over "get it perfect"
+  - **Complexity Level**: Lower complexity, fewer subtasks, more direct implementation paths
+  - **Research Prompts**: Include context like "This is a prototype - prioritize speed and basic functionality over optimization"
+  - **Example Prompt Addition**: *"Since this is for the MVP, I'll focus on tasks that get core functionality working quickly rather than over-engineering."*
+
+- **Production/Mature Tags** (`v1.0+`, `production`, `stable`):
+  - **Your Approach**: Emphasize robustness, testing, and maintainability
+  - **Task Generation**: Include comprehensive error handling, testing, documentation, and optimization
+  - **Complexity Level**: Higher complexity, more detailed subtasks, thorough implementation paths
+  - **Research Prompts**: Include context like "This is for production - prioritize reliability, performance, and maintainability"
+  - **Example Prompt Addition**: *"Since this is for production, I'll ensure tasks include proper error handling, testing, and documentation."*
+
+### Advanced Workflow (Tag-Based & PRD-Driven)
+
+**When to Transition**: Recognize when the project has evolved beyond simple task management (or when the user has initialized Taskmaster on an existing codebase). Look for these indicators:
+- User mentions teammates or collaboration needs
+- Project has grown to 15+ tasks with mixed priorities
+- User creates feature branches or mentions major initiatives
+- User initializes Taskmaster on an existing, complex codebase
+- User describes large features that would benefit from dedicated planning
+
+**Your Role in Transition**: Guide the user to a more sophisticated workflow that leverages tags for organization and PRDs for comprehensive planning.
+
+#### Master List Strategy (High-Value Focus)
+Once you transition to tag-based workflows, the `master` tag should ideally contain only:
+- **High-level deliverables** that provide significant business value
+- **Major milestones** and epic-level features
+- **Critical infrastructure** work that affects the entire project
+- **Release-blocking** items
+
+**What NOT to put in master**:
+- Detailed implementation subtasks (these go in feature-specific tags' parent tasks)
+- Refactoring work (create dedicated tags like `refactor-auth`)
+- Experimental features (use `experiment-*` tags)
+- Team member-specific tasks (use person-specific tags)
+
+#### PRD-Driven Feature Development
+
+**For New Major Features**:
+1. **Identify the Initiative**: When the user describes a significant feature
+2. **Create Dedicated Tag**: `add_tag feature-[name] --description="[Feature description]"`
+3. **Collaborative PRD Creation**: Work with the user to create a comprehensive PRD in `.taskmaster/docs/feature-[name]-prd.txt`
+4. **Parse & Prepare**:
+   - `parse_prd .taskmaster/docs/feature-[name]-prd.txt --tag=feature-[name]`
+   - `analyze_project_complexity --tag=feature-[name] --research`
+   - `expand_all --tag=feature-[name] --research`
+5. **Add Master Reference**: Create a high-level task in `master` that references the feature tag
+
+**For Existing Codebase Analysis**:
+When users initialize Taskmaster on existing projects:
+1. **Codebase Discovery**: Use your native tools for producing deep context about the code base. You may use the `research` tool with `--tree` and `--files` to collect up-to-date information using the existing architecture as context.
+2. **Collaborative Assessment**: Work with the user to identify improvement areas, technical debt, or new features
+3. **Strategic PRD Creation**: Co-author PRDs that include:
+   - Current state analysis (based on your codebase research)
+   - Proposed improvements or new features
+   - Implementation strategy considering existing code
+4. **Tag-Based Organization**: Parse PRDs into appropriate tags (`refactor-api`, `feature-dashboard`, `tech-debt`, etc.)
+5. **Master List Curation**: Keep only the most valuable initiatives in master
+
+The `parse-prd` command's `--append` flag enables the user to parse multiple PRDs within tags or across tags. PRDs should be focused, and the number of tasks they are parsed into should be chosen strategically relative to the PRD's complexity and level of detail.
+
+### Workflow Transition Examples
+
+**Example 1: Simple → Team-Based**
+```
+User: "Alice is going to help with the API work"
+Your Response: "Great! To avoid conflicts, I'll create a separate task context for your work. Alice can continue with the master list while you work in your own context. When you're ready to merge, we can coordinate the tasks back together."
+Action: add_tag my-api-work --copy-from-current --description="My API tasks while collaborating with Alice"
+```
+
+**Example 2: Simple → PRD-Driven**
+```
+User: "I want to add a complete user dashboard with analytics, user management, and reporting"
+Your Response: "This sounds like a major feature that would benefit from detailed planning. Let me create a dedicated context for this work and we can draft a PRD together to ensure we capture all requirements."
+Actions:
+1. add_tag feature-dashboard --description="User dashboard with analytics and management"
+2. Collaborate on PRD creation
+3. parse_prd dashboard-prd.txt --tag=feature-dashboard
+4. Add high-level "User Dashboard" task to master
+```
+
+**Example 3: Existing Project → Strategic Planning**
+```
+User: "I just initialized Taskmaster on my existing React app. It's getting messy and I want to improve it."
+Your Response: "Let me research your codebase to understand the current architecture, then we can create a strategic plan for improvements."
+Actions:
+1. research "Current React app architecture and improvement opportunities" --tree --files=src/
+2. Collaborate on improvement PRD based on findings
+3. Create tags for different improvement areas (refactor-components, improve-state-management, etc.)
+4. Keep only major improvement initiatives in master
+```
+
+---
+
+## Primary Interaction: MCP Server vs. CLI
+
+Taskmaster offers two primary ways to interact:
+
+1. **MCP Server (Recommended for Integrated Tools)**:
+   - For AI agents and integrated development environments (like Cursor), interacting via the **MCP server is the preferred method**.
+   - The MCP server exposes Taskmaster functionality through a set of tools (e.g., `get_tasks`, `add_subtask`).
+   - This method offers better performance, structured data exchange, and richer error handling compared to CLI parsing.
+   - Refer to @`mcp.mdc` for details on the MCP architecture and available tools.
+   - A comprehensive list and description of MCP tools and their corresponding CLI commands can be found in @`taskmaster.mdc`.
+   - **Restart the MCP server** if core logic in `scripts/modules` or MCP tool/direct function definitions change.
+   - **Note**: MCP tools fully support tagged task lists with complete tag management capabilities.
+
+2. **`task-master` CLI (For Users & Fallback)**:
+   - The global `task-master` command provides a user-friendly interface for direct terminal interaction.
+   - It can also serve as a fallback if the MCP server is inaccessible or a specific function isn't exposed via MCP.
+   - Install globally with `npm install -g task-master-ai` or use locally via `npx task-master-ai ...`.
+   - The CLI commands often mirror the MCP tools (e.g., `task-master list` corresponds to `get_tasks`).
+   - Refer to @`taskmaster.mdc` for a detailed command reference.
+   - **Tagged Task Lists**: The CLI fully supports the new tagged system with seamless migration.
+
+## How the Tag System Works (For Your Reference)
+
+- **Data Structure**: Tasks are organized into separate contexts (tags) like "master", "feature-branch", or "v2.0".
+- **Silent Migration**: Existing projects automatically migrate to use a "master" tag with zero disruption.
+- **Context Isolation**: Tasks in different tags are completely separate. Changes in one tag do not affect any other tag.
+- **Manual Control**: The user is always in control. There is no automatic switching. You facilitate switching by using `use-tag <name>`.
+- **Full CLI & MCP Support**: All tag management commands are available through both the CLI and MCP tools for you to use. Refer to @`taskmaster.mdc` for a full command list.
+
+---
+
+## Task Complexity Analysis
+
+- Run `analyze_project_complexity` / `task-master analyze-complexity --research` (see @`taskmaster.mdc`) for comprehensive analysis
+- Review the complexity report via `complexity_report` / `task-master complexity-report` (see @`taskmaster.mdc`) for a formatted, readable version.
+- Focus on tasks with the highest complexity scores (8-10) for detailed breakdown
+- Use analysis results to determine appropriate subtask allocation
+- Note that reports are automatically used by the `expand_task` tool/command
+
+## Task Breakdown Process
+
+- Use `expand_task` / `task-master expand --id=<id>`. It automatically uses the complexity report if found, otherwise generates a default number of subtasks.
+- Use `--num=<number>` to specify an explicit number of subtasks, overriding defaults or complexity report recommendations.
+- Add the `--research` flag to leverage Perplexity AI for research-backed expansion.
+- Add the `--force` flag to clear existing subtasks before generating new ones (default is to append).
+- Use `--prompt="<context>"` to provide additional context when needed.
+- Review and adjust generated subtasks as necessary.
+- Use the `expand_all` tool or `task-master expand --all` to expand multiple pending tasks at once, respecting flags like `--force` and `--research`.
+- If subtasks need complete replacement (regardless of the `--force` flag on `expand`), clear them first with `clear_subtasks` / `task-master clear-subtasks --id=<id>`.
+
+## Implementation Drift Handling
+
+- When implementation differs significantly from the planned approach
+- When future tasks need modification due to current implementation choices
+- When new dependencies or requirements emerge
+- Use `update` / `task-master update --from=<id> --prompt='<explanation>\nUpdate context...' --research` to update multiple future tasks.
+- Use `update_task` / `task-master update-task --id=<id> --prompt='<explanation>\nUpdate context...' --research` to update a single specific task.
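+
+For example, after a mid-project pivot, the two update styles might look like this (a minimal sketch; the task IDs and prompt text are illustrative):
+
+```bash
+# Rewrite task 18 and all later not-done tasks after a data-layer switch
+task-master update --from=18 --prompt='Switched from Redux Toolkit to React Query.\nRefactor the remaining data-fetching tasks accordingly.' --research
+
+# Or refine a single affected task instead
+task-master update-task --id=21 --prompt='Auth flow now issues JWTs rather than session cookies.'
+```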
+
+## Task Status Management
+
+- Use 'pending' for tasks ready to be worked on
+- Use 'done' for completed and verified tasks
+- Use 'deferred' for postponed tasks
+- Add custom status values as needed for project-specific workflows
+
+## Task Structure Fields
+
+- **id**: Unique identifier for the task (Example: `1`, `1.1`)
+- **title**: Brief, descriptive title (Example: `"Initialize Repo"`)
+- **description**: Concise summary of what the task involves (Example: `"Create a new repository, set up initial structure."`)
+- **status**: Current state of the task (Example: `"pending"`, `"done"`, `"deferred"`)
+- **dependencies**: IDs of prerequisite tasks (Example: `[1, 2.1]`)
+  - Dependencies are displayed with status indicators (✅ for completed, ⏱️ for pending)
+  - This helps quickly identify which prerequisite tasks are blocking work
+- **priority**: Importance level (Example: `"high"`, `"medium"`, `"low"`)
+- **details**: In-depth implementation instructions (Example: `"Use GitHub client ID/secret, handle callback, set session token."`)
+- **testStrategy**: Verification approach (Example: `"Deploy and call endpoint to confirm 'Hello World' response."`)
+- **subtasks**: List of smaller, more specific tasks (Example: `[{"id": 1, "title": "Configure OAuth", ...}]`)
+- Refer to task structure details (previously linked to `tasks.mdc`).
+
+## Configuration Management (Updated)
+
+Taskmaster configuration is managed through two main mechanisms:
+
+1. **`.taskmaster/config.json` File (Primary):**
+   * Located in the project root directory.
+   * Stores most configuration settings: AI model selections (main, research, fallback), parameters (max tokens, temperature), logging level, default subtasks/priority, project name, etc.
+   * **Tagged System Settings**: Includes `global.defaultTag` (defaults to "master") and `tags` section for tag management configuration.
+   * **Managed via `task-master models --setup` command.** Do not edit manually unless you know what you are doing.
+   * **View/Set specific models via `task-master models` command or `models` MCP tool.**
+   * Created automatically when you run `task-master models --setup` for the first time or during tagged system migration.
+
+2. **Environment Variables (`.env` / `mcp.json`):**
+   * Used **only** for sensitive API keys and specific endpoint URLs.
+   * Place API keys (one per provider) in a `.env` file in the project root for CLI usage.
+   * For MCP/Cursor integration, configure these keys in the `env` section of `.cursor/mcp.json`.
+   * Available keys/variables: See `assets/env.example` or the Configuration section in the command reference (previously linked to `taskmaster.mdc`).
+
+3. **`.taskmaster/state.json` File (Tagged System State):**
+   * Tracks current tag context and migration status.
+   * Automatically created during tagged system migration.
+   * Contains: `currentTag`, `lastSwitched`, `migrationNoticeShown`.
+
+**Important:** Non-API key settings (like model selections, `MAX_TOKENS`, `TASKMASTER_LOG_LEVEL`) are **no longer configured via environment variables**. Use the `task-master models` command (or `--setup` for interactive configuration) or the `models` MCP tool.
+**If AI commands FAIL in MCP** verify that the API key for the selected provider is present in the `env` section of `.cursor/mcp.json`.
+**If AI commands FAIL in CLI** verify that the API key for the selected provider is present in the `.env` file in the root of the project.
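+
+For instance, a minimal `.env` in the project root for CLI usage might look like this (the providers shown are illustrative; include only the keys your configured models actually need):
+
+```bash
+# .env (read by the task-master CLI; MCP reads the env section of .cursor/mcp.json instead)
+ANTHROPIC_API_KEY=YOUR_ANTHROPIC_API_KEY_HERE
+PERPLEXITY_API_KEY=YOUR_PERPLEXITY_API_KEY_HERE
+```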
+
+## Rules Management
+
+Taskmaster supports multiple AI coding assistant rule sets that can be configured during project initialization or managed afterward:
+
+- **Available Profiles**: Claude Code, Cline, Codex, Cursor, Roo Code, Trae, Windsurf (claude, cline, codex, cursor, roo, trae, windsurf)
+- **During Initialization**: Use `task-master init --rules cursor,windsurf` to specify which rule sets to include
+- **After Initialization**: Use `task-master rules add <profile>` or `task-master rules remove <profile>` to manage rule sets
+- **Interactive Setup**: Use `task-master rules setup` to launch an interactive prompt for selecting rule profiles
+- **Default Behavior**: If no `--rules` flag is specified during initialization, all available rule profiles are included
+- **Rule Structure**: Each profile creates its own directory (e.g., `.cursor/rules`, `.roo/rules`) with appropriate configuration files
+
+## Determining the Next Task
+
+- Run `next_task` / `task-master next` to show the next task to work on.
+- The command identifies tasks with all dependencies satisfied
+- Tasks are prioritized by priority level, dependency count, and ID
+- The command shows comprehensive task information including:
+  - Basic task details and description
+  - Implementation details
+  - Subtasks (if they exist)
+  - Contextual suggested actions
+- Recommended before starting any new development work
+- Respects your project's dependency structure
+- Ensures tasks are completed in the appropriate sequence
+- Provides ready-to-use commands for common task actions
+
+## Viewing Specific Task Details
+
+- Run `get_task` / `task-master show <id>` to view a specific task.
+- Use dot notation for subtasks: `task-master show 1.2` (shows subtask 2 of task 1)
+- Displays comprehensive information similar to the next command, but for a specific task
+- For parent tasks, shows all subtasks and their current status
+- For subtasks, shows parent task information and relationship
+- Provides contextual suggested actions appropriate for the specific task
+- Useful for examining task details before implementation or checking status
+
+## Managing Task Dependencies
+
+- Use `add_dependency` / `task-master add-dependency --id=<id> --depends-on=<id>` to add a dependency.
+- Use `remove_dependency` / `task-master remove-dependency --id=<id> --depends-on=<id>` to remove a dependency.
+- The system prevents circular dependencies and duplicate dependency entries
+- Dependencies are checked for existence before being added or removed
+- Task files are automatically regenerated after dependency changes
+- Dependencies are visualized with status indicators in task listings and files
+
+## Task Reorganization
+
+- Use `move_task` / `task-master move --from=<id> --to=<id>` to move tasks or subtasks within the hierarchy
+- This command supports several use cases:
+  - Moving a standalone task to become a subtask (e.g., `--from=5 --to=7`)
+  - Moving a subtask to become a standalone task (e.g., `--from=5.2 --to=7`)
+  - Moving a subtask to a different parent (e.g., `--from=5.2 --to=7.3`)
+  - Reordering subtasks within the same parent (e.g., `--from=5.2 --to=5.4`)
+  - Moving a task to a new, non-existent ID position (e.g., `--from=5 --to=25`)
+  - Moving multiple tasks at once using comma-separated IDs (e.g., `--from=10,11,12 --to=16,17,18`)
+- The system includes validation to prevent data loss:
+  - Allows moving to non-existent IDs by creating placeholder tasks
+  - Prevents moving to existing task IDs that have content (to avoid overwriting)
+  - Validates source tasks exist before attempting to move them
+- The system maintains proper parent-child relationships and dependency integrity
+- Task files are automatically regenerated after the move operation
+- This provides greater flexibility in organizing and refining your task structure as project understanding evolves
+- This is especially useful when dealing with potential merge conflicts arising from teams creating tasks on separate branches. Solve these conflicts very easily by moving your tasks and keeping theirs.
+
+## Iterative Subtask Implementation
+
+Once a task has been broken down into subtasks using `expand_task` or similar methods, follow this iterative process for implementation:
+
+1. **Understand the Goal (Preparation):**
+   * Use `get_task` / `task-master show <subtaskId>` (see @`taskmaster.mdc`) to thoroughly understand the specific goals and requirements of the subtask.
+
+2. **Initial Exploration & Planning (Iteration 1):**
+   * This is the first attempt at creating a concrete implementation plan.
+   * Explore the codebase to identify the precise files, functions, and even specific lines of code that will need modification.
+   * Determine the intended code changes (diffs) and their locations.
+   * Gather *all* relevant details from this exploration phase.
+
+3. **Log the Plan:**
+   * Run `update_subtask` / `task-master update-subtask --id=<subtaskId> --prompt='<detailed plan>'`.
+   * Provide the *complete and detailed* findings from the exploration phase in the prompt. Include file paths, line numbers, proposed diffs, reasoning, and any potential challenges identified. Do not omit details. The goal is to create a rich, timestamped log within the subtask's `details`.
+
+4. **Verify the Plan:**
+   * Run `get_task` / `task-master show <subtaskId>` again to confirm that the detailed implementation plan has been successfully appended to the subtask's details.
+
+5. **Begin Implementation:**
+   * Set the subtask status using `set_task_status` / `task-master set-status --id=<subtaskId> --status=in-progress`.
+   * Start coding based on the logged plan.
+
+6. **Refine and Log Progress (Iteration 2+):**
+   * As implementation progresses, you will encounter challenges, discover nuances, or confirm successful approaches.
+   * **Before appending new information**: Briefly review the *existing* details logged in the subtask (using `get_task` or recalling from context) to ensure the update adds fresh insights and avoids redundancy.
+   * **Regularly** use `update_subtask` / `task-master update-subtask --id=<subtaskId> --prompt='<update details>\n- What worked...\n- What didn't work...'` to append new findings.
+   * **Crucially, log:**
+     * What worked ("fundamental truths" discovered).
+     * What didn't work and why (to avoid repeating mistakes).
+     * Specific code snippets or configurations that were successful.
+     * Decisions made, especially if confirmed with user input.
+     * Any deviations from the initial plan and the reasoning.
+   * The objective is to continuously enrich the subtask's details, creating a log of the implementation journey that helps the AI (and human developers) learn, adapt, and avoid repeating errors.
+
+7. **Review & Update Rules (Post-Implementation):**
+   * Once the implementation for the subtask is functionally complete, review all code changes and the relevant chat history.
+   * Identify any new or modified code patterns, conventions, or best practices established during the implementation.
+   * Create new or update existing rules following internal guidelines (previously linked to `cursor_rules.mdc` and `self_improve.mdc`).
+
+8. **Mark Task Complete:**
+   * After verifying the implementation and updating any necessary rules, mark the subtask as completed: `set_task_status` / `task-master set-status --id=<subtaskId> --status=done`.
+
+9. **Commit Changes (If using Git):**
+   * Stage the relevant code changes and any updated/new rule files (`git add .`).
+   * Craft a comprehensive Git commit message summarizing the work done for the subtask, including both code implementation and any rule adjustments.
+   * Execute the commit command directly in the terminal (e.g., `git commit -m 'feat(module): Implement feature X for subtask <subtaskId>\n\n- Details about changes...\n- Updated rule Y for pattern Z'`).
+   * Consider if a Changeset is needed according to internal versioning guidelines (previously linked to `changeset.mdc`). If so, run `npm run changeset`, stage the generated file, and amend the commit or create a new one.
+
+10. **Proceed to Next Subtask:**
+    * Identify the next subtask (e.g., using `next_task` / `task-master next`).
+
+## Code Analysis & Refactoring Techniques
+
+- **Top-Level Function Search**:
+  - Useful for understanding module structure or planning refactors.
+  - Use grep/ripgrep to find exported functions/constants:
+    `rg "export (async function|function|const) \w+"` or similar patterns.
+  - Can help compare functions between files during migrations or identify potential naming conflicts.
+
+---
+*This workflow provides a general guideline. Adapt it based on your specific project needs and team practices.*
\ No newline at end of file
diff --git a/.cursor/rules/taskmaster/taskmaster.mdc b/.cursor/rules/taskmaster/taskmaster.mdc
new file mode 100644
index 0000000..e9be2ae
--- /dev/null
+++ b/.cursor/rules/taskmaster/taskmaster.mdc
@@ -0,0 +1,558 @@
+---
+description: Comprehensive reference for Taskmaster MCP tools and CLI commands.
+globs: **/*
+alwaysApply: true
+---
+
+# Taskmaster Tool & Command Reference
+
+This document provides a detailed reference for interacting with Taskmaster, covering both the recommended MCP tools, suitable for integrations like Cursor, and the corresponding `task-master` CLI commands, designed for direct user interaction or fallback.
+
+**Note:** For interacting with Taskmaster programmatically or via integrated tools, using the **MCP tools is strongly recommended** due to better performance, structured data, and error handling. The CLI commands serve as a user-friendly alternative and fallback.
+
+**Important:** Several MCP tools involve AI processing... The AI-powered tools include `parse_prd`, `analyze_project_complexity`, `update_subtask`, `update_task`, `update`, `expand_all`, `expand_task`, and `add_task`.
+
+**🏷️ Tagged Task Lists System:** Task Master now supports **tagged task lists** for multi-context task management. This allows you to maintain separate, isolated lists of tasks for different features, branches, or experiments. Existing projects are seamlessly migrated to use a default "master" tag. Most commands now support a `--tag <name>` flag to specify which context to operate on. If omitted, commands use the currently active tag.
+
+---
+
+## Initialization & Setup
+
+### 1. Initialize Project (`init`)
+
+* **MCP Tool:** `initialize_project`
+* **CLI Command:** `task-master init [options]`
+* **Description:** `Set up the basic Taskmaster file structure and configuration in the current directory for a new project.`
+* **Key CLI Options:**
+  * `--name <name>`: `Set the name for your project in Taskmaster's configuration.`
+  * `--description <text>`: `Provide a brief description for your project.`
+  * `--version <version>`: `Set the initial version for your project, e.g., '0.1.0'.`
+  * `-y, --yes`: `Initialize Taskmaster quickly using default settings without interactive prompts.`
+* **Usage:** Run this once at the beginning of a new project.
+* **MCP Variant Description:** `Set up the basic Taskmaster file structure and configuration in the current directory for a new project by running the 'task-master init' command.`
+* **Key MCP Parameters/Options:**
+  * `projectName`: `Set the name for your project.` (CLI: `--name <name>`)
+  * `projectDescription`: `Provide a brief description for your project.` (CLI: `--description <text>`)
+  * `projectVersion`: `Set the initial version for your project, e.g., '0.1.0'.` (CLI: `--version <version>`)
+  * `authorName`: `Author name.` (CLI: `--author <author_name>`)
+  * `skipInstall`: `Skip installing dependencies. Default is false.` (CLI: `--skip-install`)
+  * `addAliases`: `Add shell aliases tm and taskmaster. Default is false.` (CLI: `--aliases`)
+  * `yes`: `Skip prompts and use defaults/provided arguments. Default is false.` (CLI: `-y, --yes`)
+* **Usage:** Run this once at the beginning of a new project, typically via an integrated tool like Cursor. Operates on the current working directory of the MCP server.
+* **Important:** Once complete, you *MUST* parse a PRD in order to generate tasks. There will be no task files until then. The next step after initializing should be to create a PRD using the example PRD in `.taskmaster/templates/example_prd.txt`.
+* **Tagging:** Use the `--tag` option to parse the PRD into a specific, non-default tag context. If the tag doesn't exist, it will be created automatically. Example: `task-master parse-prd spec.txt --tag=new-feature`.
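+
+As a minimal bootstrap sketch (the PRD path and task count are illustrative), the init-then-parse sequence might look like:
+
+```bash
+# One-time setup with defaults and the Cursor rule profile
+task-master init -y --rules cursor
+
+# Draft a PRD from .taskmaster/templates/example_prd.txt, then parse it into tasks
+task-master parse-prd .taskmaster/docs/prd.txt --num-tasks=10
+```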
+
+### 2. Parse PRD (`parse_prd`)
+
+* **MCP Tool:** `parse_prd`
+* **CLI Command:** `task-master parse-prd [file] [options]`
+* **Description:** `Parse a Product Requirements Document, PRD, or text file with Taskmaster to automatically generate an initial set of tasks in tasks.json.`
+* **Key Parameters/Options:**
+  * `input`: `Path to your PRD or requirements text file that Taskmaster should parse for tasks.` (CLI: `[file]` positional or `-i, --input <file>`)
+  * `output`: `Specify where Taskmaster should save the generated 'tasks.json' file. Defaults to '.taskmaster/tasks/tasks.json'.` (CLI: `-o, --output <file>`)
+  * `numTasks`: `Approximate number of top-level tasks Taskmaster should aim to generate from the document.` (CLI: `-n, --num-tasks <number>`)
+  * `force`: `Use this to allow Taskmaster to overwrite an existing 'tasks.json' without asking for confirmation.` (CLI: `-f, --force`)
+* **Usage:** Useful for bootstrapping a project from an existing requirements document.
+* **Notes:** Task Master will strictly adhere to any specific requirements mentioned in the PRD, such as libraries, database schemas, frameworks, tech stacks, etc., while filling in any gaps where the PRD isn't fully specified. Tasks are designed to provide the most direct implementation path while avoiding over-engineering.
+* **Important:** This MCP tool makes AI calls and can take up to a minute to complete. Please inform users to hang tight while the operation is in progress. If the user does not have a PRD, suggest discussing their idea and then use the example PRD in `.taskmaster/templates/example_prd.txt` as a template for creating the PRD based on their idea, for use with `parse-prd`.
+
+---
+
+## AI Model Configuration
+
+### 3. Manage Models (`models`)
+* **MCP Tool:** `models`
+* **CLI Command:** `task-master models [options]`
+* **Description:** `View the current AI model configuration or set specific models for different roles (main, research, fallback). Allows setting custom model IDs for Ollama and OpenRouter.`
+* **Key MCP Parameters/Options:**
+  * `setMain <model_id>`: `Set the primary model ID for task generation/updates.` (CLI: `--set-main <model_id>`)
+  * `setResearch <model_id>`: `Set the model ID for research-backed operations.` (CLI: `--set-research <model_id>`)
+  * `setFallback <model_id>`: `Set the model ID to use if the primary fails.` (CLI: `--set-fallback <model_id>`)
+  * `ollama <boolean>`: `Indicates the set model ID is a custom Ollama model.` (CLI: `--ollama`)
+  * `openrouter <boolean>`: `Indicates the set model ID is a custom OpenRouter model.` (CLI: `--openrouter`)
+  * `listAvailableModels <boolean>`: `If true, lists available models not currently assigned to a role.` (CLI: No direct equivalent; CLI lists available automatically)
+  * `projectRoot <string>`: `Optional. Absolute path to the project root directory.` (CLI: Determined automatically)
+* **Key CLI Options:**
+  * `--set-main <model_id>`: `Set the primary model.`
+  * `--set-research <model_id>`: `Set the research model.`
+  * `--set-fallback <model_id>`: `Set the fallback model.`
+  * `--ollama`: `Specify that the provided model ID is for Ollama (use with --set-*).`
+  * `--openrouter`: `Specify that the provided model ID is for OpenRouter (use with --set-*). Validates against OpenRouter API.`
+  * `--bedrock`: `Specify that the provided model ID is for AWS Bedrock (use with --set-*).`
+  * `--setup`: `Run interactive setup to configure models, including custom Ollama/OpenRouter IDs.`
+* **Usage (MCP):** Call without set flags to get the current config. Use `setMain`, `setResearch`, or `setFallback` with a valid model ID to update the configuration. Use `listAvailableModels: true` to get a list of unassigned models. To set a custom model, provide the model ID and set `ollama: true` or `openrouter: true`.
+* **Usage (CLI):** Run without flags to view the current configuration and available models. Use set flags to update specific roles. Use `--setup` for guided configuration, including custom models. To set a custom model via flags, use `--set-<role>=<model_id>` along with either `--ollama` or `--openrouter`.
+* **Notes:** Configuration is stored in `.taskmaster/config.json` in the project root. This command/tool modifies that file. Use `listAvailableModels` or `task-master models` to see internally supported models. OpenRouter custom models are validated against their live API. Ollama custom models are not validated live.
+* **API note:** API keys for selected AI providers (based on their model) need to exist in the mcp.json file to be accessible in MCP context. The API keys must be present in the local .env file for the CLI to be able to read them.
+* **Model costs:** The costs in supported models are expressed in dollars. An input/output value of 3 is $3.00. A value of 0.8 is $0.80.
+* **Warning:** DO NOT MANUALLY EDIT THE .taskmaster/config.json FILE. Use the included commands either in the MCP or CLI format as needed. Always prioritize MCP tools when available and use the CLI as a fallback.
+
+---
+
+## Task Listing & Viewing
+
+### 4. Get Tasks (`get_tasks`)
+
+* **MCP Tool:** `get_tasks`
+* **CLI Command:** `task-master list [options]`
+* **Description:** `List your Taskmaster tasks, optionally filtering by status and showing subtasks.`
+* **Key Parameters/Options:**
+  * `status`: `Show only Taskmaster tasks matching this status (or multiple statuses, comma-separated), e.g., 'pending' or 'done,in-progress'.` (CLI: `-s, --status <status>`)
+  * `withSubtasks`: `Include subtasks indented under their parent tasks in the list.` (CLI: `--with-subtasks`)
+  * `tag`: `Specify which tag context to list tasks from. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Get an overview of the project status, often used at the start of a work session.
+
+### 5. Get Next Task (`next_task`)
+
+* **MCP Tool:** `next_task`
+* **CLI Command:** `task-master next [options]`
+* **Description:** `Ask Taskmaster to show the next available task you can work on, based on status and completed dependencies.`
+* **Key Parameters/Options:**
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+  * `tag`: `Specify which tag context to use. Defaults to the current active tag.` (CLI: `--tag <name>`)
+* **Usage:** Identify what to work on next according to the plan.
+
+### 6. Get Task Details (`get_task`)
+
+* **MCP Tool:** `get_task`
+* **CLI Command:** `task-master show [id] [options]`
+* **Description:** `Display detailed information for one or more specific Taskmaster tasks or subtasks by ID.`
+* **Key Parameters/Options:**
+  * `id`: `Required. The ID of the Taskmaster task (e.g., '15'), subtask (e.g., '15.2'), or a comma-separated list of IDs ('1,5,10.2') you want to view.` (CLI: `[id]` positional or `-i, --id <id>`)
+  * `tag`: `Specify which tag context to get the task(s) from. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Understand the full details for a specific task. When multiple IDs are provided, a summary table is shown.
+* **CRITICAL INFORMATION** If you need to collect information from multiple tasks, use comma-separated IDs (e.g., 1,2,3) to receive an array of tasks. Do not needlessly get tasks one at a time if you need to get many, as that is wasteful.
+
+---
+
+## Task Creation & Modification
+
+### 7. Add Task (`add_task`)
+
+* **MCP Tool:** `add_task`
+* **CLI Command:** `task-master add-task [options]`
+* **Description:** `Add a new task to Taskmaster by describing it; AI will structure it.`
+* **Key Parameters/Options:**
+  * `prompt`: `Required. Describe the new task you want Taskmaster to create, e.g., "Implement user authentication using JWT".` (CLI: `-p, --prompt <text>`)
+  * `dependencies`: `Specify the IDs of any Taskmaster tasks that must be completed before this new one can start, e.g., '12,14'.` (CLI: `-d, --dependencies <ids>`)
+  * `priority`: `Set the priority for the new task: 'high', 'medium', or 'low'. Default is 'medium'.` (CLI: `--priority <priority>`)
+  * `research`: `Enable Taskmaster to use the research role for potentially more informed task creation.` (CLI: `-r, --research`)
+  * `tag`: `Specify which tag context to add the task to. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Quickly add newly identified tasks during development.
+* **Important:** This MCP tool makes AI calls and can take up to a minute to complete. Please inform users to hang tight while the operation is in progress.
+
+### 8. Add Subtask (`add_subtask`)
+
+* **MCP Tool:** `add_subtask`
+* **CLI Command:** `task-master add-subtask [options]`
+* **Description:** `Add a new subtask to a Taskmaster parent task, or convert an existing task into a subtask.`
+* **Key Parameters/Options:**
+  * `id` / `parent`: `Required. The ID of the Taskmaster task that will be the parent.` (MCP: `id`, CLI: `-p, --parent <id>`)
+  * `taskId`: `Use this if you want to convert an existing top-level Taskmaster task into a subtask of the specified parent.` (CLI: `-i, --task-id <id>`)
+  * `title`: `Required if not using taskId. The title for the new subtask Taskmaster should create.` (CLI: `-t, --title <title>`)
+  * `description`: `A brief description for the new subtask.` (CLI: `-d, --description <text>`)
+  * `details`: `Provide implementation notes or details for the new subtask.` (CLI: `--details <text>`)
+  * `dependencies`: `Specify IDs of other tasks or subtasks, e.g., '15' or '16.1', that must be done before this new subtask.` (CLI: `--dependencies <ids>`)
+  * `status`: `Set the initial status for the new subtask. Default is 'pending'.` (CLI: `-s, --status <status>`)
+  * `generate`: `Enable Taskmaster to regenerate markdown task files after adding the subtask.` (CLI: `--generate`)
+  * `tag`: `Specify which tag context to operate on. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Break down tasks manually or reorganize existing tasks.
+
+### 9. Update Tasks (`update`)
+
+* **MCP Tool:** `update`
+* **CLI Command:** `task-master update [options]`
+* **Description:** `Update multiple upcoming tasks in Taskmaster based on new context or changes, starting from a specific task ID.`
+* **Key Parameters/Options:**
+  * `from`: `Required. The ID of the first task Taskmaster should update. All tasks with this ID or higher that are not 'done' will be considered.` (CLI: `--from <id>`)
+  * `prompt`: `Required. Explain the change or new context for Taskmaster to apply to the tasks, e.g., "We are now using React Query instead of Redux Toolkit for data fetching".` (CLI: `-p, --prompt <text>`)
+  * `research`: `Enable Taskmaster to use the research role for more informed updates. Requires appropriate API key.` (CLI: `-r, --research`)
+  * `tag`: `Specify which tag context to operate on. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Handle significant implementation changes or pivots that affect multiple future tasks. Example CLI: `task-master update --from='18' --prompt='Switching to React Query.\nNeed to refactor data fetching...'`
+* **Important:** This MCP tool makes AI calls and can take up to a minute to complete. Please inform users to hang tight while the operation is in progress.
+
+### 10. Update Task (`update_task`)
+
+* **MCP Tool:** `update_task`
+* **CLI Command:** `task-master update-task [options]`
+* **Description:** `Modify a specific Taskmaster task by ID, incorporating new information or changes. By default, this replaces the existing task details.`
+* **Key Parameters/Options:**
+  * `id`: `Required. The specific ID of the Taskmaster task, e.g., '15', you want to update.` (CLI: `-i, --id <id>`)
+  * `prompt`: `Required. Explain the specific changes or provide the new information Taskmaster should incorporate into this task.` (CLI: `-p, --prompt <text>`)
+  * `append`: `If true, appends the prompt content to the task's details with a timestamp, rather than replacing them. Behaves like update-subtask.` (CLI: `--append`)
+  * `research`: `Enable Taskmaster to use the research role for more informed updates. Requires appropriate API key.` (CLI: `-r, --research`)
+  * `tag`: `Specify which tag context the task belongs to. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Refine a specific task based on new understanding. Use `--append` to log progress without creating subtasks.
+* **Important:** This MCP tool makes AI calls and can take up to a minute to complete. Please inform users to hang tight while the operation is in progress.
+
+### 11. Update Subtask (`update_subtask`)
+
+* **MCP Tool:** `update_subtask`
+* **CLI Command:** `task-master update-subtask [options]`
+* **Description:** `Append timestamped notes or details to a specific Taskmaster subtask without overwriting existing content. Intended for iterative implementation logging.`
+* **Key Parameters/Options:**
+  * `id`: `Required. The ID of the Taskmaster subtask, e.g., '5.2', to update with new information.` (CLI: `-i, --id <id>`)
+  * `prompt`: `Required. The information, findings, or progress notes to append to the subtask's details with a timestamp.` (CLI: `-p, --prompt <text>`)
+  * `research`: `Enable Taskmaster to use the research role for more informed updates. Requires appropriate API key.` (CLI: `-r, --research`)
+  * `tag`: `Specify which tag context the subtask belongs to. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Log implementation progress, findings, and discoveries during subtask development. Each update is timestamped and appended to preserve the implementation journey.
+* **Important:** This MCP tool makes AI calls and can take up to a minute to complete. Please inform users to hang tight while the operation is in progress.
+
+### 12. Set Task Status (`set_task_status`)
+
+* **MCP Tool:** `set_task_status`
+* **CLI Command:** `task-master set-status [options]`
+* **Description:** `Update the status of one or more Taskmaster tasks or subtasks, e.g., 'pending', 'in-progress', 'done'.`
+* **Key Parameters/Options:**
+  * `id`: `Required. The ID(s) of the Taskmaster task(s) or subtask(s), e.g., '15', '15.2', or '16,17.1', to update.` (CLI: `-i, --id <id>`)
+  * `status`: `Required. The new status to set, e.g., 'done', 'pending', 'in-progress', 'review', 'cancelled'.` (CLI: `-s, --status <status>`)
+  * `tag`: `Specify which tag context to operate on. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Mark progress as tasks move through the development cycle.
+
+### 13. Remove Task (`remove_task`)
+
+* **MCP Tool:** `remove_task`
+* **CLI Command:** `task-master remove-task [options]`
+* **Description:** `Permanently remove a task or subtask from the Taskmaster tasks list.`
+* **Key Parameters/Options:**
+  * `id`: `Required. The ID of the Taskmaster task, e.g., '5', or subtask, e.g., '5.2', to permanently remove.` (CLI: `-i, --id <id>`)
+  * `yes`: `Skip the confirmation prompt and immediately delete the task.` (CLI: `-y, --yes`)
+  * `tag`: `Specify which tag context to operate on. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Permanently delete tasks or subtasks that are no longer needed in the project.
+* **Notes:** Use with caution as this operation cannot be undone. Consider using 'blocked', 'cancelled', or 'deferred' status instead if you just want to exclude a task from active planning but keep it for reference. The command automatically cleans up dependency references in other tasks.
+
+---
+
+## Task Structure & Breakdown
+
+### 14. Expand Task (`expand_task`)
+
+* **MCP Tool:** `expand_task`
+* **CLI Command:** `task-master expand [options]`
+* **Description:** `Use Taskmaster's AI to break down a complex task into smaller, manageable subtasks. Appends subtasks by default.`
+* **Key Parameters/Options:**
+  * `id`: `The ID of the specific Taskmaster task you want to break down into subtasks.` (CLI: `-i, --id <id>`)
+  * `num`: `Optional: Suggests how many subtasks Taskmaster should aim to create. Uses complexity analysis/defaults otherwise.` (CLI: `-n, --num <number>`)
+  * `research`: `Enable Taskmaster to use the research role for more informed subtask generation. Requires appropriate API key.` (CLI: `-r, --research`)
+  * `prompt`: `Optional: Provide extra context or specific instructions to Taskmaster for generating the subtasks.` (CLI: `-p, --prompt <text>`)
+  * `force`: `Optional: If true, clear existing subtasks before generating new ones. Default is false (append).` (CLI: `--force`)
+  * `tag`: `Specify which tag context the task belongs to. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Generate a detailed implementation plan for a complex task before starting coding. Automatically uses complexity report recommendations if available and `num` is not specified.
+* **Important:** This MCP tool makes AI calls and can take up to a minute to complete. Please inform users to hang tight while the operation is in progress.
+
+### 15. Expand All Tasks (`expand_all`)
+
+* **MCP Tool:** `expand_all`
+* **CLI Command:** `task-master expand --all [options]` (Note: CLI uses the `expand` command with the `--all` flag)
+* **Description:** `Tell Taskmaster to automatically expand all eligible pending/in-progress tasks based on complexity analysis or defaults. Appends subtasks by default.`
+* **Key Parameters/Options:**
+  * `num`: `Optional: Suggests how many subtasks Taskmaster should aim to create per task.` (CLI: `-n, --num <number>`)
+  * `research`: `Enable research role for more informed subtask generation. Requires appropriate API key.` (CLI: `-r, --research`)
+  * `prompt`: `Optional: Provide extra context for Taskmaster to apply generally during expansion.` (CLI: `-p, --prompt <text>`)
+  * `force`: `Optional: If true, clear existing subtasks before generating new ones for each eligible task. Default is false (append).` (CLI: `--force`)
+  * `tag`: `Specify which tag context to expand. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Useful after initial task generation or complexity analysis to break down multiple tasks at once.
+* **Important:** This MCP tool makes AI calls and can take up to a minute to complete. Please inform users to hang tight while the operation is in progress.
+
+### 16. Clear Subtasks (`clear_subtasks`)
+
+* **MCP Tool:** `clear_subtasks`
+* **CLI Command:** `task-master clear-subtasks [options]`
+* **Description:** `Remove all subtasks from one or more specified Taskmaster parent tasks.`
+* **Key Parameters/Options:**
+  * `id`: `The ID(s) of the Taskmaster parent task(s) whose subtasks you want to remove, e.g., '15' or '16,18'. Required unless using 'all'.` (CLI: `-i, --id <ids>`)
+  * `all`: `Tell Taskmaster to remove subtasks from all parent tasks.` (CLI: `--all`)
+  * `tag`: `Specify which tag context to operate on. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Used before regenerating subtasks with `expand_task` if the previous breakdown needs replacement.
+
+### 17. Remove Subtask (`remove_subtask`)
+
+* **MCP Tool:** `remove_subtask`
+* **CLI Command:** `task-master remove-subtask [options]`
+* **Description:** `Remove a subtask from its Taskmaster parent, optionally converting it into a standalone task.`
+* **Key Parameters/Options:**
+  * `id`: `Required. The ID(s) of the Taskmaster subtask(s) to remove, e.g., '15.2' or '16.1,16.3'.` (CLI: `-i, --id <id>`)
+  * `convert`: `If used, Taskmaster will turn the subtask into a regular top-level task instead of deleting it.` (CLI: `-c, --convert`)
+  * `generate`: `Enable Taskmaster to regenerate markdown task files after removing the subtask.` (CLI: `--generate`)
+  * `tag`: `Specify which tag context to operate on. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Delete unnecessary subtasks or promote a subtask to a top-level task.
+
+### 18. Move Task (`move_task`)
+
+* **MCP Tool:** `move_task`
+* **CLI Command:** `task-master move [options]`
+* **Description:** `Move a task or subtask to a new position within the task hierarchy.`
+* **Key Parameters/Options:**
+  * `from`: `Required. ID of the task/subtask to move (e.g., "5" or "5.2"). Can be comma-separated for multiple tasks.` (CLI: `--from <id>`)
+  * `to`: `Required. ID of the destination (e.g., "7" or "7.3"). Must match the number of source IDs if comma-separated.` (CLI: `--to <id>`)
+  * `tag`: `Specify which tag context to operate on. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`)
+* **Usage:** Reorganize tasks by moving them within the hierarchy. Supports various scenarios like:
+  * Moving a task to become a subtask
+  * Moving a subtask to become a standalone task
+  * Moving a subtask to a different parent
+  * Reordering subtasks within the same parent
+  * Moving a task to a new, non-existent ID (automatically creates placeholders)
+  * Moving multiple tasks at once with comma-separated IDs
+* **Validation Features:**
+  * Allows moving tasks to non-existent destination IDs (creates placeholder tasks)
+  * Prevents moving to existing task IDs that already have content (to avoid overwriting)
+  * Validates that source tasks exist before attempting to move them
+  * Maintains proper parent-child relationships
+* **Example CLI:** `task-master move --from=5.2 --to=7.3` to move subtask 5.2 to become subtask 7.3.
+* **Example Multi-Move:** `task-master move --from=10,11,12 --to=16,17,18` to move multiple tasks to new positions.
+* **Common Use:** Resolving merge conflicts in tasks.json when multiple team members create tasks on different branches.
+
+---
+
+## Dependency Management
+
+### 19. Add Dependency (`add_dependency`)
+
+* **MCP Tool:** `add_dependency`
+* **CLI Command:** `task-master add-dependency [options]`
+* **Description:** `Define a dependency in Taskmaster, making one task a prerequisite for another.`
+* **Key Parameters/Options:**
+  * `id`: `Required. The ID of the Taskmaster task that will depend on another.` (CLI: `-i, --id <id>`)
+  * `dependsOn`: `Required. The ID of the Taskmaster task that must be completed first, the prerequisite.` (CLI: `-d, --depends-on <id>`)
+  * `tag`: `Specify which tag context to operate on. Defaults to the current active tag.` (CLI: `--tag <name>`)
+  * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <path>`)
+* **Usage:** Establish the correct order of execution between tasks.
+
+### 20. Remove Dependency (`remove_dependency`)
+
+* **MCP Tool:** `remove_dependency`
+* **CLI Command:** `task-master remove-dependency [options]`
+* **Description:** `Remove a dependency relationship between two Taskmaster tasks.`
+* **Key Parameters/Options:**
+  * `id`: `Required. The ID of the Taskmaster task you want to remove a prerequisite from.` (CLI: `-i, --id <id>`)
+  * `dependsOn`: `Required. The ID of the Taskmaster task that should no longer be a prerequisite.` (CLI: `-d, --depends-on <id>`)
+  * `tag`: `Specify which tag context to operate on. Defaults to the current active tag.` (CLI: `--tag <name>`)
Defaults to the current active tag.` (CLI: `--tag <name>`) + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) +* **Usage:** Update task relationships when the order of execution changes. + +### 20. Validate Dependencies (`validate_dependencies`) + +* **MCP Tool:** `validate_dependencies` +* **CLI Command:** `task-master validate-dependencies [options]` +* **Description:** `Check your Taskmaster tasks for dependency issues (like circular references or links to non-existent tasks) without making changes.` +* **Key Parameters/Options:** + * `tag`: `Specify which tag context to validate. Defaults to the current active tag.` (CLI: `--tag <name>`) + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) +* **Usage:** Audit the integrity of your task dependencies. + +### 21. Fix Dependencies (`fix_dependencies`) + +* **MCP Tool:** `fix_dependencies` +* **CLI Command:** `task-master fix-dependencies [options]` +* **Description:** `Automatically fix dependency issues (like circular references or links to non-existent tasks) in your Taskmaster tasks.` +* **Key Parameters/Options:** + * `tag`: `Specify which tag context to fix dependencies in. Defaults to the current active tag.` (CLI: `--tag <name>`) + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) +* **Usage:** Clean up dependency errors automatically. + +--- + +## Analysis & Reporting + +### 22. Analyze Project Complexity (`analyze_project_complexity`) + +* **MCP Tool:** `analyze_project_complexity` +* **CLI Command:** `task-master analyze-complexity [options]` +* **Description:** `Have Taskmaster analyze your tasks to determine their complexity and suggest which ones need to be broken down further.` +* **Key Parameters/Options:** + * `output`: `Where to save the complexity analysis report. Default is '.taskmaster/reports/task-complexity-report.json' (or '..._tagname.json' if a tag is used).` (CLI: `-o, --output <file>`) + * `threshold`: `The minimum complexity score (1-10) that should trigger a recommendation to expand a task.` (CLI: `-t, --threshold <number>`) + * `research`: `Enable research role for more accurate complexity analysis. Requires appropriate API key.` (CLI: `-r, --research`) + * `tag`: `Specify which tag context to analyze. Defaults to the current active tag.` (CLI: `--tag <name>`) + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) +* **Usage:** Used before breaking down tasks to identify which ones need the most attention. +* **Important:** This MCP tool makes AI calls and can take up to a minute to complete. Please inform users to hang tight while the operation is in progress. + +### 23. View Complexity Report (`complexity_report`) + +* **MCP Tool:** `complexity_report` +* **CLI Command:** `task-master complexity-report [options]` +* **Description:** `Display the task complexity analysis report in a readable format.` +* **Key Parameters/Options:** + * `tag`: `Specify which tag context to show the report for. Defaults to the current active tag.` (CLI: `--tag <name>`) + * `file`: `Path to the complexity report (default: '.taskmaster/reports/task-complexity-report.json').` (CLI: `-f, --file <file>`) +* **Usage:** Review and understand the complexity analysis results after running analyze-complexity. + +--- + +## File Management + +### 24. 
Generate Task Files (`generate`) + +* **MCP Tool:** `generate` +* **CLI Command:** `task-master generate [options]` +* **Description:** `Create or update individual Markdown files for each task based on your tasks.json.` +* **Key Parameters/Options:** + * `output`: `The directory where Taskmaster should save the task files (default: in a 'tasks' directory).` (CLI: `-o, --output <directory>`) + * `tag`: `Specify which tag context to generate files for. Defaults to the current active tag.` (CLI: `--tag <name>`) + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) +* **Usage:** Run this after making changes to tasks.json to keep individual task files up to date. This command is now manual and no longer runs automatically. + +--- + +## AI-Powered Research + +### 25. Research (`research`) + +* **MCP Tool:** `research` +* **CLI Command:** `task-master research [options]` +* **Description:** `Perform AI-powered research queries with project context to get fresh, up-to-date information beyond the AI's knowledge cutoff.` +* **Key Parameters/Options:** + * `query`: `Required. Research query/prompt (e.g., "What are the latest best practices for React Query v5?").` (CLI: `[query]` positional or `-q, --query <text>`) + * `taskIds`: `Comma-separated list of task/subtask IDs from the current tag context (e.g., "15,16.2,17").` (CLI: `-i, --id <ids>`) + * `filePaths`: `Comma-separated list of file paths for context (e.g., "src/api.js,docs/readme.md").` (CLI: `-f, --files <paths>`) + * `customContext`: `Additional custom context text to include in the research.` (CLI: `-c, --context <text>`) + * `includeProjectTree`: `Include project file tree structure in context (default: false).` (CLI: `--tree`) + * `detailLevel`: `Detail level for the research response: 'low', 'medium', 'high' (default: medium).` (CLI: `--detail <level>`) + * `saveTo`: `Task or subtask ID (e.g., "15", "15.2") to automatically save the research conversation to.` (CLI: `--save-to <id>`) + * `saveFile`: `If true, saves the research conversation to a markdown file in '.taskmaster/docs/research/'.` (CLI: `--save-file`) + * `noFollowup`: `Disables the interactive follow-up question menu in the CLI.` (CLI: `--no-followup`) + * `tag`: `Specify which tag context to use for task-based context gathering. Defaults to the current active tag.` (CLI: `--tag <name>`) + * `projectRoot`: `The directory of the project. 
Must be an absolute path.` (CLI: Determined automatically) +* **Usage:** **This is a POWERFUL tool that agents should use FREQUENTLY** to: + * Get fresh information beyond knowledge cutoff dates + * Research latest best practices, library updates, security patches + * Find implementation examples for specific technologies + * Validate approaches against current industry standards + * Get contextual advice based on project files and tasks +* **When to Consider Using Research:** + * **Before implementing any task** - Research current best practices + * **When encountering new technologies** - Get up-to-date implementation guidance (libraries, apis, etc) + * **For security-related tasks** - Find latest security recommendations + * **When updating dependencies** - Research breaking changes and migration guides + * **For performance optimization** - Get current performance best practices + * **When debugging complex issues** - Research known solutions and workarounds +* **Research + Action Pattern:** + * Use `research` to gather fresh information + * Use `update_subtask` to commit findings with timestamps + * Use `update_task` to incorporate research into task details + * Use `add_task` with research flag for informed task creation +* **Important:** This MCP tool makes AI calls and can take up to a minute to complete. The research provides FRESH data beyond the AI's training cutoff, making it invaluable for current best practices and recent developments. + +--- + +## Tag Management + +This new suite of commands allows you to manage different task contexts (tags). + +### 26. List Tags (`tags`) + +* **MCP Tool:** `list_tags` +* **CLI Command:** `task-master tags [options]` +* **Description:** `List all available tags with task counts, completion status, and other metadata.` +* **Key Parameters/Options:** + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) + * `--show-metadata`: `Include detailed metadata in the output (e.g., creation date, description).` (CLI: `--show-metadata`) + +### 27. Add Tag (`add_tag`) + +* **MCP Tool:** `add_tag` +* **CLI Command:** `task-master add-tag <tagName> [options]` +* **Description:** `Create a new, empty tag context, or copy tasks from another tag.` +* **Key Parameters/Options:** + * `tagName`: `Name of the new tag to create (alphanumeric, hyphens, underscores).` (CLI: `<tagName>` positional) + * `--from-branch`: `Creates a tag with a name derived from the current git branch, ignoring the <tagName> argument.` (CLI: `--from-branch`) + * `--copy-from-current`: `Copy tasks from the currently active tag to the new tag.` (CLI: `--copy-from-current`) + * `--copy-from <tag>`: `Copy tasks from a specific source tag to the new tag.` (CLI: `--copy-from <tag>`) + * `--description <text>`: `Provide an optional description for the new tag.` (CLI: `-d, --description <text>`) + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) + +### 28. Delete Tag (`delete_tag`) + +* **MCP Tool:** `delete_tag` +* **CLI Command:** `task-master delete-tag <tagName> [options]` +* **Description:** `Permanently delete a tag and all of its associated tasks.` +* **Key Parameters/Options:** + * `tagName`: `Name of the tag to delete.` (CLI: `<tagName>` positional) + * `--yes`: `Skip the confirmation prompt.` (CLI: `-y, --yes`) + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) + +### 29. 
Use Tag (`use_tag`) + +* **MCP Tool:** `use_tag` +* **CLI Command:** `task-master use-tag <tagName>` +* **Description:** `Switch your active task context to a different tag.` +* **Key Parameters/Options:** + * `tagName`: `Name of the tag to switch to.` (CLI: `<tagName>` positional) + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) + +### 30. Rename Tag (`rename_tag`) + +* **MCP Tool:** `rename_tag` +* **CLI Command:** `task-master rename-tag <oldName> <newName>` +* **Description:** `Rename an existing tag.` +* **Key Parameters/Options:** + * `oldName`: `The current name of the tag.` (CLI: `<oldName>` positional) + * `newName`: `The new name for the tag.` (CLI: `<newName>` positional) + * `file`: `Path to your Taskmaster 'tasks.json' file. Default relies on auto-detection.` (CLI: `-f, --file <file>`) + +### 31. Copy Tag (`copy_tag`) + +* **MCP Tool:** `copy_tag` +* **CLI Command:** `task-master copy-tag <sourceName> <targetName> [options]` +* **Description:** `Copy an entire tag context, including all its tasks and metadata, to a new tag.` +* **Key Parameters/Options:** + * `sourceName`: `Name of the tag to copy from.` (CLI: `<sourceName>` positional) + * `targetName`: `Name of the new tag to create.` (CLI: `<targetName>` positional) + * `--description <text>`: `Optional description for the new tag.` (CLI: `-d, --description <text>`) + +--- + +## Miscellaneous + +### 32. Sync Readme (`sync-readme`) -- experimental + +* **MCP Tool:** N/A +* **CLI Command:** `task-master sync-readme [options]` +* **Description:** `Exports your task list to your project's README.md file, useful for showcasing progress.` +* **Key Parameters/Options:** + * `status`: `Filter tasks by status (e.g., 'pending', 'done').` (CLI: `-s, --status <status>`) + * `withSubtasks`: `Include subtasks in the export.` (CLI: `--with-subtasks`) + * `tag`: `Specify which tag context to export from. Defaults to the current active tag.` (CLI: `--tag <name>`) + +--- + +## Environment Variables Configuration (Updated) + +Taskmaster primarily uses the **`.taskmaster/config.json`** file (in project root) for configuration (models, parameters, logging level, etc.), managed via `task-master models --setup`. + +Environment variables are used **only** for sensitive API keys related to AI providers and specific overrides like the Ollama base URL: + +* **API Keys (Required for corresponding provider):** + * `ANTHROPIC_API_KEY` + * `PERPLEXITY_API_KEY` + * `OPENAI_API_KEY` + * `GOOGLE_API_KEY` + * `MISTRAL_API_KEY` + * `AZURE_OPENAI_API_KEY` (Requires `AZURE_OPENAI_ENDPOINT` too) + * `OPENROUTER_API_KEY` + * `XAI_API_KEY` + * `OLLAMA_API_KEY` (Requires `OLLAMA_BASE_URL` too) +* **Endpoints (Optional/Provider Specific inside .taskmaster/config.json):** + * `AZURE_OPENAI_ENDPOINT` + * `OLLAMA_BASE_URL` (Default: `http://localhost:11434/api`) + +**Set API keys** in your **`.env`** file in the project root (for CLI use) or within the `env` section of your **`.cursor/mcp.json`** file (for MCP/Cursor integration). All other settings (model choice, max tokens, temperature, log level, custom endpoints) are managed in `.taskmaster/config.json` via `task-master models` command or `models` MCP tool. + +--- + +For details on how these commands fit into the development process, see the [dev_workflow.mdc](mdc:.cursor/rules/taskmaster/dev_workflow.mdc). 
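
To tie the tag commands above together, here is a hypothetical CLI session for spinning up a feature-specific context and tearing it down afterwards (the tag name and task ID are illustrative):

```bash
# Create a tag seeded from the current context and give it a description
task-master add-tag feature-thumbnails --copy-from-current -d "Thumbnail automation work"

# Switch the active context to the new tag and work as usual
task-master use-tag feature-thumbnails
task-master list
task-master set-status --id=3 --status=in-progress

# When the work is merged, return to master and remove the tag
task-master use-tag master
task-master delete-tag feature-thumbnails -y
```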
\ No newline at end of file diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..379dc0a --- /dev/null +++ b/.env.example @@ -0,0 +1,27 @@ +# YouTube-Directus Automation Configuration + +# Directus Configuration +DIRECTUS_URL="https://enias.zeabur.app/" # Your Directus instance URL +DIRECTUS_TOKEN="your_directus_token_here" # Required: Directus API token with file upload permissions + +# YouTube API Configuration +YOUTUBE_API_KEY="your_youtube_api_key_here" # Optional: YouTube Data API v3 key (fallback without it) + +# Webhook Security +DIRECTUS_WEBHOOK_SECRET="your_webhook_secret_here" # Optional: Secret for verifying webhook signatures + +# Service Configuration +PORT=8000 # Port for FastAPI service + +# Task Master AI API Keys (for project management) +ANTHROPIC_API_KEY="your_anthropic_api_key_here" # Required: Format: sk-ant-api03-... +PERPLEXITY_API_KEY="your_perplexity_api_key_here" # Optional: Format: pplx-... +OPENAI_API_KEY="your_openai_api_key_here" # Optional, for OpenAI models. Format: sk-proj-... +GOOGLE_API_KEY="your_google_api_key_here" # Optional, for Google Gemini models. +MISTRAL_API_KEY="your_mistral_key_here" # Optional, for Mistral AI models. +XAI_API_KEY="YOUR_XAI_KEY_HERE" # Optional, for xAI AI models. +GROQ_API_KEY="YOUR_GROQ_KEY_HERE" # Optional, for Groq models. +OPENROUTER_API_KEY="YOUR_OPENROUTER_KEY_HERE" # Optional, for OpenRouter models. +AZURE_OPENAI_API_KEY="your_azure_key_here" # Optional, for Azure OpenAI models (requires endpoint in .taskmaster/config.json). +OLLAMA_API_KEY="your_ollama_api_key_here" # Optional: For remote Ollama servers that require authentication. +GITHUB_API_KEY="your_github_api_key_here" # Optional: For GitHub import/export features. Format: ghp_... or github_pat_... \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f270674 --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +dev-debug.log + +# Dependency directories +node_modules/ + +# Environment variables +.env + +# Editor directories and files +.idea +.vscode +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? 
+ +# OS specific +.DS_Store diff --git a/.taskmaster/config.json b/.taskmaster/config.json new file mode 100644 index 0000000..36213a1 --- /dev/null +++ b/.taskmaster/config.json @@ -0,0 +1,37 @@ +{ + "models": { + "main": { + "provider": "anthropic", + "modelId": "claude-3-7-sonnet-20250219", + "maxTokens": 120000, + "temperature": 0.2 + }, + "research": { + "provider": "perplexity", + "modelId": "sonar-pro", + "maxTokens": 8700, + "temperature": 0.1 + }, + "fallback": { + "provider": "anthropic", + "modelId": "claude-3-7-sonnet-20250219", + "maxTokens": 120000, + "temperature": 0.2 + } + }, + "global": { + "logLevel": "info", + "debug": false, + "defaultNumTasks": 10, + "defaultSubtasks": 5, + "defaultPriority": "medium", + "projectName": "Taskmaster", + "ollamaBaseURL": "http://localhost:11434/api", + "bedrockBaseURL": "https://bedrock.us-east-1.amazonaws.com", + "responseLanguage": "English", + "defaultTag": "master", + "azureOpenaiBaseURL": "https://your-endpoint.openai.azure.com/", + "userId": "1234567890" + }, + "claudeCode": {} +} \ No newline at end of file diff --git a/.taskmaster/docs/prd.txt b/.taskmaster/docs/prd.txt new file mode 100644 index 0000000..bd35d10 --- /dev/null +++ b/.taskmaster/docs/prd.txt @@ -0,0 +1,224 @@ +# YouTube Automation Project - PRD + +## Project Overview +Implement automatic YouTube thumbnail population system for Directus media collection, providing seamless integration between YouTube content and CMS media management with fallback mechanisms and performance optimization. + +## Goals and Success Metrics + +### Primary Goals +- Automate YouTube thumbnail extraction and population +- Provide robust fallback mechanisms for API unavailability +- Integrate seamlessly with Directus media collections +- Support multiple thumbnail resolutions and preferences +- Enable optional local storage for performance and reliability + +### Success Metrics +- 100% successful thumbnail extraction from valid YouTube URLs +- Sub-2 second response time for thumbnail population +- 99.9% uptime for thumbnail service +- Zero data loss during thumbnail operations +- Comprehensive error handling and recovery + +## Technical Requirements + +### Core Technology Stack +- **Backend**: Node.js/Python with YouTube Data API integration +- **Storage**: Local file system with TTL management +- **CMS Integration**: Directus hooks and API endpoints +- **Caching**: Redis for API response caching +- **Image Processing**: Sharp/ImageMagick for format conversion + +### API Integration Requirements +- YouTube Data API v3 client implementation +- Authentication and quota management +- Rate limiting and circuit breaker patterns +- Exponential backoff for error recovery +- Comprehensive logging and monitoring + +### Storage Requirements +- Configurable local storage directory +- TTL-based cache invalidation +- Disk space monitoring and cleanup +- Format conversion capabilities +- Backup and recovery mechanisms + +## Features and Functionality + +### Core Features +1. **YouTube URL Processing** + - Support all YouTube URL formats (standard, short, embedded) + - Video ID extraction with validation + - URL parameter and timestamp handling + - Playlist reference management + - Edge case handling for malformed URLs + +2. **Thumbnail Management** + - Multiple resolution support (maxres, high, medium, default) + - Configurable resolution priority hierarchy + - Quality validation and fallback logic + - Format conversion and optimization + - Metadata extraction and storage + +3. 
**API Integration** + - YouTube Data API client with authentication + - Rate limit handling and quota management + - Circuit breaker for service outages + - Response caching for performance + - Comprehensive error handling + +4. **Local Storage System** + - Configurable storage directory structure + - Video ID-based filename organization + - TTL-based cache management + - Disk space monitoring and alerts + - Automatic cleanup and archiving + +### Advanced Features +1. **Fallback Mechanisms** + - Static URL pattern fallback when API fails + - Image existence validation via HEAD requests + - Multiple fallback resolution attempts + - Graceful degradation for service outages + +2. **Directus Integration** + - Action hooks for automatic population + - Custom field mapping and validation + - Bulk operation support + - Real-time updates and notifications + +3. **Performance Optimization** + - Intelligent caching strategies + - Batch processing capabilities + - Asynchronous operation handling + - Resource utilization monitoring + +## Implementation Phases + +### Phase 1: Foundation (Week 1) +- YouTube URL parser implementation +- Video ID extraction and validation +- Configuration system design +- Basic logging and error handling + +### Phase 2: API Integration (Week 2) +- YouTube Data API client development +- Authentication and credential management +- Rate limiting and circuit breaker implementation +- Response caching and optimization + +### Phase 3: Storage System (Week 3) +- Local storage service implementation +- TTL management and cleanup +- Format conversion capabilities +- Performance monitoring and alerts + +### Phase 4: Directus Integration (Week 4) +- Directus action hook implementation +- Field mapping and validation +- Bulk operation support +- Real-time update mechanisms + +### Phase 5: Advanced Features (Week 5) +- Fallback mechanism implementation +- Performance optimization +- Comprehensive error handling +- Monitoring and alerting setup + +### Phase 6: Testing and Deployment (Week 6) +- Unit and integration testing +- Load testing and performance validation +- Security audit and vulnerability assessment +- Production deployment and monitoring + +## Technical Specifications + +### Performance Requirements +- Thumbnail extraction: < 2 seconds per URL +- Batch processing: 100+ URLs per minute +- Cache hit ratio: > 80% for repeated requests +- Memory usage: < 512MB under normal load +- Disk usage: Configurable with automatic cleanup + +### Reliability Requirements +- Service uptime: 99.9% availability +- Error recovery: Automatic retry with exponential backoff +- Data integrity: 100% accuracy for thumbnail URLs +- Fallback success: > 95% when primary API fails + +### Security Requirements +- API key management and rotation +- Input validation and sanitization +- Rate limit enforcement +- Audit logging for all operations +- Secure credential storage + +### Scalability Requirements +- Horizontal scaling support +- Load balancing capabilities +- Database connection pooling +- Caching layer optimization +- Resource monitoring and alerting + +## Dependencies and Risks + +### Dependencies +- YouTube Data API availability and quota +- Directus instance configuration and access +- Local file system storage availability +- Redis caching infrastructure (optional) +- Image processing library availability + +### Technical Risks +- YouTube API quota exhaustion +- API service outages or changes +- Directus hook configuration complexity +- Storage space limitations +- Image processing performance 
bottlenecks + +### Business Risks +- YouTube API pricing changes +- Terms of service violations +- Copyright and usage compliance +- Performance impact on CMS operations + +### Mitigation Strategies +- Implement comprehensive fallback mechanisms +- Monitor API usage and implement quotas +- Regular backup and recovery testing +- Performance monitoring and optimization +- Legal compliance review and documentation + +## Success Criteria +- All 8 implementation subtasks completed successfully +- Thumbnail extraction working for all supported URL formats +- Fallback mechanisms functional when primary API fails +- Directus integration seamless and reliable +- Performance benchmarks met or exceeded +- Comprehensive test coverage (>90%) +- Production deployment successful with monitoring + +## Deliverables +1. YouTube URL parser with comprehensive format support +2. Configuration system for resolution preferences and settings +3. YouTube Data API client with resilience and error handling +4. Thumbnail resolution fallback logic implementation +5. Local storage service with TTL and cleanup management +6. Directus integration hooks and field population logic +7. Fallback mechanism for API unavailability scenarios +8. Comprehensive test suite and validation pipeline + +## Configuration Options +- Resolution priority hierarchy (maxres > high > medium > default) +- YouTube API key and quota settings +- Local storage directory and TTL configuration +- Directus instance connection and authentication +- Caching settings and performance tuning +- Logging levels and monitoring endpoints + +## Monitoring and Alerting +- API usage and quota monitoring +- Response time and performance metrics +- Error rate and failure tracking +- Storage usage and cleanup alerts +- Directus integration health checks +- Security and compliance monitoring \ No newline at end of file diff --git a/.taskmaster/state.json b/.taskmaster/state.json new file mode 100644 index 0000000..b89f42c --- /dev/null +++ b/.taskmaster/state.json @@ -0,0 +1,6 @@ +{ + "currentTag": "master", + "lastSwitched": "2025-08-11T02:27:13.781Z", + "branchTagMapping": {}, + "migrationNoticeShown": false +} \ No newline at end of file diff --git a/.taskmaster/tasks/tasks.json b/.taskmaster/tasks/tasks.json new file mode 100644 index 0000000..a559b04 --- /dev/null +++ b/.taskmaster/tasks/tasks.json @@ -0,0 +1,112 @@ +{ + "master": { + "tasks": [ + { + "id": 1, + "title": "Implement automatic YouTube thumbnail population for Directus media collection system", + "description": "Automatically extract YouTube video IDs from URLs and populate thumbnail URLs in the media_youtube_details collection, including fallback logic for different resolutions and optional local storage of thumbnails.", + "status": "done", + "priority": "medium", + "dependencies": [], + "details": "This task involves integrating logic to parse YouTube URLs, extract video IDs, and fetch thumbnails from YouTube's API. The implementation should include fallback logic for different thumbnail resolutions (e.g., default, medium, high). Additionally, the system should support optional local storage of thumbnails for performance and reliability.", + "testStrategy": "Verify that the system correctly extracts video IDs from YouTube URLs. Ensure that the thumbnail URL is populated in the media_youtube_details collection. 
Test fallback logic by simulating different resolution preferences and ensuring the correct thumbnail is selected.", + "subtasks": [ + { + "id": 1, + "title": "Implement YouTube URL parser for video ID extraction", + "description": "Create a robust module to extract video IDs from all YouTube URL formats (standard, short, embedded, timestamps)", + "status": "done", + "dependencies": [], + "details": "Use URL parsing functions (like parse_url and parse_str) to isolate video IDs from query parameters or path segments. Handle edge cases like URLs with additional parameters, timestamps, or playlist references. Validate extracted IDs against YouTube's 11-character pattern." + }, + { + "id": 2, + "title": "Design configuration system for thumbnail resolution preferences", + "description": "Implement configurable resolution hierarchy and API settings management", + "status": "done", + "dependencies": [], + "details": "Create configuration schema for resolution priority order (e.g. ['maxres', 'high', 'medium', 'default']), YouTube API key storage, and local storage toggle. Use environment variables for sensitive data with fallback defaults." + }, + { + "id": 3, + "title": "Implement YouTube Data API client with error resilience", + "description": "Build authenticated API client with rate limit handling and retry mechanisms", + "status": "done", + "dependencies": [ + "1.1", + "1.2" + ], + "details": "Use Axios/Fetch to call YouTube's videos.list endpoint. Implement exponential backoff for 429 errors, circuit breakers for service outages, and graceful degradation. Cache API responses temporarily to reduce quota consumption." + }, + { + "id": 4, + "title": "Develop thumbnail resolution fallback logic", + "description": "Create resolution selection algorithm with quality prioritization", + "status": "done", + "dependencies": [ + "1.3" + ], + "details": "Parse API response thumbnails object to select highest available resolution per configuration hierarchy. Implement validation for image dimensions and HTTP status. Fallback to lower resolutions when higher ones are missing or invalid." + }, + { + "id": 5, + "title": "Implement local thumbnail storage service", + "description": "Create caching mechanism for thumbnail persistence with TTL management", + "status": "done", + "dependencies": [ + "1.2", + "1.4" + ], + "details": "Develop service to store thumbnails in configurable directory using video ID as filename. Implement TTL-based cache invalidation, disk space monitoring, and on-demand regeneration. Use sharp/ImageMagick for format conversion if needed." + }, + { + "id": 6, + "title": "Integrate thumbnail service with Directus media collection", + "description": "Build Directus hook to auto-populate thumbnail fields from YouTube URLs", + "status": "done", + "dependencies": [ + "1.4", + "1.5" + ], + "details": "Create Directus action handler triggered on media_youtube_details creation/update. Extract video ID from source URL, fetch thumbnail via unified service (API + cache), and populate thumbnail_url field." + }, + { + "id": 7, + "title": "Implement fallback mechanism for API unavailability", + "description": "Create secondary thumbnail source when YouTube API is inaccessible", + "status": "done", + "dependencies": [ + "1.1", + "1.4" + ], + "details": "Develop logic to generate thumbnail URLs using YouTube's static pattern (https://img.youtube.com/vi/{id}/{resolution}.jpg) when API fails. Validate image existence via HEAD requests." 
+ }, + { + "id": 8, + "title": "Develop comprehensive test suite and validation pipeline", + "description": "Create automated tests covering all integration points and failure scenarios", + "status": "done", + "dependencies": [ + "1.1", + "1.2", + "1.3", + "1.4", + "1.5", + "1.6", + "1.7" + ], + "details": "Build unit tests for URL parsing and resolution logic. Create integration tests for API/cache interactions. Implement E2E tests simulating Directus workflows. Include performance tests for bulk operations and quota usage monitoring." + } + ] + } + ], + "metadata": { + "version": "1.0.0", + "createdAt": "2025-01-11T00:00:00Z", + "lastUpdated": "2025-01-11T00:00:00Z", + "created": "2025-08-11T08:07:11.464Z", + "description": "Tasks for master context", + "updated": "2025-08-11T08:07:11.466Z" + } + } +} \ No newline at end of file diff --git a/.taskmaster/templates/example_prd.txt b/.taskmaster/templates/example_prd.txt new file mode 100644 index 0000000..194114d --- /dev/null +++ b/.taskmaster/templates/example_prd.txt @@ -0,0 +1,47 @@ +<context> +# Overview +[Provide a high-level overview of your product here. Explain what problem it solves, who it's for, and why it's valuable.] + +# Core Features +[List and describe the main features of your product. For each feature, include: +- What it does +- Why it's important +- How it works at a high level] + +# User Experience +[Describe the user journey and experience. Include: +- User personas +- Key user flows +- UI/UX considerations] +</context> +<PRD> +# Technical Architecture +[Outline the technical implementation details: +- System components +- Data models +- APIs and integrations +- Infrastructure requirements] + +# Development Roadmap +[Break down the development process into phases: +- MVP requirements +- Future enhancements +- Do not think about timelines whatsoever -- all that matters is scope and detailing exactly what needs to be build in each phase so it can later be cut up into tasks] + +# Logical Dependency Chain +[Define the logical order of development: +- Which features need to be built first (foundation) +- Getting as quickly as possible to something usable/visible front end that works +- Properly pacing and scoping each feature so it is atomic but can also be built upon and improved as development approaches] + +# Risks and Mitigations +[Identify potential risks and how they'll be addressed: +- Technical challenges +- Figuring out the MVP that we can build upon +- Resource constraints] + +# Appendix +[Include any additional information: +- Research findings +- Technical specifications] +</PRD> \ No newline at end of file diff --git a/DIRECTUS_FLOW_SETUP.md b/DIRECTUS_FLOW_SETUP.md new file mode 100644 index 0000000..586b114 --- /dev/null +++ b/DIRECTUS_FLOW_SETUP.md @@ -0,0 +1,212 @@ +# Directus Flow Setup Guide - YouTube Thumbnail Automation + +## Overview +This guide sets up a Directus Flow to automatically extract YouTube thumbnails when `media_items` are created or updated with `type` = `youtube_video` or `youtube`. + +## Prerequisites +1. Access to your Directus admin at `https://enias.zeabur.app/admin` +2. Admin permissions to create flows and modify collections + +## Step 1: Add youtube_thumbnail Field to media_items Collection + +1. Go to **Settings** → **Data Model** → **media_items** +2. Click **Create Field** +3. 
Configure the field: + - **Field Name**: `youtube_thumbnail` + - **Type**: File + - **Interface**: File (Image) + - **Display Template**: Show thumbnail preview in layouts + - **Required**: No + - **Default**: null + +## Step 2: Create the YouTube Automation Flow + +### 2.1 Create New Flow +1. Go to **Settings** → **Flows** +2. Click **Create Flow** +3. Configure basic settings: + - **Name**: "YouTube Thumbnail Auto-Population" + - **Status**: Active + - **Icon**: smart_display (or video icon) + - **Color**: #FF0000 (YouTube red) + - **Description**: "Automatically extract and populate YouTube thumbnails" + +### 2.2 Configure Trigger +1. Click **Add Trigger** +2. Select **Event Hook** trigger +3. Configure trigger: + - **Scope**: Items + - **Actions**: Update, Create + - **Collections**: media_items + +### 2.3 Add Filter Condition +In the trigger configuration, add a filter condition: +```json +{ + "$and": [ + { + "$or": [ + { "type": { "_eq": "youtube_video" } }, + { "type": { "_eq": "youtube" } } + ] + }, + { + "url": { "_nnull": true } + } + ] +} +``` + +### 2.4 Create Operations + +#### Operation 1: Extract Video ID (Run Script) +1. Add **Run Script** operation +2. Name: "Extract YouTube Video ID" +3. Code: +```javascript +// Extract YouTube video ID from URL +function extractYouTubeId(url) { + if (!url) return null; + + // Handle different YouTube URL formats + const patterns = [ + /(?:youtube\.com\/watch\?v=)([a-zA-Z0-9_-]{11})/, + /(?:youtu\.be\/)([a-zA-Z0-9_-]{11})/, + /(?:youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})/, + /(?:youtube\.com\/v\/)([a-zA-Z0-9_-]{11})/ + ]; + + for (const pattern of patterns) { + const match = url.match(pattern); + if (match) return match[1]; + } + + return null; +} + +const videoId = extractYouTubeId($trigger.payload.url); +if (!videoId) { + throw new Error('Could not extract YouTube video ID from URL: ' + $trigger.payload.url); +} + +// Generate thumbnail URLs with quality fallback +const thumbnailUrls = [ + `https://img.youtube.com/vi/${videoId}/maxresdefault.jpg`, + `https://img.youtube.com/vi/${videoId}/hqdefault.jpg`, + `https://img.youtube.com/vi/${videoId}/mqdefault.jpg`, + `https://img.youtube.com/vi/${videoId}/default.jpg` +]; + +module.exports = { + video_id: videoId, + thumbnail_urls: thumbnailUrls +}; +``` + +#### Operation 2: Download & Upload Thumbnail (Run Script) +1. Add **Run Script** operation +2. Name: "Download and Upload Thumbnail" +3. 
Code: +```javascript +const axios = require('axios'); + +// Download thumbnail and upload to Directus +async function downloadAndUploadThumbnail(thumbnailUrls, videoId) { + // Try each thumbnail URL until we find one that works + for (const url of thumbnailUrls) { + try { + // Download thumbnail + const response = await axios.get(url, { + responseType: 'arraybuffer', + timeout: 10000, + headers: { + 'User-Agent': 'Mozilla/5.0 (compatible; DirectusBot/1.0)' + } + }); + + if (response.status === 200 && response.data.length > 1000) { // Ensure it's a valid image + // Upload to Directus files collection + const fileData = { + title: `YouTube Thumbnail - ${videoId}`, + filename_download: `youtube_${videoId}.jpg`, + type: 'image/jpeg', + storage: 'local', + data: Buffer.from(response.data).toString('base64') + }; + + const uploadResult = await $directus.files.createOne(fileData); + return uploadResult.id; + } + } catch (error) { + console.log(`Failed to download from ${url}:`, error.message); + continue; // Try next URL + } + } + + throw new Error('Failed to download thumbnail from any source'); +} + +const fileId = await downloadAndUploadThumbnail( + $last.thumbnail_urls, + $last.video_id +); + +module.exports = { thumbnail_file_id: fileId }; +``` + +#### Operation 3: Update Media Item (Update Data) +1. Add **Update Data** operation +2. Configure: + - **Collection**: media_items + - **Key**: `{{$trigger.key}}` + - **Payload**: + ```json + { + "youtube_thumbnail": "{{$last.thumbnail_file_id}}" + } + ``` + +## Step 3: Test the Flow + +### Test Data +Create or update a `media_items` record with: +- `type`: "youtube_video" +- `url`: "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + +### Expected Result +The Flow should automatically: +1. Extract video ID: `dQw4w9WgXcQ` +2. Download the best available thumbnail +3. Upload it to Directus files +4. Update the `youtube_thumbnail` field with the file reference + +## Step 4: Verify Integration + +1. Check **Activity** tab for flow execution logs +2. Verify `youtube_thumbnail` field is populated +3. Confirm thumbnail displays in collection layouts +4. Test with different YouTube URL formats + +## Troubleshooting + +### Common Issues +1. **Flow not triggering**: Check filter conditions and collection name +2. **Thumbnail download fails**: YouTube may block requests - add retry logic +3. **File upload fails**: Check Directus storage configuration +4. **Permission errors**: Ensure flow has file creation permissions + +### Debug Tips +1. Enable flow logging in Settings +2. Check Activity logs for error details +3. Test operations individually +4. Verify field permissions and data types + +## Alternative Approach: HTTP Request Operation + +If the Run Script approach doesn't work, you can use HTTP Request operations: + +1. **HTTP Request** to download thumbnail +2. **HTTP Request** to upload to Directus files API +3. **Update Data** to link the file + +This approach uses the REST API instead of the JavaScript SDK. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..43c177e --- /dev/null +++ b/README.md @@ -0,0 +1,302 @@ +# YouTube-Directus Automation Service + +Automatically extract YouTube video thumbnails and upload them to Directus when YouTube videos are created or updated in your Directus CMS. 
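
As a quick end-to-end illustration before the feature list: creating a media item with a YouTube URL is all it takes to kick the automation off. A hypothetical API call, where the `media_items` collection and `youtube_url` field match the schema examples later in this README:

```bash
curl -X POST "https://your-directus-instance.com/items/media_items" \
  -H "Authorization: Bearer your_directus_api_token" \
  -H "Content-Type: application/json" \
  -d '{"title": "Demo video", "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"}'
```

Directus fires the configured webhook, and the service takes care of the rest.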
+ +## 🎯 MVP Features + +- **Webhook Integration**: Receives Directus webhooks on item create/update events +- **YouTube Processing**: Extracts video IDs from various YouTube URL formats +- **Thumbnail Extraction**: Downloads best quality thumbnail (maxres → high → medium → default) +- **Directus Upload**: Uploads thumbnails to Directus files collection +- **Item Association**: Updates YouTube video items with thumbnail file reference +- **Error Handling**: Robust error handling with logging and fallback mechanisms +- **Security**: Optional webhook signature verification + +## 🏗️ Architecture + +``` +[Directus CMS] → [Webhook] → [FastAPI Service] → [YouTube API] → [Thumbnail Download] + ↓ +[Directus Item Update] ← [File Upload] ← [Image Processing] ← [Quality Selection] +``` + +## 🚀 Quick Start + +### 1. Setup Environment + +```bash +# Copy environment template +cp .env.example .env + +# Edit .env with your configuration +nano .env +``` + +Required configuration: +```bash +DIRECTUS_URL="https://your-directus-instance.com/" +DIRECTUS_TOKEN="your_directus_api_token" +``` + +Optional but recommended: +```bash +YOUTUBE_API_KEY="your_youtube_api_key" # For better reliability +DIRECTUS_WEBHOOK_SECRET="your_webhook_secret" # For security +``` + +### 2. Start the Service + +```bash +# Install dependencies and start service +./start.sh +``` + +The service will start on `http://localhost:8000` + +### 3. Test the Service + +```bash +# Run basic service tests +python3 test_service.py +``` + +### 4. Configure Directus Webhook + +In your Directus admin panel: + +1. Go to **Settings** → **Webhooks** +2. Create a new webhook: + - **Name**: YouTube Thumbnail Automation + - **Method**: POST + - **URL**: `http://localhost:8000/webhook/directus` (or your deployed service URL) + - **Status**: Active + - **Collections**: Select your media_items collection + - **Actions**: Create, Update + +## 🔧 Configuration + +### Environment Variables + +| Variable | Required | Description | +|----------|----------|-------------| +| `DIRECTUS_URL` | ✅ | Your Directus instance URL | +| `DIRECTUS_TOKEN` | ✅ | Directus API token with file upload permissions | +| `YOUTUBE_API_KEY` | ⚠️ | YouTube Data API v3 key (recommended) | +| `DIRECTUS_WEBHOOK_SECRET` | ⚠️ | Secret for webhook signature verification | +| `PORT` | ❌ | Service port (default: 8000) | + +### YouTube API Setup (Optional but Recommended) + +1. Go to [Google Cloud Console](https://console.cloud.google.com/) +2. Create a new project or select existing +3. Enable **YouTube Data API v3** +4. Create API credentials (API Key) +5. 
Add the API key to your `.env` file + +**Benefits of YouTube API:** +- Better error handling for private/deleted videos +- Access to video metadata +- More reliable thumbnail URLs +- Rate limit information + +**Without YouTube API:** +- Service uses fallback thumbnail URLs +- Works for most public videos +- Less error information available + +## 📋 Collection Schema Requirements + +Your Directus collection should have: + +**Required Fields:** +- A field containing YouTube URLs (detected automatically from common field names) +- A file field for thumbnail association (assumes `thumbnail` field name) + +**Supported YouTube URL Field Names:** +- `youtube_url` +- `url` +- `link` +- `video_url` +- `youtube_link` + +**Example Collection Schema:** +```sql +-- media_items collection +id (Primary Key) +title (String) +youtube_url (String) -- YouTube URL field +thumbnail (File) -- Thumbnail association +description (Text) +created_at (DateTime) +updated_at (DateTime) +``` + +## 🧪 Testing + +### Manual Testing + +1. Start the service: `./start.sh` +2. Run tests: `python3 test_service.py` +3. Check service health: `curl http://localhost:8000/health` + +### Test with Real Directus + +1. Create a new item in your media_items collection +2. Add a YouTube URL (e.g., `https://www.youtube.com/watch?v=dQw4w9WgXcQ`) +3. Check service logs for processing status +4. Verify thumbnail appears in Directus + +### Webhook Testing + +```bash +# Test webhook endpoint directly +curl -X POST http://localhost:8000/webhook/directus \ + -H "Content-Type: application/json" \ + -d '{ + "event": "items.create", + "payload": { + "id": "test-123", + "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + }, + "keys": ["test-123"] + }' +``` + +## 📊 Monitoring + +### Service Endpoints + +- `GET /` - Service information +- `GET /health` - Health check with configuration status +- `POST /webhook/directus` - Webhook endpoint for Directus + +### Health Check Response + +```json +{ + "status": "healthy", + "service": "youtube-directus-automation", + "timestamp": "2025-01-11T10:30:00Z", + "config": { + "directus_url": "https://your-instance.com/", + "has_directus_token": true, + "has_youtube_api_key": false, + "has_webhook_secret": false + } +} +``` + +### Logging + +Service logs include: +- Webhook event processing +- YouTube video ID extraction +- Thumbnail download status +- Directus upload results +- Error details and stack traces + +## 🔒 Security + +### Webhook Signature Verification + +Enable webhook security by setting `DIRECTUS_WEBHOOK_SECRET`: + +1. Generate a secure random secret +2. Add to `.env`: `DIRECTUS_WEBHOOK_SECRET="your-secret-here"` +3. Configure the same secret in Directus webhook settings +4. 
Service will verify HMAC-SHA256 signatures automatically + +### API Token Permissions + +Your Directus token needs: +- Read access to your YouTube videos collection +- Create/Upload access to files collection +- Update access to your YouTube videos collection + +## 🚨 Troubleshooting + +### Common Issues + +**❌ "DIRECTUS_TOKEN not set" error** +- Add your Directus API token to `.env` +- Verify token has correct permissions + +**❌ "Could not extract video ID" error** +- Check YouTube URL format +- Ensure URL field contains valid YouTube URLs + +**❌ "Failed to download thumbnail" error** +- Video might be private/deleted +- Check internet connectivity +- Try adding YouTube API key for better error handling + +**❌ "Directus upload failed" error** +- Check Directus token permissions +- Verify Directus instance is accessible +- Check Directus storage configuration + +**❌ Webhook not triggering** +- Verify webhook URL is correct +- Check Directus webhook configuration +- Ensure service is running and accessible +- Check webhook secret configuration + +### Debug Mode + +For detailed debugging, check service logs when running `./start.sh`. + +## 🛠️ Development + +### Project Structure + +``` +youtube-automation/ +├── src/ +│ └── main.py # Main FastAPI application +├── requirements.txt # Python dependencies +├── .env.example # Environment template +├── .env # Your configuration (not in git) +├── start.sh # Service startup script +├── test_service.py # Test suite +├── README.md # This file +└── .taskmaster/ # Task Master project management +``` + +### Extending the Service + +The service is built with extensibility in mind: + +- Add new webhook endpoints in `main.py` +- Extend `YouTubeProcessor` class for additional features +- Add new thumbnail processing logic +- Implement additional file format support + +### Task Master Integration + +This project uses Task Master for project management: + +```bash +# View project tasks +task-master list + +# Get next task +task-master next + +# Update task status +task-master set-status --id=1 --status=done +``` + +## 🤝 Contributing + +1. Check existing tasks: `task-master list` +2. Pick a task: `task-master next` +3. Make changes and test: `python3 test_service.py` +4. Update task status: `task-master set-status --id=X --status=done` + +## 📄 License + +This project is part of the Personal AI Assistant ecosystem. See main project license. + +--- + +**🎉 Happy automating!** If you run into issues, check the troubleshooting section or create a task in Task Master. \ No newline at end of file diff --git a/WATCHER_SERVICE_README.md b/WATCHER_SERVICE_README.md new file mode 100644 index 0000000..2c8d22e --- /dev/null +++ b/WATCHER_SERVICE_README.md @@ -0,0 +1,176 @@ +# YouTube Thumbnail Watcher Service + +## 🎯 Overview + +A Python backend service that automatically monitors Directus `media_items` collection and downloads YouTube thumbnails for items that don't have them yet. This provides a robust alternative to Directus Flows. 
+ +## ✅ Features Implemented + +- **Automatic Polling**: Checks Directus every 30 seconds for unprocessed YouTube items +- **Smart Filtering**: Only processes items with `type='youtube_video'` or `type='youtube'` that have URLs but no thumbnails +- **Quality Fallback**: Downloads best available thumbnail (maxres → high → medium → default) +- **Robust Error Handling**: Continues processing on individual failures +- **Comprehensive Logging**: Detailed logs with statistics and error tracking +- **Stateless Design**: Can be restarted anytime without data loss + +## 🏗️ Architecture + +``` +projects/youtube-automation/ +├── config.py # Configuration management +├── src/ +│ ├── directus_client.py # Directus API wrapper +│ ├── youtube_processor.py # YouTube thumbnail logic +│ └── watcher_service.py # Main polling service +├── run_watcher.sh # Startup script +├── requirements.txt # Dependencies +└── .env # Environment variables +``` + +## 🚀 Usage + +### Start the Service +```bash +# Make sure you have a .env file configured +./run_watcher.sh +``` + +### Environment Variables (.env) +```bash +DIRECTUS_URL="https://enias.zeabur.app/" +DIRECTUS_TOKEN="your_token" +YOUTUBE_API_KEY="optional_youtube_api_key" +POLL_INTERVAL=30 +BATCH_SIZE=10 +LOG_LEVEL=INFO +``` + +### Monitor Logs +```bash +tail -f /tmp/youtube_watcher.log +``` + +## 📊 Service Statistics + +The service tracks and reports: +- Items processed +- Success/failure rates +- Uptime +- Processing speed + +Example output: +``` +📊 YouTube Thumbnail Watcher Statistics + Uptime: 0:02:15 + Items Processed: 5 + Succeeded: 3 + Failed: 2 + Success Rate: 60.0% +``` + +## 🔄 Processing Flow + +1. **Poll Directus**: Query `media_items` for unprocessed YouTube videos +2. **Extract Video IDs**: Parse YouTube URLs to get video identifiers +3. **Download Thumbnails**: Try multiple quality levels until success +4. **Upload to Directus**: Create file entries in Directus files collection +5. **Update Items**: Link thumbnails to original media items +6. **Log Results**: Track success/failure for monitoring + +## 🧪 Testing Results + +Successfully tested with: +- ✅ Rick Roll video (dQw4w9WgXcQ) - 65KB thumbnail +- ✅ LLM Introduction video (zjkBMFhNj_g) - 184KB thumbnail +- ✅ RAG tutorial video - Full processing pipeline + +Failed gracefully with: +- ❌ Private/deleted videos - Proper error handling +- ❌ Age-restricted videos - Continues to next item + +## 🔧 Configuration + +### Database Query +```python +filter = { + "_and": [ + {"_or": [{"type": {"_eq": "youtube_video"}}, {"type": {"_eq": "youtube"}}]}, + {"url": {"_nnull": True}}, + {"youtube_thumbnail": {"_null": True}} + ] +} +``` + +### Thumbnail Quality Priority +1. **maxres**: 1280x720 (best quality) +2. **high**: 480x360 +3. **medium**: 320x180 +4. 
**default**: 120x90 (fallback) + +## 🎉 Benefits Over Directus Flows + +✅ **Better Error Handling**: Individual failures don't stop the service +✅ **Comprehensive Logging**: Full visibility into processing +✅ **Easy Testing**: Can test individual components +✅ **Flexible Deployment**: Run anywhere, not tied to Directus +✅ **Stateless Recovery**: Restart anytime without issues +✅ **Performance Monitoring**: Built-in statistics and metrics + +## 🔍 Monitoring + +### Key Metrics to Watch +- Processing success rate (should be >80% for public videos) +- Queue size (items waiting for processing) +- Error patterns (404s vs network issues) +- Processing speed (items per minute) + +### Log Levels +- **INFO**: Normal operation and successful processing +- **WARNING**: Failed downloads, retries +- **ERROR**: Critical failures, configuration issues +- **DEBUG**: Detailed processing information + +## 🚀 Production Deployment + +### Systemd Service (Linux) +```bash +# Create service file +sudo nano /etc/systemd/system/youtube-watcher.service + +[Unit] +Description=YouTube Thumbnail Watcher +After=network.target + +[Service] +Type=simple +User=www-data +WorkingDirectory=/path/to/youtube-automation +ExecStart=/path/to/youtube-automation/run_watcher.sh +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +### Docker Alternative +```dockerfile +FROM python:3.11-slim +WORKDIR /app +COPY requirements.txt . +RUN pip install -r requirements.txt +COPY . . +CMD ["python", "src/watcher_service.py"] +``` + +## 🎯 Next Steps + +The core YouTube thumbnail automation is complete and working! The service successfully: + +1. ✅ Polls Directus for unprocessed YouTube items +2. ✅ Downloads thumbnails with quality fallback +3. ✅ Uploads files to Directus +4. ✅ Updates media items with thumbnail references +5. ✅ Handles errors gracefully +6. ✅ Provides comprehensive logging and statistics + +The service is ready for production use! 
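
For orientation, here is a minimal sketch of the processing flow described above. The names are illustrative rather than the actual `src/` modules, and Directus's `/files/import` endpoint is used as a convenient stand-in for the service's download-and-upload logic:

```python
#!/usr/bin/env python3
"""Minimal sketch of the watcher loop (illustrative names, not the real src/ modules)."""

import json
import re
import time

import requests

DIRECTUS_URL = "https://enias.zeabur.app/"
DIRECTUS_TOKEN = "your_token"
POLL_INTERVAL = 30
HEADERS = {"Authorization": f"Bearer {DIRECTUS_TOKEN}"}

# Same filter the service uses: YouTube items with a URL but no thumbnail yet
FILTER = {"_and": [
    {"_or": [{"type": {"_eq": "youtube_video"}}, {"type": {"_eq": "youtube"}}]},
    {"url": {"_nnull": True}},
    {"youtube_thumbnail": {"_null": True}},
]}

QUALITIES = ["maxresdefault", "hqdefault", "mqdefault", "default"]


def extract_video_id(url):
    # Handle watch?v=, youtu.be/, and embed/ URL formats
    match = re.search(r"(?:v=|youtu\.be/|embed/)([A-Za-z0-9_-]{11})", url or "")
    return match.group(1) if match else None


def best_thumbnail_url(video_id):
    # Walk the quality ladder and keep the first URL that serves a real image
    for quality in QUALITIES:
        url = f"https://img.youtube.com/vi/{video_id}/{quality}.jpg"
        if requests.head(url, timeout=10).status_code == 200:
            return url
    return None


def process_item(item):
    video_id = extract_video_id(item["url"])
    thumb_url = best_thumbnail_url(video_id) if video_id else None
    if not thumb_url:
        return False
    # Import the image into Directus files, then link it to the media item
    imported = requests.post(f"{DIRECTUS_URL}files/import", headers=HEADERS,
                             json={"url": thumb_url}, timeout=30)
    imported.raise_for_status()
    file_id = imported.json()["data"]["id"]
    requests.patch(f"{DIRECTUS_URL}items/media_items/{item['id']}", headers=HEADERS,
                   json={"youtube_thumbnail": file_id}, timeout=10).raise_for_status()
    return True


def main():
    while True:
        resp = requests.get(f"{DIRECTUS_URL}items/media_items", headers=HEADERS,
                            params={"filter": json.dumps(FILTER), "limit": 10}, timeout=10)
        resp.raise_for_status()
        for item in resp.json()["data"]:
            try:
                process_item(item)
            except requests.RequestException as exc:
                print(f"Failed on item {item['id']}: {exc}")  # keep going on failure

        time.sleep(POLL_INTERVAL)


if __name__ == "__main__":
    main()
```

The real service layers batching, statistics, and configurable log levels on top of this skeleton.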
\ No newline at end of file diff --git a/__pycache__/config.cpython-313.pyc b/__pycache__/config.cpython-313.pyc new file mode 100644 index 0000000..dd29865 Binary files /dev/null and b/__pycache__/config.cpython-313.pyc differ diff --git a/config.py b/config.py new file mode 100644 index 0000000..f724ac4 --- /dev/null +++ b/config.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +""" +Configuration for YouTube Thumbnail Watcher Service +""" + +import os +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# Directus Configuration +DIRECTUS_URL = os.getenv("DIRECTUS_URL", "https://enias.zeabur.app/") +DIRECTUS_TOKEN = os.getenv("DIRECTUS_TOKEN") + +# YouTube API (optional - fallback works without it) +YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY") + +# Service Configuration +POLL_INTERVAL = int(os.getenv("POLL_INTERVAL", "30")) # seconds +BATCH_SIZE = int(os.getenv("BATCH_SIZE", "10")) # items per batch +LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") + +# Validation +if not DIRECTUS_TOKEN: + raise ValueError("DIRECTUS_TOKEN environment variable is required") + +if not DIRECTUS_URL.endswith('/'): + DIRECTUS_URL += '/' + +# API endpoints +DIRECTUS_ITEMS_URL = f"{DIRECTUS_URL}items" +DIRECTUS_FILES_URL = f"{DIRECTUS_URL}files" + +print(f"Configuration loaded:") +print(f" Directus URL: {DIRECTUS_URL}") +print(f" Has Directus Token: {bool(DIRECTUS_TOKEN)}") +print(f" Has YouTube API Key: {bool(YOUTUBE_API_KEY)}") +print(f" Poll Interval: {POLL_INTERVAL}s") +print(f" Batch Size: {BATCH_SIZE}") \ No newline at end of file diff --git a/directus-flow-config.json b/directus-flow-config.json new file mode 100644 index 0000000..387aa3c --- /dev/null +++ b/directus-flow-config.json @@ -0,0 +1,49 @@ +{ + "flow_config": { + "name": "YouTube Thumbnail Auto-Population", + "description": "Automatically extract and populate YouTube thumbnails when media_items are created/updated with YouTube type", + "status": "active", + "icon": "smart_display", + "color": "#FF0000", + "trigger": { + "type": "event", + "event": "items.update", + "collections": ["media_items"], + "condition": { + "$or": [ + { "type": { "_eq": "youtube_video" } }, + { "type": { "_eq": "youtube" } } + ] + } + }, + "operations": [ + { + "id": "extract_video_id", + "type": "exec", + "operation": "javascript", + "options": { + "code": "// Extract YouTube video ID from URL\nfunction extractYouTubeId(url) {\n if (!url) return null;\n \n // Handle different YouTube URL formats\n const patterns = [\n /(?:youtube\\.com\\/watch\\?v=)([a-zA-Z0-9_-]{11})/,\n /(?:youtu\\.be\\/)([a-zA-Z0-9_-]{11})/,\n /(?:youtube\\.com\\/embed\\/)([a-zA-Z0-9_-]{11})/,\n /(?:youtube\\.com\\/v\\/)([a-zA-Z0-9_-]{11})/\n ];\n \n for (const pattern of patterns) {\n const match = url.match(pattern);\n if (match) return match[1];\n }\n \n return null;\n}\n\nconst videoId = extractYouTubeId(payload.url);\nif (!videoId) {\n throw new Error('Could not extract YouTube video ID from URL: ' + payload.url);\n}\n\n// Generate thumbnail URLs with quality fallback\nconst thumbnailUrls = [\n `https://img.youtube.com/vi/${videoId}/maxresdefault.jpg`,\n `https://img.youtube.com/vi/${videoId}/hqdefault.jpg`,\n `https://img.youtube.com/vi/${videoId}/mqdefault.jpg`,\n `https://img.youtube.com/vi/${videoId}/default.jpg`\n];\n\nreturn {\n video_id: videoId,\n thumbnail_urls: thumbnailUrls\n};" + } + }, + { + "id": "download_thumbnail", + "type": "exec", + "operation": "javascript", + "options": { + "code": "// Download thumbnail and upload to Directus\nasync function 
downloadAndUploadThumbnail(thumbnailUrls, directus, videoId) {\n const axios = require('axios');\n const FormData = require('form-data');\n \n // Try each thumbnail URL until we find one that works\n for (const url of thumbnailUrls) {\n try {\n // Download thumbnail\n const response = await axios.get(url, { \n responseType: 'arraybuffer',\n timeout: 10000\n });\n \n if (response.status === 200 && response.data.length > 0) {\n // Create form data for file upload\n const formData = new FormData();\n formData.append('file', Buffer.from(response.data), {\n filename: `youtube_${videoId}.jpg`,\n contentType: 'image/jpeg'\n });\n \n // Upload to Directus files\n const uploadResponse = await directus.files.createOne(formData);\n \n return uploadResponse.id; // Return the file ID\n }\n } catch (error) {\n console.log(`Failed to download from ${url}:`, error.message);\n continue; // Try next URL\n }\n }\n \n throw new Error('Failed to download thumbnail from any source');\n}\n\nconst fileId = await downloadAndUploadThumbnail(\n extract_video_id.thumbnail_urls, \n $directus, \n extract_video_id.video_id\n);\n\nreturn { thumbnail_file_id: fileId };" + } + }, + { + "id": "update_item", + "type": "item-update", + "collection": "media_items", + "options": { + "payload": { + "youtube_thumbnail": "{{download_thumbnail.thumbnail_file_id}}" + }, + "key": "{{$trigger.payload.key}}" + } + } + ] + } +} \ No newline at end of file diff --git a/microsite/README.md b/microsite/README.md new file mode 100644 index 0000000..11a80d9 --- /dev/null +++ b/microsite/README.md @@ -0,0 +1,159 @@ +# 🎬 YouTube to Directus Microsite + +A smart, local web interface for adding YouTube videos to your Directus media_items collection with automatic metadata extraction. + +![Microsite Interface](https://img.shields.io/badge/Interface-Web%20UI-blue) +![Status](https://img.shields.io/badge/Status-Ready-green) +![Smart Metadata](https://img.shields.io/badge/Metadata-Smart%20Extraction-purple) + +## ✨ Features + +### 🧠 Smart Metadata Extraction +- **Automatic Title Detection** - Extracts real YouTube video titles +- **Description Extraction** - Gets video descriptions (up to 500 chars) +- **Channel Information** - Identifies video creator/channel +- **Duration & Views** - Captures video length and view counts +- **Upload Date** - Records original upload timestamps +- **Thumbnail Processing** - Downloads and links high-quality thumbnails + +### 🎯 User-Friendly Interface +- **Clean Modern Design** - Gradient backgrounds and smooth animations +- **Preview Functionality** - See video info before adding to Directus +- **Auto-Fill Titles** - Automatically populates custom title field +- **Rich Feedback** - Detailed success/error messages with metadata +- **Quick Examples** - One-click test URLs for demo purposes + +### 🔄 Directus Integration +- **Direct API Communication** - No intermediary services required +- **Comprehensive Fields** - Populates title, description, channel, duration, views, etc. +- **Immediate Thumbnail Upload** - Downloads and links thumbnails instantly +- **Fallback Support** - Watcher service handles any missed thumbnails + +## 🚀 Quick Start + +```bash +# Start the microsite +./run_microsite.sh + +# Access in browser +open http://localhost:5001 +``` + +## 📋 Requirements + +- **Python 3.11+** with virtual environment +- **Directus Instance** with valid access token +- **Network Access** to YouTube and your Directus server +- **Flask** and **requests** libraries (auto-installed) + +## 🎯 Usage + +### Adding Videos + +1. 
**Enter YouTube URL** - Any valid YouTube video URL +2. **Preview (Optional)** - Click "Preview Info" to see extracted metadata +3. **Customize Title** - Modify title if desired (auto-filled from preview) +4. **Add to Directus** - Creates media_item with full metadata and thumbnail + +### Supported URL Formats + +✅ `https://www.youtube.com/watch?v=dQw4w9WgXcQ` +✅ `https://youtu.be/dQw4w9WgXcQ` +✅ `https://youtube.com/embed/dQw4w9WgXcQ` +✅ `https://m.youtube.com/watch?v=dQw4w9WgXcQ` + +## 📊 Smart Metadata Example + +When you add a video, the system extracts: + +```json +{ + "title": "Never Gonna Give You Up", + "description": "The official video for Rick Astley - Never Gonna Give You Up...", + "channel_name": "Rick Astley", + "duration": 212, + "duration_formatted": "3:32", + "view_count": 1400000000, + "view_count_formatted": "1.4B views", + "upload_date": "2009-10-25T06:57:33+00:00", + "video_id": "dQw4w9WgXcQ", + "thumbnail_url": "https://img.youtube.com/vi/dQw4w9WgXcQ/maxresdefault.jpg" +} +``` + +## 🎨 Interface Screenshot + +The microsite features: +- **Responsive Design** - Works on desktop and mobile +- **Loading States** - Clear visual feedback during processing +- **Rich Results** - Detailed success messages with all extracted metadata +- **Error Handling** - Friendly error messages for invalid URLs or network issues + +## ⚙️ Configuration + +The microsite uses the same `.env` configuration as the main project: + +```bash +DIRECTUS_URL="https://your-directus-instance.com/" +DIRECTUS_TOKEN="your-access-token" +``` + +## 🔄 Integration with Watcher Service + +The microsite works seamlessly with the existing watcher service: + +- **Immediate Processing** - Tries to download thumbnails immediately +- **Fallback Safety** - Watcher service catches any missed thumbnails +- **No Conflicts** - Both services can run simultaneously +- **Consistent Data** - Uses same Directus client and field mappings + +## 📈 Benefits Over Manual Entry + +✅ **10x Faster** - No manual copying of titles and descriptions +✅ **100% Accurate** - Extracts official metadata directly from YouTube +✅ **Rich Data** - Captures views, duration, upload dates automatically +✅ **Consistent Format** - Standardized data structure across all entries +✅ **Thumbnail Automation** - No manual thumbnail downloading required + +## 🎯 Perfect For + +- **Content Curators** building YouTube video collections +- **Researchers** organizing video references with metadata +- **Content Managers** maintaining media libraries +- **Developers** who want a simple UI for bulk video entry +- **Anyone** tired of manually copying YouTube metadata + +## 🔧 Technical Details + +### Architecture +- **Flask Backend** - Lightweight Python web server +- **Smart Scraping** - Multiple extraction methods for reliability +- **Direct API Integration** - Communicates directly with Directus REST API +- **Error Resilience** - Graceful fallbacks when metadata extraction fails + +### File Structure +``` +microsite/ +├── app.py # Main Flask application +├── templates/ +│ └── index.html # Web interface +├── run_microsite.sh # Startup script +└── README.md # This file +``` + +### API Endpoints +- `GET /` - Main interface +- `POST /add_video` - Process and add video +- `GET /preview` - Preview video metadata +- `GET /status` - Service health check + +## 🎉 Ready to Use! 
+ +The microsite is production-ready and can handle: +- ✅ Public YouTube videos +- ✅ Unlisted videos (with direct URLs) +- ✅ Network interruptions (graceful error handling) +- ✅ Invalid URLs (clear error messages) +- ✅ Concurrent usage (multiple users simultaneously) + +Start adding YouTube videos to your Directus collection with rich metadata in seconds! 🚀 \ No newline at end of file diff --git a/microsite/app.py b/microsite/app.py new file mode 100644 index 0000000..7351823 --- /dev/null +++ b/microsite/app.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +""" +YouTube to Directus Microsite +Simple Flask app to add YouTube videos to Directus media_items +""" + +import sys +import os +import re +import json +import logging +from datetime import datetime +from urllib.parse import urlparse, parse_qs + +from flask import Flask, render_template, request, jsonify, redirect, url_for +from flask_cors import CORS + +# Add parent directories to path for imports +import os +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +src_dir = os.path.join(parent_dir, 'src') +sys.path.insert(0, parent_dir) +sys.path.insert(0, src_dir) + +from config import DIRECTUS_TOKEN, DIRECTUS_URL +from directus_client import DirectusClient +from youtube_processor import YouTubeProcessor +from youtube_metadata import YouTubeMetadataExtractor + +app = Flask(__name__) +app.secret_key = 'youtube-directus-microsite-secret-key' + +# Enable CORS for all routes +CORS(app, origins='*') + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Initialize services +directus_client = DirectusClient() +youtube_processor = YouTubeProcessor() +metadata_extractor = YouTubeMetadataExtractor() + + +def get_smart_video_info(url): + """Get comprehensive video information using smart metadata extraction""" + try: + metadata = metadata_extractor.get_video_metadata(url) + return metadata + except Exception as e: + logger.error(f"Error extracting video metadata: {e}") + return None + + +@app.route('/') +def index(): + """Main page with YouTube URL form""" + return render_template('index.html') + + +@app.route('/add_video', methods=['POST']) +def add_video(): + """Process YouTube URL and add to Directus with smart metadata extraction""" + try: + youtube_url = request.form.get('youtube_url', '').strip() + custom_title = request.form.get('custom_title', '').strip() + + if not youtube_url: + return jsonify({'error': 'YouTube URL is required'}), 400 + + # Extract comprehensive video metadata + logger.info(f"Extracting metadata for URL: {youtube_url}") + video_info = get_smart_video_info(youtube_url) + if not video_info: + return jsonify({'error': 'Invalid YouTube URL or failed to extract metadata'}), 400 + + # Use custom title if provided, otherwise use extracted title + if custom_title: + video_info['title'] = custom_title + + logger.info(f"Processing video: {video_info['video_id']} - {video_info['title']}") + + # Parse upload date to proper format if available + published_date = None + if video_info.get('upload_date'): + try: + # Try to parse ISO format date + from dateutil import parser + parsed_date = parser.parse(video_info['upload_date']) + published_date = parsed_date.strftime('%Y-%m-%d') + except: + published_date = None + + # Create metadata JSON with all extra YouTube data + metadata_json = { + 'duration': video_info.get('duration'), + 'duration_formatted': video_info.get('duration_formatted'), + 'view_count': video_info.get('view_count'), + 'view_count_formatted': 
video_info.get('view_count_formatted'), + 'like_count': video_info.get('like_count'), + 'like_count_formatted': video_info.get('like_count_formatted'), + 'comment_count': video_info.get('comment_count'), + 'comment_count_formatted': video_info.get('comment_count_formatted'), + 'channel_id': video_info.get('channel_id'), + 'category': video_info.get('category'), + 'keywords': video_info.get('keywords', []), + 'is_live': video_info.get('is_live', False), + 'is_family_safe': video_info.get('is_family_safe', True), + 'original_upload_date': video_info.get('upload_date'), + 'last_scraped': datetime.now().isoformat() + } + + # Create media item data with correct field mappings + media_item_data = { + 'title': video_info['title'], + 'url': video_info['canonical_url'], + 'type': video_info['type'], + 'description': video_info.get('description', ''), + 'external_id': video_info['video_id'], # Store YouTube video ID + 'author': video_info.get('channel_name', ''), # Channel name in author field + 'source_platform': 'YouTube', # Always YouTube for this microsite + 'metadata': metadata_json, # All extra data in JSON field + 'status': 'published' + } + + # Add published date if we successfully parsed it + if published_date: + media_item_data['published_date'] = published_date + + # Add to Directus + item_id = directus_client.create_media_item(media_item_data) + + if item_id: + logger.info(f"Successfully created media item: {item_id}") + + # Try to download and add thumbnail immediately + thumbnail_success = False + try: + thumbnail_data, filename = youtube_processor.download_best_thumbnail(video_info['video_id']) + if thumbnail_data and filename: + file_id = directus_client.upload_file( + thumbnail_data, + filename, + title=f"YouTube Thumbnail - {video_info['video_id']}" + ) + + if file_id: + thumbnail_success = directus_client.update_media_item_thumbnail(item_id, file_id) + + except Exception as e: + logger.warning(f"Failed to add thumbnail immediately: {e}") + + # Prepare response with rich metadata + response_data = { + 'success': True, + 'item_id': item_id, + 'video_id': video_info['video_id'], + 'title': video_info['title'], + 'description': video_info.get('description', '')[:200] + ('...' 
if len(video_info.get('description', '')) > 200 else ''), + 'channel_name': video_info.get('channel_name', ''), + 'duration_formatted': video_info.get('duration_formatted', ''), + 'view_count_formatted': video_info.get('view_count_formatted', ''), + 'thumbnail_added': thumbnail_success, + 'message': f"Successfully added: {video_info['title']}" + } + + return jsonify(response_data) + else: + return jsonify({'error': 'Failed to create media item in Directus'}), 500 + + except Exception as e: + logger.error(f"Error processing video: {e}") + return jsonify({'error': f'Processing error: {str(e)}'}), 500 + + +@app.route('/preview') +def preview(): + """Preview YouTube video information before adding""" + youtube_url = request.args.get('url', '').strip() + + if not youtube_url: + return jsonify({'error': 'URL parameter required'}), 400 + + video_info = get_smart_video_info(youtube_url) + if not video_info: + return jsonify({'error': 'Invalid YouTube URL or failed to extract metadata'}), 400 + + return jsonify({ + 'success': True, + 'video_info': video_info + }) + + +@app.route('/status') +def status(): + """Check service status""" + try: + # Test Directus connection + test_items = directus_client.get_unprocessed_youtube_items(limit=1) + directus_status = 'connected' + except Exception as e: + directus_status = f'error: {str(e)}' + + return jsonify({ + 'service': 'YouTube to Directus Microsite', + 'status': 'running', + 'directus': directus_status, + 'directus_url': DIRECTUS_URL, + 'timestamp': datetime.now().isoformat() + }) + + +if __name__ == '__main__': + print("🎬 YouTube to Directus Microsite") + print("================================") + print(f"Directus URL: {DIRECTUS_URL}") + print(f"Has Token: {'Yes' if DIRECTUS_TOKEN else 'No'}") + print("") + print("Starting Flask server on http://localhost:5001") + + app.run(host='0.0.0.0', port=5001, debug=True) \ No newline at end of file diff --git a/microsite/message b/microsite/message new file mode 100644 index 0000000..2590a05 --- /dev/null +++ b/microsite/message @@ -0,0 +1,20 @@ +This code is for registering as a customer on Drug Hub. + +STOP! MAKE SURE THE ADDRESS IN YOUR ADDRESS BAR IS: +************************************************************************ +cpm7c2a6fwlh5ade3557uvkfrbkrz43daq2k72gk2hpkv2lenxhpfeyd.onion +************************************************************************ +ADDRESS MUST END IN .onion IF IT DOES NOT YOU ARE ON A PHISHING LINK! +************************************************************************ +IF DIFFERENT GET A VALID LINK FROM: +http://drughubb7lmqymhpq24wmhihloii3dlp3xlqhz356dqdvhmkv2ngf4id.onion +https://drughub.su + +OUR PERMANENT MIRROR: +http://drughub666py6fgnml5kmxa7fva5noppkf6wkai4fwwvzwt4rz645aqd.onion + + +Required code below: +------------------------------------------------------------------------ +DRUG_HUB_BUYER_REGISTRATION_18887e397a33aa41121828453769e12c3391e0ff48ab399c317c8298b2945837a07190fbd1fd30a5786f2637b2 +------------------------------------------------------------------------ diff --git a/microsite/message 2 b/microsite/message 2 new file mode 100644 index 0000000..faa70a8 --- /dev/null +++ b/microsite/message 2 @@ -0,0 +1,20 @@ +This code is for registering as a customer on Drug Hub. + +STOP! 
MAKE SURE THE ADDRESS IN YOUR ADDRESS BAR IS: +************************************************************************ +vxckf4hwsyw7gcx3zlvfprh33kpvfhqyiuo4kgwrgowu6zhtrgl2phqd.onion +************************************************************************ +ADDRESS MUST END IN .onion IF IT DOES NOT YOU ARE ON A PHISHING LINK! +************************************************************************ +IF DIFFERENT GET A VALID LINK FROM: +http://drughubb7lmqymhpq24wmhihloii3dlp3xlqhz356dqdvhmkv2ngf4id.onion +https://drughub.su + +OUR PERMANENT MIRROR: +http://drughub666py6fgnml5kmxa7fva5noppkf6wkai4fwwvzwt4rz645aqd.onion + + +Required code below: +------------------------------------------------------------------------ +DRUG_HUB_BUYER_REGISTRATION_3dd970cb03b49cf16398d2d63a4040ff67a1b654ac378e9d20b6cda7efaa3972fd6de5e3f7b99751f24469 +------------------------------------------------------------------------ diff --git a/microsite/message 2.asc b/microsite/message 2.asc new file mode 100644 index 0000000..62dc1e0 --- /dev/null +++ b/microsite/message 2.asc @@ -0,0 +1,4 @@ +please send to +Dashon v +442 East 115th St apt 2b +NY, NY 10029 \ No newline at end of file diff --git a/microsite/message 3 b/microsite/message 3 new file mode 100644 index 0000000..f6a8f72 --- /dev/null +++ b/microsite/message 3 @@ -0,0 +1,20 @@ +This code is for signin as a customer on Drug Hub. + +STOP! MAKE SURE THE ADDRESS IN YOUR ADDRESS BAR IS: +************************************************************************ +zxrwhwaawtiye6jsrno26z3p3irsbuinswimwufatb7aavxjdygzq5yd.onion +************************************************************************ +ADDRESS MUST END IN .onion IF IT DOES NOT YOU ARE ON A PHISHING LINK! +************************************************************************ +IF DIFFERENT GET A VALID LINK FROM: +http://drughubb7lmqymhpq24wmhihloii3dlp3xlqhz356dqdvhmkv2ngf4id.onion +https://drughub.su + +OUR PERMANENT MIRROR: +http://drughub666py6fgnml5kmxa7fva5noppkf6wkai4fwwvzwt4rz645aqd.onion + + +Required code below: +------------------------------------------------------------------------ +DRUG_HUB_BUYER_SIGNIN_8e7684e02b5ce6e8303dcdb0d9e0aa24817afe2937b5abb740b90b6875efd84769661e36f607 +------------------------------------------------------------------------ diff --git a/microsite/message.asc b/microsite/message.asc new file mode 100644 index 0000000..b6d6045 --- /dev/null +++ b/microsite/message.asc @@ -0,0 +1,39 @@ +-----BEGIN PGP MESSAGE----- + +hQGMA1NsQzVen6BtAQv+MTuCt4kAalJlEq1MWdSthrtdmUl74sa39LdQAJHU2OQy +M/tCxvkpEdxD7vtpr6CX76RmIAtVl96SMRa6z0v/XL9Bc4csbZz9W1DPvIrCok9W +5r+/8Xr325HUcF5sjys4b/c1pvSkCVDnXigyUfZhK3kujDa1iu2lxBRzKc0yrx7F +9ueVlml0gwstl4fBs9T4azvkRHYzsgcQnQAs1w4JKBO6bMCP7jShTh8BIKb/101F +VgKSgoRwBTYQM4t80Qoh00KX4OMy3iYYjwOpN8YCYZpt5OFWqJuihd1IWJ1qlKLC +LqB9nBVGa9v2SF4mwkwBuKW774qiCb8rf0v12qsaUfRUd1Eb5ijWuyrRNn7oXu+3 +n6i9TpAvmjAhnu7DxVBPNa+42LIoRfVOUGD2Zz/eH2W6wqS+I09YOSMGodXonMNv +3U3r3kjIM+SjidpLfXBazW2HocWTyNNUGrhqIR7Uhl5IzU7jRk8DuFva3uaUg0n8 +5gEJ4pgO4kErvCNZ7nxMhQIMA6qOYo6yTP1GARAAyLpCb+gv2L+oZXSlQ5PNRhH4 +NNThYBv202rO78VmhIVIUOEhqStuMNi0NiuT1NQnb2J0Ebm3lPfFbrUYQ6dm+k7d +daX+e+vBQZQaa7y6Kls5CLQUlhspuR7/Ur4hjGhNERA5z3DJWWOaM7jqyg+NwK1H +cGvh/bfBFfyznoVH80eOtJc+Zo8dzcPPRaawTWrvCDxFi69Gd4JLYowK/xf/OL6r +cJNk/H8lNCZwzs+b1OW0Uqp311MlSgbbtkYr3RFs8ej7swHcPkAWxuAO4lFDmtP5 +Ot7OB05MQFudYMKgU3v4oyrJDlBYdvpSGB4Xod2MqZMsKaDwZEUpncxmNnOSTspW +BU/cTSzQxHcrcsWZ20Yade3QJ/MVBuufSrYQvJmvG7QuxJTZ0Ad4ZuHHegodK2Dp +qaK18fjqEJ5lQ6UA9g9Fxt8adbV5lYQvbyspA891Y3s5PKISp1RtyBgmjjU11gJw 
+0kjDLw6rK5Fcr9iCEBH1Otib4Hcufmvb1n5hdIbFHdr1sbRLFo6sb/JJ6wIaghDC +Snq5IUVsiV1ltO9jVj+BZypbHFVcYV60B2YO+XPwGpuPSn8RM7uFrkTknKBw8MpT +cPtOwpOSeEXuywhB7EbNnNcjNiwnLs+bO6Hn3k1LkWZw59ncTIFqh7vLl+RRKcOR +zlli+8sNimH2drk2Y/bS6QHs+UYeCx4x4+wnfHIVXacue7IRqXK7jhppzO9IeYJF +pSwOV8gVg9BzBEIo1sVSofkX5rePzZuc8NN+viDnFx4gsk00IaRkstEBboGBiSZ/ +HBYI+4rpBzbxtqEXzQJcnHLrdSSu4z8JCXTSAcdVVjfPDPFPCfOJCYM160MM9Nip +P6jHOVrrTWbNj4b7CshzUScqBLKl3YZ3ex9hReCCPrMc6xxvdaB4crGYgMfslLJo +3baUtMBt8A5U7bgux9GMtrKo8kIdOVdeTvWECpt/z1eQNG/LajYg+4Uzdn2138Pr +9ahxtwsnbeoIyglOXbM9/Z4rkAAqKsfSDvI3j9xyBh/XyC7O7nUcU9ZilHwSBa5L +eEaR5Wpv5ldmlQnqN/wWflUhjz0gggrTXdDOf4uotPE4A+k+dkNo9d61NC5W4vZ4 +mRREbPnxMrQlsyIBGl5bpWa/uHdf7pKE2P7g8UBa6xigzRzh3GtkFrXZNIkMIqad +Y8gBXsypxp9Hh4uJFPL5XnJmZQ4l1/tOBqOzVrA5EKejwYOPyA/g44pQCZSzfRDc +YynrldMc+GPSao+CedfjhstxN+jLQ1+x3HkcfPcIVFPKKkJ67QQuJT6gO9w8GgSq +I18Ovd05OCbZeEKgBqfbY7euJJc194pGtRnUxE48tCzlLcyFTXjz4yvMkPqqQ+8T +wAKbiXJ4R75MArpjzItjvwWtBEPNwtV0B04ybS93mYBaoUxtTK4aBx8zf4t+u3C4 +RN4V14vBJ1XCtXDNgdbFeFzqIj7lxHR+4QTfHvbNwYqpp5CDIV2SYe1VZ7WaNaRX +ebCw2mhzbADfAb+Du7jf01tfSHY0h/pvIbQHwGjBsXQjayEL3RHHZGFVDVWlb7s9 +n3fljf16BaTWh1HwUsSQawl/0mNnp4LLJ/3pyroo2tQyDgDwhftD3AhOHiM8m0HZ +GQ7CEA== +=Xmya +-----END PGP MESSAGE----- diff --git a/microsite/message.asc.gpg b/microsite/message.asc.gpg new file mode 100644 index 0000000..e26a27a Binary files /dev/null and b/microsite/message.asc.gpg differ diff --git a/microsite/run_microsite.sh b/microsite/run_microsite.sh new file mode 100755 index 0000000..3cc7ef4 --- /dev/null +++ b/microsite/run_microsite.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# YouTube to Directus Microsite Startup Script + +set -e + +echo "🎬 Starting YouTube to Directus Microsite" +echo "========================================" + +# Check if .env file exists +if [ ! -f ../.env ]; then + echo "❌ Error: .env file not found in parent directory!" + echo "💡 Make sure the .env file exists with Directus configuration" + exit 1 +fi + +# Check if Python virtual environment exists +if [ ! -d ../venv ]; then + echo "📦 Creating Python virtual environment..." + cd .. && python3 -m venv venv + cd microsite +fi + +# Activate virtual environment +echo "🔧 Activating virtual environment..." +source ../venv/bin/activate + +# Install dependencies +echo "📥 Installing dependencies..." +pip install -r ../requirements.txt flask + +# Check configuration +echo "⚙️ Checking configuration..." +cd .. +python3 -c "import config; print('Configuration loaded successfully')" +cd microsite + +# Start the microsite +echo "🚀 Starting microsite server..." +echo " - URL: http://localhost:5001" +echo " - Press Ctrl+C to stop" +echo "" + +python3 app.py \ No newline at end of file diff --git a/microsite/templates/index.html b/microsite/templates/index.html new file mode 100644 index 0000000..093a0b8 --- /dev/null +++ b/microsite/templates/index.html @@ -0,0 +1,384 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>YouTube to Directus | Add Videos + + + +
+ [Markup of microsite/templates/index.html was stripped when this dump was rendered as HTML; only the visible page text is recoverable:]
+ Header: "🎬 YouTube to Directus" / "Add YouTube videos to your media collection"
+ Form: YouTube URL input with quick-test links (Rick Roll, LLM Intro), an optional custom-title field, "Preview Info" and "Add to Directus" buttons, and a "Processing video..." loading indicator.
+ ℹ️ How it works: 1. Enter any YouTube URL  2. Optionally customize the title  3. Video gets added to Directus media_items  4. Thumbnail is automatically downloaded  5. The watcher service will monitor for any issues
\ No newline at end of file diff --git a/microsite/test.html b/microsite/test.html new file mode 100644 index 0000000..b4853ca --- /dev/null +++ b/microsite/test.html @@ -0,0 +1,60 @@
+ [Markup of microsite/test.html was likewise stripped; recoverable text: page title "YouTube Microsite Test", heading "YouTube to Directus - Simple Test".]
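A quick way to exercise the microsite endpoints shown above, assuming the server is running locally on port 5001 (a smoke-test sketch using the routes from `microsite/app.py`):

```python
import requests

BASE = "http://localhost:5001"

# Preview metadata without creating anything in Directus
preview = requests.get(f"{BASE}/preview", params={"url": "https://youtu.be/dQw4w9WgXcQ"})
print(preview.json())

# Add the video; custom_title is optional and overrides the scraped title
added = requests.post(
    f"{BASE}/add_video",
    data={
        "youtube_url": "https://youtu.be/dQw4w9WgXcQ",
        "custom_title": "Never Gonna Give You Up",
    },
)
print(added.json())

# Health check, including Directus connectivity
print(requests.get(f"{BASE}/status").json())
```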
+ + + + \ No newline at end of file diff --git a/one_time_scan.py b/one_time_scan.py new file mode 100755 index 0000000..8090c62 --- /dev/null +++ b/one_time_scan.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +""" +One-time scan to populate ALL existing YouTube media items with thumbnails +""" + +import sys +import logging +from datetime import datetime +from typing import Dict, List + +# Add src directory to path +sys.path.append('src') + +from config import BATCH_SIZE, LOG_LEVEL +from directus_client import DirectusClient +from youtube_processor import YouTubeProcessor + + +class OneTimeThumbnailScanner: + """One-time scanner to populate all YouTube thumbnails""" + + def __init__(self): + self.directus_client = DirectusClient() + self.youtube_processor = YouTubeProcessor() + + # Statistics + self.stats = { + 'items_found': 0, + 'items_processed': 0, + 'items_succeeded': 0, + 'items_failed': 0, + 'items_skipped': 0, + 'start_time': datetime.now() + } + + self.setup_logging() + + def setup_logging(self): + """Configure logging""" + log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + logging.basicConfig( + level=getattr(logging, LOG_LEVEL.upper()), + format=log_format, + handlers=[ + logging.StreamHandler(sys.stdout), + logging.FileHandler('/tmp/youtube_one_time_scan.log') + ] + ) + + self.logger = logging.getLogger(__name__) + self.logger.info("🎬 Starting one-time YouTube thumbnail scan...") + + def get_all_youtube_items(self) -> List[Dict]: + """Get ALL YouTube items (with and without thumbnails) for complete scan""" + try: + import requests + import json + from config import DIRECTUS_ITEMS_URL + + # Query for ALL YouTube items regardless of thumbnail status + filter_json = json.dumps({ + "_and": [ + { + "_or": [ + {"type": {"_eq": "youtube_video"}}, + {"type": {"_eq": "youtube"}} + ] + }, + {"url": {"_nnull": True}} + ] + }) + + all_items = [] + offset = 0 + limit = 100 # Larger batch for scanning + + while True: + filter_params = { + "filter": filter_json, + "limit": limit, + "offset": offset, + "fields": "id,url,type,title,youtube_thumb" + } + + response = requests.get( + f"{DIRECTUS_ITEMS_URL}/media_items", + headers=self.directus_client.headers, + params=filter_params, + timeout=30 + ) + + if response.status_code == 200: + data = response.json() + items = data.get('data', []) + + if not items: + break + + all_items.extend(items) + offset += limit + + self.logger.info(f"Fetched {len(items)} items (total: {len(all_items)})") + + else: + self.logger.error(f"Failed to get media items: {response.status_code} - {response.text}") + break + + self.stats['items_found'] = len(all_items) + self.logger.info(f"Found {len(all_items)} total YouTube items") + return all_items + + except Exception as e: + self.logger.error(f"Error getting all YouTube items: {e}") + return [] + + def process_media_item(self, item: Dict) -> bool: + """Process a single media item""" + item_id = item.get('id') + item_url = item.get('url') + item_title = item.get('title', f"Media Item {item_id}") + existing_thumb = item.get('youtube_thumb') + + # Skip if already has thumbnail + if existing_thumb: + self.logger.info(f"⏭️ Item {item_id} already has thumbnail: {existing_thumb}") + self.stats['items_skipped'] += 1 + return True + + self.logger.info(f"🔄 Processing item {item_id}: {item_title}") + + try: + # Extract video ID + video_id = self.youtube_processor.extract_video_id(item_url) + if not video_id: + self.logger.error(f"Could not extract video ID from URL: {item_url}") + return False + + # Download 
thumbnail + thumbnail_data, filename = self.youtube_processor.download_best_thumbnail(video_id) + if not thumbnail_data or not filename: + self.logger.error(f"Could not download thumbnail for video: {video_id}") + return False + + # Upload to Directus + file_id = self.directus_client.upload_file( + thumbnail_data, + filename, + title=f"YouTube Thumbnail - {video_id}" + ) + + if not file_id: + self.logger.error(f"Could not upload thumbnail for video: {video_id}") + return False + + # Update media item + success = self.directus_client.update_media_item_thumbnail(item_id, file_id) + + if success: + self.logger.info(f"✅ Successfully processed item {item_id} -> thumbnail {file_id}") + return True + else: + self.logger.error(f"❌ Failed to update media item {item_id}") + return False + + except Exception as e: + self.logger.error(f"❌ Error processing item {item_id}: {e}") + return False + + def print_final_statistics(self): + """Print final scan statistics""" + uptime = datetime.now() - self.stats['start_time'] + + print(f"\n📊 One-Time Scan Complete!") + print(f"=" * 40) + print(f" Duration: {uptime}") + print(f" Items Found: {self.stats['items_found']}") + print(f" Items Processed: {self.stats['items_processed']}") + print(f" Already Had Thumbnails: {self.stats['items_skipped']}") + print(f" Successfully Added: {self.stats['items_succeeded']}") + print(f" Failed: {self.stats['items_failed']}") + + if self.stats['items_processed'] > 0: + success_rate = (self.stats['items_succeeded'] / self.stats['items_processed']) * 100 + print(f" Success Rate: {success_rate:.1f}%") + + total_with_thumbs = self.stats['items_skipped'] + self.stats['items_succeeded'] + coverage = (total_with_thumbs / self.stats['items_found']) * 100 if self.stats['items_found'] > 0 else 0 + print(f" Total Coverage: {coverage:.1f}% ({total_with_thumbs}/{self.stats['items_found']})") + print("") + + def run(self): + """Main scanning process""" + print("🎬 YouTube Thumbnail One-Time Scan") + print("==================================") + print("This will scan ALL YouTube media items and populate missing thumbnails") + print("") + + try: + # Get all YouTube items + self.logger.info("🔍 Scanning for all YouTube media items...") + items = self.get_all_youtube_items() + + if not items: + self.logger.info("No YouTube items found") + return + + # Process each item + self.logger.info(f"📋 Processing {len(items)} YouTube items...") + + for i, item in enumerate(items, 1): + print(f"\n[{i}/{len(items)}] Processing: {item.get('title', 'Untitled')}") + + # Skip if already has thumbnail + if item.get('youtube_thumb'): + self.stats['items_skipped'] += 1 + continue + + success = self.process_media_item(item) + + # Update statistics + self.stats['items_processed'] += 1 + if success: + self.stats['items_succeeded'] += 1 + else: + self.stats['items_failed'] += 1 + + # Progress update every 5 items + if i % 5 == 0: + print(f"Progress: {i}/{len(items)} items checked") + + # Final statistics + self.print_final_statistics() + + except KeyboardInterrupt: + self.logger.info("Scan interrupted by user") + self.print_final_statistics() + except Exception as e: + self.logger.error(f"Scan error: {e}") + self.print_final_statistics() + raise + + +def main(): + """Entry point""" + try: + scanner = OneTimeThumbnailScanner() + scanner.run() + except Exception as e: + print(f"❌ Failed to start scan: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 
0000000..91e6f44 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +# YouTube-Directus Automation Service Requirements + +# HTTP client and basic server +requests==2.31.0 +flask==3.0.0 + +# Environment configuration +python-dotenv==1.0.0 + +# Additional dependencies for watcher service +# (all dependencies already covered by existing requirements) \ No newline at end of file diff --git a/run_watcher.sh b/run_watcher.sh new file mode 100755 index 0000000..9d656e6 --- /dev/null +++ b/run_watcher.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# YouTube Thumbnail Watcher Service Startup Script + +set -e + +echo "🎬 Starting YouTube Thumbnail Watcher Service" +echo "=============================================" + +# Check if .env file exists +if [ ! -f .env ]; then + echo "❌ Error: .env file not found!" + echo "💡 Copy .env.example to .env and configure your settings" + exit 1 +fi + +# Check if Python virtual environment exists +if [ ! -d venv ]; then + echo "📦 Creating Python virtual environment..." + python3 -m venv venv +fi + +# Activate virtual environment +echo "🔧 Activating virtual environment..." +source venv/bin/activate + +# Install dependencies +echo "📥 Installing dependencies..." +pip install -r requirements.txt + +# Check configuration +echo "⚙️ Checking configuration..." +python3 -c "import config; print('Configuration loaded successfully')" + +# Start the watcher service +echo "🚀 Starting watcher service..." +echo " - Press Ctrl+C to stop" +echo " - Logs are written to /tmp/youtube_watcher.log" +echo "" + +cd src +python3 watcher_service.py \ No newline at end of file diff --git a/src/__pycache__/directus_client.cpython-313.pyc b/src/__pycache__/directus_client.cpython-313.pyc new file mode 100644 index 0000000..8a73624 Binary files /dev/null and b/src/__pycache__/directus_client.cpython-313.pyc differ diff --git a/src/__pycache__/youtube_metadata.cpython-313.pyc b/src/__pycache__/youtube_metadata.cpython-313.pyc new file mode 100644 index 0000000..c1ba5a5 Binary files /dev/null and b/src/__pycache__/youtube_metadata.cpython-313.pyc differ diff --git a/src/__pycache__/youtube_processor.cpython-313.pyc b/src/__pycache__/youtube_processor.cpython-313.pyc new file mode 100644 index 0000000..37eff7f Binary files /dev/null and b/src/__pycache__/youtube_processor.cpython-313.pyc differ diff --git a/src/directus_client.py b/src/directus_client.py new file mode 100644 index 0000000..b02360c --- /dev/null +++ b/src/directus_client.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +""" +Directus API Client for YouTube Thumbnail Service +""" + +import requests +import json +import logging +from typing import List, Dict, Optional +from config import DIRECTUS_TOKEN, DIRECTUS_ITEMS_URL, DIRECTUS_FILES_URL + +logger = logging.getLogger(__name__) + + +class DirectusClient: + """Client for interacting with Directus API""" + + def __init__(self): + self.headers = { + 'Authorization': f'Bearer {DIRECTUS_TOKEN}', + 'Content-Type': 'application/json' + } + self.files_headers = { + 'Authorization': f'Bearer {DIRECTUS_TOKEN}' + } + + def get_unprocessed_youtube_items(self, limit: int = 10) -> List[Dict]: + """Get media_items that need YouTube thumbnail processing""" + try: + # Query for YouTube items without thumbnails + # For file fields in Directus, we need to check differently + filter_json = json.dumps({ + "_and": [ + { + "_or": [ + {"type": {"_eq": "youtube_video"}}, + {"type": {"_eq": "youtube"}} + ] + }, + {"url": {"_nnull": True}}, + {"youtube_thumb": {"_null": True}} + ] + }) + + filter_params = { + 
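+                # Directus expects the filter JSON-encoded as a query parameter;
+                # "fields" below trims the response to just the columns used here.
+                # Note: this client reads/writes "youtube_thumb", while the watcher
+                # README's sample query and directus-flow-config.json use
+                # "youtube_thumbnail"; only one of the two names can match the
+                # actual media_items schema.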
"filter": filter_json, + "limit": limit, + "fields": "id,url,type,title" + } + + response = requests.get( + f"{DIRECTUS_ITEMS_URL}/media_items", + headers=self.headers, + params=filter_params, + timeout=30 + ) + + if response.status_code == 200: + data = response.json() + items = data.get('data', []) + logger.info(f"Found {len(items)} unprocessed YouTube items") + return items + else: + logger.error(f"Failed to get media items: {response.status_code} - {response.text}") + return [] + + except Exception as e: + logger.error(f"Error getting unprocessed items: {e}") + return [] + + def upload_file(self, file_data: bytes, filename: str, title: str = None) -> Optional[str]: + """Upload file to Directus and return file ID""" + try: + files = { + 'file': (filename, file_data, 'image/jpeg') + } + + data = {} + if title: + data['title'] = title + + response = requests.post( + DIRECTUS_FILES_URL, + headers=self.files_headers, + files=files, + data=data, + timeout=60 + ) + + if response.status_code in [200, 201]: + result = response.json() + file_id = result.get('data', {}).get('id') + logger.info(f"Successfully uploaded file: {filename} -> {file_id}") + return file_id + else: + logger.error(f"Failed to upload file: {response.status_code} - {response.text}") + return None + + except Exception as e: + logger.error(f"Error uploading file {filename}: {e}") + return None + + def update_media_item_thumbnail(self, item_id: str, thumbnail_file_id: str) -> bool: + """Update media_item with thumbnail file reference""" + try: + update_data = { + "youtube_thumb": thumbnail_file_id + } + + response = requests.patch( + f"{DIRECTUS_ITEMS_URL}/media_items/{item_id}", + headers=self.headers, + json=update_data, + timeout=30 + ) + + if response.status_code == 200: + logger.info(f"Successfully updated item {item_id} with thumbnail {thumbnail_file_id}") + return True + else: + logger.error(f"Failed to update item {item_id}: {response.status_code} - {response.text}") + return False + + except Exception as e: + logger.error(f"Error updating item {item_id}: {e}") + return False + + def create_media_item(self, item_data: Dict) -> Optional[str]: + """Create a new media item in Directus""" + try: + response = requests.post( + f"{DIRECTUS_ITEMS_URL}/media_items", + headers=self.headers, + json=item_data, + timeout=30 + ) + + if response.status_code in [200, 201]: + result = response.json() + item_id = result.get('data', {}).get('id') + logger.info(f"Successfully created media item: {item_id}") + return str(item_id) + else: + logger.error(f"Failed to create media item: {response.status_code} - {response.text}") + return None + + except Exception as e: + logger.error(f"Error creating media item: {e}") + return None + + def mark_item_processed(self, item_id: str, error_message: str = None) -> bool: + """Mark an item as processed (even if failed) to avoid reprocessing""" + try: + # We could add a processing_status field, but for now just log + if error_message: + logger.warning(f"Item {item_id} failed processing: {error_message}") + return True + except Exception as e: + logger.error(f"Error marking item {item_id} as processed: {e}") + return False \ No newline at end of file diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..d290e83 --- /dev/null +++ b/src/main.py @@ -0,0 +1,401 @@ +""" +YouTube-Directus Automation Service +Main FastAPI application for handling YouTube thumbnail automation +""" + +import asyncio +import hashlib +import hmac +import logging +import os +import uuid +from datetime import 
datetime +from pathlib import Path +from typing import Dict, Optional + +import httpx +import uvicorn +from fastapi import FastAPI, HTTPException, Request, BackgroundTasks +from fastapi.responses import JSONResponse +from pydantic import BaseModel + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Initialize FastAPI app +app = FastAPI( + title="YouTube-Directus Automation Service", + description="Automated thumbnail extraction and upload for YouTube videos in Directus", + version="1.0.0" +) + +# Configuration from environment variables +DIRECTUS_URL = os.getenv("DIRECTUS_URL", "https://enias.zeabur.app/") +DIRECTUS_TOKEN = os.getenv("DIRECTUS_TOKEN") +YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY") +WEBHOOK_SECRET = os.getenv("DIRECTUS_WEBHOOK_SECRET") + +# Ensure required configuration is present +if not DIRECTUS_TOKEN: + logger.warning("DIRECTUS_TOKEN not set - some operations may fail") + +if not YOUTUBE_API_KEY: + logger.warning("YOUTUBE_API_KEY not set - will use fallback thumbnail URLs") + + +class DirectusWebhookEvent(BaseModel): + """Model for Directus webhook events""" + event: str + accountability: Optional[Dict] = None + payload: Optional[Dict] = None + keys: Optional[list] = None + + +class YouTubeProcessor: + """Handles YouTube video processing and thumbnail extraction""" + + def __init__(self): + self.youtube_api_base = "https://www.googleapis.com/youtube/v3" + self.directus_api_base = DIRECTUS_URL.rstrip('/') + "/items" + self.directus_files_base = DIRECTUS_URL.rstrip('/') + "/files" + + def extract_video_id(self, url: str) -> Optional[str]: + """Extract YouTube video ID from various URL formats""" + import re + + # YouTube URL patterns + patterns = [ + r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})', + r'youtube\.com\/v\/([a-zA-Z0-9_-]{11})', + r'youtube\.com\/watch\?.*v=([a-zA-Z0-9_-]{11})', + ] + + for pattern in patterns: + match = re.search(pattern, url) + if match: + return match.group(1) + + return None + + async def get_video_metadata(self, video_id: str) -> Optional[Dict]: + """Get video metadata from YouTube API or fallback""" + if YOUTUBE_API_KEY: + try: + async with httpx.AsyncClient() as client: + response = await client.get( + f"{self.youtube_api_base}/videos", + params={ + 'id': video_id, + 'key': YOUTUBE_API_KEY, + 'part': 'snippet,status' + } + ) + + if response.status_code == 200: + data = response.json() + if data.get('items'): + return data['items'][0] + + except Exception as e: + logger.error(f"YouTube API error: {e}") + + # Fallback: return basic metadata structure + return { + 'snippet': { + 'thumbnails': { + 'maxres': {'url': f'https://img.youtube.com/vi/{video_id}/maxresdefault.jpg'}, + 'high': {'url': f'https://img.youtube.com/vi/{video_id}/hqdefault.jpg'}, + 'medium': {'url': f'https://img.youtube.com/vi/{video_id}/mqdefault.jpg'}, + 'default': {'url': f'https://img.youtube.com/vi/{video_id}/default.jpg'} + } + } + } + + def get_best_thumbnail_url(self, video_metadata: Dict) -> str: + """Get the best quality thumbnail URL available""" + thumbnails = video_metadata.get('snippet', {}).get('thumbnails', {}) + + # Priority order for thumbnail quality + quality_order = ['maxres', 'high', 'medium', 'default'] + + for quality in quality_order: + if quality in thumbnails: + return thumbnails[quality]['url'] + + # Fallback if no thumbnails found + return "" + + async def download_thumbnail(self, thumbnail_url: str) -> Optional[bytes]: + """Download thumbnail image from 
YouTube""" + try: + async with httpx.AsyncClient() as client: + response = await client.get(thumbnail_url) + + if response.status_code == 200: + return response.content + else: + logger.error(f"Failed to download thumbnail: {response.status_code}") + + except Exception as e: + logger.error(f"Error downloading thumbnail: {e}") + + return None + + async def upload_to_directus(self, image_data: bytes, filename: str) -> Optional[str]: + """Upload image to Directus files collection""" + if not DIRECTUS_TOKEN: + logger.error("Cannot upload to Directus: DIRECTUS_TOKEN not set") + return None + + try: + headers = { + 'Authorization': f'Bearer {DIRECTUS_TOKEN}' + } + + files = { + 'file': (filename, image_data, 'image/jpeg') + } + + async with httpx.AsyncClient() as client: + response = await client.post( + self.directus_files_base, + headers=headers, + files=files + ) + + if response.status_code in [200, 201]: + result = response.json() + return result.get('data', {}).get('id') + else: + logger.error(f"Directus upload failed: {response.status_code} - {response.text}") + + except Exception as e: + logger.error(f"Error uploading to Directus: {e}") + + return None + + async def update_directus_item(self, collection: str, item_id: str, thumbnail_file_id: str) -> bool: + """Update Directus item with thumbnail file reference""" + if not DIRECTUS_TOKEN: + logger.error("Cannot update Directus item: DIRECTUS_TOKEN not set") + return False + + try: + headers = { + 'Authorization': f'Bearer {DIRECTUS_TOKEN}', + 'Content-Type': 'application/json' + } + + # Update the item with thumbnail reference + # Assuming the thumbnail field is called 'thumbnail' - this might need adjustment + update_data = { + 'thumbnail': thumbnail_file_id + } + + async with httpx.AsyncClient() as client: + response = await client.patch( + f"{self.directus_api_base}/{collection}/{item_id}", + headers=headers, + json=update_data + ) + + if response.status_code == 200: + logger.info(f"Successfully updated item {item_id} with thumbnail") + return True + else: + logger.error(f"Failed to update Directus item: {response.status_code} - {response.text}") + + except Exception as e: + logger.error(f"Error updating Directus item: {e}") + + return False + + +# Initialize processor +youtube_processor = YouTubeProcessor() + + +def verify_webhook_signature(request_body: bytes, signature: str) -> bool: + """Verify Directus webhook signature for security""" + if not WEBHOOK_SECRET: + logger.warning("WEBHOOK_SECRET not set - skipping signature verification") + return True + + try: + # Remove 'sha256=' prefix if present + if signature.startswith('sha256='): + signature = signature[7:] + + # Calculate expected signature + expected = hmac.new( + WEBHOOK_SECRET.encode(), + request_body, + hashlib.sha256 + ).hexdigest() + + return hmac.compare_digest(expected, signature) + + except Exception as e: + logger.error(f"Error verifying webhook signature: {e}") + return False + + +async def process_youtube_thumbnail(collection: str, item_id: str, youtube_url: str): + """Background task to process YouTube thumbnail""" + try: + logger.info(f"Processing YouTube thumbnail for item {item_id}: {youtube_url}") + + # Extract video ID + video_id = youtube_processor.extract_video_id(youtube_url) + if not video_id: + logger.error(f"Could not extract video ID from URL: {youtube_url}") + return + + # Get video metadata + video_metadata = await youtube_processor.get_video_metadata(video_id) + if not video_metadata: + logger.error(f"Could not get metadata for video: {video_id}") + 
return + + # Get best thumbnail URL + thumbnail_url = youtube_processor.get_best_thumbnail_url(video_metadata) + if not thumbnail_url: + logger.error(f"No thumbnail URL found for video: {video_id}") + return + + # Download thumbnail + image_data = await youtube_processor.download_thumbnail(thumbnail_url) + if not image_data: + logger.error(f"Could not download thumbnail from: {thumbnail_url}") + return + + # Upload to Directus + filename = f"youtube_thumb_{video_id}_{uuid.uuid4().hex[:8]}.jpg" + file_id = await youtube_processor.upload_to_directus(image_data, filename) + if not file_id: + logger.error("Failed to upload thumbnail to Directus") + return + + # Update the item with thumbnail reference + success = await youtube_processor.update_directus_item(collection, item_id, file_id) + if success: + logger.info(f"✅ Successfully processed YouTube thumbnail for item {item_id}") + else: + logger.error(f"❌ Failed to update item {item_id} with thumbnail") + + except Exception as e: + logger.error(f"Error processing YouTube thumbnail: {e}") + + +@app.post("/webhook/directus") +async def directus_webhook(request: Request, background_tasks: BackgroundTasks): + """Handle Directus webhook events for YouTube video items""" + try: + # Read raw request body for signature verification + body = await request.body() + + # Verify signature if secret is configured + signature = request.headers.get('x-directus-signature', '') + if not verify_webhook_signature(body, signature): + logger.error("Invalid webhook signature") + raise HTTPException(status_code=401, detail="Invalid signature") + + # Parse webhook data + webhook_data = DirectusWebhookEvent.parse_raw(body) + + logger.info(f"Received webhook event: {webhook_data.event}") + + # Only process create and update events + if webhook_data.event not in ['items.create', 'items.update']: + return JSONResponse({"status": "ignored", "reason": "not a create/update event"}) + + # Extract payload data + payload = webhook_data.payload or {} + if not payload: + return JSONResponse({"status": "ignored", "reason": "no payload data"}) + + # Look for YouTube URL in the payload + # This assumes your collection has a field with YouTube URL + # Common field names: youtube_url, url, link, video_url + youtube_url = None + possible_url_fields = ['youtube_url', 'url', 'link', 'video_url', 'youtube_link'] + + for field in possible_url_fields: + if field in payload and payload[field]: + url_value = payload[field] + if 'youtube.com' in str(url_value) or 'youtu.be' in str(url_value): + youtube_url = str(url_value) + break + + if not youtube_url: + logger.info("No YouTube URL found in payload") + return JSONResponse({"status": "ignored", "reason": "no YouTube URL found"}) + + # Get item ID and collection + keys = webhook_data.keys or [] + if not keys: + logger.error("No item keys found in webhook") + return JSONResponse({"status": "error", "reason": "no item keys"}) + + item_id = str(keys[0]) # Assume first key is the item ID + + # Use the actual media_items collection + collection = 'media_items' + + # Queue background processing + background_tasks.add_task( + process_youtube_thumbnail, + collection=collection, + item_id=item_id, + youtube_url=youtube_url + ) + + return JSONResponse({ + "status": "queued", + "item_id": item_id, + "youtube_url": youtube_url + }) + + except Exception as e: + logger.error(f"Webhook processing error: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return { + 
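+        # Secrets are reported as booleans only, so this endpoint can be exposed
+        # without leaking the token or API key values.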
"status": "healthy", + "service": "youtube-directus-automation", + "timestamp": datetime.utcnow().isoformat(), + "config": { + "directus_url": DIRECTUS_URL, + "has_directus_token": bool(DIRECTUS_TOKEN), + "has_youtube_api_key": bool(YOUTUBE_API_KEY), + "has_webhook_secret": bool(WEBHOOK_SECRET) + } + } + + +@app.get("/") +async def root(): + """Root endpoint with service information""" + return { + "service": "YouTube-Directus Automation Service", + "version": "1.0.0", + "description": "Automated thumbnail extraction and upload for YouTube videos in Directus", + "endpoints": { + "webhook": "/webhook/directus", + "health": "/health" + } + } + + +if __name__ == "__main__": + uvicorn.run( + "main:app", + host="0.0.0.0", + port=int(os.getenv("PORT", "8000")), + reload=True + ) \ No newline at end of file diff --git a/src/main_simple.py b/src/main_simple.py new file mode 100644 index 0000000..69c3edf --- /dev/null +++ b/src/main_simple.py @@ -0,0 +1,429 @@ +#!/usr/bin/env python3 +""" +YouTube-Directus Automation Service (Simple Flask Version) +Automated thumbnail extraction and upload for YouTube videos in Directus +""" + +import hashlib +import hmac +import json +import logging +import os +import re +import uuid +from datetime import datetime +from pathlib import Path + +import requests +from flask import Flask, request, jsonify +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Initialize Flask app +app = Flask(__name__) + +# Configuration from environment variables +DIRECTUS_URL = os.getenv("DIRECTUS_URL", "https://enias.zeabur.app/") +DIRECTUS_TOKEN = os.getenv("DIRECTUS_TOKEN") +YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY") +WEBHOOK_SECRET = os.getenv("DIRECTUS_WEBHOOK_SECRET") + +# Ensure required configuration is present +if not DIRECTUS_TOKEN: + logger.warning("DIRECTUS_TOKEN not set - some operations may fail") + +if not YOUTUBE_API_KEY: + logger.warning("YOUTUBE_API_KEY not set - will use fallback thumbnail URLs") + + +class YouTubeProcessor: + """Handles YouTube video processing and thumbnail extraction""" + + def __init__(self): + self.youtube_api_base = "https://www.googleapis.com/youtube/v3" + self.directus_api_base = DIRECTUS_URL.rstrip('/') + "/items" + self.directus_files_base = DIRECTUS_URL.rstrip('/') + "/files" + + def extract_video_id(self, url): + """Extract YouTube video ID from various URL formats""" + # YouTube URL patterns + patterns = [ + r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})', + r'youtube\.com\/v\/([a-zA-Z0-9_-]{11})', + r'youtube\.com\/watch\?.*v=([a-zA-Z0-9_-]{11})', + ] + + for pattern in patterns: + match = re.search(pattern, url) + if match: + return match.group(1) + + return None + + def get_video_metadata(self, video_id): + """Get video metadata from YouTube API or fallback""" + if YOUTUBE_API_KEY: + try: + response = requests.get( + f"{self.youtube_api_base}/videos", + params={ + 'id': video_id, + 'key': YOUTUBE_API_KEY, + 'part': 'snippet,status' + }, + timeout=10 + ) + + if response.status_code == 200: + data = response.json() + if data.get('items'): + return data['items'][0] + + except Exception as e: + logger.error(f"YouTube API error: {e}") + + # Fallback: return basic metadata structure + return { + 'snippet': { + 'thumbnails': { + 'maxres': {'url': f'https://img.youtube.com/vi/{video_id}/maxresdefault.jpg'}, + 'high': {'url': 
f'https://img.youtube.com/vi/{video_id}/hqdefault.jpg'}, + 'medium': {'url': f'https://img.youtube.com/vi/{video_id}/mqdefault.jpg'}, + 'default': {'url': f'https://img.youtube.com/vi/{video_id}/default.jpg'} + } + } + } + + def get_best_thumbnail_url(self, video_metadata): + """Get the best quality thumbnail URL available""" + thumbnails = video_metadata.get('snippet', {}).get('thumbnails', {}) + + # Priority order for thumbnail quality + quality_order = ['maxres', 'high', 'medium', 'default'] + + for quality in quality_order: + if quality in thumbnails: + return thumbnails[quality]['url'] + + # Fallback if no thumbnails found + return "" + + def download_thumbnail(self, thumbnail_url): + """Download thumbnail image from YouTube""" + try: + response = requests.get(thumbnail_url, timeout=15) + + if response.status_code == 200: + return response.content + else: + logger.error(f"Failed to download thumbnail: {response.status_code}") + + except Exception as e: + logger.error(f"Error downloading thumbnail: {e}") + + return None + + def upload_to_directus(self, image_data, filename): + """Upload image to Directus files collection""" + if not DIRECTUS_TOKEN: + logger.error("Cannot upload to Directus: DIRECTUS_TOKEN not set") + return None + + try: + headers = { + 'Authorization': f'Bearer {DIRECTUS_TOKEN}' + } + + files = { + 'file': (filename, image_data, 'image/jpeg') + } + + response = requests.post( + self.directus_files_base, + headers=headers, + files=files, + timeout=30 + ) + + if response.status_code in [200, 201]: + result = response.json() + return result.get('data', {}).get('id') + else: + logger.error(f"Directus upload failed: {response.status_code} - {response.text}") + + except Exception as e: + logger.error(f"Error uploading to Directus: {e}") + + return None + + def update_directus_item(self, collection, item_id, thumbnail_file_id): + """Update Directus item with thumbnail file reference""" + if not DIRECTUS_TOKEN: + logger.error("Cannot update Directus item: DIRECTUS_TOKEN not set") + return False + + try: + headers = { + 'Authorization': f'Bearer {DIRECTUS_TOKEN}', + 'Content-Type': 'application/json' + } + + # Update the item with thumbnail reference + # Assuming the thumbnail field is called 'thumbnail' - this might need adjustment + update_data = { + 'thumbnail': thumbnail_file_id + } + + response = requests.patch( + f"{self.directus_api_base}/{collection}/{item_id}", + headers=headers, + json=update_data, + timeout=15 + ) + + if response.status_code == 200: + logger.info(f"Successfully updated item {item_id} with thumbnail") + return True + else: + logger.error(f"Failed to update Directus item: {response.status_code} - {response.text}") + + except Exception as e: + logger.error(f"Error updating Directus item: {e}") + + return False + + +# Initialize processor +youtube_processor = YouTubeProcessor() + + +def verify_webhook_signature(request_body, signature): + """Verify Directus webhook signature for security""" + if not WEBHOOK_SECRET: + logger.warning("WEBHOOK_SECRET not set - skipping signature verification") + return True + + try: + # Remove 'sha256=' prefix if present + if signature.startswith('sha256='): + signature = signature[7:] + + # Calculate expected signature + expected = hmac.new( + WEBHOOK_SECRET.encode(), + request_body, + hashlib.sha256 + ).hexdigest() + + return hmac.compare_digest(expected, signature) + + except Exception as e: + logger.error(f"Error verifying webhook signature: {e}") + return False + + +def process_youtube_thumbnail(collection, item_id, 
youtube_url): + """Process YouTube thumbnail (synchronous version)""" + try: + logger.info(f"Processing YouTube thumbnail for item {item_id}: {youtube_url}") + + # Extract video ID + video_id = youtube_processor.extract_video_id(youtube_url) + if not video_id: + logger.error(f"Could not extract video ID from URL: {youtube_url}") + return False + + # Get video metadata + video_metadata = youtube_processor.get_video_metadata(video_id) + if not video_metadata: + logger.error(f"Could not get metadata for video: {video_id}") + return False + + # Get best thumbnail URL + thumbnail_url = youtube_processor.get_best_thumbnail_url(video_metadata) + if not thumbnail_url: + logger.error(f"No thumbnail URL found for video: {video_id}") + return False + + # Download thumbnail + image_data = youtube_processor.download_thumbnail(thumbnail_url) + if not image_data: + logger.error(f"Could not download thumbnail from: {thumbnail_url}") + return False + + # Upload to Directus + filename = f"youtube_thumb_{video_id}_{uuid.uuid4().hex[:8]}.jpg" + file_id = youtube_processor.upload_to_directus(image_data, filename) + if not file_id: + logger.error("Failed to upload thumbnail to Directus") + return False + + # Update the item with thumbnail reference + success = youtube_processor.update_directus_item(collection, item_id, file_id) + if success: + logger.info(f"✅ Successfully processed YouTube thumbnail for item {item_id}") + return True + else: + logger.error(f"❌ Failed to update item {item_id} with thumbnail") + return False + + except Exception as e: + logger.error(f"Error processing YouTube thumbnail: {e}") + return False + + +@app.route('/webhook/directus', methods=['POST']) +def directus_webhook(): + """Handle Directus webhook events for YouTube video items""" + try: + # Read raw request body for signature verification + body = request.get_data() + + # Verify signature if secret is configured + signature = request.headers.get('x-directus-signature', '') + if not verify_webhook_signature(body, signature): + logger.error("Invalid webhook signature") + return jsonify({"error": "Invalid signature"}), 401 + + # Parse webhook data + webhook_data = request.get_json() + if not webhook_data: + return jsonify({"error": "No JSON data"}), 400 + + logger.info(f"Received webhook event: {webhook_data.get('event')}") + + # Only process create and update events + event = webhook_data.get('event') + if event not in ['items.create', 'items.update']: + return jsonify({"status": "ignored", "reason": "not a create/update event"}) + + # Extract payload data + payload = webhook_data.get('payload', {}) + if not payload: + return jsonify({"status": "ignored", "reason": "no payload data"}) + + # Look for YouTube URL in the payload + # This assumes your collection has a field with YouTube URL + # Common field names: youtube_url, url, link, video_url + youtube_url = None + possible_url_fields = ['youtube_url', 'url', 'link', 'video_url', 'youtube_link'] + + for field in possible_url_fields: + if field in payload and payload[field]: + url_value = payload[field] + if 'youtube.com' in str(url_value) or 'youtu.be' in str(url_value): + youtube_url = str(url_value) + break + + if not youtube_url: + logger.info("No YouTube URL found in payload") + return jsonify({"status": "ignored", "reason": "no YouTube URL found"}) + + # Get item ID and collection + keys = webhook_data.get('keys', []) + if not keys: + logger.error("No item keys found in webhook") + return jsonify({"status": "error", "reason": "no item keys"}), 400 + + item_id = str(keys[0]) 
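+ # For reference, this handler assumes a webhook body shaped roughly like + # the following (an assumption, not an official Directus schema; field + # names vary across Directus versions and webhook configs): + # { + # "event": "items.update", + # "payload": {"youtube_url": "https://youtu.be/dQw4w9WgXcQ"}, + # "keys": ["123"] + # }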
# Assume first key is the item ID + + # Use the actual media_items collection + collection = 'media_items' + + # Process thumbnail synchronously (for now) + success = process_youtube_thumbnail(collection, item_id, youtube_url) + + if success: + return jsonify({ + "status": "completed", + "item_id": item_id, + "youtube_url": youtube_url + }) + else: + return jsonify({ + "status": "error", + "item_id": item_id, + "youtube_url": youtube_url + }), 500 + + except Exception as e: + logger.error(f"Webhook processing error: {e}") + return jsonify({"error": str(e)}), 500 + + +@app.route('/health') +def health_check(): + """Health check endpoint""" + return jsonify({ + "status": "healthy", + "service": "youtube-directus-automation", + "timestamp": datetime.utcnow().isoformat(), + "config": { + "directus_url": DIRECTUS_URL, + "has_directus_token": bool(DIRECTUS_TOKEN), + "has_youtube_api_key": bool(YOUTUBE_API_KEY), + "has_webhook_secret": bool(WEBHOOK_SECRET) + } + }) + + +@app.route('/') +def root(): + """Root endpoint with service information""" + return jsonify({ + "service": "YouTube-Directus Automation Service", + "version": "1.0.0", + "description": "Automated thumbnail extraction and upload for YouTube videos in Directus", + "endpoints": { + "webhook": "/webhook/directus", + "health": "/health" + } + }) + + +# Test endpoint for manual testing +@app.route('/test/process', methods=['POST']) +def test_process(): + """Test endpoint for manual YouTube processing""" + try: + data = request.get_json() + youtube_url = data.get('youtube_url') + item_id = data.get('item_id', 'test-123') + collection = data.get('collection', 'youtube_videos') + + if not youtube_url: + return jsonify({"error": "youtube_url required"}), 400 + + success = process_youtube_thumbnail(collection, item_id, youtube_url) + + if success: + return jsonify({ + "status": "success", + "message": "Thumbnail processed successfully" + }) + else: + return jsonify({ + "status": "error", + "message": "Failed to process thumbnail" + }), 500 + + except Exception as e: + return jsonify({"error": str(e)}), 500 + + +if __name__ == "__main__": + port = int(os.getenv("PORT", "8000")) + debug = os.getenv("DEBUG", "false").lower() == "true" + + print(f"🚀 Starting YouTube-Directus Automation Service on port {port}") + print(f"📍 Health check: http://localhost:{port}/health") + print(f"🔗 Webhook endpoint: http://localhost:{port}/webhook/directus") + print(f"🧪 Test endpoint: http://localhost:{port}/test/process") + + app.run(host="0.0.0.0", port=port, debug=debug) \ No newline at end of file diff --git a/src/watcher_service.py b/src/watcher_service.py new file mode 100644 index 0000000..6dd55d3 --- /dev/null +++ b/src/watcher_service.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +""" +YouTube Thumbnail Watcher Service + +Polls Directus for media_items that need YouTube thumbnail processing +""" + +import time +import logging +import sys +from datetime import datetime +from typing import Dict + +# Add parent directory to path for config import +sys.path.append('..') + +from config import POLL_INTERVAL, BATCH_SIZE, LOG_LEVEL +from directus_client import DirectusClient +from youtube_processor import YouTubeProcessor + + +class YouTubeWatcherService: + """Main service class for watching and processing YouTube thumbnails""" + + def __init__(self): + self.directus_client = DirectusClient() + self.youtube_processor = YouTubeProcessor() + + # Statistics + self.stats = { + 'items_processed': 0, + 'items_succeeded': 0, + 'items_failed': 0, + 'start_time': 
datetime.now() + } + + self.setup_logging() + + def setup_logging(self): + """Configure logging""" + log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + logging.basicConfig( + level=getattr(logging, LOG_LEVEL.upper()), + format=log_format, + handlers=[ + logging.StreamHandler(sys.stdout), + logging.FileHandler('/tmp/youtube_watcher.log') + ] + ) + + self.logger = logging.getLogger(__name__) + self.logger.info("YouTube Thumbnail Watcher Service starting...") + + def process_media_item(self, item: Dict) -> bool: + """Process a single media item""" + item_id = item.get('id') + item_url = item.get('url') + item_title = item.get('title', f"Media Item {item_id}") + + self.logger.info(f"Processing item {item_id}: {item_title}") + + try: + # Extract video ID + video_id = self.youtube_processor.extract_video_id(item_url) + if not video_id: + self.logger.error(f"Could not extract video ID from URL: {item_url}") + return False + + # Download thumbnail + thumbnail_data, filename = self.youtube_processor.download_best_thumbnail(video_id) + if not thumbnail_data or not filename: + self.logger.error(f"Could not download thumbnail for video: {video_id}") + return False + + # Upload to Directus + file_id = self.directus_client.upload_file( + thumbnail_data, + filename, + title=f"YouTube Thumbnail - {video_id}" + ) + + if not file_id: + self.logger.error(f"Could not upload thumbnail for video: {video_id}") + return False + + # Update media item + success = self.directus_client.update_media_item_thumbnail(item_id, file_id) + + if success: + self.logger.info(f"✅ Successfully processed item {item_id} -> thumbnail {file_id}") + return True + else: + self.logger.error(f"❌ Failed to update media item {item_id}") + return False + + except Exception as e: + self.logger.error(f"❌ Error processing item {item_id}: {e}") + return False + + def process_batch(self) -> None: + """Process a batch of unprocessed items""" + try: + # Get unprocessed items + items = self.directus_client.get_unprocessed_youtube_items(limit=BATCH_SIZE) + + if not items: + self.logger.debug("No unprocessed items found") + return + + self.logger.info(f"Processing batch of {len(items)} items...") + + # Process each item + for item in items: + success = self.process_media_item(item) + + # Update statistics + self.stats['items_processed'] += 1 + if success: + self.stats['items_succeeded'] += 1 + else: + self.stats['items_failed'] += 1 + + self.logger.info(f"Batch complete. 
Success: {self.stats['items_succeeded']}, Failed: {self.stats['items_failed']}") + + except Exception as e: + self.logger.error(f"Error processing batch: {e}") + + def print_statistics(self): + """Print service statistics""" + uptime = datetime.now() - self.stats['start_time'] + + print("\n📊 YouTube Thumbnail Watcher Statistics") + print(f" Uptime: {uptime}") + print(f" Items Processed: {self.stats['items_processed']}") + print(f" Succeeded: {self.stats['items_succeeded']}") + print(f" Failed: {self.stats['items_failed']}") + + if self.stats['items_processed'] > 0: + success_rate = (self.stats['items_succeeded'] / self.stats['items_processed']) * 100 + print(f" Success Rate: {success_rate:.1f}%") + + def run(self): + """Main service loop""" + self.logger.info(f"Service running with {POLL_INTERVAL}s poll interval") + + try: + while True: + start_time = time.time() + + # Process batch + self.process_batch() + + # Print stats after every 10 processed items + if self.stats['items_processed'] % 10 == 0 and self.stats['items_processed'] > 0: + self.print_statistics() + + # Calculate sleep time (ensure minimum interval) + elapsed = time.time() - start_time + sleep_time = max(0, POLL_INTERVAL - elapsed) + + if sleep_time > 0: + self.logger.debug(f"Sleeping for {sleep_time:.1f}s") + time.sleep(sleep_time) + else: + self.logger.warning(f"Batch took {elapsed:.1f}s (longer than {POLL_INTERVAL}s interval)") + + except KeyboardInterrupt: + self.logger.info("Service stopped by user") + self.print_statistics() + except Exception as e: + self.logger.error(f"Service error: {e}") + raise + + +def main(): + """Entry point""" + print("🎬 YouTube Thumbnail Watcher Service") + print("=====================================") + + try: + service = YouTubeWatcherService() + service.run() + except Exception as e: + print(f"❌ Failed to start service: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/youtube_metadata.py b/src/youtube_metadata.py new file mode 100644 index 0000000..4919506 --- /dev/null +++ b/src/youtube_metadata.py @@ -0,0 +1,464 @@ +#!/usr/bin/env python3 +""" +YouTube Metadata Extractor +Smart extraction of video title, description, and metadata without API key +""" + +import re +import requests +import json +import logging +from typing import Dict, Optional + +logger = logging.getLogger(__name__) + + +class YouTubeMetadataExtractor: + """Extract YouTube video metadata without requiring API keys""" + + def __init__(self): + self.session = requests.Session() + self.session.headers.update({ + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' + }) + + def extract_video_id(self, url: str) -> Optional[str]: + """Extract video ID from various YouTube URL formats""" + patterns = [ + r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', + r'youtu\.be\/([0-9A-Za-z_-]{11})', + r'embed\/([0-9A-Za-z_-]{11})', + r'watch\?v=([0-9A-Za-z_-]{11})' + ] + + for pattern in patterns: + match = re.search(pattern, url) + if match: + return match.group(1) + + return None + + def get_video_metadata(self, url: str) -> Dict: + """Extract comprehensive video metadata from YouTube page""" + video_id = None # initialized up front so the fallback in the except branch never sees an unbound name + try: + video_id = self.extract_video_id(url) + if not video_id: + logger.error(f"Could not extract video ID from URL: {url}") + return self._fallback_metadata(url, video_id) + + # Fetch the YouTube page + watch_url = f"https://www.youtube.com/watch?v={video_id}"
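+ # NOTE: everything below scrapes the public watch page; the HTML and + # embedded-JSON patterns used by the _extract_* helpers are best-effort + # assumptions and may need updating when YouTube changes its markup, in + # which case _fallback_metadata() is the safety net.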
response = self.session.get(watch_url, timeout=10) + + if response.status_code != 200: + logger.warning(f"Failed to fetch YouTube page: {response.status_code}") + return self._fallback_metadata(url, video_id) + + html = response.text + + # Extract metadata using multiple methods + metadata = { + 'video_id': video_id, + 'url': url, + 'canonical_url': watch_url, + 'title': self._extract_title(html, video_id), + 'description': self._extract_description(html), + 'channel_name': self._extract_channel_name(html), + 'channel_id': self._extract_channel_id(html), + 'duration': self._extract_duration(html), + 'view_count': self._extract_view_count(html), + 'like_count': self._extract_like_count(html), + 'comment_count': self._extract_comment_count(html), + 'upload_date': self._extract_upload_date(html), + 'category': self._extract_category(html), + 'keywords': self._extract_keywords(html), + 'is_live': self._extract_is_live(html), + 'is_family_safe': self._extract_is_family_safe(html), + 'thumbnail_url': f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg", + 'type': 'youtube_video' + } + + # Clean up metadata + metadata = self._clean_metadata(metadata) + + logger.info(f"Successfully extracted metadata for video: {metadata['title']}") + return metadata + + except Exception as e: + logger.error(f"Error extracting metadata for {url}: {e}") + return self._fallback_metadata(url, video_id) + + def _extract_title(self, html: str, video_id: str) -> str: + """Extract video title using multiple methods""" + # Method 1: JSON-LD structured data + json_ld_pattern = r'<script type="application/ld\+json"[^>]*>(.*?)</script>' + matches = re.findall(json_ld_pattern, html, re.DOTALL) + + for match in matches: + try: + data = json.loads(match) + if isinstance(data, list): + data = data[0] + if data.get('@type') == 'VideoObject' and 'name' in data: + return data['name'] + except (json.JSONDecodeError, KeyError): + continue + + # Method 2: og:title meta property + meta_title = re.search(r'<meta property="og:title" content="([^"]*)"', html) + if meta_title: + return meta_title.group(1) + + # Method 3: Title tag + title_tag = re.search(r'<title>([^<]*)</title>', html) + if title_tag: + title = title_tag.group(1).replace(' - YouTube', '').strip() + if title: + return title + + # Method 4: ytInitialPlayerResponse + player_response = self._extract_ytInitialPlayerResponse(html) + if player_response and 'videoDetails' in player_response: + title = player_response['videoDetails'].get('title') + if title: + return title + + # Fallback + return f"YouTube Video {video_id}" + + def _extract_description(self, html: str) -> str: + """Extract video description""" + # Method 1: Meta description + meta_desc = re.search(r'<meta name="description" content="([^"]*)"', html) + if meta_desc: + return meta_desc.group(1) + + # Method 2: shortDescription from player response, truncated to 500 chars + player_response = self._extract_ytInitialPlayerResponse(html) + if player_response and 'videoDetails' in player_response: + desc = player_response['videoDetails'].get('shortDescription', '') + if desc: + return desc[:500] + ('...' if len(desc) > 500 else '') + + return "" + + def _extract_channel_name(self, html: str) -> str: + """Extract channel name""" + # Method 1: ytInitialPlayerResponse + player_response = self._extract_ytInitialPlayerResponse(html) + if player_response and 'videoDetails' in player_response: + author = player_response['videoDetails'].get('author') + if author: + return author + + # Method 2: Meta tag + channel_pattern = r'<link itemprop="name" content="([^"]*)"' + match = re.search(channel_pattern, html) + if match: + return match.group(1) + + return "" + + def _extract_duration(self, html: str) -> Optional[int]: + """Extract video duration in seconds""" + player_response = self._extract_ytInitialPlayerResponse(html) + if player_response and 'videoDetails' in player_response: + duration = player_response['videoDetails'].get('lengthSeconds') + if duration: + try: + return int(duration) + except ValueError: + pass + + return None + + def _extract_view_count(self, html: str) -> Optional[int]: + """Extract view count""" + player_response = self._extract_ytInitialPlayerResponse(html) + if player_response and 'videoDetails' in player_response: + views = player_response['videoDetails'].get('viewCount') + if views: + try: + return int(views) + except ValueError: + pass + + return None
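+ + # Usage sketch (illustrative only; nothing in this module calls it): + # extractor = YouTubeMetadataExtractor() + # meta = extractor.get_video_metadata("https://youtu.be/dQw4w9WgXcQ") + # print(meta['title'], meta.get('view_count_formatted')) +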
def _extract_upload_date(self, html: str) -> str: + """Extract upload date""" + # Method 1: JSON-LD + json_ld_pattern = r'<script type="application/ld\+json"[^>]*>(.*?)</script>' + matches = re.findall(json_ld_pattern, html, re.DOTALL) + + for match in matches: + try: + data = json.loads(match) + if isinstance(data, list): + data = data[0] + if data.get('@type') == 'VideoObject' and 'uploadDate' in data: + return data['uploadDate'] + except (json.JSONDecodeError, KeyError): + continue + + # Method 2: publishDate from player response + player_response = self._extract_ytInitialPlayerResponse(html) + if player_response and 'microformat' in player_response: + microformat = player_response['microformat'].get('playerMicroformatRenderer', {}) + publish_date = microformat.get('publishDate', '') + if publish_date: + return publish_date + + return "" + + def _extract_channel_id(self, html: str) -> str: + """Extract channel ID""" + # Method 1: From channelId in page + channel_id_pattern = r'"channelId":"([^"]*)"' + match = re.search(channel_id_pattern, html) + if match: + return match.group(1) + + # Method 2: From player response + player_response = self._extract_ytInitialPlayerResponse(html) + if player_response and 'videoDetails' in player_response: + channel_id = player_response['videoDetails'].get('channelId') + if channel_id: + return channel_id + + return "" + + def _extract_like_count(self, html: str) -> Optional[int]: + """Extract like count""" + # Look for like count in various formats + patterns = [ + r'"likeCount":"?([0-9]+)"?', + r'accessibilityData":\{"label":"([0-9,]+) likes"', + r'"defaultText":\{"runs":\[\{"text":"([0-9,]+)"\}\],"accessibility' + ] + + for pattern in patterns: + match = re.search(pattern, html) + if match: + try: + # Remove commas and convert to int + return int(match.group(1).replace(',', '')) + except ValueError: + continue + + return None + + def _extract_comment_count(self, html: str) -> Optional[int]: + """Extract comment count""" + # Look for comment count + patterns = [ + r'"commentCount":"?([0-9]+)"?', + r'Comments?\s*\(([0-9,]+)\)', + r'"text":"([0-9,]+) Comments?"' + ] + + for pattern in patterns: + match = re.search(pattern, html, re.IGNORECASE) + if match: + try: + return int(match.group(1).replace(',', '')) + except ValueError: + continue + + return None + + def _extract_category(self, html: str) -> str: + """Extract video category""" + # Method 1: From category in page + category_pattern = r'"category":"([^"]*)"' + match = re.search(category_pattern, html) + if match: + return match.group(1) + + # Method 2: From genre in JSON-LD + json_ld_pattern = r'<script type="application/ld\+json"[^>]*>(.*?)</script>' + matches = re.findall(json_ld_pattern, html, re.DOTALL) + + for match in matches: + try: + data = json.loads(match) + if isinstance(data, list): + data = data[0] + if data.get('@type') == 'VideoObject' and 'genre' in data: + return data['genre'] + except (json.JSONDecodeError, KeyError): + continue + + return "" + + def _extract_keywords(self, html: str) -> list: + """Extract video keywords/tags""" + keywords = [] + + # Method 1: From meta keywords + meta_keywords = re.search(r'<meta name="keywords" content="([^"]*)"', html) + if meta_keywords: + keywords = [k.strip() for k in meta_keywords.group(1).split(',') if k.strip()] + + # Method 2: From keywords in player response + player_response = self._extract_ytInitialPlayerResponse(html) + if player_response and 'videoDetails' in player_response: + kw = player_response['videoDetails'].get('keywords') + if kw: + keywords = list(kw) + + return keywords + + def _extract_is_live(self, html: str) -> bool: + """Check if video is live content""" + # Look for live content indicators + live_patterns = [ + r'"isLiveContent":\s*(true|false)', + r'"isLive":\s*(true|false)', + r'"liveBroadcastDetails"', + r'\.live\s*=\s*(true|false)' + ] + + for pattern in live_patterns: + match = re.search(pattern, html, re.IGNORECASE) + if match: + if 'true' in match.group(0).lower(): + return True + elif 'false' in match.group(0).lower(): + return False + + # 
Check for LIVE badge in title + if 'LIVE NOW' in html or '🔴 LIVE' in html: + return True + + return False + + def _extract_is_family_safe(self, html: str) -> bool: + """Check if video is family safe""" + # Look for family safety indicators + safety_patterns = [ + r'"isFamilySafe":\s*(true|false)', + r'"isFamilyFriendly":\s*(true|false)' + ] + + for pattern in safety_patterns: + match = re.search(pattern, html, re.IGNORECASE) + if match: + return 'true' in match.group(0).lower() + + # Default to true if not specified + return True + + def _extract_ytInitialPlayerResponse(self, html: str) -> Optional[Dict]: + """Extract ytInitialPlayerResponse JSON data""" + try: + pattern = r'var ytInitialPlayerResponse = ({.*?});' + match = re.search(pattern, html) + if match: + return json.loads(match.group(1)) + except (json.JSONDecodeError, AttributeError): + pass + + return None + + def _clean_metadata(self, metadata: Dict) -> Dict: + """Clean and validate metadata""" + # Clean title + if metadata.get('title'): + metadata['title'] = metadata['title'].strip() + # Remove YouTube-specific suffixes + suffixes = [' - YouTube', ' | YouTube'] + for suffix in suffixes: + if metadata['title'].endswith(suffix): + metadata['title'] = metadata['title'][:-len(suffix)].strip() + + # Clean description + if metadata.get('description'): + metadata['description'] = metadata['description'].strip() + + # Clean channel name + if metadata.get('channel_name'): + metadata['channel_name'] = metadata['channel_name'].strip() + + # Clean category + if metadata.get('category'): + metadata['category'] = metadata['category'].strip() + + # Format duration + if metadata.get('duration'): + duration_seconds = metadata['duration'] + hours, remainder = divmod(duration_seconds, 3600) + minutes, seconds = divmod(remainder, 60) + + if hours > 0: + metadata['duration_formatted'] = f"{hours:d}:{minutes:02d}:{seconds:02d}" + else: + metadata['duration_formatted'] = f"{minutes:d}:{seconds:02d}" + + # Format view count + if metadata.get('view_count'): + views = metadata['view_count'] + if views >= 1_000_000: + metadata['view_count_formatted'] = f"{views/1_000_000:.1f}M views" + elif views >= 1_000: + metadata['view_count_formatted'] = f"{views/1_000:.1f}K views" + else: + metadata['view_count_formatted'] = f"{views} views" + + # Format like count + if metadata.get('like_count'): + likes = metadata['like_count'] + if likes >= 1_000_000: + metadata['like_count_formatted'] = f"{likes/1_000_000:.1f}M" + elif likes >= 1_000: + metadata['like_count_formatted'] = f"{likes/1_000:.1f}K" + else: + metadata['like_count_formatted'] = str(likes) + + # Format comment count + if metadata.get('comment_count'): + comments = metadata['comment_count'] + if comments >= 1_000_000: + metadata['comment_count_formatted'] = f"{comments/1_000_000:.1f}M" + elif comments >= 1_000: + metadata['comment_count_formatted'] = f"{comments/1_000:.1f}K" + else: + metadata['comment_count_formatted'] = str(comments) + + # Ensure keywords is a list + if not isinstance(metadata.get('keywords'), list): + metadata['keywords'] = [] + + return metadata + + def _fallback_metadata(self, url: str, video_id: Optional[str]) -> Dict: + """Return minimal metadata when extraction fails""" + if not video_id: + video_id = "unknown" + + return { + 'video_id': video_id, + 'url': url, + 'canonical_url': f"https://www.youtube.com/watch?v={video_id}" if video_id != "unknown" else url, + 'title': f"YouTube Video {video_id}", + 'description': "", + 'channel_name': "", + 'channel_id': "", + 'duration': None, 
+ 'view_count': None, + 'like_count': None, + 'comment_count': None, + 'upload_date': "", + 'category': "", + 'keywords': [], + 'is_live': False, + 'is_family_safe': True, + 'thumbnail_url': f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg" if video_id != "unknown" else "", + 'type': 'youtube_video' + } \ No newline at end of file diff --git a/src/youtube_processor.py b/src/youtube_processor.py new file mode 100644 index 0000000..12912bc --- /dev/null +++ b/src/youtube_processor.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +""" +YouTube Thumbnail Processor +""" + +import re +import requests +import logging +from typing import Optional, Tuple +from config import YOUTUBE_API_KEY + +logger = logging.getLogger(__name__) + + +class YouTubeProcessor: + """Handles YouTube video processing and thumbnail extraction""" + + def __init__(self): + self.youtube_api_base = "https://www.googleapis.com/youtube/v3" + + def extract_video_id(self, url: str) -> Optional[str]: + """Extract YouTube video ID from various URL formats""" + if not url: + return None + + # YouTube URL patterns + patterns = [ + r'(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})', + r'youtube\.com\/v\/([a-zA-Z0-9_-]{11})', + r'youtube\.com\/watch\?.*v=([a-zA-Z0-9_-]{11})', + ] + + for pattern in patterns: + match = re.search(pattern, url) + if match: + video_id = match.group(1) + logger.debug(f"Extracted video ID '{video_id}' from URL: {url}") + return video_id + + logger.warning(f"Could not extract video ID from URL: {url}") + return None + + def get_video_metadata(self, video_id: str) -> dict: + """Get video metadata from YouTube API or fallback""" + if YOUTUBE_API_KEY: + try: + response = requests.get( + f"{self.youtube_api_base}/videos", + params={ + 'id': video_id, + 'key': YOUTUBE_API_KEY, + 'part': 'snippet,status' + }, + timeout=10 + ) + + if response.status_code == 200: + data = response.json() + if data.get('items'): + logger.debug(f"Got metadata from YouTube API for video {video_id}") + return data['items'][0] + else: + logger.warning(f"No video found for ID {video_id} via API") + + except Exception as e: + logger.error(f"YouTube API error for video {video_id}: {e}") + + # Fallback: return basic metadata structure + logger.debug(f"Using fallback metadata for video {video_id}") + return { + 'snippet': { + 'thumbnails': { + 'maxres': {'url': f'https://img.youtube.com/vi/{video_id}/maxresdefault.jpg'}, + 'high': {'url': f'https://img.youtube.com/vi/{video_id}/hqdefault.jpg'}, + 'medium': {'url': f'https://img.youtube.com/vi/{video_id}/mqdefault.jpg'}, + 'default': {'url': f'https://img.youtube.com/vi/{video_id}/default.jpg'} + } + } + } + + def get_best_thumbnail_url(self, video_metadata: dict) -> Optional[str]: + """Get the best quality thumbnail URL available""" + thumbnails = video_metadata.get('snippet', {}).get('thumbnails', {}) + + # Priority order for thumbnail quality + quality_order = ['maxres', 'high', 'medium', 'default'] + + for quality in quality_order: + if quality in thumbnails: + url = thumbnails[quality]['url'] + logger.debug(f"Selected {quality} quality thumbnail: {url}") + return url + + logger.warning("No thumbnails found in metadata") + return None + + def download_thumbnail(self, video_id: str, thumbnail_url: str) -> Optional[bytes]: + """Download thumbnail image from YouTube""" + try: + logger.info(f"Downloading thumbnail for video {video_id}: {thumbnail_url}") + + response = requests.get( + thumbnail_url, + timeout=15, + headers={ + 'User-Agent': 'Mozilla/5.0 
(compatible; YouTubeThumbnailBot/1.0)' + } + ) + + if response.status_code == 200: + # Check if we got actual image data (not a 404 placeholder) + if len(response.content) > 1000: # Minimum size check + logger.info(f"Successfully downloaded thumbnail ({len(response.content)} bytes)") + return response.content + else: + logger.warning(f"Downloaded file too small ({len(response.content)} bytes), likely 404 placeholder") + + else: + logger.warning(f"Failed to download thumbnail: HTTP {response.status_code}") + + except Exception as e: + logger.error(f"Error downloading thumbnail from {thumbnail_url}: {e}") + + return None + + def download_best_thumbnail(self, video_id: str) -> Tuple[Optional[bytes], Optional[str]]: + """Download the best available thumbnail for a video""" + try: + # Get video metadata + video_metadata = self.get_video_metadata(video_id) + if not video_metadata: + logger.error(f"Could not get metadata for video: {video_id}") + return None, None + + # Get all thumbnail qualities + thumbnails = video_metadata.get('snippet', {}).get('thumbnails', {}) + quality_order = ['maxres', 'high', 'medium', 'default'] + + # Try each quality in order until we get a valid download + for quality in quality_order: + if quality in thumbnails: + thumbnail_url = thumbnails[quality]['url'] + logger.debug(f"Trying {quality} quality thumbnail") + + thumbnail_data = self.download_thumbnail(video_id, thumbnail_url) + if thumbnail_data: + return thumbnail_data, f"youtube_thumb_{video_id}_{quality}.jpg" + + logger.error(f"Could not download any thumbnail for video: {video_id}") + return None, None + + except Exception as e: + logger.error(f"Error downloading thumbnail for video {video_id}: {e}") + return None, None \ No newline at end of file diff --git a/start.sh b/start.sh new file mode 100755 index 0000000..20689fa --- /dev/null +++ b/start.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# YouTube-Directus Automation Service Startup Script + +echo "🚀 Starting YouTube-Directus Automation Service..." + +# Check if virtual environment exists +if [ ! -d "venv" ]; then + echo "📦 Creating virtual environment..." + python3 -m venv venv +fi + +# Activate virtual environment +echo "🔄 Activating virtual environment..." +source venv/bin/activate + +# Install/update requirements +echo "📋 Installing requirements..." +pip install -r requirements.txt + +# Load environment variables +if [ -f ".env" ]; then + echo "⚙️ Loading environment variables..." + set -a + source .env + set +a +else + echo "⚠️ No .env file found - using defaults" +fi + +# Start the FastAPI service +echo "🌐 Starting FastAPI service on port ${PORT:-8000}..." 
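+# ${PORT:-8000} falls back to 8000 when PORT is unset; a one-off override +# (hypothetical) would be: PORT=9000 ./start.sh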
+echo "📍 Health check: http://localhost:${PORT:-8000}/health" +echo "🔗 Webhook endpoint: http://localhost:${PORT:-8000}/webhook/directus" +echo "" +echo "Press Ctrl+C to stop the service" +echo "" + +cd src && python main.py \ No newline at end of file diff --git a/test_service.py b/test_service.py new file mode 100755 index 0000000..7cb38b9 --- /dev/null +++ b/test_service.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +""" +Test script for YouTube-Directus Automation Service +""" + +import asyncio +import json +import httpx +from pathlib import Path + +async def test_service(): + """Test the YouTube automation service""" + + base_url = "http://localhost:8000" + + print("🧪 Testing YouTube-Directus Automation Service") + print("=" * 50) + + async with httpx.AsyncClient() as client: + + # Test 1: Health check + print("1️⃣ Testing health check...") + try: + response = await client.get(f"{base_url}/health") + if response.status_code == 200: + health_data = response.json() + print(f" ✅ Service is healthy") + print(f" 📊 Config: {health_data.get('config', {})}") + else: + print(f" ❌ Health check failed: {response.status_code}") + return False + except Exception as e: + print(f" ❌ Could not connect to service: {e}") + print(" 💡 Make sure the service is running: ./start.sh") + return False + + # Test 2: Root endpoint + print("\n2️⃣ Testing root endpoint...") + try: + response = await client.get(f"{base_url}/") + if response.status_code == 200: + root_data = response.json() + print(f" ✅ Root endpoint working") + print(f" 🔗 Available endpoints: {root_data.get('endpoints', {})}") + else: + print(f" ❌ Root endpoint failed: {response.status_code}") + except Exception as e: + print(f" ❌ Root endpoint error: {e}") + + # Test 3: Mock webhook (YouTube video creation) + print("\n3️⃣ Testing webhook with mock YouTube video...") + try: + # Mock Directus webhook payload for a YouTube video creation + mock_payload = { + "event": "items.create", + "accountability": { + "user": "test-user", + "role": "admin" + }, + "payload": { + "id": "123", + "youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ", # Rick Roll for testing + "title": "Test YouTube Video" + }, + "keys": ["123"] + } + + headers = { + "Content-Type": "application/json", + # Note: In production, you'd include x-directus-signature header + } + + response = await client.post( + f"{base_url}/webhook/directus", + json=mock_payload, + headers=headers + ) + + if response.status_code == 200: + webhook_data = response.json() + print(f" ✅ Webhook processed successfully") + print(f" 📝 Response: {webhook_data}") + + # Wait a moment for background processing + print(" ⏳ Waiting for background thumbnail processing...") + await asyncio.sleep(3) + + else: + print(f" ❌ Webhook failed: {response.status_code}") + print(f" 📄 Response: {response.text}") + + except Exception as e: + print(f" ❌ Webhook test error: {e}") + + # Test 4: Test YouTube URL parsing + print("\n4️⃣ Testing YouTube URL parsing...") + test_urls = [ + "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + "https://youtu.be/dQw4w9WgXcQ", + "https://www.youtube.com/embed/dQw4w9WgXcQ", + "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=30s" + ] + + # Import the processor to test URL parsing + import sys + sys.path.append("src") + from main import YouTubeProcessor + + processor = YouTubeProcessor() + + for url in test_urls: + video_id = processor.extract_video_id(url) + if video_id == "dQw4w9WgXcQ": + print(f" ✅ {url} → {video_id}") + else: + print(f" ❌ {url} → {video_id} (expected: dQw4w9WgXcQ)") + + 
print("\n" + "=" * 50) + print("✅ Service testing completed!") + print("\n💡 Next steps:") + print(" 1. Configure Directus webhook at https://enias.zeabur.app/admin to point to: http://localhost:8000/webhook/directus") + print(" 2. Add YouTube API key to .env for better thumbnail quality") + print(" 3. Test with real Directus media_items creation") + + return True + + +if __name__ == "__main__": + asyncio.run(test_service()) \ No newline at end of file