# trax/scripts/generate_cursor_rules.py

#!/usr/bin/env python3
"""
Cursor Rules Generator
Automates the creation of Cursor rules based on codebase analysis.
Based on PageAI tutorial: https://pageai.pro/blog/cursor-rules-tutorial
"""

import os
import re
import json
from pathlib import Path
from typing import Dict, List, Optional
from dataclasses import dataclass

@dataclass
class RuleTemplate:
    """Template for generating Cursor rules.

    Plain data holder. Its fields parallel the arguments taken by
    ``CursorRulesGenerator.create_rule_file``; no code visible in this
    module instantiates it.
    """
    # Rule identifier (presumably the ``.mdc`` file stem -- confirm with callers).
    name: str
    # Human-readable summary of what the rule covers.
    description: str
    # Glob pattern selecting the files the rule is meant for.
    globs: str
    # Presumably maps to the ``alwaysApply`` frontmatter flag -- confirm.
    always_apply: bool
    # Body text of the rule.
    content: str

class CursorRulesGenerator:
    """Generate Cursor rules automatically from codebase analysis"""

    def __init__(self, project_root: Path):
        self.project_root = project_root
        self.rules_dir = project_root / ".cursor" / "rules"
        self.rules_dir.mkdir(parents=True, exist_ok=True)

    def analyze_file_patterns(self, file_paths: List[Path]) -> Dict[str, List[str]]:
        """Analyze files to identify common patterns"""
        patterns = {
            "imports": [],
            "function_definitions": [],
            "class_definitions": [],
            "error_handling": [],
            "testing_patterns": [],
            "naming_conventions": []
        }

        for file_path in file_paths:
            if file_path.suffix in ['.py', '.js', '.ts', '.tsx']:
                content = file_path.read_text()

                # Extract patterns
                patterns["imports"].extend(self._extract_imports(content))
                patterns["function_definitions"].extend(self._extract_functions(content))
                patterns["class_definitions"].extend(self._extract_classes(content))
                patterns["error_handling"].extend(self._extract_error_handling(content))
                patterns["testing_patterns"].extend(self._extract_testing_patterns(content))
                patterns["naming_conventions"].extend(self._extract_naming_conventions(content))

        return patterns

    def _extract_imports(self, content: str) -> List[str]:
        """Return up to ten import statements found in ``content``.

        A line counts as an import when, after trimming surrounding
        whitespace, it begins with ``import `` or ``from ``. Matches are
        returned trimmed and in source order.
        """
        trimmed_lines = (raw.strip() for raw in content.split('\n'))
        matches = [
            text for text in trimmed_lines
            if text.startswith(('import ', 'from '))
        ]
        # Only the first ten matches are kept.
        return matches[:10]

    def _extract_functions(self, content: str) -> List[str]:
        """Return up to ten function names defined in ``content``.

        Python ``def`` statements are collected first, followed by
        JavaScript/TypeScript-style ``name(...) {`` definitions.

        Args:
            content: Source text of a single file.

        Returns:
            Function names in the order described above, truncated to ten.
        """
        # Python: match only up to the opening parenthesis. Requiring the
        # full ``(...):`` signature would miss every function that has a
        # return annotation (``-> T:``) or parentheses in a default value.
        python_funcs = re.findall(r'\bdef\s+(\w+)\s*\(', content)

        # JavaScript/TypeScript: the ``name(...) {`` shape is shared by
        # control-flow statements (``if (x) {``) and by anonymous
        # ``function (x) {`` expressions, so those words are filtered out.
        non_function_words = {
            'if', 'for', 'while', 'switch', 'catch', 'with', 'function',
        }
        js_funcs = [
            name
            for name in re.findall(r'(?:function\s+)?(\w+)\s*\([^)]*\)\s*\{', content)
            if name not in non_function_words
        ]

        return (python_funcs + js_funcs)[:10]

    def _extract_classes(self, content: str) -> List[str]:
        """Return up to ten class names declared in ``content``.

        Args:
            content: Source text of a single file.

        Returns:
            Each name following a ``class`` keyword, in source order and
            reported once per occurrence, truncated to ten.
        """
        # Python and JavaScript/TypeScript both spell a declaration as
        # ``class Name``, so one scan covers them; scanning once per language
        # with the same pattern would report every class twice. The word
        # boundary keeps identifiers such as ``subclass`` from matching.
        return re.findall(r'\bclass\s+(\w+)', content)[:10]

    def _extract_error_handling(self, content: str) -> List[str]:
        """Return the bodies of up to five guarded blocks in ``content``.

        Python ``try:`` / ``except`` bodies are collected first, then
        JavaScript/TypeScript ``try {`` / ``} catch`` bodies.

        Args:
            content: Source text of a single file.

        Returns:
            The text between the ``try`` line and its handler, with the
            original indentation kept and without the trailing newline,
            truncated to five entries.
        """
        flags = re.MULTILINE | re.DOTALL

        # Group 1 captures the indentation of the ``try`` line and the
        # back-reference requires the handler at that same indentation.
        # Without it only column-0 handlers match, so a ``try`` inside a
        # function is either missed or runs on to an unrelated handler.
        python_blocks = re.findall(
            r'^([ \t]*)try:[ \t]*\n(.*?)\n\1except\b',
            content,
            flags,
        )
        js_blocks = re.findall(
            r'^([ \t]*)try\s*\{[ \t]*\n(.*?)\n\1\}\s*catch\b',
            content,
            flags,
        )

        # findall yields (indent, body) pairs; only the body is reported.
        bodies = [body for _indent, body in python_blocks]
        bodies += [body for _indent, body in js_blocks]
        return bodies[:5]

    def _extract_testing_patterns(self, content: str) -> List[str]:
        """Extract testing patterns"""
        test_patterns = []

        # Python pytest
        pytest_funcs = re.findall(r'def\s+test_\w+', content)
        test_patterns.extend(pytest_funcs)

        # JavaScript/TypeScript tests
        js_tests = re.findall(r'(?:it|test|describe)\s*\(', content)
        test_patterns.extend(js_tests)

        return test_patterns[:5]

    def _extract_naming_conventions(self, content: str) -> List[str]:
        """Return sample identifier names used in ``content``.

        Args:
            content: Source text of a single file.

        Returns:
            Up to ten names that appear directly before an ``=`` sign,
            followed by up to ten names introduced by ``def``.
        """
        # Names on the left of ``=``. The look-ahead skips ``==`` / ``===``
        # so the left operand of a comparison is not reported as a variable.
        variables = re.findall(r'(\w+)\s*=(?!=)', content)

        # Function names introduced by ``def``.
        functions = re.findall(r'\bdef\s+(\w+)', content)

        return variables[:10] + functions[:10]

    def generate_rule_content(self, patterns: Dict[str, List[str]], rule_type: str) -> str:
        """Return the rule body for ``rule_type``.

        ``"python"``, ``"javascript"`` and ``"testing"`` select their
        dedicated generators; every other value falls back to the generic
        rule. ``patterns`` is handed to the chosen generator unchanged.
        """
        specialised = {
            "python": self._generate_python_rule,
            "javascript": self._generate_javascript_rule,
            "testing": self._generate_testing_rule,
        }
        build = specialised.get(rule_type, self._generate_generic_rule)
        return build(patterns)

    def _generate_python_rule(self, patterns: Dict[str, List[str]]) -> str:
        """Build the Markdown body of the Python conventions rule.

        The text is a fixed template assembled from one literal per section;
        ``patterns`` is accepted but not consulted.
        """
        title = """# Python Development Rules

"""
        import_section = """## Import Patterns

Based on your codebase, use these import patterns:

```python
# Standard library imports first
import os
import re
from pathlib import Path
from typing import Dict, List, Optional

# Third-party imports
import click
from rich.console import Console

# Local imports
from src.config import config
from src.services.protocols import TranscriptionServiceProtocol
```

"""
        function_section = """## Function Definitions

Follow these patterns for function definitions:

```python
def function_name(param1: str, param2: Optional[int] = None) -> ReturnType:
    \"\"\"Docstring describing the function's purpose.\"\"\"
    # Implementation
    return result
```

"""
        error_section = """## Error Handling

Use consistent error handling patterns:

```python
try:
    # Operation that might fail
    result = process_data(input_data)
except SpecificError as e:
    logger.error(f"Failed to process data: {e}")
    raise
except Exception as e:
    logger.error(f"Unexpected error: {e}")
    raise
```

"""
        naming_section = """## Naming Conventions

- Use `snake_case` for functions and variables
- Use `PascalCase` for classes
- Use `UPPER_CASE` for constants
- Use descriptive names that explain purpose
"""
        # Sections are joined in document order with no extra separators;
        # each literal already carries its own trailing blank line.
        return "".join((
            title,
            import_section,
            function_section,
            error_section,
            naming_section,
        ))

    def _generate_javascript_rule(self, patterns: Dict[str, List[str]]) -> str:
        """Build the Markdown body of the JavaScript/TypeScript conventions rule.

        The text is a fixed template assembled from one literal per section;
        ``patterns`` is accepted but not consulted.
        """
        title = """# JavaScript/TypeScript Development Rules

"""
        import_section = """## Import Patterns

```typescript
// Third-party imports first
import React from 'react';
import { useState, useEffect } from 'react';

// Local imports
import { ComponentName } from './ComponentName';
import { useCustomHook } from '../hooks/useCustomHook';
```

"""
        function_section = """## Function Definitions

```typescript
// Function declarations
function functionName(param1: string, param2?: number): ReturnType {
  // Implementation
  return result;
}

// Arrow functions for callbacks
const handleClick = (event: React.MouseEvent): void => {
  // Implementation
};
```

"""
        error_section = """## Error Handling

```typescript
try {
  const result = await apiCall();
  return result;
} catch (error) {
  console.error('API call failed:', error);
  throw error;
}
```

"""
        naming_section = """## Naming Conventions

- Use `camelCase` for functions and variables
- Use `PascalCase` for components and classes
- Use `UPPER_CASE` for constants
- Use descriptive names that explain purpose
"""
        # Sections are joined in document order with no extra separators;
        # each literal already carries its own trailing blank line.
        return "".join((
            title,
            import_section,
            function_section,
            error_section,
            naming_section,
        ))

    def _generate_testing_rule(self, patterns: Dict[str, List[str]]) -> str:
        """Build the Markdown body of the testing conventions rule.

        The text is a fixed template assembled from one literal per section;
        ``patterns`` is accepted but not consulted.
        """
        title = """# Testing Rules

"""
        python_example = """## Test Structure

```python
# Python (pytest)
def test_function_name():
    \"\"\"Test description.\"\"\"
    # Arrange
    input_data = "test input"

    # Act
    result = function_to_test(input_data)

    # Assert
    assert result == expected_output
```

"""
        typescript_example = """```typescript
// JavaScript/TypeScript (Jest)
describe('ComponentName', () => {
  it('should render correctly', () => {
    // Arrange
    const props = { test: 'value' };

    // Act
    render(<ComponentName {...props} />);

    // Assert
    expect(screen.getByText('expected text')).toBeInTheDocument();
  });
});
```

"""
        best_practices = """## Testing Best Practices

- Write tests for both success and failure cases
- Use descriptive test names that explain the scenario
- Follow AAA pattern (Arrange, Act, Assert)
- Mock external dependencies
- Test edge cases and error conditions
"""
        # Sections are joined in document order with no extra separators;
        # each literal already carries its own trailing blank line.
        return "".join((
            title,
            python_example,
            typescript_example,
            best_practices,
        ))

    def _generate_generic_rule(self, patterns: Dict[str, List[str]]) -> str:
        """Build the Markdown body of the language-agnostic fallback rule.

        The text is a fixed template assembled from one literal per section;
        ``patterns`` is accepted but not consulted.
        """
        title = """# Generic Development Rules

"""
        organization_section = """## Code Organization

- Keep functions small and focused
- Use meaningful variable names
- Add comments for complex logic
- Follow consistent formatting

"""
        error_section = """## Error Handling

- Always handle potential errors
- Provide meaningful error messages
- Log errors appropriately
- Don't ignore exceptions

"""
        performance_section = """## Performance

- Optimize for readability first
- Profile before optimizing
- Use appropriate data structures
- Avoid premature optimization
"""
        # Sections are joined in document order with no extra separators;
        # each literal already carries its own trailing blank line.
        return "".join((
            title,
            organization_section,
            error_section,
            performance_section,
        ))

    def create_rule_file(self, rule_name: str, content: str, description: str = "",
                        globs: str = "**/*", always_apply: bool = False) -> Path:
        """Create a new Cursor rule file"""

        rule_path = self.rules_dir / f"{rule_name}.mdc"

        frontmatter = f"""---
description: {description or f"Rules for {rule_name}"}
globs: {globs}
alwaysApply: {str(always_apply).lower()}
---

"""

        full_content = frontmatter + content
        rule_path.write_text(full_content)

        print(f"✅ Created rule: {rule_path}")
        return rule_path

    def generate_rules_from_directory(self, source_dir: str, rule_type: str = "generic") -> List[Path]:
        """Generate rules from a specific directory"""
        source_path = self.project_root / source_dir

        if not source_path.exists():
            print(f"❌ Directory not found: {source_path}")
            return []

        # Find relevant files
        file_extensions = {
            "python": [".py"],
            "javascript": [".js", ".ts", ".tsx"],
            "testing": [".py", ".js", ".ts", ".tsx"]
        }

        extensions = file_extensions.get(rule_type, [".py", ".js", ".ts", ".tsx"])
        files = []

        for ext in extensions:
            files.extend(source_path.rglob(f"*{ext}"))

        if not files:
            print(f"❌ No files found in {source_dir}")
            return []

        # Analyze patterns
        patterns = self.analyze_file_patterns(files)

        # Generate rule content
        content = self.generate_rule_content(patterns, rule_type)

        # Create rule file
        rule_name = f"{source_dir.replace('/', '-')}-patterns"
        rule_path = self.create_rule_file(
            rule_name=rule_name,
            content=content,
            description=f"Patterns and conventions for {source_dir}",
            globs=f"{source_dir}/**/*",
            always_apply=False
        )

        return [rule_path]

def main():
    """Generate rule files for the ``src``, ``tests`` and ``scripts`` directories.

    The current working directory is used as the project root. Progress
    and a summary of the created rule files are printed to stdout.
    """
    generator = CursorRulesGenerator(Path.cwd())

    # Banner
    print("🔧 Cursor Rules Generator")
    print("=" * 50)

    # (directory, rule type) pairs, processed in this order.
    targets = (
        ("src", "python"),
        ("tests", "testing"),
        ("scripts", "python"),
    )

    created_rules = []
    for directory, rule_type in targets:
        print(f"\n📁 Analyzing {directory}...")
        created_rules += generator.generate_rules_from_directory(directory, rule_type)

    # Summary of everything that was written.
    print(f"\n✅ Generated {len(created_rules)} rules:")
    for rule_path in created_rules:
        print(f"  - {rule_path.name}")

if __name__ == "__main__":
    main()