# trax/scripts/tm_trax.py

#!/usr/bin/env python3
"""
Trax-specific Task Master cache reader
Optimized for transcription pipeline tasks (v1-v4)
"""

import argparse
import json
import os
import re
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

# Add parent scripts to path for tm_cache.
# The directory is resolved relative to this file: three levels up, then
# scripts/taskmaster. It is inserted at the front of sys.path so that the
# `tm_cache` import below is looked up there first.
sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'scripts' / 'taskmaster'))

# TaskMasterCache is the base class of TraxTaskCache, so nothing in this
# module can work without it: report the problem and exit with status 1.
try:
    from tm_cache import TaskMasterCache
except ImportError:
    print("❌ Could not import tm_cache. Make sure parent project is set up.")
    sys.exit(1)


class TraxTaskCache(TaskMasterCache):
    """Task Master cache that also groups tasks by Trax pipeline stage.

    On top of whatever ``TaskMasterCache`` indexes, this class maintains:

    * ``_pipeline_versions`` -- pipeline version (``'v1'`` .. ``'v4'``) mapped
      to the ids of the tasks assigned to it;
    * ``_transcription_tasks``, ``_audio_tasks``, ``_ai_enhancement_tasks`` --
      ids of tasks whose title or description mentions matching keywords.

    Classification is a keyword heuristic over each task's ``title``,
    ``description`` and ``details`` fields, so one task may appear in several
    groups (or in none).
    """

    # Pipeline versions, in the order they are reported.
    _PIPELINE_VERSION_ORDER = ('v1', 'v2', 'v3', 'v4')

    # Substring keywords, matched against the lower-cased "title description".
    _TRANSCRIPTION_KEYWORDS = ('transcri', 'whisper', 'audio', 'media')
    _AUDIO_KEYWORDS = ('audio', 'ffmpeg', 'wav', 'mp3', 'mp4')
    # 'openai' is listed explicitly because the bare 'ai' token below no
    # longer matches inside longer words.
    _AI_KEYWORDS = ('enhance', 'deepseek', 'gpt', 'claude', 'openai')

    # "ai" only counts as a standalone token. A plain substring test would
    # also fire on "main", "fail", "email", "maintain", ... and classify
    # nearly every task as AI-related.
    _AI_TOKEN = re.compile(r'(?<![a-z0-9])ai(?![a-z0-9])')

    def __init__(self, project_root: Optional[str] = None):
        """Create the cache.

        Args:
            project_root: Project directory handed to ``TaskMasterCache``.
                When omitted (or empty), the directory two levels above this
                file is used.
        """
        # Trax-specific indices. They are created before delegating to the
        # base initializer so that _build_indices() is safe to run even if
        # the base class builds its indices during construction (the base
        # class is not visible from this file).
        self._pipeline_versions = defaultdict(list)  # v1, v2, v3, v4 -> task_ids
        self._transcription_tasks = []
        self._audio_tasks = []
        self._ai_enhancement_tasks = []

        # Use Trax project root
        trax_root = Path(__file__).parent.parent
        super().__init__(project_root or str(trax_root))

    @staticmethod
    def _lowered_field(task: Dict[str, Any], key: str) -> str:
        """Return ``task[key]`` lower-cased, or ``''`` if absent or not a string."""
        value = task.get(key)
        return value.lower() if isinstance(value, str) else ''

    def _build_indices(self) -> None:
        """Rebuild the base indices, then the Trax-specific ones."""
        super()._build_indices()

        # Clear Trax indices so a rebuild never accumulates duplicates.
        self._pipeline_versions.clear()
        self._transcription_tasks.clear()
        self._audio_tasks.clear()
        self._ai_enhancement_tasks.clear()

        for task_id, task in self._tasks.items():
            title = self._lowered_field(task, 'title')
            description = self._lowered_field(task, 'description')
            details = self._lowered_field(task, 'details')

            # Pipeline version detection (a task may match several versions).
            if 'v1' in title or 'basic transcription' in description:
                self._pipeline_versions['v1'].append(task_id)
            if 'v2' in title or 'ai enhance' in description or 'deepseek' in details:
                self._pipeline_versions['v2'].append(task_id)
            if 'v3' in title or 'multi-pass' in description:
                self._pipeline_versions['v3'].append(task_id)
            if 'v4' in title or 'diariz' in description or 'speaker' in details:
                self._pipeline_versions['v4'].append(task_id)

            # Task type detection. The fields are joined with a space so a
            # keyword cannot match across the title/description boundary.
            text = f"{title} {description}"

            if any(keyword in text for keyword in self._TRANSCRIPTION_KEYWORDS):
                self._transcription_tasks.append(task_id)

            if any(keyword in text for keyword in self._AUDIO_KEYWORDS):
                self._audio_tasks.append(task_id)

            if self._AI_TOKEN.search(text) or any(
                    keyword in text for keyword in self._AI_KEYWORDS):
                self._ai_enhancement_tasks.append(task_id)

    def show_pipeline_stats(self) -> None:
        """Print overall, per-pipeline-version and per-category task counts."""
        print("\n📊 Trax Pipeline Statistics")
        print("=" * 50)

        # Overall stats. `.get` keeps a status with no tasks from raising
        # (assumes by_status is a dict-like mapping of status -> task ids).
        by_status = self._index['by_status']
        total = len(self._tasks)
        pending = len(by_status.get('pending', ()))
        in_progress_ids = list(by_status.get('in-progress', ()))
        in_progress = len(in_progress_ids)
        done = len(by_status.get('done', ()))

        print(f"Total Tasks: {total}")
        print(f"✅ Done: {done} ({done*100//total if total else 0}%)")
        print(f"🚧 In Progress: {in_progress}")
        print(f"📋 Pending: {pending}")

        # Pipeline version breakdown (versions without tasks are omitted).
        print("\n🔄 Pipeline Versions:")
        for version in self._PIPELINE_VERSION_ORDER:
            tasks = self.filter_by_pipeline(version)
            if tasks:
                done_count = sum(1 for t in tasks if t.get('status') == 'done')
                print(f"  {version}: {len(tasks)} tasks ({done_count} done)")

        # Task type breakdown
        print("\n📁 Task Categories:")
        print(f"  🎵 Transcription: {len(self._transcription_tasks)}")
        print(f"  🔊 Audio Processing: {len(self._audio_tasks)}")
        print(f"  🤖 AI Enhancement: {len(self._ai_enhancement_tasks)}")

        # Current focus
        if in_progress:
            print("\n🎯 Current Focus:")
            for task_id in in_progress_ids:
                task = self._tasks[task_id]
                print(f"  [{task_id}] {task['title']}")

        # Next up
        next_task = self.get_next_task()
        if next_task:
            print(f"\n⏭️  Next: [{next_task['id']}] {next_task['title']}")

    def filter_by_pipeline(self, version: str) -> List[Dict]:
        """Return the tasks assigned to a pipeline version.

        Args:
            version: Pipeline version such as ``'v1'``; surrounding whitespace
                and letter case are ignored.

        Returns:
            The matching task dicts, or an empty list for an unknown version.
        """
        key = version.strip().lower()
        # `.get` (not indexing) so lookups never add keys to the defaultdict.
        task_ids = self._pipeline_versions.get(key, [])
        return [self._tasks[tid] for tid in task_ids if tid in self._tasks]

    def get_transcription_roadmap(self) -> None:
        """Print, per pipeline version, open tasks and completion progress.

        In-progress tasks are listed first, followed by at most three pending
        tasks. Versions with no tasks are skipped.
        """
        print("\n🗺️  Trax Implementation Roadmap")
        print("=" * 50)

        for version in self._PIPELINE_VERSION_ORDER:
            tasks = self.filter_by_pipeline(version)
            if not tasks:
                continue

            pending = [t for t in tasks if t.get('status') == 'pending']
            in_progress = [t for t in tasks if t.get('status') == 'in-progress']
            done = [t for t in tasks if t.get('status') == 'done']

            if len(done) == len(tasks):
                status_icon = "✅"
            elif in_progress:
                status_icon = "🚧"
            else:
                status_icon = "📋"
            print(f"\n{status_icon} Pipeline {version.upper()}:")

            # Show in-progress first
            for task in in_progress:
                print(f"  🚧 [{task['id']}] {task['title']}")

            # Then pending
            for task in pending[:3]:  # Limit to 3
                print(f"  ⏳ [{task['id']}] {task['title']}")

            if len(pending) > 3:
                print(f"  ... and {len(pending)-3} more pending")

            # Summary (tasks is non-empty here, so the division is safe).
            progress = len(done) * 100 // len(tasks)
            print(f"  Progress: {progress}% ({len(done)}/{len(tasks)})")


# Icon printed in front of a task line for each known status; any other
# status falls back to '❓'.
_STATUS_ICONS = {
    'done': '✅',
    'in-progress': '🚧',
    'pending': '📋',
}


def _print_task_line(task):
    """Print one ``<icon> [id] title`` line for *task*."""
    icon = _STATUS_ICONS.get(task.get('status'), '❓')
    print(f"{icon} [{task['id']}] {task['title']}")


def _print_task_listing(tasks, limit=None):
    """Print up to *limit* task lines, then how many were left out (if any)."""
    shown = tasks if limit is None else tasks[:limit]
    for task in shown:
        _print_task_line(task)
    if len(tasks) > len(shown):
        print(f"... and {len(tasks) - len(shown)} more")


def _print_category(cache, heading, task_ids, limit=10):
    """Print *heading* followed by the cached tasks named by *task_ids*."""
    print(heading)
    tasks = [cache._tasks[tid] for tid in task_ids if tid in cache._tasks]
    if tasks:
        _print_task_listing(tasks, limit)
    else:
        print("  (no matching tasks)")


def main():
    """Command-line entry point for the Trax Task Master cache.

    Parses the command line, loads the cache, and runs the first requested
    action in this order: ``--stats``, ``--roadmap``, ``--next``, ``--show``,
    ``--search``, ``--pipeline``, ``--transcription``, ``--audio``, ``--ai``,
    ``--list``/``--status``. With no action the pipeline statistics are
    printed together with a hint about ``--help``.
    """
    parser = argparse.ArgumentParser(description='Trax Task Master Cache')

    # Display options
    parser.add_argument('--list', action='store_true', help='List all tasks')
    parser.add_argument('--next', action='store_true', help='Get next task')
    parser.add_argument('--stats', action='store_true', help='Show statistics')
    parser.add_argument('--roadmap', action='store_true', help='Show implementation roadmap')

    # Filter options
    parser.add_argument('--status', help='Filter by status')
    parser.add_argument('--pipeline', help='Filter by pipeline version (v1-v4)')
    parser.add_argument('--search', help='Search in titles/descriptions')
    parser.add_argument('--show', help='Show specific task')

    # Task type filters
    parser.add_argument('--transcription', action='store_true', help='Show transcription tasks')
    parser.add_argument('--audio', action='store_true', help='Show audio processing tasks')
    parser.add_argument('--ai', action='store_true', help='Show AI enhancement tasks')

    args = parser.parse_args()

    # Initialize cache
    cache = TraxTaskCache()
    cache.load()

    # Handle commands
    if args.stats:
        cache.show_pipeline_stats()

    elif args.roadmap:
        cache.get_transcription_roadmap()

    elif args.next:
        task = cache.get_next_task()
        if task:
            cache.show_task(task['id'])
        else:
            print("✅ No pending tasks!")

    elif args.show:
        cache.show_task(args.show)

    elif args.search:
        results = cache.search(args.search)
        if results:
            print(f"\n🔍 Found {len(results)} matches for '{args.search}':")
            _print_task_listing(results, limit=10)
        else:
            print(f"No matches found for '{args.search}'")

    elif args.pipeline:
        # Index keys are lower-case ('v1'..'v4'), so accept 'V1' as well.
        version = args.pipeline.strip().lower()
        tasks = cache.filter_by_pipeline(version)
        if tasks:
            print(f"\n🔄 Pipeline {version.upper()} Tasks:")
            _print_task_listing(tasks)
        else:
            print(f"No tasks found for pipeline {args.pipeline}")

    elif args.transcription:
        _print_category(cache, "\n🎵 Transcription Tasks:", cache._transcription_tasks)

    elif args.audio:
        # Previously parsed but never handled: fell through to the stats view.
        _print_category(cache, "\n🔊 Audio Processing Tasks:", cache._audio_tasks)

    elif args.ai:
        # Previously parsed but never handled: fell through to the stats view.
        _print_category(cache, "\n🤖 AI Enhancement Tasks:", cache._ai_enhancement_tasks)

    elif args.list or args.status:
        # List with optional status filter
        tasks = cache.list_tasks(status=args.status)
        if tasks:
            print(f"\n📋 Tasks ({args.status or 'all'}):")
            _print_task_listing(tasks, limit=20)
        elif args.status:
            print(f"No tasks found with status: {args.status}")
        else:
            # Plain --list on an empty cache: there is no status to report.
            print("No tasks found")

    else:
        # Default: show stats
        cache.show_pipeline_stats()
        print("\n💡 Use --help to see all options")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()