youtube-summarizer/test_story_4_6.py

"""Test script for Story 4.6: RAG-Powered Video Chat implementation."""

import asyncio
import sys
from pathlib import Path

# Add backend to Python path
sys.path.insert(0, str(Path(__file__).parent / "backend"))

async def test_story_4_6():
    """Test Story 4.6 RAG implementation."""
    print("🧪 Testing Story 4.6: RAG-Powered Video Chat")
    print("=" * 60)

    # Test imports
    print("\n1. Testing imports...")
    try:
        from backend.services.chroma_service import ChromaService
        from backend.services.transcript_chunker import TranscriptChunker
        from backend.services.semantic_search_service import SemanticSearchService
        from backend.services.rag_service import RAGService
        from backend.models.chat import ChatSession, ChatMessage, VideoChunk
        from backend.api.chat import router as chat_router
        print("✅ All imports successful")
    except ImportError as e:
        print(f"❌ Import failed: {e}")
        return False

    # Test service initialization
    print("\n2. Testing service initialization...")
    try:
        chroma_service = ChromaService()
        chunker_service = TranscriptChunker()
        search_service = SemanticSearchService(chroma_service=chroma_service)
        rag_service = RAGService(
            search_service=search_service,
            chroma_service=chroma_service,
            chunker_service=chunker_service
        )
        print("✅ All services initialized")
    except Exception as e:
        print(f"❌ Service initialization failed: {e}")
        return False

    # Test transcript chunking
    print("\n3. Testing transcript chunking...")
    try:
        test_transcript = """
        [00:00:10] Welcome to this tutorial on machine learning fundamentals.
        [00:00:30] Today we'll be covering the basics of neural networks and how they work.
        [00:01:00] First, let's understand what a neural network is. A neural network is a computing system inspired by biological neural networks.
        [00:01:30] It consists of interconnected nodes, called neurons, that process and transmit information.
        [00:02:00] These networks can learn patterns from data and make predictions on new, unseen data.
        [00:02:30] The learning process involves adjusting weights and biases to minimize prediction errors.
        """

        chunks = chunker_service.chunk_transcript(
            transcript=test_transcript,
            video_id="test_video_123"
        )

        print(f"✅ Created {len(chunks)} chunks from test transcript")

        # Show chunk stats
        stats = chunker_service.get_chunking_stats(chunks)
        print(f"   - Total chunks: {stats['total_chunks']}")
        print(f"   - Timestamped chunks: {stats['timestamped_chunks']}")
        print(f"   - Average chunk size: {stats['avg_chunk_size']} chars")

    except Exception as e:
        print(f"❌ Transcript chunking failed: {e}")
        return False

    # Test ChromaDB initialization (without actual indexing)
    print("\n4. Testing ChromaDB health check...")
    try:
        health = await chroma_service.health_check()
        if health.get('status') == 'healthy':
            print("✅ ChromaDB health check passed")
            print(f"   - Embedding model: {health['embedding_model']}")
            print(f"   - Collection count: {health['collection_count']}")
        else:
            print(f"⚠️ ChromaDB health check warning: {health.get('error', 'Unknown')}")
    except Exception as e:
        print(f"⚠️ ChromaDB health check failed (expected in test environment): {e}")

    # Test model classes
    print("\n5. Testing database model classes...")
    try:
        # Test model creation (not persisting)
        chat_session = ChatSession(
            video_id="test_video_123",
            title="Test Chat Session",
            is_active=True
        )

        chat_message = ChatMessage(
            session_id=chat_session.id,
            message_type="user",
            content="What is machine learning?"
        )

        video_chunk = VideoChunk(
            video_id="test_video_123",
            chunk_index=0,
            chunk_type="transcript",
            content="This is a test chunk",
            content_length=19
        )

        print("✅ All model classes created successfully")
        print(f"   - ChatSession ID: {chat_session.id}")
        print(f"   - ChatMessage type: {chat_message.message_type}")
        print(f"   - VideoChunk length: {video_chunk.content_length}")

    except Exception as e:
        print(f"❌ Model class creation failed: {e}")
        return False

    # Test API router
    print("\n6. Testing API router...")
    try:
        # Check if router has expected endpoints
        routes = [route.path for route in chat_router.routes]
        expected_routes = ['/sessions', '/index', '/stats', '/health']

        found_routes = [route for route in routes if any(expected in route for expected in expected_routes)]

        if len(found_routes) >= 4:
            print("✅ Chat API router has expected endpoints")
            for route in found_routes[:4]:  # Show first 4
                print(f"   - {route}")
        else:
            print("⚠️ Some expected API routes may be missing")

    except Exception as e:
        print(f"❌ API router test failed: {e}")
        return False

    print("\n" + "=" * 60)
    print("🎉 Story 4.6 basic structure tests completed!")
    print("\nKey Features Implemented:")
    print("✅ ChromaDB service for vector storage")
    print("✅ Transcript chunking with semantic segmentation")
    print("✅ Semantic search service with hybrid mode")
    print("✅ RAG service for query processing and response generation")
    print("✅ Chat database models (ChatSession, ChatMessage, VideoChunk)")
    print("✅ Chat API endpoints for full REST interface")

    print("\nNext Steps for Full Implementation:")
    print("📋 Frontend chat interface components")
    print("📋 WebSocket integration for real-time chat")
    print("📋 Full end-to-end testing with actual video content")

    return True

if __name__ == "__main__":
    asyncio.run(test_story_4_6())