youtube-summarizer/test_enhanced_export_system.py

303 lines
12 KiB
Python

#!/usr/bin/env python3
"""Test script for Story 4.4 Enhanced Export system."""
import asyncio
import os
import sys
import logging
from datetime import datetime
from unittest.mock import MagicMock, AsyncMock
# Set mock environment for testing (no API keys required).
# setdefault() only writes a value when the variable is not already present,
# so a real key or an explicit USE_MOCK_SERVICES setting in the caller's
# environment is left untouched.
os.environ.setdefault('DEEPSEEK_API_KEY', 'test-key-for-testing')
os.environ.setdefault('USE_MOCK_SERVICES', 'true')
# Put the directory two levels above this file at the front of sys.path so the
# `backend.*` imports that follow can be resolved; this has to run before them.
# NOTE(review): the path climbs two levels ('..', '..'), not one -- confirm this
# matches where the `backend` package lives relative to this script.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
from backend.services.executive_summary_generator import ExecutiveSummaryGenerator
from backend.services.timestamp_processor import TimestampProcessor
from backend.services.enhanced_markdown_formatter import (
EnhancedMarkdownFormatter,
MarkdownExportConfig
)
from backend.services.enhanced_template_manager import (
EnhancedTemplateManager,
DomainCategory
)
# Configure logging at INFO level so the progress messages logged by the
# test functions in this script are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every test function in this script.
logger = logging.getLogger(__name__)
async def test_executive_summary_generator():
    """Generate an executive summary for a sample text and log its fields.

    Returns:
        bool: True when the summary was generated and all of its fields were
        logged without an exception; False when any exception was raised
        (the exception message is logged).
    """
    logger.info("Testing ExecutiveSummaryGenerator...")
    summarizer = ExecutiveSummaryGenerator()

    # Sample content for testing. The text lines are kept flush-left on
    # purpose so the string handed to the generator has no leading indentation.
    sample_content = """
In this video, we explore the latest trends in artificial intelligence and machine learning.
The presentation covers three main areas: natural language processing, computer vision, and
reinforcement learning. Key findings include a 40% improvement in language model performance,
breakthrough advances in image recognition accuracy, and new applications in autonomous systems.
The business implications are significant, with potential cost savings of $2.5M annually
through automation. However, implementation requires careful planning and a 6-month timeline
for full deployment. The competitive advantage gained could position the company as a leader
in AI-driven solutions.
Technical challenges include data quality issues, computational requirements, and integration
with existing systems. The recommended approach involves phased implementation starting with
pilot programs in two business units.
"""

    try:
        report = await summarizer.generate_executive_summary(
            content=sample_content,
            video_title="AI Trends 2024: Strategic Implementation Guide",
            video_duration_seconds=600,
        )

        logger.info("✅ Executive Summary Generated:")
        logger.info(f"Overview: {report.overview[:100]}...")

        # Business value is optional: log a truncated preview when present,
        # otherwise an explicit "None" marker.
        business_value = report.business_value
        business_line = (
            f"Business Value: {business_value[:100]}..."
            if business_value
            else "Business Value: None"
        )
        logger.info(business_line)

        action_count = len(report.action_items)
        logger.info(f"Action Items: {action_count} items")
        implication_count = len(report.strategic_implications)
        logger.info(f"Strategic Implications: {implication_count} items")
        logger.info(f"Processing time: {report.processing_time_seconds:.2f}s")
    except Exception as e:
        logger.error(f"❌ Executive Summary Generator test failed: {e}")
        return False
    else:
        return True
async def test_timestamp_processor():
    """Run section detection on a sample transcript and log the result.

    Calls ``TimestampProcessor.detect_semantic_sections`` with a ten-entry
    transcript, then logs the section counts, quality score, processing time
    and one ``[MM:SS] title`` line per detected section.

    Returns:
        bool: True when detection and logging finished without an exception;
        False when any exception was raised (the exception message is logged).
    """
    logger.info("Testing TimestampProcessor...")
    processor = TimestampProcessor()

    # Sample transcript with natural breaks
    sample_transcript = [
        {"start": 0, "end": 30, "text": "Welcome to today's presentation on artificial intelligence. We'll cover three main topics."},
        {"start": 30, "end": 90, "text": "First, let's discuss natural language processing and its current state of development."},
        {"start": 90, "end": 150, "text": "The key breakthrough this year has been the improvement in language model accuracy by 40%."},
        {"start": 150, "end": 210, "text": "Moving on to computer vision, we've seen remarkable advances in image recognition systems."},
        {"start": 210, "end": 270, "text": "These systems can now identify objects with 99.2% accuracy in controlled environments."},
        {"start": 270, "end": 330, "text": "Finally, reinforcement learning has opened new possibilities in autonomous systems."},
        {"start": 330, "end": 390, "text": "Let's look at the business implications of these technological advances."},
        {"start": 390, "end": 450, "text": "Cost savings could reach $2.5 million annually through strategic automation implementation."},
        {"start": 450, "end": 510, "text": "However, we need to consider the technical challenges and implementation timeline."},
        {"start": 510, "end": 570, "text": "In conclusion, a phased approach starting with pilot programs is recommended."},
    ]

    try:
        result = await processor.detect_semantic_sections(
            transcript_data=sample_transcript,
            video_url="https://youtube.com/watch?v=test123",
            video_title="AI Trends 2024: Strategic Implementation Guide",
        )

        logger.info("✅ Timestamp Processing Complete:")
        logger.info(f"Detected {len(result.sections)} sections")
        logger.info(f"Total sections: {result.total_sections}")
        logger.info(f"Quality Score: {result.quality_score}")
        logger.info(f"Processing time: {result.processing_time_seconds:.2f}s")

        for index, section in enumerate(result.sections, start=1):
            # Truncate to whole seconds before formatting: the ``d`` format
            # code accepts integers only, so a float start time would raise
            # ValueError here and make a successful run look like a failure.
            minutes, seconds = divmod(int(section.start_timestamp), 60)
            timestamp_str = f"{minutes:02d}:{seconds:02d}"
            logger.info(f"Section {index}: [{timestamp_str}] {section.title}")

        return True
    except Exception as e:
        logger.error(f"❌ Timestamp Processor test failed: {e}")
        return False
async def test_enhanced_template_manager():
    """Exercise the template manager: init, listing, recommendations, stats.

    Returns:
        bool: True when every manager call and log statement completed
        without an exception; False when any exception was raised (the
        exception message is logged).
    """
    logger.info("Testing EnhancedTemplateManager...")
    template_manager = EnhancedTemplateManager()

    try:
        # Create the domain templates and report how many were produced.
        domain_templates = await template_manager.initialize_domain_templates()
        logger.info("✅ Template Manager Initialized:")
        logger.info(f"Domain templates created: {len(domain_templates)}")

        # Report the total number of templates the manager lists.
        available = await template_manager.list_templates()
        logger.info(f"Total templates available: {len(available)}")

        # Ask for up to three domain recommendations for a short sample text.
        sample_content = "In this programming tutorial, we learn about Python functions and data structures"
        recommendations = await template_manager.get_domain_recommendations(
            sample_content,
            max_recommendations=3,
        )
        logger.info(f"Domain recommendations: {len(recommendations)} found")
        for recommendation in recommendations:
            domain = recommendation['domain_category']
            confidence = recommendation['confidence_score']
            logger.info(f"- {domain}: {confidence:.2f} confidence")

        # Execute the template of the first recommendation, if there is one.
        if recommendations:
            first_choice = recommendations[0]
            execution = await template_manager.execute_template(
                template_id=first_choice['template_id'],
                variables={"content": sample_content},
            )
            elapsed = execution['processing_time_seconds']
            logger.info(f"Template execution successful: {elapsed:.2f}s")

        # Log the manager's overall statistics.
        system_stats = await template_manager.get_system_stats()
        template_total = system_stats['total_templates']
        experiment_total = system_stats['active_experiments']
        logger.info(f"System stats: {template_total} templates, {experiment_total} experiments")
    except Exception as e:
        logger.error(f"❌ Enhanced Template Manager test failed: {e}")
        return False
    else:
        return True
async def test_enhanced_markdown_formatter():
    """Build an enhanced markdown export from sample data and log a preview.

    Returns:
        bool: True when the export was created and logged without an
        exception; False when any exception was raised inside the export
        steps (the exception message is logged).
    """
    logger.info("Testing EnhancedMarkdownFormatter...")

    # The formatter is constructed from a summary generator and a timestamp
    # processor; all three are built before the guarded section below.
    summary_generator = ExecutiveSummaryGenerator()
    section_processor = TimestampProcessor()
    formatter = EnhancedMarkdownFormatter(summary_generator, section_processor)

    try:
        # Four one-minute transcript segments used as export input.
        segments = [
            {"start": 0, "end": 60, "text": "Introduction to AI strategy and business objectives"},
            {"start": 60, "end": 120, "text": "Current market analysis and competitive positioning"},
            {"start": 120, "end": 180, "text": "Implementation roadmap and timeline considerations"},
            {"start": 180, "end": 240, "text": "ROI projections and success metrics"},
        ]

        # Export options: summary, timestamps and table of contents enabled,
        # "standard" detail level, no custom template.
        export_options = MarkdownExportConfig(
            include_executive_summary=True,
            include_timestamps=True,
            include_toc=True,
            section_detail_level="standard",
            custom_template_id=None,
        )

        export = await formatter.create_enhanced_export(
            video_title="AI Implementation Strategy: Executive Briefing",
            video_url="https://youtube.com/watch?v=test123",
            content="This video covers strategic AI implementation with focus on business value and ROI.",
            transcript_data=segments,
            export_config=export_options,
        )

        logger.info("✅ Enhanced Markdown Export Generated:")
        logger.info(f"Content length: {len(export.markdown_content)} characters")
        logger.info(f"Quality score: {export.quality_score}")
        logger.info(f"Processing time: {export.processing_time_seconds:.2f}s")
        logger.info(f"Metadata keys: {list(export.metadata.keys())}")

        # Show at most the first 500 characters; an ellipsis marks truncation.
        logger.info("Sample output:")
        markdown = export.markdown_content
        if len(markdown) > 500:
            preview = markdown[:500] + "..."
        else:
            preview = markdown
        logger.info(preview)
    except Exception as e:
        logger.error(f"❌ Enhanced Markdown Formatter test failed: {e}")
        return False
    else:
        return True
async def test_integrated_system():
    """Run every component test in sequence and log a pass/fail summary.

    Each component test is awaited in turn. A test counts as passed when it
    returns a truthy value; a test that raises is logged and counted as
    failed.

    Returns:
        bool: True only when every component test passed; False when at
        least one failed or when the runner itself raised an exception.
    """
    logger.info("Testing Complete Enhanced Export System Integration...")

    try:
        # Store the coroutine *functions* and call each one right before it is
        # awaited. Creating all coroutine objects up front would leave the
        # remaining ones un-awaited ("coroutine ... was never awaited") if the
        # run is cancelled or interrupted part-way through.
        tests = [
            ("Executive Summary Generator", test_executive_summary_generator),
            ("Timestamp Processor", test_timestamp_processor),
            ("Enhanced Template Manager", test_enhanced_template_manager),
            ("Enhanced Markdown Formatter", test_enhanced_markdown_formatter),
        ]

        results = []
        for test_name, test_func in tests:
            logger.info(f"\n{'='*50}")
            logger.info(f"Running: {test_name}")
            logger.info(f"{'='*50}")

            try:
                result = await test_func()
                results.append((test_name, result))
            except Exception as e:
                logger.error(f"Test {test_name} failed with exception: {e}")
                results.append((test_name, False))

        # Print final results
        logger.info(f"\n{'='*50}")
        logger.info("FINAL TEST RESULTS")
        logger.info(f"{'='*50}")

        for test_name, result in results:
            status = "✅ PASSED" if result else "❌ FAILED"
            logger.info(f"{test_name}: {status}")

        total = len(results)
        passed = sum(1 for _, result in results if result)
        logger.info(f"\nOverall: {passed}/{total} tests passed ({passed/total*100:.1f}%)")

        if passed == total:
            logger.info("🎉 Story 4.4 Enhanced Export System: ALL TESTS PASSED!")
            return True

        logger.warning(f"⚠️ Story 4.4 Enhanced Export System: {total-passed} tests failed")
        return False
    except Exception as e:
        logger.error(f"❌ Integrated system test failed: {e}")
        return False
async def main():
    """Run the integrated test suite and exit with a matching status code.

    Logs start and completion timestamps around the run, then terminates the
    process via ``sys.exit``: status 0 when the integrated test reports
    success, status 1 otherwise. This coroutine therefore never returns
    normally.
    """
    logger.info("Starting Story 4.4 Enhanced Export System Tests")
    logger.info(f"Test started at: {datetime.now().isoformat()}")

    all_passed = await test_integrated_system()

    logger.info(f"Test completed at: {datetime.now().isoformat()}")

    # Handle the failure path first so the success path reads straight through.
    if not all_passed:
        logger.error("❌ Story 4.4 tests failed")
        sys.exit(1)

    logger.info("🎉 Story 4.4: Custom AI Models & Enhanced Markdown Export - COMPLETE!")
    sys.exit(0)


# Only start the event loop when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())