# trax/examples/domain_enhancement_demo.py

#!/usr/bin/env python3
"""Domain-Specific Enhancement Pipeline Demo

This script demonstrates the specialized enhancement workflows for different domains,
including technical terminology enhancement, medical vocabulary optimization,
academic citation handling, and domain-specific quality metrics.
"""

import asyncio
import logging
from pathlib import Path

from src.services.domain_enhancement import (
    DomainEnhancementPipeline,
    DomainEnhancementConfig,
    DomainType
)

# Configure root logging at INFO level and create the module-level logger
# that main() uses to record a failed demo run together with its traceback.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def demo_technical_enhancement(pipeline):
    """Show the pipeline enhancing a software-engineering text sample.

    Builds a TECHNICAL-domain configuration with terminology enhancement and
    formatting optimization switched on, awaits ``pipeline.enhance_content``
    on a sample paragraph, and prints the enhanced text, domain, confidence,
    processing time, improvements, terminology corrections and quality metrics.

    Args:
        pipeline: Object whose awaitable ``enhance_content`` method performs
            the enhancement.
    """
    print("\n🔧 Technical Content Enhancement Demo")
    print(50 * "=")

    sample = """
    The algorithm implements a singleton pattern for thread safety in the software system.
    We use python free for backend development and my sequel for the database.
    The code includes function methods and class structures with version v1.2.3.
    """

    # Only the displayed copy is stripped; the pipeline receives the raw sample.
    print("Original Text:", sample.strip(), sep="\n")

    # Both enhancement passes are enabled for this run.
    technical_config = DomainEnhancementConfig(
        domain=DomainType.TECHNICAL,
        enable_terminology_enhancement=True,
        enable_formatting_optimization=True,
    )

    outcome = await pipeline.enhance_content(
        sample,
        domain=DomainType.TECHNICAL,
        config=technical_config,
    )

    print(f"\nEnhanced Text:\n{outcome.enhanced_text}")
    print(f"\nDomain: {outcome.domain.value}")
    print(f"Confidence Score: {outcome.confidence_score:.3f}")
    print(f"Processing Time: {outcome.processing_time:.3f}s")

    print("\nImprovements:")
    for note in outcome.improvements:
        print(f"  ✅ {note}")

    print("\nTerminology Corrections:")
    for fix in outcome.terminology_corrections:
        print(f"  🔄 {fix}")

    print("\nQuality Metrics:")
    for name, score in outcome.quality_metrics.items():
        print(f"  📊 {name}: {score:.3f}")


async def demo_medical_enhancement(pipeline):
    """Show the pipeline enhancing a clinical-notes text sample.

    Uses a MEDICAL-domain configuration with terminology enhancement and
    formatting optimization enabled, then prints the enhanced text, domain,
    confidence, processing time, improvements and quality metrics.

    Args:
        pipeline: Object whose awaitable ``enhance_content`` method performs
            the enhancement.
    """
    divider = "=" * 50
    print("\n🏥 Medical Content Enhancement Demo")
    print(divider)

    clinical_note = """
    Patient presents with symptoms of hypertension and requires treatment for myocardial infarction.
    Blood pressure readings show 120/80 mmHg with heart rate of 72 bpm.
    Medication includes aspirin and ibuprofen for pain management.
    """

    # Display a trimmed copy; the untrimmed note is what gets enhanced.
    shown_text = clinical_note.strip()
    print("Original Text:\n" + shown_text)

    medical_config = DomainEnhancementConfig(
        domain=DomainType.MEDICAL,
        enable_terminology_enhancement=True,
        enable_formatting_optimization=True,
    )

    report = await pipeline.enhance_content(
        clinical_note,
        domain=DomainType.MEDICAL,
        config=medical_config,
    )

    print(f"\nEnhanced Text:\n{report.enhanced_text}")
    print(f"\nDomain: {report.domain.value}")
    print(f"Confidence Score: {report.confidence_score:.3f}")
    print(f"Processing Time: {report.processing_time:.3f}s")

    print("\nImprovements:")
    for entry in report.improvements:
        print(f"  ✅ {entry}")

    print("\nQuality Metrics:")
    for label, reading in report.quality_metrics.items():
        print(f"  📊 {label}: {reading:.3f}")


async def demo_academic_enhancement(pipeline):
    """Show the pipeline enhancing a research-style text sample.

    Uses an ACADEMIC-domain configuration with terminology enhancement,
    citation handling and formatting optimization enabled, then prints the
    enhanced text, domain, confidence, processing time, improvements and
    quality metrics.

    Args:
        pipeline: Object whose awaitable ``enhance_content`` method performs
            the enhancement.
    """
    print("\n🎓 Academic Content Enhancement Demo")
    print("=" * 50)

    paper_excerpt = """
    Research study analysis shows hypothesis testing methodology with literature review.
    The findings are supported by et al. research and ibid. references.
    Figure 1 demonstrates the results while Table 2 shows statistical data.
    """

    print("Original Text:", paper_excerpt.strip(), sep="\n")

    # Citation handling is switched on in addition to the two common passes.
    academic_config = DomainEnhancementConfig(
        domain=DomainType.ACADEMIC,
        enable_terminology_enhancement=True,
        enable_citation_handling=True,
        enable_formatting_optimization=True,
    )

    enhanced = await pipeline.enhance_content(
        paper_excerpt,
        domain=DomainType.ACADEMIC,
        config=academic_config,
    )

    print(f"\nEnhanced Text:\n{enhanced.enhanced_text}")
    print(f"\nDomain: {enhanced.domain.value}")
    print(f"Confidence Score: {enhanced.confidence_score:.3f}")
    print(f"Processing Time: {enhanced.processing_time:.3f}s")

    print("\nImprovements:")
    for change in enhanced.improvements:
        print(f"  ✅ {change}")

    print("\nQuality Metrics:")
    for metric_name, metric_value in enhanced.quality_metrics.items():
        print(f"  📊 {metric_name}: {metric_value:.3f}")


async def demo_legal_enhancement(pipeline):
    """Show the pipeline enhancing a contract-style text sample.

    Uses a LEGAL-domain configuration with terminology enhancement and
    formatting optimization enabled, then prints the enhanced text, domain,
    confidence, processing time, improvements and quality metrics.

    Args:
        pipeline: Object whose awaitable ``enhance_content`` method performs
            the enhancement.
    """
    heading_rule = "=" * 50
    print("\n⚖️ Legal Content Enhancement Demo")
    print(heading_rule)

    contract_clause = """
    Contract agreement compliance with law regulation and legal jurisdiction.
    The terms shall must may hereby whereas therefore be executed according to statute.
    """

    # The clause is trimmed for display only; the pipeline gets it unmodified.
    print("Original Text:\n" + contract_clause.strip())

    legal_config = DomainEnhancementConfig(
        domain=DomainType.LEGAL,
        enable_terminology_enhancement=True,
        enable_formatting_optimization=True,
    )

    verdict = await pipeline.enhance_content(
        contract_clause,
        domain=DomainType.LEGAL,
        config=legal_config,
    )

    print(f"\nEnhanced Text:\n{verdict.enhanced_text}")
    print(f"\nDomain: {verdict.domain.value}")
    print(f"Confidence Score: {verdict.confidence_score:.3f}")
    print(f"Processing Time: {verdict.processing_time:.3f}s")

    print("\nImprovements:")
    for item in verdict.improvements:
        print(f"  ✅ {item}")

    print("\nQuality Metrics:")
    for key, amount in verdict.quality_metrics.items():
        print(f"  📊 {key}: {amount:.3f}")


async def demo_auto_domain_detection(pipeline):
    """Run labelled samples through the pipeline without naming a domain.

    Each sample is passed to ``pipeline.enhance_content`` with the text as the
    only argument; the domain, confidence score and quality-metric names found
    on the returned result are printed for every sample.

    Args:
        pipeline: Object whose awaitable ``enhance_content`` method accepts a
            single text argument.
    """
    print("\n🔍 Automatic Domain Detection Demo")
    print(50 * "=")

    # (label, sample) pairs, processed in the order listed.
    labelled_samples = (
        ("Technical", "The algorithm system software hardware implementation code programming development"),
        ("Medical", "Patient diagnosis treatment symptom clinical medical doctor nurse hospital"),
        ("Academic", "Research study analysis theory hypothesis methodology experiment data results"),
        ("Legal", "Contract agreement law regulation compliance legal court judge attorney"),
        ("General", "This is a general conversation about various topics and interests"),
    )

    for label, sample in labelled_samples:
        print(f"\n--- {label} Content ---")
        print(f"Text: {sample}")

        # No domain or config is supplied for this call.
        outcome = await pipeline.enhance_content(sample)

        print(f"Detected Domain: {outcome.domain.value}")
        print(f"Confidence Score: {outcome.confidence_score:.3f}")
        metric_names = list(outcome.quality_metrics.keys())
        print(f"Quality Metrics: {metric_names}")


async def demo_configuration_options(pipeline):
    """Compare results for each combination of the two enhancement toggles.

    The same technical sentence is enhanced four times, once per combination
    of the terminology-enhancement and formatting-optimization flags, and the
    number of improvements, number of terminology corrections and confidence
    score are printed for each run.

    Args:
        pipeline: Object whose awaitable ``enhance_content`` method performs
            the enhancement.
    """
    print("\n⚙️ Configuration Options Demo")
    print(50 * "=")

    sentence = "The algorithm implements a singleton pattern for thread safety"

    # (label, terminology flag, formatting flag) for every variant to try.
    flag_table = (
        ("Full Enhancement", True, True),
        ("Terminology Only", True, False),
        ("Formatting Only", False, True),
        ("Minimal Enhancement", False, False),
    )

    # All configurations are built up front, before any enhancement runs.
    variants = [
        (
            label,
            DomainEnhancementConfig(
                domain=DomainType.TECHNICAL,
                enable_terminology_enhancement=use_terminology,
                enable_formatting_optimization=use_formatting,
            ),
        )
        for label, use_terminology, use_formatting in flag_table
    ]

    for label, variant_config in variants:
        print(f"\n--- {label} ---")
        outcome = await pipeline.enhance_content(
            sentence,
            domain=DomainType.TECHNICAL,
            config=variant_config,
        )

        print(f"Improvements: {len(outcome.improvements)}")
        print(f"Terminology Corrections: {len(outcome.terminology_corrections)}")
        print(f"Confidence Score: {outcome.confidence_score:.3f}")


async def demo_quality_benchmarks(pipeline):
    """Benchmark one short sample per domain and print a summary table.

    For every ``DomainType`` listed below, the sample text is enhanced with
    the domain passed explicitly. Confidence, processing time and the names of
    the quality metrics are printed per domain, followed by a table whose
    "Quality" column is the arithmetic mean of that result's quality metrics.

    A result with no quality metrics is reported with a quality of 0.0
    instead of aborting the summary with ``ZeroDivisionError``.

    Args:
        pipeline: Object whose awaitable ``enhance_content`` method performs
            the enhancement.
    """
    print("\n📊 Quality Benchmarking Demo")
    print("=" * 50)

    # Benchmark texts for each domain
    benchmark_texts = {
        DomainType.TECHNICAL: "algorithm system software hardware implementation code programming",
        DomainType.MEDICAL: "patient diagnosis treatment symptom clinical medical doctor",
        DomainType.ACADEMIC: "research study analysis theory hypothesis methodology",
        DomainType.LEGAL: "contract agreement law regulation compliance legal",
        DomainType.GENERAL: "general conversation topics interests various",
    }

    results = {}

    for domain, text in benchmark_texts.items():
        print(f"\nBenchmarking {domain.value.upper()} domain...")
        result = await pipeline.enhance_content(text, domain=domain)
        results[domain] = result

        print(f"  Confidence: {result.confidence_score:.3f}")
        print(f"  Processing Time: {result.processing_time:.3f}s")
        print(f"  Quality Metrics: {list(result.quality_metrics.keys())}")

    # Summary
    print("\n📈 Benchmark Summary:")
    print(f"{'Domain':<12} {'Confidence':<12} {'Time (s)':<10} {'Quality':<10}")
    print("-" * 50)

    for domain, result in results.items():
        metrics = result.quality_metrics
        # Guard the mean: an empty metrics mapping would otherwise divide by zero.
        quality_score = sum(metrics.values()) / len(metrics) if metrics else 0.0
        print(f"{domain.value:<12} {result.confidence_score:<12.3f} {result.processing_time:<10.3f} {quality_score:<10.3f}")


async def main():
    """Print the intro, run every demo in sequence, and summarize the features.

    A single pipeline instance is created and shared by all demos. Any
    exception raised while creating the pipeline or running a demo is reported
    on stdout and logged with its traceback; it is not re-raised.
    """
    intro_lines = (
        "🚀 Domain-Specific Enhancement Pipeline Demo",
        "=" * 60,
        "This demo showcases specialized enhancement workflows for different domains",
        "including technical terminology, medical vocabulary, academic citations,",
        "and comprehensive quality metrics.",
    )
    for line in intro_lines:
        print(line)

    try:
        print("\n🔧 Initializing Domain Enhancement Pipeline...")
        pipeline = DomainEnhancementPipeline()
        print("✅ Pipeline initialized successfully!")

        # Demos run one after another, in this order, on the shared pipeline.
        demo_sequence = (
            demo_technical_enhancement,
            demo_medical_enhancement,
            demo_academic_enhancement,
            demo_legal_enhancement,
            demo_auto_domain_detection,
            demo_configuration_options,
            demo_quality_benchmarks,
        )
        for run_demo in demo_sequence:
            await run_demo(pipeline)

        print("\n🎉 Demo completed successfully!")
        print("\nKey Features Demonstrated:")
        demonstrated = (
            "Domain-specific enhancement strategies",
            "Technical terminology enhancement",
            "Medical vocabulary optimization",
            "Academic citation handling",
            "Legal precision optimization",
            "Automatic domain detection",
            "Configurable enhancement options",
            "Comprehensive quality metrics",
            "Performance benchmarking",
        )
        for feature in demonstrated:
            print(f"  ✅ {feature}")

    except Exception as exc:
        print(f"\n❌ Demo failed with error: {exc}")
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception(f"Demo error: {exc}")


if __name__ == "__main__":
    # Start the demo only when this file is executed as a script;
    # asyncio.run() drives the main() coroutine to completion.
    asyncio.run(main())