#!/usr/bin/env python3
"""Domain-Specific Enhancement Pipeline Demo

This script demonstrates the specialized enhancement workflows for different
domains, including technical terminology enhancement, medical vocabulary
optimization, academic citation handling, and domain-specific quality metrics.
"""

import asyncio
import logging
from pathlib import Path

from src.services.domain_enhancement import (
    DomainEnhancementPipeline,
    DomainEnhancementConfig,
    DomainType
)

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _print_enhancement_report(result, *, show_corrections=False):
    """Print the standard report for one enhancement result.

    Shared by all per-domain demos to avoid repeating the same print
    sequence four times.

    Args:
        result: An enhancement result object exposing ``enhanced_text``,
            ``domain``, ``confidence_score``, ``processing_time``,
            ``improvements``, ``terminology_corrections`` and
            ``quality_metrics`` (shape assumed from usage — defined in
            the project's domain_enhancement service).
        show_corrections: When True, also print the terminology
            corrections section (used by the technical demo only).
    """
    print(f"\nEnhanced Text:\n{result.enhanced_text}")
    print(f"\nDomain: {result.domain.value}")
    print(f"Confidence Score: {result.confidence_score:.3f}")
    print(f"Processing Time: {result.processing_time:.3f}s")

    print("\nImprovements:")
    for improvement in result.improvements:
        print(f"  āœ… {improvement}")

    if show_corrections:
        print("\nTerminology Corrections:")
        for correction in result.terminology_corrections:
            print(f"  šŸ”„ {correction}")

    print("\nQuality Metrics:")
    for metric, value in result.quality_metrics.items():
        print(f"  šŸ“Š {metric}: {value:.3f}")


async def demo_technical_enhancement(pipeline):
    """Demonstrate technical content enhancement."""
    print("\nšŸ”§ Technical Content Enhancement Demo")
    print("=" * 50)

    technical_text = """
    The algorithm implements a singleton pattern for thread safety in the software system.
    We use python free for backend development and my sequel for the database.
    The code includes function methods and class structures with version v1.2.3.
    """

    print(f"Original Text:\n{technical_text.strip()}")

    # Configure technical enhancement
    config = DomainEnhancementConfig(
        domain=DomainType.TECHNICAL,
        enable_terminology_enhancement=True,
        enable_formatting_optimization=True
    )

    # Enhance content
    result = await pipeline.enhance_content(
        technical_text, domain=DomainType.TECHNICAL, config=config
    )

    # Technical demo is the only one that also shows terminology corrections.
    _print_enhancement_report(result, show_corrections=True)


async def demo_medical_enhancement(pipeline):
    """Demonstrate medical content enhancement."""
    print("\nšŸ„ Medical Content Enhancement Demo")
    print("=" * 50)

    medical_text = """
    Patient presents with symptoms of hypertension and requires treatment for myocardial infarction.
    Blood pressure readings show 120/80 mmHg with heart rate of 72 bpm.
    Medication includes aspirin and ibuprofen for pain management.
    """

    print(f"Original Text:\n{medical_text.strip()}")

    # Configure medical enhancement
    config = DomainEnhancementConfig(
        domain=DomainType.MEDICAL,
        enable_terminology_enhancement=True,
        enable_formatting_optimization=True
    )

    # Enhance content
    result = await pipeline.enhance_content(
        medical_text, domain=DomainType.MEDICAL, config=config
    )

    _print_enhancement_report(result)


async def demo_academic_enhancement(pipeline):
    """Demonstrate academic content enhancement."""
    print("\nšŸŽ“ Academic Content Enhancement Demo")
    print("=" * 50)

    academic_text = """
    Research study analysis shows hypothesis testing methodology with literature review.
    The findings are supported by et al. research and ibid. references.
    Figure 1 demonstrates the results while Table 2 shows statistical data.
    """

    print(f"Original Text:\n{academic_text.strip()}")

    # Configure academic enhancement (citation handling is academic-specific)
    config = DomainEnhancementConfig(
        domain=DomainType.ACADEMIC,
        enable_terminology_enhancement=True,
        enable_citation_handling=True,
        enable_formatting_optimization=True
    )

    # Enhance content
    result = await pipeline.enhance_content(
        academic_text, domain=DomainType.ACADEMIC, config=config
    )

    _print_enhancement_report(result)


async def demo_legal_enhancement(pipeline):
    """Demonstrate legal content enhancement."""
    print("\nāš–ļø Legal Content Enhancement Demo")
    print("=" * 50)

    legal_text = """
    Contract agreement compliance with law regulation and legal jurisdiction.
    The terms shall must may hereby whereas therefore be executed according to statute.
    """

    print(f"Original Text:\n{legal_text.strip()}")

    # Configure legal enhancement
    config = DomainEnhancementConfig(
        domain=DomainType.LEGAL,
        enable_terminology_enhancement=True,
        enable_formatting_optimization=True
    )

    # Enhance content
    result = await pipeline.enhance_content(
        legal_text, domain=DomainType.LEGAL, config=config
    )

    _print_enhancement_report(result)


async def demo_auto_domain_detection(pipeline):
    """Demonstrate automatic domain detection."""
    print("\nšŸ” Automatic Domain Detection Demo")
    print("=" * 50)

    # Test texts for different domains
    test_texts = {
        "Technical": "The algorithm system software hardware implementation code programming development",
        "Medical": "Patient diagnosis treatment symptom clinical medical doctor nurse hospital",
        "Academic": "Research study analysis theory hypothesis methodology experiment data results",
        "Legal": "Contract agreement law regulation compliance legal court judge attorney",
        "General": "This is a general conversation about various topics and interests"
    }

    for domain_name, text in test_texts.items():
        print(f"\n--- {domain_name} Content ---")
        print(f"Text: {text}")

        # Auto-detect domain: no explicit domain/config passed, so the
        # pipeline must classify the text itself.
        result = await pipeline.enhance_content(text)

        print(f"Detected Domain: {result.domain.value}")
        print(f"Confidence Score: {result.confidence_score:.3f}")
        print(f"Quality Metrics: {list(result.quality_metrics.keys())}")


async def demo_configuration_options(pipeline):
    """Demonstrate configuration options."""
    print("\nāš™ļø Configuration Options Demo")
    print("=" * 50)

    technical_text = "The algorithm implements a singleton pattern for thread safety"

    # Test different configuration combinations
    configs = [
        ("Full Enhancement", DomainEnhancementConfig(
            domain=DomainType.TECHNICAL,
            enable_terminology_enhancement=True,
            enable_formatting_optimization=True
        )),
        ("Terminology Only", DomainEnhancementConfig(
            domain=DomainType.TECHNICAL,
            enable_terminology_enhancement=True,
            enable_formatting_optimization=False
        )),
        ("Formatting Only", DomainEnhancementConfig(
            domain=DomainType.TECHNICAL,
            enable_terminology_enhancement=False,
            enable_formatting_optimization=True
        )),
        ("Minimal Enhancement", DomainEnhancementConfig(
            domain=DomainType.TECHNICAL,
            enable_terminology_enhancement=False,
            enable_formatting_optimization=False
        ))
    ]

    for config_name, config in configs:
        print(f"\n--- {config_name} ---")

        result = await pipeline.enhance_content(
            technical_text, domain=DomainType.TECHNICAL, config=config
        )

        print(f"Improvements: {len(result.improvements)}")
        print(f"Terminology Corrections: {len(result.terminology_corrections)}")
        print(f"Confidence Score: {result.confidence_score:.3f}")


async def demo_quality_benchmarks(pipeline):
    """Demonstrate quality benchmarking across domains."""
    print("\nšŸ“Š Quality Benchmarking Demo")
    print("=" * 50)

    # Benchmark texts for each domain
    benchmark_texts = {
        DomainType.TECHNICAL: "algorithm system software hardware implementation code programming",
        DomainType.MEDICAL: "patient diagnosis treatment symptom clinical medical doctor",
        DomainType.ACADEMIC: "research study analysis theory hypothesis methodology",
        DomainType.LEGAL: "contract agreement law regulation compliance legal",
        DomainType.GENERAL: "general conversation topics interests various"
    }

    results = {}
    for domain, text in benchmark_texts.items():
        print(f"\nBenchmarking {domain.value.upper()} domain...")

        result = await pipeline.enhance_content(text, domain=domain)
        results[domain] = result

        print(f"  Confidence: {result.confidence_score:.3f}")
        print(f"  Processing Time: {result.processing_time:.3f}s")
        print(f"  Quality Metrics: {list(result.quality_metrics.keys())}")

    # Summary
    print("\nšŸ“ˆ Benchmark Summary:")
    print(f"{'Domain':<12} {'Confidence':<12} {'Time (s)':<10} {'Quality':<10}")
    print("-" * 50)

    for domain, result in results.items():
        # Mean of all quality metric values; assumes at least one metric
        # is present (original code makes the same assumption).
        quality_score = sum(result.quality_metrics.values()) / len(result.quality_metrics)
        print(
            f"{domain.value:<12} {result.confidence_score:<12.3f} "
            f"{result.processing_time:<10.3f} {quality_score:<10.3f}"
        )


async def main():
    """Main demonstration function."""
    print("šŸš€ Domain-Specific Enhancement Pipeline Demo")
    print("=" * 60)
    print("This demo showcases specialized enhancement workflows for different domains")
    print("including technical terminology, medical vocabulary, academic citations,")
    print("and comprehensive quality metrics.")

    try:
        # Initialize the pipeline
        print("\nšŸ”§ Initializing Domain Enhancement Pipeline...")
        pipeline = DomainEnhancementPipeline()
        print("āœ… Pipeline initialized successfully!")

        # Run demonstrations
        await demo_technical_enhancement(pipeline)
        await demo_medical_enhancement(pipeline)
        await demo_academic_enhancement(pipeline)
        await demo_legal_enhancement(pipeline)
        await demo_auto_domain_detection(pipeline)
        await demo_configuration_options(pipeline)
        await demo_quality_benchmarks(pipeline)

        print("\nšŸŽ‰ Demo completed successfully!")
        print("\nKey Features Demonstrated:")
        print("  āœ… Domain-specific enhancement strategies")
        print("  āœ… Technical terminology enhancement")
        print("  āœ… Medical vocabulary optimization")
        print("  āœ… Academic citation handling")
        print("  āœ… Legal precision optimization")
        print("  āœ… Automatic domain detection")
        print("  āœ… Configurable enhancement options")
        print("  āœ… Comprehensive quality metrics")
        print("  āœ… Performance benchmarking")

    except Exception as e:
        # Broad catch is deliberate for a demo: report the failure and
        # log the traceback instead of crashing with a raw stack trace.
        print(f"\nāŒ Demo failed with error: {e}")
        logger.error(f"Demo error: {e}", exc_info=True)


if __name__ == "__main__":
    # Run the demo
    asyncio.run(main())