316 lines
12 KiB
Python
316 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""Domain-Specific Enhancement Pipeline Demo
|
|
|
|
This script demonstrates the specialized enhancement workflows for different domains,
|
|
including technical terminology enhancement, medical vocabulary optimization,
|
|
academic citation handling, and domain-specific quality metrics.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from src.services.domain_enhancement import (
|
|
DomainEnhancementPipeline,
|
|
DomainEnhancementConfig,
|
|
DomainType
|
|
)
|
|
|
|
# Root logging is configured at INFO for the whole demo run.
logging.basicConfig(level=logging.INFO)

# Module-scoped logger; main() reports demo failures through it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def demo_technical_enhancement(pipeline):
    """Run the technical-domain demo and print a report of the result.

    A short sample mentioning software terms is sent through
    ``pipeline.enhance_content`` with a ``DomainType.TECHNICAL`` configuration
    that switches on terminology enhancement and formatting optimization.
    The enhanced text, domain, confidence score, processing time,
    improvements, terminology corrections and quality metrics of the
    returned result are then printed.

    Args:
        pipeline: Object exposing an awaitable
            ``enhance_content(text, domain=..., config=...)`` method.
    """
    print("\n🔧 Technical Content Enhancement Demo")
    print("=" * 50)

    # Sample input; it starts and ends with a newline, which is stripped
    # only for display (the pipeline receives the text unstripped).
    sample = (
        "\n"
        "The algorithm implements a singleton pattern for thread safety in the software system.\n"
        "We use python free for backend development and my sequel for the database.\n"
        "The code includes function methods and class structures with version v1.2.3.\n"
    )
    print(f"Original Text:\n{sample.strip()}")

    # Technical configuration: terminology + formatting enabled.
    settings = DomainEnhancementConfig(
        domain=DomainType.TECHNICAL,
        enable_terminology_enhancement=True,
        enable_formatting_optimization=True,
    )

    outcome = await pipeline.enhance_content(
        sample,
        domain=DomainType.TECHNICAL,
        config=settings,
    )

    # Headline figures.
    print(f"\nEnhanced Text:\n{outcome.enhanced_text}")
    print(f"\nDomain: {outcome.domain.value}")
    print(f"Confidence Score: {outcome.confidence_score:.3f}")
    print(f"Processing Time: {outcome.processing_time:.3f}s")

    # Itemised sections.
    print("\nImprovements:")
    for entry in outcome.improvements:
        print(f" ✅ {entry}")

    print("\nTerminology Corrections:")
    for fix in outcome.terminology_corrections:
        print(f" 🔄 {fix}")

    print("\nQuality Metrics:")
    for name, score in outcome.quality_metrics.items():
        print(f" 📊 {name}: {score:.3f}")
|
|
|
|
|
|
async def demo_medical_enhancement(pipeline):
    """Run the medical-domain demo and print a report of the result.

    A short clinical-style sample is sent through
    ``pipeline.enhance_content`` with a ``DomainType.MEDICAL`` configuration
    that switches on terminology enhancement and formatting optimization.
    The enhanced text, domain, confidence score, processing time,
    improvements and quality metrics of the returned result are printed.

    Args:
        pipeline: Object exposing an awaitable
            ``enhance_content(text, domain=..., config=...)`` method.
    """
    print("\n🏥 Medical Content Enhancement Demo")
    print("=" * 50)

    # Sample input; stripped only for display, passed to the pipeline as-is.
    sample = (
        "\n"
        "Patient presents with symptoms of hypertension and requires treatment for myocardial infarction.\n"
        "Blood pressure readings show 120/80 mmHg with heart rate of 72 bpm.\n"
        "Medication includes aspirin and ibuprofen for pain management.\n"
    )
    print(f"Original Text:\n{sample.strip()}")

    # Medical configuration: terminology + formatting enabled.
    settings = DomainEnhancementConfig(
        domain=DomainType.MEDICAL,
        enable_terminology_enhancement=True,
        enable_formatting_optimization=True,
    )

    outcome = await pipeline.enhance_content(
        sample,
        domain=DomainType.MEDICAL,
        config=settings,
    )

    # Headline figures.
    print(f"\nEnhanced Text:\n{outcome.enhanced_text}")
    print(f"\nDomain: {outcome.domain.value}")
    print(f"Confidence Score: {outcome.confidence_score:.3f}")
    print(f"Processing Time: {outcome.processing_time:.3f}s")

    # Itemised sections.
    print("\nImprovements:")
    for entry in outcome.improvements:
        print(f" ✅ {entry}")

    print("\nQuality Metrics:")
    for name, score in outcome.quality_metrics.items():
        print(f" 📊 {name}: {score:.3f}")
|
|
|
|
|
|
async def demo_academic_enhancement(pipeline):
    """Run the academic-domain demo and print a report of the result.

    A short research-style sample is sent through
    ``pipeline.enhance_content`` with a ``DomainType.ACADEMIC`` configuration
    that switches on terminology enhancement, citation handling and
    formatting optimization. The enhanced text, domain, confidence score,
    processing time, improvements and quality metrics of the returned
    result are printed.

    Args:
        pipeline: Object exposing an awaitable
            ``enhance_content(text, domain=..., config=...)`` method.
    """
    print("\n🎓 Academic Content Enhancement Demo")
    print("=" * 50)

    # Sample input; stripped only for display, passed to the pipeline as-is.
    sample = (
        "\n"
        "Research study analysis shows hypothesis testing methodology with literature review.\n"
        "The findings are supported by et al. research and ibid. references.\n"
        "Figure 1 demonstrates the results while Table 2 shows statistical data.\n"
    )
    print(f"Original Text:\n{sample.strip()}")

    # Academic configuration: the only demo that also enables citations.
    settings = DomainEnhancementConfig(
        domain=DomainType.ACADEMIC,
        enable_terminology_enhancement=True,
        enable_citation_handling=True,
        enable_formatting_optimization=True,
    )

    outcome = await pipeline.enhance_content(
        sample,
        domain=DomainType.ACADEMIC,
        config=settings,
    )

    # Headline figures.
    print(f"\nEnhanced Text:\n{outcome.enhanced_text}")
    print(f"\nDomain: {outcome.domain.value}")
    print(f"Confidence Score: {outcome.confidence_score:.3f}")
    print(f"Processing Time: {outcome.processing_time:.3f}s")

    # Itemised sections.
    print("\nImprovements:")
    for entry in outcome.improvements:
        print(f" ✅ {entry}")

    print("\nQuality Metrics:")
    for name, score in outcome.quality_metrics.items():
        print(f" 📊 {name}: {score:.3f}")
|
|
|
|
|
|
async def demo_legal_enhancement(pipeline):
    """Run the legal-domain demo and print a report of the result.

    A short contract-style sample is sent through
    ``pipeline.enhance_content`` with a ``DomainType.LEGAL`` configuration
    that switches on terminology enhancement and formatting optimization.
    The enhanced text, domain, confidence score, processing time,
    improvements and quality metrics of the returned result are printed.

    Args:
        pipeline: Object exposing an awaitable
            ``enhance_content(text, domain=..., config=...)`` method.
    """
    print("\n⚖️ Legal Content Enhancement Demo")
    print("=" * 50)

    # Sample input; stripped only for display, passed to the pipeline as-is.
    sample = (
        "\n"
        "Contract agreement compliance with law regulation and legal jurisdiction.\n"
        "The terms shall must may hereby whereas therefore be executed according to statute.\n"
    )
    print(f"Original Text:\n{sample.strip()}")

    # Legal configuration: terminology + formatting enabled.
    settings = DomainEnhancementConfig(
        domain=DomainType.LEGAL,
        enable_terminology_enhancement=True,
        enable_formatting_optimization=True,
    )

    outcome = await pipeline.enhance_content(
        sample,
        domain=DomainType.LEGAL,
        config=settings,
    )

    # Headline figures.
    print(f"\nEnhanced Text:\n{outcome.enhanced_text}")
    print(f"\nDomain: {outcome.domain.value}")
    print(f"Confidence Score: {outcome.confidence_score:.3f}")
    print(f"Processing Time: {outcome.processing_time:.3f}s")

    # Itemised sections.
    print("\nImprovements:")
    for entry in outcome.improvements:
        print(f" ✅ {entry}")

    print("\nQuality Metrics:")
    for name, score in outcome.quality_metrics.items():
        print(f" 📊 {name}: {score:.3f}")
|
|
|
|
|
|
async def demo_auto_domain_detection(pipeline):
    """Show which domain the pipeline reports when none is specified.

    Each labelled sample is passed to ``pipeline.enhance_content`` with the
    text as the only argument (no ``domain`` or ``config``), and the
    domain, confidence score and quality-metric names of the returned
    result are printed.

    Args:
        pipeline: Object exposing an awaitable ``enhance_content(text)``
            method whose result has ``domain.value``, ``confidence_score``
            and ``quality_metrics`` attributes.
    """
    print("\n🔍 Automatic Domain Detection Demo")
    print("=" * 50)

    # (label, sample) pairs, processed in this order.
    samples = (
        ("Technical", "The algorithm system software hardware implementation code programming development"),
        ("Medical", "Patient diagnosis treatment symptom clinical medical doctor nurse hospital"),
        ("Academic", "Research study analysis theory hypothesis methodology experiment data results"),
        ("Legal", "Contract agreement law regulation compliance legal court judge attorney"),
        ("General", "This is a general conversation about various topics and interests"),
    )

    for label, sample in samples:
        print(f"\n--- {label} Content ---")
        print(f"Text: {sample}")

        # No domain hint is given; the pipeline decides on its own.
        outcome = await pipeline.enhance_content(sample)

        metric_names = list(outcome.quality_metrics.keys())
        print(f"Detected Domain: {outcome.domain.value}")
        print(f"Confidence Score: {outcome.confidence_score:.3f}")
        print(f"Quality Metrics: {metric_names}")
|
|
|
|
|
|
async def demo_configuration_options(pipeline):
    """Compare results across the four terminology/formatting flag combinations.

    The same technical sentence is enhanced once per configuration, always
    with ``DomainType.TECHNICAL``. For each run the number of improvements,
    the number of terminology corrections and the confidence score of the
    returned result are printed.

    Args:
        pipeline: Object exposing an awaitable
            ``enhance_content(text, domain=..., config=...)`` method.
    """
    print("\n⚙️ Configuration Options Demo")
    print("=" * 50)

    sample = "The algorithm implements a singleton pattern for thread safety"

    # (label, terminology enabled, formatting enabled)
    flag_table = (
        ("Full Enhancement", True, True),
        ("Terminology Only", True, False),
        ("Formatting Only", False, True),
        ("Minimal Enhancement", False, False),
    )

    # All configurations are constructed up front, before any pipeline call.
    scenarios = [
        (
            label,
            DomainEnhancementConfig(
                domain=DomainType.TECHNICAL,
                enable_terminology_enhancement=use_terminology,
                enable_formatting_optimization=use_formatting,
            ),
        )
        for label, use_terminology, use_formatting in flag_table
    ]

    for label, settings in scenarios:
        print(f"\n--- {label} ---")
        outcome = await pipeline.enhance_content(
            sample,
            domain=DomainType.TECHNICAL,
            config=settings,
        )

        improvement_count = len(outcome.improvements)
        print(f"Improvements: {improvement_count}")
        correction_count = len(outcome.terminology_corrections)
        print(f"Terminology Corrections: {correction_count}")
        print(f"Confidence Score: {outcome.confidence_score:.3f}")
|
|
|
|
|
|
async def demo_quality_benchmarks(pipeline):
    """Benchmark the pipeline on one keyword sample per domain and summarise.

    Each sample is enhanced with its domain passed explicitly (no config).
    Per-domain confidence, processing time and quality-metric names are
    printed as the runs complete, followed by a summary table whose
    "Quality" column is the arithmetic mean of that result's
    ``quality_metrics`` values.

    Args:
        pipeline: Object exposing an awaitable
            ``enhance_content(text, domain=...)`` method whose result has
            ``confidence_score``, ``processing_time`` and
            ``quality_metrics`` (a mapping of name to number) attributes.
    """
    print("\n📊 Quality Benchmarking Demo")
    print("=" * 50)

    # Benchmark texts for each domain
    benchmark_texts = {
        DomainType.TECHNICAL: "algorithm system software hardware implementation code programming",
        DomainType.MEDICAL: "patient diagnosis treatment symptom clinical medical doctor",
        DomainType.ACADEMIC: "research study analysis theory hypothesis methodology",
        DomainType.LEGAL: "contract agreement law regulation compliance legal",
        DomainType.GENERAL: "general conversation topics interests various",
    }

    results = {}

    for domain, text in benchmark_texts.items():
        print(f"\nBenchmarking {domain.value.upper()} domain...")
        result = await pipeline.enhance_content(text, domain=domain)
        results[domain] = result

        print(f" Confidence: {result.confidence_score:.3f}")
        print(f" Processing Time: {result.processing_time:.3f}s")
        print(f" Quality Metrics: {list(result.quality_metrics.keys())}")

    # Summary
    print("\n📈 Benchmark Summary:")
    print(f"{'Domain':<12} {'Confidence':<12} {'Time (s)':<10} {'Quality':<10}")
    print("-" * 50)

    for domain, result in results.items():
        metrics = result.quality_metrics
        # A result with no quality metrics would make the mean divide by
        # zero and abort the whole table; report 0.0 for that row instead.
        quality_score = sum(metrics.values()) / len(metrics) if metrics else 0.0
        print(f"{domain.value:<12} {result.confidence_score:<12.3f} {result.processing_time:<10.3f} {quality_score:<10.3f}")
|
|
|
|
|
|
async def main():
    """Run every demo in sequence against a single pipeline instance.

    Prints an introduction, constructs ``DomainEnhancementPipeline`` with no
    arguments, awaits each ``demo_*`` coroutine in a fixed order and finally
    prints a list of the demonstrated features.

    Any ``Exception`` raised during construction or by a demo is caught
    here: the error is printed, logged with its traceback through the
    module logger, and the function then returns normally.
    """
    print("🚀 Domain-Specific Enhancement Pipeline Demo")
    print("=" * 60)
    for intro_line in (
        "This demo showcases specialized enhancement workflows for different domains",
        "including technical terminology, medical vocabulary, academic citations,",
        "and comprehensive quality metrics.",
    ):
        print(intro_line)

    try:
        print("\n🔧 Initializing Domain Enhancement Pipeline...")
        pipeline = DomainEnhancementPipeline()
        print("✅ Pipeline initialized successfully!")

        # Demos run strictly one after another, in this order.
        demo_sequence = (
            demo_technical_enhancement,
            demo_medical_enhancement,
            demo_academic_enhancement,
            demo_legal_enhancement,
            demo_auto_domain_detection,
            demo_configuration_options,
            demo_quality_benchmarks,
        )
        for run_demo in demo_sequence:
            await run_demo(pipeline)

        print("\n🎉 Demo completed successfully!")
        print("\nKey Features Demonstrated:")
        for feature in (
            "Domain-specific enhancement strategies",
            "Technical terminology enhancement",
            "Medical vocabulary optimization",
            "Academic citation handling",
            "Legal precision optimization",
            "Automatic domain detection",
            "Configurable enhancement options",
            "Comprehensive quality metrics",
            "Performance benchmarking",
        ):
            print(f" ✅ {feature}")

    except Exception as exc:
        # Top-level boundary of the demo: report and log, do not re-raise.
        print(f"\n❌ Demo failed with error: {exc}")
        logger.error(f"Demo error: {exc}", exc_info=True)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the async demo driver to completion.
    asyncio.run(main())
|