"""
Unit tests for WordDetector and related classes.
"""

import pytest
import json
from pathlib import Path
from unittest.mock import Mock, patch, mock_open

from src.core.word_detector import (
    Severity,
    DetectedWord,
    WordList,
    WordDetector
)


class TestSeverity:
    """Test Severity enum."""
    
    def test_severity_values(self):
        """Test severity values are correct."""
        assert Severity.LOW.value == 1
        assert Severity.MEDIUM.value == 2
        assert Severity.HIGH.value == 3
        assert Severity.EXTREME.value == 4
    
    def test_from_string(self):
        """Test creating severity from string."""
        assert Severity.from_string('low') == Severity.LOW
        assert Severity.from_string('LOW') == Severity.LOW
        assert Severity.from_string('medium') == Severity.MEDIUM
        assert Severity.from_string('high') == Severity.HIGH
        assert Severity.from_string('extreme') == Severity.EXTREME
        
        # Unknown values should default to MEDIUM
        assert Severity.from_string('unknown') == Severity.MEDIUM
        assert Severity.from_string('') == Severity.MEDIUM
    
    def test_severity_ordering(self):
        """Test severity levels can be compared."""
        assert Severity.LOW.value < Severity.MEDIUM.value
        assert Severity.MEDIUM.value < Severity.HIGH.value
        assert Severity.HIGH.value < Severity.EXTREME.value


class TestDetectedWord:
    """Test DetectedWord dataclass."""
    
    def test_basic_creation(self):
        """Test creating a DetectedWord."""
        word = DetectedWord(
            word="badword",
            original="BadWord",
            start=5.0,
            end=6.0,
            severity=Severity.HIGH,
            confidence=0.95
        )
        
        assert word.word == "badword"
        assert word.original == "BadWord"
        assert word.start == 5.0
        assert word.end == 6.0
        assert word.severity == Severity.HIGH
        assert word.confidence == 0.95
        assert word.context == ""
    
    def test_duration_property(self):
        """Test duration calculation."""
        word = DetectedWord(
            word="test",
            original="test",
            start=2.5,
            end=4.0,
            severity=Severity.LOW,
            confidence=1.0
        )
        
        assert word.duration == 1.5
    
    def test_to_dict(self):
        """Test converting to dictionary."""
        word = DetectedWord(
            word="test",
            original="TEST",
            start=1.0,
            end=2.5,
            severity=Severity.MEDIUM,
            confidence=0.85,
            context="this is a [test] word"
        )
        
        data = word.to_dict()
        
        assert data['word'] == "test"
        assert data['original'] == "TEST"
        assert data['start'] == 1.0
        assert data['end'] == 2.5
        assert data['duration'] == 1.5
        assert data['severity'] == "MEDIUM"
        assert data['confidence'] == 0.85
        assert data['context'] == "this is a [test] word"


class TestWordList:
    """Test WordList class."""
    
    def test_initialization(self):
        """Test WordList initialization."""
        word_list = WordList()
        
        # Should have some default words loaded
        assert len(word_list) > 0
        assert isinstance(word_list.words, dict)
        assert isinstance(word_list.patterns, dict)
        assert isinstance(word_list.variations, dict)
    
    def test_add_word(self):
        """Test adding words to the list."""
        word_list = WordList()
        initial_count = len(word_list)
        
        # Add word with string severity
        word_list.add_word("testword", "high")
        assert "testword" in word_list.words
        assert word_list.words["testword"] == Severity.HIGH
        
        # Add word with Severity enum
        word_list.add_word("another", Severity.LOW)
        assert "another" in word_list.words
        assert word_list.words["another"] == Severity.LOW
        
        assert len(word_list) == initial_count + 2
    
    def test_add_word_variations(self):
        """Test that adding a word creates variations."""
        word_list = WordList()
        word_list.add_word("test", Severity.MEDIUM)
        
        # Should create plural variation
        assert "tests" in word_list.variations
        assert word_list.variations["tests"] == "test"
    
    def test_remove_word(self):
        """Test removing words from the list."""
        word_list = WordList()
        word_list.add_word("removeme", Severity.LOW)
        
        # Verify word was added
        assert "removeme" in word_list.words
        
        # Remove the word
        removed = word_list.remove_word("removeme")
        assert removed is True
        assert "removeme" not in word_list.words
        
        # Try removing non-existent word
        removed = word_list.remove_word("nonexistent")
        assert removed is False
    
    def test_contains(self):
        """Test checking if word is in list."""
        word_list = WordList()
        word_list.add_word("contained", Severity.MEDIUM)
        
        assert "contained" in word_list
        assert "CONTAINED" in word_list  # Case insensitive
        assert " contained " in word_list  # Strips whitespace
        assert "notcontained" not in word_list
    
    def test_load_from_json_file(self, temp_dir):
        """Test loading word list from JSON file."""
        # Create test JSON file
        test_data = {
            "word1": "LOW",
            "word2": "HIGH",
            "word3": "EXTREME"
        }
        
        json_file = temp_dir / "test_words.json"
        with open(json_file, 'w') as f:
            json.dump(test_data, f)
        
        word_list = WordList()
        initial_count = len(word_list)
        
        word_list.load_from_file(json_file)
        
        assert "word1" in word_list.words
        assert word_list.words["word1"] == Severity.LOW
        assert "word2" in word_list.words
        assert word_list.words["word2"] == Severity.HIGH
        assert "word3" in word_list.words
        assert word_list.words["word3"] == Severity.EXTREME
        assert len(word_list) == initial_count + 3
    
    def test_load_from_csv_file(self, temp_dir):
        """Test loading word list from CSV file."""
        # Create test CSV file
        csv_content = """word,severity
testword1,low
testword2,medium
testword3,high"""
        
        csv_file = temp_dir / "test_words.csv"
        csv_file.write_text(csv_content)
        
        word_list = WordList()
        initial_count = len(word_list)
        
        word_list.load_from_file(csv_file)
        
        assert "testword1" in word_list.words
        assert word_list.words["testword1"] == Severity.LOW
        assert "testword2" in word_list.words
        assert word_list.words["testword2"] == Severity.MEDIUM
        assert "testword3" in word_list.words
        assert word_list.words["testword3"] == Severity.HIGH
        assert len(word_list) == initial_count + 3
    
    def test_load_from_text_file(self, temp_dir):
        """Test loading word list from plain text file."""
        # Create test text file
        text_content = """word1
word2
# This is a comment
word3
"""
        
        text_file = temp_dir / "test_words.txt"
        text_file.write_text(text_content)
        
        word_list = WordList()
        initial_count = len(word_list)
        
        word_list.load_from_file(text_file)
        
        assert "word1" in word_list.words
        assert "word2" in word_list.words
        assert "word3" in word_list.words
        # Comment should be ignored
        assert "# This is a comment" not in word_list.words
        assert len(word_list) == initial_count + 3
    
    def test_load_nonexistent_file(self):
        """Test loading from non-existent file."""
        word_list = WordList()
        
        with pytest.raises(FileNotFoundError):
            word_list.load_from_file("nonexistent.json")
    
    def test_save_to_json_file(self, temp_dir):
        """Test saving word list to JSON file."""
        word_list = WordList()
        word_list.add_word("save1", Severity.LOW)
        word_list.add_word("save2", Severity.HIGH)
        
        json_file = temp_dir / "saved_words.json"
        word_list.save_to_file(json_file)
        
        assert json_file.exists()
        
        # Load and verify
        with open(json_file, 'r') as f:
            data = json.load(f)
        
        assert "save1" in data
        assert "save2" in data
        assert data["save1"] == "LOW"
        assert data["save2"] == "HIGH"
    
    def test_save_to_csv_file(self, temp_dir):
        """Test saving word list to CSV file."""
        word_list = WordList()
        word_list.add_word("csv1", Severity.MEDIUM)
        word_list.add_word("csv2", Severity.EXTREME)
        
        csv_file = temp_dir / "saved_words.csv"
        word_list.save_to_file(csv_file)
        
        assert csv_file.exists()
        
        # Verify content
        content = csv_file.read_text()
        assert "csv1,medium" in content
        assert "csv2,extreme" in content
        assert "word,severity" in content  # Header


class TestWordDetector:
    """Test WordDetector class."""
    
    def test_initialization_default(self):
        """Test detector initialization with defaults."""
        detector = WordDetector()
        
        assert detector.word_list is not None
        assert detector.min_confidence == 0.7
        assert detector.check_variations is True
        assert detector.context_window == 5
    
    def test_initialization_custom(self):
        """Test detector initialization with custom parameters."""
        word_list = WordList()
        detector = WordDetector(
            word_list=word_list,
            min_confidence=0.8,
            check_variations=False,
            context_window=3
        )
        
        assert detector.word_list == word_list
        assert detector.min_confidence == 0.8
        assert detector.check_variations is False
        assert detector.context_window == 3
    
    def test_detect_direct_match(self):
        """Test detecting direct word matches."""
        word_list = WordList()
        word_list.add_word("badword", Severity.HIGH)
        
        detector = WordDetector(word_list=word_list)
        
        # Mock transcription result
        mock_word = Mock()
        mock_word.text = "badword"
        mock_word.start = 5.0
        mock_word.end = 6.0
        
        mock_transcription = Mock()
        mock_transcription.words = [mock_word]
        
        detected = detector.detect(mock_transcription, include_context=False)
        
        assert len(detected) == 1
        assert detected[0].word == "badword"
        assert detected[0].original == "badword"
        assert detected[0].start == 5.0
        assert detected[0].end == 6.0
        assert detected[0].severity == Severity.HIGH
        assert detected[0].confidence == 1.0
    
    def test_detect_case_insensitive(self):
        """Test case-insensitive detection."""
        word_list = WordList()
        word_list.add_word("badword", Severity.MEDIUM)
        
        detector = WordDetector(word_list=word_list)
        
        # Mock transcription with uppercase word
        mock_word = Mock()
        mock_word.text = "BADWORD"
        mock_word.start = 2.0
        mock_word.end = 3.0
        
        mock_transcription = Mock()
        mock_transcription.words = [mock_word]
        
        detected = detector.detect(mock_transcription, include_context=False)
        
        assert len(detected) == 1
        assert detected[0].word == "badword"  # Normalized
        assert detected[0].original == "BADWORD"  # Original preserved
    
    def test_detect_with_context(self):
        """Test detection with context extraction."""
        word_list = WordList()
        word_list.add_word("explicit", Severity.MEDIUM)
        
        detector = WordDetector(word_list=word_list, context_window=2)
        
        # Mock transcription with multiple words
        words = []
        word_texts = ["this", "is", "explicit", "content", "here"]
        for i, text in enumerate(word_texts):
            word = Mock()
            word.text = text
            word.start = float(i)
            word.end = float(i + 1)
            words.append(word)
        
        mock_transcription = Mock()
        mock_transcription.words = words
        
        detected = detector.detect(mock_transcription, include_context=True)
        
        assert len(detected) == 1
        assert detected[0].word == "explicit"
        assert detected[0].context == "this is [explicit] content here"
    
    def test_detect_variations(self):
        """Test detection of word variations."""
        word_list = WordList()
        word_list.add_word("test", Severity.LOW)
        # This should create "tests" variation
        
        detector = WordDetector(word_list=word_list, check_variations=True)
        
        # Mock transcription with variation
        mock_word = Mock()
        mock_word.text = "tests"
        mock_word.start = 1.0
        mock_word.end = 2.0
        
        mock_transcription = Mock()
        mock_transcription.words = [mock_word]
        
        detected = detector.detect(mock_transcription, include_context=False)
        
        assert len(detected) == 1
        assert detected[0].word == "test"  # Base word
        assert detected[0].original == "tests"  # Original variation
        assert detected[0].confidence == 0.95  # Variation confidence
    
    def test_detect_no_variations(self):
        """Test detection with variations disabled."""
        word_list = WordList()
        word_list.add_word("test", Severity.LOW)
        
        detector = WordDetector(word_list=word_list, check_variations=False)
        
        # Mock transcription with variation that shouldn't match
        mock_word = Mock()
        mock_word.text = "tests"
        mock_word.start = 1.0
        mock_word.end = 2.0
        
        mock_transcription = Mock()
        mock_transcription.words = [mock_word]
        
        detected = detector.detect(mock_transcription, include_context=False)
        
        assert len(detected) == 0
    
    def test_check_variations_known(self):
        """Test checking known variations."""
        word_list = WordList()
        word_list.add_word("base", Severity.MEDIUM)
        word_list.variations["bases"] = "base"  # Manually add variation
        
        detector = WordDetector(word_list=word_list)
        
        match, confidence = detector._check_variations("bases")
        
        assert match == "bases"
        assert confidence == 0.95
    
    def test_check_variations_fuzzy(self):
        """Test fuzzy matching for variations."""
        word_list = WordList()
        word_list.add_word("hello", Severity.LOW)
        
        detector = WordDetector(word_list=word_list, min_confidence=0.8)
        
        # Test similar word
        match, confidence = detector._check_variations("helo")  # Missing 'l'
        
        if match:  # Fuzzy matching might or might not match depending on similarity
            assert confidence >= 0.8
    
    def test_get_context_boundary(self):
        """Test context extraction at boundaries."""
        detector = WordDetector(context_window=2)
        
        # Create mock words
        word_texts = ["a", "b", "target", "d", "e"]
        words = []
        for text in word_texts:
            word = Mock()
            word.text = text
            words.append(word)
        
        # Test target at beginning
        context = detector._get_context(words, 0)
        assert context == "[a] b target"
        
        # Test target at end
        context = detector._get_context(words, 4)
        assert context == "target d [e]"
        
        # Test target in middle
        context = detector._get_context(words, 2)
        assert context == "a b [target] d e"
    
    def test_filter_by_severity(self):
        """Test filtering detected words by severity."""
        detector = WordDetector()
        
        # Create detected words with different severities
        detected_words = [
            DetectedWord("low", "low", 1.0, 2.0, Severity.LOW, 1.0),
            DetectedWord("med", "med", 3.0, 4.0, Severity.MEDIUM, 1.0),
            DetectedWord("high", "high", 5.0, 6.0, Severity.HIGH, 1.0),
            DetectedWord("extreme", "extreme", 7.0, 8.0, Severity.EXTREME, 1.0)
        ]
        
        # Filter by MEDIUM and above
        filtered = detector.filter_by_severity(detected_words, Severity.MEDIUM)
        
        assert len(filtered) == 3  # MEDIUM, HIGH, EXTREME
        severities = [w.severity for w in filtered]
        assert Severity.LOW not in severities
        assert Severity.MEDIUM in severities
        assert Severity.HIGH in severities
        assert Severity.EXTREME in severities
    
    def test_get_statistics_empty(self):
        """Test statistics for empty detection results."""
        detector = WordDetector()
        
        stats = detector.get_statistics([])
        
        assert stats['total_count'] == 0
        assert stats['unique_words'] == 0
        assert stats['by_severity'] == {}
        assert stats['most_common'] == []
    
    def test_get_statistics_with_words(self):
        """Test statistics for detection results."""
        detector = WordDetector()
        
        detected_words = [
            DetectedWord("word1", "word1", 1.0, 2.0, Severity.HIGH, 0.9),
            DetectedWord("word1", "word1", 3.0, 4.0, Severity.HIGH, 0.8),
            DetectedWord("word2", "word2", 5.0, 6.0, Severity.MEDIUM, 0.95),
            DetectedWord("word3", "word3", 7.0, 8.0, Severity.LOW, 1.0)
        ]
        
        stats = detector.get_statistics(detected_words)
        
        assert stats['total_count'] == 4
        assert stats['unique_words'] == 3
        assert stats['by_severity']['HIGH'] == 2
        assert stats['by_severity']['MEDIUM'] == 1
        assert stats['by_severity']['LOW'] == 1
        assert stats['most_common'][0] == ('word1', 2)  # Most frequent
        assert stats['average_confidence'] == (0.9 + 0.8 + 0.95 + 1.0) / 4


if __name__ == '__main__':
    pytest.main([__file__, '-v'])