""" Test Discovery System Intelligently discovers and categorizes tests across the YouTube Summarizer project. Handles different test types, patterns, and dependency analysis for smart test selection. """ import ast import re from pathlib import Path from typing import Dict, List, Optional, Set, Pattern import logging from .models import TestCategory, TestInfo, TestSuite # TestInfo and TestSuite are now imported from models module class TestDiscovery: """ Intelligent test discovery and categorization. Analyzes test files to understand their category, dependencies, and requirements for optimal execution planning. """ def __init__(self, project_root: Path, config): """ Initialize test discovery. Args: project_root: Root directory of the project config: Test configuration object """ self.project_root = project_root self.config = config self.logger = logging.getLogger("TestDiscovery") # Test file patterns by category self._category_patterns = { TestCategory.UNIT: [ re.compile(r"backend/tests/unit/.*\.py$"), re.compile(r"tests/unit/.*\.py$") ], TestCategory.INTEGRATION: [ re.compile(r"backend/tests/integration/.*\.py$"), re.compile(r"tests/integration/.*\.py$") ], TestCategory.API: [ re.compile(r"backend/tests/integration/test.*api.*\.py$"), re.compile(r"backend/tests/api/.*\.py$") ], TestCategory.FRONTEND: [ re.compile(r"frontend/src/.*\.test\.(ts|tsx|js|jsx)$"), re.compile(r"frontend/tests/.*\.(ts|tsx|js|jsx)$"), re.compile(r"frontend/src/test/.*\.tsx?$") ], TestCategory.E2E: [ re.compile(r".*/e2e/.*\.py$"), re.compile(r".*/e2e/.*\.spec\.(ts|js)$") ], TestCategory.PERFORMANCE: [ re.compile(r".*/performance/.*\.py$"), re.compile(r".*test.*perf.*\.py$") ], TestCategory.DATABASE: [ re.compile(r".*test.*database.*\.py$"), re.compile(r".*test.*db.*\.py$") ] } # Keywords that help identify test categories self._category_keywords = { TestCategory.AUTH: ["auth", "login", "token", "jwt", "password"], TestCategory.PIPELINE: ["pipeline", "summary", "workflow", "orchestrat"], TestCategory.DATABASE: ["database", "db", "model", "migration"], TestCategory.API: ["api", "endpoint", "route", "client"], TestCategory.INTEGRATION: ["integration", "service", "external"] } # Dependency indicators self._dependency_patterns = { "database": [r"@pytest\.mark\.asyncio.*db", r"TestClient", r"test_db", r"Session"], "network": [r"requests\.", r"httpx\.", r"aiohttp", r"urllib"], "auth": [r"auth", r"login", r"jwt", r"token", r"password"] } async def discover_by_category(self, category: TestCategory) -> Optional[TestSuite]: """ Discover all tests in a specific category. Args: category: Test category to discover Returns: TestSuite containing all tests in the category, or None if no tests found """ self.logger.info(f"Discovering {category.value} tests...") tests = [] # Find test files matching category patterns test_files = self._find_test_files(category) for test_file in test_files: file_tests = await self._analyze_test_file(test_file, category) tests.extend(file_tests) if not tests: return None suite = TestSuite(category=category, tests=tests) self.logger.info(f"Found {len(tests)} {category.value} tests " f"(estimated duration: {suite.total_estimated_duration:.1f}s)") return suite async def discover_by_patterns(self, patterns: List[str]) -> Dict[TestCategory, TestSuite]: """ Discover tests matching specific patterns. Args: patterns: List of glob patterns or test names Returns: Dictionary mapping categories to test suites """ self.logger.info(f"Discovering tests matching patterns: {patterns}") results = {} all_test_files = [] # Find files matching patterns for pattern in patterns: if "/" in pattern or pattern.endswith(".py"): # File path pattern matching_files = list(self.project_root.glob(pattern)) all_test_files.extend(matching_files) else: # Test name pattern - search all test files all_test_files.extend(self._find_all_test_files()) # Analyze each file and categorize categorized_tests = {} for test_file in set(all_test_files): # Determine category from file path category = self._categorize_file(test_file) if category not in categorized_tests: categorized_tests[category] = [] file_tests = await self._analyze_test_file(test_file, category) # Filter by pattern if it's a test name pattern if patterns and not any("/" in p or p.endswith(".py") for p in patterns): file_tests = [ test for test in file_tests if any(pattern.lower() in test.name.lower() for pattern in patterns) ] categorized_tests[category].extend(file_tests) # Create test suites for category, tests in categorized_tests.items(): if tests: results[category] = TestSuite(category=category, tests=tests) return results def _find_test_files(self, category: TestCategory) -> List[Path]: """Find test files matching a specific category.""" test_files = [] patterns = self._category_patterns.get(category, []) for pattern in patterns: # Search for files matching the pattern for test_file in self.project_root.rglob("*.py"): relative_path = str(test_file.relative_to(self.project_root)) if pattern.search(relative_path): test_files.append(test_file) # Also search TypeScript/JavaScript files for frontend tests if category == TestCategory.FRONTEND: for ext in ["*.ts", "*.tsx", "*.js", "*.jsx"]: for test_file in self.project_root.rglob(ext): relative_path = str(test_file.relative_to(self.project_root)) if any(p.search(relative_path) for p in patterns): test_files.append(test_file) return list(set(test_files)) # Remove duplicates def _find_all_test_files(self) -> List[Path]: """Find all test files in the project.""" test_files = [] # Python test files for test_file in self.project_root.rglob("test_*.py"): test_files.append(test_file) for test_file in self.project_root.rglob("*_test.py"): test_files.append(test_file) # Frontend test files for pattern in ["*.test.ts", "*.test.tsx", "*.test.js", "*.test.jsx"]: for test_file in self.project_root.rglob(pattern): test_files.append(test_file) return list(set(test_files)) def _categorize_file(self, file_path: Path) -> TestCategory: """Determine the category of a test file.""" relative_path = str(file_path.relative_to(self.project_root)) # Check each category pattern for category, patterns in self._category_patterns.items(): if any(pattern.search(relative_path) for pattern in patterns): return category # Fallback based on keywords in path or filename path_lower = relative_path.lower() if "integration" in path_lower: return TestCategory.INTEGRATION elif "unit" in path_lower: return TestCategory.UNIT elif "api" in path_lower: return TestCategory.API elif "frontend" in path_lower or file_path.suffix in ['.ts', '.tsx', '.js', '.jsx']: return TestCategory.FRONTEND elif "e2e" in path_lower: return TestCategory.E2E else: return TestCategory.UNIT # Default fallback async def _analyze_test_file(self, file_path: Path, category: TestCategory) -> List[TestInfo]: """Analyze a test file to extract test information.""" tests = [] try: if file_path.suffix == ".py": tests = await self._analyze_python_test_file(file_path, category) elif file_path.suffix in [".ts", ".tsx", ".js", ".jsx"]: tests = await self._analyze_frontend_test_file(file_path, category) except Exception as e: self.logger.warning(f"Failed to analyze {file_path}: {e}") return tests async def _analyze_python_test_file(self, file_path: Path, category: TestCategory) -> List[TestInfo]: """Analyze a Python test file.""" tests = [] try: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() # Parse the AST tree = ast.parse(content, filename=str(file_path)) # Extract test classes and methods for node in ast.walk(tree): if isinstance(node, ast.ClassDef) and node.name.startswith('Test'): class_name = node.name # Analyze methods in the class for item in node.body: if (isinstance(item, ast.FunctionDef) and item.name.startswith('test_')): test_info = self._create_test_info( file_path, category, content, class_name, item.name ) tests.append(test_info) elif (isinstance(node, ast.FunctionDef) and node.name.startswith('test_')): # Standalone test function test_info = self._create_test_info( file_path, category, content, None, node.name ) tests.append(test_info) except Exception as e: self.logger.warning(f"Error parsing {file_path}: {e}") return tests async def _analyze_frontend_test_file(self, file_path: Path, category: TestCategory) -> List[TestInfo]: """Analyze a frontend test file.""" tests = [] try: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() # Simple regex-based analysis for TypeScript/JavaScript tests # Look for test declarations: it('...', describe('...', test('...' test_patterns = [ r"it\s*\(\s*['\"]([^'\"]+)['\"]", r"test\s*\(\s*['\"]([^'\"]+)['\"]", r"describe\s*\(\s*['\"]([^'\"]+)['\"]" ] for pattern in test_patterns: matches = re.finditer(pattern, content) for match in matches: test_name = match.group(1) test_info = TestInfo( name=f"{file_path.stem}::{test_name}", file_path=file_path, category=category, function_name=test_name, estimated_duration=2.0, # Frontend tests typically take longer markers=self._extract_frontend_markers(content) ) tests.append(test_info) except Exception as e: self.logger.warning(f"Error analyzing frontend test {file_path}: {e}") return tests def _create_test_info( self, file_path: Path, category: TestCategory, content: str, class_name: Optional[str], function_name: str ) -> TestInfo: """Create TestInfo object from analysis.""" # Generate test name if class_name: test_name = f"{class_name}::{function_name}" else: test_name = f"{file_path.stem}::{function_name}" # Analyze content for requirements and markers markers = self._extract_markers(content) requires_db = self._requires_database(content) requires_network = self._requires_network(content) requires_auth = self._requires_auth(content) # Estimate duration based on category and markers duration = self._estimate_test_duration(category, markers, content) # Refine category based on content analysis refined_category = self._refine_category(category, content, file_path) return TestInfo( name=test_name, file_path=file_path, category=refined_category, class_name=class_name, function_name=function_name, markers=markers, estimated_duration=duration, requires_database=requires_db, requires_network=requires_network, requires_auth=requires_auth ) def _extract_markers(self, content: str) -> List[str]: """Extract pytest markers from test content.""" markers = [] marker_patterns = [ r"@pytest\.mark\.(\w+)", r"@pytest\.mark\.parametrize", r"@pytest\.mark\.asyncio", r"@pytest\.mark\.slow", r"@pytest\.mark\.integration" ] for pattern in marker_patterns: matches = re.finditer(pattern, content) for match in matches: if match.groups(): markers.append(match.group(1)) else: # Handle complex markers like parametrize marker_line = match.group(0) if "parametrize" in marker_line: markers.append("parametrize") elif "asyncio" in marker_line: markers.append("asyncio") return markers def _extract_frontend_markers(self, content: str) -> List[str]: """Extract markers from frontend test content.""" markers = [] if "async" in content or "await" in content: markers.append("async") if "mock" in content.lower(): markers.append("mock") if "timeout" in content.lower(): markers.append("timeout") return markers def _requires_database(self, content: str) -> bool: """Check if test requires database setup.""" db_indicators = [ "Session", "test_db", "database", "db", "engine", "create_all", "drop_all", "transaction", "commit" ] content_lower = content.lower() return any(indicator.lower() in content_lower for indicator in db_indicators) def _requires_network(self, content: str) -> bool: """Check if test requires network access.""" network_indicators = [ "requests.", "httpx.", "aiohttp", "urllib", "http://", "https://", "TestClient", "client.post", "client.get", "mock_response" ] return any(indicator in content for indicator in network_indicators) def _requires_auth(self, content: str) -> bool: """Check if test requires authentication setup.""" auth_indicators = [ "auth", "login", "jwt", "token", "password", "authenticate", "Authorization", "Bearer", "session" ] content_lower = content.lower() return any(indicator.lower() in content_lower for indicator in auth_indicators) def _estimate_test_duration(self, category: TestCategory, markers: List[str], content: str) -> float: """Estimate test execution duration.""" base_duration = { TestCategory.UNIT: 0.1, TestCategory.INTEGRATION: 2.0, TestCategory.API: 1.5, TestCategory.FRONTEND: 3.0, TestCategory.E2E: 30.0, TestCategory.PERFORMANCE: 60.0, TestCategory.DATABASE: 5.0, TestCategory.AUTH: 2.0, TestCategory.PIPELINE: 10.0 }.get(category, 1.0) # Adjust based on markers if "slow" in markers: base_duration *= 5.0 if "asyncio" in markers: base_duration *= 1.5 if "parametrize" in markers: # Estimate number of parameters param_count = content.count("@pytest.mark.parametrize") * 3 # Rough estimate base_duration *= max(param_count, 1) # Adjust based on content complexity if content.count("await") > 10: base_duration *= 2.0 if content.count("mock") > 5: base_duration *= 1.5 return base_duration def _refine_category(self, base_category: TestCategory, content: str, file_path: Path) -> TestCategory: """Refine test category based on content analysis.""" content_lower = content.lower() file_name_lower = file_path.name.lower() # Check for specific patterns that might override base category for category, keywords in self._category_keywords.items(): if any(keyword in content_lower or keyword in file_name_lower for keyword in keywords): # Only override if it makes sense if (base_category == TestCategory.UNIT and category in [TestCategory.AUTH, TestCategory.PIPELINE, TestCategory.DATABASE]): return category return base_category async def get_test_dependencies(self, test_info: TestInfo) -> List[str]: """Analyze test dependencies for smart execution ordering.""" dependencies = [] try: with open(test_info.file_path, 'r', encoding='utf-8') as f: content = f.read() # Look for common dependency patterns import_patterns = [ r"from\s+(\S+)\s+import", r"import\s+(\S+)" ] for pattern in import_patterns: matches = re.finditer(pattern, content) for match in matches: module = match.group(1) if any(keyword in module for keyword in ["service", "model", "api"]): dependencies.append(module) except Exception as e: self.logger.warning(f"Error analyzing dependencies for {test_info.name}: {e}") return dependencies[:10] # Limit to avoid excessive dependencies