"""
|
|
Test Discovery System
|
|
|
|
Intelligently discovers and categorizes tests across the YouTube Summarizer project.
|
|
Handles different test types, patterns, and dependency analysis for smart test selection.
|
|
"""
|
|
|
|
import ast
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Set, Pattern
|
|
import logging
|
|
|
|
from .models import TestCategory, TestInfo, TestSuite
|
|
|
|
|
|
# TestInfo and TestSuite are now imported from models module
|
|
|
|
|
|


class TestDiscovery:
    """
    Intelligent test discovery and categorization.

    Analyzes test files to understand their category, dependencies, and requirements
    for optimal execution planning.
    """

    def __init__(self, project_root: Path, config):
        """
        Initialize test discovery.

        Args:
            project_root: Root directory of the project
            config: Test configuration object
        """
        self.project_root = project_root
        self.config = config
        self.logger = logging.getLogger("TestDiscovery")

        # Test file patterns by category
        self._category_patterns = {
            TestCategory.UNIT: [
                re.compile(r"backend/tests/unit/.*\.py$"),
                re.compile(r"tests/unit/.*\.py$")
            ],
            TestCategory.INTEGRATION: [
                re.compile(r"backend/tests/integration/.*\.py$"),
                re.compile(r"tests/integration/.*\.py$")
            ],
            TestCategory.API: [
                re.compile(r"backend/tests/integration/test.*api.*\.py$"),
                re.compile(r"backend/tests/api/.*\.py$")
            ],
            TestCategory.FRONTEND: [
                re.compile(r"frontend/src/.*\.test\.(ts|tsx|js|jsx)$"),
                re.compile(r"frontend/tests/.*\.(ts|tsx|js|jsx)$"),
                re.compile(r"frontend/src/test/.*\.tsx?$")
            ],
            TestCategory.E2E: [
                re.compile(r".*/e2e/.*\.py$"),
                re.compile(r".*/e2e/.*\.spec\.(ts|js)$")
            ],
            TestCategory.PERFORMANCE: [
                re.compile(r".*/performance/.*\.py$"),
                re.compile(r".*test.*perf.*\.py$")
            ],
            TestCategory.DATABASE: [
                re.compile(r".*test.*database.*\.py$"),
                re.compile(r".*test.*db.*\.py$")
            ]
        }

        # Keywords that help identify test categories
        self._category_keywords = {
            TestCategory.AUTH: ["auth", "login", "token", "jwt", "password"],
            TestCategory.PIPELINE: ["pipeline", "summary", "workflow", "orchestrat"],
            TestCategory.DATABASE: ["database", "db", "model", "migration"],
            TestCategory.API: ["api", "endpoint", "route", "client"],
            TestCategory.INTEGRATION: ["integration", "service", "external"]
        }

        # Dependency indicators (not yet referenced by the _requires_* helpers below,
        # which currently use their own hardcoded indicator lists)
        self._dependency_patterns = {
            "database": [r"@pytest\.mark\.asyncio.*db", r"TestClient", r"test_db", r"Session"],
            "network": [r"requests\.", r"httpx\.", r"aiohttp", r"urllib"],
            "auth": [r"auth", r"login", r"jwt", r"token", r"password"]
        }

    async def discover_by_category(self, category: TestCategory) -> Optional[TestSuite]:
        """
        Discover all tests in a specific category.

        Args:
            category: Test category to discover

        Returns:
            TestSuite containing all tests in the category, or None if no tests found
        """
        self.logger.info(f"Discovering {category.value} tests...")

        tests = []

        # Find test files matching category patterns
        test_files = self._find_test_files(category)

        for test_file in test_files:
            file_tests = await self._analyze_test_file(test_file, category)
            tests.extend(file_tests)

        if not tests:
            return None

        suite = TestSuite(category=category, tests=tests)

        self.logger.info(f"Found {len(tests)} {category.value} tests "
                         f"(estimated duration: {suite.total_estimated_duration:.1f}s)")

        return suite
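
    # Example usage (a sketch; assumes an asyncio event loop and a config object
    # compatible with whatever the caller passes to __init__ -- the config type is
    # not defined in this module):
    #
    #     discovery = TestDiscovery(Path("/path/to/project"), config)
    #     suite = await discovery.discover_by_category(TestCategory.UNIT)
    #     if suite:
    #         print(f"{len(suite.tests)} unit tests, "
    #               f"~{suite.total_estimated_duration:.1f}s estimated")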

    async def discover_by_patterns(self, patterns: List[str]) -> Dict[TestCategory, TestSuite]:
        """
        Discover tests matching specific patterns.

        Args:
            patterns: List of glob patterns or test names

        Returns:
            Dictionary mapping categories to test suites
        """
        self.logger.info(f"Discovering tests matching patterns: {patterns}")

        results = {}
        all_test_files = []

        # Find files matching patterns
        for pattern in patterns:
            if "/" in pattern or pattern.endswith(".py"):
                # File path pattern
                matching_files = list(self.project_root.glob(pattern))
                all_test_files.extend(matching_files)
            else:
                # Test name pattern - search all test files
                all_test_files.extend(self._find_all_test_files())

        # Analyze each file and categorize
        categorized_tests = {}

        for test_file in set(all_test_files):
            # Determine category from file path
            category = self._categorize_file(test_file)

            if category not in categorized_tests:
                categorized_tests[category] = []

            file_tests = await self._analyze_test_file(test_file, category)

            # Filter by pattern if it's a test name pattern
            if patterns and not any("/" in p or p.endswith(".py") for p in patterns):
                file_tests = [
                    test for test in file_tests
                    if any(pattern.lower() in test.name.lower() for pattern in patterns)
                ]

            categorized_tests[category].extend(file_tests)

        # Create test suites
        for category, tests in categorized_tests.items():
            if tests:
                results[category] = TestSuite(category=category, tests=tests)

        return results
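
    # Example calls (sketch, continuing the discovery instance from the sketch
    # above; the paths and test names are hypothetical):
    #
    #     # Path patterns are resolved with Path.glob relative to project_root:
    #     suites = await discovery.discover_by_patterns(["backend/tests/unit/*.py"])
    #
    #     # Bare names act as substring filters on the discovered test names:
    #     suites = await discovery.discover_by_patterns(["auth"])
    #     for category, suite in suites.items():
    #         print(category.value, len(suite.tests))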

    def _find_test_files(self, category: TestCategory) -> List[Path]:
        """Find test files matching a specific category."""
        test_files = []
        patterns = self._category_patterns.get(category, [])

        # Search Python files against the category patterns. Paths are compared in
        # POSIX form so the forward-slash patterns also match on Windows.
        for test_file in self.project_root.rglob("*.py"):
            relative_path = test_file.relative_to(self.project_root).as_posix()
            if any(pattern.search(relative_path) for pattern in patterns):
                test_files.append(test_file)

        # Also search TypeScript/JavaScript files for frontend tests
        if category == TestCategory.FRONTEND:
            for ext in ["*.ts", "*.tsx", "*.js", "*.jsx"]:
                for test_file in self.project_root.rglob(ext):
                    relative_path = test_file.relative_to(self.project_root).as_posix()
                    if any(p.search(relative_path) for p in patterns):
                        test_files.append(test_file)

        return list(set(test_files))  # Remove duplicates

    def _find_all_test_files(self) -> List[Path]:
        """Find all test files in the project."""
        test_files = []

        # Python test files
        for test_file in self.project_root.rglob("test_*.py"):
            test_files.append(test_file)

        for test_file in self.project_root.rglob("*_test.py"):
            test_files.append(test_file)

        # Frontend test files
        for pattern in ["*.test.ts", "*.test.tsx", "*.test.js", "*.test.jsx"]:
            for test_file in self.project_root.rglob(pattern):
                test_files.append(test_file)

        return list(set(test_files))

    def _categorize_file(self, file_path: Path) -> TestCategory:
        """Determine the category of a test file."""
        relative_path = file_path.relative_to(self.project_root).as_posix()

        # Check each category pattern
        for category, patterns in self._category_patterns.items():
            if any(pattern.search(relative_path) for pattern in patterns):
                return category

        # Fallback based on keywords in path or filename
        path_lower = relative_path.lower()

        if "integration" in path_lower:
            return TestCategory.INTEGRATION
        elif "unit" in path_lower:
            return TestCategory.UNIT
        elif "api" in path_lower:
            return TestCategory.API
        elif "frontend" in path_lower or file_path.suffix in ['.ts', '.tsx', '.js', '.jsx']:
            return TestCategory.FRONTEND
        elif "e2e" in path_lower:
            return TestCategory.E2E
        else:
            return TestCategory.UNIT  # Default fallback
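
    # Illustrative mappings (hypothetical paths, shown for orientation):
    #     backend/tests/unit/test_models.py      -> TestCategory.UNIT        (pattern match)
    #     backend/tests/integration/test_api.py  -> TestCategory.INTEGRATION
    #         (the INTEGRATION patterns are checked before the API patterns)
    #     frontend/src/components/App.test.tsx   -> TestCategory.FRONTEND
    #     scripts/test_helpers.py                -> TestCategory.UNIT        (default fallback)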

    async def _analyze_test_file(self, file_path: Path, category: TestCategory) -> List[TestInfo]:
        """Analyze a test file to extract test information."""
        tests = []

        try:
            if file_path.suffix == ".py":
                tests = await self._analyze_python_test_file(file_path, category)
            elif file_path.suffix in [".ts", ".tsx", ".js", ".jsx"]:
                tests = await self._analyze_frontend_test_file(file_path, category)

        except Exception as e:
            self.logger.warning(f"Failed to analyze {file_path}: {e}")

        return tests

    async def _analyze_python_test_file(self, file_path: Path, category: TestCategory) -> List[TestInfo]:
        """Analyze a Python test file."""
        tests = []

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Parse the AST
            tree = ast.parse(content, filename=str(file_path))

            # Extract test classes and methods. Only the module's top level is walked,
            # so methods inside test classes are not counted a second time, and async
            # test functions (pytest-asyncio style) are included as well.
            function_types = (ast.FunctionDef, ast.AsyncFunctionDef)

            for node in tree.body:
                if isinstance(node, ast.ClassDef) and node.name.startswith('Test'):
                    class_name = node.name

                    # Analyze methods in the class
                    for item in node.body:
                        if isinstance(item, function_types) and item.name.startswith('test_'):
                            test_info = self._create_test_info(
                                file_path, category, content, class_name, item.name
                            )
                            tests.append(test_info)

                elif isinstance(node, function_types) and node.name.startswith('test_'):
                    # Standalone test function
                    test_info = self._create_test_info(
                        file_path, category, content, None, node.name
                    )
                    tests.append(test_info)

        except Exception as e:
            self.logger.warning(f"Error parsing {file_path}: {e}")

        return tests
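
    # For a file containing (hypothetical example):
    #
    #     class TestAuth:
    #         async def test_login(self): ...
    #
    #     def test_token_refresh(): ...
    #
    # this yields TestInfo entries named "TestAuth::test_login" and
    # "<file stem>::test_token_refresh" (see _create_test_info for naming).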

    async def _analyze_frontend_test_file(self, file_path: Path, category: TestCategory) -> List[TestInfo]:
        """Analyze a frontend test file."""
        tests = []

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Simple regex-based analysis for TypeScript/JavaScript tests.
            # Look for test declarations: it('...'), test('...'), describe('...')
            test_patterns = [
                r"it\s*\(\s*['\"]([^'\"]+)['\"]",
                r"test\s*\(\s*['\"]([^'\"]+)['\"]",
                r"describe\s*\(\s*['\"]([^'\"]+)['\"]"
            ]

            for pattern in test_patterns:
                matches = re.finditer(pattern, content)
                for match in matches:
                    test_name = match.group(1)

                    test_info = TestInfo(
                        name=f"{file_path.stem}::{test_name}",
                        file_path=file_path,
                        category=category,
                        function_name=test_name,
                        estimated_duration=2.0,  # Frontend tests typically take longer
                        markers=self._extract_frontend_markers(content)
                    )

                    tests.append(test_info)

        except Exception as e:
            self.logger.warning(f"Error analyzing frontend test {file_path}: {e}")

        return tests
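
    # Note: the regexes above also match describe('...') suite headers, so a file
    # such as (hypothetical):
    #
    #     describe('LoginForm', () => {
    #       it('submits credentials', () => { ... });
    #     });
    #
    # produces two TestInfo entries ("<stem>::LoginForm" and
    # "<stem>::submits credentials"), which slightly inflates the per-file count.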

    def _create_test_info(
        self,
        file_path: Path,
        category: TestCategory,
        content: str,
        class_name: Optional[str],
        function_name: str
    ) -> TestInfo:
        """Create TestInfo object from analysis."""

        # Generate test name
        if class_name:
            test_name = f"{class_name}::{function_name}"
        else:
            test_name = f"{file_path.stem}::{function_name}"

        # Analyze content for requirements and markers
        markers = self._extract_markers(content)
        requires_db = self._requires_database(content)
        requires_network = self._requires_network(content)
        requires_auth = self._requires_auth(content)

        # Estimate duration based on category and markers
        duration = self._estimate_test_duration(category, markers, content)

        # Refine category based on content analysis
        refined_category = self._refine_category(category, content, file_path)

        return TestInfo(
            name=test_name,
            file_path=file_path,
            category=refined_category,
            class_name=class_name,
            function_name=function_name,
            markers=markers,
            estimated_duration=duration,
            requires_database=requires_db,
            requires_network=requires_network,
            requires_auth=requires_auth
        )

    def _extract_markers(self, content: str) -> List[str]:
        """Extract pytest markers from test content."""
        markers = []

        # A single pattern captures every "@pytest.mark.<name>" decorator
        # (e.g. asyncio, slow, integration, parametrize); each marker is
        # recorded once, in order of first appearance.
        marker_pattern = r"@pytest\.mark\.(\w+)"

        for match in re.finditer(marker_pattern, content):
            marker = match.group(1)
            if marker not in markers:
                markers.append(marker)

        return markers
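
    # For example, a test decorated with (hypothetical snippet):
    #
    #     @pytest.mark.asyncio
    #     @pytest.mark.parametrize("url", URLS)
    #     async def test_fetch(url): ...
    #
    # yields markers == ["asyncio", "parametrize"], which later scales the
    # duration estimate in _estimate_test_duration.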

    def _extract_frontend_markers(self, content: str) -> List[str]:
        """Extract markers from frontend test content."""
        markers = []

        if "async" in content or "await" in content:
            markers.append("async")
        if "mock" in content.lower():
            markers.append("mock")
        if "timeout" in content.lower():
            markers.append("timeout")

        return markers

    def _requires_database(self, content: str) -> bool:
        """Check if test requires database setup."""
        db_indicators = [
            "Session", "test_db", "database", "db", "engine",
            "create_all", "drop_all", "transaction", "commit"
        ]

        content_lower = content.lower()
        return any(indicator.lower() in content_lower for indicator in db_indicators)

    def _requires_network(self, content: str) -> bool:
        """Check if test requires network access."""
        network_indicators = [
            "requests.", "httpx.", "aiohttp", "urllib", "http://", "https://",
            "TestClient", "client.post", "client.get", "mock_response"
        ]

        return any(indicator in content for indicator in network_indicators)

    def _requires_auth(self, content: str) -> bool:
        """Check if test requires authentication setup."""
        auth_indicators = [
            "auth", "login", "jwt", "token", "password", "authenticate",
            "Authorization", "Bearer", "session"
        ]

        content_lower = content.lower()
        return any(indicator.lower() in content_lower for indicator in auth_indicators)
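
    # These checks are substring heuristics over the whole file (case-insensitive,
    # except _requires_network), so they can over-match: the "db" indicator also
    # fires on a word like "feedback". For instance (hypothetical content):
    #
    #     content = "client = TestClient(app)\nresp = client.get('/health')"
    #     # _requires_network(content) -> True  (matches "TestClient", "client.get")
    #     # _requires_database(content) -> False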

    def _estimate_test_duration(self, category: TestCategory, markers: List[str], content: str) -> float:
        """Estimate test execution duration."""
        base_duration = {
            TestCategory.UNIT: 0.1,
            TestCategory.INTEGRATION: 2.0,
            TestCategory.API: 1.5,
            TestCategory.FRONTEND: 3.0,
            TestCategory.E2E: 30.0,
            TestCategory.PERFORMANCE: 60.0,
            TestCategory.DATABASE: 5.0,
            TestCategory.AUTH: 2.0,
            TestCategory.PIPELINE: 10.0
        }.get(category, 1.0)

        # Adjust based on markers
        if "slow" in markers:
            base_duration *= 5.0
        if "asyncio" in markers:
            base_duration *= 1.5
        if "parametrize" in markers:
            # Estimate number of parameters
            param_count = content.count("@pytest.mark.parametrize") * 3  # Rough estimate
            base_duration *= max(param_count, 1)

        # Adjust based on content complexity
        if content.count("await") > 10:
            base_duration *= 2.0
        if content.count("mock") > 5:
            base_duration *= 1.5

        return base_duration
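
    # Worked example (hypothetical file): an INTEGRATION test marked slow and
    # asyncio, in a file with one @pytest.mark.parametrize decorator, 4 awaits
    # and 2 mocks:
    #     2.0 (base) * 5.0 (slow) * 1.5 (asyncio) * 3 (parametrize estimate) = 45.0s
    # Note the per-file counts (parametrize decorators, "await", "mock") apply to
    # every test in that file, not only the decorated one.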

    def _refine_category(self, base_category: TestCategory, content: str, file_path: Path) -> TestCategory:
        """Refine test category based on content analysis."""

        content_lower = content.lower()
        file_name_lower = file_path.name.lower()

        # Check for specific patterns that might override the base category
        for category, keywords in self._category_keywords.items():
            if any(keyword in content_lower or keyword in file_name_lower for keyword in keywords):
                # Only override if it makes sense
                if (base_category == TestCategory.UNIT and
                        category in [TestCategory.AUTH, TestCategory.PIPELINE, TestCategory.DATABASE]):
                    return category

        return base_category
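
    # Example: a file discovered under tests/unit/ whose source mentions "jwt"
    # is re-labelled TestCategory.AUTH; an INTEGRATION or E2E test is never
    # reclassified, since only UNIT tests are eligible for the override.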

    async def get_test_dependencies(self, test_info: TestInfo) -> List[str]:
        """Analyze test dependencies for smart execution ordering."""
        dependencies = []

        try:
            with open(test_info.file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Look for common dependency patterns. The plain-import pattern is
            # anchored to the start of a line so it does not also match the
            # "import" keyword inside "from ... import ..." statements.
            import_patterns = [
                r"from\s+(\S+)\s+import",
                r"^\s*import\s+(\S+)"
            ]

            for pattern in import_patterns:
                matches = re.finditer(pattern, content, re.MULTILINE)
                for match in matches:
                    module = match.group(1)
                    if any(keyword in module for keyword in ["service", "model", "api"]):
                        dependencies.append(module)

        except Exception as e:
            self.logger.warning(f"Error analyzing dependencies for {test_info.name}: {e}")

        return dependencies[:10]  # Limit to avoid excessive dependencies
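

# Example end-to-end usage (a sketch, not part of the public API; assumes this
# module lives in a package alongside models.py and that the caller supplies a
# suitable config object):
#
#     import asyncio
#     from pathlib import Path
#
#     async def main(config):
#         discovery = TestDiscovery(Path.cwd(), config)
#         suite = await discovery.discover_by_category(TestCategory.INTEGRATION)
#         if suite:
#             for test in suite.tests:
#                 print(test.name, test.estimated_duration)
#
#     # asyncio.run(main(config))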