# youtube-summarizer/backend/test_runner/core/test_discovery.py
"""
Test Discovery System
Intelligently discovers and categorizes tests across the YouTube Summarizer project.
Handles different test types, patterns, and dependency analysis for smart test selection.
"""
import ast
import re
from pathlib import Path
from typing import Dict, List, Optional, Set, Pattern
import logging
from .models import TestCategory, TestInfo, TestSuite
# TestInfo and TestSuite are now imported from models module
class TestDiscovery:
"""
Intelligent test discovery and categorization.
Analyzes test files to understand their category, dependencies, and requirements
for optimal execution planning.
"""
def __init__(self, project_root: Path, config):
    """
    Set up the discovery engine for a project.

    Args:
        project_root: Root directory of the project
        config: Test configuration object
    """
    self.project_root = project_root
    self.config = config
    self.logger = logging.getLogger("TestDiscovery")

    # Raw path regexes per category; compiled in one pass below.
    raw_category_patterns = {
        TestCategory.UNIT: [
            r"backend/tests/unit/.*\.py$",
            r"tests/unit/.*\.py$",
        ],
        TestCategory.INTEGRATION: [
            r"backend/tests/integration/.*\.py$",
            r"tests/integration/.*\.py$",
        ],
        TestCategory.API: [
            r"backend/tests/integration/test.*api.*\.py$",
            r"backend/tests/api/.*\.py$",
        ],
        TestCategory.FRONTEND: [
            r"frontend/src/.*\.test\.(ts|tsx|js|jsx)$",
            r"frontend/tests/.*\.(ts|tsx|js|jsx)$",
            r"frontend/src/test/.*\.tsx?$",
        ],
        TestCategory.E2E: [
            r".*/e2e/.*\.py$",
            r".*/e2e/.*\.spec\.(ts|js)$",
        ],
        TestCategory.PERFORMANCE: [
            r".*/performance/.*\.py$",
            r".*test.*perf.*\.py$",
        ],
        TestCategory.DATABASE: [
            r".*test.*database.*\.py$",
            r".*test.*db.*\.py$",
        ],
    }
    self._category_patterns = {
        category: [re.compile(raw) for raw in raws]
        for category, raws in raw_category_patterns.items()
    }

    # Keywords that hint at a category when path patterns are inconclusive.
    self._category_keywords = {
        TestCategory.AUTH: ["auth", "login", "token", "jwt", "password"],
        TestCategory.PIPELINE: ["pipeline", "summary", "workflow", "orchestrat"],
        TestCategory.DATABASE: ["database", "db", "model", "migration"],
        TestCategory.API: ["api", "endpoint", "route", "client"],
        TestCategory.INTEGRATION: ["integration", "service", "external"],
    }

    # Regex fragments signalling external dependencies of a test.
    self._dependency_patterns = {
        "database": [r"@pytest\.mark\.asyncio.*db", r"TestClient", r"test_db", r"Session"],
        "network": [r"requests\.", r"httpx\.", r"aiohttp", r"urllib"],
        "auth": [r"auth", r"login", r"jwt", r"token", r"password"],
    }
async def discover_by_category(self, category: TestCategory) -> Optional[TestSuite]:
    """
    Discover every test that belongs to one category.

    Args:
        category: Test category to discover

    Returns:
        TestSuite with all discovered tests, or None when the category is empty
    """
    self.logger.info(f"Discovering {category.value} tests...")

    discovered = []
    for candidate in self._find_test_files(category):
        discovered.extend(await self._analyze_test_file(candidate, category))

    if not discovered:
        return None

    suite = TestSuite(category=category, tests=discovered)
    self.logger.info(f"Found {len(discovered)} {category.value} tests "
                     f"(estimated duration: {suite.total_estimated_duration:.1f}s)")
    return suite
async def discover_by_patterns(self, patterns: List[str]) -> Dict[TestCategory, TestSuite]:
    """
    Discover tests matching the given glob or test-name patterns.

    Args:
        patterns: Glob-style file patterns or bare test-name fragments

    Returns:
        Mapping of category to the TestSuite of matching tests
    """
    self.logger.info(f"Discovering tests matching patterns: {patterns}")

    def is_path_pattern(p: str) -> bool:
        # Heuristic: a slash or a .py suffix means "file pattern".
        return "/" in p or p.endswith(".py")

    candidate_files = []
    for pattern in patterns:
        if is_path_pattern(pattern):
            candidate_files.extend(self.project_root.glob(pattern))
        else:
            # Name fragment: consider every known test file.
            candidate_files.extend(self._find_all_test_files())

    # Name-based filtering applies only when no pattern is a file pattern.
    name_search = bool(patterns) and not any(is_path_pattern(p) for p in patterns)

    grouped: Dict[TestCategory, List[TestInfo]] = {}
    for candidate in set(candidate_files):
        category = self._categorize_file(candidate)
        file_tests = await self._analyze_test_file(candidate, category)
        if name_search:
            file_tests = [
                t for t in file_tests
                if any(p.lower() in t.name.lower() for p in patterns)
            ]
        grouped.setdefault(category, []).extend(file_tests)

    return {
        category: TestSuite(category=category, tests=tests)
        for category, tests in grouped.items()
        if tests
    }
def _find_test_files(self, category: TestCategory) -> List[Path]:
    """Find test files whose project-relative path matches *category*.

    Walks the project tree once per extension instead of once per pattern:
    the previous implementation called ``rglob("*.py")`` inside the pattern
    loop, rescanning the whole tree for every pattern (O(patterns x files)).

    Args:
        category: Category whose path patterns should be applied

    Returns:
        De-duplicated list of matching file paths (order unspecified)
    """
    patterns = self._category_patterns.get(category, [])
    if not patterns:
        return []

    matched: Set[Path] = set()

    # Single scan over Python files, testing every pattern per file.
    for candidate in self.project_root.rglob("*.py"):
        rel = str(candidate.relative_to(self.project_root))
        if any(p.search(rel) for p in patterns):
            matched.add(candidate)

    # Frontend tests also live in TypeScript/JavaScript sources.
    if category == TestCategory.FRONTEND:
        for ext in ("*.ts", "*.tsx", "*.js", "*.jsx"):
            for candidate in self.project_root.rglob(ext):
                rel = str(candidate.relative_to(self.project_root))
                if any(p.search(rel) for p in patterns):
                    matched.add(candidate)

    return list(matched)
def _find_all_test_files(self) -> List[Path]:
    """Collect every Python and frontend test file under the project root."""
    # Python naming conventions plus the common JS/TS test suffixes.
    test_globs = (
        "test_*.py", "*_test.py",
        "*.test.ts", "*.test.tsx", "*.test.js", "*.test.jsx",
    )
    collected: Set[Path] = set()
    for glob_pattern in test_globs:
        collected.update(self.project_root.rglob(glob_pattern))
    return list(collected)
def _categorize_file(self, file_path: Path) -> TestCategory:
    """Map a test file to a category via path patterns, then path keywords."""
    rel = str(file_path.relative_to(self.project_root))

    # Explicit regex patterns win over keyword heuristics.
    for category, patterns in self._category_patterns.items():
        for pattern in patterns:
            if pattern.search(rel):
                return category

    # Keyword fallback on the lower-cased relative path (order matters).
    lowered = rel.lower()
    for needle, category in (
        ("integration", TestCategory.INTEGRATION),
        ("unit", TestCategory.UNIT),
        ("api", TestCategory.API),
    ):
        if needle in lowered:
            return category
    if "frontend" in lowered or file_path.suffix in ['.ts', '.tsx', '.js', '.jsx']:
        return TestCategory.FRONTEND
    if "e2e" in lowered:
        return TestCategory.E2E
    return TestCategory.UNIT  # Last-resort default
async def _analyze_test_file(self, file_path: Path, category: TestCategory) -> List[TestInfo]:
    """Dispatch analysis by file extension; on failure log and return []."""
    suffix = file_path.suffix
    try:
        if suffix == ".py":
            return await self._analyze_python_test_file(file_path, category)
        if suffix in (".ts", ".tsx", ".js", ".jsx"):
            return await self._analyze_frontend_test_file(file_path, category)
    except Exception as e:
        self.logger.warning(f"Failed to analyze {file_path}: {e}")
    # Unknown extension or analysis failure: nothing discovered.
    return []
async def _analyze_python_test_file(self, file_path: Path, category: TestCategory) -> List[TestInfo]:
    """Parse a Python test file and build TestInfo entries for its tests.

    Fixes two discovery bugs present in the previous implementation:

    - ``ast.walk`` also yields the methods of each ``Test*`` class after the
      class itself was processed, so every test method was recorded twice
      (once with its class, once as a "standalone" function).  We now walk
      only the module's top level.
    - ``async def`` tests (``ast.AsyncFunctionDef``) were invisible; they
      are now discovered alongside synchronous ones.

    Args:
        file_path: Path of the Python test file
        category: Category assigned to discovered tests (may be refined later)

    Returns:
        One TestInfo per discovered test; empty list on parse failure.
    """
    tests: List[TestInfo] = []
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        tree = ast.parse(content, filename=str(file_path))

        for node in tree.body:
            if isinstance(node, ast.ClassDef) and node.name.startswith('Test'):
                # Test methods grouped under a Test* class.
                for item in node.body:
                    if isinstance(item, function_nodes) and item.name.startswith('test_'):
                        tests.append(self._create_test_info(
                            file_path, category, content, node.name, item.name
                        ))
            elif isinstance(node, function_nodes) and node.name.startswith('test_'):
                # Module-level test function.
                tests.append(self._create_test_info(
                    file_path, category, content, None, node.name
                ))
    except Exception as e:
        self.logger.warning(f"Error parsing {file_path}: {e}")
    return tests
async def _analyze_frontend_test_file(self, file_path: Path, category: TestCategory) -> List[TestInfo]:
    """Extract test entries from a TS/JS test file via lightweight regexes."""
    found: List[TestInfo] = []
    # Declarations of the form it('...'), test('...') and describe('...').
    declaration_regexes = (
        r"it\s*\(\s*['\"]([^'\"]+)['\"]",
        r"test\s*\(\s*['\"]([^'\"]+)['\"]",
        r"describe\s*\(\s*['\"]([^'\"]+)['\"]"
    )
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        for regex in declaration_regexes:
            for match in re.finditer(regex, content):
                label = match.group(1)
                found.append(TestInfo(
                    name=f"{file_path.stem}::{label}",
                    file_path=file_path,
                    category=category,
                    function_name=label,
                    estimated_duration=2.0,  # Frontend tests typically take longer
                    markers=self._extract_frontend_markers(content)
                ))
    except Exception as e:
        self.logger.warning(f"Error analyzing frontend test {file_path}: {e}")
    return found
def _create_test_info(
    self,
    file_path: Path,
    category: TestCategory,
    content: str,
    class_name: Optional[str],
    function_name: str
) -> TestInfo:
    """Assemble a TestInfo record from static analysis of the file content."""
    # Qualified name: Class::method, or file::function for module-level tests.
    owner = class_name if class_name else file_path.stem

    # Markers feed the duration estimate, so extract them first.
    markers = self._extract_markers(content)

    return TestInfo(
        name=f"{owner}::{function_name}",
        file_path=file_path,
        category=self._refine_category(category, content, file_path),
        class_name=class_name,
        function_name=function_name,
        markers=markers,
        estimated_duration=self._estimate_test_duration(category, markers, content),
        requires_database=self._requires_database(content),
        requires_network=self._requires_network(content),
        requires_auth=self._requires_auth(content)
    )
def _extract_markers(self, content: str) -> List[str]:
    """Extract pytest marker names from test source text.

    The previous implementation matched the generic ``@pytest.mark.<name>``
    pattern *and* separate per-marker patterns, so markers such as
    ``asyncio`` and ``parametrize`` were appended twice.  A single generic
    pattern plus order-preserving de-duplication yields each marker once.

    Args:
        content: Full source text of the test file

    Returns:
        Marker names in first-seen order, without duplicates.
    """
    found = re.findall(r"@pytest\.mark\.(\w+)", content)
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    return list(dict.fromkeys(found))
def _extract_frontend_markers(self, content: str) -> List[str]:
    """Derive coarse markers for a frontend test from its source text."""
    markers = []
    # "async" is checked case-sensitively (matches the async/await keywords).
    if "async" in content or "await" in content:
        markers.append("async")
    lowered = content.lower()
    for needle in ("mock", "timeout"):
        if needle in lowered:
            markers.append(needle)
    return markers
def _requires_database(self, content: str) -> bool:
    """Heuristically decide whether a test needs database setup.

    Most indicators are plain case-insensitive substrings.  The two-letter
    token ``db`` is matched on word boundaries instead: as a bare substring
    it fired on unrelated words such as "feedback".

    Args:
        content: Full source text of the test file

    Returns:
        True when any database indicator is present.
    """
    content_lower = content.lower()
    substring_indicators = [
        "session", "test_db", "database", "engine",
        "create_all", "drop_all", "transaction", "commit"
    ]
    if any(indicator in content_lower for indicator in substring_indicators):
        return True
    # Word-bounded check for the short "db" token only.
    return re.search(r"\bdb\b", content_lower) is not None
def _requires_network(self, content: str) -> bool:
    """Check whether a test appears to touch the network (case-sensitive)."""
    network_indicators = (
        "requests.", "httpx.", "aiohttp", "urllib", "http://", "https://",
        "TestClient", "client.post", "client.get", "mock_response"
    )
    for indicator in network_indicators:
        if indicator in content:
            return True
    return False
def _requires_auth(self, content: str) -> bool:
    """Check whether a test appears to exercise authentication paths."""
    lowered = content.lower()
    # Indicators are pre-lowercased; comparison is case-insensitive.
    auth_indicators = (
        "auth", "login", "jwt", "token", "password", "authenticate",
        "authorization", "bearer", "session"
    )
    return any(indicator in lowered for indicator in auth_indicators)
def _estimate_test_duration(self, category: TestCategory, markers: List[str], content: str) -> float:
    """Rough per-test duration estimate in seconds.

    Starts from a per-category baseline and applies multiplicative
    adjustments for markers and source complexity.
    """
    category_baselines = {
        TestCategory.UNIT: 0.1,
        TestCategory.INTEGRATION: 2.0,
        TestCategory.API: 1.5,
        TestCategory.FRONTEND: 3.0,
        TestCategory.E2E: 30.0,
        TestCategory.PERFORMANCE: 60.0,
        TestCategory.DATABASE: 5.0,
        TestCategory.AUTH: 2.0,
        TestCategory.PIPELINE: 10.0
    }
    estimate = category_baselines.get(category, 1.0)

    # Marker-driven multipliers.
    if "slow" in markers:
        estimate *= 5.0
    if "asyncio" in markers:
        estimate *= 1.5
    if "parametrize" in markers:
        # Assume roughly three cases per parametrize decorator.
        param_count = content.count("@pytest.mark.parametrize") * 3
        estimate *= max(param_count, 1)

    # Content-complexity multipliers.
    if content.count("await") > 10:
        estimate *= 2.0
    if content.count("mock") > 5:
        estimate *= 1.5
    return estimate
def _refine_category(self, base_category: TestCategory, content: str, file_path: Path) -> TestCategory:
    """Upgrade a UNIT classification when content keywords point elsewhere."""
    haystacks = (content.lower(), file_path.name.lower())
    # Only these categories may override a generic UNIT classification.
    overridable = (TestCategory.AUTH, TestCategory.PIPELINE, TestCategory.DATABASE)
    for category, keywords in self._category_keywords.items():
        keyword_hit = any(kw in hay for kw in keywords for hay in haystacks)
        if keyword_hit and base_category == TestCategory.UNIT and category in overridable:
            return category
    return base_category
async def get_test_dependencies(self, test_info: "TestInfo") -> List[str]:
    """Collect project modules a test imports, for execution ordering.

    Scans the test file's import statements and keeps modules whose dotted
    path mentions "service", "model" or "api".  Results are de-duplicated
    in first-seen order (the previous version returned one entry per
    matching import line, including repeats) and capped at 10.

    Args:
        test_info: Test whose source file is analyzed

    Returns:
        Up to 10 unique dependency module names; empty on read failure.
    """
    collected: List[str] = []
    import_patterns = [
        r"from\s+(\S+)\s+import",
        r"import\s+(\S+)"
    ]
    try:
        with open(test_info.file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        for pattern in import_patterns:
            for match in re.finditer(pattern, content):
                module = match.group(1)
                if any(keyword in module for keyword in ("service", "model", "api")):
                    collected.append(module)
    except Exception as e:
        self.logger.warning(f"Error analyzing dependencies for {test_info.name}: {e}")
    # De-duplicate while preserving order, then cap the list size.
    return list(dict.fromkeys(collected))[:10]