feat: add comprehensive type definitions and improve test coverage
Add new type definitions file with extensive Union type aliases for all core components including AsyncUrlSeeder, SeedingConfig, and various crawler strategies. Enhance test coverage with improved bot detection tests, Docker-based testing, and extended features validation. The changes provide better type safety and more robust testing infrastructure for the crawling framework.
This commit is contained in:
@@ -1,26 +1,27 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Comprehensive Test Suite for Docker Extended Features
|
||||
Tests all advanced features: URL seeding, adaptive crawling, browser adapters,
|
||||
Tests all advanced features: URL seeding, adaptive crawling, browser adapters,
|
||||
proxy rotation, and dispatchers.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import aiohttp
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
from rich.panel import Panel
|
||||
from rich import box
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
|
||||
# Configuration
|
||||
API_BASE_URL = "http://localhost:11235"
|
||||
console = Console()
|
||||
|
||||
|
||||
class TestResult:
|
||||
class TestResultData:
|
||||
def __init__(self, name: str, category: str):
|
||||
self.name = name
|
||||
self.category = category
|
||||
@@ -34,13 +35,15 @@ class ExtendedFeaturesTestSuite:
|
||||
def __init__(self, base_url: str = API_BASE_URL):
|
||||
self.base_url = base_url
|
||||
self.headers = {"Content-Type": "application/json"}
|
||||
self.results: List[TestResult] = []
|
||||
self.results: List[TestResultData] = []
|
||||
|
||||
async def check_server_health(self) -> bool:
|
||||
"""Check if the server is running"""
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(f"{self.base_url}/health", timeout=aiohttp.ClientTimeout(total=5)) as response:
|
||||
async with session.get(
|
||||
f"{self.base_url}/health", timeout=aiohttp.ClientTimeout(total=5)
|
||||
) as response:
|
||||
return response.status == 200
|
||||
except Exception as e:
|
||||
console.print(f"[red]Server health check failed: {e}[/red]")
|
||||
@@ -50,287 +53,285 @@ class ExtendedFeaturesTestSuite:
|
||||
# URL SEEDING TESTS
|
||||
# ========================================================================
|
||||
|
||||
async def test_url_seeding_basic(self) -> TestResult:
|
||||
async def test_url_seeding_basic(self) -> TestResultData:
|
||||
"""Test basic URL seeding functionality"""
|
||||
result = TestResult("Basic URL Seeding", "URL Seeding")
|
||||
result = TestResultData("Basic URL Seeding", "URL Seeding")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"url": "https://www.nbcnews.com",
|
||||
"config": {
|
||||
"max_urls": 10,
|
||||
"filter_type": "all"
|
||||
}
|
||||
"config": {"max_urls": 10, "filter_type": "all"},
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/seed",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=30)
|
||||
timeout=aiohttp.ClientTimeout(total=30),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
# API returns: {"seed_url": [list of urls], "count": n}
|
||||
urls = data.get('seed_url', [])
|
||||
|
||||
urls = data.get("seed_url", [])
|
||||
|
||||
result.passed = len(urls) > 0
|
||||
result.details = {
|
||||
"urls_found": len(urls),
|
||||
"sample_url": urls[0] if urls else None
|
||||
"sample_url": urls[0] if urls else None,
|
||||
}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
async def test_url_seeding_with_filters(self) -> TestResult:
|
||||
async def test_url_seeding_with_filters(self) -> TestResultData:
|
||||
"""Test URL seeding with different filter types"""
|
||||
result = TestResult("URL Seeding with Filters", "URL Seeding")
|
||||
result = TestResultData("URL Seeding with Filters", "URL Seeding")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"url": "https://www.nbcnews.com",
|
||||
"config": {
|
||||
"max_urls": 20,
|
||||
"filter_type": "domain",
|
||||
"exclude_external": True
|
||||
}
|
||||
"exclude_external": True,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/seed",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=30)
|
||||
timeout=aiohttp.ClientTimeout(total=30),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
# API returns: {"seed_url": [list of urls], "count": n}
|
||||
urls = data.get('seed_url', [])
|
||||
|
||||
urls = data.get("seed_url", [])
|
||||
|
||||
result.passed = len(urls) > 0
|
||||
result.details = {
|
||||
"urls_found": len(urls),
|
||||
"filter_type": "domain"
|
||||
"filter_type": "domain",
|
||||
}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
# ========================================================================
|
||||
# ADAPTIVE CRAWLING TESTS
|
||||
# ========================================================================
|
||||
|
||||
async def test_adaptive_crawling_basic(self) -> TestResult:
|
||||
async def test_adaptive_crawling_basic(self) -> TestResultData:
|
||||
"""Test basic adaptive crawling"""
|
||||
result = TestResult("Basic Adaptive Crawling", "Adaptive Crawling")
|
||||
result = TestResultData("Basic Adaptive Crawling", "Adaptive Crawling")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": True},
|
||||
"crawler_config": {
|
||||
"adaptive": True,
|
||||
"adaptive_threshold": 0.5
|
||||
}
|
||||
"crawler_config": {"adaptive": True, "adaptive_threshold": 0.5},
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/crawl",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=60)
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
result.passed = data.get('success', False)
|
||||
result.details = {
|
||||
"results_count": len(data.get('results', []))
|
||||
}
|
||||
result.passed = data.get("success", False)
|
||||
result.details = {"results_count": len(data.get("results", []))}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
async def test_adaptive_crawling_with_strategy(self) -> TestResult:
|
||||
async def test_adaptive_crawling_with_strategy(self) -> TestResultData:
|
||||
"""Test adaptive crawling with custom strategy"""
|
||||
result = TestResult("Adaptive Crawling with Strategy", "Adaptive Crawling")
|
||||
result = TestResultData("Adaptive Crawling with Strategy", "Adaptive Crawling")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"urls": ["https://httpbin.org/html"],
|
||||
"browser_config": {"headless": True},
|
||||
"crawler_config": {
|
||||
"adaptive": True,
|
||||
"adaptive_threshold": 0.7,
|
||||
"word_count_threshold": 10
|
||||
}
|
||||
"word_count_threshold": 10,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/crawl",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=60)
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
result.passed = data.get('success', False)
|
||||
result.details = {
|
||||
"adaptive_threshold": 0.7
|
||||
}
|
||||
result.passed = data.get("success", False)
|
||||
result.details = {"adaptive_threshold": 0.7}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
# ========================================================================
|
||||
# BROWSER ADAPTER TESTS
|
||||
# ========================================================================
|
||||
|
||||
async def test_browser_adapter_default(self) -> TestResult:
|
||||
async def test_browser_adapter_default(self) -> TestResultData:
|
||||
"""Test default browser adapter"""
|
||||
result = TestResult("Default Browser Adapter", "Browser Adapters")
|
||||
result = TestResultData("Default Browser Adapter", "Browser Adapters")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": True},
|
||||
"crawler_config": {},
|
||||
"anti_bot_strategy": "default"
|
||||
"anti_bot_strategy": "default",
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/crawl",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=60)
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
result.passed = data.get('success', False)
|
||||
result.passed = data.get("success", False)
|
||||
result.details = {"adapter": "default"}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
async def test_browser_adapter_stealth(self) -> TestResult:
|
||||
async def test_browser_adapter_stealth(self) -> TestResultData:
|
||||
"""Test stealth browser adapter"""
|
||||
result = TestResult("Stealth Browser Adapter", "Browser Adapters")
|
||||
result = TestResultData("Stealth Browser Adapter", "Browser Adapters")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": True},
|
||||
"crawler_config": {},
|
||||
"anti_bot_strategy": "stealth"
|
||||
"anti_bot_strategy": "stealth",
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/crawl",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=60)
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
result.passed = data.get('success', False)
|
||||
result.passed = data.get("success", False)
|
||||
result.details = {"adapter": "stealth"}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
async def test_browser_adapter_undetected(self) -> TestResult:
|
||||
async def test_browser_adapter_undetected(self) -> TestResultData:
|
||||
"""Test undetected browser adapter"""
|
||||
result = TestResult("Undetected Browser Adapter", "Browser Adapters")
|
||||
result = TestResultData("Undetected Browser Adapter", "Browser Adapters")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": True},
|
||||
"crawler_config": {},
|
||||
"anti_bot_strategy": "undetected"
|
||||
"anti_bot_strategy": "undetected",
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/crawl",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=60)
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
result.passed = data.get('success', False)
|
||||
result.passed = data.get("success", False)
|
||||
result.details = {"adapter": "undetected"}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
# ========================================================================
|
||||
# PROXY ROTATION TESTS
|
||||
# ========================================================================
|
||||
|
||||
async def test_proxy_rotation_round_robin(self) -> TestResult:
|
||||
async def test_proxy_rotation_round_robin(self) -> TestResultData:
|
||||
"""Test round robin proxy rotation"""
|
||||
result = TestResult("Round Robin Proxy Rotation", "Proxy Rotation")
|
||||
result = TestResultData("Round Robin Proxy Rotation", "Proxy Rotation")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"urls": ["https://httpbin.org/ip"],
|
||||
"browser_config": {"headless": True},
|
||||
@@ -338,37 +339,41 @@ class ExtendedFeaturesTestSuite:
|
||||
"proxy_rotation_strategy": "round_robin",
|
||||
"proxies": [
|
||||
{"server": "http://proxy1.example.com:8080"},
|
||||
{"server": "http://proxy2.example.com:8080"}
|
||||
]
|
||||
{"server": "http://proxy2.example.com:8080"},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/crawl",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=60)
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
) as response:
|
||||
# This might fail due to invalid proxies, but we're testing the API accepts it
|
||||
result.passed = response.status in [200, 500] # Accept either success or expected failure
|
||||
result.passed = response.status in [
|
||||
200,
|
||||
500,
|
||||
] # Accept either success or expected failure
|
||||
result.details = {
|
||||
"strategy": "round_robin",
|
||||
"status": response.status
|
||||
"status": response.status,
|
||||
}
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
async def test_proxy_rotation_random(self) -> TestResult:
|
||||
async def test_proxy_rotation_random(self) -> TestResultData:
|
||||
"""Test random proxy rotation"""
|
||||
result = TestResult("Random Proxy Rotation", "Proxy Rotation")
|
||||
result = TestResultData("Random Proxy Rotation", "Proxy Rotation")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"urls": ["https://httpbin.org/ip"],
|
||||
"browser_config": {"headless": True},
|
||||
@@ -376,119 +381,121 @@ class ExtendedFeaturesTestSuite:
|
||||
"proxy_rotation_strategy": "random",
|
||||
"proxies": [
|
||||
{"server": "http://proxy1.example.com:8080"},
|
||||
{"server": "http://proxy2.example.com:8080"}
|
||||
]
|
||||
{"server": "http://proxy2.example.com:8080"},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/crawl",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=60)
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
) as response:
|
||||
result.passed = response.status in [200, 500]
|
||||
result.details = {
|
||||
"strategy": "random",
|
||||
"status": response.status
|
||||
}
|
||||
|
||||
result.details = {"strategy": "random", "status": response.status}
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
# ========================================================================
|
||||
# DISPATCHER TESTS
|
||||
# ========================================================================
|
||||
|
||||
async def test_dispatcher_memory_adaptive(self) -> TestResult:
|
||||
async def test_dispatcher_memory_adaptive(self) -> TestResultData:
|
||||
"""Test memory adaptive dispatcher"""
|
||||
result = TestResult("Memory Adaptive Dispatcher", "Dispatchers")
|
||||
result = TestResultData("Memory Adaptive Dispatcher", "Dispatchers")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": True},
|
||||
"crawler_config": {"screenshot": True},
|
||||
"dispatcher": "memory_adaptive"
|
||||
"dispatcher": "memory_adaptive",
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/crawl",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=60)
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
result.passed = data.get('success', False)
|
||||
if result.passed and data.get('results'):
|
||||
has_screenshot = data['results'][0].get('screenshot') is not None
|
||||
result.passed = data.get("success", False)
|
||||
if result.passed and data.get("results"):
|
||||
has_screenshot = (
|
||||
data["results"][0].get("screenshot") is not None
|
||||
)
|
||||
result.details = {
|
||||
"dispatcher": "memory_adaptive",
|
||||
"screenshot_captured": has_screenshot
|
||||
"screenshot_captured": has_screenshot,
|
||||
}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
async def test_dispatcher_semaphore(self) -> TestResult:
|
||||
async def test_dispatcher_semaphore(self) -> TestResultData:
|
||||
"""Test semaphore dispatcher"""
|
||||
result = TestResult("Semaphore Dispatcher", "Dispatchers")
|
||||
result = TestResultData("Semaphore Dispatcher", "Dispatchers")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
payload = {
|
||||
"urls": ["https://example.com"],
|
||||
"browser_config": {"headless": True},
|
||||
"crawler_config": {},
|
||||
"dispatcher": "semaphore"
|
||||
"dispatcher": "semaphore",
|
||||
}
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
f"{self.base_url}/crawl",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=60)
|
||||
timeout=aiohttp.ClientTimeout(total=60),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
result.passed = data.get('success', False)
|
||||
result.passed = data.get("success", False)
|
||||
result.details = {"dispatcher": "semaphore"}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
async def test_dispatcher_endpoints(self) -> TestResult:
|
||||
async def test_dispatcher_endpoints(self) -> TestResultData:
|
||||
"""Test dispatcher management endpoints"""
|
||||
result = TestResult("Dispatcher Management Endpoints", "Dispatchers")
|
||||
result = TestResultData("Dispatcher Management Endpoints", "Dispatchers")
|
||||
try:
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Test list dispatchers
|
||||
async with session.get(
|
||||
f"{self.base_url}/dispatchers",
|
||||
headers=self.headers,
|
||||
timeout=aiohttp.ClientTimeout(total=10)
|
||||
timeout=aiohttp.ClientTimeout(total=10),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
data = await response.json()
|
||||
@@ -497,15 +504,15 @@ class ExtendedFeaturesTestSuite:
|
||||
result.passed = len(dispatchers) > 0
|
||||
result.details = {
|
||||
"dispatcher_count": len(dispatchers),
|
||||
"available": [d.get('type') for d in dispatchers]
|
||||
"available": [d.get("type") for d in dispatchers],
|
||||
}
|
||||
else:
|
||||
result.error = f"Status {response.status}"
|
||||
|
||||
|
||||
result.duration = time.time() - start
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
# ========================================================================
|
||||
@@ -514,120 +521,145 @@ class ExtendedFeaturesTestSuite:
|
||||
|
||||
async def run_all_tests(self):
|
||||
"""Run all tests and collect results"""
|
||||
console.print(Panel.fit(
|
||||
"[bold cyan]Extended Features Test Suite[/bold cyan]\n"
|
||||
"Testing: URL Seeding, Adaptive Crawling, Browser Adapters, Proxy Rotation, Dispatchers",
|
||||
border_style="cyan"
|
||||
))
|
||||
|
||||
console.print(
|
||||
Panel.fit(
|
||||
"[bold cyan]Extended Features Test Suite[/bold cyan]\n"
|
||||
"Testing: URL Seeding, Adaptive Crawling, Browser Adapters, Proxy Rotation, Dispatchers",
|
||||
border_style="cyan",
|
||||
)
|
||||
)
|
||||
|
||||
# Check server health first
|
||||
console.print("\n[yellow]Checking server health...[/yellow]")
|
||||
if not await self.check_server_health():
|
||||
console.print("[red]❌ Server is not responding. Please start the Docker container.[/red]")
|
||||
console.print(
|
||||
"[red]❌ Server is not responding. Please start the Docker container.[/red]"
|
||||
)
|
||||
console.print(f"[yellow]Expected server at: {self.base_url}[/yellow]")
|
||||
return
|
||||
|
||||
|
||||
console.print("[green]✅ Server is healthy[/green]\n")
|
||||
|
||||
|
||||
# Define all tests
|
||||
tests = [
|
||||
# URL Seeding
|
||||
self.test_url_seeding_basic(),
|
||||
self.test_url_seeding_with_filters(),
|
||||
|
||||
# Adaptive Crawling
|
||||
self.test_adaptive_crawling_basic(),
|
||||
self.test_adaptive_crawling_with_strategy(),
|
||||
|
||||
# Browser Adapters
|
||||
self.test_browser_adapter_default(),
|
||||
self.test_browser_adapter_stealth(),
|
||||
self.test_browser_adapter_undetected(),
|
||||
|
||||
# Proxy Rotation
|
||||
self.test_proxy_rotation_round_robin(),
|
||||
self.test_proxy_rotation_random(),
|
||||
|
||||
# Dispatchers
|
||||
self.test_dispatcher_memory_adaptive(),
|
||||
self.test_dispatcher_semaphore(),
|
||||
self.test_dispatcher_endpoints(),
|
||||
]
|
||||
|
||||
|
||||
console.print(f"[cyan]Running {len(tests)} tests...[/cyan]\n")
|
||||
|
||||
|
||||
# Run tests
|
||||
for i, test_coro in enumerate(tests, 1):
|
||||
console.print(f"[yellow]Running test {i}/{len(tests)}...[/yellow]")
|
||||
test_result = await test_coro
|
||||
self.results.append(test_result)
|
||||
|
||||
|
||||
# Print immediate feedback
|
||||
if test_result.passed:
|
||||
console.print(f"[green]✅ {test_result.name} ({test_result.duration:.2f}s)[/green]")
|
||||
console.print(
|
||||
f"[green]✅ {test_result.name} ({test_result.duration:.2f}s)[/green]"
|
||||
)
|
||||
else:
|
||||
console.print(f"[red]❌ {test_result.name} ({test_result.duration:.2f}s)[/red]")
|
||||
console.print(
|
||||
f"[red]❌ {test_result.name} ({test_result.duration:.2f}s)[/red]"
|
||||
)
|
||||
if test_result.error:
|
||||
console.print(f" [red]Error: {test_result.error}[/red]")
|
||||
|
||||
|
||||
# Display results
|
||||
self.display_results()
|
||||
|
||||
def display_results(self):
|
||||
"""Display test results in a formatted table"""
|
||||
console.print("\n")
|
||||
console.print(Panel.fit("[bold]Test Results Summary[/bold]", border_style="cyan"))
|
||||
|
||||
console.print(
|
||||
Panel.fit("[bold]Test Results Summary[/bold]", border_style="cyan")
|
||||
)
|
||||
|
||||
# Group by category
|
||||
categories = {}
|
||||
for result in self.results:
|
||||
if result.category not in categories:
|
||||
categories[result.category] = []
|
||||
categories[result.category].append(result)
|
||||
|
||||
|
||||
# Display by category
|
||||
for category, tests in categories.items():
|
||||
table = Table(title=f"\n{category}", box=box.ROUNDED, show_header=True, header_style="bold cyan")
|
||||
table = Table(
|
||||
title=f"\n{category}",
|
||||
box=box.ROUNDED,
|
||||
show_header=True,
|
||||
header_style="bold cyan",
|
||||
)
|
||||
table.add_column("Test Name", style="white", width=40)
|
||||
table.add_column("Status", style="white", width=10)
|
||||
table.add_column("Duration", style="white", width=10)
|
||||
table.add_column("Details", style="white", width=40)
|
||||
|
||||
|
||||
for test in tests:
|
||||
status = "[green]✅ PASS[/green]" if test.passed else "[red]❌ FAIL[/red]"
|
||||
status = (
|
||||
"[green]✅ PASS[/green]" if test.passed else "[red]❌ FAIL[/red]"
|
||||
)
|
||||
duration = f"{test.duration:.2f}s"
|
||||
details = str(test.details) if test.details else (test.error or "")
|
||||
if test.error and len(test.error) > 40:
|
||||
details = test.error[:37] + "..."
|
||||
|
||||
|
||||
table.add_row(test.name, status, duration, details)
|
||||
|
||||
|
||||
console.print(table)
|
||||
|
||||
|
||||
# Overall statistics
|
||||
total_tests = len(self.results)
|
||||
passed_tests = sum(1 for r in self.results if r.passed)
|
||||
failed_tests = total_tests - passed_tests
|
||||
pass_rate = (passed_tests / total_tests * 100) if total_tests > 0 else 0
|
||||
|
||||
|
||||
console.print("\n")
|
||||
stats_table = Table(box=box.DOUBLE, show_header=False, width=60)
|
||||
stats_table.add_column("Metric", style="bold cyan", width=30)
|
||||
stats_table.add_column("Value", style="bold white", width=30)
|
||||
|
||||
|
||||
stats_table.add_row("Total Tests", str(total_tests))
|
||||
stats_table.add_row("Passed", f"[green]{passed_tests}[/green]")
|
||||
stats_table.add_row("Failed", f"[red]{failed_tests}[/red]")
|
||||
stats_table.add_row("Pass Rate", f"[cyan]{pass_rate:.1f}%[/cyan]")
|
||||
|
||||
console.print(Panel(stats_table, title="[bold]Overall Statistics[/bold]", border_style="green" if pass_rate >= 80 else "yellow"))
|
||||
|
||||
|
||||
console.print(
|
||||
Panel(
|
||||
stats_table,
|
||||
title="[bold]Overall Statistics[/bold]",
|
||||
border_style="green" if pass_rate >= 80 else "yellow",
|
||||
)
|
||||
)
|
||||
|
||||
# Recommendations
|
||||
if failed_tests > 0:
|
||||
console.print("\n[yellow]💡 Some tests failed. Check the errors above for details.[/yellow]")
|
||||
console.print(
|
||||
"\n[yellow]💡 Some tests failed. Check the errors above for details.[/yellow]"
|
||||
)
|
||||
console.print("[yellow] Common issues:[/yellow]")
|
||||
console.print("[yellow] - Server not fully started (wait ~30-40 seconds after docker compose up)[/yellow]")
|
||||
console.print("[yellow] - Invalid proxy servers in proxy rotation tests (expected)[/yellow]")
|
||||
console.print(
|
||||
"[yellow] - Server not fully started (wait ~30-40 seconds after docker compose up)[/yellow]"
|
||||
)
|
||||
console.print(
|
||||
"[yellow] - Invalid proxy servers in proxy rotation tests (expected)[/yellow]"
|
||||
)
|
||||
console.print("[yellow] - Network connectivity issues[/yellow]")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user