feat: Add comprehensive tests for URL discovery and virtual scroll functionality
This commit is contained in:
286
tests/test_url_discovery_e2e.py
Normal file
286
tests/test_url_discovery_e2e.py
Normal file
@@ -0,0 +1,286 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
End-to-end tests for the URL Discovery endpoint.
|
||||
|
||||
This test suite verifies the complete functionality of the /urls/discover endpoint
|
||||
including happy path scenarios and error handling.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import json
|
||||
import pytest
|
||||
from typing import Dict, Any
|
||||
|
||||
# Test configuration
|
||||
BASE_URL = "http://localhost:11235"
|
||||
TEST_TIMEOUT = 30.0
|
||||
|
||||
|
||||
class TestURLDiscoveryEndpoint:
|
||||
"""End-to-end test suite for URL Discovery endpoint."""
|
||||
|
||||
@pytest.fixture
|
||||
async def client(self):
|
||||
"""Create an async HTTP client for testing."""
|
||||
async with httpx.AsyncClient(base_url=BASE_URL, timeout=TEST_TIMEOUT) as client:
|
||||
yield client
|
||||
|
||||
async def test_server_health(self, client):
|
||||
"""Test that the server is healthy before running other tests."""
|
||||
response = await client.get("/health")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["status"] == "ok"
|
||||
|
||||
async def test_endpoint_exists(self, client):
|
||||
"""Test that the /urls/discover endpoint exists and is documented."""
|
||||
# Check OpenAPI spec includes our endpoint
|
||||
response = await client.get("/openapi.json")
|
||||
assert response.status_code == 200
|
||||
|
||||
openapi_spec = response.json()
|
||||
assert "/urls/discover" in openapi_spec["paths"]
|
||||
|
||||
endpoint_spec = openapi_spec["paths"]["/urls/discover"]
|
||||
assert "post" in endpoint_spec
|
||||
assert endpoint_spec["post"]["summary"] == "URL Discovery and Seeding"
|
||||
|
||||
async def test_basic_url_discovery_happy_path(self, client):
|
||||
"""Test basic URL discovery with minimal configuration."""
|
||||
request_data = {
|
||||
"domain": "example.com",
|
||||
"seeding_config": {
|
||||
"source": "sitemap",
|
||||
"max_urls": 5
|
||||
}
|
||||
}
|
||||
|
||||
response = await client.post("/urls/discover", json=request_data)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert isinstance(data, list)
|
||||
# Note: We don't assert length > 0 because URL discovery
|
||||
# may legitimately return empty results
|
||||
|
||||
async def test_minimal_request_with_defaults(self, client):
|
||||
"""Test that minimal request works with default seeding_config."""
|
||||
request_data = {
|
||||
"domain": "example.com"
|
||||
}
|
||||
|
||||
response = await client.post("/urls/discover", json=request_data)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert isinstance(data, list)
|
||||
|
||||
async def test_advanced_configuration(self, client):
|
||||
"""Test advanced configuration options."""
|
||||
request_data = {
|
||||
"domain": "example.com",
|
||||
"seeding_config": {
|
||||
"source": "sitemap+cc",
|
||||
"pattern": "*/docs/*",
|
||||
"extract_head": True,
|
||||
"max_urls": 3,
|
||||
"live_check": True,
|
||||
"concurrency": 50,
|
||||
"hits_per_sec": 5,
|
||||
"verbose": True
|
||||
}
|
||||
}
|
||||
|
||||
response = await client.post("/urls/discover", json=request_data)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert isinstance(data, list)
|
||||
|
||||
# If URLs are returned, they should have the expected structure
|
||||
for url_obj in data:
|
||||
assert isinstance(url_obj, dict)
|
||||
# Should have at least a URL field
|
||||
assert "url" in url_obj
|
||||
|
||||
async def test_bm25_scoring_configuration(self, client):
|
||||
"""Test BM25 relevance scoring configuration."""
|
||||
request_data = {
|
||||
"domain": "example.com",
|
||||
"seeding_config": {
|
||||
"source": "sitemap",
|
||||
"extract_head": True, # Required for scoring
|
||||
"query": "documentation",
|
||||
"scoring_method": "bm25",
|
||||
"score_threshold": 0.1,
|
||||
"max_urls": 5
|
||||
}
|
||||
}
|
||||
|
||||
response = await client.post("/urls/discover", json=request_data)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert isinstance(data, list)
|
||||
|
||||
# If URLs are returned with scoring, check structure
|
||||
for url_obj in data:
|
||||
assert isinstance(url_obj, dict)
|
||||
assert "url" in url_obj
|
||||
# Scoring may or may not add score field depending on implementation
|
||||
|
||||
async def test_missing_required_domain_field(self, client):
|
||||
"""Test error handling when required domain field is missing."""
|
||||
request_data = {
|
||||
"seeding_config": {
|
||||
"source": "sitemap",
|
||||
"max_urls": 5
|
||||
}
|
||||
}
|
||||
|
||||
response = await client.post("/urls/discover", json=request_data)
|
||||
assert response.status_code == 422 # Validation error
|
||||
|
||||
error_data = response.json()
|
||||
assert "detail" in error_data
|
||||
assert any("domain" in str(error).lower() for error in error_data["detail"])
|
||||
|
||||
async def test_invalid_request_body_structure(self, client):
|
||||
"""Test error handling with completely invalid request body."""
|
||||
invalid_request = {
|
||||
"invalid_field": "test_value",
|
||||
"another_invalid": 123
|
||||
}
|
||||
|
||||
response = await client.post("/urls/discover", json=invalid_request)
|
||||
assert response.status_code == 422 # Validation error
|
||||
|
||||
error_data = response.json()
|
||||
assert "detail" in error_data
|
||||
|
||||
async def test_invalid_seeding_config_parameters(self, client):
|
||||
"""Test handling of invalid seeding configuration parameters."""
|
||||
request_data = {
|
||||
"domain": "example.com",
|
||||
"seeding_config": {
|
||||
"source": "invalid_source", # Invalid source
|
||||
"max_urls": "not_a_number" # Invalid type
|
||||
}
|
||||
}
|
||||
|
||||
response = await client.post("/urls/discover", json=request_data)
|
||||
# The endpoint should handle this gracefully
|
||||
# It may return 200 with empty results or 500 with error details
|
||||
assert response.status_code in [200, 500]
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
assert isinstance(data, list)
|
||||
# May be empty due to invalid config
|
||||
else:
|
||||
# Should have error details
|
||||
error_data = response.json()
|
||||
assert "detail" in error_data
|
||||
|
||||
async def test_empty_seeding_config(self, client):
|
||||
"""Test with empty seeding_config object."""
|
||||
request_data = {
|
||||
"domain": "example.com",
|
||||
"seeding_config": {}
|
||||
}
|
||||
|
||||
response = await client.post("/urls/discover", json=request_data)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert isinstance(data, list)
|
||||
|
||||
async def test_response_structure_consistency(self, client):
|
||||
"""Test that response structure is consistent."""
|
||||
request_data = {
|
||||
"domain": "example.com",
|
||||
"seeding_config": {
|
||||
"source": "sitemap",
|
||||
"max_urls": 1
|
||||
}
|
||||
}
|
||||
|
||||
# Make multiple requests to ensure consistency
|
||||
for _ in range(3):
|
||||
response = await client.post("/urls/discover", json=request_data)
|
||||
assert response.status_code == 200
|
||||
|
||||
data = response.json()
|
||||
assert isinstance(data, list)
|
||||
|
||||
# If there are results, check they have consistent structure
|
||||
for url_obj in data:
|
||||
assert isinstance(url_obj, dict)
|
||||
assert "url" in url_obj
|
||||
|
||||
async def test_content_type_validation(self, client):
|
||||
"""Test that endpoint requires JSON content type."""
|
||||
# Test with wrong content type
|
||||
response = await client.post(
|
||||
"/urls/discover",
|
||||
content="domain=example.com",
|
||||
headers={"Content-Type": "application/x-www-form-urlencoded"}
|
||||
)
|
||||
assert response.status_code == 422
|
||||
|
||||
|
||||
# Standalone test runner for when pytest is not available
|
||||
async def run_tests_standalone():
|
||||
"""Run tests without pytest framework."""
|
||||
print("🧪 Running URL Discovery Endpoint Tests")
|
||||
print("=" * 50)
|
||||
|
||||
# Check server health first
|
||||
async with httpx.AsyncClient(base_url=BASE_URL, timeout=TEST_TIMEOUT) as client:
|
||||
try:
|
||||
response = await client.get("/health")
|
||||
assert response.status_code == 200
|
||||
print("✅ Server health check passed")
|
||||
except Exception as e:
|
||||
print(f"❌ Server health check failed: {e}")
|
||||
return False
|
||||
|
||||
test_suite = TestURLDiscoveryEndpoint()
|
||||
|
||||
# Run tests manually
|
||||
tests = [
|
||||
("Endpoint exists", test_suite.test_endpoint_exists),
|
||||
("Basic URL discovery", test_suite.test_basic_url_discovery_happy_path),
|
||||
("Minimal request", test_suite.test_minimal_request_with_defaults),
|
||||
("Advanced configuration", test_suite.test_advanced_configuration),
|
||||
("BM25 scoring", test_suite.test_bm25_scoring_configuration),
|
||||
("Missing domain error", test_suite.test_missing_required_domain_field),
|
||||
("Invalid request body", test_suite.test_invalid_request_body_structure),
|
||||
("Invalid config handling", test_suite.test_invalid_seeding_config_parameters),
|
||||
("Empty config", test_suite.test_empty_seeding_config),
|
||||
("Response consistency", test_suite.test_response_structure_consistency),
|
||||
("Content type validation", test_suite.test_content_type_validation),
|
||||
]
|
||||
|
||||
passed = 0
|
||||
failed = 0
|
||||
|
||||
async with httpx.AsyncClient(base_url=BASE_URL, timeout=TEST_TIMEOUT) as client:
|
||||
for test_name, test_func in tests:
|
||||
try:
|
||||
await test_func(client)
|
||||
print(f"✅ {test_name}")
|
||||
passed += 1
|
||||
except Exception as e:
|
||||
print(f"❌ {test_name}: {e}")
|
||||
failed += 1
|
||||
|
||||
print(f"\n📊 Test Results: {passed} passed, {failed} failed")
|
||||
return failed == 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Run tests standalone
|
||||
success = asyncio.run(run_tests_standalone())
|
||||
exit(0 if success else 1)
|
||||
Reference in New Issue
Block a user