reproduced AttributeError from #1642
This commit is contained in:
@@ -1,16 +1,30 @@
|
|||||||
"""
|
"""
|
||||||
Test the complete fix for both the filter serialization and JSON serialization issues.
|
Test the complete fix for both the filter serialization and JSON serialization issues.
|
||||||
"""
|
"""
|
||||||
|
import os
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from crawl4ai import BrowserConfig, CacheMode, CrawlerRunConfig
|
from crawl4ai import BrowserConfig, CacheMode, CrawlerRunConfig
|
||||||
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, FilterChain, URLPatternFilter
|
from crawl4ai.deep_crawling import (
|
||||||
|
BFSDeepCrawlStrategy,
|
||||||
|
ContentRelevanceFilter,
|
||||||
|
FilterChain,
|
||||||
|
URLFilter,
|
||||||
|
URLPatternFilter,
|
||||||
|
)
|
||||||
|
|
||||||
BASE_URL = "http://localhost:11234/" # Adjust port as needed
|
# Port of the local Crawl4AI Docker server. Override it by setting the
# CRAWL4AI_DOCKER_PORT environment variable; defaults to 11234.
CRAWL4AI_DOCKER_PORT = os.environ.get("CRAWL4AI_DOCKER_PORT", "11234")
try:
    BASE_PORT = int(CRAWL4AI_DOCKER_PORT)
except (TypeError, ValueError):
    # os.environ.get() returns a str here, and int() raises ValueError (not
    # TypeError) for a non-numeric string such as "" or "abc". Catch both so
    # a malformed value falls back to the default instead of crashing on import.
    BASE_PORT = 11234
BASE_URL = f"http://localhost:{BASE_PORT}/"  # Adjust port as needed
|
||||||
|
|
||||||
async def test_with_docker_client():
|
|
||||||
|
async def test_with_docker_client(filter_chain: list[URLFilter]) -> bool:
|
||||||
"""Test using the Docker client (same as 1419.py)."""
|
"""Test using the Docker client (same as 1419.py)."""
|
||||||
from crawl4ai.docker_client import Crawl4aiDockerClient
|
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||||
|
|
||||||
@@ -24,15 +38,6 @@ async def test_with_docker_client():
|
|||||||
verbose=True,
|
verbose=True,
|
||||||
) as client:
|
) as client:
|
||||||
|
|
||||||
# Create filter chain - testing the serialization fix
|
|
||||||
filter_chain = [
|
|
||||||
URLPatternFilter(
|
|
||||||
# patterns=["*about*", "*privacy*", "*terms*"],
|
|
||||||
patterns=["*advanced*"],
|
|
||||||
reverse=True
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
crawler_config = CrawlerRunConfig(
|
crawler_config = CrawlerRunConfig(
|
||||||
deep_crawl_strategy=BFSDeepCrawlStrategy(
|
deep_crawl_strategy=BFSDeepCrawlStrategy(
|
||||||
max_depth=2, # Keep it shallow for testing
|
max_depth=2, # Keep it shallow for testing
|
||||||
@@ -79,7 +84,7 @@ async def test_with_docker_client():
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
async def test_with_rest_api():
|
async def test_with_rest_api(filters: list[dict[str, Any]]) -> bool:
|
||||||
"""Test using REST API directly."""
|
"""Test using REST API directly."""
|
||||||
print("\n" + "=" * 60)
|
print("\n" + "=" * 60)
|
||||||
print("Testing with REST API")
|
print("Testing with REST API")
|
||||||
@@ -94,15 +99,7 @@ async def test_with_rest_api():
|
|||||||
"filter_chain": {
|
"filter_chain": {
|
||||||
"type": "FilterChain",
|
"type": "FilterChain",
|
||||||
"params": {
|
"params": {
|
||||||
"filters": [
|
"filters": filters
|
||||||
{
|
|
||||||
"type": "URLPatternFilter",
|
|
||||||
"params": {
|
|
||||||
"patterns": ["*advanced*"],
|
|
||||||
"reverse": True
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -165,12 +162,58 @@ async def main():
|
|||||||
results = []
|
results = []
|
||||||
|
|
||||||
# Test 1: Docker client
|
# Test 1: Docker client
|
||||||
docker_passed = await test_with_docker_client()
|
filter_chain_test_cases = [
|
||||||
results.append(("Docker Client", docker_passed))
|
[
|
||||||
|
URLPatternFilter(
|
||||||
|
# patterns=["*about*", "*privacy*", "*terms*"],
|
||||||
|
patterns=["*advanced*"],
|
||||||
|
reverse=True
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
ContentRelevanceFilter(
|
||||||
|
query="about faq",
|
||||||
|
threshold=0.2,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
]
|
||||||
|
for idx, filter_chain in enumerate(filter_chain_test_cases):
|
||||||
|
docker_passed = await test_with_docker_client(filter_chain=filter_chain)
|
||||||
|
results.append((f"Docker Client w/ filter chain {idx}", docker_passed))
|
||||||
|
|
||||||
# Test 2: REST API
|
# Test 2: REST API
|
||||||
rest_passed = await test_with_rest_api()
|
filters_test_cases = [
|
||||||
results.append(("REST API", rest_passed))
|
[
|
||||||
|
{
|
||||||
|
"type": "URLPatternFilter",
|
||||||
|
"params": {
|
||||||
|
"patterns": ["*advanced*"],
|
||||||
|
"reverse": True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"type": "ContentRelevanceFilter",
|
||||||
|
"params": {
|
||||||
|
"query": "about faq",
|
||||||
|
"threshold": 0.2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"type": "ContentRelevanceFilter",
|
||||||
|
"params": {
|
||||||
|
"query": ["about", "faq"],
|
||||||
|
"threshold": 0.2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
]
|
||||||
|
for idx, filters in enumerate(filters_test_cases):
|
||||||
|
rest_passed = await test_with_rest_api(filters=filters)
|
||||||
|
results.append((f"REST API w/ filters {idx}", rest_passed))
|
||||||
|
|
||||||
# Summary
|
# Summary
|
||||||
print("\n" + "=" * 60)
|
print("\n" + "=" * 60)
|
||||||
@@ -186,10 +229,7 @@ async def main():
|
|||||||
|
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
if all_passed:
|
if all_passed:
|
||||||
print("🎉 ALL TESTS PASSED! Both issues are fully resolved!")
|
print("🎉 ALL TESTS PASSED!")
|
||||||
print("\nThe fixes:")
|
|
||||||
print("1. Filter serialization: Fixed by not serializing private __slots__")
|
|
||||||
print("2. JSON serialization: Fixed by removing property descriptors from model_dump()")
|
|
||||||
else:
|
else:
|
||||||
print("⚠️ Some tests failed. Please check the server logs for details.")
|
print("⚠️ Some tests failed. Please check the server logs for details.")
|
||||||
|
|
||||||
@@ -198,4 +238,4 @@ async def main():
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
sys.exit(asyncio.run(main()))
|
sys.exit(asyncio.run(main()))
|
||||||
|
|||||||
Reference in New Issue
Block a user