reproduced AttributeError from #1642

This commit is contained in:
Chris Murphy
2025-12-01 11:31:07 -05:00
parent 0024c82cdc
commit 33a3cc3933

View File

@@ -1,16 +1,30 @@
""" """
Test the complete fix for both the filter serialization and JSON serialization issues. Test the complete fix for both the filter serialization and JSON serialization issues.
""" """
import os
from typing import Any
import asyncio import asyncio
import httpx import httpx
from crawl4ai import BrowserConfig, CacheMode, CrawlerRunConfig from crawl4ai import BrowserConfig, CacheMode, CrawlerRunConfig
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, FilterChain, URLPatternFilter from crawl4ai.deep_crawling import (
BFSDeepCrawlStrategy,
ContentRelevanceFilter,
FilterChain,
URLFilter,
URLPatternFilter,
)
BASE_URL = "http://localhost:11234/" # Adjust port as needed CRAWL4AI_DOCKER_PORT = os.environ.get("CRAWL4AI_DOCKER_PORT", "11234")
# Resolve the port the Crawl4AI Docker server listens on.
# CRAWL4AI_DOCKER_PORT comes from os.environ.get(..., "11234"), so it is always
# a string; a malformed value (e.g. "abc" or "") makes int() raise ValueError,
# not TypeError. Catch both so a bad environment value falls back to the
# default port instead of crashing at import time.
try:
    BASE_PORT = int(CRAWL4AI_DOCKER_PORT)
except (TypeError, ValueError):
    BASE_PORT = 11234
BASE_URL = f"http://localhost:{BASE_PORT}/" # Adjust port as needed
async def test_with_docker_client():
async def test_with_docker_client(filter_chain: list[URLFilter]) -> bool:
"""Test using the Docker client (same as 1419.py).""" """Test using the Docker client (same as 1419.py)."""
from crawl4ai.docker_client import Crawl4aiDockerClient from crawl4ai.docker_client import Crawl4aiDockerClient
@@ -24,15 +38,6 @@ async def test_with_docker_client():
verbose=True, verbose=True,
) as client: ) as client:
# Create filter chain - testing the serialization fix
filter_chain = [
URLPatternFilter(
# patterns=["*about*", "*privacy*", "*terms*"],
patterns=["*advanced*"],
reverse=True
),
]
crawler_config = CrawlerRunConfig( crawler_config = CrawlerRunConfig(
deep_crawl_strategy=BFSDeepCrawlStrategy( deep_crawl_strategy=BFSDeepCrawlStrategy(
max_depth=2, # Keep it shallow for testing max_depth=2, # Keep it shallow for testing
@@ -79,7 +84,7 @@ async def test_with_docker_client():
return False return False
async def test_with_rest_api(): async def test_with_rest_api(filters: list[dict[str, Any]]) -> bool:
"""Test using REST API directly.""" """Test using REST API directly."""
print("\n" + "=" * 60) print("\n" + "=" * 60)
print("Testing with REST API") print("Testing with REST API")
@@ -94,15 +99,7 @@ async def test_with_rest_api():
"filter_chain": { "filter_chain": {
"type": "FilterChain", "type": "FilterChain",
"params": { "params": {
"filters": [ "filters": filters
{
"type": "URLPatternFilter",
"params": {
"patterns": ["*advanced*"],
"reverse": True
}
}
]
} }
} }
} }
@@ -165,12 +162,58 @@ async def main():
results = [] results = []
# Test 1: Docker client # Test 1: Docker client
docker_passed = await test_with_docker_client() filter_chain_test_cases = [
results.append(("Docker Client", docker_passed)) [
URLPatternFilter(
# patterns=["*about*", "*privacy*", "*terms*"],
patterns=["*advanced*"],
reverse=True
),
],
[
ContentRelevanceFilter(
query="about faq",
threshold=0.2,
),
],
]
for idx, filter_chain in enumerate(filter_chain_test_cases):
docker_passed = await test_with_docker_client(filter_chain=filter_chain)
results.append((f"Docker Client w/ filter chain {idx}", docker_passed))
# Test 2: REST API # Test 2: REST API
rest_passed = await test_with_rest_api() filters_test_cases = [
results.append(("REST API", rest_passed)) [
{
"type": "URLPatternFilter",
"params": {
"patterns": ["*advanced*"],
"reverse": True
}
}
],
[
{
"type": "ContentRelevanceFilter",
"params": {
"query": "about faq",
"threshold": 0.2,
}
}
],
[
{
"type": "ContentRelevanceFilter",
"params": {
"query": ["about", "faq"],
"threshold": 0.2,
}
}
],
]
for idx, filters in enumerate(filters_test_cases):
rest_passed = await test_with_rest_api(filters=filters)
results.append((f"REST API w/ filters {idx}", rest_passed))
# Summary # Summary
print("\n" + "=" * 60) print("\n" + "=" * 60)
@@ -186,10 +229,7 @@ async def main():
print("=" * 60) print("=" * 60)
if all_passed: if all_passed:
print("🎉 ALL TESTS PASSED! Both issues are fully resolved!") print("🎉 ALL TESTS PASSED!")
print("\nThe fixes:")
print("1. Filter serialization: Fixed by not serializing private __slots__")
print("2. JSON serialization: Fixed by removing property descriptors from model_dump()")
else: else:
print("⚠️ Some tests failed. Please check the server logs for details.") print("⚠️ Some tests failed. Please check the server logs for details.")