Merge pull request #1648 from christopher-w-murphy/fix/content-relevance-filter
[Fix]: Docker server does not decode ContentRelevanceFilter
This commit is contained in:
@@ -1034,7 +1034,7 @@ Our enterprise sponsors and technology partners help scale Crawl4AI to power pro
|
|||||||
|
|
||||||
| Company | About | Sponsorship Tier |
|
| Company | About | Sponsorship Tier |
|
||||||
|------|------|----------------------------|
|
|------|------|----------------------------|
|
||||||
| <a href="https://app.scrapeless.com/passport/register?utm_source=official&utm_term=crawl4ai" target="_blank"><picture><source width="250" media="(prefers-color-scheme: dark)" srcset="https://gist.githubusercontent.com/aravindkarnam/0d275b942705604263e5c32d2db27bc1/raw/Scrapeless-light-logo.svg"><source width="250" media="(prefers-color-scheme: light)" srcset="https://gist.githubusercontent.com/aravindkarnam/22d0525cc0f3021bf19ebf6e11a69ccd/raw/Scrapeless-dark-logo.svg"><img alt="Scrapeless" src="https://gist.githubusercontent.com/aravindkarnam/22d0525cc0f3021bf19ebf6e11a69ccd/raw/Scrapeless-dark-logo.svg"></picture></a> | Scrapeless is the best full-stack web scraping toolkit offering Scraping API, Scraping Browser, Web Unlocker, Captcha Solver, and Proxies, designed to handle all your data collection needs. | 🥈 Silver |
|
| <a href="https://app.scrapeless.com/passport/register?utm_source=official&utm_term=crawl4ai" target="_blank"><picture><source width="250" media="(prefers-color-scheme: dark)" srcset="https://gist.githubusercontent.com/aravindkarnam/0d275b942705604263e5c32d2db27bc1/raw/Scrapeless-light-logo.svg"><source width="250" media="(prefers-color-scheme: light)" srcset="https://gist.githubusercontent.com/aravindkarnam/22d0525cc0f3021bf19ebf6e11a69ccd/raw/Scrapeless-dark-logo.svg"><img alt="Scrapeless" src="https://gist.githubusercontent.com/aravindkarnam/22d0525cc0f3021bf19ebf6e11a69ccd/raw/Scrapeless-dark-logo.svg"></picture></a> | Scrapeless provides production-grade infrastructure for Crawling, Automation, and AI Agents, offering Scraping Browser, 4 Proxy Types and Universal Scraping API. | 🥈 Silver |
|
||||||
| <a href="https://dashboard.capsolver.com/passport/register?inviteCode=ESVSECTX5Q23" target="_blank"><picture><source width="120" media="(prefers-color-scheme: dark)" srcset="https://docs.crawl4ai.com/uploads/sponsors/20251013045338_72a71fa4ee4d2f40.png"><source width="120" media="(prefers-color-scheme: light)" srcset="https://www.capsolver.com/assets/images/logo-text.png"><img alt="Capsolver" src="https://www.capsolver.com/assets/images/logo-text.png"></picture></a> | AI-powered Captcha solving service. Supports all major Captcha types, including reCAPTCHA, Cloudflare, and more | 🥉 Bronze |
|
| <a href="https://dashboard.capsolver.com/passport/register?inviteCode=ESVSECTX5Q23" target="_blank"><picture><source width="120" media="(prefers-color-scheme: dark)" srcset="https://docs.crawl4ai.com/uploads/sponsors/20251013045338_72a71fa4ee4d2f40.png"><source width="120" media="(prefers-color-scheme: light)" srcset="https://www.capsolver.com/assets/images/logo-text.png"><img alt="Capsolver" src="https://www.capsolver.com/assets/images/logo-text.png"></picture></a> | AI-powered Captcha solving service. Supports all major Captcha types, including reCAPTCHA, Cloudflare, and more | 🥉 Bronze |
|
||||||
| <a href="https://kipo.ai" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013045751_2d54f57f117c651e.png" alt="DataSync" width="120"/></a> | Helps engineers and buyers find, compare, and source electronic & industrial parts in seconds, with specs, pricing, lead times & alternatives.| 🥇 Gold |
|
| <a href="https://kipo.ai" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013045751_2d54f57f117c651e.png" alt="DataSync" width="120"/></a> | Helps engineers and buyers find, compare, and source electronic & industrial parts in seconds, with specs, pricing, lead times & alternatives.| 🥇 Gold |
|
||||||
| <a href="https://www.kidocode.com/" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013045045_bb8dace3f0440d65.svg" alt="Kidocode" width="120"/><p align="center">KidoCode</p></a> | Kidocode is a hybrid technology and entrepreneurship school for kids aged 5–18, offering both online and on-campus education. | 🥇 Gold |
|
| <a href="https://www.kidocode.com/" target="_blank"><img src="https://docs.crawl4ai.com/uploads/sponsors/20251013045045_bb8dace3f0440d65.svg" alt="Kidocode" width="120"/><p align="center">KidoCode</p></a> | Kidocode is a hybrid technology and entrepreneurship school for kids aged 5–18, offering both online and on-campus education. | 🥇 Gold |
|
||||||
|
|||||||
@@ -72,6 +72,8 @@ from .deep_crawling import (
|
|||||||
BestFirstCrawlingStrategy,
|
BestFirstCrawlingStrategy,
|
||||||
DFSDeepCrawlStrategy,
|
DFSDeepCrawlStrategy,
|
||||||
DeepCrawlDecorator,
|
DeepCrawlDecorator,
|
||||||
|
ContentRelevanceFilter,
|
||||||
|
ContentTypeScorer,
|
||||||
)
|
)
|
||||||
# NEW: Import AsyncUrlSeeder
|
# NEW: Import AsyncUrlSeeder
|
||||||
from .async_url_seeder import AsyncUrlSeeder
|
from .async_url_seeder import AsyncUrlSeeder
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
|
import importlib
|
||||||
import os
|
import os
|
||||||
from typing import Union
|
|
||||||
import warnings
|
import warnings
|
||||||
import requests
|
import requests
|
||||||
from .config import (
|
from .config import (
|
||||||
@@ -27,14 +27,14 @@ from .table_extraction import TableExtractionStrategy, DefaultTableExtraction
|
|||||||
from .cache_context import CacheMode
|
from .cache_context import CacheMode
|
||||||
from .proxy_strategy import ProxyRotationStrategy
|
from .proxy_strategy import ProxyRotationStrategy
|
||||||
|
|
||||||
from typing import Union, List, Callable
|
|
||||||
import inspect
|
import inspect
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Callable, Dict, List, Optional, Union
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
# Type alias for URL matching
|
# Type alias for URL matching
|
||||||
UrlMatcher = Union[str, Callable[[str], bool], List[Union[str, Callable[[str], bool]]]]
|
UrlMatcher = Union[str, Callable[[str], bool], List[Union[str, Callable[[str], bool]]]]
|
||||||
|
|
||||||
|
|
||||||
class MatchMode(Enum):
|
class MatchMode(Enum):
|
||||||
OR = "or"
|
OR = "or"
|
||||||
AND = "and"
|
AND = "and"
|
||||||
@@ -42,8 +42,7 @@ class MatchMode(Enum):
|
|||||||
# from .proxy_strategy import ProxyConfig
|
# from .proxy_strategy import ProxyConfig
|
||||||
|
|
||||||
|
|
||||||
|
def to_serializable_dict(obj: Any, ignore_default_value : bool = False):
|
||||||
def to_serializable_dict(obj: Any, ignore_default_value : bool = False) -> Dict:
|
|
||||||
"""
|
"""
|
||||||
Recursively convert an object to a serializable dictionary using {type, params} structure
|
Recursively convert an object to a serializable dictionary using {type, params} structure
|
||||||
for complex objects.
|
for complex objects.
|
||||||
@@ -110,8 +109,6 @@ def to_serializable_dict(obj: Any, ignore_default_value : bool = False) -> Dict:
|
|||||||
# if value is not None:
|
# if value is not None:
|
||||||
# current_values[attr_name] = to_serializable_dict(value)
|
# current_values[attr_name] = to_serializable_dict(value)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"type": obj.__class__.__name__,
|
"type": obj.__class__.__name__,
|
||||||
"params": current_values
|
"params": current_values
|
||||||
@@ -137,12 +134,20 @@ def from_serializable_dict(data: Any) -> Any:
|
|||||||
if data["type"] == "dict" and "value" in data:
|
if data["type"] == "dict" and "value" in data:
|
||||||
return {k: from_serializable_dict(v) for k, v in data["value"].items()}
|
return {k: from_serializable_dict(v) for k, v in data["value"].items()}
|
||||||
|
|
||||||
# Import from crawl4ai for class instances
|
cls = None
|
||||||
import crawl4ai
|
# If you are receiving an error while trying to convert a dict to an object:
|
||||||
|
# Either add a module to `modules_paths` list, or add the `data["type"]` to the crawl4ai __init__.py file
|
||||||
if hasattr(crawl4ai, data["type"]):
|
module_paths = ["crawl4ai"]
|
||||||
cls = getattr(crawl4ai, data["type"])
|
for module_path in module_paths:
|
||||||
|
try:
|
||||||
|
mod = importlib.import_module(module_path)
|
||||||
|
if hasattr(mod, data["type"]):
|
||||||
|
cls = getattr(mod, data["type"])
|
||||||
|
break
|
||||||
|
except (ImportError, AttributeError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if cls is not None:
|
||||||
# Handle Enum
|
# Handle Enum
|
||||||
if issubclass(cls, Enum):
|
if issubclass(cls, Enum):
|
||||||
return cls(data["params"])
|
return cls(data["params"])
|
||||||
|
|||||||
@@ -509,18 +509,22 @@ class DomainFilter(URLFilter):
|
|||||||
class ContentRelevanceFilter(URLFilter):
|
class ContentRelevanceFilter(URLFilter):
|
||||||
"""BM25-based relevance filter using head section content"""
|
"""BM25-based relevance filter using head section content"""
|
||||||
|
|
||||||
__slots__ = ("query_terms", "threshold", "k1", "b", "avgdl")
|
__slots__ = ("query_terms", "threshold", "k1", "b", "avgdl", "query")
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: Union[str, List[str]],
|
||||||
threshold: float,
|
threshold: float,
|
||||||
k1: float = 1.2,
|
k1: float = 1.2,
|
||||||
b: float = 0.75,
|
b: float = 0.75,
|
||||||
avgdl: int = 1000,
|
avgdl: int = 1000,
|
||||||
):
|
):
|
||||||
super().__init__(name="BM25RelevanceFilter")
|
super().__init__(name="BM25RelevanceFilter")
|
||||||
self.query_terms = self._tokenize(query)
|
if isinstance(query, list):
|
||||||
|
self.query = " ".join(query)
|
||||||
|
else:
|
||||||
|
self.query = query
|
||||||
|
self.query_terms = self._tokenize(self.query)
|
||||||
self.threshold = threshold
|
self.threshold = threshold
|
||||||
self.k1 = k1 # TF saturation parameter
|
self.k1 = k1 # TF saturation parameter
|
||||||
self.b = b # Length normalization parameter
|
self.b = b # Length normalization parameter
|
||||||
|
|||||||
@@ -180,7 +180,7 @@ class Crawl4aiDockerClient:
|
|||||||
yield CrawlResult(**result)
|
yield CrawlResult(**result)
|
||||||
return stream_results()
|
return stream_results()
|
||||||
|
|
||||||
response = await self._request("POST", "/crawl", json=data)
|
response = await self._request("POST", "/crawl", json=data, timeout=hooks_timeout)
|
||||||
result_data = response.json()
|
result_data = response.json()
|
||||||
if not result_data.get("success", False):
|
if not result_data.get("success", False):
|
||||||
raise RequestError(f"Crawl failed: {result_data.get('msg', 'Unknown error')}")
|
raise RequestError(f"Crawl failed: {result_data.get('msg', 'Unknown error')}")
|
||||||
|
|||||||
61
docs/examples/cloud_browser/scrapeless_browser.py
Normal file
61
docs/examples/cloud_browser/scrapeless_browser.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
import json
|
||||||
|
import asyncio
|
||||||
|
from urllib.parse import quote, urlencode
|
||||||
|
from crawl4ai import CrawlerRunConfig, BrowserConfig, AsyncWebCrawler
|
||||||
|
|
||||||
|
# Scrapeless provides a free anti-detection fingerprint browser client and cloud browsers:
|
||||||
|
# https://www.scrapeless.com/en/blog/scrapeless-nstbrowser-strategic-integration
|
||||||
|
|
||||||
|
async def main():
    """Crawl the Scrapeless home page through a Scrapeless cloud browser.

    Builds a custom browser fingerprint, packs it together with the session
    parameters into the query string of the Scrapeless CDP WebSocket URL,
    connects ``AsyncWebCrawler`` to that URL in ``cdp`` browser mode, crawls
    one page and prints its status code, title and description.
    """
    # customize browser fingerprint
    # NOTE(review): the user agent advertises macOS while "platform" says
    # "Windows" -- confirm whether this mismatch is intentional.
    fingerprint = {
        "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.1.2.3 Safari/537.36",
        "platform": "Windows",
        "screen": {
            "width": 1280, "height": 1024
        },
        "localization": {
            "languages": ["zh-HK", "en-US", "en"], "timezone": "Asia/Hong_Kong",
        }
    }

    fingerprint_json = json.dumps(fingerprint)
    # NOTE(review): urlencode() below percent-encodes this value a second
    # time; presumably the endpoint expects the double encoding -- verify
    # against the Scrapeless documentation.
    encoded_fingerprint = quote(fingerprint_json)

    scrapeless_params = {
        "token": "your token",
        "sessionTTL": 1000,
        "sessionName": "Demo",
        "fingerprint": encoded_fingerprint,
        # Sets the target country/region for the proxy, sending requests via an IP address from that region. You can specify a country code (e.g., US for the United States, GB for the United Kingdom, ANY for any country). See country codes for all supported options.
        # "proxyCountry": "ANY",
        # create profile on scrapeless
        # "profileId": "your profileId",
        # For more usage details, please refer to https://docs.scrapeless.com/en/scraping-browser/quickstart/getting-started
    }
    query_string = urlencode(scrapeless_params)
    scrapeless_connection_url = f"wss://browser.scrapeless.com/api/v2/browser?{query_string}"
    async with AsyncWebCrawler(
        config=BrowserConfig(
            headless=False,
            browser_mode="cdp",
            cdp_url=scrapeless_connection_url,
        )
    ) as crawler:
        result = await crawler.arun(
            url="https://www.scrapeless.com/en",
            config=CrawlerRunConfig(
                wait_for="css:.content",
                scan_full_page=True,
            ),
        )
        # Metadata may be absent, or may lack a given tag (e.g. a page with
        # no meta description), so read it defensively instead of indexing.
        metadata = result.metadata or {}
        print("-" * 20)
        print(f'Status Code: {result.status_code}')
        print("-" * 20)
        print(f'Title: {metadata.get("title")}')
        print(f'Description: {metadata.get("description")}')
        print("-" * 20)


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
|
|
||||||
@@ -1,16 +1,31 @@
|
|||||||
"""
|
"""
|
||||||
Test the complete fix for both the filter serialization and JSON serialization issues.
|
Test the complete fix for both the filter serialization and JSON serialization issues.
|
||||||
"""
|
"""
|
||||||
|
import os
|
||||||
|
import traceback
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from crawl4ai import BrowserConfig, CacheMode, CrawlerRunConfig
|
from crawl4ai import BrowserConfig, CacheMode, CrawlerRunConfig
|
||||||
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy, FilterChain, URLPatternFilter
|
from crawl4ai.deep_crawling import (
|
||||||
|
BFSDeepCrawlStrategy,
|
||||||
|
ContentRelevanceFilter,
|
||||||
|
FilterChain,
|
||||||
|
URLFilter,
|
||||||
|
URLPatternFilter,
|
||||||
|
)
|
||||||
|
|
||||||
BASE_URL = "http://localhost:11234/" # Adjust port as needed
|
# Port of the Crawl4AI Docker server under test; override it with the
# CRAWL4AI_DOCKER_PORT environment variable.
CRAWL4AI_DOCKER_PORT = os.environ.get("CRAWL4AI_DOCKER_PORT", "11234")
try:
    BASE_PORT = int(CRAWL4AI_DOCKER_PORT)
except (TypeError, ValueError):
    # int() raises ValueError (not TypeError) for a non-numeric string such
    # as "abc" or "", so both must be caught for the fallback to apply.
    BASE_PORT = 11234
BASE_URL = f"http://localhost:{BASE_PORT}/"  # Adjust port as needed
|
||||||
|
|
||||||
async def test_with_docker_client():
|
|
||||||
|
async def test_with_docker_client(filter_chain: list[URLFilter], max_pages: int = 20, timeout: int = 30) -> bool:
|
||||||
"""Test using the Docker client (same as 1419.py)."""
|
"""Test using the Docker client (same as 1419.py)."""
|
||||||
from crawl4ai.docker_client import Crawl4aiDockerClient
|
from crawl4ai.docker_client import Crawl4aiDockerClient
|
||||||
|
|
||||||
@@ -24,19 +39,10 @@ async def test_with_docker_client():
|
|||||||
verbose=True,
|
verbose=True,
|
||||||
) as client:
|
) as client:
|
||||||
|
|
||||||
# Create filter chain - testing the serialization fix
|
|
||||||
filter_chain = [
|
|
||||||
URLPatternFilter(
|
|
||||||
# patterns=["*about*", "*privacy*", "*terms*"],
|
|
||||||
patterns=["*advanced*"],
|
|
||||||
reverse=True
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
crawler_config = CrawlerRunConfig(
|
crawler_config = CrawlerRunConfig(
|
||||||
deep_crawl_strategy=BFSDeepCrawlStrategy(
|
deep_crawl_strategy=BFSDeepCrawlStrategy(
|
||||||
max_depth=2, # Keep it shallow for testing
|
max_depth=2, # Keep it shallow for testing
|
||||||
# max_pages=5, # Limit pages for testing
|
max_pages=max_pages, # Limit pages for testing
|
||||||
filter_chain=FilterChain(filter_chain)
|
filter_chain=FilterChain(filter_chain)
|
||||||
),
|
),
|
||||||
cache_mode=CacheMode.BYPASS,
|
cache_mode=CacheMode.BYPASS,
|
||||||
@@ -47,6 +53,7 @@ async def test_with_docker_client():
|
|||||||
["https://docs.crawl4ai.com"], # Simple test page
|
["https://docs.crawl4ai.com"], # Simple test page
|
||||||
browser_config=BrowserConfig(headless=True),
|
browser_config=BrowserConfig(headless=True),
|
||||||
crawler_config=crawler_config,
|
crawler_config=crawler_config,
|
||||||
|
hooks_timeout=timeout,
|
||||||
)
|
)
|
||||||
|
|
||||||
if results:
|
if results:
|
||||||
@@ -74,12 +81,11 @@ async def test_with_docker_client():
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Docker client test failed: {e}")
|
print(f"❌ Docker client test failed: {e}")
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
async def test_with_rest_api():
|
async def test_with_rest_api(filters: list[dict[str, Any]], max_pages: int = 20, timeout: int = 30) -> bool:
|
||||||
"""Test using REST API directly."""
|
"""Test using REST API directly."""
|
||||||
print("\n" + "=" * 60)
|
print("\n" + "=" * 60)
|
||||||
print("Testing with REST API")
|
print("Testing with REST API")
|
||||||
@@ -90,19 +96,11 @@ async def test_with_rest_api():
|
|||||||
"type": "BFSDeepCrawlStrategy",
|
"type": "BFSDeepCrawlStrategy",
|
||||||
"params": {
|
"params": {
|
||||||
"max_depth": 2,
|
"max_depth": 2,
|
||||||
# "max_pages": 5,
|
"max_pages": max_pages,
|
||||||
"filter_chain": {
|
"filter_chain": {
|
||||||
"type": "FilterChain",
|
"type": "FilterChain",
|
||||||
"params": {
|
"params": {
|
||||||
"filters": [
|
"filters": filters
|
||||||
{
|
|
||||||
"type": "URLPatternFilter",
|
|
||||||
"params": {
|
|
||||||
"patterns": ["*advanced*"],
|
|
||||||
"reverse": True
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -126,7 +124,7 @@ async def test_with_rest_api():
|
|||||||
response = await client.post(
|
response = await client.post(
|
||||||
f"{BASE_URL}crawl",
|
f"{BASE_URL}crawl",
|
||||||
json=crawl_payload,
|
json=crawl_payload,
|
||||||
timeout=30
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
@@ -150,7 +148,6 @@ async def test_with_rest_api():
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ REST API test failed: {e}")
|
print(f"❌ REST API test failed: {e}")
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -165,12 +162,62 @@ async def main():
|
|||||||
results = []
|
results = []
|
||||||
|
|
||||||
# Test 1: Docker client
|
# Test 1: Docker client
|
||||||
docker_passed = await test_with_docker_client()
|
max_pages_ = [20, 5]
|
||||||
results.append(("Docker Client", docker_passed))
|
timeouts = [30, 60]
|
||||||
|
filter_chain_test_cases = [
|
||||||
|
[
|
||||||
|
URLPatternFilter(
|
||||||
|
# patterns=["*about*", "*privacy*", "*terms*"],
|
||||||
|
patterns=["*advanced*"],
|
||||||
|
reverse=True
|
||||||
|
),
|
||||||
|
],
|
||||||
|
[
|
||||||
|
ContentRelevanceFilter(
|
||||||
|
query="about faq",
|
||||||
|
threshold=0.2,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
]
|
||||||
|
for idx, (filter_chain, max_pages, timeout) in enumerate(zip(filter_chain_test_cases, max_pages_, timeouts)):
|
||||||
|
docker_passed = await test_with_docker_client(filter_chain=filter_chain, max_pages=max_pages, timeout=timeout)
|
||||||
|
results.append((f"Docker Client w/ filter chain {idx}", docker_passed))
|
||||||
|
|
||||||
# Test 2: REST API
|
# Test 2: REST API
|
||||||
rest_passed = await test_with_rest_api()
|
max_pages_ = [20, 5, 5]
|
||||||
results.append(("REST API", rest_passed))
|
timeouts = [30, 60, 60]
|
||||||
|
filters_test_cases = [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"type": "URLPatternFilter",
|
||||||
|
"params": {
|
||||||
|
"patterns": ["*advanced*"],
|
||||||
|
"reverse": True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"type": "ContentRelevanceFilter",
|
||||||
|
"params": {
|
||||||
|
"query": "about faq",
|
||||||
|
"threshold": 0.2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"type": "ContentRelevanceFilter",
|
||||||
|
"params": {
|
||||||
|
"query": ["about", "faq"],
|
||||||
|
"threshold": 0.2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
]
|
||||||
|
for idx, (filters, max_pages, timeout) in enumerate(zip(filters_test_cases, max_pages_, timeouts)):
|
||||||
|
rest_passed = await test_with_rest_api(filters=filters, max_pages=max_pages, timeout=timeout)
|
||||||
|
results.append((f"REST API w/ filters {idx}", rest_passed))
|
||||||
|
|
||||||
# Summary
|
# Summary
|
||||||
print("\n" + "=" * 60)
|
print("\n" + "=" * 60)
|
||||||
@@ -186,10 +233,7 @@ async def main():
|
|||||||
|
|
||||||
print("=" * 60)
|
print("=" * 60)
|
||||||
if all_passed:
|
if all_passed:
|
||||||
print("🎉 ALL TESTS PASSED! Both issues are fully resolved!")
|
print("🎉 ALL TESTS PASSED!")
|
||||||
print("\nThe fixes:")
|
|
||||||
print("1. Filter serialization: Fixed by not serializing private __slots__")
|
|
||||||
print("2. JSON serialization: Fixed by removing property descriptors from model_dump()")
|
|
||||||
else:
|
else:
|
||||||
print("⚠️ Some tests failed. Please check the server logs for details.")
|
print("⚠️ Some tests failed. Please check the server logs for details.")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user