fix(dispatcher): adjust memory threshold and fix dispatcher initialization

- Increase memory threshold from 70% to 90% for better resource utilization
- Remove the incorrect self argument passed when AsyncWebCrawler constructs its default MemoryAdaptiveDispatcher

These changes improve the crawler's performance by allowing more memory usage before throttling, and they fix a bug in dispatcher initialization.
This commit is contained in:
UncleCode
2025-01-16 21:58:52 +08:00
parent 9d694da939
commit ece9202b61
2 changed files with 1 addition and 2 deletions

View File

@@ -317,7 +317,7 @@ class BaseDispatcher(ABC):
class MemoryAdaptiveDispatcher(BaseDispatcher):
def __init__(
self,
memory_threshold_percent: float = 70.0,
memory_threshold_percent: float = 90.0,
check_interval: float = 1.0,
max_session_permit: int = 20,
memory_wait_timeout: float = 300.0, # 5 minutes default timeout

View File

@@ -772,7 +772,6 @@ class AsyncWebCrawler:
# Create default dispatcher if none provided
if dispatcher is None:
dispatcher = MemoryAdaptiveDispatcher(
self,
rate_limiter=RateLimiter(
base_delay=(1.0, 3.0), max_delay=60.0, max_retries=3
),