From ece9202b610c4977eb90c34a25ef6c7a557b6e0c Mon Sep 17 00:00:00 2001 From: UncleCode Date: Thu, 16 Jan 2025 21:58:52 +0800 Subject: [PATCH] fix(dispatcher): adjust memory threshold and fix dispatcher initialization - Increase memory threshold from 70% to 90% for better resource utilization - Remove incorrect self parameter from MemoryAdaptiveDispatcher initialization These changes improve the crawler's performance by allowing more memory usage before throttling and fix a bug in dispatcher initialization. --- crawl4ai/async_dispatcher.py | 2 +- crawl4ai/async_webcrawler.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/crawl4ai/async_dispatcher.py b/crawl4ai/async_dispatcher.py index b796a92b..64578bf6 100644 --- a/crawl4ai/async_dispatcher.py +++ b/crawl4ai/async_dispatcher.py @@ -317,7 +317,7 @@ class BaseDispatcher(ABC): class MemoryAdaptiveDispatcher(BaseDispatcher): def __init__( self, - memory_threshold_percent: float = 70.0, + memory_threshold_percent: float = 90.0, check_interval: float = 1.0, max_session_permit: int = 20, memory_wait_timeout: float = 300.0, # 5 minutes default timeout diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index 6b919c11..929fa924 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -772,7 +772,6 @@ class AsyncWebCrawler: # Create default dispatcher if none provided if dispatcher is None: dispatcher = MemoryAdaptiveDispatcher( - self, rate_limiter=RateLimiter( base_delay=(1.0, 3.0), max_delay=60.0, max_retries=3 ),