fix(dispatcher): adjust memory threshold and fix dispatcher initialization
- Increase memory threshold from 70% to 90% for better resource utilization
- Remove incorrect self parameter from MemoryAdaptiveDispatcher initialization

These changes improve the crawler's performance by allowing more memory usage before throttling, and fix a bug in dispatcher initialization.
This commit is contained in:
@@ -317,7 +317,7 @@ class BaseDispatcher(ABC):
|
||||
class MemoryAdaptiveDispatcher(BaseDispatcher):
|
||||
def __init__(
|
||||
self,
|
||||
- memory_threshold_percent: float = 70.0,
|
||||
+ memory_threshold_percent: float = 90.0,
|
||||
check_interval: float = 1.0,
|
||||
max_session_permit: int = 20,
|
||||
memory_wait_timeout: float = 300.0, # 5 minutes default timeout
|
||||
|
||||
@@ -772,7 +772,6 @@ class AsyncWebCrawler:
|
||||
# Create default dispatcher if none provided
|
||||
if dispatcher is None:
|
||||
dispatcher = MemoryAdaptiveDispatcher(
|
||||
- self,
|
||||
rate_limiter=RateLimiter(
|
||||
base_delay=(1.0, 3.0), max_delay=60.0, max_retries=3
|
||||
),
|
||||
|
||||
Reference in New Issue
Block a user