fix(dispatcher): adjust memory threshold and fix dispatcher initialization
- Increase the memory threshold from 70% to 90% for better resource utilization.
- Remove the incorrect `self` argument from the MemoryAdaptiveDispatcher initialization.

These changes improve the crawler's performance by allowing more memory usage before throttling, and fix a bug in dispatcher initialization.
This commit is contained in:
@@ -317,7 +317,7 @@ class BaseDispatcher(ABC):
|
|||||||
class MemoryAdaptiveDispatcher(BaseDispatcher):
|
class MemoryAdaptiveDispatcher(BaseDispatcher):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
memory_threshold_percent: float = 70.0,
|
memory_threshold_percent: float = 90.0,
|
||||||
check_interval: float = 1.0,
|
check_interval: float = 1.0,
|
||||||
max_session_permit: int = 20,
|
max_session_permit: int = 20,
|
||||||
memory_wait_timeout: float = 300.0, # 5 minutes default timeout
|
memory_wait_timeout: float = 300.0, # 5 minutes default timeout
|
||||||
|
|||||||
@@ -772,7 +772,6 @@ class AsyncWebCrawler:
|
|||||||
# Create default dispatcher if none provided
|
# Create default dispatcher if none provided
|
||||||
if dispatcher is None:
|
if dispatcher is None:
|
||||||
dispatcher = MemoryAdaptiveDispatcher(
|
dispatcher = MemoryAdaptiveDispatcher(
|
||||||
self,
|
|
||||||
rate_limiter=RateLimiter(
|
rate_limiter=RateLimiter(
|
||||||
base_delay=(1.0, 3.0), max_delay=60.0, max_retries=3
|
base_delay=(1.0, 3.0), max_delay=60.0, max_retries=3
|
||||||
),
|
),
|
||||||
|
|||||||
Reference in New Issue
Block a user