fix: prevent the session from being closed after each request so that the connection pool is maintained. Fixes: https://github.com/unclecode/crawl4ai/issues/867
This commit is contained in:
@@ -1702,15 +1702,6 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
|
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
|
||||||
await self.close()
|
await self.close()
|
||||||
|
|
||||||
@contextlib.asynccontextmanager
|
|
||||||
async def _session_context(self):
|
|
||||||
try:
|
|
||||||
if not self._session:
|
|
||||||
await self.start()
|
|
||||||
yield self._session
|
|
||||||
finally:
|
|
||||||
await self.close()
|
|
||||||
|
|
||||||
def set_hook(self, hook_type: str, hook_func: Callable) -> None:
|
def set_hook(self, hook_type: str, hook_func: Callable) -> None:
|
||||||
if hook_type in self.hooks:
|
if hook_type in self.hooks:
|
||||||
self.hooks[hook_type] = partial(self._execute_hook, hook_type, hook_func)
|
self.hooks[hook_type] = partial(self._execute_hook, hook_type, hook_func)
|
||||||
@@ -1787,7 +1778,9 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
url: str,
|
url: str,
|
||||||
config: CrawlerRunConfig
|
config: CrawlerRunConfig
|
||||||
) -> AsyncCrawlResponse:
|
) -> AsyncCrawlResponse:
|
||||||
async with self._session_context() as session:
|
if not self._session or self._session.closed:
|
||||||
|
await self.start()
|
||||||
|
|
||||||
timeout = ClientTimeout(
|
timeout = ClientTimeout(
|
||||||
total=config.page_timeout or self.DEFAULT_TIMEOUT,
|
total=config.page_timeout or self.DEFAULT_TIMEOUT,
|
||||||
connect=10,
|
connect=10,
|
||||||
@@ -1814,7 +1807,7 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
await self.hooks['before_request'](url, request_kwargs)
|
await self.hooks['before_request'](url, request_kwargs)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with session.request(self.browser_config.method, url, **request_kwargs) as response:
|
async with self._session.request(self.browser_config.method, url, **request_kwargs) as response:
|
||||||
content = memoryview(await response.read())
|
content = memoryview(await response.read())
|
||||||
|
|
||||||
if not (200 <= response.status < 300):
|
if not (200 <= response.status < 300):
|
||||||
|
|||||||
Reference in New Issue
Block a user