fix: stop closing the session after each request so the connection pool is reused. Fixes: https://github.com/unclecode/crawl4ai/issues/867

commit e3111d0a32
parent 2f0e217751
Author: Aravind Karnam
Date:   2025-03-25 13:46:55 +05:30


@@ -1702,15 +1702,6 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
     async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
         await self.close()
 
-    @contextlib.asynccontextmanager
-    async def _session_context(self):
-        try:
-            if not self._session:
-                await self.start()
-            yield self._session
-        finally:
-            await self.close()
-
     def set_hook(self, hook_type: str, hook_func: Callable) -> None:
         if hook_type in self.hooks:
             self.hooks[hook_type] = partial(self._execute_hook, hook_type, hook_func)
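Why the deletion matters: the removed _session_context helper closed the session in its finally block, so every crawl discarded aiohttp's keep-alive pool and repaid TCP/TLS setup on the next request. A minimal self-contained sketch of that anti-pattern (class and method names are illustrative, not crawl4ai's API):

import asyncio
import contextlib

import aiohttp


class PerRequestSession:
    """Anti-pattern sketch: the session is torn down after every request."""

    def __init__(self) -> None:
        self._session: aiohttp.ClientSession | None = None

    @contextlib.asynccontextmanager
    async def _session_context(self):
        try:
            if not self._session:
                self._session = aiohttp.ClientSession()
            yield self._session
        finally:
            # Mirrors the removed helper: closing here throws away every
            # pooled keep-alive connection after a single request.
            await self._session.close()
            self._session = None

    async def fetch(self, url: str) -> int:
        async with self._session_context() as session:
            async with session.get(url) as resp:
                await resp.read()
                return resp.status


async def main() -> None:
    crawler = PerRequestSession()
    for _ in range(2):  # same host, but each call reconnects from scratch
        print(await crawler.fetch("https://example.com"))


asyncio.run(main())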
@@ -1787,7 +1778,9 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
         url: str,
         config: CrawlerRunConfig
     ) -> AsyncCrawlResponse:
-        async with self._session_context() as session:
+        if not self._session or self._session.closed:
+            await self.start()
+
         timeout = ClientTimeout(
             total=config.page_timeout or self.DEFAULT_TIMEOUT,
             connect=10,
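The replacement pattern, sketched standalone under the same assumptions (illustrative names, plain aiohttp): the session is created lazily, re-created only if something closed it, and never closed per request, so the connector's keep-alive pool survives across crawls. Teardown happens exactly once, in close(), analogous to the strategy's __aexit__:

import asyncio

import aiohttp


class PooledSession:
    """Sketch of the fix: one long-lived session, closed once by the caller."""

    def __init__(self) -> None:
        self._session: aiohttp.ClientSession | None = None

    async def start(self) -> None:
        self._session = aiohttp.ClientSession()

    async def close(self) -> None:
        if self._session and not self._session.closed:
            await self._session.close()

    async def fetch(self, url: str) -> int:
        # Same guard as the patch: lazily (re)start, never close here.
        if not self._session or self._session.closed:
            await self.start()
        async with self._session.get(url) as resp:
            await resp.read()
            return resp.status


async def main() -> None:
    crawler = PooledSession()
    try:
        for _ in range(2):  # the second request reuses a pooled connection
            print(await crawler.fetch("https://example.com"))
    finally:
        await crawler.close()  # one teardown for the crawler's lifetime


asyncio.run(main())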
@@ -1814,7 +1807,7 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
         await self.hooks['before_request'](url, request_kwargs)
 
         try:
-            async with session.request(self.browser_config.method, url, **request_kwargs) as response:
+            async with self._session.request(self.browser_config.method, url, **request_kwargs) as response:
                 content = memoryview(await response.read())
 
                 if not (200 <= response.status < 300):
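To confirm the pooling behaviour this commit preserves, aiohttp's TraceConfig exposes an on_connection_reuseconn signal that fires whenever a request is served from the keep-alive pool rather than a fresh connection. A small check (the URL is only an example):

import asyncio

import aiohttp


async def main() -> None:
    trace = aiohttp.TraceConfig()

    async def reused(session, ctx, params) -> None:
        # Fires only when an existing pooled connection is reused.
        print("connection reused from pool")

    trace.on_connection_reuseconn.append(reused)

    async with aiohttp.ClientSession(trace_configs=[trace]) as session:
        for _ in range(2):
            async with session.get("https://example.com") as resp:
                await resp.read()  # the second iteration should report reuse


asyncio.run(main())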