fix: Revert changes to session management in AsyncHttpWebcrawler and solve the underlying issue by removing the session closure in finally block of session context.
This commit is contained in:
@@ -1706,6 +1706,15 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
|
||||
await self.close()
|
||||
|
||||
@contextlib.asynccontextmanager
|
||||
async def _session_context(self):
|
||||
try:
|
||||
if not self._session:
|
||||
await self.start()
|
||||
yield self._session
|
||||
finally:
|
||||
pass
|
||||
|
||||
def set_hook(self, hook_type: str, hook_func: Callable) -> None:
|
||||
if hook_type in self.hooks:
|
||||
self.hooks[hook_type] = partial(self._execute_hook, hook_type, hook_func)
|
||||
@@ -1782,9 +1791,7 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
url: str,
|
||||
config: CrawlerRunConfig
|
||||
) -> AsyncCrawlResponse:
|
||||
if not self._session or self._session.closed:
|
||||
await self.start()
|
||||
|
||||
async with self._session_context() as session:
|
||||
timeout = ClientTimeout(
|
||||
total=config.page_timeout or self.DEFAULT_TIMEOUT,
|
||||
connect=10,
|
||||
@@ -1811,7 +1818,7 @@ class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
await self.hooks['before_request'](url, request_kwargs)
|
||||
|
||||
try:
|
||||
async with self._session.request(self.browser_config.method, url, **request_kwargs) as response:
|
||||
async with session.request(self.browser_config.method, url, **request_kwargs) as response:
|
||||
content = memoryview(await response.read())
|
||||
|
||||
if not (200 <= response.status < 300):
|
||||
|
||||
Reference in New Issue
Block a user