feat(crawler): add session management and view-source support
Add a session_id feature that allows browser pages to be reused across multiple crawls. Add support for the view-source: protocol in URL handling. Fix the browser config reference and string formatting issues. Update the examples to demonstrate the new session management features.

BREAKING CHANGE: Browser pages now persist across crawls when a session_id is used.
This commit is contained in:
@@ -441,7 +441,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
status_code = 200 # Default for local/raw HTML
|
||||
screenshot_data = None
|
||||
|
||||
if url.startswith(("http://", "https://")):
|
||||
if url.startswith(("http://", "https://", "view-source:")):
|
||||
return await self._crawl_web(url, config)
|
||||
|
||||
elif url.startswith("file://"):
|
||||
@@ -784,7 +784,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
||||
except Error:
|
||||
visibility_info = await self.check_visibility(page)
|
||||
|
||||
if self.config.verbose:
|
||||
if self.browser_config.config.verbose:
|
||||
self.logger.debug(
|
||||
message="Body visibility info: {info}",
|
||||
tag="DEBUG",
|
||||
|
||||
Reference in New Issue
Block a user