Renames browser_config param to config in AsyncWebCrawler

Standardizes the parameter naming convention across the codebase by renaming browser_config to the more concise config in the AsyncWebCrawler constructor.

Updates all documentation examples and internal usages to reflect the new parameter name for consistency.

Also improves hook execution by passing url to the before_goto hook and url/response to the after_goto hook, and fixes parameter ordering in the before_return_html hook by passing page and html as keyword arguments.
This commit is contained in:
UncleCode
2024-12-26 16:34:36 +08:00
parent 9a4ed6bbd7
commit f2d9912697
26 changed files with 177 additions and 62 deletions

View File

@@ -928,7 +928,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
# Handle page navigation and content loading
if not config.js_only:
await self.execute_hook("before_goto", page, context=context)
await self.execute_hook("before_goto", page, context=context, url=url)
try:
response = await page.goto(
@@ -937,7 +937,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
except Error as e:
raise RuntimeError(f"Failed on navigating ACS-GOTO:\n{str(e)}")
await self.execute_hook("after_goto", page, context=context)
await self.execute_hook("after_goto", page, context=context, url=url, response=response)
if response is None:
status_code = 200
@@ -1102,7 +1102,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
# Get final HTML content
html = await page.content()
await self.execute_hook("before_return_html", page, html, context=context)
await self.execute_hook("before_return_html", page = page, html = html, context=context)
# Handle PDF and screenshot generation
start_export_time = time.perf_counter()