Renames browser_config param to config in AsyncWebCrawler

Standardizes the parameter naming convention across the codebase by renaming browser_config to the more concise config in the AsyncWebCrawler constructor. Updates all documentation examples and internal usages to reflect the new parameter name for consistency. Also improves hook execution by adding url/response parameters to the goto hooks, and fixes the parameter ordering in the before_return_html hook.
2024-12-26 16:34:36 +08:00
parent 9a4ed6bbd7
commit f2d9912697
26 changed files with 177 additions and 62 deletions
--- a/.local/issues_todo.md
+++ b/.local/issues_todo.md
@@ -0,0 +1 @@
+Docker: https://github.com/unclecode/crawl4ai/issues/367
--- a/.local/llm.txt/13_hooks_auth.md
+++ b/.local/llm.txt/13_hooks_auth.md
@@ -82,7 +82,7 @@ async def main():
    )

    # Initialize crawler
-    async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+    async with AsyncWebCrawler(config=browser_config) as crawler:
        crawler.crawler_strategy.set_hook("on_browser_created", on_browser_created)
        crawler.crawler_strategy.set_hook("on_page_context_created", on_page_context_created)
        crawler.crawler_strategy.set_hook("before_goto", before_goto)
--- a/.local/llm.txt/1_introduction.ex.md
+++ b/.local/llm.txt/1_introduction.ex.md
@@ -125,7 +125,7 @@ run_config = CrawlerRunConfig(

 ## 4. Basic Crawling & Simple Extraction
 ```python
-async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun("https://news.example.com/article", config=run_config)
    print(result.markdown) # Basic markdown content
 ```
@@ -375,7 +375,7 @@ async def on_page_context_created_hook(context, page, **kwargs):
    await context.route("**/*.{png,jpg,jpeg}", lambda route: route.abort())
    print("[HOOK] Image requests blocked")

-async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+async with AsyncWebCrawler(config=browser_config) as crawler:
    crawler.crawler_strategy.set_hook("on_page_context_created", on_page_context_created_hook)
    result = await crawler.arun("https://imageheavy.example.com", config=run_config)
    print("Crawl finished with images blocked.")
--- a/.local/llm.txt/3_async_webcrawler.ex.md
+++ b/.local/llm.txt/3_async_webcrawler.ex.md
@@ -19,7 +19,7 @@ async def main():
    browser_config = BrowserConfig(browser_type="chromium", headless=True)
    
    # Run the crawler asynchronously
-    async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun("https://example.com")
        print("Extracted Markdown:")
        print(result.markdown)
@@ -52,7 +52,7 @@ browser_config = BrowserConfig(
    verbose=True
 )

-async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun("https://yourwebsite.com")
    print(result.markdown)
 ```
--- a/.local/llm.txt/3_async_webcrawler.xs.md
+++ b/.local/llm.txt/3_async_webcrawler.xs.md
@@ -10,7 +10,7 @@ from crawl4ai import AsyncWebCrawler, BrowserConfig
 import asyncio

 async def main():
-    async with AsyncWebCrawler(browser_config=BrowserConfig(browser_type="chromium", headless=True)) as c:
+    async with AsyncWebCrawler(config=BrowserConfig(browser_type="chromium", headless=True)) as c:
        r = await c.arun("https://example.com")
        print(r.markdown)

@@ -21,7 +21,7 @@ asyncio.run(main())
 **Params:** `browser_type`, `headless`, `viewport_width`, `viewport_height`, `verbose`, `proxy`.
 ```python
 browser_config = BrowserConfig(browser_type="firefox", headless=False)
-async with AsyncWebCrawler(browser_config=browser_config) as c:
+async with AsyncWebCrawler(config=browser_config) as c:
    r = await c.arun("https://site.com")
 ```

--- a/.local/llm.txt/4_browser_context_page.ex.md
+++ b/.local/llm.txt/4_browser_context_page.ex.md
@@ -37,7 +37,7 @@ Standard browser creation initializes a browser instance with default or minimal
 from crawl4ai import AsyncWebCrawler, BrowserConfig

 browser_config = BrowserConfig(browser_type="chromium", headless=True)
-async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun("https://crawl4ai.com")
    print(result.markdown)
 ```
@@ -57,7 +57,7 @@ Persistent contexts create browser sessions with stored data, enabling workflows
 ##### Example: Setting Up Persistent Contexts
 ```python
 config = BrowserConfig(user_data_dir="/path/to/user/data")
-async with AsyncWebCrawler(browser_config=config) as crawler:
+async with AsyncWebCrawler(config=config) as crawler:
    result = await crawler.arun("https://crawl4ai.com")
    print(result.markdown)
 ```
@@ -88,7 +88,7 @@ The `ManagedBrowser` class offers a high-level abstraction for managing browser
 from crawl4ai import AsyncWebCrawler, BrowserConfig

 config = BrowserConfig(headless=False, debug_port=9222)
-async with AsyncWebCrawler(browser_config=config) as crawler:
+async with AsyncWebCrawler(config=config) as crawler:
    result = await crawler.arun("https://crawl4ai.com")
    print(result.markdown)
 ```
@@ -192,7 +192,7 @@ I'll help create a section about using command-line Chrome with a user data dire
       user_data_dir="/path/to/ChromeProfiles/CrawlProfile"  # Use the same directory from step 1
   )
   
-   async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+   async with AsyncWebCrawler(config=browser_config) as crawler:
       result = await crawler.arun("https://example.com")
   ```

--- a/.local/llm.txt/4_browser_context_page.sm.md
+++ b/.local/llm.txt/4_browser_context_page.sm.md
@@ -20,21 +20,21 @@
 from crawl4ai import AsyncWebCrawler, BrowserConfig

 cfg = BrowserConfig(browser_type="chromium", headless=True)
-async with AsyncWebCrawler(browser_config=cfg) as c:
+async with AsyncWebCrawler(config=cfg) as c:
    r = await c.arun("https://example.com")
 ```

 ### Persistent Contexts
 ```python
 cfg = BrowserConfig(user_data_dir="/path/to/data")
-async with AsyncWebCrawler(browser_config=cfg) as c:
+async with AsyncWebCrawler(config=cfg) as c:
    r = await c.arun("https://example.com")
 ```

 ### Managed Browser
 ```python
 cfg = BrowserConfig(headless=False, debug_port=9222, use_managed_browser=True)
-async with AsyncWebCrawler(browser_config=cfg) as c:
+async with AsyncWebCrawler(config=cfg) as c:
    r = await c.arun("https://example.com")
 ```

@@ -80,7 +80,7 @@ cfg = BrowserConfig(
    use_managed_browser=True,
    user_data_dir="/path/to/Profile"
 )
-async with AsyncWebCrawler(browser_config=cfg) as c:
+async with AsyncWebCrawler(config=cfg) as c:
    r = await c.arun("https://example.com")
 ```

@@ -96,7 +96,7 @@ cfg = BrowserConfig(
 )
 crawl_cfg = CrawlerRunConfig(extraction_strategy=JsonCssExtractionStrategy(schema))

-async with AsyncWebCrawler(browser_config=cfg) as c:
+async with AsyncWebCrawler(config=cfg) as c:
    r = await c.arun("https://example.com", config=crawl_cfg)
 ```

--- a/.local/ttt/13_hooks_auth.md
+++ b/.local/ttt/13_hooks_auth.md
@@ -82,7 +82,7 @@ async def main():
    )

    # Initialize crawler
-    async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+    async with AsyncWebCrawler(config=browser_config) as crawler:
        crawler.crawler_strategy.set_hook("on_browser_created", on_browser_created)
        crawler.crawler_strategy.set_hook("on_page_context_created", on_page_context_created)
        crawler.crawler_strategy.set_hook("before_goto", before_goto)
--- a/.local/ttt/3_async_webcrawler.ex.md
+++ b/.local/ttt/3_async_webcrawler.ex.md
@@ -19,7 +19,7 @@ async def main():
    browser_config = BrowserConfig(browser_type="chromium", headless=True)
    
    # Run the crawler asynchronously
-    async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun("https://example.com")
        print("Extracted Markdown:")
        print(result.markdown)
@@ -52,7 +52,7 @@ browser_config = BrowserConfig(
    verbose=True
 )

-async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun("https://yourwebsite.com")
    print(result.markdown)
 ```
--- a/.local/ttt/4_browser_context_page.ex.md
+++ b/.local/ttt/4_browser_context_page.ex.md
@@ -37,7 +37,7 @@ Standard browser creation initializes a browser instance with default or minimal
 from crawl4ai import AsyncWebCrawler, BrowserConfig

 browser_config = BrowserConfig(browser_type="chromium", headless=True)
-async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun("https://crawl4ai.com")
    print(result.markdown)
 ```
@@ -57,7 +57,7 @@ Persistent contexts create browser sessions with stored data, enabling workflows
 ##### Example: Setting Up Persistent Contexts
 ```python
 config = BrowserConfig(user_data_dir="/path/to/user/data")
-async with AsyncWebCrawler(browser_config=config) as crawler:
+async with AsyncWebCrawler(config=config) as crawler:
    result = await crawler.arun("https://crawl4ai.com")
    print(result.markdown)
 ```
@@ -88,7 +88,7 @@ The `ManagedBrowser` class offers a high-level abstraction for managing browser
 from crawl4ai import AsyncWebCrawler, BrowserConfig

 config = BrowserConfig(headless=False, debug_port=9222)
-async with AsyncWebCrawler(browser_config=config) as crawler:
+async with AsyncWebCrawler(config=config) as crawler:
    result = await crawler.arun("https://crawl4ai.com")
    print(result.markdown)
 ```
@@ -192,7 +192,7 @@ I'll help create a section about using command-line Chrome with a user data dire
       user_data_dir="/path/to/ChromeProfiles/CrawlProfile"  # Use the same directory from step 1
   )
   
-   async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+   async with AsyncWebCrawler(config=browser_config) as crawler:
       result = await crawler.arun("https://example.com")
   ```

--- a/.local/ttt/context.md
+++ b/.local/ttt/context.md
@@ -568,7 +568,7 @@ async def main():
    )

    # Initialize crawler
-    async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+    async with AsyncWebCrawler(config=browser_config) as crawler:
        crawler.crawler_strategy.set_hook("on_browser_created", on_browser_created)
        crawler.crawler_strategy.set_hook("on_page_context_created", on_page_context_created)
        crawler.crawler_strategy.set_hook("before_goto", before_goto)
@@ -1627,7 +1627,7 @@ async def main():
    browser_config = BrowserConfig(browser_type="chromium", headless=True)
    
    # Run the crawler asynchronously
-    async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun("https://example.com")
        print("Extracted Markdown:")
        print(result.markdown)
@@ -1660,7 +1660,7 @@ browser_config = BrowserConfig(
    verbose=True
 )

-async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun("https://yourwebsite.com")
    print(result.markdown)
 ```
@@ -1927,7 +1927,7 @@ Standard browser creation initializes a browser instance with default or minimal
 from crawl4ai import AsyncWebCrawler, BrowserConfig

 browser_config = BrowserConfig(browser_type="chromium", headless=True)
-async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+async with AsyncWebCrawler(config=browser_config) as crawler:
    result = await crawler.arun("https://crawl4ai.com")
    print(result.markdown)
 ```
@@ -1947,7 +1947,7 @@ Persistent contexts create browser sessions with stored data, enabling workflows
 ##### Example: Setting Up Persistent Contexts
 ```python
 config = BrowserConfig(user_data_dir="/path/to/user/data")
-async with AsyncWebCrawler(browser_config=config) as crawler:
+async with AsyncWebCrawler(config=config) as crawler:
    result = await crawler.arun("https://crawl4ai.com")
    print(result.markdown)
 ```
@@ -1978,7 +1978,7 @@ The `ManagedBrowser` class offers a high-level abstraction for managing browser
 from crawl4ai import AsyncWebCrawler, BrowserConfig

 config = BrowserConfig(headless=False, debug_port=9222)
-async with AsyncWebCrawler(browser_config=config) as crawler:
+async with AsyncWebCrawler(config=config) as crawler:
    result = await crawler.arun("https://crawl4ai.com")
    print(result.markdown)
 ```
@@ -2082,7 +2082,7 @@ I'll help create a section about using command-line Chrome with a user data dire
       user_data_dir="/path/to/ChromeProfiles/CrawlProfile"  # Use the same directory from step 1
   )
   
-   async with AsyncWebCrawler(browser_config=browser_config) as crawler:
+   async with AsyncWebCrawler(config=browser_config) as crawler:
       result = await crawler.arun("https://example.com")
   ```
				`@@ -0,0 +1 @@`
				`Docker: https://github.com/unclecode/crawl4ai/issues/367`