Renames browser_config param to config in AsyncWebCrawler
Standardizes the parameter naming convention across the codebase by renaming browser_config to the more concise config in the AsyncWebCrawler constructor. Updates all documentation examples and internal usages to reflect the new parameter name for consistency. Also improves hook execution by adding url/response parameters to the goto hooks, and fixes the parameter ordering in the before_return_html hook.
This commit is contained in:
@@ -82,7 +82,7 @@ async def main():
|
||||
)
|
||||
|
||||
# Initialize crawler
|
||||
async with AsyncWebCrawler(browser_config=browser_config) as crawler:
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
crawler.crawler_strategy.set_hook("on_browser_created", on_browser_created)
|
||||
crawler.crawler_strategy.set_hook("on_page_context_created", on_page_context_created)
|
||||
crawler.crawler_strategy.set_hook("before_goto", before_goto)
|
||||
|
||||
@@ -125,7 +125,7 @@ run_config = CrawlerRunConfig(
|
||||
|
||||
## 4. Basic Crawling & Simple Extraction
|
||||
```python
|
||||
async with AsyncWebCrawler(browser_config=browser_config) as crawler:
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
result = await crawler.arun("https://news.example.com/article", config=run_config)
|
||||
print(result.markdown) # Basic markdown content
|
||||
```
|
||||
@@ -375,7 +375,7 @@ async def on_page_context_created_hook(context, page, **kwargs):
|
||||
await context.route("**/*.{png,jpg,jpeg}", lambda route: route.abort())
|
||||
print("[HOOK] Image requests blocked")
|
||||
|
||||
async with AsyncWebCrawler(browser_config=browser_config) as crawler:
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
crawler.crawler_strategy.set_hook("on_page_context_created", on_page_context_created_hook)
|
||||
result = await crawler.arun("https://imageheavy.example.com", config=run_config)
|
||||
print("Crawl finished with images blocked.")
|
||||
|
||||
@@ -19,7 +19,7 @@ async def main():
|
||||
browser_config = BrowserConfig(browser_type="chromium", headless=True)
|
||||
|
||||
# Run the crawler asynchronously
|
||||
async with AsyncWebCrawler(browser_config=browser_config) as crawler:
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
result = await crawler.arun("https://example.com")
|
||||
print("Extracted Markdown:")
|
||||
print(result.markdown)
|
||||
@@ -52,7 +52,7 @@ browser_config = BrowserConfig(
|
||||
verbose=True
|
||||
)
|
||||
|
||||
async with AsyncWebCrawler(browser_config=browser_config) as crawler:
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
result = await crawler.arun("https://yourwebsite.com")
|
||||
print(result.markdown)
|
||||
```
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
quick_start: Basic async crawl setup requires BrowserConfig and AsyncWebCrawler initialization | getting started, basic usage, initialization | asyncio.run(AsyncWebCrawler(browser_config=BrowserConfig(browser_type="chromium", headless=True)))
|
||||
quick_start: Basic async crawl setup requires BrowserConfig and AsyncWebCrawler initialization | getting started, basic usage, initialization | asyncio.run(AsyncWebCrawler(config=BrowserConfig(browser_type="chromium", headless=True)))
|
||||
browser_types: AsyncWebCrawler supports multiple browser types including Chromium and Firefox | supported browsers, browser options | BrowserConfig(browser_type="chromium")
|
||||
headless_mode: Browser can run in headless mode without UI for better performance | invisible browser, no GUI | BrowserConfig(headless=True)
|
||||
viewport_settings: Configure browser viewport dimensions for proper page rendering | screen size, window size | BrowserConfig(viewport_width=1920, viewport_height=1080)
|
||||
|
||||
@@ -10,7 +10,7 @@ from crawl4ai import AsyncWebCrawler, BrowserConfig
|
||||
import asyncio
|
||||
|
||||
async def main():
|
||||
async with AsyncWebCrawler(browser_config=BrowserConfig(browser_type="chromium", headless=True)) as c:
|
||||
async with AsyncWebCrawler(config=BrowserConfig(browser_type="chromium", headless=True)) as c:
|
||||
r = await c.arun("https://example.com")
|
||||
print(r.markdown)
|
||||
|
||||
@@ -21,7 +21,7 @@ asyncio.run(main())
|
||||
**Params:** `browser_type`, `headless`, `viewport_width`, `viewport_height`, `verbose`, `proxy`.
|
||||
```python
|
||||
browser_config = BrowserConfig(browser_type="firefox", headless=False)
|
||||
async with AsyncWebCrawler(browser_config=browser_config) as c:
|
||||
async with AsyncWebCrawler(config=browser_config) as c:
|
||||
r = await c.arun("https://site.com")
|
||||
```
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
setup_usage: Initialize AsyncWebCrawler with BrowserConfig for basic web crawling | crawler setup, initialization, basic usage | AsyncWebCrawler(browser_config=BrowserConfig(browser_type="chromium", headless=True))
|
||||
setup_usage: Initialize AsyncWebCrawler with BrowserConfig for basic web crawling | crawler setup, initialization, basic usage | AsyncWebCrawler(config=BrowserConfig(browser_type="chromium", headless=True))
|
||||
browser_configuration: Configure browser settings including type, headless mode, viewport, and proxy | browser setup, browser settings, viewport config | BrowserConfig(browser_type="firefox", headless=False, viewport_width=1920)
|
||||
docker_setup: Run crawler in Docker using python slim image with playwright installation | docker configuration, containerization | FROM python:3.10-slim; RUN pip install crawl4ai playwright
|
||||
crawler_strategy: Use AsyncPlaywrightCrawlerStrategy as default crawler implementation | crawler implementation, strategy pattern | AsyncWebCrawler(crawler_strategy=AsyncPlaywrightCrawlerStrategy())
|
||||
|
||||
@@ -37,7 +37,7 @@ Standard browser creation initializes a browser instance with default or minimal
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
||||
|
||||
browser_config = BrowserConfig(browser_type="chromium", headless=True)
|
||||
async with AsyncWebCrawler(browser_config=browser_config) as crawler:
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
result = await crawler.arun("https://crawl4ai.com")
|
||||
print(result.markdown)
|
||||
```
|
||||
@@ -57,7 +57,7 @@ Persistent contexts create browser sessions with stored data, enabling workflows
|
||||
##### Example: Setting Up Persistent Contexts
|
||||
```python
|
||||
config = BrowserConfig(user_data_dir="/path/to/user/data")
|
||||
async with AsyncWebCrawler(browser_config=config) as crawler:
|
||||
async with AsyncWebCrawler(config=config) as crawler:
|
||||
result = await crawler.arun("https://crawl4ai.com")
|
||||
print(result.markdown)
|
||||
```
|
||||
@@ -88,7 +88,7 @@ The `ManagedBrowser` class offers a high-level abstraction for managing browser
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
||||
|
||||
config = BrowserConfig(headless=False, debug_port=9222)
|
||||
async with AsyncWebCrawler(browser_config=config) as crawler:
|
||||
async with AsyncWebCrawler(config=config) as crawler:
|
||||
result = await crawler.arun("https://crawl4ai.com")
|
||||
print(result.markdown)
|
||||
```
|
||||
@@ -192,7 +192,7 @@ I'll help create a section about using command-line Chrome with a user data dire
|
||||
user_data_dir="/path/to/ChromeProfiles/CrawlProfile" # Use the same directory from step 1
|
||||
)
|
||||
|
||||
async with AsyncWebCrawler(browser_config=browser_config) as crawler:
|
||||
async with AsyncWebCrawler(config=browser_config) as crawler:
|
||||
result = await crawler.arun("https://example.com")
|
||||
```
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
browser_creation: Create standard browser instance with default configurations | browser initialization, basic setup, minimal config | AsyncWebCrawler(browser_config=BrowserConfig(browser_type="chromium", headless=True))
|
||||
browser_creation: Create standard browser instance with default configurations | browser initialization, basic setup, minimal config | AsyncWebCrawler(config=BrowserConfig(browser_type="chromium", headless=True))
|
||||
persistent_context: Use persistent browser contexts to maintain session data and cookies | user_data_dir, session storage, login state | BrowserConfig(user_data_dir="/path/to/user/data")
|
||||
managed_browser: High-level browser management with resource optimization and debugging | browser process, stealth mode, debugging tools | BrowserConfig(headless=False, debug_port=9222)
|
||||
context_config: Configure browser context with custom headers and cookies | headers customization, session reuse | CrawlerRunConfig(headers={"User-Agent": "Crawl4AI/1.0"})
|
||||
|
||||
@@ -20,21 +20,21 @@
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig
|
||||
|
||||
cfg = BrowserConfig(browser_type="chromium", headless=True)
|
||||
async with AsyncWebCrawler(browser_config=cfg) as c:
|
||||
async with AsyncWebCrawler(config=cfg) as c:
|
||||
r = await c.arun("https://example.com")
|
||||
```
|
||||
|
||||
### Persistent Contexts
|
||||
```python
|
||||
cfg = BrowserConfig(user_data_dir="/path/to/data")
|
||||
async with AsyncWebCrawler(browser_config=cfg) as c:
|
||||
async with AsyncWebCrawler(config=cfg) as c:
|
||||
r = await c.arun("https://example.com")
|
||||
```
|
||||
|
||||
### Managed Browser
|
||||
```python
|
||||
cfg = BrowserConfig(headless=False, debug_port=9222, use_managed_browser=True)
|
||||
async with AsyncWebCrawler(browser_config=cfg) as c:
|
||||
async with AsyncWebCrawler(config=cfg) as c:
|
||||
r = await c.arun("https://example.com")
|
||||
```
|
||||
|
||||
@@ -80,7 +80,7 @@ cfg = BrowserConfig(
|
||||
use_managed_browser=True,
|
||||
user_data_dir="/path/to/Profile"
|
||||
)
|
||||
async with AsyncWebCrawler(browser_config=cfg) as c:
|
||||
async with AsyncWebCrawler(config=cfg) as c:
|
||||
r = await c.arun("https://example.com")
|
||||
```
|
||||
|
||||
@@ -96,7 +96,7 @@ cfg = BrowserConfig(
|
||||
)
|
||||
crawl_cfg = CrawlerRunConfig(extraction_strategy=JsonCssExtractionStrategy(schema))
|
||||
|
||||
async with AsyncWebCrawler(browser_config=cfg) as c:
|
||||
async with AsyncWebCrawler(config=cfg) as c:
|
||||
r = await c.arun("https://example.com", config=crawl_cfg)
|
||||
```
|
||||
|
||||
|
||||
Reference in New Issue
Block a user