feat(browser): add geolocation, locale and timezone support
Add support for controlling browser geolocation, locale and timezone settings:

- New GeolocationConfig class for managing GPS coordinates
- Add locale and timezone_id parameters to CrawlerRunConfig
- Update browser context creation to handle location settings
- Add example script for geolocation usage
- Update documentation with location-based identity features

This enables more precise control over browser identity and location reporting.
This commit is contained in:
70
docs/examples/use_geo_location.py
Normal file
70
docs/examples/use_geo_location.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# use_geo_location.py
|
||||
"""
|
||||
Example: override locale, timezone, and geolocation using Crawl4ai patterns.
|
||||
|
||||
This demo uses `AsyncWebCrawler.arun()` to fetch a page with
|
||||
browser context primed for specific locale, timezone, and GPS,
|
||||
and saves a screenshot for visual verification.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from crawl4ai import (
|
||||
AsyncWebCrawler,
|
||||
CrawlerRunConfig,
|
||||
BrowserConfig,
|
||||
GeolocationConfig,
|
||||
CrawlResult,
|
||||
)
|
||||
|
||||
async def demo_geo_override():
    """Crawl a geolocation test page with locale, timezone and GPS overrides.

    Builds a visible (non-headless) browser configuration and a run
    configuration that sets ``locale``, ``timezone_id`` and ``geolocation``,
    crawls https://browserleaks.com/geo, and writes the returned
    base64-encoded screenshot to ``tmp/geo_test.png`` next to this script so
    the overrides can be verified visually.

    Prints the outcome (saved path, crawl failure, or missing screenshot)
    instead of raising, since this is a demo script.
    """
    print("\n=== Geo-Override Crawl ===")

    # 1) Browser setup: a regular Playwright-launched browser
    #    (use_managed_browser=False), shown on screen for inspection.
    browser_cfg = BrowserConfig(
        headless=False,
        viewport_width=1280,
        viewport_height=720,
        use_managed_browser=False,
    )

    # 2) Run config: include locale, timezone_id, geolocation, and screenshot
    run_cfg = CrawlerRunConfig(
        url="https://browserleaks.com/geo",  # test page that shows your location
        locale="en-US",                      # Accept-Language & UI locale
        timezone_id="America/Los_Angeles",   # JS Date()/Intl timezone
        geolocation=GeolocationConfig(       # override GPS coords
            latitude=34.0522,
            longitude=-118.2437,
            accuracy=10.0,
        ),
        screenshot=True,                     # capture screenshot after load
        session_id="geo_test",               # reuse context if rerunning
        delay_before_return_html=5,          # give the test page time to render
    )

    async with AsyncWebCrawler(config=browser_cfg) as crawler:
        # 3) Run crawl; the result is indexed like a list even for a single URL.
        results: List[CrawlResult] = await crawler.arun(
            url=run_cfg.url,
            config=run_cfg,
        )
        result = results[0]

        # A failed crawl also has no screenshot; report the real cause rather
        # than blaming the screenshot configuration.
        if not result.success:
            print(f"Crawl failed: {result.error_message}")
            return

        if not result.screenshot:
            print("No screenshot captured, check configuration.")
            return

        # 4) Save screenshot and report path
        out_dir = Path(__file__).parent / "tmp"
        out_dir.mkdir(exist_ok=True)
        shot_path = out_dir / "geo_test.png"
        # result.screenshot is base64 text; decode to raw PNG bytes on write.
        shot_path.write_bytes(base64.b64decode(result.screenshot))
        print(f"Saved screenshot to {shot_path}")
|
||||
|
||||
# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    demo_coroutine = demo_geo_override()
    asyncio.run(demo_coroutine)
|
||||
Reference in New Issue
Block a user