From 0024c82cdcbd7c2d9e9e93ec40c8fec2563eff8f Mon Sep 17 00:00:00 2001 From: Aravind Date: Mon, 24 Nov 2025 17:59:33 +0530 Subject: [PATCH] Sponsors/new (#1637) --- README.md | 2 +- .../cloud_browser/scrapeless_browser.py | 61 +++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 docs/examples/cloud_browser/scrapeless_browser.py diff --git a/README.md b/README.md index 79161a8a..09178cb9 100644 --- a/README.md +++ b/README.md @@ -1034,7 +1034,7 @@ Our enterprise sponsors and technology partners help scale Crawl4AI to power pro | Company | About | Sponsorship Tier | |------|------|----------------------------| -| Scrapeless | Scrapeless is the best full-stack web scraping toolkit offering Scraping API, Scraping Browser, Web Unlocker, Captcha Solver, and Proxies, designed to handle all your data collection needs. | 🥈 Silver | +| Scrapeless | Scrapeless provides production-grade infrastructure for Crawling, Automation, and AI Agents, offering Scraping Browser, 4 Proxy Types and Universal Scraping API. | 🥈 Silver | | Capsolver | AI-powered Captcha solving service. Supports all major Captcha types, including reCAPTCHA, Cloudflare, and more | 🥉 Bronze | | DataSync | Helps engineers and buyers find, compare, and source electronic & industrial parts in seconds, with specs, pricing, lead times & alternatives.| 🥇 Gold | | Kidocode

KidoCode
| Kidocode is a hybrid technology and entrepreneurship school for kids aged 5–18, offering both online and on-campus education. | 🥇 Gold | diff --git a/docs/examples/cloud_browser/scrapeless_browser.py b/docs/examples/cloud_browser/scrapeless_browser.py new file mode 100644 index 00000000..4981c813 --- /dev/null +++ b/docs/examples/cloud_browser/scrapeless_browser.py @@ -0,0 +1,61 @@ +import json +import asyncio +from urllib.parse import quote, urlencode +from crawl4ai import CrawlerRunConfig, BrowserConfig, AsyncWebCrawler + +# Scrapeless provides a free anti-detection fingerprint browser client and cloud browsers: +# https://www.scrapeless.com/en/blog/scrapeless-nstbrowser-strategic-integration + +async def main(): + # customize browser fingerprint + fingerprint = { + "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.1.2.3 Safari/537.36", + "platform": "Windows", + "screen": { + "width": 1280, "height": 1024 + }, + "localization": { + "languages": ["zh-HK", "en-US", "en"], "timezone": "Asia/Hong_Kong", + } + } + + fingerprint_json = json.dumps(fingerprint) + encoded_fingerprint = quote(fingerprint_json) + + scrapeless_params = { + "token": "your token", + "sessionTTL": 1000, + "sessionName": "Demo", + "fingerprint": encoded_fingerprint, + # Sets the target country/region for the proxy, sending requests via an IP address from that region. You can specify a country code (e.g., US for the United States, GB for the United Kingdom, ANY for any country). See country codes for all supported options. 
+ # "proxyCountry": "ANY", + # create profile on scrapeless + # "profileId": "your profileId", + # For more usage details, please refer to https://docs.scrapeless.com/en/scraping-browser/quickstart/getting-started + } + query_string = urlencode(scrapeless_params) + scrapeless_connection_url = f"wss://browser.scrapeless.com/api/v2/browser?{query_string}" + async with AsyncWebCrawler( + config=BrowserConfig( + headless=False, + browser_mode="cdp", + cdp_url=scrapeless_connection_url, + ) + ) as crawler: + result = await crawler.arun( + url="https://www.scrapeless.com/en", + config=CrawlerRunConfig( + wait_for="css:.content", + scan_full_page=True, + ), + ) + print("-" * 20) + print(f'Status Code: {result.status_code}') + print("-" * 20) + print(f'Title: {result.metadata["title"]}') + print(f'Description: {result.metadata["description"]}') + print("-" * 20) + +if __name__ == "__main__": + asyncio.run(main()) + \ No newline at end of file