
| Helps engineers and buyers find, compare, and source electronic & industrial parts in seconds, with specs, pricing, lead times & alternatives. | 🥇 Gold |
| KidoCode
| Kidocode is a hybrid technology and entrepreneurship school for kids aged 5–18, offering both online and on-campus education. | 🥇 Gold | diff --git a/docs/examples/cloud_browser/scrapeless_browser.py b/docs/examples/cloud_browser/scrapeless_browser.py new file mode 100644 index 00000000..4981c813 --- /dev/null +++ b/docs/examples/cloud_browser/scrapeless_browser.py @@ -0,0 +1,61 @@ +import json +import asyncio +from urllib.parse import quote, urlencode +from crawl4ai import CrawlerRunConfig, BrowserConfig, AsyncWebCrawler + +# Scrapeless provides a free anti-detection fingerprint browser client and cloud browsers: +# https://www.scrapeless.com/en/blog/scrapeless-nstbrowser-strategic-integration + +async def main(): + # customize browser fingerprint + fingerprint = { + "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.1.2.3 Safari/537.36", + "platform": "Windows", + "screen": { + "width": 1280, "height": 1024 + }, + "localization": { + "languages": ["zh-HK", "en-US", "en"], "timezone": "Asia/Hong_Kong", + } + } + + fingerprint_json = json.dumps(fingerprint) + encoded_fingerprint = quote(fingerprint_json) + + scrapeless_params = { + "token": "your token", + "sessionTTL": 1000, + "sessionName": "Demo", + "fingerprint": encoded_fingerprint, + # Sets the target country/region for the proxy, sending requests via an IP address from that region. You can specify a country code (e.g., US for the United States, GB for the United Kingdom, ANY for any country). See country codes for all supported options. 
+ # "proxyCountry": "ANY", + # create profile on scrapeless + # "profileId": "your profileId", + # For more usage details, please refer to https://docs.scrapeless.com/en/scraping-browser/quickstart/getting-started + } + query_string = urlencode(scrapeless_params) + scrapeless_connection_url = f"wss://browser.scrapeless.com/api/v2/browser?{query_string}" + async with AsyncWebCrawler( + config=BrowserConfig( + headless=False, + browser_mode="cdp", + cdp_url=scrapeless_connection_url, + ) + ) as crawler: + result = await crawler.arun( + url="https://www.scrapeless.com/en", + config=CrawlerRunConfig( + wait_for="css:.content", + scan_full_page=True, + ), + ) + print("-" * 20) + print(f'Status Code: {result.status_code}') + print("-" * 20) + print(f'Title: {result.metadata["title"]}') + print(f'Description: {result.metadata["description"]}') + print("-" * 20) + +if __name__ == "__main__": + asyncio.run(main()) + \ No newline at end of file