import json
import asyncio
from urllib.parse import quote, urlencode

from crawl4ai import CrawlerRunConfig, BrowserConfig, AsyncWebCrawler

# Scrapeless provides a free anti-detection fingerprint browser client and cloud browsers:
# https://www.scrapeless.com/en/blog/scrapeless-nstbrowser-strategic-integration


async def main():
    """Connect to a Scrapeless cloud browser over CDP and crawl one page.

    Builds a custom browser fingerprint, opens a remote CDP session via the
    Scrapeless WebSocket endpoint, crawls https://www.scrapeless.com/en with
    crawl4ai, and prints the response status and page metadata.
    """
    # Customize the browser fingerprint presented to target sites.
    # NOTE(review): the userAgent advertises macOS while "platform" says
    # "Windows" — a mismatched UA/platform pair is itself a common
    # bot-detection signal; confirm the intended platform value against the
    # Scrapeless fingerprint docs.
    fingerprint = {
        "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.1.2.3 Safari/537.36",
        "platform": "Windows",
        "screen": {"width": 1280, "height": 1024},
        "localization": {
            "languages": ["zh-HK", "en-US", "en"],
            "timezone": "Asia/Hong_Kong",
        },
    }

    scrapeless_params = {
        "token": "your token",
        "sessionTTL": 1000,
        "sessionName": "Demo",
        # BUG FIX: the original pre-encoded the JSON with urllib.parse.quote()
        # and then passed it through urlencode(), which percent-encodes every
        # value a second time ("%" -> "%25"), so the server would receive a
        # double-encoded fingerprint. urlencode() already encodes each value
        # exactly once, so the raw JSON string goes in directly.
        "fingerprint": json.dumps(fingerprint),
        # Sets the target country/region for the proxy, sending requests via an
        # IP address from that region. You can specify a country code (e.g., US
        # for the United States, GB for the United Kingdom, ANY for any
        # country). See country codes for all supported options.
        # "proxyCountry": "ANY",
        # create profile on scrapeless
        # "profileId": "your profileId",
        # For more usage details, please refer to
        # https://docs.scrapeless.com/en/scraping-browser/quickstart/getting-started
    }
    query_string = urlencode(scrapeless_params)
    scrapeless_connection_url = f"wss://browser.scrapeless.com/api/v2/browser?{query_string}"

    async with AsyncWebCrawler(
        config=BrowserConfig(
            headless=False,
            # "cdp" mode attaches to the remote Scrapeless browser instead of
            # launching a local one.
            browser_mode="cdp",
            cdp_url=scrapeless_connection_url,
        )
    ) as crawler:
        result = await crawler.arun(
            url="https://www.scrapeless.com/en",
            config=CrawlerRunConfig(
                # Wait until the main content element is present before extracting.
                wait_for="css:.content",
                scan_full_page=True,
            ),
        )
        print("-" * 20)
        print(f'Status Code: {result.status_code}')
        print("-" * 20)
        print(f'Title: {result.metadata["title"]}')
        print(f'Description: {result.metadata["description"]}')
        print("-" * 20)


if __name__ == "__main__":
    asyncio.run(main())