Sponsors/new (#1637)

2025-11-24 17:59:33 +05:30
parent f68e7531e3
commit 0024c82cdc
2 changed files with 62 additions and 1 deletions
--- a/docs/examples/cloud_browser/scrapeless_browser.py
+++ b/docs/examples/cloud_browser/scrapeless_browser.py
@@ -0,0 +1,61 @@
+import json
+import asyncio
+from urllib.parse import quote, urlencode
+from crawl4ai import CrawlerRunConfig, BrowserConfig, AsyncWebCrawler
+
+# Scrapeless provides a free anti-detection fingerprint browser client and cloud browsers:
+# https://www.scrapeless.com/en/blog/scrapeless-nstbrowser-strategic-integration
+
+async def main():
+    """Crawl one page through a Scrapeless cloud browser (CDP) and print its status, title and description."""
+    # Custom fingerprint: userAgent and platform must describe the same OS, or the mismatch itself is a bot signal.
+    fingerprint = {
+        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.1.2.3 Safari/537.36",
+        "platform": "Windows",
+        "screen": {"width": 1280, "height": 1024},
+        "localization": {"languages": ["zh-HK", "en-US", "en"], "timezone": "Asia/Hong_Kong"},
+    }
+
+    # NOTE(review): quote() here plus urlencode() below percent-encodes the JSON twice; kept as-is - confirm Scrapeless expects that.
+    encoded_fingerprint = quote(json.dumps(fingerprint))
+
+    scrapeless_params = {
+        "token": "your token",  # replace with your own Scrapeless API token
+        "sessionTTL": 1000,
+        "sessionName": "Demo",
+        "fingerprint": encoded_fingerprint,
+        # Sets the target country/region for the proxy, sending requests via an IP address from that region. You can specify a country code (e.g., US for the United States, GB for the United Kingdom, ANY for any country). See country codes for all supported options.
+        # "proxyCountry": "ANY",
+        # "profileId": "your profileId",  # create the profile on Scrapeless first
+        # For more usage details, please refer to https://docs.scrapeless.com/en/scraping-browser/quickstart/getting-started
+    }
+    query_string = urlencode(scrapeless_params)
+    scrapeless_connection_url = f"wss://browser.scrapeless.com/api/v2/browser?{query_string}"
+    async with AsyncWebCrawler(
+        config=BrowserConfig(
+            headless=False,
+            browser_mode="cdp",
+            cdp_url=scrapeless_connection_url,
+        )
+    ) as crawler:
+        result = await crawler.arun(
+            url="https://www.scrapeless.com/en",
+            config=CrawlerRunConfig(
+                wait_for="css:.content",
+                scan_full_page=True,
+            ),
+        )
+        if not result.success:  # on failure metadata may be missing, so report the error instead of indexing into it
+            print(f"Crawl failed: {result.error_message}")
+            return
+        metadata = result.metadata or {}
+        print("-" * 20)
+        print(f'Status Code: {result.status_code}')
+        print("-" * 20)
+        print(f'Title: {metadata.get("title")}')
+        print(f'Description: {metadata.get("description")}')
+        print("-" * 20)
+
+if __name__ == "__main__":
+    asyncio.run(main())  # start an event loop and run the example coroutine to completion
+