From 57aeb70f00cddaf55b407ed3274dc240416b4079 Mon Sep 17 00:00:00 2001 From: CapSolver Date: Thu, 6 Nov 2025 15:37:31 +0800 Subject: [PATCH 1/3] Add CapSolver Captcha Solver --- README.md | 2 + .../solve_aws_waf.py | 62 +++++++++++++++ .../solve_cloudflare_challenge.py | 60 +++++++++++++++ .../solve_cloudflare_turnstile.py | 64 ++++++++++++++++ .../solve_recaptcha_v2.py | 67 +++++++++++++++++ .../solve_recaptcha_v3.py | 75 +++++++++++++++++++ .../solve_aws_waf.py | 36 +++++++++ .../solve_cloudflare_challenge.py | 36 +++++++++ .../solve_cloudflare_turnstile.py | 36 +++++++++ .../solve_recaptcha_v2.py | 36 +++++++++ .../solve_recaptcha_v3.py | 36 +++++++++ docs/md_v2/core/examples.md | 13 ++-- 12 files changed, 517 insertions(+), 6 deletions(-) create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_aws_waf.py create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_cloudflare_challenge.py create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_cloudflare_turnstile.py create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_recaptcha_v2.py create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_recaptcha_v3.py create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_aws_waf.py create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_cloudflare_challenge.py create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_cloudflare_turnstile.py create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_recaptcha_v2.py create mode 100644 docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_recaptcha_v3.py diff --git a/README.md b/README.md index d9a68482..68a46f5b 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,8 @@ if __name__ == "__main__": asyncio.run(main()) ``` +If you encounter captcha during workflow, integrate CapSolver, see the [integration guide](https://www.capsolver.com/blog/Partners/crawl4ai-capsolver/?utm_source=crawl4ai&utm_medium=github_pr&utm_campaign=crawl4ai_integration), including reCAPTCHA v2, v3, Cloudflare Turnstile, Challenge, AWS WAF and more. + 3. Or use the new command-line interface: ```bash # Basic crawl with markdown output diff --git a/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_aws_waf.py b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_aws_waf.py new file mode 100644 index 00000000..351c048d --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_aws_waf.py @@ -0,0 +1,62 @@ +import asyncio +import capsolver +from crawl4ai import * + + +# TODO: set your config +# Docs: https://docs.capsolver.com/guide/captcha/awsWaf/ +api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver +site_url = "https://nft.porsche.com/onboarding@6" # page url of your target site +cookie_domain = ".nft.porsche.com" # the domain name to which you want to apply the cookie +captcha_type = "AntiAwsWafTaskProxyLess" # type of your target captcha +capsolver.api_key = api_key + + +async def main(): + browser_config = BrowserConfig( + verbose=True, + headless=False, + use_persistent_context=True, + ) + + async with AsyncWebCrawler(config=browser_config) as crawler: + await crawler.arun( + url=site_url, + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + + # get aws waf cookie using capsolver sdk + solution = capsolver.solve({ + "type": captcha_type, + "websiteURL": site_url, + }) + cookie = solution["cookie"] + print("aws waf cookie:", cookie) + + js_code = """ + document.cookie = \'aws-waf-token=""" + cookie + """;domain=""" + cookie_domain + """;path=/\'; + location.reload(); + """ + + wait_condition = """() => { + return document.title === \'Join Porsche’s journey into Web3\'; + }""" + + run_config = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test", + js_code=js_code, + js_only=True, + wait_for=f"js:{wait_condition}" + ) + + result_next = await crawler.arun( + url=site_url, + config=run_config, + ) + print(result_next.markdown) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_cloudflare_challenge.py b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_cloudflare_challenge.py new file mode 100644 index 00000000..39ef3e7e --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_cloudflare_challenge.py @@ -0,0 +1,60 @@ +import asyncio +import capsolver +from crawl4ai import * + + +# TODO: set your config +# Docs: https://docs.capsolver.com/guide/captcha/cloudflare_challenge/ +api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver +site_url = "https://gitlab.com/users/sign_in" # page url of your target site +captcha_type = "AntiCloudflareTask" # type of your target captcha +# your http proxy to solve cloudflare challenge +proxy_server = "proxy.example.com:8080" +proxy_username = "myuser" +proxy_password = "mypass" +capsolver.api_key = api_key + + +async def main(): + # get challenge cookie using capsolver sdk + solution = capsolver.solve({ + "type": captcha_type, + "websiteURL": site_url, + "proxy": f"{proxy_server}:{proxy_username}:{proxy_password}", + }) + cookies = solution["cookies"] + user_agent = solution["userAgent"] + print("challenge cookies:", cookies) + + cookies_list = [] + for name, value in cookies.items(): + cookies_list.append({ + "name": name, + "value": value, + "url": site_url, + }) + + browser_config = BrowserConfig( + verbose=True, + headless=False, + use_persistent_context=True, + user_agent=user_agent, + cookies=cookies_list, + proxy_config={ + "server": f"http://{proxy_server}", + "username": proxy_username, + "password": proxy_password, + }, + ) + + async with AsyncWebCrawler(config=browser_config) as crawler: + result = await crawler.arun( + url=site_url, + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + print(result.markdown) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_cloudflare_turnstile.py b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_cloudflare_turnstile.py new file mode 100644 index 00000000..b1603067 --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_cloudflare_turnstile.py @@ -0,0 +1,64 @@ +import asyncio +import capsolver +from crawl4ai import * + + +# TODO: set your config +# Docs: https://docs.capsolver.com/guide/captcha/cloudflare_turnstile/ +api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver +site_key = "0x4AAAAAAAGlwMzq_9z6S9Mh" # site key of your target site +site_url = "https://clifford.io/demo/cloudflare-turnstile" # page url of your target site +captcha_type = "AntiTurnstileTaskProxyLess" # type of your target captcha +capsolver.api_key = api_key + + +async def main(): + browser_config = BrowserConfig( + verbose=True, + headless=False, + use_persistent_context=True, + ) + + async with AsyncWebCrawler(config=browser_config) as crawler: + await crawler.arun( + url=site_url, + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + + # get turnstile token using capsolver sdk + solution = capsolver.solve({ + "type": captcha_type, + "websiteURL": site_url, + "websiteKey": site_key, + }) + token = solution["token"] + print("turnstile token:", token) + + js_code = """ + document.querySelector(\'input[name="cf-turnstile-response"]\').value = \'"""+token+"""\'; + document.querySelector(\'button[type="submit"]\').click(); + """ + + wait_condition = """() => { + const items = document.querySelectorAll(\'h1\'); + return items.length === 0; + }""" + + run_config = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test", + js_code=js_code, + js_only=True, + wait_for=f"js:{wait_condition}" + ) + + result_next = await crawler.arun( + url=site_url, + config=run_config, + ) + print(result_next.markdown) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_recaptcha_v2.py b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_recaptcha_v2.py new file mode 100644 index 00000000..c9302c4a --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_recaptcha_v2.py @@ -0,0 +1,67 @@ +import asyncio +import capsolver +from crawl4ai import * + + +# TODO: set your config +# Docs: https://docs.capsolver.com/guide/captcha/ReCaptchaV2/ +api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver +site_key = "6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9" # site key of your target site +site_url = "https://recaptcha-demo.appspot.com/recaptcha-v2-checkbox.php" # page url of your target site +captcha_type = "ReCaptchaV2TaskProxyLess" # type of your target captcha +capsolver.api_key = api_key + + +async def main(): + browser_config = BrowserConfig( + verbose=True, + headless=False, + use_persistent_context=True, + ) + + async with AsyncWebCrawler(config=browser_config) as crawler: + await crawler.arun( + url=site_url, + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + + # get recaptcha token using capsolver sdk + solution = capsolver.solve({ + "type": captcha_type, + "websiteURL": site_url, + "websiteKey": site_key, + }) + token = solution["gRecaptchaResponse"] + print("recaptcha token:", token) + + js_code = """ + const textarea = document.getElementById(\'g-recaptcha-response\'); + if (textarea) { + textarea.value = \"""" + token + """\"; + document.querySelector(\'button.form-field[type="submit"]\').click(); + } + """ + + wait_condition = """() => { + const items = document.querySelectorAll(\'h2\'); + return items.length > 1; + }""" + + run_config = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test", + js_code=js_code, + js_only=True, + wait_for=f"js:{wait_condition}" + ) + + result_next = await crawler.arun( + url=site_url, + config=run_config, + ) + print(result_next.markdown) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_recaptcha_v3.py b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_recaptcha_v3.py new file mode 100644 index 00000000..401f0c81 --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_api_integration/solve_recaptcha_v3.py @@ -0,0 +1,75 @@ +import asyncio +import capsolver +from crawl4ai import * + + +# TODO: set your config +# Docs: https://docs.capsolver.com/guide/captcha/ReCaptchaV3/ +api_key = "CAP-xxxxxxxxxxxxxxxxxxxxx" # your api key of capsolver +site_key = "6LdKlZEpAAAAAAOQjzC2v_d36tWxCl6dWsozdSy9" # site key of your target site +site_url = "https://recaptcha-demo.appspot.com/recaptcha-v3-request-scores.php" # page url of your target site +page_action = "examples/v3scores" # page action of your target site +captcha_type = "ReCaptchaV3TaskProxyLess" # type of your target captcha +capsolver.api_key = api_key + + +async def main(): + browser_config = BrowserConfig( + verbose=True, + headless=False, + use_persistent_context=True, + ) + + # get recaptcha token using capsolver sdk + solution = capsolver.solve({ + "type": captcha_type, + "websiteURL": site_url, + "websiteKey": site_key, + "pageAction": page_action, + }) + token = solution["gRecaptchaResponse"] + print("recaptcha token:", token) + + async with AsyncWebCrawler(config=browser_config) as crawler: + await crawler.arun( + url=site_url, + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + + js_code = """ + const originalFetch = window.fetch; + + window.fetch = function(...args) { + if (typeof args[0] === 'string' && args[0].includes('/recaptcha-v3-verify.php')) { + const url = new URL(args[0], window.location.origin); + url.searchParams.set('action', '""" + token + """'); + args[0] = url.toString(); + document.querySelector('.token').innerHTML = "fetch('/recaptcha-v3-verify.php?action=examples/v3scores&token=""" + token + """')"; + console.log('Fetch URL hooked:', args[0]); + } + return originalFetch.apply(this, args); + }; + """ + + wait_condition = """() => { + return document.querySelector('.step3:not(.hidden)'); + }""" + + run_config = CrawlerRunConfig( + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test", + js_code=js_code, + js_only=True, + wait_for=f"js:{wait_condition}" + ) + + result_next = await crawler.arun( + url=site_url, + config=run_config, + ) + print(result_next.markdown) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_aws_waf.py b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_aws_waf.py new file mode 100644 index 00000000..d1238469 --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_aws_waf.py @@ -0,0 +1,36 @@ +import time +import asyncio +from crawl4ai import * + + +# TODO: the user data directory that includes the capsolver extension +user_data_dir = "/browser-profile/Default1" + +""" +The capsolver extension supports more features, such as: + - Telling the extension when to start solving captcha. + - Calling functions to check whether the captcha has been solved, etc. +Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/ +""" + +browser_config = BrowserConfig( + verbose=True, + headless=False, + user_data_dir=user_data_dir, + use_persistent_context=True, +) + +async def main(): + async with AsyncWebCrawler(config=browser_config) as crawler: + result_initial = await crawler.arun( + url="https://nft.porsche.com/onboarding@6", + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + + # do something later + time.sleep(300) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_cloudflare_challenge.py b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_cloudflare_challenge.py new file mode 100644 index 00000000..3f0e967b --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_cloudflare_challenge.py @@ -0,0 +1,36 @@ +import time +import asyncio +from crawl4ai import * + + +# TODO: the user data directory that includes the capsolver extension +user_data_dir = "/browser-profile/Default1" + +""" +The capsolver extension supports more features, such as: + - Telling the extension when to start solving captcha. + - Calling functions to check whether the captcha has been solved, etc. +Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/ +""" + +browser_config = BrowserConfig( + verbose=True, + headless=False, + user_data_dir=user_data_dir, + use_persistent_context=True, +) + +async def main(): + async with AsyncWebCrawler(config=browser_config) as crawler: + result_initial = await crawler.arun( + url="https://gitlab.com/users/sign_in", + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + + # do something later + time.sleep(300) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_cloudflare_turnstile.py b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_cloudflare_turnstile.py new file mode 100644 index 00000000..ca074f53 --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_cloudflare_turnstile.py @@ -0,0 +1,36 @@ +import time +import asyncio +from crawl4ai import * + + +# TODO: the user data directory that includes the capsolver extension +user_data_dir = "/browser-profile/Default1" + +""" +The capsolver extension supports more features, such as: + - Telling the extension when to start solving captcha. + - Calling functions to check whether the captcha has been solved, etc. +Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/ +""" + +browser_config = BrowserConfig( + verbose=True, + headless=False, + user_data_dir=user_data_dir, + use_persistent_context=True, +) + +async def main(): + async with AsyncWebCrawler(config=browser_config) as crawler: + result_initial = await crawler.arun( + url="https://clifford.io/demo/cloudflare-turnstile", + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + + # do something later + time.sleep(300) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_recaptcha_v2.py b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_recaptcha_v2.py new file mode 100644 index 00000000..bdcd0f94 --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_recaptcha_v2.py @@ -0,0 +1,36 @@ +import time +import asyncio +from crawl4ai import * + + +# TODO: the user data directory that includes the capsolver extension +user_data_dir = "/browser-profile/Default1" + +""" +The capsolver extension supports more features, such as: + - Telling the extension when to start solving captcha. + - Calling functions to check whether the captcha has been solved, etc. +Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/ +""" + +browser_config = BrowserConfig( + verbose=True, + headless=False, + user_data_dir=user_data_dir, + use_persistent_context=True, +) + +async def main(): + async with AsyncWebCrawler(config=browser_config) as crawler: + result_initial = await crawler.arun( + url="https://recaptcha-demo.appspot.com/recaptcha-v2-checkbox.php", + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + + # do something later + time.sleep(300) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_recaptcha_v3.py b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_recaptcha_v3.py new file mode 100644 index 00000000..899b83ba --- /dev/null +++ b/docs/examples/capsolver_captcha_solver/capsolver_extension_integration/solve_recaptcha_v3.py @@ -0,0 +1,36 @@ +import time +import asyncio +from crawl4ai import * + + +# TODO: the user data directory that includes the capsolver extension +user_data_dir = "/browser-profile/Default1" + +""" +The capsolver extension supports more features, such as: + - Telling the extension when to start solving captcha. + - Calling functions to check whether the captcha has been solved, etc. +Reference blog: https://docs.capsolver.com/guide/automation-tool-integration/ +""" + +browser_config = BrowserConfig( + verbose=True, + headless=False, + user_data_dir=user_data_dir, + use_persistent_context=True, +) + +async def main(): + async with AsyncWebCrawler(config=browser_config) as crawler: + result_initial = await crawler.arun( + url="https://recaptcha-demo.appspot.com/recaptcha-v3-request-scores.php", + cache_mode=CacheMode.BYPASS, + session_id="session_captcha_test" + ) + + # do something later + time.sleep(300) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/docs/md_v2/core/examples.md b/docs/md_v2/core/examples.md index b1c52013..db11773b 100644 --- a/docs/md_v2/core/examples.md +++ b/docs/md_v2/core/examples.md @@ -56,13 +56,14 @@ This page provides a comprehensive list of example scripts that demonstrate vari ## Anti-Bot & Stealth Features -| Example | Description | Link | -|---------|-------------|------| -| Stealth Mode Quick Start | Five practical examples showing how to use stealth mode for bypassing basic bot detection. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/stealth_mode_quick_start.py) | +| Example | Description | Link | +|----------------------------|-------------|------| +| Stealth Mode Quick Start | Five practical examples showing how to use stealth mode for bypassing basic bot detection. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/stealth_mode_quick_start.py) | | Stealth Mode Comprehensive | Comprehensive demonstration of stealth mode features with bot detection testing and comparisons. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/stealth_mode_example.py) | -| Undetected Browser | Simple example showing how to use the undetected browser adapter. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/hello_world_undetected.py) | -| Undetected Browser Demo | Basic demo comparing regular and undetected browser modes. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/undetected_simple_demo.py) | -| Undetected Tests | Advanced tests comparing regular vs undetected browsers on various bot detection services. | [View Folder](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/undetectability/) | +| Undetected Browser | Simple example showing how to use the undetected browser adapter. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/hello_world_undetected.py) | +| Undetected Browser Demo | Basic demo comparing regular and undetected browser modes. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/undetected_simple_demo.py) | +| Undetected Tests | Advanced tests comparing regular vs undetected browsers on various bot detection services. | [View Folder](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/undetectability/) | +| CapSolver Captcha Solver | Seamlessly integrate with [CapSolver](https://www.capsolver.com/?utm_source=crawl4ai&utm_medium=github_pr&utm_campaign=crawl4ai_integration) to automatically solve reCAPTCHA v2/v3, Cloudflare Turnstile / Challenges, AWS WAF and more for uninterrupted scraping and automation. | [View Folder](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/capsolver_captcha_solver/) | ## Customization & Security From 2ae9899eac131ee6847de42853def569e69b335b Mon Sep 17 00:00:00 2001 From: CapSolver <111706386+capsolver@users.noreply.github.com> Date: Thu, 6 Nov 2025 15:49:30 +0800 Subject: [PATCH 2/3] Clarify CapSolver integration instructions Updated text for clarity and capitalization. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 68a46f5b..7fd36ef2 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ if __name__ == "__main__": asyncio.run(main()) ``` -If you encounter captcha during workflow, integrate CapSolver, see the [integration guide](https://www.capsolver.com/blog/Partners/crawl4ai-capsolver/?utm_source=crawl4ai&utm_medium=github_pr&utm_campaign=crawl4ai_integration), including reCAPTCHA v2, v3, Cloudflare Turnstile, Challenge, AWS WAF and more. +If you encounter Captcha during your workflow, follow the [integration guide](https://www.capsolver.com/blog/Partners/crawl4ai-capsolver/?utm_source=crawl4ai&utm_medium=github_pr&utm_campaign=crawl4ai_integration) to integrate CapSolver. It supports reCAPTCHA v2/v3, Cloudflare Turnstile, Challenge, AWS WAF, and more. 3. Or use the new command-line interface: ```bash From 4bee230c373045778727acaa9dfb3d5f791f653e Mon Sep 17 00:00:00 2001 From: Aravind Karnam Date: Mon, 10 Nov 2025 11:20:48 +0530 Subject: [PATCH 3/3] docs: Add a tip for captcha solving usecases using a third party integration --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7fd36ef2..d3b35c51 100644 --- a/README.md +++ b/README.md @@ -95,8 +95,6 @@ if __name__ == "__main__": asyncio.run(main()) ``` -If you encounter Captcha during your workflow, follow the [integration guide](https://www.capsolver.com/blog/Partners/crawl4ai-capsolver/?utm_source=crawl4ai&utm_medium=github_pr&utm_campaign=crawl4ai_integration) to integrate CapSolver. It supports reCAPTCHA v2/v3, Cloudflare Turnstile, Challenge, AWS WAF, and more. - 3. Or use the new command-line interface: ```bash # Basic crawl with markdown output @@ -546,6 +544,10 @@ async def test_news_crawl(): +--- + +> **💡 Tip:** Some websites may use **CAPTCHA** based verification mechanisms to prevent automated access. If your workflow encounters such challenges, you may optionally integrate a third-party CAPTCHA-handling service such as [CapSolver](https://www.capsolver.com/blog/Partners/crawl4ai-capsolver/?utm_source=crawl4ai&utm_medium=github_pr&utm_campaign=crawl4ai_integration). They support reCAPTCHA v2/v3, Cloudflare Turnstile, Challenge, AWS WAF, and more. Please ensure that your usage complies with the target website’s terms of service and applicable laws. + ## ✨ Recent Updates