""" Bot Detection Test - Compare Regular vs Undetected Tests browser fingerprinting differences at bot.sannysoft.com """ import asyncio from crawl4ai import ( AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, UndetectedAdapter, CrawlResult ) from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy # Bot detection test site TEST_URL = "https://bot.sannysoft.com" def analyze_bot_detection(result: CrawlResult) -> dict: """Analyze bot detection results from the page""" detections = { "webdriver": False, "headless": False, "automation": False, "user_agent": False, "total_tests": 0, "failed_tests": 0 } if not result.success or not result.html: return detections # Look for specific test results in the HTML html_lower = result.html.lower() # Check for common bot indicators if "webdriver" in html_lower and ("fail" in html_lower or "true" in html_lower): detections["webdriver"] = True detections["failed_tests"] += 1 if "headless" in html_lower and ("fail" in html_lower or "true" in html_lower): detections["headless"] = True detections["failed_tests"] += 1 if "automation" in html_lower and "detected" in html_lower: detections["automation"] = True detections["failed_tests"] += 1 # Count total tests (approximate) detections["total_tests"] = html_lower.count("test") + html_lower.count("check") return detections async def test_browser_mode(adapter_name: str, adapter=None): """Test a browser mode and return results""" print(f"\n{'='*60}") print(f"Testing: {adapter_name}") print(f"{'='*60}") browser_config = BrowserConfig( headless=False, # Run in headed mode for better results verbose=True, viewport_width=1920, viewport_height=1080, ) if adapter: # Use undetected mode crawler_strategy = AsyncPlaywrightCrawlerStrategy( browser_config=browser_config, browser_adapter=adapter ) crawler = AsyncWebCrawler( crawler_strategy=crawler_strategy, config=browser_config ) else: # Use regular mode crawler = AsyncWebCrawler(config=browser_config) async with crawler: config = CrawlerRunConfig( delay_before_return_html=3.0, # Let detection scripts run wait_for_images=True, screenshot=True, simulate_user=False, # Don't simulate for accurate detection ) result = await crawler.arun(url=TEST_URL, config=config) print(f"\nāœ“ Success: {result.success}") print(f"āœ“ Status Code: {result.status_code}") if result.success: # Analyze detection results detections = analyze_bot_detection(result) print(f"\nšŸ” Bot Detection Analysis:") print(f" - WebDriver Detected: {'āŒ Yes' if detections['webdriver'] else 'āœ… No'}") print(f" - Headless Detected: {'āŒ Yes' if detections['headless'] else 'āœ… No'}") print(f" - Automation Detected: {'āŒ Yes' if detections['automation'] else 'āœ… No'}") print(f" - Failed Tests: {detections['failed_tests']}") # Show some content if result.markdown.raw_markdown: print(f"\nContent preview:") lines = result.markdown.raw_markdown.split('\n') for line in lines[:20]: # Show first 20 lines if any(keyword in line.lower() for keyword in ['test', 'pass', 'fail', 'yes', 'no']): print(f" {line.strip()}") return result, detections if result.success else {} async def main(): """Run the comparison""" print("šŸ¤– Crawl4AI - Bot Detection Test") print(f"Testing at: {TEST_URL}") print("This site runs various browser fingerprinting tests\n") # Test regular browser regular_result, regular_detections = await test_browser_mode("Regular Browser") # Small delay await asyncio.sleep(2) # Test undetected browser undetected_adapter = UndetectedAdapter() undetected_result, undetected_detections = await test_browser_mode( "Undetected Browser", undetected_adapter ) # Summary comparison print(f"\n{'='*60}") print("COMPARISON SUMMARY") print(f"{'='*60}") print(f"\n{'Test':<25} {'Regular':<15} {'Undetected':<15}") print(f"{'-'*55}") if regular_detections and undetected_detections: print(f"{'WebDriver Detection':<25} {'āŒ Detected' if regular_detections['webdriver'] else 'āœ… Passed':<15} {'āŒ Detected' if undetected_detections['webdriver'] else 'āœ… Passed':<15}") print(f"{'Headless Detection':<25} {'āŒ Detected' if regular_detections['headless'] else 'āœ… Passed':<15} {'āŒ Detected' if undetected_detections['headless'] else 'āœ… Passed':<15}") print(f"{'Automation Detection':<25} {'āŒ Detected' if regular_detections['automation'] else 'āœ… Passed':<15} {'āŒ Detected' if undetected_detections['automation'] else 'āœ… Passed':<15}") print(f"{'Failed Tests':<25} {regular_detections['failed_tests']:<15} {undetected_detections['failed_tests']:<15}") print(f"\n{'='*60}") if undetected_detections.get('failed_tests', 0) < regular_detections.get('failed_tests', 1): print("āœ… Undetected browser performed better at evading detection!") else: print("ā„¹ļø Both browsers had similar detection results") if __name__ == "__main__": asyncio.run(main())