#!/usr/bin/env python3
"""
Simple test of anti-bot strategy functionality
"""

import asyncio
import os
import sys
import tempfile
import traceback
from pathlib import Path

# Add the project root to Python path
sys.path.insert(0, os.getcwd())

# Minimal static page that every adapter is asked to crawl via a file:// URL.
_TEST_HTML = """<!DOCTYPE html>
<html>
<head>
    <title>Test Page</title>
</head>
<body>
    <h1>Anti-Bot Strategy Test</h1>
    <p>This page tests different browser adapters.</p>
    <p>User-Agent detection test</p>
</body>
</html>
"""

# (strategy key, human-readable description) pairs exercised by the test.
_STRATEGIES = (
    ('default', 'Default Playwright'),
    ('stealth', 'Stealth Mode'),
)


def _select_adapter(strategy):
    """Return a browser adapter instance for *strategy*.

    ``'stealth'`` tries ``crawl4ai.StealthAdapter`` and falls back to
    ``PlaywrightAdapter`` when that raises ``ImportError``; every other
    strategy gets a ``PlaywrightAdapter``. The choice is printed.
    """
    # crawl4ai is imported lazily so that a missing installation is reported
    # by the entry point's "Setup error" handler instead of at module import.
    from crawl4ai.browser_adapter import PlaywrightAdapter

    if strategy == 'stealth':
        try:
            from crawl4ai import StealthAdapter
            adapter = StealthAdapter()
            print("✅ Using StealthAdapter")
        except ImportError:
            print("⚠️ StealthAdapter not available, using PlaywrightAdapter")
            adapter = PlaywrightAdapter()
        return adapter

    adapter = PlaywrightAdapter()
    print("✅ Using PlaywrightAdapter")
    return adapter


def _report_result(result):
    """Print a short summary of one crawl *result*.

    Reads ``success``, ``error_message``, ``metadata`` and ``markdown`` from
    *result*; nothing is returned.
    """
    if not result.success:
        print(f"❌ Crawl failed: {result.error_message}")
        return

    # Treat unset metadata/markdown as empty rather than raising on None.
    metadata = result.metadata or {}
    markdown = result.markdown or ""

    print("✅ Crawl successful")
    print(f" 📄 Title: {metadata.get('title') or 'N/A'}")
    print(f" 📏 Content length: {len(markdown)} chars")

    # Check if user agent info is in content
    if 'User-Agent' in markdown or 'Browser:' in markdown:
        print(" 🔍 User-agent info detected in content")
    else:
        print(" ℹ️ No user-agent info in content")


async def test_antibot_strategies():
    """Test different anti-bot strategies.

    Writes a small HTML page into a temporary directory, crawls it once per
    entry in ``_STRATEGIES`` with ``AsyncWebCrawler`` and prints the outcome
    of each crawl. A failure in one strategy is printed with its traceback
    and does not stop the remaining strategies; a failure during setup
    (for example crawl4ai not being importable) is printed as a setup error.
    Nothing is returned and no exception is propagated from these handlers.
    """
    print("🧪 Testing Anti-Bot Strategies with AsyncWebCrawler")
    print("=" * 60)

    try:
        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig

        # A private temporary directory avoids a fixed /tmp path: it works on
        # any platform, cannot clash with a concurrent run, and is removed
        # when the block exits.
        with tempfile.TemporaryDirectory(prefix="antibot_test_") as tmp_dir:
            page_path = Path(tmp_dir).resolve() / "antibot_test.html"
            page_path.write_text(_TEST_HTML, encoding="utf-8")
            test_url = page_path.as_uri()

            for strategy, description in _STRATEGIES:
                print(f"\n🔍 Testing: {description} (strategy: {strategy})")
                print("-" * 40)

                try:
                    adapter = _select_adapter(strategy)

                    # Configure browser
                    browser_config = BrowserConfig(
                        headless=True,
                        browser_type="chromium",
                    )

                    # Configure crawler
                    crawler_config = CrawlerRunConfig(
                        cache_mode="bypass",
                    )

                    # Run crawler
                    async with AsyncWebCrawler(
                        config=browser_config,
                        browser_adapter=adapter,
                    ) as crawler:
                        result = await crawler.arun(
                            url=test_url,
                            config=crawler_config,
                        )
                        _report_result(result)

                except Exception as e:
                    # Keep going: one broken strategy should not hide the
                    # results of the others.
                    print(f"❌ Error testing {strategy}: {e}")
                    traceback.print_exc()

        print("\n🎉 Anti-bot strategy testing completed!")

    except Exception as e:
        print(f"❌ Setup error: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(test_antibot_strategies())