#!/usr/bin/env python3
"""
Simple test of anti-bot strategy functionality
"""

import asyncio
import os
import sys
import tempfile
import traceback
from pathlib import Path

import pytest

# Add the project root to Python path
sys.path.insert(0, os.getcwd())

# Page content written to a temporary file and crawled through a file:// URL.
# NOTE(review): this text contains no HTML markup even though the test prints
# the crawled page's title -- confirm the fixture has not lost its tags.
_TEST_HTML = """ Test Page

Anti-Bot Strategy Test

This page tests different browser adapters.

User-Agent detection test

"""

# (strategy key, human-readable description) pairs exercised by the test.
_STRATEGIES = (
    ("default", "Default Playwright"),
    ("stealth", "Stealth Mode"),
)


def _select_adapter(strategy, default_adapter_cls):
    """Build the browser adapter for *strategy*.

    Args:
        strategy: Strategy key; ``"stealth"`` requests ``StealthAdapter``,
            any other value uses *default_adapter_cls*.
        default_adapter_cls: Adapter class instantiated for the default
            strategy, and as the fallback when ``StealthAdapter`` raises
            ``ImportError``.

    Returns:
        A newly constructed adapter instance.
    """
    if strategy != "stealth":
        adapter = default_adapter_cls()
        print("โœ… Using PlaywrightAdapter")
        return adapter

    try:
        from crawl4ai import StealthAdapter

        adapter = StealthAdapter()
    except ImportError:
        # Fall back rather than fail when the stealth adapter cannot be loaded.
        print("โš ๏ธ StealthAdapter not available, using PlaywrightAdapter")
        return default_adapter_cls()

    print("โœ… Using StealthAdapter")
    return adapter


@pytest.mark.asyncio
async def test_antibot_strategies():
    """Test different anti-bot strategies

    Writes a small page into a temporary directory, crawls it once per entry
    in ``_STRATEGIES`` with the matching browser adapter, and prints a short
    report for each crawl.

    Every strategy is attempted even if an earlier one fails; failures are
    collected and reported together at the end so that pytest (and the script
    exit status) reflect them instead of silently passing.

    Raises:
        Exception: Whatever the ``crawl4ai`` imports raise; it is printed as
            a setup error and then re-raised.
        AssertionError: If any strategy raised or returned an unsuccessful
            crawl result.
    """
    print("๐Ÿงช Testing Anti-Bot Strategies with AsyncWebCrawler")
    print("=" * 60)

    try:
        # Imported lazily so an unusable install is reported as a setup error.
        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
        from crawl4ai.browser_adapter import PlaywrightAdapter
    except Exception as e:
        print(f"โŒ Setup error: {e}")
        traceback.print_exc()
        raise

    failures = []

    # A private temporary directory avoids the fixed /tmp path (portable,
    # no clashes between concurrent runs) and is removed on exit.
    with tempfile.TemporaryDirectory() as tmp_dir:
        page_path = Path(tmp_dir) / "antibot_test.html"
        page_path.write_text(_TEST_HTML, encoding="utf-8")
        test_url = page_path.as_uri()

        for strategy, description in _STRATEGIES:
            print(f"\n๐Ÿ” Testing: {description} (strategy: {strategy})")
            print("-" * 40)

            try:
                adapter = _select_adapter(strategy, PlaywrightAdapter)

                # Configure browser
                browser_config = BrowserConfig(
                    headless=True, browser_type="chromium"
                )

                # Configure crawler
                crawler_config = CrawlerRunConfig(cache_mode="bypass")

                # Run crawler
                async with AsyncWebCrawler(
                    config=browser_config, browser_adapter=adapter
                ) as crawler:
                    result = await crawler.arun(
                        url=test_url, config=crawler_config
                    )

                if not result.success:
                    print(f"โŒ Crawl failed: {result.error_message}")
                    failures.append(
                        f"{strategy}: crawl failed: {result.error_message}"
                    )
                    continue

                print("โœ… Crawl successful")
                print(f" ๐Ÿ“„ Title: {result.metadata.get('title', 'N/A')}")
                print(f" ๐Ÿ“ Content length: {len(result.markdown)} chars")

                # Check if user agent info is in content
                if (
                    "User-Agent" in result.markdown
                    or "Browser:" in result.markdown
                ):
                    print(" ๐Ÿ” User-agent info detected in content")
                else:
                    print(" โ„น๏ธ No user-agent info in content")

            except Exception as e:
                # Keep going so the remaining strategies are still exercised;
                # the failure is surfaced by the assertion below.
                print(f"โŒ Error testing {strategy}: {e}")
                traceback.print_exc()
                failures.append(f"{strategy}: {e!r}")

    print("\n๐ŸŽ‰ Anti-bot strategy testing completed!")

    assert not failures, "Anti-bot strategy failures: " + "; ".join(failures)


if __name__ == "__main__":
    asyncio.run(test_antibot_strategies())