""" Sample script to test the max_scroll_steps parameter implementation """ import asyncio import os import sys # Get the grandparent directory grandparent_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(grandparent_dir) __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) from crawl4ai import AsyncWebCrawler from crawl4ai.async_configs import CrawlerRunConfig async def test_max_scroll_steps(): """ Test the max_scroll_steps parameter with different configurations """ print("šŸš€ Testing max_scroll_steps parameter implementation") print("=" * 60) async with AsyncWebCrawler(verbose=True) as crawler: # Test 1: Without max_scroll_steps (unlimited scrolling) print("\\nšŸ“‹ Test 1: Unlimited scrolling (max_scroll_steps=None)") config1 = CrawlerRunConfig( scan_full_page=True, scroll_delay=0.1, max_scroll_steps=None, # Default behavior verbose=True ) print(f"Config: scan_full_page={config1.scan_full_page}, max_scroll_steps={config1.max_scroll_steps}") try: result1 = await crawler.arun( url="https://example.com", # Simple page for testing config=config1 ) print(f"āœ… Test 1 Success: Crawled {len(result1.markdown)} characters") except Exception as e: print(f"āŒ Test 1 Failed: {e}") # Test 2: With limited scroll steps print("\\nšŸ“‹ Test 2: Limited scrolling (max_scroll_steps=3)") config2 = CrawlerRunConfig( scan_full_page=True, scroll_delay=0.1, max_scroll_steps=3, # Limit to 3 scroll steps verbose=True ) print(f"Config: scan_full_page={config2.scan_full_page}, max_scroll_steps={config2.max_scroll_steps}") try: result2 = await crawler.arun( url="https://techcrunch.com/", # Another test page config=config2 ) print(f"āœ… Test 2 Success: Crawled {len(result2.markdown)} characters") except Exception as e: print(f"āŒ Test 2 Failed: {e}") # Test 3: Test serialization/deserialization print("\\nšŸ“‹ Test 3: Configuration serialization test") config3 = CrawlerRunConfig( scan_full_page=True, max_scroll_steps=5, scroll_delay=0.2 ) # Test to_dict config_dict = config3.to_dict() print(f"Serialized max_scroll_steps: {config_dict.get('max_scroll_steps')}") # Test from_kwargs config4 = CrawlerRunConfig.from_kwargs({ 'scan_full_page': True, 'max_scroll_steps': 7, 'scroll_delay': 0.3 }) print(f"Deserialized max_scroll_steps: {config4.max_scroll_steps}") print("āœ… Test 3 Success: Serialization works correctly") # Test 4: Edge case - max_scroll_steps = 0 print("\\nšŸ“‹ Test 4: Edge case (max_scroll_steps=0)") config5 = CrawlerRunConfig( scan_full_page=True, max_scroll_steps=0, # Should not scroll at all verbose=True ) try: result5 = await crawler.arun( url="https://techcrunch.com/", config=config5 ) print(f"āœ… Test 4 Success: No scrolling performed, crawled {len(result5.markdown)} characters") except Exception as e: print(f"āŒ Test 4 Failed: {e}") print("\\n" + "=" * 60) print("šŸŽ‰ All tests completed!") print("\\nThe max_scroll_steps parameter is working correctly:") print("- None: Unlimited scrolling (default behavior)") print("- Positive integer: Limits scroll steps to that number") print("- 0: No scrolling performed") print("- Properly serializes/deserializes in config") if __name__ == "__main__": print("Starting max_scroll_steps test...") asyncio.run(test_max_scroll_steps())