From dd17ed0e63df84fd6e6cd76f4f131bb56121a0f5 Mon Sep 17 00:00:00 2001 From: UncleCode Date: Fri, 18 Oct 2024 12:35:09 +0800 Subject: [PATCH] Rename some flag names and introduce the magic flag. --- crawl4ai/async_crawler_strategy.py | 9 +++++---- crawl4ai/content_scrapping_strategy.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index d4c94fee..307dee20 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -309,7 +309,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy): ) await context.set_extra_http_headers(self.headers) - if kwargs.get("override_navigator", False): + if kwargs.get("override_navigator", False) or kwargs.get("simulate_user", False) or kwargs.get("magic", False): # Inject scripts to override navigator properties await context.add_init_script(""" // Pass the Permissions Test. @@ -344,8 +344,9 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy): # await stealth_async(page) #, stealth_config) # Add console message and error logging - page.on("console", lambda msg: print(f"Console: {msg.text}")) - page.on("pageerror", lambda exc: print(f"Page Error: {exc}")) + if kwargs.get("log_console", False): + page.on("console", lambda msg: print(f"Console: {msg.text}")) + page.on("pageerror", lambda exc: print(f"Page Error: {exc}")) try: if self.verbose: @@ -403,7 +404,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy): # Check for on execution event await self.execute_hook('on_execution_started', page) - if kwargs.get("simulate_user", False): + if kwargs.get("simulate_user", False) or kwargs.get("magic", False): # Simulate user interactions await page.mouse.move(100, 100) await page.mouse.down() diff --git a/crawl4ai/content_scrapping_strategy.py b/crawl4ai/content_scrapping_strategy.py index 64707f74..8a5cc8ad 100644 --- a/crawl4ai/content_scrapping_strategy.py +++ 
b/crawl4ai/content_scrapping_strategy.py @@ -293,7 +293,7 @@ class WebScrappingStrategy(ContentScrappingStrategy): Suggestions: - Try calling the crawl function with these parameters: - simulate_user=True, override_navigator=True + magic=True, - Set headless=False to visualize what's happening on the page. If the issue persists, please check the page's structure and any potential anti-crawling measures.