From 5d9213a0e9e7686c394385ed50f586b90e0dd6a8 Mon Sep 17 00:00:00 2001
From: ntohidi
Date: Thu, 12 Jun 2025 12:21:40 +0200
Subject: [PATCH] fix: Update JavaScript execution in
 AsyncPlaywrightCrawlerStrategy to handle script errors and add basic download
 test case. ref #1215

---
Review notes (this section is not part of the commit message):
- Leading whitespace in this patch was reconstructed after extraction
  flattened it; verify the context lines against the tree before applying
  (or apply with `git apply --ignore-whitespace`).
- The `index` hashes are carried over from the original patch and do not
  describe the revised content below.

 crawl4ai/async_crawler_strategy.py  | 11 ++++++++++-
 tests/general/test_download_file.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 tests/general/test_download_file.py

diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py
index d349388f..c0bf6ec5 100644
--- a/crawl4ai/async_crawler_strategy.py
+++ b/crawl4ai/async_crawler_strategy.py
@@ -1596,12 +1596,21 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
                     # then wait for the new page to load before continuing
                     result = None
                     try:
+                        # The user script is run as the body of an inner async
+                        # function, so it may contain statements (e.g.
+                        # `const link = ...; link.click();`). Interpolating it
+                        # after `const script_result =` was invalid JavaScript
+                        # for such scripts and Playwright raised
+                        # "SyntaxError: Unexpected token 'const'". A script must
+                        # now `return` explicitly to produce a result value.
                         result = await page.evaluate(
                             f"""
                         (async () => {{
                             try {{
-                                const script_result = {script};
+                                const script_result = await (async () => {{
+                                    {script}
+                                }})();
                                 return {{ success: true, result: script_result }};
                             }} catch (err) {{
                                 return {{ success: false, error: err.toString(), stack: err.stack }};
                             }}
diff --git a/tests/general/test_download_file.py b/tests/general/test_download_file.py
new file mode 100644
index 00000000..ca552779
--- /dev/null
+++ b/tests/general/test_download_file.py
@@ -0,0 +1,46 @@
+"""Manual smoke test: download a file triggered by injected JavaScript."""
+
+import asyncio
+from pathlib import Path
+
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+
+
+async def test_basic_download():
+    """Click the first ``.exe`` link on python.org and list downloaded files.
+
+    Needs network access. The outcome is printed rather than asserted because
+    it depends on the live page.
+    """
+    # Explicit downloads folder; this is the same location that is used by
+    # default when downloads_path is not given (~/.crawl4ai/downloads).
+    downloads_path = Path.home() / ".crawl4ai" / "downloads"
+    downloads_path.mkdir(parents=True, exist_ok=True)
+    browser_config = BrowserConfig(
+        accept_downloads=True,
+        downloads_path=str(downloads_path),
+    )
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        run_config = CrawlerRunConfig(
+            # Multi-statement script: the form that previously failed with
+            # "SyntaxError: Unexpected token 'const'".
+            js_code="""
+                const link = document.querySelector('a[href$=".exe"]');
+                if (link) { link.click(); }
+            """,
+            delay_before_return_html=5,
+        )
+        result = await crawler.arun(
+            "https://www.python.org/downloads/", config=run_config
+        )
+
+        if result.downloaded_files:
+            print("Downloaded files:")
+            for file_path in result.downloaded_files:
+                print("•", file_path)
+        else:
+            print("No files downloaded.")
+
+
+if __name__ == "__main__":
+    asyncio.run(test_basic_download())