fix: Update JavaScript execution in AsyncPlaywrightCrawlerStrategy to handle script errors and add basic download test case. ref #1215
This commit is contained in:
@@ -1596,12 +1596,31 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
# then wait for the new page to load before continuing
|
# then wait for the new page to load before continuing
|
||||||
result = None
|
result = None
|
||||||
try:
|
try:
|
||||||
|
# OLD VERSION:
|
||||||
|
# result = await page.evaluate(
|
||||||
|
# f"""
|
||||||
|
# (async () => {{
|
||||||
|
# try {{
|
||||||
|
# const script_result = {script};
|
||||||
|
# return {{ success: true, result: script_result }};
|
||||||
|
# }} catch (err) {{
|
||||||
|
# return {{ success: false, error: err.toString(), stack: err.stack }};
|
||||||
|
# }}
|
||||||
|
# }})();
|
||||||
|
# """
|
||||||
|
# )
|
||||||
|
|
||||||
|
# """ NEW VERSION:
|
||||||
|
# When {script} contains statements (e.g., const link = …; link.click();),
|
||||||
|
# this forms invalid JavaScript, causing Playwright execution error: SyntaxError: Unexpected token 'const'.
|
||||||
|
# """
|
||||||
result = await page.evaluate(
|
result = await page.evaluate(
|
||||||
f"""
|
f"""
|
||||||
(async () => {{
|
(async () => {{
|
||||||
try {{
|
try {{
|
||||||
const script_result = {script};
|
return await (async () => {{
|
||||||
return {{ success: true, result: script_result }};
|
{script}
|
||||||
|
}})();
|
||||||
}} catch (err) {{
|
}} catch (err) {{
|
||||||
return {{ success: false, error: err.toString(), stack: err.stack }};
|
return {{ success: false, error: err.toString(), stack: err.stack }};
|
||||||
}}
|
}}
|
||||||
|
|||||||
34
tests/general/test_download_file.py
Normal file
34
tests/general/test_download_file.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import asyncio
|
||||||
|
from crawl4ai import CrawlerRunConfig, AsyncWebCrawler, BrowserConfig
|
||||||
|
from pathlib import Path
|
||||||
|
import os
|
||||||
|
|
||||||
|
async def test_basic_download():
    """Smoke-test a click-triggered file download through ``AsyncWebCrawler``.

    Crawls the python.org downloads page with a multi-statement ``js_code``
    snippet that clicks the first link whose ``href`` ends in ``.exe``, then
    prints every path listed in ``result.downloaded_files`` (or a notice when
    the list is empty).

    The function only reports the outcome: it makes no assertions, returns
    nothing, and requires network access.
    """
    # Download directory passed to the browser explicitly. The original note
    # describes ~/.crawl4ai/downloads as the library default as well, so this
    # is not actually a custom location -- TODO confirm against BrowserConfig.
    downloads_dir = Path.home() / ".crawl4ai" / "downloads"
    downloads_dir.mkdir(parents=True, exist_ok=True)

    browser_config = BrowserConfig(
        accept_downloads=True,
        # Kept as a plain string, matching what os.path.join produced before.
        downloads_path=str(downloads_dir),
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        run_config = CrawlerRunConfig(
            # Deliberately a sequence of statements rather than a single
            # expression: this is the script shape the accompanying crawler
            # change is meant to support.
            js_code="""
                const link = document.querySelector('a[href$=".exe"]');
                if (link) { link.click(); }
            """,
            # Presumably seconds; gives the triggered download time to start
            # before the page HTML is returned -- TODO confirm the unit.
            delay_before_return_html=5,
        )
        result = await crawler.arun(
            "https://www.python.org/downloads/", config=run_config
        )

        if result.downloaded_files:
            print("Downloaded files:")
            for file_path in result.downloaded_files:
                print("•", file_path)
        else:
            print("No files downloaded.")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Allow the download smoke test to be run directly as a script, without
    # going through a test runner.
    asyncio.run(test_basic_download())
|
||||||
|
|
||||||
Reference in New Issue
Block a user