From d418a04602ebe32d68d248a2995488beec768c61 Mon Sep 17 00:00:00 2001 From: Darwing Medina Date: Wed, 20 Nov 2024 04:52:11 -0600 Subject: [PATCH 1/2] Fix #260: prevent passing duplicated kwargs to scraping_strategy (#269) Thank you for the suggestions. It totally makes sense now. Change to pop operator. --- crawl4ai/async_webcrawler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index 38e429ca..fb8c5290 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -197,8 +197,8 @@ class AsyncWebCrawler: html, word_count_threshold=word_count_threshold, css_selector=css_selector, - only_text=kwargs.get("only_text", False), - image_description_min_word_threshold=kwargs.get( + only_text=kwargs.pop("only_text", False), + image_description_min_word_threshold=kwargs.pop( "image_description_min_word_threshold", IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD ), **kwargs, From 3439f7886d170e05e0c97c804b1057187325c2a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E9=98=BF=E6=B1=9F=28Relakkes?= =?UTF-8?q?=29?= Date: Wed, 20 Nov 2024 20:30:25 +0800 Subject: [PATCH 2/2] fix: crawler strategy exception handling and fixes (#271) --- crawl4ai/crawler_strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawl4ai/crawler_strategy.py b/crawl4ai/crawler_strategy.py index ce802e49..898dcfa8 100644 --- a/crawl4ai/crawler_strategy.py +++ b/crawl4ai/crawler_strategy.py @@ -283,7 +283,7 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy): print(f"[LOG] ✅ Crawled {url} successfully!") return html - except InvalidArgumentException: + except InvalidArgumentException as e: if not hasattr(e, 'msg'): e.msg = sanitize_input_encode(str(e)) raise InvalidArgumentException(f"Failed to crawl {url}: {e.msg}")