Merge branch 'main' of https://github.com/unclecode/crawl4ai
This commit is contained in:
@@ -477,7 +477,7 @@ class AsyncWebCrawler:
|
|||||||
word_count_threshold=word_count_threshold,
|
word_count_threshold=word_count_threshold,
|
||||||
css_selector=css_selector,
|
css_selector=css_selector,
|
||||||
only_text=kwargs.pop("only_text", False),
|
only_text=kwargs.pop("only_text", False),
|
||||||
image_description_min_word_threshold=kwargs.get(
|
image_description_min_word_threshold=kwargs.pop(
|
||||||
"image_description_min_word_threshold", IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD
|
"image_description_min_word_threshold", IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD
|
||||||
),
|
),
|
||||||
content_filter = content_filter,
|
content_filter = content_filter,
|
||||||
|
|||||||
@@ -283,7 +283,7 @@ class LocalSeleniumCrawlerStrategy(CrawlerStrategy):
|
|||||||
print(f"[LOG] ✅ Crawled {url} successfully!")
|
print(f"[LOG] ✅ Crawled {url} successfully!")
|
||||||
|
|
||||||
return html
|
return html
|
||||||
except InvalidArgumentException:
|
except InvalidArgumentException as e:
|
||||||
if not hasattr(e, 'msg'):
|
if not hasattr(e, 'msg'):
|
||||||
e.msg = sanitize_input_encode(str(e))
|
e.msg = sanitize_input_encode(str(e))
|
||||||
raise InvalidArgumentException(f"Failed to crawl {url}: {e.msg}")
|
raise InvalidArgumentException(f"Failed to crawl {url}: {e.msg}")
|
||||||
|
|||||||
Reference in New Issue
Block a user