chore: Raise an exception with a clear message when the body tag is missing from the fetched HTML. The message should advise users to add an appropriate wait_for condition to wait until the body tag is loaded into the DOM.
fixes: https://github.com/unclecode/crawl4ai/issues/804
This commit is contained in:
@@ -862,6 +862,8 @@ class WebScrapingStrategy(ContentScrapingStrategy):
|
|||||||
parser_type = kwargs.get("parser", "lxml")
|
parser_type = kwargs.get("parser", "lxml")
|
||||||
soup = BeautifulSoup(html, parser_type)
|
soup = BeautifulSoup(html, parser_type)
|
||||||
body = soup.body
|
body = soup.body
|
||||||
|
if body is None:
|
||||||
|
raise Exception("'<body>' tag is not found in fetched html. Consider adding wait_for=\"css:body\" to wait for body tag to be loaded into DOM.")
|
||||||
base_domain = get_base_domain(url)
|
base_domain = get_base_domain(url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user