chore: Raise an exception with a clear message when the body tag is missing from the fetched HTML. The message should advise users to add an appropriate wait_for condition to wait until the body tag is loaded into the DOM.

fixes: https://github.com/unclecode/crawl4ai/issues/804
This commit is contained in:
Aravind Karnam
2025-03-18 15:26:20 +05:30
parent 84883be513
commit 9109ecd8fc

View File

@@ -862,6 +862,8 @@ class WebScrapingStrategy(ContentScrapingStrategy):
parser_type = kwargs.get("parser", "lxml") parser_type = kwargs.get("parser", "lxml")
soup = BeautifulSoup(html, parser_type) soup = BeautifulSoup(html, parser_type)
body = soup.body body = soup.body
if body is None:
raise Exception("'<body>' tag is not found in fetched html. Consider adding wait_for=\"css:body\" to wait for body tag to be loaded into DOM.")
base_domain = get_base_domain(url) base_domain = get_base_domain(url)
try: try: