fix: address the PR review: https://github.com/unclecode/crawl4ai/pull/899#discussion_r2024639193

2025-04-03 13:47:13 +05:30
parent 9e16a4bb26
commit 73fda8a6ec
1 changed files with 1 additions and 1 deletions
--- a/crawl4ai/content_scraping_strategy.py
+++ b/crawl4ai/content_scraping_strategy.py
@@ -905,7 +905,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):
                    # Creating a fresh parse of HTML for each selector to prevent element extraction
                    # from modifying the original DOM tree; this keeps the original body 
                    # intact for link processing. This is better performant than deepcopy.
-                    fresh_body = BeautifulSoup(html, "html.parser")
+                    fresh_body = BeautifulSoup(html, "lxml")
                    for_content_targeted_element.extend(fresh_body.select(target_element))
                content_element = soup.new_tag("div")
                for el in for_content_targeted_element: