From 73fda8a6ec8ef35cdb63e1bae74411976d4e63b9 Mon Sep 17 00:00:00 2001
From: Aravind Karnam <aravind.karanam@gmail.com>
Date: Thu, 3 Apr 2025 13:47:13 +0530
Subject: [PATCH] fix: address the PR review (parse the per-selector fresh
 body with lxml instead of html.parser):
 https://github.com/unclecode/crawl4ai/pull/899#discussion_r2024639193

---
 crawl4ai/content_scraping_strategy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py
index 91b1c674..eaed0816 100644
--- a/crawl4ai/content_scraping_strategy.py
+++ b/crawl4ai/content_scraping_strategy.py
@@ -905,7 +905,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):
                     # Creating a fresh parse of HTML for each selector to prevent element extraction
                     # from modifying the original DOM tree; this keeps the original body 
                     # intact for link processing. This is better performant than deepcopy.
-                    fresh_body = BeautifulSoup(html, "html.parser")
+                    fresh_body = BeautifulSoup(html, "lxml")
                     for_content_targeted_element.extend(fresh_body.select(target_element))
                 content_element = soup.new_tag("div")
                 for el in for_content_targeted_element: