From d84508b4d5dad7c3b8f9b772cedfdc08c89ab2a9 Mon Sep 17 00:00:00 2001
From: Aravind Karnam <aravind.karanam@gmail.com>
Date: Sat, 12 Apr 2025 12:05:17 +0530
Subject: [PATCH] fix: revert to the old target_elements selection code in
 regular webscraping strategy

---
 crawl4ai/content_scraping_strategy.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py
index 81fe9d4e..0a93352b 100644
--- a/crawl4ai/content_scraping_strategy.py
+++ b/crawl4ai/content_scraping_strategy.py
@@ -908,11 +908,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):
             try:
                 for_content_targeted_element = []
                 for target_element in target_elements:
-                    # Creating a fresh parse of HTML for each selector to prevent element extraction
-                    # from modifying the original DOM tree; this keeps the original body 
-                    # intact for link processing. This is better performant than deepcopy.
-                    fresh_body = BeautifulSoup(html, "lxml")
-                    for_content_targeted_element.extend(fresh_body.select(target_element))
+                    for_content_targeted_element.extend(body.select(target_element))
                 content_element = soup.new_tag("div")
                 for el in for_content_targeted_element:
                     content_element.append(el)
@@ -920,7 +916,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):
                 self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE")
                 return None
         else:
-            content_element = body      
+            content_element = body     
 
         kwargs["exclude_social_media_domains"] = set(
             kwargs.get("exclude_social_media_domains", []) + SOCIAL_MEDIA_DOMAINS