fix: revert to the old target_elements code in LXMLWebScrapingStrategy

2025-04-12 12:07:04 +05:30
parent d84508b4d5
commit 9fc5d315af
1 changed files with 4 additions and 10 deletions
--- a/crawl4ai/content_scraping_strategy.py
+++ b/crawl4ai/content_scraping_strategy.py
@@ -1535,15 +1535,9 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
            content_element = None
            if target_elements:
                try:
                    content_element = lhtml.Element("div")
                    for target_element in target_elements:
                        # Creating a fresh parse of HTML for each selector to prevent element extraction
                        # from modifying the original DOM tree; this keeps the original body 
                        # intact for link processing. This is better performant than deepcopy.
                        fresh_body = lhtml.document_fromstring(html)
                    for_content_targeted_element = []
                    for target_element in target_elements:
-                            for_content_targeted_element.extend(fresh_body.cssselect(target_element))
+                        for_content_targeted_element.extend(body.cssselect(target_element))
                    content_element = lhtml.Element("div")
                    content_element.extend(for_content_targeted_element)
                except Exception as e: