diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py index 0a93352b..814e4b2b 100644 --- a/crawl4ai/content_scraping_strategy.py +++ b/crawl4ai/content_scraping_strategy.py @@ -1535,17 +1535,11 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy): content_element = None if target_elements: try: - content_element = lhtml.Element("div") + for_content_targeted_element = [] for target_element in target_elements: - # Creating a fresh parse of HTML for each selector to prevent element extraction - # from modifying the original DOM tree; this keeps the original body - # intact for link processing. This is better performant than deepcopy. - fresh_body = lhtml.document_fromstring(html) - for_content_targeted_element = [] - for target_element in target_elements: - for_content_targeted_element.extend(fresh_body.cssselect(target_element)) - content_element = lhtml.Element("div") - content_element.extend(for_content_targeted_element) + for_content_targeted_element.extend(body.cssselect(target_element)) + content_element = lhtml.Element("div") + content_element.extend(for_content_targeted_element) except Exception as e: self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE") return None