From d2648eaa39d4232b3de6a27a1170b5fef8ecc389 Mon Sep 17 00:00:00 2001
From: Aravind Karnam
Date: Sat, 19 Apr 2025 20:08:36 +0530
Subject: [PATCH] fix: solved with deepcopy of elements
 https://github.com/unclecode/crawl4ai/issues/902

---
 crawl4ai/content_scraping_strategy.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py
index 814e4b2b..1dfbce84 100644
--- a/crawl4ai/content_scraping_strategy.py
+++ b/crawl4ai/content_scraping_strategy.py
@@ -28,6 +28,7 @@ from lxml import etree
 from lxml import html as lhtml
 from typing import List
 from .models import ScrapingResult, MediaItem, Link, Media, Links
+import copy
 
 # Pre-compile regular expressions for Open Graph and Twitter metadata
 OG_REGEX = re.compile(r"^og:")
@@ -911,7 +912,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):
                     for_content_targeted_element.extend(body.select(target_element))
                 content_element = soup.new_tag("div")
                 for el in for_content_targeted_element:
-                    content_element.append(el)
+                    content_element.append(copy.deepcopy(el))
             except Exception as e:
                 self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE")
                 return None
@@ -1539,7 +1540,7 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
                 for target_element in target_elements:
                     for_content_targeted_element.extend(body.cssselect(target_element))
                 content_element = lhtml.Element("div")
-                content_element.extend(for_content_targeted_element)
+                content_element.extend(copy.deepcopy(for_content_targeted_element))
             except Exception as e:
                 self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE")
                 return None