fix: deep-copy target elements before appending them to the new content container (https://github.com/unclecode/crawl4ai/issues/902)
This commit is contained in:
@@ -28,6 +28,7 @@ from lxml import etree
|
|||||||
from lxml import html as lhtml
|
from lxml import html as lhtml
|
||||||
from typing import List
|
from typing import List
|
||||||
from .models import ScrapingResult, MediaItem, Link, Media, Links
|
from .models import ScrapingResult, MediaItem, Link, Media, Links
|
||||||
|
import copy
|
||||||
|
|
||||||
# Pre-compile regular expressions for Open Graph and Twitter metadata
|
# Pre-compile regular expressions for Open Graph and Twitter metadata
|
||||||
OG_REGEX = re.compile(r"^og:")
|
OG_REGEX = re.compile(r"^og:")
|
||||||
@@ -911,7 +912,7 @@ class WebScrapingStrategy(ContentScrapingStrategy):
|
|||||||
for_content_targeted_element.extend(body.select(target_element))
|
for_content_targeted_element.extend(body.select(target_element))
|
||||||
content_element = soup.new_tag("div")
|
content_element = soup.new_tag("div")
|
||||||
for el in for_content_targeted_element:
|
for el in for_content_targeted_element:
|
||||||
content_element.append(el)
|
content_element.append(copy.deepcopy(el))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE")
|
self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE")
|
||||||
return None
|
return None
|
||||||
@@ -1539,7 +1540,7 @@ class LXMLWebScrapingStrategy(WebScrapingStrategy):
|
|||||||
for target_element in target_elements:
|
for target_element in target_elements:
|
||||||
for_content_targeted_element.extend(body.cssselect(target_element))
|
for_content_targeted_element.extend(body.cssselect(target_element))
|
||||||
content_element = lhtml.Element("div")
|
content_element = lhtml.Element("div")
|
||||||
content_element.extend(for_content_targeted_element)
|
content_element.extend(copy.deepcopy(for_content_targeted_element))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE")
|
self._log("error", f"Error with target element detection: {str(e)}", "SCRAPE")
|
||||||
return None
|
return None
|
||||||
|
|||||||
Reference in New Issue
Block a user