Merge branch 'develop' of https://github.com/unclecode/crawl4ai into develop
This commit is contained in:
@@ -242,6 +242,16 @@ class LXMLWebScrapingStrategy(ContentScrapingStrategy):
|
||||
exclude_domains = set(kwargs.get("exclude_domains", []))
|
||||
|
||||
# Process links
|
||||
try:
|
||||
base_element = element.xpath("//head/base[@href]")
|
||||
if base_element:
|
||||
base_href = base_element[0].get("href", "").strip()
|
||||
if base_href:
|
||||
url = base_href
|
||||
except Exception as e:
|
||||
self._log("error", f"Error extracting base URL: {str(e)}", "SCRAPE")
|
||||
pass
|
||||
|
||||
for link in element.xpath(".//a[@href]"):
|
||||
href = link.get("href", "").strip()
|
||||
if not href:
|
||||
|
||||
Reference in New Issue
Block a user