diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py
index 5b8af794..fe725317 100644
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -1958,6 +1958,14 @@ def normalize_url(href, base_url):
     if not parsed_base.scheme or not parsed_base.netloc:
         raise ValueError(f"Invalid base URL format: {base_url}")
 
+    # Treat an extension-less final path segment as a directory so relative
+    # links resolve beneath it. File-like bases (e.g. page.html) are left
+    # alone, and the slash is added to the path component so a query string
+    # or fragment on base_url is not corrupted.
+    last_segment = parsed_base.path.rsplit('/', 1)[-1]
+    if last_segment and '.' not in last_segment:
+        base_url = parsed_base._replace(path=parsed_base.path + '/').geturl()
+
     # Use urljoin to handle all cases
     normalized = urljoin(base_url, href.strip())
     return normalized