fix: remove trailing slash when the path is empty. This is causing duplicate crawls.

This commit is contained in:
Aravind Karnam
2025-03-21 13:41:31 +05:30
parent 8b761f232b
commit 6740e87b4d

View File

@@ -2002,7 +2002,7 @@ def normalize_url_for_deep_crawl(href, base_url):
normalized = urlunparse((
parsed.scheme,
netloc,
parsed.path.rstrip('/') or '/', # Normalize trailing slash
parsed.path.rstrip('/'), # Normalize trailing slash
parsed.params,
query,
fragment
@@ -2030,7 +2030,7 @@ def efficient_normalize_url_for_deep_crawl(href, base_url):
normalized = urlunparse((
parsed.scheme,
parsed.netloc.lower(),
parsed.path,
parsed.path.rstrip('/'),
parsed.params,
parsed.query,
'' # Remove fragment