fix: remove trailing slash when the path is empty. The trailing slash was causing duplicate crawls.
This commit is contained in:
@@ -2002,7 +2002,7 @@ def normalize_url_for_deep_crawl(href, base_url):
|
||||
normalized = urlunparse((
|
||||
parsed.scheme,
|
||||
netloc,
|
||||
parsed.path.rstrip('/') or '/', # Normalize trailing slash
|
||||
parsed.path.rstrip('/'), # Normalize trailing slash
|
||||
parsed.params,
|
||||
query,
|
||||
fragment
|
||||
@@ -2030,7 +2030,7 @@ def efficient_normalize_url_for_deep_crawl(href, base_url):
|
||||
normalized = urlunparse((
|
||||
parsed.scheme,
|
||||
parsed.netloc.lower(),
|
||||
parsed.path,
|
||||
parsed.path.rstrip('/'),
|
||||
parsed.params,
|
||||
parsed.query,
|
||||
'' # Remove fragment
|
||||
|
||||
Reference in New Issue
Block a user