From 43738c9ed2d8892fc67d7e7249995206ff1ce7b1 Mon Sep 17 00:00:00 2001
From: unclecode <unclecode@kidocode.com>
Date: Sun, 1 Feb 2026 03:45:52 +0000
Subject: [PATCH] Fix can_process_url() to receive normalized URL in deep crawl
 strategies

Pass the normalized absolute URL instead of the raw href to
can_process_url() in the BFS, BFF, and DFS deep crawl strategies.
Each strategy already has the normalized URL for its visited/seen
check, so URL validation and filter chain evaluation now operate
on that same consistent, fully qualified URL.

Fixes #1743
---
 crawl4ai/deep_crawling/bff_strategy.py | 2 +-
 crawl4ai/deep_crawling/bfs_strategy.py | 2 +-
 crawl4ai/deep_crawling/dfs_strategy.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/crawl4ai/deep_crawling/bff_strategy.py b/crawl4ai/deep_crawling/bff_strategy.py
index 5e250ca6..26e8a9a1 100644
--- a/crawl4ai/deep_crawling/bff_strategy.py
+++ b/crawl4ai/deep_crawling/bff_strategy.py
@@ -177,7 +177,7 @@ class BestFirstCrawlingStrategy(DeepCrawlStrategy):
             base_url = normalize_url_for_deep_crawl(url, source_url)
             if base_url in visited:
                 continue
-            if not await self.can_process_url(url, new_depth):
+            if not await self.can_process_url(base_url, new_depth):
                 self.stats.urls_skipped += 1
                 continue
                 
diff --git a/crawl4ai/deep_crawling/bfs_strategy.py b/crawl4ai/deep_crawling/bfs_strategy.py
index dab94532..dfb75927 100644
--- a/crawl4ai/deep_crawling/bfs_strategy.py
+++ b/crawl4ai/deep_crawling/bfs_strategy.py
@@ -170,7 +170,7 @@ class BFSDeepCrawlStrategy(DeepCrawlStrategy):
             base_url = normalize_url_for_deep_crawl(url, source_url)
             if base_url in visited:
                 continue
-            if not await self.can_process_url(url, next_depth):
+            if not await self.can_process_url(base_url, next_depth):
                 self.stats.urls_skipped += 1
                 continue
 
diff --git a/crawl4ai/deep_crawling/dfs_strategy.py b/crawl4ai/deep_crawling/dfs_strategy.py
index 5e592fc1..3e4987f2 100644
--- a/crawl4ai/deep_crawling/dfs_strategy.py
+++ b/crawl4ai/deep_crawling/dfs_strategy.py
@@ -300,7 +300,7 @@ class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
             if not normalized_url or normalized_url in seen:
                 continue
 
-            if not await self.can_process_url(raw_url, next_depth):
+            if not await self.can_process_url(normalized_url, next_depth):
                 self.stats.urls_skipped += 1
                 continue