From cc95d3abd4c11a67a027c8a12621f404251f43c9 Mon Sep 17 00:00:00 2001 From: ntohidi Date: Tue, 3 Jun 2025 11:19:08 +0200 Subject: [PATCH] Fix raw URL parsing logic to correctly handle "raw://" and "raw:" prefixes. REF #1118 --- crawl4ai/async_crawler_strategy.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index 88d94a46..a1873bfd 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -466,8 +466,14 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy): console_messages=captured_console, ) - elif url.startswith("raw:"): + ##### + # Since both "raw:" and "raw://" start with "raw:", the first condition is always true for both, so "raw://" will be sliced as "//...", which is incorrect. + # Fix: Check for "raw://" first, then "raw:" + # Also, the prefix "raw://" is actually 6 characters long, not 7, so it should be sliced accordingly: url[6:] + ##### + elif url.startswith("raw://") or url.startswith("raw:"): # Process raw HTML content + # raw_html = url[4:] if url[:4] == "raw:" else url[7:] raw_html = url[6:] if url.startswith("raw://") else url[4:] html = raw_html if config.screenshot: