Fix raw URL parsing logic to correctly handle "raw://" and "raw:" prefixes. REF #1118

This commit is contained in:
ntohidi
2025-06-03 11:19:08 +02:00
parent 5ce3e682f3
commit cc95d3abd4

View File

@@ -466,8 +466,14 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
console_messages=captured_console,
)
elif url.startswith("raw:"):
#####
# Since both "raw:" and "raw://" start with "raw:", the first condition is always true for both, so "raw://" will be sliced as "//...", which is incorrect.
# Fix: Check for "raw://" first, then "raw:"
# Also, the prefix "raw://" is actually 6 characters long, not 7, so it should be sliced accordingly: url[6:]
#####
elif url.startswith("raw://") or url.startswith("raw:"):
# Process raw HTML content
# raw_html = url[4:] if url[:4] == "raw:" else url[7:]
raw_html = url[6:] if url.startswith("raw://") else url[4:]
html = raw_html
if config.screenshot: