Fix raw URL parsing logic to correctly handle "raw://" and "raw:" prefixes. REF #1118
This commit is contained in:
@@ -466,8 +466,14 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
|
|||||||
console_messages=captured_console,
|
console_messages=captured_console,
|
||||||
)
|
)
|
||||||
|
|
||||||
elif url.startswith("raw:"):
|
#####
|
||||||
|
# The old code tested url[:4] == "raw:" first. Because "raw://" also starts with "raw:", that test matched both prefixes, so a "raw://..." URL was sliced with url[4:] and kept a stray leading "//", which is incorrect.
|
||||||
|
# Fix: Check for "raw://" first, then "raw:"
|
||||||
|
# Also, the prefix "raw://" is actually 6 characters long, not 7, so it should be sliced accordingly: url[6:]
|
||||||
|
#####
|
||||||
|
elif url.startswith("raw://") or url.startswith("raw:"):
|
||||||
# Process raw HTML content
|
# Process raw HTML content
|
||||||
|
# raw_html = url[4:] if url[:4] == "raw:" else url[7:]
|
||||||
raw_html = url[6:] if url.startswith("raw://") else url[4:]
|
raw_html = url[6:] if url.startswith("raw://") else url[4:]
|
||||||
html = raw_html
|
html = raw_html
|
||||||
if config.screenshot:
|
if config.screenshot:
|
||||||
|
|||||||
Reference in New Issue
Block a user